머신러닝 기반 2023 Worlds 참가 팀 선수 분석

개발 노트/머신러닝

머신러닝 기반 2023 Worlds 참가 팀 선수 분석

LeeInGyu 2023. 10. 17. 22:42

def _read_lines(path):
    """Return every line of the text file at *path*, terminators included.

    The file is opened inside a ``with`` block so the handle is closed as
    soon as the lines have been read.
    """
    with open(path) as handle:
        return handle.readlines()


# One text export per role.
top_data = _read_lines('top.txt')
jgl_data = _read_lines('jungle.txt')
mid_data = _read_lines('mid.txt')
adc_data = _read_lines('adc.txt')
sup_data = _read_lines('support.txt')

# Keep the first line of the top-lane file and skip the first line of every
# other file (presumably a repeated header row -- confirm against the exports).
all_line_data = top_data + jgl_data[1:] + mid_data[1:] + adc_data[1:] + sup_data[1:]

import pandas as pd

# Strip only the line terminator from each raw line.  The previous
# split("\n")[:-1] form returned an empty list for a line that had no trailing
# newline (typically the last line of a file), silently dropping that row.
data_list = [[raw_line.rstrip("\n")] for raw_line in all_line_data]

# Split every remaining line into its tab-separated fields.
edit_list = [text.split("\t") for row in data_list for text in row]

pd_data = pd.DataFrame(edit_list)

# No column names are supplied, so the CSV gets a positional 0..N header row.
pd_data.to_csv("PlayerState.csv", index=False)

# After loading, clean up the percent-formatted columns

import pandas as pd
import warnings
import numpy as np

# Silence every warning from here on.
warnings.filterwarnings(action='ignore')

# Turn '-' cells into NaN (presumably the export's placeholder for a missing
# value -- confirm) and drop every row that contains one.  np.nan is used
# because the np.NaN alias was removed in NumPy 2.0.
df = pd.read_csv("PlayerState.csv").replace('-', np.nan).dropna(axis=0)
# Promote the first remaining row to column names, then remove that row.
df = df.rename(columns=df.iloc[0]).drop(df.index[0])
df.shape

(1124, 25)

# Convert the percent-formatted columns into fractions.
for pct_col in ['Win rate', 'KP%', 'FB %', 'FB Victim']:
    # Drop the last character (presumably '%') and divide by 100; the leading
    # '0' keeps float() from failing when nothing precedes that character.
    df[pct_col] = [float('0' + raw[:-1]) / 100 for raw in df[pct_col]]

df.shape

(1124, 25)

# Keep only the columns that remain once the four listed ones are removed.
worlds_2023 = df.drop(columns=['Player', 'Country', 'Position', 'Games'])
worlds_2023.shape

(1124, 21)

lines = ['TOP', 'JUNGLE', 'MID', 'ADC', 'SUPPORT']
spring_line = []

# Build one float32 frame per position, in the same order as `lines`.
for position in lines:
    per_position = (
        df.loc[df['Position'] == position]
        .drop(columns=['Player', 'Country', 'Position', 'Games'])
        .astype('float32')
    )
    spring_line.append(per_position)

len(spring_line), len(spring_line[0])

(5, 279)

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid for the per-position random forests.
# Only values RandomForestClassifier accepts are listed: min_samples_split
# must be an int >= 2 and min_samples_leaf an int >= 1.  The former entries
# min_samples_split=1 and min_samples_leaf=0 made those candidates fail to
# fit, and n_estimators listed 100 twice, repeating identical candidates.
params = {
    'n_estimators': [90, 100],
    'max_depth': [5, 6, 7],
    'min_samples_split': [2, 3],
    'min_samples_leaf': [1, 2],
}

# One tuned random forest per position, stored in the same order as `lines`.
result_table = []

for position, frame in zip(lines, spring_line):
    X = frame.drop(['Win rate'], axis=1).values
    # Bucket the win rate into 0.25-wide integer classes (0.0-0.25 -> 0, ...).
    y = (frame['Win rate'].values / 0.25).astype('int')
    Forest_model = RandomForestClassifier(random_state=0, n_jobs=-1)
    Grid_model = GridSearchCV(estimator=Forest_model, param_grid=params, verbose=False, n_jobs=-1)
    Grid_model.fit(X, y)
    # Scored on the same X, y used for fitting: this is training accuracy.
    print(f"{position} Train Acc : {Grid_model.score(X, y)}")

    # GridSearchCV refits the best candidate on all of X, y by default
    # (refit=True).  Reusing that estimator keeps random_state=0, so the stored
    # model is reproducible; the previous unseeded rebuild from best_params_
    # could differ from the search winner on every run.
    model = Grid_model.best_estimator_
    print(f"{position} Model Acc : {model.score(X, y)}")
    result_table.append(model)

TOP Train Acc : 0.9354838709677419
TOP Model Acc : 0.9354838709677419
JUNGLE Train Acc : 0.9608695652173913
JUNGLE Model Acc : 0.9652173913043478
MID Train Acc : 0.8666666666666667
MID Model Acc : 0.8592592592592593
ADC Train Acc : 0.9786324786324786
ADC Model Acc : 0.9786324786324786
SUPPORT Train Acc : 0.9369369369369369
SUPPORT Model Acc : 0.9369369369369369

# Scale every column from position 5 onward in `df` by that column's maximum.
# Only the maximum is used as the divisor; the minimum is not subtracted.
scaled_source = worlds_2023[df.columns[5:]].astype('float')
MinMax_table = scaled_source / scaled_source.max()

MinMax_table.head(3)

	KDA	Avg kills	Avg deaths	Avg assists	CSM	GPM	KP%	DMG%	DPM	VSPM	Avg WPM	Avg WCPM	Avg VWPM	GD@15	CSD@15	XPD@15	FB %	FB Victim	Solo Kills
2	0.152	0.316667	0.350	0.43125	0.728972	0.818702	0.655556	0.617284	0.629326	0.286111	0.206278	0.222222	0.164384	0.127991	0.025641	-0.046385	0.059	0.235	0.459459
3	0.136	0.150000	0.300	0.39375	0.710280	0.681298	0.631111	0.444444	0.367031	0.305556	0.161435	0.444444	0.301370	0.066221	-0.076923	0.021146	0.111	0.056	0.081081
4	0.096	0.241667	0.375	0.26250	0.766355	0.774809	0.603333	0.520988	0.396175	0.313889	0.197309	0.370370	0.301370	0.115748	0.051282	-0.053888	0.238	0.167	0.810811

from sklearn.preprocessing import MinMaxScaler

# Re-attach the identifying columns that were dropped before modelling.
worlds_2023['Player'] = df['Player']
worlds_2023['Country'] = df['Country']
worlds_2023['Position'] = df['Position']

# Select the scaled features by name, in the column order the forests were
# trained on (every frame in spring_line minus 'Win rate').  MinMax_table
# already contains only feature columns, so slicing its rows with [5:] again
# discarded five features and paired the rest with the wrong importances.
feature_cols = spring_line[0].columns.drop('Win rate')
feature_rows = MinMax_table[feature_cols].values

score = []

for pos, row in zip(worlds_2023['Position'].values, feature_rows):
    # Weight each scaled feature by the importance from the player's own
    # position model.
    importances = result_table[lines.index(pos)].feature_importances_
    score.append(sum(value * weight for value, weight in zip(row, importances)))

worlds_2023['Score'] = score
# Rescale the raw scores to 0-100 and truncate to integers.
worlds_2023['Score'] = (MinMaxScaler().fit_transform([[s] for s in worlds_2023['Score']]) * 100).astype('int')

worlds_2023.shape

(1124, 25)

# Full table, including every stat column and the final score.
worlds_2023.to_csv("Result_Worlds_2023.csv", index=False)

# Compact table: identifying columns plus the score, in this column order.
output_df = worlds_2023[['Country', 'Player', 'Position', 'Score']].copy()

output_df.to_csv('Player Score List.csv', index=False)

output_df.head(10)

	Country	Player	Position	Score
2	EE	ACD	TOP	51
3	AR	Acce	TOP	31
4	FR	Adam	TOP	45
5	DE	Addusto	TOP	37
6	CZ	Adiss	TOP	32
7	PL	Agresivoo	TOP	40
8	DE	Akirei	TOP	11
11	PL	Ariana	TOP	34
13	TR	Armut	TOP	41
14	TR	Aytekn	TOP	42

결과적으로는 점수가 그다지 잘 나오지 않았고, 중국 팀의 자료가 없어서 분석에 한계가 있다.

728x90