개발 노트/머신러닝
머신러닝 기반 2023 Worlds 참가 팀 선수 분석
LeeInGyu
2023. 10. 17. 22:42
import pandas as pd


def _read_lines(path):
    """Return every line of *path* (line terminators kept) and close the file."""
    with open(path) as handle:
        return handle.readlines()


# One tab-separated export per position.
top_data = _read_lines('top.txt')
jgl_data = _read_lines('jungle.txt')
mid_data = _read_lines('mid.txt')
adc_data = _read_lines('adc.txt')
sup_data = _read_lines('support.txt')

# Only the first file keeps its first line; the first line of every other
# file is skipped (presumably a repeated header row - confirm against the data).
all_line_data = top_data + jgl_data[1:] + mid_data[1:] + adc_data[1:] + sup_data[1:]

# Strip the line terminator explicitly. The previous `split("\n")[:-1]`
# returned an empty list for a line without a trailing newline, so the last
# record of a file could be dropped silently.
data_list = [[line.rstrip("\n")] for line in all_line_data]

# Split every record into its tab-separated cells.
edit_list = [record.split("\t") for row in data_list for record in row]

# No column names are given here, so the CSV gets positional column labels
# and the original header line is written as the first data row.
pd_data = pd.DataFrame(edit_list)
pd_data.to_csv("PlayerState.csv", index=False)
# After the raw export has been written, reload it and tidy up the
# percentage columns.
import pandas as pd
import warnings
import numpy as np

# Silences every warning category for the rest of the session.
warnings.filterwarnings(action='ignore')

# Cells that are exactly '-' are treated as missing, and every row that
# contains a missing value is dropped.
# `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
df = pd.read_csv("PlayerState.csv").replace('-', np.nan).dropna(axis=0)

# Use the first remaining row as the column names, then remove that row
# from the data.
df = df.rename(columns=df.iloc[0]).drop(df.index[0])
df.shape
(1124, 25)
# Convert the percentage-formatted columns into fractions.
percent_columns = ['Win rate', 'KP%', 'FB %', 'FB Victim']
for column in percent_columns:
    # The last character of each cell is cut off, '0' is prepended (so an
    # empty remainder still parses, as 0.0) and the number is divided by 100.
    df[column] = [float('0' + cell[:-1]) / 100 for cell in df[column]]
df.shape
(1124, 25)
# Numeric view of the data: everything except the identifying columns and
# the game count.
worlds_2023 = df.drop(columns=['Player', 'Country', 'Position', 'Games'])
worlds_2023.shape
(1124, 21)
# Positions, in the order used to index `spring_line`.
lines = ['TOP', 'JUNGLE', 'MID', 'ADC', 'SUPPORT']

# One float32 frame per position, holding only that position's rows and
# none of the identifying columns.
spring_line = [
    df.loc[df['Position'] == position]
    .drop(columns=['Player', 'Country', 'Position', 'Games'])
    .astype('float32')
    for position in lines
]
len(spring_line), len(spring_line[0])
(5, 279)
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid searched for every position.
# Only valid, distinct candidates are listed: scikit-learn requires an
# integer `min_samples_split` >= 2 and an integer `min_samples_leaf` >= 1,
# so the former candidates 1 and 0 could never be fitted, and 100 was listed
# twice for `n_estimators`.
# NOTE(review): the duplicated 100 may have been a typo for 110 - confirm.
params = {
    'n_estimators': [90, 100],
    'max_depth': [5, 6, 7],
    'min_samples_split': [2, 3],
    'min_samples_leaf': [1, 2],
}

# Fitted forests, stored in the same order as `lines`.
result_table = []
for position, features in zip(lines, spring_line):
    X = features.drop(['Win rate'], axis=1).values
    # Turn the win rate into a class label by integer-dividing it into
    # 0.25-wide buckets.
    y = (features['Win rate'].values / 0.25).astype('int')

    Forest_model = RandomForestClassifier(random_state=0, n_jobs=-1)
    Grid_model = GridSearchCV(estimator=Forest_model, param_grid=params, verbose=False, n_jobs=-1)
    Grid_model.fit(X, y)
    # Accuracy on the same rows the search was fitted on.
    print(f"{position} Train Acc : {Grid_model.score(X, y)}")

    # Reuse the estimator that the search already refitted with the best
    # parameters. Building a second forest without `random_state` made the
    # stored model (and its feature importances) change from run to run.
    model = Grid_model.best_estimator_
    print(f"{position} Model Acc : {model.score(X, y)}")
    result_table.append(model)
TOP Train Acc : 0.9354838709677419
TOP Model Acc : 0.9354838709677419
JUNGLE Train Acc : 0.9608695652173913
JUNGLE Model Acc : 0.9652173913043478
MID Train Acc : 0.8666666666666667
MID Model Acc : 0.8592592592592593
ADC Train Acc : 0.9786324786324786
ADC Model Acc : 0.9786324786324786
SUPPORT Train Acc : 0.9369369369369369
SUPPORT Model Acc : 0.9369369369369369
def _scale_by_max(column):
    """Return *column* cast to float and divided by its largest value."""
    as_float = column.astype('float')
    return as_float / max(as_float)


# One scaled column for every df column from position 5 onwards, read from
# `worlds_2023`. Values are divided by the column maximum only, so negative
# inputs stay negative.
MinMax_table = pd.DataFrame(
    {name: _scale_by_max(worlds_2023[name]) for name in df.columns[5:]}
)
MinMax_table.head(3)
KDA | Avg kills | Avg deaths | Avg assists | CSM | GPM | KP% | DMG% | DPM | VSPM | Avg WPM | Avg WCPM | Avg VWPM | GD@15 | CSD@15 | XPD@15 | FB % | FB Victim | Penta Kills | Solo Kills | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2 | 0.152 | 0.316667 | 0.350 | 0.43125 | 0.728972 | 0.818702 | 0.655556 | 0.617284 | 0.629326 | 0.286111 | 0.206278 | 0.222222 | 0.164384 | 0.127991 | 0.025641 | -0.046385 | 0.059 | 0.235 | 0.0 | 0.459459 |
3 | 0.136 | 0.150000 | 0.300 | 0.39375 | 0.710280 | 0.681298 | 0.631111 | 0.444444 | 0.367031 | 0.305556 | 0.161435 | 0.444444 | 0.301370 | 0.066221 | -0.076923 | 0.021146 | 0.111 | 0.056 | 0.0 | 0.081081 |
4 | 0.096 | 0.241667 | 0.375 | 0.26250 | 0.766355 | 0.774809 | 0.603333 | 0.520988 | 0.396175 | 0.313889 | 0.197309 | 0.370370 | 0.301370 | 0.115748 | 0.051282 | -0.053888 | 0.238 | 0.167 | 0.0 | 0.810811 |
from sklearn.preprocessing import MinMaxScaler

# Re-attach the identifying columns to the numeric frame.
worlds_2023['Player'] = df['Player']
worlds_2023['Country'] = df['Country']
worlds_2023['Position'] = df['Position']

# Raw score of a player = sum over features of
# (scaled feature value * importance of that feature in the forest trained
# for the player's position).
# The complete MinMax_table row is used. It was built from df.columns[5:],
# so slicing it with [5:] again discarded five more features and paired
# every remaining value with the importance of a different feature.
score = []
for row, pos in zip(MinMax_table.values, worlds_2023['Position'].values):
    importances = result_table[lines.index(pos)].feature_importances_
    score.append(sum(value * weight for value, weight in zip(row, importances)))
worlds_2023['Score'] = score

# Rescale the raw scores to integers in 0..100. `ravel()` flattens the
# (n, 1) array returned by the scaler before it is stored as a column.
worlds_2023['Score'] = (
    MinMaxScaler().fit_transform([[s] for s in worlds_2023['Score']]).ravel() * 100
).astype('int')
worlds_2023.shape
(1124, 25)
# Write the full result table, then a four-column summary of it.
worlds_2023.to_csv("Result_Worlds_2023.csv", index=False)

output_df = worlds_2023[['Country', 'Player', 'Position', 'Score']].copy()
output_df.to_csv('Player Score List.csv', index=False)
output_df.head(10)
Country | Player | Position | Score | |
---|---|---|---|---|
2 | EE | ACD | TOP | 51 |
3 | AR | Acce | TOP | 31 |
4 | FR | Adam | TOP | 45 |
5 | DE | Addusto | TOP | 37 |
6 | CZ | Adiss | TOP | 32 |
7 | PL | Agresivoo | TOP | 40 |
8 | DE | Akirei | TOP | 11 |
11 | PL | Ariana | TOP | 34 |
13 | TR | Armut | TOP | 41 |
14 | TR | Aytekn | TOP | 42 |
- 결과적으로 점수가 그다지 잘 나오지 않았고, 중국 팀의 자료가 없어 분석에 한계가 있다.
728x90
반응형