개발 노트/머신러닝

머신러닝 기반 2023 Worlds 참가 팀 선수 분석

LeeInGyu 2023. 10. 17. 22:42
def _read_lines(path):
    """Return every line of the text file at *path*, line endings included."""
    # The context manager closes the handle as soon as the lines are read;
    # a bare open(path).readlines() leaves the file open until the object
    # happens to be garbage-collected.
    with open(path) as source:
        return source.readlines()


# One stats export per position.
top_data = _read_lines('top.txt')
jgl_data = _read_lines('jungle.txt')
mid_data = _read_lines('mid.txt')
adc_data = _read_lines('adc.txt')
sup_data = _read_lines('support.txt')

# The first line of top.txt is kept; [1:] skips the first line of every other
# file (presumably the same header row repeated in each export).
all_line_data = top_data + jgl_data[1:] + mid_data[1:] + adc_data[1:] + sup_data[1:]

import pandas as pd

# data_list keeps each raw line (terminator removed) wrapped in a one-element
# list; edit_list holds the same line split into its tab-separated cells.
data_list = []
edit_list = []

for raw_line in all_line_data:
    # rstrip("\n") removes only the line terminator. Splitting on "\n" and
    # discarding the last piece returns an empty list for a final line that
    # has no trailing newline, which silently dropped that row.
    text = raw_line.rstrip("\n")
    data_list.append([text])
    edit_list.append(text.split("\t"))

pd_data = pd.DataFrame(edit_list)

# The frame has default integer column labels, so to_csv writes those labels
# as the first line of the file and the rows of edit_list follow as data.
pd_data.to_csv("PlayerState.csv", index=False)

# 처리 후 % 만 정리
import pandas as pd
import warnings
import numpy as np

# Suppress every warning from here on.
warnings.filterwarnings(action='ignore')

# Cells that are exactly '-' are treated as missing: they become NaN and any
# row containing one is dropped. np.nan is used because the np.NaN alias was
# removed in NumPy 2.0 and raises AttributeError there.
df = pd.read_csv("PlayerState.csv").replace('-', np.nan).dropna(axis=0)
# Promote the first remaining row to column names, then remove that row.
df = df.rename(columns=df.iloc[0]).drop(df.index[0])
df.shape
(1124, 25)
# Columns stored as text whose last character is dropped before conversion
# (presumably a trailing "%" sign -- verify against the export).
percent_columns = ['Win rate', 'KP%', 'FB %', 'FB Victim']

for column in percent_columns:
    # The '0' prefix keeps float() valid when nothing is left after removing
    # the last character; the result is divided by 100 to give a fraction.
    df[column] = [float('0' + cell[:-1]) / 100 for cell in df[column]]

df.shape
(1124, 25)
# Copy of df without the four columns that are not used as model inputs.
excluded_columns = ['Player', 'Country', 'Position', 'Games']
worlds_2023 = df.drop(columns=excluded_columns)
worlds_2023.shape
(1124, 21)
lines = ['TOP', 'JUNGLE', 'MID', 'ADC', 'SUPPORT']
non_feature_columns = ['Player', 'Country', 'Position', 'Games']

# One float32 frame per position, stored in the same order as `lines`:
# the rows of df for that position with the non-feature columns removed.
spring_line = [
    df.loc[df['Position'] == position]
    .drop(columns=non_feature_columns)
    .astype('float32')
    for position in lines
]

len(spring_line), len(spring_line[0])
(5, 279)
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid searched for every position.
# Only valid, distinct candidates are listed:
#   * n_estimators had 100 twice, which only repeated identical fits;
#   * min_samples_split=1 is rejected by scikit-learn (an int must be >= 2);
#   * min_samples_leaf=0 is rejected by scikit-learn (an int must be >= 1).
# The rejected candidates could never be selected; they only produced failed fits.
params = {
    'n_estimators': [90, 100],
    'max_depth': [5, 6, 7],
    'min_samples_split': [2, 3],
    'min_samples_leaf': [1, 2],
}

# Fitted forest per position, in the same order as `lines`.
result_table = []

for line_name, line_data in zip(lines, spring_line):
    X = line_data.drop(['Win rate'], axis=1).values
    # Bucket the win rate into integer classes of width 0.25
    # (0.0-0.25 -> 0, ..., exactly 1.0 -> 4).
    y = (line_data['Win rate'].values / 0.25).astype('int')
    Forest_model = RandomForestClassifier(random_state=0, n_jobs=-1)
    Grid_model = GridSearchCV(estimator=Forest_model, param_grid=params, verbose=False, n_jobs=-1)
    Grid_model.fit(X, y)
    # NOTE: this is accuracy on the same rows the search was fitted on, not a
    # held-out score.
    print(f"{line_name} Train Acc : {Grid_model.score(X, y)}")

    # Refit with the winning parameters. random_state is fixed so the stored
    # model (and the feature importances read from it later) is reproducible
    # from run to run; without it every run produced a different forest.
    model = RandomForestClassifier(random_state=0, **Grid_model.best_params_)
    model.fit(X, y)
    print(f"{line_name} Model Acc : {model.score(X, y)}")
    result_table.append(model)
TOP Train Acc : 0.9354838709677419
TOP Model Acc : 0.9354838709677419
JUNGLE Train Acc : 0.9608695652173913
JUNGLE Model Acc : 0.9652173913043478
MID Train Acc : 0.8666666666666667
MID Model Acc : 0.8592592592592593
ADC Train Acc : 0.9786324786324786
ADC Model Acc : 0.9786324786324786
SUPPORT Train Acc : 0.9369369369369369
SUPPORT Model Acc : 0.9369369369369369
# Every column of df from position 5 onward, taken from worlds_2023, cast to
# float and divided by that column's largest value.
scaled_columns = {}

for column in df.columns[5:]:
    column_values = worlds_2023[column].astype('float')
    scaled_columns[column] = column_values / max(column_values)

MinMax_table = pd.DataFrame(scaled_columns)

MinMax_table.head(3)

KDA Avg kills Avg deaths Avg assists CSM GPM KP% DMG% DPM VSPM Avg WPM Avg WCPM Avg VWPM GD@15 CSD@15 XPD@15 FB % FB Victim Penta Kills Solo Kills
2 0.152 0.316667 0.350 0.43125 0.728972 0.818702 0.655556 0.617284 0.629326 0.286111 0.206278 0.222222 0.164384 0.127991 0.025641 -0.046385 0.059 0.235 0.0 0.459459
3 0.136 0.150000 0.300 0.39375 0.710280 0.681298 0.631111 0.444444 0.367031 0.305556 0.161435 0.444444 0.301370 0.066221 -0.076923 0.021146 0.111 0.056 0.0 0.081081
4 0.096 0.241667 0.375 0.26250 0.766355 0.774809 0.603333 0.520988 0.396175 0.313889 0.197309 0.370370 0.301370 0.115748 0.051282 -0.053888 0.238 0.167 0.0 0.810811
from sklearn.preprocessing import MinMaxScaler

# Re-attach the identifying columns that were dropped when worlds_2023 was built.
worlds_2023['Player'] = df['Player']
worlds_2023['Country'] = df['Country']
worlds_2023['Position'] = df['Position']

score = []

# Score = sum of a player's scaled stats, each weighted by the importance the
# forest trained for that player's position assigned to the same feature.
for stats_row, pos in zip(MinMax_table.values, worlds_2023['Position'].values):
    importances = result_table[lines.index(pos)].feature_importances_
    # The whole row is used: MinMax_table already holds only the feature
    # columns, so slicing it again with [5:] paired each stat with the
    # importance of a different feature and ignored the last five importances.
    # np.dot also raises if the two lengths ever disagree, instead of
    # silently truncating the way zip() does.
    score.append(float(np.dot(stats_row, importances)))

worlds_2023['Score'] = score
# Rescale the raw scores to the 0-100 range and truncate to integers.
worlds_2023['Score'] = (MinMaxScaler().fit_transform([[value] for value in worlds_2023['Score']]) * 100).astype('int')

worlds_2023.shape
(1124, 25)
# Save the full table, stats and score included.
worlds_2023.to_csv("Result_Worlds_2023.csv", index=False)

# Compact listing: who the player is plus the final score, in this column order.
output_df = pd.DataFrame({
    column: worlds_2023[column]
    for column in ('Country', 'Player', 'Position', 'Score')
})

output_df.to_csv('Player Score List.csv', index=False)
output_df.head(10)

Country Player Position Score
2 EE ACD TOP 51
3 AR Acce TOP 31
4 FR Adam TOP 45
5 DE Addusto TOP 37
6 CZ Adiss TOP 32
7 PL Agresivoo TOP 40
8 DE Akirei TOP 11
11 PL Ariana TOP 34
13 TR Armut TOP 41
14 TR Aytekn TOP 42
  • 결과적으로는 점수가 그다지 잘 나오지 않았고, 중국 팀의 자료가 없어서 분석에 한계가 있다.
728x90
반응형