본문 바로가기
개발 노트/머신러닝

RandomForest 기반 2022 MSI 분석

by LeeInGyu 2022. 5. 28.
import warnings
warnings.filterwarnings(action='ignore')
import pandas as pd

# Load the two CSV sheets this analysis works on. Judging by the file names
# these are the 2022 Spring split stats and the 2022 MSI stats; both files are
# expected in the current working directory.
spring_table = pd.read_csv(filepath_or_buffer='2022_Spring_data.csv')
msi_table = pd.read_csv(filepath_or_buffer='2022_MSI_Data.csv')

# Notebook display: (rows, columns) of each table.
(spring_table.shape, msi_table.shape)
((201, 25), (62, 25))
# Position labels; their order defines the order of `spring_line`.
lines = ['TOP', 'JUNGLE', 'MID', 'ADC', 'SUPPORT']

# One float32 table per position: keep only that position's rows, then drop
# the identifier/bookkeeping columns so that every remaining column can be
# cast to a number.
spring_line = [
    spring_table.loc[spring_table['Position'] == position]
    .drop(columns=['Player', 'Country', 'Position', 'Games'])
    .astype('float32')
    for position in lines
]

# Notebook display: number of per-position tables (one per entry in `lines`).
len(spring_line)
5
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid searched for every position's classifier.
#
# scikit-learn requires an integer `min_samples_split` to be >= 2 and an
# integer `min_samples_leaf` to be >= 1. The previous grid contained 1 and 0
# respectively, so a third of the candidates on each of those axes could never
# be fitted (and the resulting warnings were hidden by the global warning
# filter). `n_estimators` also listed 100 twice, which trained identical
# candidates twice. The grid keeps three values per axis, shifted into the
# valid range.
# NOTE(review): 110 is presumably what the duplicated 100 was meant to be -
# confirm.
params = {
    'n_estimators': [90, 100, 110],
    'max_depth': [5, 6, 7],
    'min_samples_split': [2, 3, 4],
    'min_samples_leaf': [1, 2, 3],
}

# Fitted classifier per position, in the same order as `lines`.
result_table = []

for position, position_table in zip(lines, spring_line):
    # Features: every numeric column except the target.
    X = position_table.drop(['Win rate'], axis=1).values
    # Target: win rate bucketed into integer classes of width 0.25.
    # NOTE(review): this yields a handful of classes only if 'Win rate' is a
    # fraction in [0, 1]; if it is stored as a percentage the number of
    # classes is much larger - confirm against the CSV.
    y = (position_table['Win rate'].values / 0.25).astype('int')

    Forest_model = RandomForestClassifier(random_state=0, n_jobs=-1)
    Grid_model = GridSearchCV(estimator=Forest_model, param_grid=params, verbose=False, n_jobs=-1)
    Grid_model.fit(X, y)
    print(f"{position} Train Acc : {Grid_model.score(X, y)}")

    # GridSearchCV (refit=True by default) has already re-trained the best
    # parameter combination on all of X, y with the seeded estimator. Reusing
    # that model keeps the stored classifier reproducible and identical to the
    # one that was selected; the previous code re-trained an unseeded forest,
    # so its trees (and feature importances) changed from run to run.
    model = Grid_model.best_estimator_
    # Both figures are accuracy on the training data itself, not a held-out
    # estimate. With the selected model reused, this line matches the one above.
    print(f"{position} Model Acc : {model.score(X, y)}")
    result_table.append(model)
TOP Train Acc : 0.9761904761904762
TOP Model Acc : 1.0
JUNGLE Train Acc : 1.0
JUNGLE Model Acc : 1.0
MID Train Acc : 0.9736842105263158
MID Model Acc : 0.9473684210526315
ADC Train Acc : 0.9743589743589743
ADC Model Acc : 1.0
SUPPORT Train Acc : 1.0
SUPPORT Model Acc : 1.0
# Scale every stat column of the MSI table by that column's maximum.
# Despite the name this is divide-by-max scaling, not min-max scaling: columns
# that contain negative values stay negative after scaling.
# The column names come from `spring_table` (everything from index 5 onward),
# so `msi_table` must contain the same column names.
MinMax_table = pd.DataFrame({
    stat: msi_table[stat] / max(msi_table[stat])
    for stat in spring_table.columns[5:]
})

# Notebook display: first three scaled rows.
MinMax_table.head(3)

KDA Avg kills Avg deaths Avg assists CSM GPM KP% DMG% DPM VSPM Avg WPM Avg WCPM Avg VWPM GD@15 CSD@15 XPD@15 FB % FB Victim Penta Kills Solo Kills
0 0.105882 0.216667 0.626866 0.15625 0.915789 0.743381 0.502793 0.606742 0.577953 0.219512 0.191388 0.367647 0.278689 -0.634301 -0.142857 -0.469976 0.167 0.000 0.0 0.0625
1 0.400000 0.883333 0.417910 0.26875 0.673684 0.800407 0.901676 0.570225 0.565354 0.271003 0.167464 0.294118 0.426230 0.379310 0.357143 0.326661 0.667 0.334 0.0 0.1875
2 0.105882 0.166667 0.552239 0.13750 0.789474 0.665988 0.435754 0.558989 0.478740 0.268293 0.239234 0.294118 0.311475 -0.860254 -1.071429 -0.387510 0.000 0.334 0.0 0.0000
from sklearn.preprocessing import MinMaxScaler

# Raw score per MSI player: the dot product of the player's scaled stats with
# the feature importances of the classifier trained for that player's position.
#
# `MinMax_table` already contains only the stat columns (it was built from
# `spring_table.columns[5:]`), so each row is used as-is. The previous code
# sliced `[5:]` a second time, which dropped the first five stats and paired
# every remaining stat with the importance of a different feature; `zip` then
# silently truncated the mismatch.
feature_rows = MinMax_table.values
# Computed once per model rather than once per player.
position_importances = [fitted.feature_importances_ for fitted in result_table]

score = []

for row, pos in zip(feature_rows, msi_table['Position'].values):
    weights = position_importances[lines.index(pos)]
    # Fail loudly if the stat columns and the model's features drift apart,
    # instead of letting zip() truncate and produce a misaligned score.
    if len(row) != len(weights):
        raise ValueError(
            f"feature count mismatch for {pos}: "
            f"{len(row)} scaled stats vs {len(weights)} importances"
        )
    p = sum(value * weight for value, weight in zip(row, weights))
    score.append(p)

msi_table['Score'] = score
# Rescale the raw scores to 0-100 across all players and truncate to integers.
msi_table['Score'] = (MinMaxScaler().fit_transform([[raw] for raw in msi_table['Score']])*100).astype('int')
# Notebook display: first three rows including the new 'Score' column.
msi_table.head(3)

Player Country Position Games Win rate KDA Avg kills Avg deaths Avg assists CSM ... Avg WCPM Avg VWPM GD@15 CSD@15 XPD@15 FB % FB Victim Penta Kills Solo Kills Score
0 5Kid KR ADC 6 16.7 0.9 1.3 4.2 2.5 8.7 ... 0.25 0.17 -1398 -2 -587 16.7 0.0 0 1 49
1 Aegis BR JUNGLE 6 33.3 3.4 5.3 2.8 4.3 6.4 ... 0.20 0.26 836 5 408 66.7 16.7 0 3 83
2 Aloned CL MID 6 16.7 0.9 1.0 3.7 2.2 7.5 ... 0.20 0.19 -1896 -15 -484 0.0 16.7 0 0 30

3 rows × 26 columns

# Write the MSI table, including the added 'Score' column, to Result_MSI.csv
# in the working directory; index=False leaves the row index out of the file.
msi_table.to_csv('Result_MSI.csv', index=False)

728x90
반응형