import pandas as pd


def _read_lines(path):
    """Return every line of the text file at *path*, closing the file afterwards."""
    with open(path) as handle:
        return handle.readlines()


# One tab-separated dump per role.
top_data = _read_lines('top.txt')
jgl_data = _read_lines('jungle.txt')
mid_data = _read_lines('mid.txt')
adc_data = _read_lines('adc.txt')
sup_data = _read_lines('support.txt')

# Keep the first line of top.txt only; the first line of every other file is
# skipped (presumably a repeated header row -- confirm against the data files).
all_line_data = top_data + jgl_data[1:] + mid_data[1:] + adc_data[1:] + sup_data[1:]

# Strip only the line terminator. Unlike split("\n")[:-1], this keeps the final
# record of a file that does not end with a newline instead of dropping it.
data_list = [[line.rstrip("\n")] for line in all_line_data]

# One list of tab-separated fields per record.
edit_list = [record.split("\t") for group in data_list for record in group]

pd_data = pd.DataFrame(edit_list)
pd_data.to_csv("PlayerState.csv", index=False)
# After the processing above, clean up only the percentage (%) columns
import pandas as pd
import warnings
import numpy as np
# Suppress every warning category for the remainder of the script.
warnings.filterwarnings(action='ignore')

# Treat '-' cells as missing and drop every row that contains one.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
df = pd.read_csv("PlayerState.csv").replace('-', np.nan).dropna(axis=0)

# The first remaining row holds the real column names: promote it to the
# header, then remove it from the data.
df = df.rename(columns=df.iloc[0]).drop(df.index[0])
df.shape
(1124, 25)
# Convert the percentage-style columns to fractions (value / 100).
for pct_column in ['Win rate', 'KP%', 'FB %', 'FB Victim']:
    # The last character of each cell is discarded (presumably the '%' sign --
    # confirm against the data); the leading '0' lets an otherwise empty
    # remainder parse as 0.
    df[pct_column] = [float('0' + raw[:-1]) / 100 for raw in df[pct_column]]
df.shape
(1124, 25)
# Working copy of the table without the Player/Country/Position/Games columns.
worlds_2023 = df.drop(columns=['Player', 'Country', 'Position', 'Games'])
worlds_2023.shape
(1124, 21)
# Positions, in the order used for both `spring_line` and the trained models.
lines = ['TOP','JUNGLE','MID','ADC','SUPPORT']

# Columns removed before the remaining ones are cast to float32.
non_feature_columns = ['Player','Country','Position','Games']

# One float32 frame per position, holding only that position's rows.
spring_line = [
    df.loc[df['Position'] == role]
    .drop(non_feature_columns, axis=1)
    .astype('float32')
    for role in lines
]
len(spring_line), len(spring_line[0])
(5, 279)
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
# Hyper-parameter grid searched for each position's random forest.
# scikit-learn requires an integer min_samples_split to be >= 2 and an integer
# min_samples_leaf to be >= 1; smaller values make every fit for that candidate
# fail, so only valid values are listed. Each value appears once so that no
# candidate is fitted twice.
params = {
    'n_estimators': [90, 100],
    'max_depth': [5, 6, 7],
    'min_samples_split': [2, 3],
    'min_samples_leaf': [1, 2],
}
# Fit one grid-searched random forest per position and keep the winning model.
# result_table[k] is the fitted classifier for lines[k].
result_table = []
for position_name, position_df in zip(lines, spring_line):
    # Features: every column except the win rate.
    X = position_df.drop(['Win rate'], axis=1).values
    # Target: win rate divided by 0.25 and truncated to an integer class label.
    y = (position_df['Win rate'].values / 0.25).astype('int')

    Forest_model = RandomForestClassifier(random_state=0, n_jobs=-1)
    Grid_model = GridSearchCV(estimator=Forest_model, param_grid=params, verbose=False, n_jobs=-1)
    Grid_model.fit(X, y)
    # Accuracy is measured on the same rows the model was fitted on.
    print(f"{position_name} Train Acc : {Grid_model.score(X, y)}")

    # GridSearchCV (refit=True by default) has already refitted the best
    # candidate on all of X, y with random_state=0. Reusing it avoids a second,
    # unseeded fit whose feature importances would change from run to run.
    model = Grid_model.best_estimator_
    print(f"{position_name} Model Acc : {model.score(X, y)}")
    result_table.append(model)
TOP Train Acc : 0.9354838709677419
TOP Model Acc : 0.9354838709677419
JUNGLE Train Acc : 0.9608695652173913
JUNGLE Model Acc : 0.9652173913043478
MID Train Acc : 0.8666666666666667
MID Model Acc : 0.8592592592592593
ADC Train Acc : 0.9786324786324786
ADC Model Acc : 0.9786324786324786
SUPPORT Train Acc : 0.9369369369369369
SUPPORT Model Acc : 0.9369369369369369
# Scale every stat column from the sixth column of `df` onwards by dividing it
# by its own maximum. Note this is divide-by-max, not (x - min) / (max - min),
# so values are not shifted by the column minimum.
MinMax_table = pd.DataFrame()
for stat_name in df.columns[5:]:
    stat_values = worlds_2023[stat_name].astype('float')
    MinMax_table[stat_name] = stat_values / max(stat_values)
MinMax_table.head(3)
|   | KDA | Avg kills | Avg deaths | Avg assists | CSM | GPM | KP% | DMG% | DPM | VSPM | Avg WPM | Avg WCPM | Avg VWPM | GD@15 | CSD@15 | XPD@15 | FB % | FB Victim | Penta Kills | Solo Kills |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0.152 | 0.316667 | 0.350 | 0.43125 | 0.728972 | 0.818702 | 0.655556 | 0.617284 | 0.629326 | 0.286111 | 0.206278 | 0.222222 | 0.164384 | 0.127991 | 0.025641 | -0.046385 | 0.059 | 0.235 | 0.0 | 0.459459 |
| 3 | 0.136 | 0.150000 | 0.300 | 0.39375 | 0.710280 | 0.681298 | 0.631111 | 0.444444 | 0.367031 | 0.305556 | 0.161435 | 0.444444 | 0.301370 | 0.066221 | -0.076923 | 0.021146 | 0.111 | 0.056 | 0.0 | 0.081081 |
| 4 | 0.096 | 0.241667 | 0.375 | 0.26250 | 0.766355 | 0.774809 | 0.603333 | 0.520988 | 0.396175 | 0.313889 | 0.197309 | 0.370370 | 0.301370 | 0.115748 | 0.051282 | -0.053888 | 0.238 | 0.167 | 0.0 | 0.810811 |
from sklearn.preprocessing import MinMaxScaler
# Re-attach the identifying columns (aligned on the shared row index).
worlds_2023['Player'] = df['Player']
worlds_2023['Country'] = df['Country']
worlds_2023['Position'] = df['Position']

# Raw score of a player = dot product of the scaled stat row with the feature
# importances of the model trained for that player's position.
# MinMax_table already contains only the stat columns, so the whole row is
# used. Slicing it again would shift every stat against the wrong importance
# and silently ignore the trailing ones. ndarray.dot raises ValueError if the
# two lengths ever disagree.
score = []
for stats_row, pos in zip(MinMax_table.values, worlds_2023['Position'].values):
    importances = result_table[lines.index(pos)].feature_importances_
    score.append(float(stats_row.dot(importances)))
worlds_2023['Score'] = score

# Rescale the raw scores to the 0-100 range and truncate to integers.
scaled_score = MinMaxScaler().fit_transform([[value] for value in worlds_2023['Score']])
worlds_2023['Score'] = (scaled_score * 100).astype('int').ravel()
worlds_2023.shape
(1124, 25)
# Write the full scored table, then a compact four-column score list.
worlds_2023.to_csv("Result_Worlds_2023.csv", index=False)

# Independent copy holding only the columns of the published score list.
output_df = worlds_2023[['Country', 'Player', 'Position', 'Score']].copy()
output_df.to_csv('Player Score List.csv', index=False)
output_df.head(10)
|   | Country | Player | Position | Score |
|---|---|---|---|---|
| 2 | EE | ACD | TOP | 51 |
| 3 | AR | Acce | TOP | 31 |
| 4 | FR | Adam | TOP | 45 |
| 5 | DE | Addusto | TOP | 37 |
| 6 | CZ | Adiss | TOP | 32 |
| 7 | PL | Agresivoo | TOP | 40 |
| 8 | DE | Akirei | TOP | 11 |
| 11 | PL | Ariana | TOP | 34 |
| 13 | TR | Armut | TOP | 41 |
| 14 | TR | Aytekn | TOP | 42 |
- 결과적으로 점수가 그다지 잘 나오지 않았고, 중국 팀의 자료가 없어서 분석에 한계가 있다.