XGBoost で StratifiedKFold を使っても意味がないのか
XGBoost の学習に StratifiedKFold を使ってみた。
# Dependencies for the grid search and the per-fold evaluation that follow.
import warnings

import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from xgboost import XGBClassifier

# Hide DeprecationWarning messages so the search output stays readable.
warnings.simplefilter('ignore', DeprecationWarning)

# Search space: min_child_weight from 0.1 upward in steps of 0.1 (np.arange
# excludes the stop value). The float step accumulates rounding error, which
# is why candidates such as 3.5000000000000004 show up instead of 3.5.
param_grid = [{'min_child_weight': np.arange(0.1, 10.1, 0.1)}]
# Tune min_child_weight with a 10-fold stratified grid search scored by F1.
kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)

# Pass the splitter itself instead of the generator kf.split(X, y): a
# generator is exhausted after one use, so a second fit() would see no folds.
# With a fixed random_state the folds are the same either way.
# NOTE: the `iid` argument no longer exists in newer scikit-learn releases;
# drop it when upgrading.
_model = GridSearchCV(XGBClassifier(), param_grid, cv=kf, scoring='f1', iid=True)
_model.fit(X, y)

# Report the winning parameters of the search object that was just fitted
# (`_model`, not an unrelated `model`).
print(_model.best_params_)

# NOTE: this predicts on the same X the search was fitted on, so the number
# below is a training-set accuracy, not a cross-validated estimate.
pred = _model.predict(X)
print('accuracy_score', accuracy_score(y, pred))
{'min_child_weight': 3.5000000000000004} accuracy_score 0.8529741863075196
# Re-evaluate the tuned min_child_weight by hand: fit a fresh classifier on
# each stratified training fold and score it on the matching held-out fold.
kf = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
clf = XGBClassifier(**_model.best_params_)

for i, (train_index, test_index) in enumerate(kf.split(X, y)):
    print('\n{} of kfold {}'.format(i, kf.n_splits))

    # NOTE(review): kf.split yields positional row numbers, while `.loc` and
    # `y[...]` look rows up by label. This presumably relies on X and y
    # carrying the default 0..n-1 index -- confirm, or switch to `.iloc`.
    xtr, xvl = X.loc[train_index], X.loc[test_index]
    ytr, yvl = y[train_index], y[test_index]

    clf.fit(
        xtr,
        ytr,
        eval_set=[(xtr, ytr), (xvl, yvl)],
        eval_metric='logloss',
        verbose=False,
    )
    evals_result = clf.evals_result()

    print(_model.best_params_)

    # Score the classifier fitted on THIS fold. The original called
    # `model.predict`, which evaluated an unrelated object and therefore did
    # not measure the fold model at all.
    pred = clf.predict(xvl)
    print('accuracy_score', accuracy_score(yvl, pred))
0 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8333333333333334 1 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8666666666666667 2 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8202247191011236 3 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8539325842696629 4 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8314606741573034 5 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.898876404494382 6 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8764044943820225 7 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8314606741573034 8 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8764044943820225 9 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8409090909090909