グリッドサーチを使ったチューニング
サポートベクトルマシンのパイプラインのトレーニング
# Tune an SVM pipeline (standardization + SVC) with an exhaustive grid search.
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Load the Wisconsin breast-cancer data set; the file has no header row.
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data',
    header=None,
)

# Columns 2 onward are used as features, column 1 as the class label.
X = df.loc[:, 2:].values
y = df.loc[:, 1].values

# Encode the class labels as integers.
le = LabelEncoder()
y = le.fit_transform(y)

# Hold out 20% of the samples for testing, keeping the class ratio (stratify).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, stratify=y, random_state=1
)

pipe_svc = make_pipeline(StandardScaler(), SVC(random_state=1))

param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]

# Parameters: C     -- how tolerant the model is of misclassification.
#             gamma -- controls the degree of overfitting.
# The linear kernel is tuned over C only; the RBF kernel over C and gamma.
param_grid = [
    {'svc__C': param_range, 'svc__kernel': ['linear']},
    {'svc__C': param_range, 'svc__gamma': param_range, 'svc__kernel': ['rbf']},
]

# 10-fold cross-validated grid search scored by accuracy, using all CPU cores.
gs = GridSearchCV(
    estimator=pipe_svc,
    param_grid=param_grid,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
)
gs.fit(X_train, y_train)

print(gs.best_score_)
print(gs.best_params_)
0.9846153846153847 {'svc__C': 100.0, 'svc__gamma': 0.001, 'svc__kernel': 'rbf'}
- テスト結果
# Evaluate the best pipeline found by the grid search on the held-out test set.
clf = gs.best_estimator_

# NOTE(review): GridSearchCV refits the best estimator on the full training
# data by default (refit=True), so this fit is presumably redundant -- confirm.
clf.fit(X_train, y_train)

test_accuracy = clf.score(X_test, y_test)
print('Test accuracy : %.3f' % test_accuracy)
Test accuracy : 0.974
入れ子式交差検証 (5 x 2 交差検証)
モデル同士の比較に使える
- サポートベクトルマシン
# Nested cross-validation (5 x 2) for the SVM pipeline.
# cross_val_score was used without being imported; import it here.
from sklearn.model_selection import cross_val_score

# Inner loop: 2-fold grid search that selects the hyperparameters.
gs = GridSearchCV(
    estimator=pipe_svc,
    param_grid=param_grid,
    scoring='accuracy',
    cv=2,
    n_jobs=-1,
)

# Outer loop: 5-fold cross-validation that scores the tuned estimator.
scores = cross_val_score(gs, X_train, y_train, scoring='accuracy', cv=5)
print('CV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
CV accuracy: 0.974 +/- 0.015
- 決定木分類器
# Nested cross-validation (5 x 2) for a decision tree classifier.
# cross_val_score was used without being imported; import it here.
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

# Inner loop: 2-fold grid search over the maximum tree depth
# (None leaves the depth unlimited).
gs = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=0),
    param_grid=[{'max_depth': [1, 2, 3, 4, 5, 6, 7, None]}],
    scoring='accuracy',
    cv=2,
    n_jobs=-1,
)

# Outer loop: 5-fold cross-validation that scores the tuned estimator.
scores = cross_val_score(gs, X_train, y_train, scoring='accuracy', cv=5)
print('CV accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
CV accuracy: 0.934 +/- 0.016