House Priceの分析4
XGBRegressorっていう、回帰モデルがあるので確認。 そもそも xgboost が結構界隈では有名らしい。
import pandas as pd from sklearn.model_selection import train_test_split from sklearn.preprocessing import Imputer data = pd.read_csv('kaggle/kaggle1/train.csv') data.isnull().any(axis=0) # Imputerは欠損値を mean(平均), median(中央値), mode(最頻値)のどれかに置き換える data.dropna(axis=0, subset=['SalePrice'], inplace=True) y = data.SalePrice X = data.drop(['SalePrice'], axis=1).select_dtypes(exclude=['object']) train_X, test_X, train_y, test_y = train_test_split(X.as_matrix(), y.as_matrix(), test_size=0.25) my_imputer = Imputer() train_X = my_imputer.fit_transform(train_X) test_X = my_imputer.transform(test_X) from xgboost import XGBRegressor my_model = XGBRegressor() # Add silent=True to avoid printing out updates with each cycle my_model.fit(train_X, train_y, verbose=False) # make predictions predictions = my_model.predict(test_X) from sklearn.metrics import mean_absolute_error print("Mean Absolute Error : " + str(mean_absolute_error(predictions, test_y))) my_model = XGBRegressor(n_estimators=1000) my_model.fit(train_X, train_y, early_stopping_rounds=5, eval_set=[(test_X, test_y)], verbose=False) my_model = XGBRegressor(n_estimators=1000, learning_rate=0.05) my_model.fit(train_X, train_y, early_stopping_rounds=5, eval_set=[(test_X, test_y)], verbose=False)