get_dummies を逆変換する方法
In [1]: import pandas as pd In [2]: s = pd.Series(['a', 'b', 'a', 'c']) In [3]: s Out[3]: 0 a 1 b 2 a 3 c dtype: object In [4]: dummies = pd.get_dummies(s) In [5]: dummies Out[5]: a b c 0 1 0 0 1 0 1 0 2 1 0 0 3 0 0 1 In [6]: s2 = dummies.idxmax(axis=1) In [7]: s2 Out[7]: 0 a 1 b 2 a 3 c dtype: object In [8]: (s2 == s).all() Out[8]: True
curl で timeoutしたときのトラブルシューティング
以下のコマンドで一旦確認。
- アプリケーションがある場合
curl -v http://...
telnet XXX.XXX.XXX.XXX 443 # 接続成功 telnet XXX.XXX.XXX.XXX 443 Trying XXX.XXX.XXX.XXX... Connected to XXX.XXX.XXX.XXX. Escape character is '^]'. # 接続失敗 telnet XXX.XXX.XXX.XXX 443 Trying XXX.XXX.XXX.XXX... telnet: connect to address XXX.XXX.XXX.XXX: Connection timed out
- おまけ。sudo の挙動確認
sudo -l
pandas の Time Series の分析
statsmodel の分析も結構使えそう
tffm レコメンド性能高そう
$ pip install tffm
order とかよく挙動がわからない。パラメータがあるけど。 この辺、レコメンドエンジンに使えそう。
from sklearn.model_selection import train_test_split X_tr, X_te, y_tr, y_te = train_test_split(df.values, df['tfidf'].values, random_state=42, test_size=0.3) from tffm import TFFMRegressor from sklearn.metrics import mean_squared_error order = 3 model = TFFMRegressor( order=order, rank=10, optimizer=tf.train.AdamOptimizer(learning_rate=0.001), n_epochs=50, batch_size=1024, init_std=0.001, reg=0.01, input_type='sparse' ) # translate Y from {0,1} to {-10, 10} model.fit(X_tr_sparse, y_tr, show_progress=True) predictions = model.predict(X_te_sparse) import scipy.sparse as sp # only CSR format supported X_tr_sparse = sp.csr_matrix(X_tr) X_te_sparse = sp.csr_matrix(X_te) from sklearn.metrics import roc_auc_score, accuracy_score print('MSE: {}'.format(mean_squared_error(y_te, predictions)))
MySQL のテーブル更新情報
information_schema.tables に色々情報詰まってそう
mysql> SELECT TABLE_NAME,UPDATE_TIME FROM information_schema.tables WHERE TABLE_SCHEMA = 'dbname'; +---------------------+---------------------+ | TABLE_NAME | UPDATE_TIME | +---------------------+---------------------+ | tabel_a | 2019-01-30 15:40:04 | +---------------------+---------------------+
xgboost のstratifiedkfold は使っても意味ないのか
xgboost の training にstratifiedkfoldを使ってみた。
from xgboost import XGBClassifier from sklearn.model_selection import GridSearchCV from sklearn.model_selection import StratifiedKFold from sklearn.metrics import accuracy_score param_grid = [{'min_child_weight': np.arange(0.1, 10.1, 0.1)}] import warnings warnings.simplefilter('ignore', DeprecationWarning)
kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1) _model = GridSearchCV(XGBClassifier(), param_grid, cv=kf.split(X, y), scoring= 'f1',iid=True) _model.fit(X, y) print (model.best_params_) pred=_model.predict(X) print('accuracy_score',accuracy_score(y,pred))
{'min_child_weight': 3.5000000000000004} accuracy_score 0.8529741863075196
kf = StratifiedKFold(n_splits=10,random_state=1,shuffle=True) clf = XGBClassifier(**_model.best_params_) for i, (train_index,test_index) in enumerate(kf.split(X,y)): print('\n{} of kfold {}'.format(i,kf.n_splits)) xtr,xvl = X.loc[train_index],X.loc[test_index] ytr,yvl = y[train_index],y[test_index] clf.fit(xtr, ytr, eval_set=[(xtr, ytr), (xvl, yvl)], eval_metric='logloss', verbose=False, ) evals_result = clf.evals_result() print (_model.best_params_) pred=model.predict(xvl) print('accuracy_score',accuracy_score(yvl,pred))
0 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8333333333333334 1 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8666666666666667 2 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8202247191011236 3 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8539325842696629 4 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8314606741573034 5 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.898876404494382 6 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8764044943820225 7 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8314606741573034 8 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8764044943820225 9 of kfold 10 {'min_child_weight': 3.5000000000000004} accuracy_score 0.8409090909090909