get_dummies を逆変換する方法

In [1]: import pandas as pd

In [2]: s = pd.Series(['a', 'b', 'a', 'c'])

In [3]: s
Out[3]: 
0    a
1    b
2    a
3    c
dtype: object

In [4]: dummies = pd.get_dummies(s)

In [5]: dummies
Out[5]: 
   a  b  c
0  1  0  0
1  0  1  0
2  1  0  0
3  0  0  1

In [6]: s2 = dummies.idxmax(axis=1)

In [7]: s2
Out[7]: 
0    a
1    b
2    a
3    c
dtype: object

In [8]: (s2 == s).all()
Out[8]: True

stackoverflow.com

curl で timeoutしたときのトラブルシューティング

以下のコマンドで一旦確認。

  • アプリケーションがある場合
curl -v http://...
telnet XXX.XXX.XXX.XXX 443

# 接続成功
telnet XXX.XXX.XXX.XXX 443
Trying XXX.XXX.XXX.XXX...
Connected to XXX.XXX.XXX.XXX.
Escape character is '^]'.

# 接続失敗
telnet XXX.XXX.XXX.XXX 443
Trying XXX.XXX.XXX.XXX...
telnet: connect to address XXX.XXX.XXX.XXX: Connection timed out

qiita.com

  • おまけ。sudo の挙動確認
sudo -l

tffm レコメンド性能高そう

github.com

$ pip install tffm

order とかよく挙動がわからない。パラメータがあるけど。 この辺、レコメンドエンジンに使えそう。

from sklearn.model_selection import train_test_split

X_tr, X_te, y_tr, y_te = train_test_split(df.values, df['tfidf'].values, random_state=42, test_size=0.3)


from tffm import TFFMRegressor
from sklearn.metrics import mean_squared_error

order = 3
model = TFFMRegressor(
    order=order, 
    rank=10, 
    optimizer=tf.train.AdamOptimizer(learning_rate=0.001), 
    n_epochs=50, 
    batch_size=1024,
    init_std=0.001,
    reg=0.01,
    input_type='sparse'
)
# translate Y from {0,1} to {-10, 10}
model.fit(X_tr_sparse, y_tr, show_progress=True)
predictions = model.predict(X_te_sparse)

import scipy.sparse as sp
# only CSR format supported

X_tr_sparse = sp.csr_matrix(X_tr)
X_te_sparse = sp.csr_matrix(X_te)


from sklearn.metrics import roc_auc_score, accuracy_score
print('MSE: {}'.format(mean_squared_error(y_te, predictions)))

taisablog.com

MySQL のテーブル更新情報

information_schema.tables に色々情報詰まってそう

mysql> SELECT TABLE_NAME,UPDATE_TIME FROM   information_schema.tables WHERE  TABLE_SCHEMA = 'dbname';
+---------------------+---------------------+
| TABLE_NAME          | UPDATE_TIME         |
+---------------------+---------------------+
| tabel_a       | 2019-01-30 15:40:04 |
+---------------------+---------------------+

stackoverflow.com

xgboost のstratifiedkfold は使っても意味ないのか

xgboost の training にstratifiedkfoldを使ってみた。

from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

param_grid = [{'min_child_weight': np.arange(0.1, 10.1, 0.1)}] 

import warnings
warnings.simplefilter('ignore', DeprecationWarning)
kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
_model = GridSearchCV(XGBClassifier(), param_grid, cv=kf.split(X, y), scoring= 'f1',iid=True)
_model.fit(X, y)
print (model.best_params_)
pred=_model.predict(X)
print('accuracy_score',accuracy_score(y,pred))
{'min_child_weight': 3.5000000000000004}
accuracy_score 0.8529741863075196
kf = StratifiedKFold(n_splits=10,random_state=1,shuffle=True)
clf = XGBClassifier(**_model.best_params_)
for i, (train_index,test_index) in enumerate(kf.split(X,y)):
    print('\n{} of kfold {}'.format(i,kf.n_splits))
    xtr,xvl = X.loc[train_index],X.loc[test_index]
    ytr,yvl = y[train_index],y[test_index]
    clf.fit(xtr, ytr,
            eval_set=[(xtr, ytr), (xvl, yvl)],
            eval_metric='logloss',
            verbose=False,
    )
    evals_result = clf.evals_result()
    print (_model.best_params_)
    pred=model.predict(xvl)
    print('accuracy_score',accuracy_score(yvl,pred))
0 of kfold 10
{'min_child_weight': 3.5000000000000004}
accuracy_score 0.8333333333333334

1 of kfold 10
{'min_child_weight': 3.5000000000000004}
accuracy_score 0.8666666666666667

2 of kfold 10
{'min_child_weight': 3.5000000000000004}
accuracy_score 0.8202247191011236

3 of kfold 10
{'min_child_weight': 3.5000000000000004}
accuracy_score 0.8539325842696629

4 of kfold 10
{'min_child_weight': 3.5000000000000004}
accuracy_score 0.8314606741573034

5 of kfold 10
{'min_child_weight': 3.5000000000000004}
accuracy_score 0.898876404494382

6 of kfold 10
{'min_child_weight': 3.5000000000000004}
accuracy_score 0.8764044943820225

7 of kfold 10
{'min_child_weight': 3.5000000000000004}
accuracy_score 0.8314606741573034

8 of kfold 10
{'min_child_weight': 3.5000000000000004}
accuracy_score 0.8764044943820225

9 of kfold 10
{'min_child_weight': 3.5000000000000004}
accuracy_score 0.8409090909090909