# Mean encoding methods, including the K-fold variant

import pandas as pd
import numpy as np

# Columns used as the grouping keys for the aggregation below.
# NOTE(review): 'cnt' is also the column being summed in that aggregation --
# confirm it really belongs among the grouping keys.
index_cols = ['shop_id', 'item_id', 'cnt']
# Fallback value used to fill missing target encodings.
global_mean = 0.2

# NOTE(review): `filename` is not defined in the visible part of this file;
# it must be assigned before this line runs.
df = pd.read_csv(filename)

# Aggregate df per unique combination of index_cols, summing 'cnt' into a
# column named 'target'.
# Named aggregation is used instead of the nested-dict renamer
# ({'cnt': {'target': 'sum'}}), which pandas >= 1.0 rejects with
# SpecificationError. The result already has flat column names, so no
# MultiIndex-flattening step is needed afterwards.
# NOTE(review): 'cnt' is both a grouping key (via index_cols) and the summed
# column -- confirm this is intended.
gb = df.groupby(index_cols, as_index=False).agg(target=('cnt', 'sum'))


# Mean (target) encoding of item_id, shown two equivalent ways.

# Method 1: compute the per-item mean of target first, then map it onto gb
# through the item_id column.
item_id_target_mean = gb.groupby('item_id')['target'].mean()
gb['item_target_enc'] = gb['item_id'].map(item_id_target_mean)

# Method 2: assign the result directly; transform('mean') returns a Series
# aligned row-for-row with gb.
gb['item_target_enc'] = gb.groupby('item_id')['target'].transform('mean')

# Fill missing encodings with the global mean. The result is assigned back
# instead of calling fillna(inplace=True) on the column selection: that
# chained in-place form operates on a temporary and does not update gb under
# pandas Copy-on-Write (and emits a warning in pandas 2.x).
gb['item_target_enc'] = gb['item_target_enc'].fillna(global_mean)


# K-fold (out-of-fold) mean encoding: the encoding of each validation row is
# computed only from the rows of the other folds, so a row's own target value
# never contributes to its own feature.
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=5, shuffle=False)
# NOTE(review): StratifiedKFold stratifies on class labels; if 'target' is
# continuous, or has values occurring fewer than n_splits times, it may raise
# or warn -- confirm whether plain KFold is the intended splitter.

# Out-of-fold encodings, filled in fold by fold (NaN until assigned).
oof_enc = pd.Series(index=gb.index, dtype='float64')

for tr_ind, val_ind in skf.split(gb.values, gb['target'].values):
    X_tr, X_val = gb.iloc[tr_ind], gb.iloc[val_ind]
    # Per-item target means estimated on the training folds only.
    item_means = X_tr.groupby('item_id')['target'].mean()
    # Map the means onto the validation rows by item_id. Using
    # X_tr...transform('mean') here would yield a Series indexed by the
    # training rows, which never aligns with the validation rows (all NaN).
    oof_enc.iloc[val_ind] = X_val['item_id'].map(item_means).values

# Write the encodings back to gb (assigning to the X_val slice would only
# modify a temporary copy). Items unseen in the training folds fall back to
# the global mean.
gb['item_target_enc'] = oof_enc.fillna(global_mean)