word2vecすごいぞ
結構すごい。表記ゆれとかも吸収できそう。
from gensim.models import word2vec

# Build the training corpus from the first 100,000 review comments.
# Each comment is passed through _split_to_rawwords (defined elsewhere);
# presumably it returns the comment's list of word tokens -- verify.
ls = [_split_to_rawwords(row) for row in df_id['review_comment'].values[:100000]]

# Train the word2vec model on the tokenized comments.
# NOTE(review): `size` is the keyword used by gensim < 4.0; gensim 4.0
# renamed it to `vector_size`. Confirm the installed gensim version before
# changing it.
model = word2vec.Word2Vec(ls, size=500, window=5, min_count=5, workers=4)

# Query the words most similar to 'エアコン'.
model.wv.most_similar(positive=['エアコン'])
...  # placeholder kept from the original notes (content elided there)

# Persist the trained model to disk, then load it back from the same path.
model.save("./review.model")
model = word2vec.Word2Vec.load("./review.model")