데이터분석/Machine Learning
gensim word2vec simple usage
늘근이
2018. 12. 16. 21:29
from gensim.models import word2vec
from gensim.models.keyedvectors import KeyedVectors

# Toy corpus: each inner list is one tokenized sentence.
token = [['나는', '너를', '사랑해'], ['나도', '너를', '사랑해']]

# Train a tiny Word2Vec model on the toy corpus.
# NOTE: gensim >= 4.0 renamed the `size` parameter to `vector_size`;
# the old keyword raises TypeError on current gensim.
embedding = word2vec.Word2Vec(
    token,
    vector_size=5,  # dimensionality of the learned word vectors
    window=1,       # context window: one word on each side
    negative=3,     # number of negative samples per positive example
    min_count=1,    # keep every token (the corpus is tiny)
)

embedding.save('model')  # save the full model (training can be resumed)
embedding.wv.save_word2vec_format('my.embedding', binary=False)  # export vectors only, text format

print(embedding.wv['너를'])  # the learned vector for one word
# In gensim 4.x, most_similar() lives on the KeyedVectors (`.wv`),
# not on the model object itself.
print(embedding.wv.most_similar('너를'))

# Reload the exported vectors as read-only KeyedVectors.
model = KeyedVectors.load_word2vec_format('my.embedding', binary=False, encoding='utf-8')