word2vec随记（word2vec中的bin文件转换为txt）

fromgensim.modelsimportword2vecmodel=word2vec.Word2Vec.load_word2vec_format('/home/ubuntu/word2vec/PubMed-w2v.bin',binary=True)model.save_word2vec_format('/home/ubuntu/word2vec/PubMed-w2v.txt',binary=

# Method 1: load the binary word2vec model and re-save it in the text format.
# NOTE(review): newer gensim releases expose this loader/saver on
# gensim.models.KeyedVectors instead -- confirm against the installed version.
from gensim.models import word2vec

model = word2vec.Word2Vec.load_word2vec_format(
    '/home/ubuntu/word2vec/PubMed-w2v.bin', binary=True)
# binary=False asks gensim for the plain-text word2vec format.
model.save_word2vec_format('/home/ubuntu/word2vec/PubMed-w2v.txt', binary=False)

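运行代码时会出现如下警告（并非错误；原文截图缺失，推测是 gensim 的 `UserWarning: Pattern library is not installed, lemmatization won't be available.`）：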

作为轻度强迫症患者，我看到这个 UserWarning 极为不爽，于是尝试安装 Pattern，结果怎么装都不成功。上网搜索原因时发现：Pattern 只支持 Python 2.x，而我用的是 Python 3.5.2，总不能因为一个 Warning 就退回 Python 2.x，于是我忍了。果然只是轻度强迫症。

第二种方法（与第一种大同小异，不过也记录一下）

# Method 2: load the binary word2vec model and persist it with save().
from gensim.models import word2vec

model = word2vec.Word2Vec.load_word2vec_format(
    'Path/to/GoogleNews-vectors-negative300.bin', binary=True)
# NOTE(review): save() appears to write gensim's own serialized model format,
# not plain-text vectors, despite the ".txt" name -- confirm; if text output is
# the goal, save_word2vec_format(..., binary=False) is likely what is wanted.
model.save("file.txt")

# Method 3 (really the same idea as the previous two): write the text file by hand.
import codecs

from gensim.models import Word2Vec


def main():
    """Export the GoogleNews binary model to a text file using fixed paths."""
    path_to_model = 'GoogleNews-vectors-negative300.bin'
    output_file = 'GoogleNews-vectors-negative300_test.txt'
    export_to_file(path_to_model, output_file)


def export_to_file(path_to_model, output_file):
    """Dump every vector of a binary word2vec model to a UTF-8 text file.

    Each output line holds one vocabulary entry: the word, a tab, then the
    vector components joined by commas.

    Args:
        path_to_model: Path of the binary word2vec file to load.
        output_file: Path of the text file to create (overwritten if present).
    """
    # Load before opening the output so a failed load does not leave an
    # empty or truncated output file behind.
    # NOTE(review): newer gensim releases moved this loader to KeyedVectors and
    # changed the vocabulary attribute -- confirm against the installed version.
    model = Word2Vec.load_word2vec_format(path_to_model, binary=True)
    print('done loading Word2Vec')

    # The context manager closes the file even if a write fails part-way.
    with codecs.open(output_file, 'w', 'utf-8') as output:
        for mid in model.vocab:
            vector_str = ",".join(str(dimension) for dimension in model[mid])
            output.write(mid + "\t" + vector_str + "\n")


if __name__ == "__main__":
    main()