This article compiles typical usage examples of the Python method gensim.models.word2vec.Word2Vec. If you have been wondering how word2vec.Word2Vec is used in Python and what it looks like in practice, the hand-picked code examples below should help. You can also explore further usage examples from its containing module, gensim.models.word2vec.
The following presents 15 code examples of word2vec.Word2Vec, ordered by popularity by default.
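Before the examples, a minimal, self-contained sketch of a Word2Vec training run may help set the stage. It targets gensim >= 4.0, where the size and iter keyword arguments seen in several of the older examples below were renamed to vector_size and epochs; the toy corpus is purely illustrative.
from gensim.models import word2vec

# Toy corpus: a list of tokenised sentences (lists of strings).
sentences = [
    ["human", "interface", "computer"],
    ["graph", "trees", "computer"],
    ["graph", "minors", "trees"],
]

# gensim >= 4.0 API; older code below uses size=/iter= instead of vector_size=/epochs=.
model = word2vec.Word2Vec(
    sentences,
    vector_size=32,   # dimensionality of the word vectors
    window=5,         # max distance between current and predicted word
    min_count=1,      # ignore words with total frequency lower than this
    sg=1,             # 1 = skip-gram, 0 = CBOW
    epochs=5,
)

print(model.wv["graph"].shape)          # -> (32,)
print(model.wv.most_similar("graph"))   # nearest neighbours by cosine similarity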
Example 1: creat_dict
# Required import: from gensim.models import word2vec [as alias]
# Or: from gensim.models.word2vec import Word2Vec [as alias]
def creat_dict(texts_cut=None,
sg=1,
size=128,
window=5,
min_count=1):
'''
Train a word2vec model on the tokenised texts.
:param texts_cut: word lists of the texts (list of token lists)
:param sg: training algorithm, 0 for CBOW, 1 for skip-gram
:param size: the dimensionality of the feature vectors
:param window: the maximum distance between the current and predicted word within a sentence
:param min_count: ignore all words with total frequency lower than this
:return: trained gensim word2vec.Word2Vec model
'''
model_word2vec = word2vec.Word2Vec(texts_cut, sg=sg, size=size, window=window, min_count=min_count)
return model_word2vec
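A hedged usage sketch for the helper above, assuming a pre-4.0 gensim that still accepts the size= keyword used in its signature; the tiny corpus is invented for illustration.
# Hypothetical call to creat_dict with a toy pre-tokenised corpus.
texts_cut = [
    ["good", "product", "quality"],
    ["fast", "delivery", "good"],
]
model = creat_dict(texts_cut=texts_cut, sg=1, size=16, window=3, min_count=1)
print(model.wv.most_similar("good", topn=2))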
Example 2: _default_word2vec_model
# Required import: from gensim.models import word2vec [as alias]
# Or: from gensim.models.word2vec import Word2Vec [as alias]
def _default_word2vec_model(self):
from gensim.models import word2vec
return word2vec.Word2Vec(size=100,
alpha=0.025,
window=5,
min_count=5,
max_vocab_size=None,
sample=0,
seed=1,
workers=1,
min_alpha=0.0001,
sg=1,
hs=1,
negative=0,
cbow_mean=0,
iter=1,
null_word=0,
trim_rule=None,
sorted_vocab=1)
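Because no corpus is passed to the constructor above, the model only holds hyperparameters; vocabulary building and training happen later. A hedged sketch of that second step, using the explicit train() signature required since gensim 1.0 (total_examples and epochs must be supplied):
# Assumes `model` was created without sentences, e.g. by _default_word2vec_model().
corpus = [["hello", "world"], ["hello", "gensim"]]

model.build_vocab(corpus)
model.train(corpus,
            total_examples=model.corpus_count,  # set by build_vocab
            epochs=5)                           # number of training passes (iter= above)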
Example 3: __init__
# Required import: from gensim.models import word2vec [as alias]
# Or: from gensim.models.word2vec import Word2Vec [as alias]
def __init__(self, corpus, word2vec_model=None):
'''
Parameters
----------
corpus: ParsedCorpus
from which to build word2vec model
word2vec_model: word2vec.Word2Vec
Gensim instance to be used to train word2vec model
'''
try:
from gensim.models import word2vec
assert word2vec_model is None or isinstance(word2vec_model, word2vec.Word2Vec)
except:
warnings.warn("You should really install gensim, but we're going to duck-type your model and pray it works")
assert isinstance(corpus, ParsedCorpus)
self.corpus = corpus
self.model = self._get_word2vec_model(word2vec_model)
Example 4: embed_category
# Required import: from gensim.models import word2vec [as alias]
# Or: from gensim.models.word2vec import Word2Vec [as alias]
def embed_category(self, category, model=None):
'''
:param category: str, category whose terms should be embedded
:param model: gensim word2vec.Word2Vec model, optional
    (terms are filtered with self.term_acceptance_re, a compiled regular
    expression identifying valid terms, default re.compile('[a-z]{3,}'))
:return: EmbeddingsResolver (self)
'''
self._verify_category(category)
if self.term_acceptance_re is not None:
acceptable_terms = set([t for t in self.corpus_.get_terms() if self.term_acceptance_re.match(t)])
else:
acceptable_terms = set(self.corpus_.get_terms())
trained_model = CategorySpecificWord2VecFromParsedCorpus(self.corpus_, category, model).train()
self.category_word2vec_model_[category] = trained_model
word2dwe = {word: trained_model[word] for word in trained_model.wv.vocab.keys()}
self.category_embeddings_[category] = word2dwe
return self
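The dictionary comprehension above uses the pre-4.0 gensim attributes (wv.vocab, item lookup on the model object). On gensim >= 4.0 the equivalent, as a hedged sketch, would be:
# gensim >= 4.0: wv.vocab was replaced by wv.key_to_index,
# and vectors are looked up on model.wv rather than on the model itself.
word2dwe = {word: trained_model.wv[word]
            for word in trained_model.wv.key_to_index}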
Example 5: load_trainsform
# Required import: from gensim.models import word2vec [as alias]
# Or: from gensim.models.word2vec import Word2Vec [as alias]
def load_trainsform(self, X):
    """
    Load the word2vec model and build an averaged word-vector representation per document.
    :param X: input documents, list of str (whitespace-tokenised)
    :return: np.array of shape (len(X), self.size)
    """
    print('Loading model...')
    model = word2vec.Word2Vec.load('20w_size_win100_300.model')  # set this to your own model path
    print('Model loaded')
    res = np.zeros((len(X), self.size))
    print('Building word2vec document vectors...')
    for i, line in enumerate(X):
        terms = line.split()
        count = 0
        for term in terms:
            # A failed lookup means the word was dropped at training time
            # (e.g. filtered out by min_count), so it is simply skipped.
            try:
                res[i] += np.array(model[term])
                count += 1
            except KeyError:
                continue
        if count != 0:
            res[i] = res[i] / float(count)  # average the word vectors
    return res
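The same document-averaging can be written more compactly with numpy; a hedged sketch, assuming `model` is an already loaded Word2Vec model and `docs` is a list of whitespace-tokenised strings:
import numpy as np

def average_doc_vectors(model, docs, size):
    """Average the word vectors of each document; out-of-vocabulary words are skipped."""
    res = np.zeros((len(docs), size))
    for i, doc in enumerate(docs):
        vecs = [model.wv[t] for t in doc.split() if t in model.wv]
        if vecs:
            res[i] = np.mean(vecs, axis=0)
    return res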
Example 6: testTrainingCbow
# Required import: from gensim.models import word2vec [as alias]
# Or: from gensim.models.word2vec import Word2Vec [as alias]
def testTrainingCbow(self):
"""Test CBOW word2vec training."""
# to test training, make the corpus larger by repeating its sentences over and over
# build vocabulary, don't train yet
model = word2vec.Word2Vec(size=2, min_count=1, sg=0)
model.build_vocab(sentences)
self.assertTrue(model.syn0.shape == (len(model.vocab), 2))
self.assertTrue(model.syn1.shape == (len(model.vocab), 2))
model.train(sentences)
sims = model.most_similar('graph', topn=10)
# self.assertTrue(sims[0][0] == 'trees', sims) # most similar
# test querying for "most similar" by vector
graph_vector = model.syn0norm[model.vocab['graph'].index]
sims2 = model.most_similar(positive=[graph_vector], topn=11)
self.assertEqual(sims, sims2[1:]) # ignore first element of sims2, which is 'graph' itself
# build vocab and train in one step; must be the same as above
model2 = word2vec.Word2Vec(sentences, size=2, min_count=1, sg=0)
self.models_equal(model, model2)
Example 7: testTrainingSgNegative
# Required import: from gensim.models import word2vec [as alias]
# Or: from gensim.models.word2vec import Word2Vec [as alias]
def testTrainingSgNegative(self):
"""Test skip-gram (negative sampling) word2vec training."""
# to test training, make the corpus larger by repeating its sentences over and over
# build vocabulary, don't train yet
model = word2vec.Word2Vec(size=2, min_count=1, hs=0, negative=2)
model.build_vocab(sentences)
self.assertTrue(model.syn0.shape == (len(model.vocab), 2))
self.assertTrue(model.syn1neg.shape == (len(model.vocab), 2))
model.train(sentences)
sims = model.most_similar('graph', topn=10)
# self.assertTrue(sims[0][0] == 'trees', sims) # most similar
# test querying for "most similar" by vector
graph_vector = model.syn0norm[model.vocab['graph'].index]
sims2 = model.most_similar(positive=[graph_vector], topn=11)
self.assertEqual(sims, sims2[1:]) # ignore first element of sims2, which is 'graph' itself
# build vocab and train in one step; must be the same as above
model2 = word2vec.Word2Vec(sentences, size=2, min_count=1, hs=0, negative=2)
self.models_equal(model, model2)
Example 8: testTrainingCbowNegative
# Required import: from gensim.models import word2vec [as alias]
# Or: from gensim.models.word2vec import Word2Vec [as alias]
def testTrainingCbowNegative(self):
"""Test CBOW (negative sampling) word2vec training."""
# to test training, make the corpus larger by repeating its sentences over and over
# build vocabulary, don't train yet
model = word2vec.Word2Vec(size=2, min_count=1, sg=0, hs=0, negative=2)
model.build_vocab(sentences)
self.assertTrue(model.syn0.shape == (len(model.vocab), 2))
self.assertTrue(model.syn1neg.shape == (len(model.vocab), 2))
model.train(sentences)
sims = model.most_similar('graph', topn=10)
# self.assertTrue(sims[0][0] == 'trees', sims) # most similar
# test querying for "most similar" by vector
graph_vector = model.syn0norm[model.vocab['graph'].index]
sims2 = model.most_similar(positive=[graph_vector], topn=11)
self.assertEqual(sims, sims2[1:]) # ignore first element of sims2, which is 'graph' itself
# build vocab and train in one step; must be the same as above
model2 = word2vec.Word2Vec(sentences, size=2, min_count=1, sg=0, hs=0, negative=2)
self.models_equal(model, model2)
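The three tests above come from an older gensim test suite and use attributes that have since been moved or renamed (syn0 -> wv.vectors, model.vocab -> wv.key_to_index, most_similar -> wv.most_similar, and train() now requires total_examples and epochs). A hedged sketch of the CBOW test against gensim >= 4.0:
# Rough modern-gensim equivalent of testTrainingCbow above.
model = word2vec.Word2Vec(vector_size=2, min_count=1, sg=0)
model.build_vocab(sentences)
assert model.wv.vectors.shape == (len(model.wv.key_to_index), 2)

model.train(sentences, total_examples=model.corpus_count, epochs=model.epochs)
sims = model.wv.most_similar('graph', topn=10)

# Querying "most similar" by vector; the first hit of sims2 is 'graph' itself.
graph_vector = model.wv.get_vector('graph', norm=True)
sims2 = model.wv.most_similar(positive=[graph_vector], topn=11)
assert sims == sims2[1:]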
Example 9: fit
# Required import: from gensim.models import word2vec [as alias]
# Or: from gensim.models.word2vec import Word2Vec [as alias]
def fit(self, graph):
"""
Fitting a Diff2Vec model.
Arg types:
* **graph** *(NetworkX graph)* - The graph to be embedded.
"""
self._set_seed()
self._check_graph(graph)
diffuser = EulerianDiffuser(self.diffusion_number, self.diffusion_cover)
diffuser.do_diffusions(graph)
model = Word2Vec(diffuser.diffusions,
hs=1,
alpha=self.learning_rate,
iter=self.epochs,
size=self.dimensions,
window=self.window_size,
min_count=self.min_count,
workers=self.workers,
seed=self.seed)
num_of_nodes = graph.number_of_nodes()
self._embedding = [model[str(n)] for n in range(num_of_nodes)]
Example 10: fit
# Required import: from gensim.models import word2vec [as alias]
# Or: from gensim.models.word2vec import Word2Vec [as alias]
def fit(self, graph):
"""
Fitting a DeepWalk model.
Arg types:
* **graph** *(NetworkX graph)* - The graph to be embedded.
"""
self._set_seed()
self._check_graph(graph)
walker = RandomWalker(self.walk_length, self.walk_number)
walker.do_walks(graph)
model = Word2Vec(walker.walks,
hs=1,
alpha=self.learning_rate,
iter=self.epochs,
size=self.dimensions,
window=self.window_size,
min_count=self.min_count,
workers=self.workers,
seed=self.seed)
num_of_nodes = graph.number_of_nodes()
self._embedding = [model[str(n)] for n in range(num_of_nodes)]
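Both fit() methods above read node vectors with model[str(n)] and pass size=/iter=, which is pre-4.0 gensim. A self-contained hedged sketch of the same idea on a current gensim, with toy walks standing in for the diffuser/walker output:
from gensim.models.word2vec import Word2Vec

# Toy "walks": sequences of node ids as strings, as RandomWalker/EulerianDiffuser would produce.
walks = [["0", "1", "2", "1"], ["2", "3", "0", "3"], ["1", "3", "2", "0"]]

# gensim >= 4.0 names: size -> vector_size, iter -> epochs.
model = Word2Vec(walks, hs=1, alpha=0.05, epochs=5,
                 vector_size=8, window=2, min_count=1, workers=1, seed=42)

# Node embeddings are read from model.wv (model[str(n)] was the pre-4.0 spelling).
embedding = [model.wv[str(n)] for n in range(4)]
print(len(embedding), embedding[0].shape)   # 4 (8,)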
Example 11: create_embedding
# Required import: from gensim.models import word2vec [as alias]
# Or: from gensim.models.word2vec import Word2Vec [as alias]
def create_embedding(self):
"""
Creating a multi-scale embedding.
"""
self.embedding = []
for index in range(1, self.args.window_size+1):
print("\nOptimization round: "+str(index)+"/"+str(self.args.window_size)+".")
print("Creating documents.")
clean_documents = self.walk_extracts(index)
print("Fitting model.")
model = Word2Vec(clean_documents,
size=self.args.dimensions,
window=1,
min_count=self.args.min_count,
sg=1,
workers=self.args.workers)
new_embedding = self.get_embedding(model)
self.embedding = self.embedding + [new_embedding]
self.embedding = np.concatenate(self.embedding, axis=1)
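The helper self.get_embedding(model) is not shown in this snippet. A purely illustrative stand-in that stacks the learned vectors in node-id order, so the per-round embeddings can be concatenated column-wise, might look like:
import numpy as np

def get_embedding(model, number_of_nodes):
    """Hypothetical stand-in for self.get_embedding(model): one row per node id."""
    return np.array([model.wv[str(node)] for node in range(number_of_nodes)])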
Example 12: get_embedding_matrix
# Required import: from gensim.models import word2vec [as alias]
# Or: from gensim.models.word2vec import Word2Vec [as alias]
def get_embedding_matrix(model_filepath, word2id):
"""
Get the embedding matrix of the word2vec model
:param model_filepath: the file path to the pre-built word2vec model
:param word2id: the dictionary mapping from word to id
:return: the embedding matrix of the word2vec model
"""
word2vec_model = Word2Vec.load(model_filepath)
embeddings_dict = __get_embedding_dict(model_filepath)
embedding_matrix = np.zeros((len(word2id) + 1, word2vec_model.vector_size))
for word, idx in word2id.items():
embedding_vector = embeddings_dict.get(word)
if embedding_vector is not None:
embedding_matrix[idx] = embedding_vector
return embedding_matrix
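The private helper __get_embedding_dict is referenced but not shown here. A hedged, illustrative stand-in that builds the word-to-vector dictionary from the same model file could be:
def __get_embedding_dict(model_filepath):
    """Illustrative stand-in: map each vocabulary word to its vector."""
    model = Word2Vec.load(model_filepath)
    # wv.key_to_index on gensim >= 4.0; older code would iterate wv.vocab instead.
    return {word: model.wv[word] for word in model.wv.key_to_index}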
Example 13: predictData
# Required import: from gensim.models import word2vec [as alias]
# Or: from gensim.models.word2vec import Word2Vec [as alias]
def predictData():
"""
Use the trained model to predict on real data.
"""
input_texts = ["很好很满意","不好不满意","质量有问题","商家态度很差","售后很渣,渣渣"]
# word_model = word2vec.Word2Vec.load('./models/Word2vec_model.model')
# w2indx, w2vec, texts = create_dictionaries(word_model, texts)
# print(texts)
texts = predict_wordtoVect(input_texts)
model = get_model()
# Predict
pred_result = model.predict_classes(texts)
print(pred_result)
labels = [int(round(x[0])) for x in pred_result]
label2word = {1: '正面', 0: '负面'}
for i in range(len(pred_result)):
print('{0} -------- {1}'.format(label2word[labels[i]], input_texts[i]))
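Sequential.predict_classes was removed in recent Keras/TensorFlow releases. For a binary sentiment model like this one (sigmoid output assumed), a hedged replacement is to threshold predict() directly:
# Replacement for model.predict_classes(texts) on Keras/TF versions without it.
probs = model.predict(texts)
labels = (probs > 0.5).astype(int).ravel().tolist()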
Example 14: main
# Required import: from gensim.models import word2vec [as alias]
# Or: from gensim.models.word2vec import Word2Vec [as alias]
def main():
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
sentences = word2vec.LineSentence("wiki_seg.txt")
model = word2vec.Word2Vec(sentences, size=250)
# Save the model for later use
model.save(u"word2vec.model")
# How to load the model back:
# model = word2vec.Word2Vec.load("your_model_name")
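Once saved, the model is usually loaded back and queried for nearest neighbours; a hedged sketch (wv.most_similar works on both old and new gensim, and the query token is only an illustration):
from gensim.models import word2vec

model = word2vec.Word2Vec.load("word2vec.model")
# The query token must have survived min_count filtering during training.
for term, score in model.wv.most_similar("電影", topn=5):
    print(term, score)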
Example 15: main
# Required import: from gensim.models import word2vec [as alias]
# Or: from gensim.models.word2vec import Word2Vec [as alias]
def main():
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
sentences = word2vec.LineSentence("wiki_seg.txt")
model = word2vec.Word2Vec(sentences, size=250)
# Save the model for later use
model.save("word2vec.model")
# How to load the model back:
# model = word2vec.Word2Vec.load("your_model_name")