This article collects typical usage examples of the gensim.models.Word2Vec class in Python. If you have been wondering exactly how to use models.Word2Vec, or looking for real-world examples of it, the curated code samples below may help. You can also browse the containing module, gensim.models, for further usage examples.
Below are 15 code examples of models.Word2Vec, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
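A note on API versions before diving in: every example on this page uses the pre-4.0 gensim API (size=, iter=, and occasionally direct model[word] indexing). In gensim 4.x these parameters were renamed, so here is a minimal sketch of the same call in both styles, on a toy corpus:

from gensim.models import Word2Vec

sentences = [["hello", "world"], ["hello", "gensim"]]

# gensim < 4.0, the style used throughout the examples below
old_model = Word2Vec(sentences, size=128, window=5, min_count=1, sg=1, iter=5)

# gensim >= 4.0: size -> vector_size, iter -> epochs; look vectors up via model.wv
new_model = Word2Vec(sentences, vector_size=128, window=5, min_count=1, sg=1, epochs=5)
vector = new_model.wv["hello"]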
Example 1: skipgram_baseline

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def skipgram_baseline(graph, **kwargs):
    scale = kwargs.get('scale', -1)
    representation_size = kwargs.get('representation_size', 128)
    if scale == 1:
        # use the edges of the graph directly as training sequences
        edges, weights = graph.get_edges()
    else:
        # otherwise build a DeepWalk corpus of truncated random walks
        path_length = kwargs.get('path_length', 40)
        num_paths = kwargs.get('num_paths', 80)
        output = kwargs.get('output', 'default')
        edges = graph_coarsening.build_deepwalk_corpus(graph, num_paths, path_length, output)
    if kwargs['hs'] == 0:
        print('Training the Negative Sampling Model...')
        model = Word2Vec(edges, size=representation_size, window=kwargs['window_size'], min_count=0, sg=1, hs=0, iter=kwargs['iter_count'], negative=5, workers=20)
    else:
        print('Training the Hierarchical Softmax Model...')
        model = Word2Vec(edges, size=kwargs['representation_size'], window=kwargs['window_size'], min_count=0, sg=1, hs=1, iter=kwargs['iter_count'], workers=20)
    print('Finished training the Skip-gram model.')
    return model
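The function above depends on a graph object exposing get_edges() and on a graph_coarsening module providing build_deepwalk_corpus(); both come from the surrounding project and are not shown on this page. Assuming such a graph is already constructed, a hypothetical call could look like:

# hs=0 selects the negative-sampling branch; window_size and iter_count are required kwargs
model = skipgram_baseline(graph, hs=0, window_size=5, iter_count=5, representation_size=128)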
Example 2: sum_trigram

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def sum_trigram(sent, model):
    sent = sent.split()
    first = True
    second = True
    tot = 0
    for i in range(len(sent)):
        try:
            if first:
                # first token: no history yet, condition on two padding symbols
                tot += model[None, None][sent[i]]
                first = False
            elif second:
                # second token: condition on one padding symbol plus the previous token
                tot += model[None, sent[i-1]][sent[i]]
                second = False
            else:
                tot += model[sent[i-2], sent[i-1]][sent[i]]
        except KeyError:
            # skip trigrams the model has never seen
            continue
    return tot

# Word2Vec training (returns vectors):
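Despite the page topic, model in Example 2 is not a Word2Vec instance: indexing first by a (prev2, prev1) tuple and then by a word points to a dict-of-dicts trigram table, with None as padding at sentence starts (the trailing comment announces a Word2Vec training function that was cut off in this excerpt). A minimal sketch of a table this function could consume, with the structure inferred from the lookups rather than taken from the source project:

trigram_model = {
    (None, None): {"the": -0.5},    # score of "the" opening a sentence
    (None, "the"): {"cat": -1.2},   # "cat" following sentence-initial "the"
    ("the", "cat"): {"sat": -0.8},
}
score = sum_trigram("the cat sat", trigram_model)  # -0.5 + -1.2 + -0.8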
Example 3: train_save

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def train_save(self, list_csv):
    sentences = MySentences(list_csv)
    num_features = 256
    min_word_count = 1
    num_workers = 20
    context = 5
    epoch = 20
    sample = 1e-5  # downsampling threshold for very frequent words
    model = Word2Vec(
        sentences,
        size=num_features,
        min_count=min_word_count,
        workers=num_workers,
        sample=sample,
        window=context,
        iter=epoch,
    )
    # model.save(model_fn)
    return model
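MySentences is defined elsewhere in the source project. gensim only requires an iterable that yields lists of tokens, so a minimal stand-in that streams one whitespace-tokenized sentence per CSV row might look like this (a sketch assuming the text sits in the first column, not the project's actual class):

import csv

class MySentences(object):
    # stream token lists from a list of CSV files, one sentence per row
    def __init__(self, list_csv):
        self.list_csv = list_csv

    def __iter__(self):
        for path in self.list_csv:
            with open(path) as f:
                for row in csv.reader(f):
                    yield row[0].split()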
Example 4: train

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def train(self, embed_size=128, window_size=5, workers=3, iter=5, **kwargs):
    kwargs["sentences"] = self.sentences
    kwargs["min_count"] = kwargs.get("min_count", 0)
    kwargs["size"] = embed_size
    kwargs["sg"] = 1  # skip-gram
    kwargs["hs"] = 1  # DeepWalk uses hierarchical softmax
    kwargs["workers"] = workers
    kwargs["window"] = window_size
    kwargs["iter"] = iter
    print("Learning embedding vectors...")
    model = Word2Vec(**kwargs)
    print("Learning embedding vectors done!")
    self.w2v_model = model
    return model
Example 5: train

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def train(self, embed_size=128, window_size=5, workers=3, iter=5, **kwargs):
    kwargs["sentences"] = self.sentences
    kwargs["min_count"] = kwargs.get("min_count", 0)
    kwargs["size"] = embed_size
    kwargs["sg"] = 1  # skip-gram
    kwargs["hs"] = 0  # node2vec uses negative sampling, not hierarchical softmax
    kwargs["workers"] = workers
    kwargs["window"] = window_size
    kwargs["iter"] = iter
    print("Learning embedding vectors...")
    model = Word2Vec(**kwargs)
    print("Learning embedding vectors done!")
    self.w2v_model = model
    return model
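Examples 4 and 5 are identical except for the hs flag: DeepWalk follows its paper in training with hierarchical softmax, while node2vec uses negative sampling. Stripped of the class plumbing, the two configurations reduce to the following direct calls (walks stands for any list of token lists):

from gensim.models import Word2Vec

walks = [["1", "2", "3"], ["2", "3", "4"]]  # toy random walks, node IDs as strings
deepwalk_style = Word2Vec(walks, size=128, window=5, min_count=0, sg=1, hs=1, iter=5)
node2vec_style = Word2Vec(walks, size=128, window=5, min_count=0, sg=1, hs=0, negative=5, iter=5)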
Example 6: train_word2vec_by_word

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def train_word2vec_by_word():
    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logging.info("running")
    inp = "cut_zhwiki_wiki_parse.txt"
    outp1 = "w2v_model_wiki.model"
    outp2 = "w2v_model_wiki_word.vec"
    print(multiprocessing.cpu_count())
    # skip-gram with hierarchical softmax
    model = Word2Vec(LineSentence(inp), size=300, window=10,
                     min_count=1, sg=1, hs=1, iter=10,
                     workers=multiprocessing.cpu_count())
    model.save(outp1)
    model.wv.save_word2vec_format(outp2, binary=False)
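LineSentence expects a plain-text file with one sentence per line and tokens separated by whitespace, which is why the wiki dump above must be pre-segmented into cut_zhwiki_wiki_parse.txt before training. A quick way to check that an input file has the right shape:

from gensim.models.word2vec import LineSentence

for tokens in LineSentence("cut_zhwiki_wiki_parse.txt"):
    print(tokens)  # each line comes back as a list of tokens
    break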
Example 7: learn_base_embedding

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def learn_base_embedding(self):
    """
    Learn an embedding of the nodes in the base graph.
    :return self.embedding: Embedding of nodes in the latent space.
    """
    self.paths = [[str(node) for node in walk] for walk in self.paths]
    model = Word2Vec(self.paths,
                     size=self.args.dimensions,
                     window=self.args.window_size,
                     min_count=1,
                     sg=1,
                     workers=self.args.workers,
                     iter=1)
    # collect vectors in graph-node order into a dense matrix
    self.embedding = np.array([list(model[str(n)]) for n in self.graph.nodes()])
    return self.embedding
Example 8: build

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def build(train_seg_path, test_seg_path, out_path=None, sentence_path='',
          w2v_bin_path="w2v.bin", min_count=1, col_sep='\t'):
    sentences = extract_sentence(train_seg_path, test_seg_path, col_sep=col_sep)
    save_sentence(sentences, sentence_path)
    print('Training w2v model...')
    # train the model
    w2v = Word2Vec(sg=1, sentences=LineSentence(sentence_path),
                   size=256, window=5, min_count=min_count, iter=40)
    w2v.wv.save_word2vec_format(w2v_bin_path, binary=True)
    print("Saved %s OK." % w2v_bin_path)
    # test
    # sim = w2v.wv.similarity('大', '小')
    # print('大 vs 小 similarity score:', sim)
    # reload the binary model and dump a {word: vector} dict
    model = KeyedVectors.load_word2vec_format(w2v_bin_path, binary=True)
    word_dict = {}
    for word in model.vocab:
        word_dict[word] = model[word]
    save_pkl(word_dict, out_path, overwrite=True)
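extract_sentence, save_sentence, and save_pkl in Example 8 are project helpers that this page does not show. For completeness, a plausible save_pkl stand-in (a sketch, not the project's implementation):

import os
import pickle

def save_pkl(obj, path, overwrite=True):
    # pickle obj to path, optionally refusing to clobber an existing file
    if not overwrite and os.path.exists(path):
        return
    with open(path, "wb") as f:
        pickle.dump(obj, f)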
Example 9: train

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def train(self, G):
    self.G = G
    walks = self._simulate_walks(self.walk_length, self.walk_num)
    walks = [[str(node) for node in walk] for walk in walks]  # Word2Vec expects string tokens
    model = Word2Vec(
        walks,
        size=self.dimension,
        window=self.window_size,
        min_count=0,
        sg=1,
        workers=self.worker,
        iter=self.iteration,
    )
    id2node = dict([(vid, node) for vid, node in enumerate(G.nodes())])
    embeddings = np.asarray([model[str(id2node[i])] for i in range(len(id2node))])
    return embeddings
Example 10: train

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def train(self, G):
    self.G = G
    is_directed = nx.is_directed(self.G)
    # default every edge to weight 1.0 so the transition probabilities are defined
    for i, j in G.edges():
        G[i][j]["weight"] = G[i][j].get("weight", 1.0)
        if not is_directed:
            G[j][i]["weight"] = G[j][i].get("weight", 1.0)
    self._preprocess_transition_probs()
    walks = self._simulate_walks(self.walk_num, self.walk_length)
    walks = [[str(node) for node in walk] for walk in walks]
    model = Word2Vec(
        walks,
        size=self.dimension,
        window=self.window_size,
        min_count=0,
        sg=1,
        workers=self.worker,
        iter=self.iteration,
    )
    id2node = dict([(vid, node) for vid, node in enumerate(G.nodes())])
    self.embeddings = np.asarray(
        [model[str(id2node[i])] for i in range(len(id2node))]
    )
    return self.embeddings
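The weight-initialization loop at the top of Example 10 exists because _preprocess_transition_probs needs a numeric weight on every edge to build its alias-sampling tables; defaulting missing weights to 1.0 makes an unweighted graph behave like a uniformly weighted one. The idiom in isolation, on a bare networkx graph:

import networkx as nx

G = nx.Graph()
G.add_edge(1, 2)  # no explicit weight attribute
G[1][2]["weight"] = G[1][2].get("weight", 1.0)
print(G[1][2]["weight"])  # 1.0 -- the edge now acts as uniformly weighted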
Example 11: learn_pooled_embeddings

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def learn_pooled_embeddings(walks, counts, args):
    """
    Learn an embedding given the sequences and arguments.
    :param walks: Linear vertex sequences.
    :param counts: Number of nodes.
    :param args: Arguments.
    """
    model = Word2Vec(walks,
                     size=args.dimensions,
                     window=args.window_size,
                     min_count=1,
                     sg=1,
                     workers=args.workers,
                     iter=args.iter,
                     alpha=args.alpha)
    save_embedding(args, model, counts)
Example 12: uptrain

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def uptrain(corpus,
            model_path=None,
            binary=True,
            lockf=0.0,
            min_count=1,
            size=300,
            **word2vec_params):
    wv = Word2Vec(min_count=min_count, size=size, **word2vec_params)
    print("Building vocabulary...")
    wv.build_vocab(corpus)
    print("Found %d distinct words." % len(wv.index2word))
    if model_path is not None:
        print("Intersecting with", model_path, "...")
        # copy pretrained vectors into the new vocabulary where words overlap
        wv.intersect_word2vec_format(model_path, binary=binary, lockf=lockf)
        print("Intersected vectors locked with", lockf)
    total_examples = len(corpus)
    print("Training on %d documents..." % total_examples)
    wv.train(corpus, total_examples=total_examples)
    return wv
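The key call in Example 12 is intersect_word2vec_format, which copies pretrained vectors into the freshly built vocabulary for every overlapping word; lockf=0.0 freezes the imported vectors during the subsequent training, while lockf=1.0 lets them keep updating. A hypothetical invocation against a pretrained binary (the path is illustrative):

corpus = [["new", "domain", "words"], ["more", "sentences"]]
model = uptrain(corpus,
                model_path="pretrained-vectors.bin",  # any word2vec-format file
                binary=True,
                lockf=1.0)  # allow the pretrained vectors to be fine-tuned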
Example 13: learn_embeddings

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def learn_embeddings(self, output):
    """
    Learn embeddings by optimizing the Skip-gram objective using SGD.
    """
    walks = self._simulate_walks()  # simulate random walks
    model = Word2Vec(walks, size=self.dimensions, window=self.window_size, min_count=0,
                     workers=self.workers, iter=self.iter, negative=25, sg=1)
    print("defined model using w2v")
    model.wv.save_word2vec_format(output, binary=True)
    # free memory
    del walks
    self.alias_nodes = None
    self.alias_edges = None
    self.G = None
    print("saved model in word2vec binary format")
    return
Example 14: train

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def train(self, G):
    self.G = G
    walks = self._simulate_walks(self.walk_length, self.walk_num)
    walks = [[str(node) for node in walk] for walk in walks]
    model = Word2Vec(
        walks,
        size=self.dimension,
        window=self.window_size,
        min_count=0,
        sg=1,
        workers=self.worker,
        iter=self.iteration,
    )
    id2node = dict([(vid, node) for vid, node in enumerate(G.nodes())])
    # look vectors up through model.wv, the recommended access path
    embeddings = np.asarray([model.wv[str(id2node[i])] for i in range(len(id2node))])
    return embeddings
Example 15: train

# Required import: from gensim import models [as alias]
# Or: from gensim.models import Word2Vec [as alias]
def train(self, G):
    self.G = G
    is_directed = nx.is_directed(self.G)
    # default every edge to weight 1.0 so the transition probabilities are defined
    for i, j in G.edges():
        G[i][j]["weight"] = G[i][j].get("weight", 1.0)
        if not is_directed:
            G[j][i]["weight"] = G[j][i].get("weight", 1.0)
    self._preprocess_transition_probs()
    walks = self._simulate_walks(self.walk_num, self.walk_length)
    walks = [[str(node) for node in walk] for walk in walks]
    model = Word2Vec(
        walks,
        size=self.dimension,
        window=self.window_size,
        min_count=0,
        sg=1,
        workers=self.worker,
        iter=self.iteration,
    )
    id2node = dict([(vid, node) for vid, node in enumerate(G.nodes())])
    self.embeddings = np.asarray(
        [model.wv[str(id2node[i])] for i in range(len(id2node))]
    )
    return self.embeddings
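Whichever trainer above you use, the finished Word2Vec model answers the usual similarity queries. A short closing sketch (node IDs are stored as strings in the graph examples, so they are queried as strings too):

from gensim.models import Word2Vec

model = Word2Vec([["1", "2", "3"], ["2", "3", "4"]], size=16, min_count=0, sg=1, iter=5)
print(model.wv.most_similar("2", topn=2))  # nearest neighbours in embedding space
print(model.wv.similarity("2", "3"))       # cosine similarity between two nodes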