本文整理汇总了Python中gensim.models.Word2Vec.load方法的典型用法代码示例。如果您正苦于以下问题:Python Word2Vec.load方法的具体用法?Python Word2Vec.load怎么用?Python Word2Vec.load使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类gensim.models.Word2Vec的用法示例。
在下文中一共展示了Word2Vec.load方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: embedding_sentences
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load [as 别名]
def embedding_sentences(sentences, embedding_size=128, window=5, min_count=5,
                        file_to_load=None, file_to_save=None):
    """Map every word of every sentence to its word2vec vector.

    A model is loaded from ``file_to_load`` when that is given; otherwise a
    new model is trained on ``sentences``.

    :param sentences: the sentences to embed, each an iterable of words. It
        is iterated once for training and once more for the lookup, so it
        must be re-iterable.
    :param embedding_size: vector dimensionality used when training
    :param window: context window used when training
    :param min_count: minimum word frequency used when training
    :param file_to_load: path of a saved model to load instead of training
    :param file_to_save: path the newly trained model is saved to
    :return: one list per sentence holding one vector per word; a word that
        is not in the model vocabulary gets an all-zero vector
    """
    if file_to_load is not None:
        w2v_model = Word2Vec.load(file_to_load)
    else:
        w2v_model = Word2Vec(sentences, size=embedding_size, window=window,
                             min_count=min_count,
                             workers=multiprocessing.cpu_count())
        # NOTE(review): the nesting of this save was lost together with the
        # source indentation. It is kept under the training branch so that a
        # model that was only loaded is not written back out -- confirm.
        if file_to_save is not None:
            w2v_model.save(file_to_save)

    # Look words up through the KeyedVectors object: indexing the model
    # itself (model[word]) is the deprecated spelling of the same lookup.
    keyed_vectors = w2v_model.wv
    # A single zero vector is shared by every out-of-vocabulary word.
    unknown_vector = [0] * w2v_model.vector_size

    all_vectors = []
    for sentence in sentences:
        all_vectors.append([
            keyed_vectors[word] if word in keyed_vectors.vocab
            else unknown_vector
            for word in sentence
        ])
    return all_vectors
示例2: __init__
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load [as 别名]
def __init__(self, test_model=False, verify_model=True):
    """Load the word2vec model and build the lookup tables derived from it.

    :param test_model: when true, run the model's accuracy evaluation on
        ``questionfile`` and log that it was done
    :param verify_model: when true, spot-check 1000 randomly drawn
        vocabulary entries against the tables built here
    """
    model = Word2Vec.load(modelfile)
    if test_model:
        model.accuracy(questionfile)  # the returned report is not used
        logger.info("Test model " + modelfile + " in " + questionfile)

    self.vector_size = model.vector_size
    # One slot more than the model vocabulary: the checks below show that
    # our indices are the model's indices shifted up by one.
    self.vocab_size = len(model.wv.vocab) + 1
    self.word2index = self.GetWord2Index(model)
    self.index2word = self.GetIndex2Word(model)
    self.wordvector = self.GetWordVector(model)

    if not verify_model:
        return

    logger.info("Verifing imported word2vec model")
    rng = check_random_state(12)  # fixed seed, so the sample is repeatable
    sampled_indices = rng.randint(low=0, high=self.vocab_size - 2, size=1000)
    for model_index in sampled_indices:
        model_word = model.wv.index2word[model_index]
        our_word = self.index2word[model_index + 1]
        # Same word on both sides, at indices that differ by exactly one.
        assert model_word == our_word
        assert model.wv.vocab[our_word].index == self.word2index[our_word] - 1
        # The vector stored under our index matches the model's vector.
        assert np.array_equal(model.wv[our_word],
                              self.wordvector[self.word2index[our_word]])
    logger.info("Imported word2vec model is verified")
示例3: compute_epoch_accuracies
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load [as 别名]
def compute_epoch_accuracies(root, prefix, analogy_file):
    """Evaluate every per-epoch model checkpoint and save the results.

    Checkpoints are the files matching ``<root>/<prefix>_epoch*.model``. For
    each one the analogy accuracy per section and the latest training loss
    are collected, then both are written with ``save_obj`` under ``models/``.

    :param root: directory that holds the checkpoint files
    :param prefix: file name prefix shared by the checkpoints
    :param analogy_file: analogy questions file passed to ``wv.accuracy``
    :return: None; results are persisted through ``save_obj``
    """
    filenames = glob.glob(os.path.join(root, prefix + "_epoch*.model"))
    nr_epochs = len(filenames)
    accuracies = dict()
    losses = [0] * nr_epochs
    for filename in filenames:
        # The epoch number is the run of digits right before ".model".
        # NOTE(review): it is used as a list index, so this presumably
        # expects epochs numbered 0..nr_epochs-1 -- confirm.
        epoch = int(re.search(r"(\d+)\.model", filename).group(1))
        m = Word2Vec.load(filename)
        losses[epoch] = m.get_latest_training_loss()
        for sec in m.wv.accuracy(analogy_file):
            per_epoch = accuracies.setdefault(sec["section"], [0] * nr_epochs)
            correct, incorrect = len(sec["correct"]), len(sec["incorrect"])
            total = correct + incorrect
            # Guard on the total, not on `incorrect`: a section answered
            # perfectly has incorrect == 0 and must score 1.0, not 0.
            # float() keeps the ratio fractional on Python 2 as well.
            accuracy = float(correct) / total if total > 0 else 0
            per_epoch[epoch] = (correct, incorrect, accuracy)
    save_obj(accuracies, os.path.join("models", prefix + "_accuracies"))
    # Stored as the first loss followed by the successive differences.
    per_epoch_loss = np.concatenate([np.array([losses[0]]), np.diff(losses)])
    save_obj(per_epoch_loss, os.path.join("models", prefix + "_loss"))
示例4: get_embedding_matrix
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load [as 别名]
def get_embedding_matrix(model_filepath, word2id):
    """
    Build the embedding matrix for a vocabulary from a word2vec model
    :param model_filepath: the file path to the pre-built word2vec model
    :param word2id: the dictionary mapping from word to id (row index)
    :return: array with ``len(word2id) + 1`` rows and one column per vector
        dimension; rows of words without an embedding stay all-zero
    """
    # The model itself is only needed for its vector dimensionality.
    vector_size = Word2Vec.load(model_filepath).vector_size
    word_vectors = __get_embedding_dict(model_filepath)

    matrix = np.zeros((len(word2id) + 1, vector_size))
    for word, row in word2id.items():
        vector = word_vectors.get(word)
        if vector is None:
            continue  # no embedding for this word: leave the zero row
        matrix[row] = vector
    return matrix
示例5: load_word_embeddings
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load [as 别名]
def load_word_embeddings(self, vecs_fname, method):
    """Load words and their vectors from an embeddings file.

    :param vecs_fname: path of the embeddings file
    :param method: name of the embedding method. ``"word2vec"`` loads a
        saved gensim model; anything else is read as a text file with one
        ``word v1 v2 ...`` entry per line (tab-separated for ``"swivel"``,
        space-separated otherwise). If the name contains ``"fasttext"``
        the first line is skipped as a header.
    :return: ``(words, vecs)``. For word2vec these are the model's
        ``index2word`` and ``vectors``; otherwise a list of words and a
        list of float lists in file order.
    """
    if method == "word2vec":
        model = Word2Vec.load(vecs_fname)
        return model.wv.index2word, model.wv.vectors

    separator = "\t" if method == "swivel" else " "
    words, vecs = [], []
    with open(vecs_fname, 'r', encoding='utf-8') as f1:
        if "fasttext" in method:
            # Skip the header line; the default keeps an empty file from
            # raising StopIteration.
            next(f1, None)
        for line in f1:
            fields = line.strip().split(separator)
            if not fields[0]:
                # Blank line: it would otherwise add an empty word with an
                # empty vector.
                continue
            words.append(fields[0])
            vecs.append([float(el) for el in fields[1:]])
    return words, vecs
示例6: load_vectors
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load [as 别名]
def load_vectors(self, vecs_fname, method):
    """Load embeddings and return them L2-normalised.

    :param vecs_fname: path of the embeddings file
    :param method: name of the embedding method. ``"word2vec"`` loads a
        saved gensim model; anything else is read as a text file with one
        ``word v1 v2 ...`` entry per line (tab-separated for ``"swivel"``,
        space-separated otherwise). If the name contains ``"fasttext"``
        the first line is skipped as a header.
    :return: ``(dictionary, words, unit_vecs)`` where ``unit_vecs`` is the
        row-wise L2-normalised vectors and ``dictionary`` maps each word to
        its normalised vector
    """
    if method == "word2vec":
        model = Word2Vec.load(vecs_fname)
        words = model.wv.index2word
        vecs = model.wv.vectors
    else:
        separator = "\t" if method == "swivel" else " "
        words, vecs = [], []
        with open(vecs_fname, 'r', encoding='utf-8') as f:
            if "fasttext" in method:
                # Skip the header line; the default keeps an empty file
                # from raising StopIteration here.
                next(f, None)
            for line in f:
                fields = line.strip().split(separator)
                if not fields[0]:
                    # Blank line: it would add an empty vector and make the
                    # rows ragged for the normalisation below.
                    continue
                words.append(fields[0])
                vecs.append([float(el) for el in fields[1:]])
    unit_vecs = normalize(vecs, norm='l2', axis=1)
    # If a word occurs more than once, its last vector wins.
    dictionary = dict(zip(words, unit_vecs))
    return dictionary, words, unit_vecs
示例7: _check_men
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load [as 别名]
def _check_men(args):
    """Check embeddings quality.

    Log the Spearman correlation between the model's cosine similarities
    and the human similarity ratings of the MEN dataset. Pairs with a word
    missing from the model vocabulary are reported and left out.
    """
    logger.info('Checking embeddings quality against MEN similarity ratings')
    logger.info('Loading word2vec model...')
    model = Word2Vec.load(args.w2v_model)
    logger.info('Model loaded')

    # (human rating, model similarity) for every pair that could be scored;
    # kept together so both series always stay aligned.
    scored_pairs = []
    for (first, second), human in Samples(source='men', shuffle=False):
        both_known = first in model.wv.vocab and second in model.wv.vocab
        if not both_known:
            logger.error('Could not find one of more pair item in model '
                         'vocabulary: {}, {}'.format(first, second))
            continue
        similarity = _cosine_similarity(model.wv[first], model.wv[second])
        scored_pairs.append((human, similarity))

    human_actual = [human for human, _ in scored_pairs]
    system_actual = [similarity for _, similarity in scored_pairs]
    spr = _spearman(human_actual, system_actual)
    logger.info('SPEARMAN: {} calculated over {} items'.format(
        spr, len(scored_pairs)))
示例8: train_model
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load [as 别名]
def train_model(corpus, size=200, window=5, workers=3, model_path=None,
                word_freq=None, corpus_count=None):
    """Train a new Skipgram model or continue training a saved one.

    Args:
        corpus (str): file path of corpus
        size (int): embedding size for a new model (default=200)
        window (int): window size for a new model (default=5)
        workers (int): number of workers for a new model (default=3)
        model_path (str): file path of an existing model to update; when
            None a new model is trained instead
        word_freq (dict): dictionary of word frequencies (new model only)
        corpus_count (int): corpus size (new model only)

    Returns:
        Word2Vec: the trained or updated model
    """
    sentences = LineSentence(corpus)

    if model_path is None:
        # Nothing to update: train from scratch.
        return Skipgram(sentences=sentences, size=size, window=window,
                        min_count=1, workers=workers, raw_vocab=word_freq,
                        corpus_count=corpus_count)

    logger.info("Updating pre-existing model: %s", model_path)
    assert os.path.isfile(model_path), "File does not exist"
    model = Word2Vec.load(model_path)
    # Extend the saved vocabulary with the new corpus, then keep training
    # with the epoch count stored on the model.
    # NOTE(review): `iter` may be named `epochs` in newer gensim releases --
    # confirm against the pinned version.
    model.build_vocab(sentences, update=True)
    model.train(sentences, total_examples=model.corpus_count,
                epochs=model.iter)
    return model
示例9: build_phrase
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load [as 别名]
def build_phrase(doc):
    """Apply the bigram model and then the trigram model to ``doc``.

    Both models are read from the module-level names ``bigram`` and
    ``trigram``; this function does not load them itself.
    """
    with_bigrams = bigram[doc]
    return trigram[with_bigrams]
示例10: load_phrase
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load [as 别名]
def load_phrase():
    """Load the bigram and trigram phrase models into module-level globals.

    The models are read from ``../model/bigram.model`` and
    ``../model/trigram.model``, relative to the working directory.
    """
    global bigram, trigram
    model_dir = os.path.join("..", "model")
    bigram = Phrases.load(os.path.join(model_dir, "bigram.model"))
    trigram = Phrases.load(os.path.join(model_dir, "trigram.model"))
示例11: load_obj
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load [as 别名]
def load_obj(filename):
    """Load and return the pickled object stored in ``filename``.

    :param filename: path of the pickle file
    :return: the unpickled object
    """
    # Imported here so the function works on Python 2 (cPickle) as well as
    # on Python 3, where the module is simply called pickle.
    try:
        import cPickle as pickle_module
    except ImportError:
        import pickle as pickle_module

    # Pickle data is bytes: binary mode is required on Python 3 and avoids
    # newline translation damaging the stream on platforms that do it.
    # NOTE: unpickling can run arbitrary code; only load trusted files.
    with open(filename, "rb") as fin:
        return pickle_module.load(fin)
示例12: get_weights_word2vec
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load [as 别名]
def get_weights_word2vec(word2idx, w2vfile, w2v_embed_size=300,
                         is_custom=False):
    """Build an embedding weight matrix for a vocabulary from word2vec.

    :param word2idx: dictionary mapping each word to its row index
    :param w2vfile: path of the vectors file
    :param w2v_embed_size: dimensionality of the vectors (default=300)
    :param is_custom: True when ``w2vfile`` is a model saved by gensim's
        ``Word2Vec.save``; False when it is in the binary word2vec format
    :return: array of shape ``(len(word2idx) + 1, w2v_embed_size)``; rows of
        words without a vector stay all-zero
    """
    if is_custom:
        # Vectors of a saved model are read through its ``wv`` attribute;
        # indexing the model object directly is the deprecated spelling.
        vectors = Word2Vec.load(w2vfile).wv
    else:
        # Word2Vec.load_word2vec_format is no longer usable in current
        # gensim releases; the word2vec format is read via KeyedVectors.
        from gensim.models import KeyedVectors
        vectors = KeyedVectors.load_word2vec_format(w2vfile, binary=True)

    vocab_size = len(word2idx) + 1
    embedding_weights = np.zeros((vocab_size, w2v_embed_size))
    for word, index in word2idx.items():
        try:
            # Vectors are looked up by the lower-cased word.
            embedding_weights[index, :] = vectors[word.lower()]
        except KeyError:
            pass  # keep as zero (not ideal, but what else can we do?)
    return embedding_weights
示例13: load_obj
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load [as 别名]
def load_obj(name):
    """Unpickle and return the object stored in ``<name>.pkl``.

    :param name: file path without the ``.pkl`` extension
    :return: the unpickled object
    """
    pickle_path = name + ".pkl"
    with open(pickle_path, "rb") as f:
        loaded = pickle.load(f)
    return loaded
示例14: load_tag2id
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load [as 别名]
def load_tag2id(tag2id_filepath):
    """
    Read the pickled tag-to-id dictionary from disk
    :param tag2id_filepath: the file path to the pre-built dictionary
    :return: the unpickled object (the dictionary mapping from tag to id)
    """
    with open(tag2id_filepath, "rb") as pickle_file:
        return pickle.load(pickle_file)
示例15: load_vocab
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load [as 别名]
def load_vocab(vocab_filepath):
    """
    Read the pickled word-to-id dictionary from disk
    :param vocab_filepath: the file path to the pre-built dictionary
    :return: the unpickled object (the dictionary mapping from word to id)
    """
    with open(vocab_filepath, "rb") as pickle_file:
        return pickle.load(pickle_file)