本文整理汇总了 Python 中 gensim.models.Word2Vec.load_word2vec_format 方法的典型用法代码示例。如果您正苦于以下问题：Python Word2Vec.load_word2vec_format 方法的具体用法？Python Word2Vec.load_word2vec_format 怎么用？Python Word2Vec.load_word2vec_format 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 gensim.models.Word2Vec 的用法示例。
在下文中一共展示了Word2Vec.load_word2vec_format方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_weights_word2vec
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def get_weights_word2vec(word2idx, w2vfile, w2v_embed_size=300,
                         is_custom=False):
    """Build an embedding matrix for ``word2idx`` from a word2vec model.

    The model stored at ``w2vfile`` is read with ``Word2Vec.load`` when
    ``is_custom`` is true, otherwise with
    ``Word2Vec.load_word2vec_format(..., binary=True)``.

    Returns an array created by ``np.zeros`` with shape
    ``(len(word2idx) + 1, w2v_embed_size)``. For every ``(word, index)``
    pair, row ``index`` receives the vector looked up under the
    lower-cased word; rows whose lookup raises ``KeyError`` stay zero.
    """
    if is_custom:
        vectors = Word2Vec.load(w2vfile)
    else:
        vectors = Word2Vec.load_word2vec_format(w2vfile, binary=True)

    n_rows = len(word2idx) + 1
    embedding_weights = np.zeros((n_rows, w2v_embed_size))
    for token, row in word2idx.items():
        try:
            vector = vectors[token.lower()]
        except KeyError:
            # Word missing from the model: leave its row as zeros.
            continue
        embedding_weights[row, :] = vector
    return embedding_weights
示例2: load_w2v
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def load_w2v(file):
    """Return the model read from ``file`` in binary word2vec format."""
    return Word2Vec.load_word2vec_format(file, binary=True)
示例3: load_word2vec
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def load_word2vec(path='/mnt/data/sunlight/GoogleNews-vectors-negative300.bin'):
    """Load a binary word2vec-format model.

    Args:
        path: Location of the binary vectors file. Defaults to the path
            this function previously had hard-coded, so calling it with
            no arguments behaves exactly as before.

    Returns:
        The object returned by ``Word2Vec.load_word2vec_format``.
    """
    return Word2Vec.load_word2vec_format(path, binary=True)
示例4: load_derived_vectors
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def load_derived_vectors(filename):
    """Load derived vectors saved by a previous words2map run.

    ``filename`` is resolved under ``<current working dir>/derived_vectors/``
    and read in text (non-binary) word2vec format. The result is a
    standalone Gensim Word2Vec model
    (https://radimrehurek.com/gensim/models/word2vec.html).
    """
    filepath = "/".join([getcwd(), "derived_vectors", filename])
    return Word2Vec.load_word2vec_format(filepath, binary=False)
示例5: get_vec_sim
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def get_vec_sim(self):
    """Load the vector model named in the ``vectors`` config section.

    Reads the ``model`` (path) and ``model_type`` options from
    ``self.config`` and stores the loaded model on ``self.vec_model``:

    * ``'word2vec'`` -- ``Word2Vec.load_word2vec_format(path, binary=True)``
    * ``'gensim'``   -- ``Word2Vec.load(path)``

    Raises:
        ValueError: if ``model_type`` is neither of the values above.
            ``ValueError`` is a subclass of ``Exception``, so callers that
            caught the previously raised bare ``Exception`` keep working.
    """
    model_fn = self.config.get('vectors', 'model')
    model_type = self.config.get('vectors', 'model_type')
    logging.warning('Loading model: %s', model_fn)
    if model_type == 'word2vec':
        self.vec_model = Word2Vec.load_word2vec_format(model_fn,
                                                       binary=True)
    elif model_type == 'gensim':
        self.vec_model = Word2Vec.load(model_fn)
    else:
        # An unsupported configuration value is a value error, not a
        # generic failure; the message text is kept unchanged.
        raise ValueError('Unknown LSA model format')
    logging.warning('Model loaded: %s', model_fn)
示例6: main
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def main():
    """Cluster the vectors of a word2vec model with k-means and dump a TSV.

    Command-line arguments (all positional):
        model:  path of the word2vec model file.
        format: 1 for the binary format, 0 for the text format.
        k:      number of clusters.
        output: path of the file to write.

    The output file starts with a ``WORD<TAB>CLUSTER_ID`` header, followed
    by one line per vocabulary word, ordered by ascending cluster id.
    Progress and timings are printed to stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="word2vec model path")
    parser.add_argument("format", help="1 = binary format, 0 = text format", type=int)
    parser.add_argument("k", help="number of clusters", type=int)
    parser.add_argument("output", help="output file")
    args = parser.parse_args()

    start = time.time()
    print("Load word2vec model ... ", end="", flush=True)
    w2v_model = Word2Vec.load_word2vec_format(args.model, binary=bool(args.format))
    print("finished in {:.2f} sec.".format(time.time() - start), flush=True)

    # NOTE(review): `wv.syn0` / `wv.index2word` are the attribute names this
    # script was written against; newer gensim releases may expose them
    # under different names -- confirm against the installed version.
    word_vectors = w2v_model.wv.syn0
    n_words = word_vectors.shape[0]
    vec_size = word_vectors.shape[1]
    print("#words = {0}, vector size = {1}".format(n_words, vec_size))

    start = time.time()
    print("Compute clustering ... ", end="", flush=True)
    kmeans = KMeans(n_clusters=args.k, n_jobs=-1, random_state=0)
    idx = kmeans.fit_predict(word_vectors)
    print("finished in {:.2f} sec.".format(time.time() - start), flush=True)

    start = time.time()
    print("Generate output file ... ", end="", flush=True)
    # Pair every word with its cluster id; sorted() is stable, so words
    # within one cluster keep their vocabulary order.
    word_centroid_list = sorted(zip(w2v_model.wv.index2word, idx),
                                key=lambda el: el[1])
    # The context manager closes the file even if a write fails.
    with open(args.output, "w") as file_out:
        file_out.write("WORD\tCLUSTER_ID\n")
        file_out.writelines(
            word + '\t' + str(cluster_id) + '\n'
            for word, cluster_id in word_centroid_list
        )
    print("finished in {:.2f} sec.".format(time.time() - start), flush=True)
示例7: get_e2v_embedding
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def get_e2v_embedding(embeddings_file):
    """Return the model read from ``embeddings_file`` (binary word2vec format)."""
    return Word2Vec.load_word2vec_format(embeddings_file, binary=True)
示例8: __init__
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def __init__(self, model_file: str) -> None:
    """Load the model at ``model_file`` and store it on ``self.model``.

    A path ending in ``.bin`` is read with
    ``Word2Vec.load_word2vec_format(..., binary=True)``; any other path is
    read with ``Word2Vec.load``.
    """
    is_binary_dump = model_file.endswith(".bin")
    if not is_binary_dump:
        self.model = Word2Vec.load(model_file)
        return
    self.model = Word2Vec.load_word2vec_format(model_file, binary=True)
示例9: evaluate_google
# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def evaluate_google():
    """Run ``accuracy`` on the pretrained Google embeddings.

    Loads ``data/GoogleNews-vectors-negative300.bin.gz`` in binary word2vec
    format and passes the model to ``accuracy`` together with
    ``data/questions-words.txt`` and ``False``; the result is discarded.

    See https://code.google.com/archive/p/word2vec/
    """
    from gensim.models import Word2Vec

    pretrained = Word2Vec.load_word2vec_format(
        'data/GoogleNews-vectors-negative300.bin.gz', binary=True)
    accuracy(pretrained, "data/questions-words.txt", False)