当前位置: 首页>>代码示例>>Python>>正文


Python Word2Vec.load_word2vec_format方法代码示例

本文整理汇总了Python中gensim.models.Word2Vec.load_word2vec_format方法的典型用法代码示例。如果您正苦于以下问题：Python Word2Vec.load_word2vec_format方法的具体用法？Python Word2Vec.load_word2vec_format怎么用？Python Word2Vec.load_word2vec_format使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类gensim.models.Word2Vec的用法示例。


在下文中一共展示了Word2Vec.load_word2vec_format方法的9个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_weights_word2vec

# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def get_weights_word2vec(word2idx, w2vfile, w2v_embed_size=300,
                         is_custom=False):
    """Build an embedding matrix for ``word2idx`` from a word2vec model.

    Args:
        word2idx: Mapping of word -> integer row index in the result.
        w2vfile: Path of the model file to load.
        w2v_embed_size: Number of columns allocated for each row.
        is_custom: When true the file is read with ``Word2Vec.load``;
            otherwise it is read with
            ``Word2Vec.load_word2vec_format(..., binary=True)``.

    Returns:
        A zero-initialised array of shape
        ``(len(word2idx) + 1, w2v_embed_size)`` in which the row of every
        word found in the model (looked up lower-cased) holds its vector.
        Rows of words the model does not contain stay all-zero.
    """
    model = (
        Word2Vec.load(w2vfile)
        if is_custom
        else Word2Vec.load_word2vec_format(w2vfile, binary=True)
    )

    # One more row than there are words
    # (presumably row 0 is reserved, e.g. for padding -- confirm with caller).
    n_rows = len(word2idx) + 1
    weights = np.zeros((n_rows, w2v_embed_size))

    for token, row in word2idx.items():
        try:
            vector = model[token.lower()]
        except KeyError:
            # Word unknown to the model: leave its row as zeros.
            continue
        weights[row, :] = vector
    return weights
开发者ID:sujitpal,项目名称:dl-models-for-qa,代码行数:17,代码来源:kaggle.py

示例2: load_w2v

# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def load_w2v(file):
    """Return the model read from the binary word2vec-format file ``file``."""
    return Word2Vec.load_word2vec_format(file, binary=True)
开发者ID:hugochan,项目名称:KATE,代码行数:5,代码来源:doc_word2vec.py

示例3: load_word2vec

# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def load_word2vec(path='/mnt/data/sunlight/GoogleNews-vectors-negative300.bin'):
    """Load a binary word2vec-format model.

    Args:
        path: Location of the binary vectors file. Defaults to the
            GoogleNews path the function previously had hard-coded, so
            existing zero-argument calls behave exactly as before.

    Returns:
        The object returned by ``Word2Vec.load_word2vec_format``.
    """
    return Word2Vec.load_word2vec_format(path, binary=True)
开发者ID:dssg,项目名称:policy_diffusion,代码行数:6,代码来源:score_alignments.py

示例4: load_derived_vectors

# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def load_derived_vectors(filename):
    """Load derived vectors saved by a previous words2map run.

    The file is looked up as ``<current working directory>/derived_vectors/
    <filename>`` and read as a text (non-binary) word2vec-format file into a
    standalone Gensim Word2Vec model
    (https://radimrehurek.com/gensim/models/word2vec.html).
    """
    vectors_dir = getcwd() + "/derived_vectors/"
    return Word2Vec.load_word2vec_format(vectors_dir + filename, binary=False)
开发者ID:overlap-ai,项目名称:words2map,代码行数:7,代码来源:words2map.py

示例5: get_vec_sim

# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def get_vec_sim(self):
        """Load the vector model described in the ``vectors`` config section.

        Reads the ``model`` (file name) and ``model_type`` options from
        ``self.config`` and stores the loaded model on ``self.vec_model``.
        ``model_type`` 'word2vec' reads a binary word2vec-format file,
        'gensim' reads a model via ``Word2Vec.load``. A message is logged at
        warning level before loading and again after a successful load.

        Raises:
            Exception: if ``model_type`` is neither 'word2vec' nor 'gensim'.
        """
        path = self.config.get('vectors', 'model')
        kind = self.config.get('vectors', 'model_type')
        logging.warning('Loading model: {0}'.format(path))

        if kind == 'gensim':
            model = Word2Vec.load(path)
        elif kind == 'word2vec':
            model = Word2Vec.load_word2vec_format(path, binary=True)
        else:
            raise Exception('Unknown LSA model format')

        self.vec_model = model
        logging.warning('Model loaded: {0}'.format(path))
开发者ID:kornai,项目名称:4lang,代码行数:14,代码来源:similarity.py

示例6: main

# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def main():
    """Cluster the vectors of a word2vec model with k-means and dump the result.

    Command-line arguments (all positional):
        model:  path of the word2vec model file
        format: 1 = binary format, 0 = text format
        k:      number of clusters
        output: path of the file to write

    The output file is tab-separated: a ``WORD``/``CLUSTER_ID`` header line
    followed by one line per word, ordered by ascending cluster id. Progress
    and timing information is printed to standard output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="word2vec model path")
    parser.add_argument("format", help="1 = binary format, 0 = text format", type=int)
    parser.add_argument("k", help="number of clusters", type=int)
    parser.add_argument("output", help="output file")
    args = parser.parse_args()

    start = time.time()
    print("Load word2vec model ... ", end="", flush=True)
    w2v_model = Word2Vec.load_word2vec_format(args.model, binary=bool(args.format))
    print("finished in {:.2f} sec.".format(time.time() - start), flush=True)
    word_vectors = w2v_model.wv.syn0
    n_words, vec_size = word_vectors.shape[0], word_vectors.shape[1]
    print("#words = {0}, vector size = {1}".format(n_words, vec_size))

    start = time.time()
    print("Compute clustering ... ", end="", flush=True)
    kmeans = KMeans(n_clusters=args.k, n_jobs=-1, random_state=0)
    idx = kmeans.fit_predict(word_vectors)
    print("finished in {:.2f} sec.".format(time.time() - start), flush=True)

    start = time.time()
    print("Generate output file ... ", end="", flush=True)
    # sorted() is stable, so words sharing a cluster id keep the relative
    # order they have in ``index2word``.
    by_cluster = sorted(zip(w2v_model.wv.index2word, idx),
                        key=lambda pair: pair[1])
    # The context manager guarantees the file is closed even when a write
    # fails part-way (the previous open()/close() pair leaked the handle).
    with open(args.output, "w") as file_out:
        file_out.write("WORD\tCLUSTER_ID\n")
        file_out.writelines(
            "{0}\t{1}\n".format(word, cluster) for word, cluster in by_cluster
        )
    print("finished in {:.2f} sec.".format(time.time() - start), flush=True)
开发者ID:gaetangate,项目名称:word2vec-cluster,代码行数:38,代码来源:word2vec_cluster.py

示例7: get_e2v_embedding

# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def get_e2v_embedding(embeddings_file):
    """Return the model read from the binary word2vec-format file ``embeddings_file``."""
    return Word2Vec.load_word2vec_format(embeddings_file, binary=True)
开发者ID:D2KLab,项目名称:entity2vec,代码行数:7,代码来源:feature_generator.py

示例8: __init__

# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def __init__(self, model_file: str) -> None:
        """Load the model stored at ``model_file`` into ``self.model``.

        A path ending in ``.bin`` is read as a binary word2vec-format file;
        any other path is read with ``Word2Vec.load``.
        """
        is_word2vec_binary = model_file.endswith(".bin")
        self.model = (
            Word2Vec.load_word2vec_format(model_file, binary=True)
            if is_word2vec_binary
            else Word2Vec.load(model_file)
        )
开发者ID:allenai,项目名称:aristo-mini,代码行数:7,代码来源:wordtwovec.py

示例9: evaluate_google

# 需要导入模块: from gensim.models import Word2Vec [as 别名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 别名]
def evaluate_google():
    """Run ``accuracy`` on the pretrained Google News embeddings.

    Loads the binary vectors from
    ``data/GoogleNews-vectors-negative300.bin.gz`` and passes the model,
    the path ``data/questions-words.txt`` and ``False`` to ``accuracy``.
    The value returned by ``accuracy`` is discarded.

    See https://code.google.com/archive/p/word2vec/ for the pretrained data.
    """
    from gensim.models import Word2Vec

    vectors_path = 'data/GoogleNews-vectors-negative300.bin.gz'
    questions_path = "data/questions-words.txt"
    google_vectors = Word2Vec.load_word2vec_format(vectors_path, binary=True)
    accuracy(google_vectors, questions_path, False)
开发者ID:cod3licious,项目名称:conec,代码行数:8,代码来源:test_analogy.py


注：本文中的gensim.models.Word2Vec.load_word2vec_format方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。