當前位置: 首頁>>代碼示例>>Python>>正文


Python Word2Vec.load_word2vec_format方法代碼示例

本文整理匯總了Python中gensim.models.Word2Vec.load_word2vec_format方法的典型用法代碼示例。如果您正苦於以下問題:Python Word2Vec.load_word2vec_format方法的具體用法?Python Word2Vec.load_word2vec_format怎麼用?Python Word2Vec.load_word2vec_format使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類gensim.models.Word2Vec的用法示例。


在下文中一共展示了Word2Vec.load_word2vec_format方法的9個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: get_weights_word2vec

# 需要導入模塊: from gensim.models import Word2Vec [as 別名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 別名]
def get_weights_word2vec(word2idx, w2vfile, w2v_embed_size=300, 
                         is_custom=False):
    """Build an embedding weight matrix from a word2vec model.

    Args:
        word2idx: mapping of word -> row index in the returned matrix.
        w2vfile: path of the model to load.
        w2v_embed_size: number of columns in the returned matrix.
        is_custom: when true the file is read with ``Word2Vec.load``;
            otherwise it is read with ``Word2Vec.load_word2vec_format``
            in binary mode.

    Returns:
        A ``(len(word2idx) + 1, w2v_embed_size)`` array. Rows for words
        that the model does not know stay all-zero.
    """
    if is_custom:
        vectors = Word2Vec.load(w2vfile)
    else:
        vectors = Word2Vec.load_word2vec_format(w2vfile, binary=True)

    # One row more than there are words; presumably an index is reserved
    # (e.g. for padding) -- verify against the caller.
    weights = np.zeros((len(word2idx) + 1, w2v_embed_size))
    for token, row in word2idx.items():
        try:
            # Lookups are done on the lower-cased form of the word.
            vector = vectors[token.lower()]
        except KeyError:
            # Out-of-vocabulary word: leave its row as zeros.
            continue
        weights[row, :] = vector
    return weights
開發者ID:sujitpal,項目名稱:dl-models-for-qa,代碼行數:17,代碼來源:kaggle.py

示例2: load_w2v

# 需要導入模塊: from gensim.models import Word2Vec [as 別名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 別名]
def load_w2v(file):
    """Load and return the binary word2vec-format model stored at ``file``."""
    return Word2Vec.load_word2vec_format(file, binary=True)
開發者ID:hugochan,項目名稱:KATE,代碼行數:5,代碼來源:doc_word2vec.py

示例3: load_word2vec

# 需要導入模塊: from gensim.models import Word2Vec [as 別名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 別名]
def load_word2vec(model_path='/mnt/data/sunlight/GoogleNews-vectors-negative300.bin'):
    """Load a binary word2vec-format model.

    Args:
        model_path: location of the binary vectors file. Defaults to the
            path that used to be hard-coded here, so calling the function
            with no arguments behaves exactly as before.

    Returns:
        The object returned by ``Word2Vec.load_word2vec_format``.
    """
    return Word2Vec.load_word2vec_format(model_path, binary=True)
開發者ID:dssg,項目名稱:policy_diffusion,代碼行數:6,代碼來源:score_alignments.py

示例4: load_derived_vectors

# 需要導入模塊: from gensim.models import Word2Vec [as 別名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 別名]
def load_derived_vectors(filename):
	"""Load previously derived words2map vectors as a Gensim Word2Vec model.

	The file is looked up in the ``derived_vectors`` directory under the
	current working directory and is read in text (non-binary) word2vec
	format. See https://radimrehurek.com/gensim/models/word2vec.html
	"""
	vectors_dir = getcwd() + "/derived_vectors/"
	return Word2Vec.load_word2vec_format(vectors_dir + filename, binary=False)
開發者ID:overlap-ai,項目名稱:words2map,代碼行數:7,代碼來源:words2map.py

示例5: get_vec_sim

# 需要導入模塊: from gensim.models import Word2Vec [as 別名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 別名]
def get_vec_sim(self):
        """Load the vector model named in the config into ``self.vec_model``.

        Reads ``model`` (path) and ``model_type`` from the ``vectors``
        config section. ``'word2vec'`` loads a binary word2vec-format file,
        ``'gensim'`` loads a model via ``Word2Vec.load``; any other type
        raises ``Exception``. A warning-level log line is emitted before
        and after loading.
        """
        cfg = self.config
        path = cfg.get('vectors', 'model')
        kind = cfg.get('vectors', 'model_type')
        logging.warning('Loading model: {0}'.format(path))

        # Reject unsupported types up front, before touching the loaders.
        if kind != 'word2vec' and kind != 'gensim':
            raise Exception('Unknown LSA model format')

        if kind == 'word2vec':
            loaded = Word2Vec.load_word2vec_format(path, binary=True)
        else:
            loaded = Word2Vec.load(path)
        self.vec_model = loaded
        logging.warning('Model loaded: {0}'.format(path))
開發者ID:kornai,項目名稱:4lang,代碼行數:14,代碼來源:similarity.py

示例6: main

# 需要導入模塊: from gensim.models import Word2Vec [as 別名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 別名]
def main():
    """Cluster the vectors of a word2vec model with k-means and dump the result.

    Command-line arguments (all positional):
        model:  path of the word2vec model file.
        format: 1 for binary format, 0 for text format.
        k:      number of clusters.
        output: path of the file to write.

    The output is a tab-separated file with a ``WORD\\tCLUSTER_ID`` header
    followed by one line per word, ordered by ascending cluster id.
    Progress and timings are printed to stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="word2vec model path")
    parser.add_argument("format", help="1 = binary format, 0 = text format", type=int)
    parser.add_argument("k", help="number of clusters", type=int)
    parser.add_argument("output", help="output file")
    args = parser.parse_args()

    start = time.time()
    print("Load word2vec model ... ", end="", flush=True)
    w2v_model = Word2Vec.load_word2vec_format(args.model, binary=bool(args.format))
    print("finished in {:.2f} sec.".format(time.time() - start), flush=True)
    word_vectors = w2v_model.wv.syn0
    n_words = word_vectors.shape[0]
    vec_size = word_vectors.shape[1]
    print("#words = {0}, vector size = {1}".format(n_words, vec_size))

    start = time.time()
    print("Compute clustering ... ", end="", flush=True)
    # NOTE(review): newer scikit-learn releases no longer accept `n_jobs`
    # on KMeans -- confirm against the version this script is pinned to.
    kmeans = KMeans(n_clusters=args.k, n_jobs=-1, random_state=0)
    idx = kmeans.fit_predict(word_vectors)
    print("finished in {:.2f} sec.".format(time.time() - start), flush=True)

    start = time.time()
    print("Generate output file ... ", end="", flush=True)
    # Pair each word with its cluster id; sorted() is stable, so words keep
    # their vocabulary order inside a cluster.
    word_centroid_list_sort = sorted(zip(w2v_model.wv.index2word, idx),
                                     key=lambda el: el[1])
    # The context manager closes the file even when a write fails.
    with open(args.output, "w") as file_out:
        file_out.write("WORD\tCLUSTER_ID\n")
        file_out.writelines(
            word + '\t' + str(cluster_id) + '\n'
            for word, cluster_id in word_centroid_list_sort
        )
    print("finished in {:.2f} sec.".format(time.time() - start), flush=True)
開發者ID:gaetangate,項目名稱:word2vec-cluster,代碼行數:38,代碼來源:word2vec_cluster.py

示例7: get_e2v_embedding

# 需要導入模塊: from gensim.models import Word2Vec [as 別名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 別名]
def get_e2v_embedding(embeddings_file):
	"""Load and return the binary word2vec-format embeddings in ``embeddings_file``."""
	return Word2Vec.load_word2vec_format(embeddings_file, binary=True)
開發者ID:D2KLab,項目名稱:entity2vec,代碼行數:7,代碼來源:feature_generator.py

示例8: __init__

# 需要導入模塊: from gensim.models import Word2Vec [as 別名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 別名]
def __init__(self, model_file: str) -> None:
        """Load a model into ``self.model``, picking the loader by file name.

        A path ending in ``.bin`` is read as a binary word2vec-format file;
        anything else is read with ``Word2Vec.load``.
        """
        if not model_file.endswith(".bin"):
            self.model = Word2Vec.load(model_file)
            return
        self.model = Word2Vec.load_word2vec_format(model_file, binary=True)
開發者ID:allenai,項目名稱:aristo-mini,代碼行數:7,代碼來源:wordtwovec.py

示例9: evaluate_google

# 需要導入模塊: from gensim.models import Word2Vec [as 別名]
# 或者: from gensim.models.Word2Vec import load_word2vec_format [as 別名]
def evaluate_google():
    """Run the analogy evaluation on the pretrained Google News embeddings.

    Loads ``data/GoogleNews-vectors-negative300.bin.gz`` and passes the
    model to ``accuracy`` together with ``data/questions-words.txt``; the
    value returned by ``accuracy`` is discarded.
    See https://code.google.com/archive/p/word2vec/
    """
    from gensim.models import Word2Vec

    google_vectors = Word2Vec.load_word2vec_format(
        'data/GoogleNews-vectors-negative300.bin.gz', binary=True)
    accuracy(google_vectors, "data/questions-words.txt", False)
開發者ID:cod3licious,項目名稱:conec,代碼行數:8,代碼來源:test_analogy.py


注:本文中的gensim.models.Word2Vec.load_word2vec_format方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。