

Python Word2Vec.load Method Code Examples

This article collects and summarizes typical usage examples of the gensim.models.Word2Vec.load method in Python. If you are wondering exactly how Word2Vec.load works, how to call it, or what real-world uses look like, the curated code examples below should help. You can also explore further usage examples of the containing class, gensim.models.Word2Vec.


The following presents 15 code examples of the Word2Vec.load method, sorted by popularity by default.
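
For context, a minimal save-and-load round trip with gensim's Word2Vec looks roughly like the sketch below. The toy corpus and the file name are purely illustrative, and the vector_size keyword follows the gensim 4.x API, whereas several of the examples below use the older size keyword from gensim 3.x:

# A minimal sketch, assuming gensim 4.x; the corpus and file path are illustrative.
from gensim.models import Word2Vec

sentences = [["hello", "world"], ["word2vec", "load", "example"]]  # toy tokenized corpus
model = Word2Vec(sentences, vector_size=100, window=5, min_count=1)
model.save("word2vec.model")              # persist the full model to disk

loaded = Word2Vec.load("word2vec.model")  # restore it, training state included
print(loaded.wv["hello"][:5])             # first five dimensions of a word vector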

Example 1: embedding_sentences

# Required import: from gensim.models import Word2Vec [as alias]
# Or: from gensim.models.Word2Vec import load [as alias]
def embedding_sentences(sentences, embedding_size = 128, window = 5, min_count = 5, file_to_load = None, file_to_save = None):
    if file_to_load is not None:
        w2vModel = Word2Vec.load(file_to_load)
    else:
        w2vModel = Word2Vec(sentences, size = embedding_size, window = window, min_count = min_count, workers = multiprocessing.cpu_count())
        if file_to_save is not None:
            w2vModel.save(file_to_save)
    all_vectors = []
    embeddingDim = w2vModel.vector_size
    embeddingUnknown = [0 for i in range(embeddingDim)]
    for sentence in sentences:
        this_vector = []
        for word in sentence:
            if word in w2vModel.wv.vocab:
                this_vector.append(w2vModel[word])
            else:
                this_vector.append(embeddingUnknown)
        all_vectors.append(this_vector)
    return all_vectors 
Developer: sfailsthy, Project: chinese-text-classification-with-cnn-tf, Lines of code: 21, Source file: word2vec_helpers.py

Example 2: __init__

# Required import: from gensim.models import Word2Vec [as alias]
# Or: from gensim.models.Word2Vec import load [as alias]
def __init__(self, test_model=False, verify_model=True):
        model = Word2Vec.load(modelfile)

        if(test_model):
            acc = model.accuracy(questionfile)
            logger.info("Test model " + modelfile + " in " + questionfile)

        self.vector_size = model.vector_size
        self.vocab_size = len(model.wv.vocab) + 1
        self.word2index = self.GetWord2Index(model)
        self.index2word = self.GetIndex2Word(model)
        self.wordvector = self.GetWordVector(model)

        if(verify_model):
            logger.info("Verifing imported word2vec model")
            random_state = check_random_state(12)
            check_index = random_state.randint(low=0, high=self.vocab_size-2,size=1000)
            for index in check_index:
                word_wv = model.wv.index2word[index]
                word_our = self.index2word[index+1]
                #print(index, word_wv, word_our)
                assert word_wv == word_our
                assert model.wv.vocab[word_our].index == self.word2index[word_our] - 1
                assert np.array_equal(model.wv[word_our], self.wordvector[self.word2index[word_our]])
            logger.info("Imported word2vec model is verified") 
Developer: sefira, Project: question-classification-cnn-rnn-attention, Lines of code: 27, Source file: word2vec_helpers.py

Example 3: compute_epoch_accuracies

# Required import: from gensim.models import Word2Vec [as alias]
# Or: from gensim.models.Word2Vec import load [as alias]
def compute_epoch_accuracies(root, prefix, analogy_file):
    filenames = glob.glob(os.path.join(root, prefix+"_epoch*.model"))
    nr_epochs = len(filenames)
    accuracies = dict()
    losses = [0] * nr_epochs
    for filename in filenames:
        epoch = int(re.search(r"\d+\.model", filename).group()[:-6])
        m = Word2Vec.load(filename)
        losses[epoch] = m.get_latest_training_loss()
        sections = m.wv.accuracy(analogy_file)
        for sec in sections:
            if sec["section"] not in accuracies:
                accuracies[sec["section"]] = [0] * nr_epochs
            correct, incorrect = len(sec["correct"]), len(sec["incorrect"])
            if incorrect > 0:
                accuracy = correct / (correct + incorrect)
            else:
                accuracy = 0
            accuracies[sec["section"]][epoch] = (correct, incorrect, accuracy)
        save_obj(accuracies, os.path.join("models", prefix + "_accuracies"))
        save_obj(np.concatenate([np.array([losses[0]]), np.diff(losses)]), os.path.join("models", prefix + "_loss")) 
Developer: materialsintelligence, Project: mat2vec, Lines of code: 23, Source file: utils.py

Example 4: get_embedding_matrix

# Required import: from gensim.models import Word2Vec [as alias]
# Or: from gensim.models.Word2Vec import load [as alias]
def get_embedding_matrix(model_filepath, word2id):
    """
    Get the embedding matrix of the word2vec model
    :param model_filepath: the file path to the pre-build word2vec model
    :param word2id: the directory mapping from word to id
    :return: the embedding matrix of the word2vec model
    """
    word2vec_model = Word2Vec.load(model_filepath)
    embeddings_dict = __get_embedding_dict(model_filepath)
    embedding_matrix = np.zeros((len(word2id) + 1, word2vec_model.vector_size))
    for word, idx in word2id.items():
        embedding_vector = embeddings_dict.get(word)
        if embedding_vector is not None:
            embedding_matrix[idx] = embedding_vector

    return embedding_matrix 
Developer: fordai, Project: CCKS2019-Chinese-Clinical-NER, Lines of code: 18, Source file: data_utils.py

Example 5: load_word_embeddings

# Required import: from gensim.models import Word2Vec [as alias]
# Or: from gensim.models.Word2Vec import load [as alias]
def load_word_embeddings(self, vecs_fname, method):
        if method == "word2vec":
            model = Word2Vec.load(vecs_fname)
            words = model.wv.index2word
            vecs = model.wv.vectors
        else:
            words, vecs = [], []
            with open(vecs_fname, 'r', encoding='utf-8') as f1:
                if "fasttext" in method:
                    next(f1)  # skip head line
                for line in f1:
                    if method == "swivel":
                        splited_line = line.replace("\n", "").strip().split("\t")
                    else:
                        splited_line = line.replace("\n", "").strip().split(" ")
                    words.append(splited_line[0])
                    vec = [float(el) for el in splited_line[1:]]
                    vecs.append(vec)
        return words, vecs 
Developer: ratsgo, Project: embedding, Lines of code: 21, Source file: word_utils.py

Example 6: load_vectors

# Required import: from gensim.models import Word2Vec [as alias]
# Or: from gensim.models.Word2Vec import load [as alias]
def load_vectors(self, vecs_fname, method):
        if method == "word2vec":
            model = Word2Vec.load(vecs_fname)
            words = model.wv.index2word
            vecs = model.wv.vectors
        else:
            words, vecs = [], []
            with open(vecs_fname, 'r', encoding='utf-8') as f:
                if "fasttext" in method:
                    next(f)  # skip head line
                for line in f:
                    if method == "swivel":
                        splited_line = line.strip().split("\t")
                    else:
                        splited_line = line.strip().split(" ")
                    words.append(splited_line[0])
                    vec = [float(el) for el in splited_line[1:]]
                    vecs.append(vec)
        unit_vecs = normalize(vecs, norm='l2', axis=1)
        dictionary = {}
        for word, vec in zip(words, unit_vecs):
            dictionary[word] = vec
        return dictionary, words, unit_vecs 
Developer: ratsgo, Project: embedding, Lines of code: 25, Source file: word_eval.py

Example 7: _check_men

# Required import: from gensim.models import Word2Vec [as alias]
# Or: from gensim.models.Word2Vec import load [as alias]
def _check_men(args):
    """Check embeddings quality.

    Calculate correlation with the similarity ratings in the MEN dataset.
    """
    logger.info('Checking embeddings quality against MEN similarity ratings')
    logger.info('Loading word2vec model...')
    model = Word2Vec.load(args.w2v_model)
    logger.info('Model loaded')
    system_actual = []
    # This is needed because we may not be able to calculate cosine for
    # all pairs
    human_actual = []
    count = 0
    for (first, second), human in Samples(source='men', shuffle=False):
        if first not in model.wv.vocab or second not in model.wv.vocab:
            logger.error('Could not find one of more pair item in model '
                         'vocabulary: {}, {}'.format(first, second))
            continue
        sim = _cosine_similarity(model.wv[first], model.wv[second])
        system_actual.append(sim)
        human_actual.append(human)
        count += 1
    spr = _spearman(human_actual, system_actual)
    logger.info('SPEARMAN: {} calculated over {} items'.format(spr, count)) 
Developer: minimalparts, Project: nonce2vec, Lines of code: 27, Source file: main.py

Example 8: train_model

# Required import: from gensim.models import Word2Vec [as alias]
# Or: from gensim.models.Word2Vec import load [as alias]
def train_model(corpus, size=200, window=5, workers=3, model_path=None,
                word_freq=None, corpus_count=None):
    """Train using Skipgram model.

    Args:
        corpus (str):       file path of corpus
        size (int):         embedding size (default=200)
        window (int):       window size (default=5)
        workers (int):      number of workers (default=3)
        model_path (str):   file path of model we want to update
        word_freq (dict):   dictionary of word frequencies
        corpus_count (int): corpus size

    Returns:
        Word2Vec: word2vec model
    """
    sentences = LineSentence(corpus)
    if model_path is not None:
        logger.info("Updating pre-existing model: %s", model_path)
        assert os.path.isfile(model_path), "File does not exist"
        model = Word2Vec.load(model_path)
        model.build_vocab(sentences, update=True)
        model.train(sentences, total_examples=model.corpus_count,
                    epochs=model.iter)
    else:
        model = Skipgram(sentences=sentences, size=size, window=window,
                         min_count=1, workers=workers, raw_vocab=word_freq,
                         corpus_count=corpus_count)
    return model 
Developer: jwplayer, Project: jwalk, Lines of code: 31, Source file: skipgram.py

Example 9: build_phrase

# Required import: from gensim.models import Word2Vec [as alias]
# Or: from gensim.models.Word2Vec import load [as alias]
def build_phrase(doc):
    # apply the pre-loaded bigram and trigram phrase models to a tokenized document
    return trigram[bigram[doc]] 
Developer: armor-ai, Project: IDEA, Lines of code: 5, Source file: main.py

Example 10: load_phrase

# Required import: from gensim.models import Word2Vec [as alias]
# Or: from gensim.models.Word2Vec import load [as alias]
def load_phrase():
    global bigram
    global trigram
    bigram = Phrases.load(os.path.join("..", "model", "bigram.model"))
    trigram = Phrases.load(os.path.join("..", "model", "trigram.model")) 
Developer: armor-ai, Project: IDEA, Lines of code: 7, Source file: main.py

Example 11: load_obj

# Required import: from gensim.models import Word2Vec [as alias]
# Or: from gensim.models.Word2Vec import load [as alias]
def load_obj(filename):
    with open(filename) as fin:
        return cPickle.load(fin) 
Developer: armor-ai, Project: IDEA, Lines of code: 5, Source file: main.py

Example 12: get_weights_word2vec

# Required import: from gensim.models import Word2Vec [as alias]
# Or: from gensim.models.Word2Vec import load [as alias]
def get_weights_word2vec(word2idx, w2vfile, w2v_embed_size=300, 
                         is_custom=False):
    word2vec = None
    if is_custom:
        word2vec = Word2Vec.load(w2vfile)
    else:
        word2vec = Word2Vec.load_word2vec_format(w2vfile, binary=True)
    vocab_size = len(word2idx) + 1
    embedding_weights = np.zeros((vocab_size, w2v_embed_size))
    for word, index in word2idx.items():
        try:
            embedding_weights[index, :] = word2vec[word.lower()]
        except KeyError:
            pass  # keep as zero (not ideal, but what else can we do?)
    return embedding_weights 
Developer: sujitpal, Project: dl-models-for-qa, Lines of code: 17, Source file: kaggle.py

Example 13: load_obj

# Required import: from gensim.models import Word2Vec [as alias]
# Or: from gensim.models.Word2Vec import load [as alias]
def load_obj(name):
    with open(name + ".pkl", "rb") as f:
        return pickle.load(f) 
Developer: materialsintelligence, Project: mat2vec, Lines of code: 5, Source file: utils.py

Example 14: load_tag2id

# Required import: from gensim.models import Word2Vec [as alias]
# Or: from gensim.models.Word2Vec import load [as alias]
def load_tag2id(tag2id_filepath):
    """
    Load the dictionary mapping from tag to id
    :param tag2id_filepath: the file path to the pre-built dictionary
    :return: the dictionary mapping from tag to id
    """
    with open(tag2id_filepath, "rb") as fr:
        tag2id = pickle.load(fr)

    return tag2id 
Developer: fordai, Project: CCKS2019-Chinese-Clinical-NER, Lines of code: 12, Source file: data_utils.py

Example 15: load_vocab

# Required import: from gensim.models import Word2Vec [as alias]
# Or: from gensim.models.Word2Vec import load [as alias]
def load_vocab(vocab_filepath):
    """
    Load the dictionary mapping from word to id
    :param vocab_filepath: the file path to the pre-built dictionary
    :return: the dictionary mapping from word to id
    """
    with open(vocab_filepath, "rb") as fr:
        word2id = pickle.load(fr)

    return word2id 
Developer: fordai, Project: CCKS2019-Chinese-Clinical-NER, Lines of code: 12, Source file: data_utils.py


Note: The gensim.models.Word2Vec.load examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by their respective developers; copyright of the source code remains with the original authors, and distribution or use should follow each project's license. Do not reproduce without permission.