當前位置: 首頁>>代碼示例>>Python>>正文


Python gensim.models方法代碼示例

本文整理匯總了Python中gensim.models方法的典型用法代碼示例。如果您正苦於以下問題:Python gensim.models方法的具體用法?Python gensim.models怎麼用?Python gensim.models使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在gensim的用法示例。


在下文中一共展示了gensim.models方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: create_metadata_file

# 需要導入模塊: import gensim [as 別名]
# 或者: from gensim import models [as 別名]
def create_metadata_file(word2vec_file, output_file):
    """
    Create the metadata file based on the corpus file (Used for the Embedding Visualization later).

    Every vocabulary word of the word2vec model is written on its own line,
    ordered by the word's index in the model.

    Args:
        word2vec_file: The word2vec file
        output_file: The metadata file path
    Raises:
        IOError: If word2vec model file doesn't exist
    """
    if not os.path.isfile(word2vec_file):
        raise IOError("[Error] The word2vec file doesn't exist.")

    model = gensim.models.Word2Vec.load(word2vec_file)
    # gensim >= 4.0 exposes the word -> index mapping directly as
    # `key_to_index`; older releases keep the index on the per-word entries
    # of `wv.vocab`. Support both so the helper survives a gensim upgrade.
    word2idx = getattr(model.wv, 'key_to_index', None)
    if word2idx is None:
        word2idx = {k: v.index for k, v in model.wv.vocab.items()}
    words_by_index = sorted(word2idx, key=word2idx.get)

    # Explicit UTF-8 so that non-ASCII vocabulary is written identically on
    # every platform instead of depending on the locale's default encoding.
    with open(output_file, 'w', encoding='utf-8') as fout:
        for word in words_by_index:
            if word is None:
                print("[Warning] Empty Line, should replaced by any thing else, or will cause a bug of tensorboard")
                fout.write('<Empty Line>' + '\n')
            else:
                fout.write(word + '\n')
開發者ID:RandolphVI,項目名稱:Text-Pairs-Relation-Classification,代碼行數:26,代碼來源:data_helpers.py

示例2: load_poincare_model

# 需要導入模塊: import gensim [as 別名]
# 或者: from gensim import models [as 別名]
def load_poincare_model(path, word2vec_format=True, binary=False):
    """ Load pre-trained Poincare embeddings from a file.

    When `word2vec_format` is true the file is read with
    `PoincareKeyedVectors.load_word2vec_format`; otherwise it is loaded as a
    saved `PoincareModel` and that model's `kv` attribute is returned.

    :param path: path of the file of the pre-trained Poincare embedding model
    :param word2vec_format: whether to load from word2vec format (default: True)
    :param binary: binary format, only used for word2vec format (default: False)
    :return: a pre-trained Poincare embedding model
    :type path: str
    :type word2vec_format: bool
    :type binary: bool
    :rtype: gensim.models.poincare.PoincareKeyedVectors
    """
    if not word2vec_format:
        # Natively saved model: load it whole and hand back its vectors.
        trained_model = PoincareModel.load(path)
        return trained_model.kv

    return PoincareKeyedVectors.load_word2vec_format(path, binary=binary)
開發者ID:stephenhky,項目名稱:PyShortTextCategorization,代碼行數:18,代碼來源:wordembed.py

示例3: shorttext_to_avgvec

# 需要導入模塊: import gensim [as 別名]
# 或者: from gensim import models [as 別名]
def shorttext_to_avgvec(shorttext, wvmodel):
    """ Represent a short text as the normalized sum of its word vectors.

    The text is tokenized, every token known to the word-embedding model is
    looked up, the vectors are summed, and the sum is scaled to unit length
    (skipped when its norm is zero).

    NOTE(review): when no token is found in the model the sum is taken over
    an empty list, so the value returned is whatever `np.sum([], axis=0)`
    yields (a scalar zero, not a vector) -- confirm callers cope with that.

    :param shorttext: a short sentence
    :param wvmodel: word-embedding model
    :return: an embedded vector that represents the short sentence
    :type shorttext: str
    :type wvmodel: gensim.models.keyedvectors.KeyedVectors
    :rtype: numpy.ndarray
    """
    known_vectors = []
    for token in tokenize(shorttext):
        if token in wvmodel:
            known_vectors.append(wvmodel[token])
    summed = np.sum(known_vectors, axis=0)

    # Scale to unit length; an all-zero result is returned untouched.
    length = np.linalg.norm(summed)
    if length == 0:
        return summed
    summed /= length
    return summed
開發者ID:stephenhky,項目名稱:PyShortTextCategorization,代碼行數:25,代碼來源:wordembed.py

示例4: load_word2vec_matrix

# 需要導入模塊: import gensim [as 別名]
# 或者: from gensim import models [as 別名]
def load_word2vec_matrix(word2vec_file):
    """
    Return the word2vec model matrix.

    Row `i` of the returned matrix holds the vector of the word whose
    vocabulary index is `i`.

    Args:
        word2vec_file: The word2vec file
    Returns:
        A tuple `(vocab_size, embedding_size, embedding_matrix)` where
        `embedding_matrix` is a `[vocab_size, embedding_size]` numpy array
    Raises:
        IOError: If word2vec model file doesn't exist
    """
    if not os.path.isfile(word2vec_file):
        raise IOError("[Error] The word2vec file doesn't exist. ")

    model = gensim.models.Word2Vec.load(word2vec_file)
    word_vectors = model.wv
    vocab_size = word_vectors.vectors.shape[0]
    embedding_size = model.vector_size
    # gensim >= 4.0 exposes the word -> index mapping as `key_to_index`;
    # older releases keep the index on the per-word entries of `wv.vocab`.
    vocab = getattr(word_vectors, 'key_to_index', None)
    if vocab is None:
        vocab = {k: v.index for k, v in word_vectors.vocab.items()}
    embedding_matrix = np.zeros([vocab_size, embedding_size])
    for key, value in vocab.items():
        if key is not None:
            # Look the vector up through `wv`: indexing the Word2Vec model
            # itself is deprecated and no longer available in gensim 4.
            embedding_matrix[value] = word_vectors[key]
    return vocab_size, embedding_size, embedding_matrix
開發者ID:RandolphVI,項目名稱:Text-Pairs-Relation-Classification,代碼行數:25,代碼來源:data_helpers.py

示例5: compute_epoch_accuracies

# 需要導入模塊: import gensim [as 別名]
# 或者: from gensim import models [as 別名]
def compute_epoch_accuracies(root, prefix, analogy_file):
    """
    Evaluate every per-epoch model checkpoint on an analogy file.

    Checkpoints are the files matching `<root>/<prefix>_epoch*.model`. For
    each one the latest training loss and the per-section analogy results are
    collected, and after every checkpoint the accumulated results are saved
    with `save_obj` under `models/<prefix>_accuracies` and
    `models/<prefix>_loss` (the first loss followed by the differences
    between consecutive losses).

    NOTE(review): the epoch number parsed from the file name is used as a
    list index, so this assumes epochs are numbered 0..N-1 with one file
    each -- confirm against the code that writes the checkpoints.

    Args:
        root: Directory containing the checkpoint files
        prefix: File-name prefix of the checkpoints
        analogy_file: Analogy questions file passed to `wv.accuracy`
    """
    filenames = glob.glob(os.path.join(root, prefix + "_epoch*.model"))
    nr_epochs = len(filenames)
    accuracies = dict()
    losses = [0] * nr_epochs
    for filename in filenames:
        # "..._epoch12.model" -> 12: match "<digits>.model", then drop the
        # 6-character ".model" suffix.
        epoch = int(re.search(r"\d+\.model", filename).group()[:-6])
        m = Word2Vec.load(filename)
        losses[epoch] = m.get_latest_training_loss()
        sections = m.wv.accuracy(analogy_file)
        for sec in sections:
            if sec["section"] not in accuracies:
                accuracies[sec["section"]] = [0] * nr_epochs
            correct, incorrect = len(sec["correct"]), len(sec["incorrect"])
            # Guard on the total, not on `incorrect`: a section answered
            # entirely correctly must score 1.0 rather than 0.
            total = correct + incorrect
            accuracy = correct / total if total > 0 else 0
            accuracies[sec["section"]][epoch] = (correct, incorrect, accuracy)
        # Saved after every checkpoint, so partial results are on disk even
        # if a later checkpoint fails to load.
        save_obj(accuracies, os.path.join("models", prefix + "_accuracies"))
        save_obj(np.concatenate([np.array([losses[0]]), np.diff(losses)]), os.path.join("models", prefix + "_loss"))
開發者ID:materialsintelligence,項目名稱:mat2vec,代碼行數:23,代碼來源:utils.py

示例6: train_artistsong2vec_model

# 需要導入模塊: import gensim [as 別名]
# 或者: from gensim import models [as 別名]
def train_artistsong2vec_model(fout_path, input_datas=None, data_path=None,
                               min_count=5, sorted_vocab=1, window=10,
                               size=250,
                               iter_n=50):
    """
    Train a Word2Vec model on flattened item sequences and pickle it.

    Every input sequence is flattened by emitting the first and the second
    field of each of its items, in order; the flattened sequences are the
    "sentences" given to Word2Vec.

    Args:
        fout_path: Path the trained model is pickled to
        input_datas: Iterable of sequences whose items expose at least two
            fields (`item[0]`, `item[1]`)
        data_path: Pickle file to load the sequences from; only used when
            `input_datas` is empty or None
        min_count: Passed to Word2Vec as `min_count`
        sorted_vocab: Passed to Word2Vec as `sorted_vocab`
        window: Passed to Word2Vec as `window`
        size: Passed to Word2Vec as `size`
        iter_n: Passed to Word2Vec as `iter`
    """
    if not input_datas and data_path:
        # NOTE: unpickling can execute arbitrary code -- only point
        # `data_path` at files from a trusted source.
        with open(data_path, 'rb') as fin:
            input_datas = pickle.load(fin)
    full_data = []
    for sequence in input_datas:
        flattened = []
        for item in sequence:
            flattened.extend((item[0], item[1]))
        full_data.append(flattened)
    data_process_logger.info('start training')
    wv_model = gensim.models.Word2Vec(full_data, min_count=min_count, sorted_vocab=sorted_vocab, window=window,
                                      size=size, iter=iter_n)
    with open(fout_path, 'wb') as fout:
        data_process_logger.info('start saving model')
        pickle.dump(wv_model, fout)
        # Call form prints the same text on Python 2 and is valid Python 3.
        print('model saved')
開發者ID:JayveeHe,項目名稱:MusicTaster,代碼行數:22,代碼來源:rock_gensim.py

示例7: get_word

# 需要導入模塊: import gensim [as 別名]
# 或者: from gensim import models [as 別名]
def get_word(word):
    """
    Decide whether an underscore-separated title is usable as a label.

    A title is accepted when it has fewer than five `_`-separated words. A
    trailing parenthetical such as `_(film)` is not counted towards that
    limit, and titles whose parenthetical is `disambiguation` are always
    rejected.

    Args:
        word: The title, with words joined by underscores
    Returns:
        A tuple `(accepted, word)`; `word` is returned unchanged
    """
    inst = re.search(r"_\(([A-Za-z0-9_]+)\)", word)

    if inst is None:
        if len(word.split("_")) < 5:
            return True, word
    elif inst.group(1) != "disambiguation":
        word2 = re.sub(r'_\(.+\)', '', word)
        # Count words on "_" like the branch above; splitting on " " never
        # split these titles, so the length limit was silently skipped.
        if len(word2.split("_")) < 5:
            return True, word

    return False, word

# Load the trained doc2vec and word2vec models. 
開發者ID:sb1992,項目名稱:NETL-Automatic-Topic-Labelling-,代碼行數:18,代碼來源:get_indices.py

示例8: online_lda

# 需要導入模塊: import gensim [as 別名]
# 或者: from gensim import models [as 別名]
def online_lda(corpus, dictionary, k=25, alpha="symmetric", chunk_size=10000, update_every=1, passes=1):
    """
    Build an online LDA topic model with gensim's LdaModel (see
    http://radimrehurek.com/gensim/wiki.html#latent-dirichlet-allocation).

    The corpus is processed in chunks of 'chunk_size' documents, the model is
    updated every 'update_every' chunks, and the whole corpus is traversed
    'passes' times.

    Example: with chunk_size=100, update_every=1, passes=1 a corpus of 500
    documents is split into 5 chunks and the model is updated after each of
    them, in a single pass.

    'alpha' may be "symmetric", "asymmetric" or "auto"; see
    http://radimrehurek.com/gensim/models/ldamodel.html

    'k' is the number of topics and 'dictionary' the id -> word mapping.
    """
    lda_options = {
        "corpus": corpus,
        "id2word": dictionary,
        "num_topics": k,
        "alpha": alpha,
        "chunksize": chunk_size,
        "update_every": update_every,
        "passes": passes,
    }
    return models.ldamodel.LdaModel(**lda_options)
開發者ID:SMAPPNYU,項目名稱:smappPy,代碼行數:25,代碼來源:build_models.py

示例9: batch_lda

# 需要導入模塊: import gensim [as 別名]
# 或者: from gensim import models [as 別名]
def batch_lda(corpus, dictionary, k=25, alpha="symmetric", passes=20):
    """
    Build a batch LDA topic model with gensim's LdaModel (see
    http://radimrehurek.com/gensim/wiki.html#latent-dirichlet-allocation).

    The model is created with update_every=0 and makes 'passes' passes over
    the whole corpus, updating the model at the end of every full pass.

    'alpha' may be "symmetric", "asymmetric" or "auto"; see
    http://radimrehurek.com/gensim/models/ldamodel.html

    'k' is the number of topics and 'dictionary' the id -> word mapping.
    """
    lda_options = {
        "corpus": corpus,
        "id2word": dictionary,
        "num_topics": k,
        "alpha": alpha,
        "update_every": 0,
        "passes": passes,
    }
    return models.ldamodel.LdaModel(**lda_options)
開發者ID:SMAPPNYU,項目名稱:smappPy,代碼行數:19,代碼來源:build_models.py

示例10: predictData

# 需要導入模塊: import gensim [as 別名]
# 或者: from gensim import models [as 別名]
def predictData():
    """
    Run the model on a few hard-coded sample texts and print the results.

    The samples are vectorized with `predict_wordtoVect`, classified with the
    model returned by `get_model`, and each predicted label (1 or 0) is
    printed as its display word next to the text it belongs to.
    """
    input_texts = ["很好很滿意","不好不滿意","質量有問題","商家態度很差","售後很渣,渣渣"]
    label2word = {1: '正麵', 0: '負麵'}

    texts = predict_wordtoVect(input_texts)
    model = get_model()

    # Predict, then show the raw prediction before the readable summary.
    pred_result = model.predict_classes(texts)
    print(pred_result)

    labels = [int(round(row[0])) for row in pred_result]
    for idx, label in enumerate(labels):
        print('{0} -------- {1}'.format(label2word[label], input_texts[idx]))
開發者ID:jarvisqi,項目名稱:deep_learning,代碼行數:23,代碼來源:textAnalysis.py

示例11: predict_phrase

# 需要導入模塊: import gensim [as 別名]
# 或者: from gensim import models [as 別名]
def predict_phrase(phrase):
    """
    Classify a phrase with the module-level `model` and return 'R' or 'E'.

    Each character of the phrase is mapped to its id through the character
    dictionary stored in 'EARL/models/char_dict.npy'; the id sequence is
    left-padded with zeros to the fixed input length and passed to
    `model.predict`. The raw prediction is printed.

    Args:
        phrase: The phrase to classify; every character must be a key of the
            character dictionary
    Returns:
        'R' when the highest-scoring output is at index 0, otherwise 'E'
    Raises:
        KeyError: If the phrase contains a character missing from the
            character dictionary
        ValueError: If the phrase is longer than the fixed input length
    """
    # Fixed input length the encoded phrase is padded to.
    max_len = 270

    char_dict = np.load('EARL/models/char_dict.npy').item()
    encoded = [char_dict[char] for char in phrase]
    if len(encoded) > max_len:
        # numpy would raise an opaque "negative dimensions" ValueError when
        # building the padding; fail with a message that names the cause.
        raise ValueError(
            'phrase has %d characters, at most %d are supported' % (len(encoded), max_len))

    padding = np.zeros(max_len - len(encoded))
    prediction = model.predict(np.concatenate((padding, encoded)).reshape(1, max_len))

    # Call form prints the same text on Python 2 and is valid Python 3.
    print(prediction[0])

    pred = np.argmax(prediction[0])

    return 'R' if pred == 0 else 'E'
開發者ID:AskNowQA,項目名稱:EARL,代碼行數:25,代碼來源:predict_phrase.py

示例12: _expand_from

# 需要導入模塊: import gensim [as 別名]
# 或者: from gensim import models [as 別名]
def _expand_from(self, corpus, prefix=None, labels=None):
        """
        Add one label entry per line of `corpus` to the model.

        The corpus is iterated once, only to count its lines. `syn0` is
        grown by that many rows (new rows are random values in [-0.5, 0.5)
        divided by `layer1_size`; existing rows are kept), and for each line
        a new vocabulary entry labelled from `prefix` and the line number is
        appended to `vocab` and `index2word`. The labels stand for
        documents/sentences, not for new vocabulary words.

        `prefix` defaults to 'SENT'. `labels` is accepted but not used here.

        Returns the length `index2word` had before the new labels were added.
        """
        label_prefix = 'SENT' if prefix is None else prefix

        num_lines = 0
        for _ in corpus:
            num_lines += 1

        # Expand syn0: allocate the enlarged matrix, then copy the old rows in.
        old_rows = self.syn0.shape[0]
        grown = np.random.random((old_rows + num_lines, self.syn0.shape[1])).astype(self.syn0.dtype)
        grown = grown - 0.5
        grown /= self.layer1_size
        grown[:old_rows] = self.syn0
        self.syn0 = grown

        first_new_index = len(self.index2word)
        for line_no in range(num_lines):
            # Expand vocab
            entry = gensim.models.doc2vec.Vocab()
            entry.index = len(self.index2word)
            entry.sample_probability = 1.0
            # Original author's note: each sentence is inserted at the root
            # of the Huffman tree. It's a hack.
            entry.code = [1] * int(math.log(line_no + 1, 2) + 1)
            label = Document2Vec._make_label(label_prefix, str(line_no))
            self.vocab[label] = entry
            # Expand index2word
            self.index2word.append(label)
            assert len(self.index2word) == entry.index + 1
        return first_new_index
開發者ID:cemoody,項目名稱:Document2Vec,代碼行數:32,代碼來源:document2vec.py

示例13: load_word2vec_model

# 需要導入模塊: import gensim [as 別名]
# 或者: from gensim import models [as 別名]
def load_word2vec_model(path, binary=True):
    """ Load pre-trained word vectors stored in word2vec format.

    Thin wrapper around `KeyedVectors.load_word2vec_format`.

    :param path: path of the file of the pre-trained Word2Vec model
    :param binary: whether the file is in binary format (Default: True)
    :return: a pre-trained Word2Vec model
    :type path: str
    :type binary: bool
    :rtype: gensim.models.keyedvectors.KeyedVectors
    """
    keyed_vectors = KeyedVectors.load_word2vec_format(path, binary=binary)
    return keyed_vectors
開發者ID:stephenhky,項目名稱:PyShortTextCategorization,代碼行數:13,代碼來源:wordembed.py

示例14: load_fasttext_model

# 需要導入模塊: import gensim [as 別名]
# 或者: from gensim import models [as 別名]
def load_fasttext_model(path, encoding='utf-8'):
    """ Load pre-trained FastText vectors.

    Thin wrapper around `gensim.models.fasttext.load_facebook_vectors`.

    :param path: path of the file of the pre-trained FastText model
    :param encoding: encoding passed on to the loader (Default: 'utf-8')
    :return: a pre-trained FastText model
    :type path: str
    :type encoding: str
    :rtype: gensim.models.keyedvectors.FastTextKeyedVectors
    """
    fasttext_vectors = gensim.models.fasttext.load_facebook_vectors(path, encoding=encoding)
    return fasttext_vectors
開發者ID:stephenhky,項目名稱:PyShortTextCategorization,代碼行數:11,代碼來源:wordembed.py

示例15: save

# 需要導入模塊: import gensim [as 別名]
# 或者: from gensim import models [as 別名]
def save(self, fname_or_handle, **kwargs):
        """
        Refuse to save: this always raises.

        :param fname_or_handle: ignored
        :param kwargs: ignored
        :return: never returns
        :raises IOError: always
        """
        message = 'The class RESTfulKeyedVectors do not persist models to a file.'
        raise IOError(message)
開發者ID:stephenhky,項目名稱:PyShortTextCategorization,代碼行數:10,代碼來源:wordembed.py


注:本文中的gensim.models方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。