當前位置: 首頁>>代碼示例>>Python>>正文


Python matutils.cossim方法代碼示例

本文整理匯總了Python中gensim.matutils.cossim方法的典型用法代碼示例。如果您正苦於以下問題:Python matutils.cossim方法的具體用法?Python matutils.cossim怎麽用?Python matutils.cossim使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在gensim.matutils的用法示例。


在下文中一共展示了matutils.cossim方法的8個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: similarity

# 需要導入模塊: from gensim import matutils [as 別名]
# 或者: from gensim.matutils import cossim [as 別名]
def similarity(self, t, extraction_pattern):
    """Return the weighted cosine similarity between a tuple and a pattern.

    For each of the three contexts (``bef``, ``bet``, ``aft``) the score is
    the ``cossim`` of the tuple's vector with the pattern's centroid for that
    context. A context whose vector or centroid is ``None`` scores 0. The
    three scores are combined with the weights ``alpha``, ``beta`` and
    ``gamma`` read from ``self.config``.
    """
    pattern = extraction_pattern
    bef = bet = aft = 0

    if not (t.bef_vector is None or pattern.centroid_bef is None):
        bef = cossim(t.bef_vector, pattern.centroid_bef)

    if not (t.bet_vector is None or pattern.centroid_bet is None):
        bet = cossim(t.bet_vector, pattern.centroid_bet)

    if not (t.aft_vector is None or pattern.centroid_aft is None):
        aft = cossim(t.aft_vector, pattern.centroid_aft)

    weighted_bef = self.config.alpha * bef
    weighted_bet = self.config.beta * bet
    weighted_aft = self.config.gamma * aft
    return weighted_bef + weighted_bet + weighted_aft
開發者ID:davidsbatista,項目名稱:Snowball,代碼行數:16,代碼來源:Snowball.py

示例2: getComparable

# 需要導入模塊: from gensim import matutils [as 別名]
# 或者: from gensim.matutils import cossim [as 別名]
def getComparable(source_lsi_doc, target_lsi_corpus):
    """Find the target document most similar to ``source_lsi_doc``.

    Every document in ``target_lsi_corpus`` is compared with the source
    document using ``matutils.cossim``.

    Returns:
        A ``(index, similarity)`` tuple for the best-scoring target document.
        When several documents share the top score, the one with the lowest
        index is returned.

    Raises:
        IndexError: if ``target_lsi_corpus`` is empty.
    """
    sims = [matutils.cossim(source_lsi_doc, doc) for doc in target_lsi_corpus]
    if not sims:
        # Same exception type the original raised when indexing an empty result.
        raise IndexError("target_lsi_corpus is empty; nothing to compare against")
    # max() keeps the first maximal item, which matches a stable descending sort.
    return max(enumerate(sims), key=lambda item: item[1])

##################################################################################


##################################################################################
# takes wiki text and a list of language codes, and returns the interlanguage links
# language code list:
# ar arabic
# en english
# fr french
# es Español
# de Deutsch
# it Italiano
# pt portuguese
# fa farsi
# ur urdu
# he hebrew
# ps pashto (Afghānī)
# sd Sindhi (sindi)
# ug Uyghur أويغورية
# pnb punjabi (Pakistan - India)
# ckb kurdi
# arz egyptian 
開發者ID:motazsaad,項目名稱:comparable-text-miner,代碼行數:33,代碼來源:textpro.py

示例3: getMaxSimilarity

# 需要導入模塊: from gensim import matutils [as 別名]
# 或者: from gensim.matutils import cossim [as 別名]
def getMaxSimilarity(dictTopic, vector):
    """Find the cluster whose members are, on average, most similar to ``vector``.

    Args:
        dictTopic: mapping of cluster key -> iterable of member vectors.
        vector: the vector compared against every member via
            ``matutils.cossim``.

    Returns:
        ``(key, mean_similarity)`` for the cluster with the highest mean
        similarity. ``(-1, 0)`` is returned when ``dictTopic`` is empty or no
        cluster has a mean similarity greater than 0.
    """
    maxValue = 0
    maxIndex = -1
    # .items() works on Python 2 and 3; .iteritems() exists only on Python 2.
    for k, cluster in dictTopic.items():
        oneSimilarity = mean([matutils.cossim(vector, v) for v in cluster])
        if oneSimilarity > maxValue:
            maxValue = oneSimilarity
            maxIndex = k
    return maxIndex, maxValue
開發者ID:HaowenHOU,項目名稱:single-pass-clustering-for-chinese-text,代碼行數:11,代碼來源:cluster_for_data.py

示例4: order_by_tf_id_rank

# 需要導入模塊: from gensim import matutils [as 別名]
# 或者: from gensim.matutils import cossim [as 別名]
def order_by_tf_id_rank(self, headline, sentences, number_of_sentences):
    """Return the sentences most similar to the headline, joined into one string.

    ``sentences`` is split with ``self.tokenizer``. Each sentence is scored
    with ``cossim`` against the headline's TF-IDF vector, the sentences are
    ordered by descending score, and the leading ``number_of_sentences`` of
    them are concatenated, each followed by a single space.

    If the running count never equals ``number_of_sentences`` (for example
    when it is 0 or exceeds the number of sentences), every sentence is
    returned.
    """
    headline_bow = self.vocab.doc2bow(sent2stokens_wostop(headline))
    headline_tfidf = self.tfidf_model[headline_bow]

    # NOTE(review): the sentence vector is a raw bag-of-words; it is not passed
    # through self.tfidf_model the way the headline is - confirm this is intended.
    scored = []
    for sentence in self.tokenizer.tokenize(sentences):
        sentence_bow = self.vocab.doc2bow(sent2stokens_wostop(sentence))
        scored.append([sentence, cossim(headline_tfidf, sentence_bow)])

    # Highest similarity first; ties keep their original relative order.
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)

    pieces = []
    for rank, pair in enumerate(ranked, start=1):
        pieces.append(pair[0] + ' ')
        if rank == number_of_sentences:
            break
    return "".join(pieces)
開發者ID:UKPLab,項目名稱:coling2018_fake-news-challenge,代碼行數:32,代碼來源:tf_idf_helpers.py

示例5: tfidf_sim

# 需要導入模塊: from gensim import matutils [as 別名]
# 或者: from gensim.matutils import cossim [as 別名]
def tfidf_sim(self, train_data, body_dict, threshold):
    """Score headline/body pairs by TF-IDF cosine similarity and print results.

    A dictionary and TF-IDF model are built from the tokenized body texts.
    For every training sample the headline and its referenced body are
    converted to TF-IDF vectors and compared with ``cossim``; the similarity,
    stance and threshold are handed to ``create_lists``, and the accumulated
    lists are finally passed to ``print_results`` with ``self.model_type``.

    :param train_data: a list of training samples of type
        ['headline', 'bodyID', 'stance']
    :param body_dict: a dictionary of values containing {bodyID: 'bodyText'}
    :param threshold: used to distinguish between similar and not similar
    """
    # Position of each body ID within the tokenized corpus built below.
    position_of = {body_id: pos for pos, body_id in enumerate(body_dict.keys())}
    tokenized_bodies = [sent2stokens_wostop(text) for text in body_dict.values()]

    vocab = corpora.Dictionary(tokenized_bodies)
    body_bows = [vocab.doc2bow(tokens) for tokens in tokenized_bodies]
    tfidf_model = models.TfidfModel(body_bows)

    unrelated, related, y_true, y_pred = [], [], [], []
    for headline, bodyID, stance in train_data:
        headline_vec = tfidf_model[vocab.doc2bow(sent2stokens_wostop(headline))]
        body_vec = tfidf_model[body_bows[position_of[bodyID]]]

        sim = cossim(headline_vec, body_vec)
        accumulators = [unrelated, related, y_true, y_pred]
        unrelated, related, y_true, y_pred = create_lists(sim, stance, threshold, accumulators)

    print_results([unrelated, related, y_true, y_pred], self.model_type)
開發者ID:UKPLab,項目名稱:coling2018_fake-news-challenge,代碼行數:29,代碼來源:models.py

示例6: tdidf_all_vec

# 需要導入模塊: from gensim import matutils [as 別名]
# 或者: from gensim.matutils import cossim [as 別名]
def tdidf_all_vec(self):
    """Return ``matutils.cossim`` of ``self.tfidf_vec1`` and ``self.tfidf_vec2``."""
    first, second = self.tfidf_vec1, self.tfidf_vec2
    return matutils.cossim(first, second)
開發者ID:yongzhuo,項目名稱:nlp_xiaojiang,代碼行數:5,代碼來源:sentence_sim_feature.py

示例7: tdidf_all_vec_pinyin

# 需要導入模塊: from gensim import matutils [as 別名]
# 或者: from gensim.matutils import cossim [as 別名]
def tdidf_all_vec_pinyin(self):
    """Return ``matutils.cossim`` of the two pinyin TF-IDF vectors on ``self``."""
    first, second = self.tfidf_vec1_pinyin, self.tfidf_vec2_pinyin
    return matutils.cossim(first, second)
開發者ID:yongzhuo,項目名稱:nlp_xiaojiang,代碼行數:5,代碼來源:sentence_sim_feature.py

示例8: test_lee

# 需要導入模塊: from gensim import matutils [as 別名]
# 或者: from gensim.matutils import cossim [as 別名]
def test_lee(self):
    """Check that LSI similarities correlate with the human ratings (> 0.6).

    0.6 is the value which was achieved in the original paper.
    """
    global bg_corpus, corpus

    # Build the vocabulary from the background corpus, then rebind both
    # module-level corpora to their bag-of-words form.
    vocabulary = corpora.Dictionary(bg_corpus)
    bg_corpus = [vocabulary.doc2bow(tokens) for tokens in bg_corpus]
    corpus = [vocabulary.doc2bow(tokens) for tokens in corpus]

    # Log-entropy weighting is fitted on the background corpus.
    log_entropy = models.LogEntropyModel(bg_corpus)
    weighted_background = log_entropy[bg_corpus]

    # Train LSI on the weighted background corpus, then fold the small corpus
    # in: bow -> log-entropy -> LSI.
    lsi_model = models.LsiModel(weighted_background, id2word=vocabulary, num_topics=200)
    lsi_docs = lsi_model[log_entropy[corpus]]

    # Fill the full pairwise similarity matrix; only the strict upper
    # triangle is compared with the human ratings.
    n_docs = len(corpus)
    pairwise = np.zeros((n_docs, n_docs))
    for row, doc_a in enumerate(lsi_docs):
        for col, doc_b in enumerate(lsi_docs):
            pairwise[row, col] = matutils.cossim(doc_a, doc_b)
    upper = pairwise[matutils.triu_indices(n_docs, 1)]

    correlation = np.corrcoef(upper, human_sim_vector)[0, 1]
    logging.info("LSI correlation coefficient is %s" % correlation)
    self.assertTrue(correlation > 0.6)


    # def test_lee_mallet(self):
    #     global bg_corpus, corpus, bg_corpus2, corpus2

    #     # create a dictionary and corpus (bag of words)
    #     dictionary = corpora.Dictionary(bg_corpus2)
    #     bg_corpus = [dictionary.doc2bow(text) for text in bg_corpus2]
    #     corpus = [dictionary.doc2bow(text) for text in corpus2]

    #     # initialize an LDA transformation from background corpus
    #     lda = models.LdaMallet('/Users/kofola/Downloads/mallet-2.0.7/bin/mallet',
    #         corpus=bg_corpus, id2word=dictionary, num_topics=200, optimize_interval=10)
    #     corpus_lda = lda[corpus]

    #     # compute pairwise similarity matrix and extract upper triangular
    #     res = np.zeros((len(corpus), len(corpus)))
    #     for i, par1 in enumerate(corpus_lda):
    #         for j, par2 in enumerate(corpus_lda):
    #             res[i, j] = matutils.cossim(par1, par2)
    #     flat = res[matutils.triu_indices(len(corpus), 1)]

    #     cor = np.corrcoef(flat, human_sim_vector)[0, 1]
    #     logging.info("LDA correlation coefficient is %s" % cor)
    #     self.assertTrue(cor > 0.35) 
開發者ID:largelymfs,項目名稱:topical_word_embeddings,代碼行數:58,代碼來源:test_lee.py


注:本文中的gensim.matutils.cossim方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。