当前位置: 首页>>代码示例>>Python>>正文


Python matutils.cossim方法代码示例

本文整理汇总了Python中gensim.matutils.cossim方法的典型用法代码示例。如果您正苦于以下问题:Python matutils.cossim方法的具体用法?Python matutils.cossim怎么用?Python matutils.cossim使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在gensim.matutils的用法示例。


在下文中一共展示了matutils.cossim方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: similarity

# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import cossim [as 别名]
def similarity(self, t, extraction_pattern):
        """Score how well tuple ``t`` matches ``extraction_pattern``.

        For each of the three contexts (``bef``, ``bet``, ``aft``) the score
        is ``cossim`` of the tuple's vector and the pattern's centroid; a
        context whose vector or centroid is ``None`` contributes 0.

        Returns the weighted sum of the three scores, using
        ``self.config.alpha``, ``self.config.beta`` and ``self.config.gamma``
        as the respective weights.
        """
        bef_score = 0
        if not (t.bef_vector is None or extraction_pattern.centroid_bef is None):
            bef_score = cossim(t.bef_vector, extraction_pattern.centroid_bef)

        bet_score = 0
        if not (t.bet_vector is None or extraction_pattern.centroid_bet is None):
            bet_score = cossim(t.bet_vector, extraction_pattern.centroid_bet)

        aft_score = 0
        if not (t.aft_vector is None or extraction_pattern.centroid_aft is None):
            aft_score = cossim(t.aft_vector, extraction_pattern.centroid_aft)

        cfg = self.config
        return cfg.alpha*bef_score + cfg.beta*bet_score + cfg.gamma*aft_score
开发者ID:davidsbatista,项目名称:Snowball,代码行数:16,代码来源:Snowball.py

示例2: getComparable

# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import cossim [as 别名]
def getComparable(source_lsi_doc, target_lsi_corpus):
	"""Find the document in ``target_lsi_corpus`` closest to ``source_lsi_doc``.

	Every target document is compared to the source document with
	``matutils.cossim``.

	Returns an ``(index, similarity)`` tuple for the best match; when several
	documents tie for the highest similarity, the one with the lowest index
	is returned.

	Raises ``IndexError`` if ``target_lsi_corpus`` is empty.
	"""
	sims = [matutils.cossim(source_lsi_doc, doc) for doc in target_lsi_corpus]
	if not sims:
		# Same exception type as indexing the empty sorted list used to raise.
		raise IndexError("target_lsi_corpus is empty")
	# max() keeps the first of equal items, matching the previous stable sort.
	return max(enumerate(sims), key=lambda item: item[1])

##################################################################################


##################################################################################
# takes wiki text and a list of language codes, and returns the interlanguage links
# language code list:
# ar arabic
# en english
# fr french
# es Español
# de Deutsch
# it Italiano
# pt portuguese
# fa farsi
# ur urdu
# he hebrew
# ps pashto (Afghānī)
# sd Sindhi (sindi)
# ug Uyghur أويغورية
# pnb punjabi (Pakistan - India)
# ckb kurdi
# arz egyptian 
开发者ID:motazsaad,项目名称:comparable-text-miner,代码行数:33,代码来源:textpro.py

示例3: getMaxSimilarity

# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import cossim [as 别名]
def getMaxSimilarity(dictTopic, vector):
    """Find the cluster whose members are, on average, most similar to ``vector``.

    ``dictTopic`` maps a cluster key to an iterable of vectors. For each
    cluster the ``mean`` of ``matutils.cossim(vector, member)`` over its
    members is computed.

    Returns ``(key, similarity)`` for the cluster with the highest mean
    similarity. Only means strictly greater than 0 are considered, so
    ``(-1, 0)`` is returned when ``dictTopic`` is empty or no cluster
    scores above 0.
    """
    maxValue = 0
    maxIndex = -1
    # .items() instead of .iteritems(): the latter does not exist on
    # Python 3 dicts, while .items() works on both Python 2 and 3.
    for k, cluster in dictTopic.items():
        oneSimilarity = mean([matutils.cossim(vector, v) for v in cluster])
        if oneSimilarity > maxValue:
            maxValue = oneSimilarity
            maxIndex = k
    return maxIndex, maxValue
开发者ID:HaowenHOU,项目名称:single-pass-clustering-for-chinese-text,代码行数:11,代码来源:cluster_for_data.py

示例4: order_by_tf_id_rank

# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import cossim [as 别名]
def order_by_tf_id_rank(self, headline, sentences, number_of_sentences):
        """Return the sentences of ``sentences`` ranked by similarity to ``headline``.

        ``sentences`` is split with ``self.tokenizer``; each sentence is
        scored with ``cossim`` against the headline's TF-IDF vector and the
        sentences are sorted by descending score. The first
        ``number_of_sentences`` of them are concatenated, each followed by a
        single space, and returned as one string. If the counter never equals
        ``number_of_sentences`` (e.g. it is 0 or larger than the number of
        sentences), all sentences are returned.
        """
        headline_tfidf = self.tfidf_model[self.vocab.doc2bow(sent2stokens_wostop(headline))]

        # The punkt tokenizer does not recognize ".[newline]" as a sentence
        # end; replacing newlines with blanks was considered but is disabled.
        # NOTE(review): the sentence vector below is a plain bag-of-words and
        # is not passed through self.tfidf_model - confirm this is intended.
        scored = [
            [candidate, cossim(headline_tfidf, self.vocab.doc2bow(sent2stokens_wostop(candidate)))]
            for candidate in self.tokenizer.tokenize(sentences)
        ]
        ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)

        picked = []
        for position, pair in enumerate(ranked, 1):
            picked.append(pair[0] + ' ')
            if position == number_of_sentences:
                break
        return "".join(picked)
开发者ID:UKPLab,项目名称:coling2018_fake-news-challenge,代码行数:32,代码来源:tf_idf_helpers.py

示例5: tfidf_sim

# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import cossim [as 别名]
def tfidf_sim(self, train_data, body_dict, threshold):
        """Score headline/body pairs by TF-IDF cosine similarity and print the results.

        A dictionary and TF-IDF model are built from the body texts only;
        each headline is then compared with its referenced body via ``cossim``
        and the outcome is accumulated through ``create_lists``. The four
        accumulated lists are finally handed to ``print_results`` together
        with ``self.model_type``. Nothing is returned.

        :param train_data: a list of training samples of type
            ['headline', 'bodyID', 'stance']
        :param body_dict: a dictionary of values containing {bodyID: 'bodyText'}
        :param threshold: used to distinguish between similar and not similar
        """
        # Tokenise every body once, remembering which row each body ID got.
        body_position = {}
        body_tokens = []
        for body_id, body_text in body_dict.items():
            body_position[body_id] = len(body_tokens)
            body_tokens.append(sent2stokens_wostop(body_text))

        vocab = corpora.Dictionary(body_tokens)
        body_bows = [vocab.doc2bow(tokens) for tokens in body_tokens]
        tfidf_model = models.TfidfModel(body_bows)

        unrelated, related, y_true, y_pred = [], [], [], []
        for headline, bodyID, stance in train_data:
            headline_vec = tfidf_model[vocab.doc2bow(sent2stokens_wostop(headline))]
            body_vec = tfidf_model[body_bows[body_position[bodyID]]]
            score = cossim(headline_vec, body_vec)
            unrelated, related, y_true, y_pred = create_lists(
                score, stance, threshold, [unrelated, related, y_true, y_pred])

        print_results([unrelated, related, y_true, y_pred], self.model_type)
开发者ID:UKPLab,项目名称:coling2018_fake-news-challenge,代码行数:29,代码来源:models.py

示例6: tdidf_all_vec

# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import cossim [as 别名]
def tdidf_all_vec(self):
        """Return ``matutils.cossim`` of ``self.tfidf_vec1`` and ``self.tfidf_vec2``."""
        first_vec, second_vec = self.tfidf_vec1, self.tfidf_vec2
        return matutils.cossim(first_vec, second_vec)
开发者ID:yongzhuo,项目名称:nlp_xiaojiang,代码行数:5,代码来源:sentence_sim_feature.py

示例7: tdidf_all_vec_pinyin

# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import cossim [as 别名]
def tdidf_all_vec_pinyin(self):
        """Return ``matutils.cossim`` of the two pinyin vectors.

        Compares ``self.tfidf_vec1_pinyin`` with ``self.tfidf_vec2_pinyin``.
        """
        first_vec, second_vec = self.tfidf_vec1_pinyin, self.tfidf_vec2_pinyin
        return matutils.cossim(first_vec, second_vec)
开发者ID:yongzhuo,项目名称:nlp_xiaojiang,代码行数:5,代码来源:sentence_sim_feature.py

示例8: test_lee

# 需要导入模块: from gensim import matutils [as 别名]
# 或者: from gensim.matutils import cossim [as 别名]
def test_lee(self):
        """correlation with human data > 0.6
        (this is the value which was achieved in the original paper)

        Builds a log-entropy + LSI model from the background corpus, projects
        the small corpus into it, and correlates the pairwise cosine
        similarities with ``human_sim_vector``.

        Side effect: rebinds the module globals ``bg_corpus`` and ``corpus``
        to their bag-of-words form.
        """

        global bg_corpus, corpus

        # create a dictionary and corpus (bag of words)
        dictionary = corpora.Dictionary(bg_corpus)
        bg_corpus = [dictionary.doc2bow(text) for text in bg_corpus]
        corpus = [dictionary.doc2bow(text) for text in corpus]

        # transform the bag of words with log_entropy normalization
        log_ent = models.LogEntropyModel(bg_corpus)
        bg_corpus_ent = log_ent[bg_corpus]

        # initialize an LSI transformation from background corpus
        lsi = models.LsiModel(bg_corpus_ent, id2word=dictionary, num_topics=200)
        # transform small corpus to lsi bow->log_ent->fold-in-lsi;
        # materialise it once so the pairwise loop below does not have to
        # re-run the transformation each time the corpus is iterated
        corpus_lsi = list(lsi[log_ent[corpus]])

        # compute pairwise similarities; only the strict upper triangle is
        # read afterwards, so only entries with j > i are filled in
        n_docs = len(corpus)
        res = np.zeros((n_docs, n_docs))
        for i, par1 in enumerate(corpus_lsi):
            for j in range(i + 1, n_docs):
                res[i, j] = matutils.cossim(par1, corpus_lsi[j])
        flat = res[matutils.triu_indices(n_docs, 1)]

        cor = np.corrcoef(flat, human_sim_vector)[0, 1]
        logging.info("LSI correlation coefficient is %s", cor)
        self.assertTrue(cor > 0.6, "LSI correlation coefficient %s is not > 0.6" % cor)


    # def test_lee_mallet(self):
    #     global bg_corpus, corpus, bg_corpus2, corpus2

    #     # create a dictionary and corpus (bag of words)
    #     dictionary = corpora.Dictionary(bg_corpus2)
    #     bg_corpus = [dictionary.doc2bow(text) for text in bg_corpus2]
    #     corpus = [dictionary.doc2bow(text) for text in corpus2]

    #     # initialize an LDA transformation from background corpus
    #     lda = models.LdaMallet('/Users/kofola/Downloads/mallet-2.0.7/bin/mallet',
    #         corpus=bg_corpus, id2word=dictionary, num_topics=200, optimize_interval=10)
    #     corpus_lda = lda[corpus]

    #     # compute pairwise similarity matrix and extract upper triangular
    #     res = np.zeros((len(corpus), len(corpus)))
    #     for i, par1 in enumerate(corpus_lda):
    #         for j, par2 in enumerate(corpus_lda):
    #             res[i, j] = matutils.cossim(par1, par2)
    #     flat = res[matutils.triu_indices(len(corpus), 1)]

    #     cor = np.corrcoef(flat, human_sim_vector)[0, 1]
    #     logging.info("LDA correlation coefficient is %s" % cor)
    #     self.assertTrue(cor > 0.35) 
开发者ID:largelymfs,项目名称:topical_word_embeddings,代码行数:58,代码来源:test_lee.py


注:本文中的gensim.matutils.cossim方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。