當前位置: 首頁>>代碼示例>>Python>>正文


Python BigramCollocationFinder.from_words方法代碼示例

本文整理匯總了Python中nltk.collocations.BigramCollocationFinder.from_words方法的典型用法代碼示例。如果您正苦於以下問題:Python BigramCollocationFinder.from_words方法的具體用法?Python BigramCollocationFinder.from_words怎麼用?Python BigramCollocationFinder.from_words使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類nltk.collocations.BigramCollocationFinder的用法示例。


在下文中一共展示了BigramCollocationFinder.from_words方法的13個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: collocations

# 需要導入模塊: from nltk.collocations import BigramCollocationFinder [as 別名]
# 或者: from nltk.collocations.BigramCollocationFinder import from_words [as 別名]
def collocations(self, num=20, window_size=2):
        """
        Print collocations derived from the text, ignoring stopwords.

        The ranked bigrams are cached on the instance in ``_collocations``
        together with the ``num`` and ``window_size`` they were built for;
        they are rebuilt only when either argument differs from the cached
        call. The output is a single ``"; "``-separated listing of
        ``"word1 word2"`` strings.

        :seealso: find_collocations
        :param num: The maximum number of collocations to print.
        :type num: int
        :param window_size: The number of tokens spanned by a collocation (default=2)
        :type window_size: int
        """
        cache_is_valid = (
            '_collocations' in self.__dict__
            and self._num == num
            and self._window_size == window_size
        )
        if not cache_is_valid:
            from nltk.corpus import stopwords

            # A frozenset gives a constant-time membership test per word
            # instead of scanning the stopword list each time.
            ignored_words = frozenset(stopwords.words('english'))
            finder = BigramCollocationFinder.from_words(self.tokens, window_size)
            finder.apply_freq_filter(2)
            # Words shorter than three characters, or whose lowercase form is
            # an English stopword, are handed to the finder's word filter.
            finder.apply_word_filter(lambda w: len(w) < 3 or w.lower() in ignored_words)
            bigram_measures = BigramAssocMeasures()
            ranked = finder.nbest(bigram_measures.likelihood_ratio, num)

            # Store the result and its cache key together, and only after the
            # build succeeded. Writing the key first would let a failed
            # rebuild leave the new key paired with the previous call's
            # collocations, which the next call would then return as valid.
            self._collocations = ranked
            self._num = num
            self._window_size = window_size
        colloc_strings = [w1 + ' ' + w2 for w1, w2 in self._collocations]
        print(tokenwrap(colloc_strings, separator="; "))
開發者ID:rafasashi,項目名稱:razzy-spinner,代碼行數:26,代碼來源:text.py

示例2: test_bigram2

# 需要導入模塊: from nltk.collocations import BigramCollocationFinder [as 別名]
# 或者: from nltk.collocations.BigramCollocationFinder import from_words [as 別名]
def test_bigram2(self):
        """Check bigram counts, word counts and PMI scores with the default window."""
        tokens = 'this this is is a a test test'.split()
        finder = BigramCollocationFinder.from_words(tokens)

        # The same seven pairs appear in both the count and the score
        # expectations, each with a single shared value.
        expected_pairs = [
            ('a', 'a'), ('a', 'test'), ('is', 'a'), ('is', 'is'),
            ('test', 'test'), ('this', 'is'), ('this', 'this'),
        ]
        expected_counts = [(pair, 1) for pair in expected_pairs]
        expected_pmi = [(pair, 1.0) for pair in expected_pairs]
        expected_words = [('a', 2), ('is', 2), ('test', 2), ('this', 2)]

        # python 2.6 does not have assertItemsEqual or assertListEqual, so
        # both sides are sorted and compared with assertEqual.
        self.assertEqual(sorted(finder.ngram_fd.items()), sorted(expected_counts))
        self.assertEqual(sorted(finder.word_fd.items()), sorted(expected_words))

        # Token total, word-frequency total and bigram total must line up.
        word_total = sum(finder.word_fd.values())
        bigram_total = sum(finder.ngram_fd.values())
        self.assertTrue(len(tokens) == word_total == bigram_total + 1)

        actual_pmi = sorted(finder.score_ngrams(BigramAssocMeasures.pmi))
        self.assertTrue(close_enough(actual_pmi, sorted(expected_pmi)))
開發者ID:rafasashi,項目名稱:razzy-spinner,代碼行數:21,代碼來源:test_collocations.py

示例3: test_bigram3

# 需要導入模塊: from nltk.collocations import BigramCollocationFinder [as 別名]
# 或者: from nltk.collocations.BigramCollocationFinder import from_words [as 別名]
def test_bigram3(self):
        """Check bigram counts, word counts and PMI scores with ``window_size=3``."""
        tokens = 'this this is is a a test test'.split()
        finder = BigramCollocationFinder.from_words(tokens, window_size=3)

        # Pairs of two different words are expected three times each,
        # pairs of a doubled word once each.
        mixed_pairs = [('a', 'test'), ('is', 'a'), ('this', 'is')]
        doubled_pairs = [('a', 'a'), ('is', 'is'), ('test', 'test'), ('this', 'this')]

        expected_counts = (
            [(pair, 3) for pair in mixed_pairs]
            + [(pair, 1) for pair in doubled_pairs]
        )
        expected_pmi = (
            [(pair, 1.584962500721156) for pair in mixed_pairs]
            + [(pair, 0.0) for pair in doubled_pairs]
        )
        expected_words = [('a', 2), ('is', 2), ('test', 2), ('this', 2)]

        self.assertEqual(sorted(finder.ngram_fd.items()), sorted(expected_counts))
        self.assertEqual(sorted(finder.word_fd.items()), sorted(expected_words))

        # Relates the bigram total back to the number of tokens.
        word_total = sum(finder.word_fd.values())
        bigram_total = sum(finder.ngram_fd.values())
        self.assertTrue(len(tokens) == word_total == (bigram_total + 2 + 1) / 2.0)

        actual_pmi = sorted(finder.score_ngrams(BigramAssocMeasures.pmi))
        self.assertTrue(close_enough(actual_pmi, sorted(expected_pmi)))
開發者ID:rafasashi,項目名稱:razzy-spinner,代碼行數:19,代碼來源:test_collocations.py

示例4: test_bigram5

# 需要導入模塊: from nltk.collocations import BigramCollocationFinder [as 別名]
# 或者: from nltk.collocations.BigramCollocationFinder import from_words [as 別名]
def test_bigram5(self):
        """Check bigram counts, word counts and PMI scores with ``window_size=5``."""
        tokens = 'this this is is a a test test'.split()
        finder = BigramCollocationFinder.from_words(tokens, window_size=5)

        # Expected pairs grouped by the count / PMI value they share.
        four_time_pairs = [('a', 'test'), ('is', 'a'), ('this', 'is')]
        three_time_pairs = [('is', 'test'), ('this', 'a')]
        doubled_pairs = [('a', 'a'), ('is', 'is'), ('test', 'test'), ('this', 'this')]

        expected_counts = (
            [(pair, 4) for pair in four_time_pairs]
            + [(pair, 3) for pair in three_time_pairs]
            + [(pair, 1) for pair in doubled_pairs]
        )
        expected_pmi = (
            [(pair, 1.0) for pair in four_time_pairs]
            + [(pair, 0.5849625007211562) for pair in three_time_pairs]
            + [(pair, -1.0) for pair in doubled_pairs]
        )
        expected_words = [('a', 2), ('is', 2), ('test', 2), ('this', 2)]

        self.assertEqual(sorted(finder.ngram_fd.items()), sorted(expected_counts))
        self.assertEqual(sorted(finder.word_fd.items()), sorted(expected_words))

        # Relates the bigram total back to the number of tokens.
        word_total = sum(finder.word_fd.values())
        bigram_total = sum(finder.ngram_fd.values())
        self.assertTrue(
            len(tokens) == word_total == (bigram_total + 4 + 3 + 2 + 1) / 4.0
        )

        actual_pmi = sorted(finder.score_ngrams(BigramAssocMeasures.pmi))
        self.assertTrue(close_enough(actual_pmi, sorted(expected_pmi)))
開發者ID:rafasashi,項目名稱:razzy-spinner,代碼行數:19,代碼來源:test_collocations.py

示例5: calculate_ngram_diversity

# 需要導入模塊: from nltk.collocations import BigramCollocationFinder [as 別名]
# 或者: from nltk.collocations.BigramCollocationFinder import from_words [as 別名]
def calculate_ngram_diversity(corpus):
    """
    Calculates unigram and bigram diversity

    The unigram score is the number of distinct tokens divided by
    ``len(corpus)``. The bigram score is the number of distinct bigrams
    recorded by the collocation finder divided by the finder's ``N``.

    Args:
        corpus: tokenized list of sentences sampled

    Returns:
        uni_diversity: distinct-1 score
        bi_diversity: distinct-2 score
        Both are ``0.0`` when ``corpus`` is empty.

    """
    # len(corpus) is a denominator below, so an empty corpus would raise
    # ZeroDivisionError; report zero diversity for it instead.
    if len(corpus) == 0:
        return 0.0, 0.0

    bigram_finder = BigramCollocationFinder.from_words(corpus)
    bi_diversity = len(bigram_finder.ngram_fd) / bigram_finder.N

    dist = FreqDist(corpus)
    uni_diversity = len(dist) / len(corpus)

    return uni_diversity, bi_diversity
開發者ID:HareeshBahuleyan,項目名稱:tf-var-attention,代碼行數:21,代碼來源:eval_utils.py

示例6: collocations

# 需要導入模塊: from nltk.collocations import BigramCollocationFinder [as 別名]
# 或者: from nltk.collocations.BigramCollocationFinder import from_words [as 別名]
def collocations(self, num=20, window_size=2):
        """
        Print collocations derived from the text, ignoring stopwords.

        The ranked bigrams are cached on the instance in ``_collocations``
        together with the ``num`` and ``window_size`` they were built for;
        they are rebuilt (announced by a progress message) only when either
        argument differs from the cached call.

        :seealso: find_collocations
        :param num: The maximum number of collocations to print.
        :type num: int
        :param window_size: The number of tokens spanned by a collocation (default=2)
        :type window_size: int
        """
        cache_is_valid = (
            '_collocations' in self.__dict__
            and self._num == num
            and self._window_size == window_size
        )
        if not cache_is_valid:
            # Single-argument print(...) emits the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("Building collocations list")
            from nltk.corpus import stopwords

            # A frozenset gives a constant-time membership test per word
            # instead of scanning the stopword list each time.
            ignored_words = frozenset(stopwords.words('english'))
            finder = BigramCollocationFinder.from_words(self.tokens, window_size)
            finder.apply_freq_filter(2)
            # Words shorter than three characters, or whose lowercase form is
            # an English stopword, are handed to the finder's word filter.
            finder.apply_word_filter(lambda w: len(w) < 3 or w.lower() in ignored_words)
            bigram_measures = BigramAssocMeasures()
            ranked = finder.nbest(bigram_measures.likelihood_ratio, num)

            # Store the result and its cache key together, and only after the
            # build succeeded. Writing the key first would let a failed
            # rebuild leave the new key paired with the previous call's
            # collocations, which the next call would then return as valid.
            self._collocations = ranked
            self._num = num
            self._window_size = window_size
        colloc_strings = [w1 + ' ' + w2 for w1, w2 in self._collocations]
        print(tokenwrap(colloc_strings, separator="; "))
開發者ID:blackye,項目名稱:luscan-devel,代碼行數:26,代碼來源:text.py

示例7: cal_Distinct

# 需要導入模塊: from nltk.collocations import BigramCollocationFinder [as 別名]
# 或者: from nltk.collocations.BigramCollocationFinder import from_words [as 別名]
def cal_Distinct(corpus):
    """
    Calculates unigram and bigram diversity

    distinct-1 is ``len(FreqDist(corpus)) / len(corpus)``; distinct-2 is the
    number of distinct bigrams held by the collocation finder divided by the
    finder's ``N``.
    Args:
        corpus: tokenized list of sentences sampled
    Returns:
        uni_diversity: distinct-1 score
        bi_diversity: distinct-2 score
        Both scores are ``0.0`` for an empty corpus.
    """
    # Guard the divisions below: with no tokens, len(corpus) is zero and the
    # original computation raised ZeroDivisionError.
    if len(corpus) == 0:
        return 0.0, 0.0

    bigram_finder = BigramCollocationFinder.from_words(corpus)
    bi_diversity = len(bigram_finder.ngram_fd) / bigram_finder.N

    dist = FreqDist(corpus)
    uni_diversity = len(dist) / len(corpus)

    return uni_diversity, bi_diversity
開發者ID:gmftbyGMFTBY,項目名稱:MultiTurnDialogZoo,代碼行數:18,代碼來源:metric.py

示例8: bigrams

# 需要導入模塊: from nltk.collocations import BigramCollocationFinder [as 別名]
# 或者: from nltk.collocations.BigramCollocationFinder import from_words [as 別名]
def bigrams(text, score_fn=BigramAssocMeasures.chi_sq, n=500):
    """Map every token of *text* and each of its best bigrams to ``True``.

    The text is tokenized, the ``n`` best bigrams under ``score_fn`` are
    taken from a collocation finder built over the tokens, and the tokens
    followed by those bigrams become the keys of the returned dict.
    """
    words = tokenize(text)
    finder = BigramCollocationFinder.from_words(words)
    top_bigrams = finder.nbest(score_fn, n)
    # Tokens come first, then the bigram tuples; every key maps to True.
    return dict.fromkeys(itertools.chain(words, top_bigrams), True)
開發者ID:stathius,項目名稱:yenlp,代碼行數:8,代碼來源:load_samples.py

示例9: stopword_filtered_bigrams

# 需要導入模塊: from nltk.collocations import BigramCollocationFinder [as 別名]
# 或者: from nltk.collocations.BigramCollocationFinder import from_words [as 別名]
def stopword_filtered_bigrams(text, score_fn=BigramAssocMeasures.chi_sq, n=500):
    """Map the non-stopword tokens of *text* and their best bigrams to ``True``.

    Tokens found in the English stopword set are dropped first; the ``n``
    best bigrams under ``score_fn`` are then taken from a collocation finder
    built over the remaining tokens.
    """
    english_stopwords = set(stopwords.words('english'))
    kept_words = []
    for token in tokenize(text):
        if token not in english_stopwords:
            kept_words.append(token)

    finder = BigramCollocationFinder.from_words(kept_words)
    top_bigrams = finder.nbest(score_fn, n)
    # Kept tokens come first, then the bigram tuples; every key maps to True.
    return {feature: True for feature in itertools.chain(kept_words, top_bigrams)}
開發者ID:stathius,項目名稱:yenlp,代碼行數:9,代碼來源:load_samples.py

示例10: collocation_list

# 需要導入模塊: from nltk.collocations import BigramCollocationFinder [as 別名]
# 或者: from nltk.collocations.BigramCollocationFinder import from_words [as 別名]
def collocation_list(self, num=20, window_size=2):
        """
        Return collocations derived from the text, ignoring stopwords.

        The ranked bigrams are cached on the instance in ``_collocations``
        together with the ``num`` and ``window_size`` they were built for;
        they are rebuilt only when either argument differs from the cached
        call.

        :param num: The maximum number of collocations to return.
        :type num: int
        :param window_size: The number of tokens spanned by a collocation (default=2)
        :type window_size: int
        :return: One ``"word1 word2"`` string per cached collocation.
        :rtype: list(str)
        """
        cache_is_valid = (
            "_collocations" in self.__dict__
            and self._num == num
            and self._window_size == window_size
        )
        if not cache_is_valid:
            from nltk.corpus import stopwords

            # A frozenset gives a constant-time membership test per word
            # instead of scanning the stopword list each time.
            ignored_words = frozenset(stopwords.words("english"))
            finder = BigramCollocationFinder.from_words(self.tokens, window_size)
            finder.apply_freq_filter(2)
            # Words shorter than three characters, or whose lowercase form is
            # an English stopword, are handed to the finder's word filter.
            finder.apply_word_filter(lambda w: len(w) < 3 or w.lower() in ignored_words)
            bigram_measures = BigramAssocMeasures()
            ranked = finder.nbest(bigram_measures.likelihood_ratio, num)

            # Store the result and its cache key together, and only after the
            # build succeeded. Writing the key first would let a failed
            # rebuild leave the new key paired with the previous call's
            # collocations, which the next call would then return as valid.
            self._collocations = ranked
            self._num = num
            self._window_size = window_size
        return [w1 + " " + w2 for w1, w2 in self._collocations]
開發者ID:V1EngineeringInc,項目名稱:V1EngineeringInc-Docs,代碼行數:29,代碼來源:text.py

示例11: test_bigram2

# 需要導入模塊: from nltk.collocations import BigramCollocationFinder [as 別名]
# 或者: from nltk.collocations.BigramCollocationFinder import from_words [as 別名]
def test_bigram2(self):
        """Default-window finder: verify bigram counts, word counts and PMI."""
        words = 'this this is is a a test test'.split()
        collocs = BigramCollocationFinder.from_words(words)

        bigram_counts = {
            ('a', 'a'): 1,
            ('a', 'test'): 1,
            ('is', 'a'): 1,
            ('is', 'is'): 1,
            ('test', 'test'): 1,
            ('this', 'is'): 1,
            ('this', 'this'): 1,
        }
        word_counts = {'a': 2, 'is': 2, 'test': 2, 'this': 2}
        pmi_scores = {
            ('a', 'a'): 1.0,
            ('a', 'test'): 1.0,
            ('is', 'a'): 1.0,
            ('is', 'is'): 1.0,
            ('test', 'test'): 1.0,
            ('this', 'is'): 1.0,
            ('this', 'this'): 1.0,
        }

        # python 2.6 does not have assertItemsEqual or assertListEqual, so
        # sorted (key, value) lists are compared instead.
        self.assertEqual(
            sorted(collocs.ngram_fd.items()), sorted(bigram_counts.items())
        )
        self.assertEqual(
            sorted(collocs.word_fd.items()), sorted(word_counts.items())
        )

        # Token total, word-frequency total and bigram total must line up.
        n_words = sum(collocs.word_fd.values())
        n_bigrams = sum(collocs.ngram_fd.values())
        self.assertTrue(len(words) == n_words == n_bigrams + 1)

        scored = sorted(collocs.score_ngrams(BigramAssocMeasures.pmi))
        self.assertTrue(close_enough(scored, sorted(pmi_scores.items())))
開發者ID:V1EngineeringInc,項目名稱:V1EngineeringInc-Docs,代碼行數:45,代碼來源:test_collocations.py

示例12: test_bigram3

# 需要導入模塊: from nltk.collocations import BigramCollocationFinder [as 別名]
# 或者: from nltk.collocations.BigramCollocationFinder import from_words [as 別名]
def test_bigram3(self):
        """``window_size=3`` finder: verify bigram counts, word counts and PMI."""
        words = 'this this is is a a test test'.split()
        collocs = BigramCollocationFinder.from_words(words, window_size=3)

        bigram_counts = {
            ('a', 'test'): 3,
            ('is', 'a'): 3,
            ('this', 'is'): 3,
            ('a', 'a'): 1,
            ('is', 'is'): 1,
            ('test', 'test'): 1,
            ('this', 'this'): 1,
        }
        word_counts = {'a': 2, 'is': 2, 'test': 2, 'this': 2}
        pmi_scores = {
            ('a', 'test'): 1.584962500721156,
            ('is', 'a'): 1.584962500721156,
            ('this', 'is'): 1.584962500721156,
            ('a', 'a'): 0.0,
            ('is', 'is'): 0.0,
            ('test', 'test'): 0.0,
            ('this', 'this'): 0.0,
        }

        self.assertEqual(
            sorted(collocs.ngram_fd.items()), sorted(bigram_counts.items())
        )
        self.assertEqual(
            sorted(collocs.word_fd.items()), sorted(word_counts.items())
        )

        # Relates the bigram total back to the number of tokens.
        n_words = sum(collocs.word_fd.values())
        n_bigrams = sum(collocs.ngram_fd.values())
        self.assertTrue(len(words) == n_words == (n_bigrams + 2 + 1) / 2.0)

        scored = sorted(collocs.score_ngrams(BigramAssocMeasures.pmi))
        self.assertTrue(close_enough(scored, sorted(pmi_scores.items())))
開發者ID:V1EngineeringInc,項目名稱:V1EngineeringInc-Docs,代碼行數:45,代碼來源:test_collocations.py

示例13: test_bigram5

# 需要導入模塊: from nltk.collocations import BigramCollocationFinder [as 別名]
# 或者: from nltk.collocations.BigramCollocationFinder import from_words [as 別名]
def test_bigram5(self):
        """``window_size=5`` finder: verify bigram counts, word counts and PMI."""
        words = 'this this is is a a test test'.split()
        collocs = BigramCollocationFinder.from_words(words, window_size=5)

        bigram_counts = {
            ('a', 'test'): 4,
            ('is', 'a'): 4,
            ('this', 'is'): 4,
            ('is', 'test'): 3,
            ('this', 'a'): 3,
            ('a', 'a'): 1,
            ('is', 'is'): 1,
            ('test', 'test'): 1,
            ('this', 'this'): 1,
        }
        word_counts = {'a': 2, 'is': 2, 'test': 2, 'this': 2}
        pmi_scores = {
            ('a', 'test'): 1.0,
            ('is', 'a'): 1.0,
            ('this', 'is'): 1.0,
            ('is', 'test'): 0.5849625007211562,
            ('this', 'a'): 0.5849625007211562,
            ('a', 'a'): -1.0,
            ('is', 'is'): -1.0,
            ('test', 'test'): -1.0,
            ('this', 'this'): -1.0,
        }

        self.assertEqual(
            sorted(collocs.ngram_fd.items()), sorted(bigram_counts.items())
        )
        self.assertEqual(
            sorted(collocs.word_fd.items()), sorted(word_counts.items())
        )

        # Relates the bigram total back to the number of tokens.
        n_words = sum(collocs.word_fd.values())
        n_bigrams = sum(collocs.ngram_fd.values())
        self.assertTrue(
            len(words) == n_words == (n_bigrams + 4 + 3 + 2 + 1) / 4.0
        )

        scored = sorted(collocs.score_ngrams(BigramAssocMeasures.pmi))
        self.assertTrue(close_enough(scored, sorted(pmi_scores.items())))
開發者ID:V1EngineeringInc,項目名稱:V1EngineeringInc-Docs,代碼行數:49,代碼來源:test_collocations.py


注:本文中的nltk.collocations.BigramCollocationFinder.from_words方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。