當前位置: 首頁>>代碼示例>>Python>>正文


Python webtext.words方法代碼示例

本文整理匯總了Python中nltk.corpus.webtext.words方法的典型用法代碼示例。如果您正苦於以下問題:Python webtext.words方法的具體用法?Python webtext.words怎麼用?Python webtext.words使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在nltk.corpus.webtext的用法示例。


在下文中一共展示了webtext.words方法的7個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: from_words

# 需要導入模塊: from nltk.corpus import webtext [as 別名]
# 或者: from nltk.corpus.webtext import words [as 別名]
def from_words(cls, words, window_size=2):
        """Build a BigramCollocationFinder from a sequence of words.

        Every window of ``window_size`` consecutive items contributes one
        count for its first word and one count for each pair formed by that
        first word and a later word in the same window.  With
        ``window_size > 2`` this therefore counts non-contiguous bigrams, in
        the style of Church and Hanks's (1990) association ratio.

        ``None`` entries are treated as placeholders: a window whose first
        item is ``None`` is ignored, and ``None`` is never used as the second
        member of a pair.

        :param words: the sequence of tokens to scan.
        :param window_size: width of the sliding window; must be at least 2.
        :raises ValueError: if ``window_size`` is smaller than 2.
        :return: ``cls(word_fd, bigram_fd, window_size=window_size)``.
        """
        if window_size < 2:
            raise ValueError("Specify window_size at least 2")

        word_counts = FreqDist()
        pair_counts = FreqDist()

        for window in ngrams(words, window_size, pad_right=True):
            head, followers = window[0], window[1:]
            if head is not None:
                word_counts[head] += 1
                for other in followers:
                    if other is None:
                        # Placeholder slot: nothing to pair the head with.
                        continue
                    pair_counts[(head, other)] += 1

        return cls(word_counts, pair_counts, window_size=window_size)
開發者ID:Thejas-1,項目名稱:Price-Comparator,代碼行數:22,代碼來源:collocations.py

示例2: from_words

# 需要導入模塊: from nltk.corpus import webtext [as 別名]
# 或者: from nltk.corpus.webtext import words [as 別名]
def from_words(cls, words, window_size=2):
        """Construct a BigramCollocationFinder for all bigrams in the given
        sequence.  When window_size > 2, count non-contiguous bigrams, in the
        style of Church and Hanks's (1990) association ratio.

        ``None`` entries in ``words`` are treated as placeholders rather than
        real tokens: they are neither counted as words nor used as either
        member of a bigram.

        :param words: the sequence of tokens to scan.
        :param window_size: width of the sliding window; must be at least 2.
        :raises ValueError: if ``window_size`` is smaller than 2.
        :return: ``cls(word_fd, bigram_fd, window_size=window_size)``.
        """
        wfd = FreqDist()
        bfd = FreqDist()

        if window_size < 2:
            raise ValueError("Specify window_size at least 2")

        for window in ngrams(words, window_size, pad_right=True):
            w1 = window[0]
            # Skip windows that start on a placeholder; otherwise None would
            # be counted as a word and (None, w2) pairs would be recorded.
            if w1 is None:
                continue
            wfd[w1] += 1
            for w2 in window[1:]:
                if w2 is not None:
                    bfd[(w1, w2)] += 1
        return cls(wfd, bfd, window_size=window_size)
開發者ID:EastonLee,項目名稱:FancyWord,代碼行數:20,代碼來源:collocations.py

示例3: _ngram_freqdist

# 需要導入模塊: from nltk.corpus import webtext [as 別名]
# 或者: from nltk.corpus.webtext import words [as 別名]
def _ngram_freqdist(words, n):
        """Return a FreqDist counting every contiguous ``n``-gram in ``words``.

        Each n-gram is represented as a tuple of exactly ``n`` items.  When
        ``words`` holds fewer than ``n`` items the result is empty.

        :param words: a sliceable sequence of tokens (must support ``len``).
        :param n: the n-gram length.
        """
        # The last full n-gram starts at index len(words) - n.  Stopping at
        # len(words) - 1 is only right for n == 2: larger n would emit
        # truncated trailing tuples, and n == 1 would drop the last item.
        return FreqDist(tuple(words[i:i + n]) for i in range(len(words) - n + 1))
開發者ID:Thejas-1,項目名稱:Price-Comparator,代碼行數:4,代碼來源:collocations.py

示例4: __init__

# 需要導入模塊: from nltk.corpus import webtext [as 別名]
# 或者: from nltk.corpus.webtext import words [as 別名]
def __init__(self, word_fd, bigram_fd, window_size=2):
        """Set up the finder from pre-computed frequency distributions.

        :param word_fd: FreqDist of individual word occurrences.
        :param bigram_fd: FreqDist of (possibly non-contiguous) bigram
            occurrences.
        :param window_size: the window width stored on the instance as
            ``self.window_size``.
        """
        # Base-class initialisation receives the two distributions; the
        # window width is kept on this instance afterwards.
        AbstractCollocationFinder.__init__(self, word_fd, bigram_fd)
        self.window_size = window_size
開發者ID:Thejas-1,項目名稱:Price-Comparator,代碼行數:8,代碼來源:collocations.py

示例5: demo

# 需要導入模塊: from nltk.corpus import webtext [as 別名]
# 或者: from nltk.corpus.webtext import words [as 別名]
def demo(scorer=None, compare_scorer=None):
    """Finds bigram collocations in the files of the WebText corpus.

    For every file in the corpus, prints the file id, the 15 best bigrams
    according to ``scorer``, and the Spearman correlation between the
    rankings produced by ``scorer`` and ``compare_scorer``.

    :param scorer: bigram association measure used for ranking; defaults to
        ``BigramAssocMeasures.likelihood_ratio``.
    :param compare_scorer: measure the ranking is compared against; defaults
        to ``BigramAssocMeasures.raw_freq``.
    """
    from nltk.metrics import BigramAssocMeasures, spearman_correlation, ranks_from_scores

    if scorer is None:
        scorer = BigramAssocMeasures.likelihood_ratio
    if compare_scorer is None:
        compare_scorer = BigramAssocMeasures.raw_freq

    from nltk.corpus import stopwords, webtext

    # Build the stop-word lookup once as a set: the filter runs for every
    # candidate word, and a list would be scanned linearly on each call.
    ignored_words = set(stopwords.words('english'))

    def word_filter(w):
        # True means "discard": very short tokens and stop words.
        return len(w) < 3 or w.lower() in ignored_words

    for file in webtext.fileids():
        words = [word.lower()
                 for word in webtext.words(file)]

        cf = BigramCollocationFinder.from_words(words)
        cf.apply_freq_filter(3)
        cf.apply_word_filter(word_filter)

        corr = spearman_correlation(ranks_from_scores(cf.score_ngrams(scorer)),
                                    ranks_from_scores(cf.score_ngrams(compare_scorer)))
        print(file)
        print('\t', [' '.join(tup) for tup in cf.nbest(scorer, 15)])
        print('\t Correlation to %s: %0.4f' % (compare_scorer.__name__, corr))

# Slows down loading too much
# bigram_measures = BigramAssocMeasures()
# trigram_measures = TrigramAssocMeasures() 
開發者ID:Thejas-1,項目名稱:Price-Comparator,代碼行數:33,代碼來源:collocations.py

示例6: _ngram_freqdist

# 需要導入模塊: from nltk.corpus import webtext [as 別名]
# 或者: from nltk.corpus.webtext import words [as 別名]
def _ngram_freqdist(words, n):
        """Return a FreqDist counting every contiguous ``n``-gram in ``words``.

        Each n-gram is represented as a tuple of exactly ``n`` items.  When
        ``words`` holds fewer than ``n`` items the result is empty.

        :param words: a sliceable sequence of tokens (must support ``len``).
        :param n: the n-gram length.
        """
        # The final complete n-gram begins at len(words) - n, so the range
        # must end there.  The previous bound (len(words) - 1) matched only
        # n == 2 and produced short tuples at the tail for larger n.
        return FreqDist(tuple(words[i:i+n]) for i in range(len(words) - n + 1))
開發者ID:EastonLee,項目名稱:FancyWord,代碼行數:4,代碼來源:collocations.py

示例7: demo

# 需要導入模塊: from nltk.corpus import webtext [as 別名]
# 或者: from nltk.corpus.webtext import words [as 別名]
def demo(scorer=None, compare_scorer=None):
    """Finds bigram collocations in the files of the WebText corpus.

    For every file in the corpus, prints the file id, the 15 best bigrams
    according to ``scorer``, and the Spearman correlation between the
    rankings produced by ``scorer`` and ``compare_scorer``.

    :param scorer: bigram association measure used for ranking; defaults to
        ``BigramAssocMeasures.likelihood_ratio``.
    :param compare_scorer: measure the ranking is compared against; defaults
        to ``BigramAssocMeasures.raw_freq``.
    """
    from nltk.metrics import BigramAssocMeasures, spearman_correlation, ranks_from_scores

    if scorer is None:
        scorer = BigramAssocMeasures.likelihood_ratio
    if compare_scorer is None:
        compare_scorer = BigramAssocMeasures.raw_freq

    from nltk.corpus import stopwords, webtext

    # A set gives constant-time membership tests; the filter below is called
    # once per candidate word, so a list lookup would be repeated linear work.
    ignored_words = set(stopwords.words('english'))

    def word_filter(w):
        # True means "discard": very short tokens and stop words.
        return len(w) < 3 or w.lower() in ignored_words

    for file in webtext.fileids():
        words = [word.lower()
                 for word in webtext.words(file)]

        cf = BigramCollocationFinder.from_words(words)
        cf.apply_freq_filter(3)
        cf.apply_word_filter(word_filter)

        print(file)
        print('\t', [' '.join(tup) for tup in cf.nbest(scorer, 15)])
        # Computed after the two prints above, as in the original ordering.
        corr = spearman_correlation(
            ranks_from_scores(cf.score_ngrams(scorer)),
            ranks_from_scores(cf.score_ngrams(compare_scorer)))
        print('\t Correlation to %s: %0.4f' % (compare_scorer.__name__, corr))

# Slows down loading too much
# bigram_measures = BigramAssocMeasures()
# trigram_measures = TrigramAssocMeasures() 
開發者ID:EastonLee,項目名稱:FancyWord,代碼行數:34,代碼來源:collocations.py


注:本文中的nltk.corpus.webtext.words方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。