本文整理汇总了 Python 中 nltk.metrics.BigramAssocMeasures.pmi 方法的典型用法代码示例。如果您正苦于以下问题:Python BigramAssocMeasures.pmi 方法的具体用法?Python BigramAssocMeasures.pmi 怎么用?Python BigramAssocMeasures.pmi 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 nltk.metrics.BigramAssocMeasures 的用法示例。
在下文中一共展示了BigramAssocMeasures.pmi方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_bigram2
# 需要导入模块: from nltk.metrics import BigramAssocMeasures [as 别名]
# 注意: pmi 是 BigramAssocMeasures 类的类方法, 不能通过 "from nltk.metrics.BigramAssocMeasures import pmi" 单独导入 (BigramAssocMeasures 不是模块), 请以 BigramAssocMeasures.pmi 的形式使用
def test_bigram2(self):
    """Check counts and PMI scores for a finder built with default arguments.

    The finder is created from an eight-token sentence in which every
    word appears twice in a row. The test asserts the bigram frequency
    table, the word frequency table, the relation between the token
    count and both frequency totals, and the PMI score of every bigram.
    """
    tokens = 'this this is is a a test test'.split()
    finder = BigramCollocationFinder.from_words(tokens)

    expected_bigram_counts = [
        (('a', 'a'), 1),
        (('a', 'test'), 1),
        (('is', 'a'), 1),
        (('is', 'is'), 1),
        (('test', 'test'), 1),
        (('this', 'is'), 1),
        (('this', 'this'), 1),
    ]
    expected_word_counts = [('a', 2), ('is', 2), ('test', 2), ('this', 2)]
    expected_pmi = [
        (('a', 'a'), 1.0),
        (('a', 'test'), 1.0),
        (('is', 'a'), 1.0),
        (('is', 'is'), 1.0),
        (('test', 'test'), 1.0),
        (('this', 'is'), 1.0),
        (('this', 'this'), 1.0),
    ]

    # python 2.6 does not have assertItemsEqual or assertListEqual, so
    # both sides are sorted and compared with plain assertEqual instead.
    actual_bigram_counts = sorted(finder.ngram_fd.items())
    self.assertEqual(actual_bigram_counts, sorted(expected_bigram_counts))

    actual_word_counts = sorted(finder.word_fd.items())
    self.assertEqual(actual_word_counts, sorted(expected_word_counts))

    # The asserted relation: the word total equals the token count, and
    # the bigram total is exactly one less than that.
    word_total = sum(finder.word_fd.values())
    bigram_total = sum(finder.ngram_fd.values())
    self.assertTrue(len(tokens) == word_total == bigram_total + 1)

    # Scores are floats, so they go through the close_enough helper
    # rather than an exact equality assertion.
    actual_pmi = sorted(finder.score_ngrams(BigramAssocMeasures.pmi))
    self.assertTrue(close_enough(actual_pmi, sorted(expected_pmi)))
示例2: test_bigram3
# 需要导入模块: from nltk.metrics import BigramAssocMeasures [as 别名]
# 注意: pmi 是 BigramAssocMeasures 类的类方法, 不能通过 "from nltk.metrics.BigramAssocMeasures import pmi" 单独导入 (BigramAssocMeasures 不是模块), 请以 BigramAssocMeasures.pmi 的形式使用
def test_bigram3(self):
    """Check counts and PMI scores for a finder built with window_size=3.

    Uses the same eight-token sentence as the other bigram tests. The
    test asserts the bigram frequency table, the word frequency table,
    the relation between the token count and both frequency totals, and
    the PMI score of every bigram.
    """
    tokens = 'this this is is a a test test'.split()
    finder = BigramCollocationFinder.from_words(tokens, window_size=3)

    expected_bigram_counts = [
        (('a', 'test'), 3),
        (('is', 'a'), 3),
        (('this', 'is'), 3),
        (('a', 'a'), 1),
        (('is', 'is'), 1),
        (('test', 'test'), 1),
        (('this', 'this'), 1),
    ]
    expected_word_counts = [('a', 2), ('is', 2), ('test', 2), ('this', 2)]
    expected_pmi = [
        (('a', 'test'), 1.584962500721156),
        (('is', 'a'), 1.584962500721156),
        (('this', 'is'), 1.584962500721156),
        (('a', 'a'), 0.0),
        (('is', 'is'), 0.0),
        (('test', 'test'), 0.0),
        (('this', 'this'), 0.0),
    ]

    # Order of items is not part of the contract being tested, so both
    # sides are sorted before comparison.
    actual_bigram_counts = sorted(finder.ngram_fd.items())
    self.assertEqual(actual_bigram_counts, sorted(expected_bigram_counts))

    actual_word_counts = sorted(finder.word_fd.items())
    self.assertEqual(actual_word_counts, sorted(expected_word_counts))

    # The asserted relation for this window size: adding 2 + 1 to the
    # bigram total and dividing by 2.0 gives back the token count.
    word_total = sum(finder.word_fd.values())
    bigram_total = sum(finder.ngram_fd.values())
    self.assertTrue(len(tokens) == word_total == (bigram_total + 2 + 1) / 2.0)

    # Scores are floats, so they go through the close_enough helper
    # rather than an exact equality assertion.
    actual_pmi = sorted(finder.score_ngrams(BigramAssocMeasures.pmi))
    self.assertTrue(close_enough(actual_pmi, sorted(expected_pmi)))
示例3: test_bigram5
# 需要导入模块: from nltk.metrics import BigramAssocMeasures [as 别名]
# 注意: pmi 是 BigramAssocMeasures 类的类方法, 不能通过 "from nltk.metrics.BigramAssocMeasures import pmi" 单独导入 (BigramAssocMeasures 不是模块), 请以 BigramAssocMeasures.pmi 的形式使用
def test_bigram5(self):
    """Check counts and PMI scores for a finder built with window_size=5.

    Uses the same eight-token sentence as the other bigram tests. The
    test asserts the bigram frequency table, the word frequency table,
    the relation between the token count and both frequency totals, and
    the PMI score of every bigram.
    """
    tokens = 'this this is is a a test test'.split()
    finder = BigramCollocationFinder.from_words(tokens, window_size=5)

    expected_bigram_counts = [
        (('a', 'test'), 4),
        (('is', 'a'), 4),
        (('this', 'is'), 4),
        (('is', 'test'), 3),
        (('this', 'a'), 3),
        (('a', 'a'), 1),
        (('is', 'is'), 1),
        (('test', 'test'), 1),
        (('this', 'this'), 1),
    ]
    expected_word_counts = [('a', 2), ('is', 2), ('test', 2), ('this', 2)]
    expected_pmi = [
        (('a', 'test'), 1.0),
        (('is', 'a'), 1.0),
        (('this', 'is'), 1.0),
        (('is', 'test'), 0.5849625007211562),
        (('this', 'a'), 0.5849625007211562),
        (('a', 'a'), -1.0),
        (('is', 'is'), -1.0),
        (('test', 'test'), -1.0),
        (('this', 'this'), -1.0),
    ]

    # Order of items is not part of the contract being tested, so both
    # sides are sorted before comparison.
    actual_bigram_counts = sorted(finder.ngram_fd.items())
    self.assertEqual(actual_bigram_counts, sorted(expected_bigram_counts))

    actual_word_counts = sorted(finder.word_fd.items())
    self.assertEqual(actual_word_counts, sorted(expected_word_counts))

    # The asserted relation for this window size: adding 4 + 3 + 2 + 1
    # to the bigram total and dividing by 4.0 gives back the token count.
    word_total = sum(finder.word_fd.values())
    bigram_total = sum(finder.ngram_fd.values())
    self.assertTrue(
        len(tokens) == word_total == (bigram_total + 4 + 3 + 2 + 1) / 4.0
    )

    # Scores are floats, so they go through the close_enough helper
    # rather than an exact equality assertion.
    actual_pmi = sorted(finder.score_ngrams(BigramAssocMeasures.pmi))
    self.assertTrue(close_enough(actual_pmi, sorted(expected_pmi)))