本文整理汇总了Python中document.Document.parse_from_tokens方法的典型用法代码示例。如果您正苦于以下问题:Python Document.parse_from_tokens方法的具体用法?Python Document.parse_from_tokens怎么用?Python Document.parse_from_tokens使用的例子?那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类document.Document的用法示例。
在下文中一共展示了Document.parse_from_tokens方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: DocumentTest
# 需要导入模块: from document import Document [as 别名]
# 或者: from document.Document import parse_from_tokens [as 别名]
class DocumentTest(unittest.TestCase):
    """Unit tests for ``Document``: token parsing, serialization, topic updates.

    The fixtures are read from ``../testdata`` relative to the current working
    directory, so the tests must be run from a directory where that path
    resolves.
    """

    def setUp(self):
        """Create a fresh document and load the vocabulary/model fixtures."""
        # NOTE(review): 20 is presumably the number of topics shared by the
        # document and the model -- confirm against Document/Model.
        self.document = Document(20)
        self.vocabulary = Vocabulary()
        self.vocabulary.load("../testdata/vocabulary.dat")
        self.model = Model(20)
        self.model.load('../testdata/lda_model')
        self.doc_tokens = ['macbook', 'ipad',     # exist in vocabulary and model
                           'mac os x', 'chrome',  # only exist in vocabulary
                           'nokia', 'null']       # inexistent

    def _assert_counts_non_increasing(self, topic_hist):
        """Assert that ``topic_hist.non_zeros`` is ordered by descending count.

        Every entry's ``count`` must be greater than or equal to the count of
        the entry that follows it. An empty or single-entry histogram passes
        trivially.
        """
        non_zeros = topic_hist.non_zeros
        # Index-based access on purpose: only len() and integer indexing are
        # known to be supported by the container. ``range`` (rather than the
        # Python-2-only ``xrange``) keeps this runnable on Python 2 and 3.
        for i in range(len(non_zeros) - 1):
            self.assertGreaterEqual(non_zeros[i].count,
                                    non_zeros[i + 1].count)

    def test_parse_from_tokens(self):
        """Parse tokens with and without a model and check the results."""
        # initialize document during lda training.
        self.document.parse_from_tokens(
            self.doc_tokens, random, self.vocabulary)
        # Four of the six tokens are in the vocabulary.
        self.assertEqual(4, self.document.num_words())
        self._assert_counts_non_increasing(self.document.doc_topic_hist)
        logging.info(str(self.document))

        # initialize document during lda inference.
        self.document.parse_from_tokens(
            self.doc_tokens, random, self.vocabulary, self.model)
        # Only two tokens are in both the vocabulary and the model.
        self.assertEqual(2, self.document.num_words())
        # Read the histogram again after re-parsing: a reference taken before
        # this call could be stale if parse_from_tokens rebinds
        # doc_topic_hist, and the check would then inspect the old histogram.
        self._assert_counts_non_increasing(self.document.doc_topic_hist)

    def test_serialize_and_parse(self):
        """Round-trip a document through its string serialization."""
        self.document.parse_from_tokens(
            self.doc_tokens, random, self.vocabulary)

        test_doc = Document(20)
        test_doc.parse_from_string(self.document.serialize_to_string())

        self.assertEqual(self.document.num_words(), test_doc.num_words())
        self.assertEqual(str(self.document), str(test_doc))

    def test_increase_decrease_topic(self):
        """Check the histogram stays ordered after topic count updates."""
        self.document.parse_from_tokens(
            self.doc_tokens, random, self.vocabulary, self.model)

        self.document.increase_topic(0, 5)
        self.document.increase_topic(4, 5)
        self.document.increase_topic(9, 5)
        self._assert_counts_non_increasing(self.document.doc_topic_hist)

        self.document.decrease_topic(4, 4)
        self.document.decrease_topic(9, 3)
        self._assert_counts_non_increasing(self.document.doc_topic_hist)