Python Document.parse_from_tokens方法代码示例

本文整理汇总了Python中document.Document.parse_from_tokens方法的典型用法代码示例。如果您正苦于以下问题：Python Document.parse_from_tokens方法的具体用法？Python Document.parse_from_tokens怎么用？Python Document.parse_from_tokens使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类document.Document的用法示例。

在下文中一共展示了Document.parse_from_tokens方法的1个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: DocumentTest

# 需要导入模块: from document import Document [as 别名]
# 或者: from document.Document import parse_from_tokens [as 别名]
class DocumentTest(unittest.TestCase):
    """Tests for ``Document``: token parsing, serialization, topic updates.

    The vocabulary and model fixtures are loaded through paths relative to
    the current working directory (``../testdata/...``), so the tests must
    be launched from a directory where those paths resolve.
    """

    def setUp(self):
        """Build a fresh document and load the vocabulary/model fixtures."""
        # NOTE(review): 20 is presumably the number of topics and must match
        # the saved model fixture -- confirm against Document/Model.
        self.document = Document(20)
        self.vocabulary = Vocabulary()
        self.vocabulary.load("../testdata/vocabulary.dat")

        self.model = Model(20)
        self.model.load('../testdata/lda_model')

        self.doc_tokens = ['macbook', 'ipad',  # exist in vocabulary and model
                'mac os x', 'chrome',  # only exist in vocabulary
                'nokia', 'null']  # inexistent

    def _assert_topic_counts_non_increasing(self):
        """Assert each non-zero topic count is >= the one that follows it.

        The histogram is read from ``self.document`` on every call instead
        of being cached by the caller, so the check always inspects the
        histogram the document currently holds, even if an earlier
        operation replaced the ``doc_topic_hist`` object.
        """
        non_zeros = self.document.doc_topic_hist.non_zeros
        # range() (not xrange) keeps this runnable on Python 2 and 3 alike.
        for i in range(len(non_zeros) - 1):
            self.assertGreaterEqual(non_zeros[i].count,
                    non_zeros[i + 1].count)

    def test_parse_from_tokens(self):
        """Parsing keeps only known tokens and leaves counts ordered."""
        # initialize document during lda training.
        self.document.parse_from_tokens(
                self.doc_tokens, random, self.vocabulary)

        # Four of the six tokens exist in the vocabulary.
        self.assertEqual(4, self.document.num_words())
        self._assert_topic_counts_non_increasing()
        logging.info(str(self.document))

        # initialize document during lda inference.
        self.document.parse_from_tokens(
                self.doc_tokens, random, self.vocabulary, self.model)
        # Only two tokens exist in both the vocabulary and the model.
        self.assertEqual(2, self.document.num_words())
        # Checked against the histogram as it is after the second parse.
        self._assert_topic_counts_non_increasing()

    def test_serialize_and_parse(self):
        """A serialize/parse round trip reproduces the same document."""
        self.document.parse_from_tokens(
                self.doc_tokens, random, self.vocabulary)

        test_doc = Document(20)
        test_doc.parse_from_string(self.document.serialize_to_string())

        self.assertEqual(self.document.num_words(), test_doc.num_words())
        self.assertEqual(str(self.document), str(test_doc))

    def test_increase_decrease_topic(self):
        """Topic counts stay ordered after increases and decreases."""
        self.document.parse_from_tokens(
                self.doc_tokens, random, self.vocabulary, self.model)
        self.document.increase_topic(0, 5)
        self.document.increase_topic(4, 5)
        self.document.increase_topic(9, 5)
        self._assert_topic_counts_non_increasing()

        self.document.decrease_topic(4, 4)
        self.document.decrease_topic(9, 3)
        self._assert_topic_counts_non_increasing()

开发者ID:JackieXie168，项目名称:mltk，代码行数:63，代码来源:document_test.py

注：本文中的document.Document.parse_from_tokens方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。