当前位置: 首页>>代码示例>>Python>>正文


Python corpus.Corpus类代码示例

本文整理汇总了Python中orangecontrib.text.corpus.Corpus类的典型用法代码示例。如果您正苦于以下问题：Python Corpus类的具体用法？Python Corpus怎么用？Python Corpus使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Corpus类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: open_file

    def open_file(self, path=None, data=None):
        """Load a corpus from ``data`` or from ``path`` and refresh the widget state.

        ``data`` takes precedence over ``path``. When neither is truthy the
        method returns right after closing the context, clearing the errors
        and emptying both attribute models.

        Args:
            path: Location passed to ``Corpus.from_file``; its base name
                without the extension becomes the corpus name.
            data: Object with a ``domain`` attribute, converted through
                ``Corpus.from_table``.
        """
        self.closeContext()
        self.Error.clear()
        self.unused_attrs_model[:] = []
        self.used_attrs_model[:] = []
        if data:
            self.corpus = Corpus.from_table(data.domain, data)
        elif path:
            try:
                self.corpus = Corpus.from_file(path)
                self.corpus.name = os.path.splitext(os.path.basename(path))[0]
            except Exception as err:
                # ``Exception`` rather than ``BaseException`` so that
                # KeyboardInterrupt/SystemExit are not swallowed.
                self.Error.read_file(path, str(err))
                # Stop here, like the "no text features" branch below:
                # continuing would dereference a corpus that failed to load.
                self.Outputs.corpus.send(None)
                return
        else:
            return

        self.update_info()
        self.used_attrs = list(self.corpus.text_features)
        if not self.corpus.text_features:
            self.Error.corpus_without_text_features()
            self.Outputs.corpus.send(None)
            return
        self.openContext(self.corpus)
        self.used_attrs_model.extend(self.used_attrs)
        # Remaining string metas that are not already selected as text features.
        self.unused_attrs_model.extend(
            [f for f in self.corpus.domain.metas
             if f.is_string and f not in self.used_attrs_model])
开发者ID:s-alexey,项目名称:orange3-text,代码行数:27,代码来源:owcorpus.py

示例2: test_compute_values

    def test_compute_values(self):
        """Converting the source corpus into the bag-of-words domain matches ``transform``."""
        source = Corpus.from_file('deerwester')
        vectorizer = BowVectorizer()

        expected = vectorizer.transform(source)
        recomputed = Corpus.from_table(expected.domain, source)

        self.assertEqual(expected.domain, recomputed.domain)
        # ``nnz`` of the element-wise inequality must be zero.
        mismatches = (expected.X != recomputed.X).nnz
        self.assertEqual(mismatches, 0)
开发者ID:s-alexey,项目名称:orange3-text,代码行数:9,代码来源:test_bowvectorizer.py

示例3: test_infer_text_features

    def test_infer_text_features(self):
        """Each listed corpus exposes exactly one text feature with the expected name."""
        expectations = (
            ('friends-transcripts', 'Quote'),
            ('deerwester', 'Text'),
        )
        for dataset, feature_name in expectations:
            text_features = Corpus.from_file(dataset).text_features
            self.assertEqual(len(text_features), 1)
            self.assertEqual(text_features[0].name, feature_name)
开发者ID:s-alexey,项目名称:orange3-text,代码行数:10,代码来源:test_corpus.py

示例4: test_compute_values_to_different_domain

    def test_compute_values_to_different_domain(self):
        """A second corpus transformed into a bag-of-words domain gets the same attributes."""
        source = Corpus.from_file('deerwester')
        destination = Corpus.from_file('book-excerpts')

        # Both corpora start out without any attributes.
        for corpus in (source, destination):
            self.assertFalse(corpus.domain.attributes)

        bow_domain = BowVectorizer().transform(source).domain
        computed = destination.transform(bow_domain)

        self.assertEqual(bow_domain.attributes, computed.domain.attributes)
开发者ID:s-alexey,项目名称:orange3-text,代码行数:11,代码来源:test_bowvectorizer.py

示例5: test_corpus_from_file

    def test_corpus_from_file(self):
        """Loading a corpus by name yields the expected document count and a single meta."""
        expected_sizes = (
            ('book-excerpts', 140),
            ('deerwester', 9),
        )
        for dataset, n_docs in expected_sizes:
            corpus = Corpus.from_file(dataset)
            self.assertEqual(len(corpus), n_docs)
            self.assertEqual(len(corpus.domain), 1)
            self.assertEqual(len(corpus.domain.metas), 1)
            self.assertEqual(corpus.metas.shape, (n_docs, 1))
开发者ID:s-alexey,项目名称:orange3-text,代码行数:12,代码来源:test_corpus.py

示例6: test_corpus_from_file_just_text

    def test_corpus_from_file_just_text(self):
        """The deerwester file loads as 9 documents with an empty domain and one meta."""
        dataset_file = os.path.join(DATASET_PATH, 'deerwester.tab')
        corpus = Corpus.from_file(dataset_file)

        self.assertEqual(len(corpus), 9)
        self.assertEqual(len(corpus.domain), 0)
        self.assertEqual(len(corpus.domain.metas), 1)
        self.assertEqual(corpus.metas.shape, (9, 1))
开发者ID:kafom,项目名称:orange3-text,代码行数:7,代码来源:test_corpus.py

示例7: test_corpus_from_file

    def test_corpus_from_file(self):
        """The book excerpts file loads as 140 documents with an empty domain and two metas."""
        dataset_file = os.path.join(DATASET_PATH, 'bookexcerpts.txt')
        corpus = Corpus.from_file(dataset_file)
        self.assertEqual(len(corpus), 140)

        self.assertEqual(len(corpus.domain), 0)
        self.assertEqual(len(corpus.domain.metas), 2)
        self.assertEqual(corpus.metas.shape, (140, 2))
开发者ID:kernc,项目名称:orange3-text,代码行数:7,代码来源:test_corpus.py

示例8: main

def main():
    """Show an ``OWWordCloud`` widget fed with a sample topic table and a one-document corpus."""
    from Orange.data import Table, Domain, ContinuousVariable, StringVariable

    lyric = 'hey~mr. tallyman tally~me banana daylight come and me wanna go home'
    # '~' stands for a space inside a single entry; the result is one column of strings.
    words = np.array(
        [token.replace('~', ' ') for token in lyric.split()],
        dtype=object,
        ndmin=2,
    ).T
    weights = np.random.random((len(words), 1))

    # Build the meta columns: a word column followed by a weight column per topic.
    topic_metas = np.zeros((len(words), 0))
    meta_vars = []
    for index, weight_column in enumerate(weights.T):
        topic_metas = np.column_stack((topic_metas, words, weight_column))
        meta_vars.extend([
            StringVariable('Topic' + str(index)),
            ContinuousVariable('weights'),
        ])
    topic_domain = Domain([], metas=meta_vars)
    topic_table = Table.from_numpy(
        topic_domain,
        X=np.zeros((len(words), 0)),
        metas=topic_metas,
    )

    app = QtGui.QApplication([''])
    widget = OWWordCloud()
    widget.on_topics_change(topic_table)

    # A corpus with a single document made of all the words joined by spaces.
    text_domain = Domain([], metas=[StringVariable('text')])
    joined_text = ' '.join(words.flat)
    corpus = Corpus.from_numpy(
        text_domain,
        X=np.zeros((1, 0)),
        metas=np.array([[joined_text]]),
    )
    widget.on_corpus_change(corpus)
    widget.show()
    app.exec()
开发者ID:RachitKansal,项目名称:orange3-text,代码行数:25,代码来源:owwordcloud.py

示例9: test_transform

    def test_transform(self):
        """``transform`` returns a ``Corpus`` whose domain has 43 entries for deerwester."""
        vectorizer = BowVectorizer()
        documents = Corpus.from_file('deerwester')

        transformed = vectorizer.transform(documents)

        self.assertIsInstance(transformed, Corpus)
        self.assertEqual(len(transformed.domain), 43)
开发者ID:s-alexey,项目名称:orange3-text,代码行数:7,代码来源:test_bowvectorizer.py

示例10: test_documents

    def test_documents(self):
        """``documents`` has one entry per corpus row and every entry is a ``str``."""
        corpus = Corpus.from_file('book-excerpts')
        documents = corpus.documents
        seen_types = {type(document) for document in documents}

        self.assertEqual(len(documents), len(corpus))
        self.assertEqual(len(seen_types), 1)
        self.assertIn(str, seen_types)
开发者ID:s-alexey,项目名称:orange3-text,代码行数:8,代码来源:test_corpus.py

示例11: test_corpus_not_eq

    def test_corpus_not_eq(self):
        """A corpus that differs from the reference in one component compares unequal."""
        base = Corpus.from_file('book-excerpts')
        doc_count = base.X.shape[0]

        # Different text features (none at all).
        variant = Corpus(base.domain, base.X, base.Y, base.metas, base.W, [])
        self.assertNotEqual(base, variant)

        # Different X.
        other_x = np.ones((doc_count, 1))
        variant = Corpus(base.domain, other_x, base.Y, base.metas, base.W, base.text_features)
        self.assertNotEqual(base, variant)

        # Different Y.
        other_y = np.ones((doc_count, 1))
        variant = Corpus(base.domain, base.X, other_y, base.metas, base.W, base.text_features)
        self.assertNotEqual(base, variant)

        # Different metas: the first cell of a copy is blanked.
        altered_metas = np.copy(base.metas)
        altered_metas[0, 0] = ''
        variant = Corpus(base.domain, base.X, base.Y, altered_metas, base.W, base.text_features)
        self.assertNotEqual(base, variant)

        # Different domain: the metas are replaced by a new string variable.
        replacement_metas = [StringVariable('text2')]
        altered_domain = Domain(base.domain.attributes, base.domain.class_var, replacement_metas)
        variant = Corpus(altered_domain, base.X, base.Y, base.metas, base.W, replacement_metas)
        self.assertNotEqual(base, variant)

        # Different n-gram range on an otherwise identical copy.
        variant = base.copy()
        variant.ngram_range = (2, 4)
        self.assertNotEqual(base, variant)
开发者ID:s-alexey,项目名称:orange3-text,代码行数:26,代码来源:test_corpus.py

示例12: set_data

 def set_data(self, data=None):
     """Reset the widget and take ``data`` as the current corpus, then commit.

     Input that is not already a ``Corpus`` is converted with
     ``Corpus.from_table``. With ``data=None`` only the reset and the
     commit happen.
     """
     self.reset_widget()
     self.corpus = data
     if data is None:
         self.commit()
         return

     if not isinstance(data, Corpus):
         self.corpus = Corpus.from_table(data.domain, data)
     self.load_features()
     self.regenerate_docs()
     self.commit()
开发者ID:nikicc,项目名称:orange3-text,代码行数:9,代码来源:owcorpusviewer.py

示例13: test_create_bow

    def test_create_bow(self):
        """TF-IDF bag of words on deerwester is 9 x 42 and reports 4 progress callbacks."""
        documents = Corpus.from_file('deerwester')
        result = self.bow(documents, use_tfidf=True)

        self.assertIsNotNone(result.X)
        # Nine rows and 42 columns are expected.
        self.assertEqual(9, result.X.shape[0])
        self.assertEqual(42, result.X.shape[1])
        # Callback counters kept on the test case.
        self.assertEqual(self.progress_callbacks, 4)
        self.assertEqual(self.error_callbacks, 0)
开发者ID:david-novak,项目名称:orange3-text,代码行数:9,代码来源:test_bag_of_words.py

示例14: test_empty_corpus

 def test_empty_corpus(self):
     """
     Vectorizing a corpus sliced down to zero rows returns an equal corpus.
     GH-247
     """
     empty = Corpus.from_file("deerwester")[:0]
     vectorizer = BowVectorizer(norm=BowVectorizer.L1)
     transformed = vectorizer.transform(empty)
     self.assertEqual(transformed, empty)
开发者ID:s-alexey,项目名称:orange3-text,代码行数:9,代码来源:test_bowvectorizer.py

示例15: test_POSTagger

 def test_POSTagger(self):
     """Tagging a corpus adds ``pos_tags`` holding one tag per token in every document."""
     corpus = Corpus.from_file('deerwester')
     tagger = tag.AveragedPerceptronTagger()
     tagged = tagger.tag_corpus(corpus)
     self.assertTrue(hasattr(tagged, 'pos_tags'))
     # NOTE: a per-token check against the pattern '[a-z]+_[A-Z]+' is
     # currently disabled; only the lengths are compared.
     for doc_tokens, doc_tags in zip(tagged.tokens, tagged.pos_tags):
         self.assertEqual(len(doc_tokens), len(doc_tags))
开发者ID:s-alexey,项目名称:orange3-text,代码行数:9,代码来源:test_tags.py


注：本文中的orangecontrib.text.corpus.Corpus类示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。