当前位置: 首页>>代码示例>>Python>>正文


Python Corpus.from_file方法代码示例

本文整理汇总了 Python 中 orangecontrib.text.corpus.Corpus.from_file 方法的典型用法代码示例。如果您正苦于以下问题：Python Corpus.from_file 方法的具体用法是什么？Python Corpus.from_file 怎么用？有哪些 Python Corpus.from_file 的使用例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 orangecontrib.text.corpus.Corpus 的用法示例。


在下文中一共展示了 Corpus.from_file 方法的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: test_infer_text_features

# 需要导入模块: from orangecontrib.text.corpus import Corpus [as 别名]
# 或者: from orangecontrib.text.corpus.Corpus import from_file [as 别名]
    def test_infer_text_features(self):
        """Verify the single text feature inferred for two corpora.

        Loading 'friends-transcripts' must yield exactly one text feature
        named 'Quote'; loading 'deerwester' must yield exactly one named
        'Text'.
        """
        expectations = (
            ('friends-transcripts', 'Quote'),
            ('deerwester', 'Text'),
        )
        for dataset, feature_name in expectations:
            inferred = Corpus.from_file(dataset).text_features
            self.assertEqual(len(inferred), 1)
            self.assertEqual(inferred[0].name, feature_name)
开发者ID:s-alexey,项目名称:orange3-text,代码行数:12,代码来源:test_corpus.py

示例2: test_compute_values_to_different_domain

# 需要导入模块: from orangecontrib.text.corpus import Corpus [as 别名]
# 或者: from orangecontrib.text.corpus.Corpus import from_file [as 别名]
    def test_compute_values_to_different_domain(self):
        """Project one corpus onto the bag-of-words domain of another.

        The bag-of-words domain is built from 'deerwester' and then applied
        to 'book-excerpts'; both results must expose the same attributes.
        """
        src_corpus = Corpus.from_file('deerwester')
        dst_corpus = Corpus.from_file('book-excerpts')

        # Both corpora start out without any domain attributes.
        for loaded in (src_corpus, dst_corpus):
            self.assertFalse(loaded.domain.attributes)

        src_bow = BowVectorizer().transform(src_corpus)
        projected = dst_corpus.transform(src_bow.domain)

        self.assertEqual(src_bow.domain.attributes,
                         projected.domain.attributes)
开发者ID:s-alexey,项目名称:orange3-text,代码行数:13,代码来源:test_bowvectorizer.py

示例3: test_corpus_from_file

# 需要导入模块: from orangecontrib.text.corpus import Corpus [as 别名]
# 或者: from orangecontrib.text.corpus.Corpus import from_file [as 别名]
    def test_corpus_from_file(self):
        """Check length, domain size and metas shape of two loaded corpora."""
        expected_sizes = (('book-excerpts', 140), ('deerwester', 9))
        for dataset, n_docs in expected_sizes:
            corpus = Corpus.from_file(dataset)
            self.assertEqual(len(corpus), n_docs)
            self.assertEqual(len(corpus.domain), 1)
            self.assertEqual(len(corpus.domain.metas), 1)
            # One meta column per document.
            self.assertEqual(corpus.metas.shape, (n_docs, 1))
开发者ID:s-alexey,项目名称:orange3-text,代码行数:14,代码来源:test_corpus.py

示例4: open_file

# 需要导入模块: from orangecontrib.text.corpus import Corpus [as 别名]
# 或者: from orangecontrib.text.corpus.Corpus import from_file [as 别名]
    def open_file(self, path=None, data=None):
        """Load a corpus from ``data`` or ``path`` and refresh attribute lists.

        Args:
            path: Location handed to ``Corpus.from_file``; only consulted
                when ``data`` is falsy.
            data: Object converted with ``Corpus.from_table``; takes
                precedence over ``path`` when truthy.

        The context, the error state and both attribute models are always
        reset first. If neither argument is truthy the method then returns.
        A failure while reading ``path`` is reported through
        ``self.Error.read_file`` and the method returns without refreshing
        the rest of the widget state.
        """
        self.closeContext()
        self.Error.clear()
        self.unused_attrs_model[:] = []
        self.used_attrs_model[:] = []
        if data:
            self.corpus = Corpus.from_table(data.domain, data)
        elif path:
            try:
                self.corpus = Corpus.from_file(path)
                self.corpus.name = os.path.splitext(os.path.basename(path))[0]
            except Exception as err:
                # Catch Exception rather than BaseException so that
                # KeyboardInterrupt and SystemExit still propagate.
                self.Error.read_file(path, str(err))
                # Nothing usable was loaded: stop here instead of carrying
                # on with whatever self.corpus held before this call.
                return
        else:
            return

        self.update_info()
        self.used_attrs = list(self.corpus.text_features)
        if not self.corpus.text_features:
            self.Error.corpus_without_text_features()
            self.Outputs.corpus.send(None)
            return
        self.openContext(self.corpus)
        self.used_attrs_model.extend(self.used_attrs)
        # Remaining string metas that are not already in use.
        self.unused_attrs_model.extend(
            [f for f in self.corpus.domain.metas
             if f.is_string and f not in self.used_attrs_model])
开发者ID:s-alexey,项目名称:orange3-text,代码行数:29,代码来源:owcorpus.py

示例5: test_corpus_from_file_just_text

# 需要导入模块: from orangecontrib.text.corpus import Corpus [as 别名]
# 或者: from orangecontrib.text.corpus.Corpus import from_file [as 别名]
    def test_corpus_from_file_just_text(self):
        """Load 'deerwester.tab' by explicit path and check its dimensions."""
        tab_path = os.path.join(DATASET_PATH, 'deerwester.tab')
        corpus = Corpus.from_file(tab_path)
        n_docs, n_metas = 9, 1

        self.assertEqual(len(corpus), n_docs)
        self.assertEqual(len(corpus.domain), 0)
        self.assertEqual(len(corpus.domain.metas), n_metas)
        self.assertEqual(corpus.metas.shape, (n_docs, n_metas))
开发者ID:kafom,项目名称:orange3-text,代码行数:9,代码来源:test_corpus.py

示例6: test_corpus_from_file

# 需要导入模块: from orangecontrib.text.corpus import Corpus [as 别名]
# 或者: from orangecontrib.text.corpus.Corpus import from_file [as 别名]
    def test_corpus_from_file(self):
        """Load 'bookexcerpts.txt' by explicit path and check its dimensions."""
        txt_path = os.path.join(DATASET_PATH, 'bookexcerpts.txt')
        corpus = Corpus.from_file(txt_path)
        n_docs, n_metas = 140, 2
        self.assertEqual(len(corpus), n_docs)

        self.assertEqual(len(corpus.domain), 0)
        self.assertEqual(len(corpus.domain.metas), n_metas)
        self.assertEqual(corpus.metas.shape, (n_docs, n_metas))
开发者ID:kernc,项目名称:orange3-text,代码行数:9,代码来源:test_corpus.py

示例7: test_transform

# 需要导入模块: from orangecontrib.text.corpus import Corpus [as 别名]
# 或者: from orangecontrib.text.corpus.Corpus import from_file [as 别名]
    def test_transform(self):
        """Transforming 'deerwester' returns a Corpus with a 43-entry domain."""
        vectorizer = BowVectorizer()
        deerwester = Corpus.from_file('deerwester')

        bow = vectorizer.transform(deerwester)
        self.assertIsInstance(bow, Corpus)
        self.assertEqual(len(bow.domain), 43)
开发者ID:s-alexey,项目名称:orange3-text,代码行数:9,代码来源:test_bowvectorizer.py

示例8: test_corpus_not_eq

# 需要导入模块: from orangecontrib.text.corpus import Corpus [as 别名]
# 或者: from orangecontrib.text.corpus.Corpus import from_file [as 别名]
    def test_corpus_not_eq(self):
        """Each single-field deviation from a corpus must compare unequal.

        Starting from 'book-excerpts', one component at a time is replaced
        (text features, X, Y, metas, domain, n-gram range) and the variant
        is asserted to differ from the original.
        """
        base = Corpus.from_file('book-excerpts')
        doc_count = base.X.shape[0]

        # No text features.
        variant = Corpus(base.domain, base.X, base.Y, base.metas, base.W, [])
        self.assertNotEqual(base, variant)

        # X replaced by a column of ones.
        variant = Corpus(base.domain, np.ones((doc_count, 1)), base.Y,
                         base.metas, base.W, base.text_features)
        self.assertNotEqual(base, variant)

        # Y replaced by a column of ones.
        variant = Corpus(base.domain, base.X, np.ones((doc_count, 1)),
                         base.metas, base.W, base.text_features)
        self.assertNotEqual(base, variant)

        # First meta cell blanked out in a copy of the metas.
        altered_metas = np.copy(base.metas)
        altered_metas[0, 0] = ''
        variant = Corpus(base.domain, base.X, base.Y, altered_metas,
                         base.W, base.text_features)
        self.assertNotEqual(base, variant)

        # Domain whose only meta is a different string variable.
        replacement_meta = [StringVariable('text2')]
        altered_domain = Domain(base.domain.attributes,
                                base.domain.class_var,
                                replacement_meta)
        variant = Corpus(altered_domain, base.X, base.Y, base.metas,
                         base.W, replacement_meta)
        self.assertNotEqual(base, variant)

        # Copy with a different n-gram range.
        variant = base.copy()
        variant.ngram_range = (2, 4)
        self.assertNotEqual(base, variant)
开发者ID:s-alexey,项目名称:orange3-text,代码行数:28,代码来源:test_corpus.py

示例9: test_documents

# 需要导入模块: from orangecontrib.text.corpus import Corpus [as 别名]
# 或者: from orangecontrib.text.corpus.Corpus import from_file [as 别名]
    def test_documents(self):
        """``documents`` has one entry per corpus row and all are ``str``."""
        corpus = Corpus.from_file('book-excerpts')
        documents = corpus.documents
        doc_types = {type(doc) for doc in documents}

        self.assertEqual(len(documents), len(corpus))
        # A single distinct type, and that type is str.
        self.assertEqual(len(doc_types), 1)
        self.assertIn(str, doc_types)
开发者ID:s-alexey,项目名称:orange3-text,代码行数:10,代码来源:test_corpus.py

示例10: test_POSTagger

# 需要导入模块: from orangecontrib.text.corpus import Corpus [as 别名]
# 或者: from orangecontrib.text.corpus.Corpus import from_file [as 别名]
 def test_POSTagger(self):
     """Tagging 'deerwester' adds ``pos_tags`` aligned with the tokens."""
     deerwester = Corpus.from_file('deerwester')
     pos_tagger = tag.AveragedPerceptronTagger()
     tagged = pos_tagger.tag_corpus(deerwester)
     self.assertTrue(hasattr(tagged, 'pos_tags'))
     # NOTE: a per-token check against the pattern '[a-z]+_[A-Z]+' used to
     # live here but was disabled; only the lengths are compared below.
     for doc_tokens, doc_tags in zip(tagged.tokens, tagged.pos_tags):
         self.assertEqual(len(doc_tokens), len(doc_tags))
开发者ID:s-alexey,项目名称:orange3-text,代码行数:11,代码来源:test_tags.py

示例11: test_compute_values

# 需要导入模块: from orangecontrib.text.corpus import Corpus [as 别名]
# 或者: from orangecontrib.text.corpus.Corpus import from_file [as 别名]
    def test_compute_values(self):
        """Rebuilding from the bag-of-words domain reproduces the same X."""
        deerwester = Corpus.from_file('deerwester')
        vectorizer = BowVectorizer()

        bow = vectorizer.transform(deerwester)
        recomputed = Corpus.from_table(bow.domain, deerwester)

        self.assertEqual(bow.domain, recomputed.domain)
        # Element-wise comparison; no stored entry may differ.
        mismatches = bow.X != recomputed.X
        self.assertEqual(mismatches.nnz, 0)
开发者ID:s-alexey,项目名称:orange3-text,代码行数:11,代码来源:test_bowvectorizer.py

示例12: test_create_bow

# 需要导入模块: from orangecontrib.text.corpus import Corpus [as 别名]
# 或者: from orangecontrib.text.corpus.Corpus import from_file [as 别名]
    def test_create_bow(self):
        """Build a TF-IDF bag of words for 'deerwester' and check its shape.

        Also checks the callback counters kept on the test case: four
        progress callbacks and no error callbacks.
        """
        deerwester = Corpus.from_file('deerwester')
        bow = self.bow(deerwester, use_tfidf=True)

        self.assertIsNotNone(bow.X)
        self.assertEqual(9, bow.X.shape[0])
        self.assertEqual(42, bow.X.shape[1])
        self.assertEqual(self.progress_callbacks, 4)
        self.assertEqual(self.error_callbacks, 0)
开发者ID:david-novak,项目名称:orange3-text,代码行数:11,代码来源:test_bag_of_words.py

示例13: test_empty_corpus

# 需要导入模块: from orangecontrib.text.corpus import Corpus [as 别名]
# 或者: from orangecontrib.text.corpus.Corpus import from_file [as 别名]
 def test_empty_corpus(self):
     """
     Transforming a zero-row corpus returns a result equal to the input.
     GH-247
     """
     empty = Corpus.from_file("deerwester")[:0]
     l1_vectorizer = BowVectorizer(norm=BowVectorizer.L1)
     transformed = l1_vectorizer.transform(empty)
     self.assertEqual(transformed, empty)
开发者ID:s-alexey,项目名称:orange3-text,代码行数:11,代码来源:test_bowvectorizer.py

示例14: test_init_preserve_shape_of_empty_x

# 需要导入模块: from orangecontrib.text.corpus import Corpus [as 别名]
# 或者: from orangecontrib.text.corpus.Corpus import from_file [as 别名]
    def test_init_preserve_shape_of_empty_x(self):
        """A sparse X with no stored values keeps its shape in the Corpus."""
        corpus = Corpus.from_file('book-excerpts')
        domain = corpus.domain
        attributes = (ContinuousVariable('c1'),)
        widened_domain = Domain(attributes, domain.class_vars, domain.metas)

        # One column per document, with nothing stored in it.
        blank_X = csr_matrix((len(corpus), 1))
        rebuilt = Corpus(widened_domain, X=blank_X, Y=corpus.Y,
                         metas=corpus.metas)

        self.assertEqual(blank_X.nnz, 0)
        self.assertEqual(rebuilt.X.shape, blank_X.shape)
开发者ID:s-alexey,项目名称:orange3-text,代码行数:12,代码来源:test_corpus.py

示例15: test_ngrams

# 需要导入模块: from orangecontrib.text.corpus import Corpus [as 别名]
# 或者: from orangecontrib.text.corpus.Corpus import from_file [as 别名]
 def test_ngrams(self):
     """Uni-, bi- and tri-grams of the first document become attributes.

     The corpus is preprocessed in place with a regexp tokenizer and an
     n-gram range of (1, 3) before being vectorized.
     """
     vect = BowVectorizer()
     corpus = Corpus.from_file('deerwester')
     # Raw string: '\w' in a plain literal is an invalid escape sequence
     # and triggers a warning on current Python versions.
     pr = preprocess.Preprocessor(tokenizer=preprocess.RegexpTokenizer(r'\w+'),
                                  ngrams_range=(1, 3))
     pr(corpus, inplace=True)
     result = vect.transform(corpus)
     attrs = [attr.name for attr in result.domain.attributes]
     # Single token, then the space-joined 2-gram and 3-gram from the start
     # of the first document.
     self.assertIn(corpus.tokens[0][1], attrs)
     self.assertIn(' '.join(corpus.tokens[0][:2]), attrs)
     self.assertIn(' '.join(corpus.tokens[0][:3]), attrs)
开发者ID:s-alexey,项目名称:orange3-text,代码行数:13,代码来源:test_bowvectorizer.py


注：本文中的 orangecontrib.text.corpus.Corpus.from_file 方法示例由纯净天空整理自 GitHub/MSDocs 等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的 License；未经允许，请勿转载。