Python Index.add_document方法代码示例

本文整理汇总了Python中index.Index.add_document方法的典型用法代码示例。如果您正苦于以下问题：Python Index.add_document方法的具体用法？Python Index.add_document怎么用？Python Index.add_document使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类index.Index的用法示例。

在下文中一共展示了Index.add_document方法的12个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_should_store_tokens_lowercase

# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
 def test_should_store_tokens_lowercase(self):
     # Tokens of a mixed-case document are expected to come back lowercased,
     # both from tokens() and as the keys of the internal _index mapping.
     index = Index()
     index.add_document('doc', 'This IS mY firsT DoCuMeNt')
     expected_tokens = set(['this', 'is', 'my', 'first', 'document'])
     # Every token maps to the single document that contains it.
     expected_index = dict((token, set(['doc'])) for token in expected_tokens)
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(index.tokens(), expected_tokens)
     # _index is copied into a plain dict before being compared.
     self.assertEqual(dict(index._index), expected_index)

开发者ID:sergio-garcia-clones，项目名称:nlplaying，代码行数:13，代码来源:test_index.py

示例2: test_passing_a_stemmer_should_index_tokens_stemmed

# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
 def test_passing_a_stemmer_should_index_tokens_stemmed(self):
     # With a stemmer the index keys are the stemmed tokens ('liked' is
     # expected under 'like'); with stemmer=None the tokens stay unstemmed.
     porter_stemmer = PorterStemmer()
     stemmed_index = Index(stemmer=porter_stemmer)
     stemmed_index.add_document('coffee', 'I liked it')
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(stemmed_index._index, {'i': set(['coffee']),
                                             'like': set(['coffee']),
                                             'it': set(['coffee'])})

     plain_index = Index(stemmer=None)
     plain_index.add_document('coffee', 'I liked it')
     self.assertEqual(plain_index._index, {'i': set(['coffee']),
                                           'liked': set(['coffee']),
                                           'it': set(['coffee'])})

开发者ID:sergio-garcia-clones，项目名称:nlplaying，代码行数:14，代码来源:test_index.py

示例3: test_calling_method_load_should_retrieve_object_from_pickle_file

# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
 def test_calling_method_load_should_retrieve_object_from_pickle_file(self):
     # Round trip: dump an index holding two documents to a file, load it
     # back with Index.load and check document count and stored tokens.
     #
     # The temporary file is closed immediately; only its name is used.
     # NOTE(review): delete=False leaves the file on disk and this test does
     # not remove it -- presumably a tearDown uses self.filename; confirm.
     fp = NamedTemporaryFile(delete=False)
     fp.close()
     self.filename = fp.name

     index = Index()
     index.add_document('coffee', 'I liked it')
     index.add_document('water', 'I need it')
     index.dump(self.filename)

     retrieved_index = Index.load(self.filename)
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(len(retrieved_index), 2)
     self.assertEqual(set(retrieved_index._index.keys()),
                      set(['i', 'liked', 'need', 'it']))

开发者ID:sergio-garcia-clones，项目名称:nlplaying，代码行数:14，代码来源:test_index.py

示例4: test_should_automatically_index_when_add_documents

# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
 def test_should_automatically_index_when_add_documents(self):
     # After two add_document calls the index must already reflect both
     # documents: shared tokens map to both names, unique tokens to one.
     index = Index()
     index.add_document('test', 'this is my first document')
     index.add_document('test2', 'this is my second document')
     expected_tokens = set(['this', 'is', 'my', 'first', 'second',
                            'document'])
     expected_index = {
         'this': set(['test', 'test2']),
         'is': set(['test', 'test2']),
         'my': set(['test', 'test2']),
         'first': set(['test']),
         'second': set(['test2']),
         'document': set(['test', 'test2']),
     }
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(index.tokens(), expected_tokens)
     # _index is copied into a plain dict before being compared.
     self.assertEqual(dict(index._index), expected_index)

开发者ID:sergio-garcia-clones，项目名称:nlplaying，代码行数:16，代码来源:test_index.py

示例5: test_calling_method_dump_should_pickle_the_index_object

# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
 def test_calling_method_dump_should_pickle_the_index_object(self):
     # dump() must create a file from which cPickle can restore an object
     # holding the same two documents and the same tokens.
     #
     # The temporary file is closed immediately; only its name is used.
     # NOTE(review): delete=False leaves the file on disk and this test does
     # not remove it -- presumably a tearDown uses self.filename; confirm.
     fp = NamedTemporaryFile(delete=False)
     fp.close()
     self.filename = fp.name

     index = Index()
     index.add_document('coffee', 'I liked it')
     index.add_document('water', 'I need it')
     index.dump(self.filename)
     self.assertTrue(file_exists(self.filename))

     # The handle is closed deterministically by the with-block; it was
     # previously left open. The open mode is kept unchanged.
     with open(self.filename) as fp:
         retrieved_index = cPickle.load(fp)
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(len(retrieved_index), 2)
     self.assertEqual(set(retrieved_index._index.keys()),
                      set(['i', 'liked', 'need', 'it']))

开发者ID:sergio-garcia-clones，项目名称:nlplaying，代码行数:16，代码来源:test_index.py

示例6: test_should_be_able_to_find_using_AND_OR_and_NOT

# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
 def test_should_be_able_to_find_using_AND_OR_and_NOT(self):
     # NOTE(review): despite the name, every query asserted here is a plain
     # single- or multi-term lookup; no OR/NOT query is exercised.
     index = Index()
     index.add_document('doc1', 'this is my first document')
     index.add_document('doc2', 'this is my second document')
     index.add_document('doc3', 'another document')
     # A multi-term query only matches documents containing all its terms.
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(index.find('this document'), set(['doc1', 'doc2']))
     self.assertEqual(index.find('this another'), set())
     # Terms that occur in no document match nothing.
     self.assertEqual(index.find('a b'), set())
     self.assertEqual(index.find('another'), set(['doc3']))
     self.assertEqual(index.find('first another'), set())

开发者ID:sergio-garcia-clones，项目名称:nlplaying，代码行数:12，代码来源:test_index.py

示例7: test_should_be_able_to_find_by_term

# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
 def test_should_be_able_to_find_by_term(self):
     # find_by_term must return the names of all documents containing the
     # term, regardless of the case in which the term is given.
     index = Index()
     index.add_document('doc1', 'this is my first document')
     index.add_document('doc2', 'this is my second document')
     index.add_document('doc3', 'another document')
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(index.find_by_term('document'),
                      set(['doc1', 'doc2', 'doc3']))
     # An upper-case term is expected to give the same result.
     self.assertEqual(index.find_by_term('DOCUMENT'),
                      set(['doc1', 'doc2', 'doc3']))
     self.assertEqual(index.find_by_term('this'), set(['doc1', 'doc2']))
     self.assertEqual(index.find_by_term('is'), set(['doc1', 'doc2']))
     self.assertEqual(index.find_by_term('my'), set(['doc1', 'doc2']))
     self.assertEqual(index.find_by_term('first'), set(['doc1']))
     self.assertEqual(index.find_by_term('second'), set(['doc2']))
     self.assertEqual(index.find_by_term('another'), set(['doc3']))

开发者ID:sergio-garcia-clones，项目名称:nlplaying，代码行数:17，代码来源:test_index.py

示例8: iterate_over_dir

# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
# coding: utf-8
from index import Index
import os

def iterate_over_dir(dir):
    """Yield the path of every file found below ``dir``, recursively.

    Paths are produced lazily, in ``os.walk`` order; each one is the
    directory being visited joined with the file name.
    """
    for current_dir, _subdirs, names in os.walk(dir):
        for name in names:
            yield os.path.join(current_dir, name)


# Add every file found below the books directory to a new index, then save
# the index to 'test.ind'.
ind = Index()
for s in iterate_over_dir('../../books_search_b/not_sort_book/T'):
    try:
        ind.add_document(s)
    except Exception as exc:
        # Best effort: a file that fails to index must not abort the whole
        # run, but it is reported instead of being discarded silently.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate, so the build can still be interrupted.
        print('Skipping %s: %s' % (s, exc))

ind.save('test.ind')

开发者ID:sdmaslennikov，项目名称:books-search，代码行数:21，代码来源:build_index.py

示例9: test_passing_stopwords_should_remove_these_words_from_token_list

# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
 def test_passing_stopwords_should_remove_these_words_from_token_list(self):
     # Words and punctuation passed as stopwords must not appear as index
     # keys; the expected keys show the remaining tokens lowercased.
     index = Index(stopwords=['yes', 'no', ',', '.', '!'])
     index.add_document('coffee', 'Yes, sir! No, Joyce.')
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(index._index, {'sir': set(['coffee']),
                                     'joyce': set(['coffee'])})

开发者ID:sergio-garcia-clones，项目名称:nlplaying，代码行数:7，代码来源:test_index.py

示例10: test_should_add_documents_with_name_and_content

# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
 def test_should_add_documents_with_name_and_content(self):
     # Each add_document(name, content) call registers one document: the
     # index length and the _documents name set must reflect both calls.
     index = Index()
     index.add_document('test', 'this is my first document')
     index.add_document('test2', 'this is my second document')
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(len(index), 2)
     self.assertEqual(index._documents, set(['test', 'test2']))

开发者ID:sergio-garcia-clones，项目名称:nlplaying，代码行数:8，代码来源:test_index.py

示例11: test_passing_a_stemmer_should_stem_search_term_before_matching

# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
 def test_passing_a_stemmer_should_stem_search_term_before_matching(self):
     # With a stemmer configured, searching for the unstemmed word 'liked'
     # must still find the document that was indexed with it.
     porter_stemmer = PorterStemmer()
     index = Index(stemmer=porter_stemmer)
     index.add_document('coffee', 'I liked it')
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(index.find_by_term('liked'), set(['coffee']))

开发者ID:sergio-garcia-clones，项目名称:nlplaying，代码行数:7，代码来源:test_index.py

示例12: Index

# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
#!/usr/bin/env python
# coding: utf-8
# Tip: run this script with `python -i example.py`
# (or `ipython -i example.py`), so you can interactively do searches by
# executing: `my_index.find('...search terms...')`

from nltk.corpus import machado
from index import Index


# Index the Machado corpus texts (all file ids except the first 50) under
# their file ids, then run a sample query.
# Single-argument print(...) calls produce the same output on Python 2 and
# Python 3, whereas the print statement is a syntax error on Python 3.
print('Creating index...')
my_index = Index()
filenames = machado.fileids()[50:]
for filename in filenames:
    my_index.add_document(filename, machado.raw(filename))

print('Searching...')
print(my_index.find('brasil azul'))

开发者ID:sergio-garcia-clones，项目名称:nlplaying，代码行数:20，代码来源:example.py

注：本文中的index.Index.add_document方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。