本文整理汇总了Python中index.Index.add_document方法的典型用法代码示例。如果您正苦于以下问题:Python Index.add_document方法的具体用法?Python Index.add_document怎么用?Python Index.add_document使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类index.Index的用法示例。
在下文中一共展示了Index.add_document方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_should_store_tokens_lowercase
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
def test_should_store_tokens_lowercase(self):
    """Tokens from mixed-case content are expected to be stored lowercased.

    Adds one document whose words use mixed case and asserts that both
    ``tokens()`` and the internal ``_index`` mapping only contain the
    lowercase forms, each mapped to that document.
    """
    index = Index()
    index.add_document('doc', 'This IS mY firsT DoCuMeNt')

    expected_tokens = set(['this', 'is', 'my', 'first', 'document'])
    # Only one document was added, so every token maps to the same name.
    # A fresh set is built per token so no two keys share one object.
    expected_index = dict((token, set(['doc'])) for token in expected_tokens)

    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(index.tokens(), expected_tokens)
    # NOTE(review): _index is converted with dict() before comparing,
    # presumably because it is a dict subclass (e.g. defaultdict) - confirm.
    self.assertEqual(dict(index._index), expected_index)
示例2: test_passing_a_stemmer_should_index_tokens_stemmed
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
def test_passing_a_stemmer_should_index_tokens_stemmed(self):
    """Compare the index built with a Porter stemmer to one built without.

    The same sentence is indexed twice: with a stemmer the word 'liked' is
    expected under the key 'like'; with ``stemmer=None`` it is expected
    unchanged under 'liked'.
    """
    content = 'I liked it'

    stemmed = Index(stemmer=PorterStemmer())
    stemmed.add_document('coffee', content)
    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(stemmed._index, {'i': set(['coffee']),
                                      'like': set(['coffee']),
                                      'it': set(['coffee'])})

    unstemmed = Index(stemmer=None)
    unstemmed.add_document('coffee', content)
    self.assertEqual(unstemmed._index, {'i': set(['coffee']),
                                        'liked': set(['coffee']),
                                        'it': set(['coffee'])})
示例3: test_calling_method_load_should_retrieve_object_from_pickle_file
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
def test_calling_method_load_should_retrieve_object_from_pickle_file(self):
    """An index saved with dump() is expected to be restored by Index.load().

    Two documents are indexed, the index is dumped to a temporary file and
    loaded back; the restored object must report two documents and the same
    set of indexed tokens.
    """
    # Only a path is needed: create the temporary file, close it at once and
    # keep it on disk (delete=False) so dump() can write to it.
    fp = NamedTemporaryFile(delete=False)
    fp.close()
    # NOTE(review): the file is not removed in this method; presumably a
    # tearDown elsewhere uses self.filename for cleanup - confirm.
    self.filename = fp.name

    index = Index()
    index.add_document('coffee', 'I liked it')
    index.add_document('water', 'I need it')
    index.dump(self.filename)

    retrieved_index = Index.load(self.filename)
    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(len(retrieved_index), 2)
    self.assertEqual(set(retrieved_index._index.keys()),
                     set(['i', 'liked', 'need', 'it']))
示例4: test_should_automatically_index_when_add_documents
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
def test_should_automatically_index_when_add_documents(self):
    """Adding documents is expected to update the index without extra calls.

    Two documents sharing most words are added; the test asserts the token
    set and, per token, the set of document names that contain it.
    """
    index = Index()
    index.add_document('test', 'this is my first document')
    index.add_document('test2', 'this is my second document')

    expected_tokens = set(['this', 'is', 'my', 'first', 'second',
                           'document'])
    # Words present in both documents map to both names; 'first' and
    # 'second' each occur in exactly one document.
    expected_index = {
        'this': set(['test', 'test2']),
        'is': set(['test', 'test2']),
        'my': set(['test', 'test2']),
        'first': set(['test']),
        'second': set(['test2']),
        'document': set(['test', 'test2']),
    }

    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(index.tokens(), expected_tokens)
    # NOTE(review): _index is converted with dict() before comparing,
    # presumably because it is a dict subclass (e.g. defaultdict) - confirm.
    self.assertEqual(dict(index._index), expected_index)
示例5: test_calling_method_dump_should_pickle_the_index_object
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
def test_calling_method_dump_should_pickle_the_index_object(self):
    """dump() is expected to write a pickle that cPickle can load back.

    Two documents are indexed and dumped to a temporary file; the file must
    exist and unpickling it must yield an object with two documents and the
    expected token keys.
    """
    # Only a path is needed: create the temporary file, close it at once and
    # keep it on disk (delete=False) so dump() can write to it.
    fp = NamedTemporaryFile(delete=False)
    fp.close()
    # NOTE(review): the file is not removed in this method; presumably a
    # tearDown elsewhere uses self.filename for cleanup - confirm.
    self.filename = fp.name

    index = Index()
    index.add_document('coffee', 'I liked it')
    index.add_document('water', 'I need it')
    index.dump(self.filename)
    self.assertTrue(file_exists(self.filename))

    # The with-block guarantees the handle is closed even if unpickling
    # raises; previously the file object was left open.
    # NOTE(review): the open mode is left at the default on purpose, since
    # it should match whatever mode Index.dump writes with - confirm.
    with open(self.filename) as pickled:
        retrieved_index = cPickle.load(pickled)

    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(len(retrieved_index), 2)
    self.assertEqual(set(retrieved_index._index.keys()),
                     set(['i', 'liked', 'need', 'it']))
示例6: test_should_be_able_to_find_using_AND_OR_and_NOT
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
def test_should_be_able_to_find_using_AND_OR_and_NOT(self):
    """Check find() results for single- and multi-term queries.

    The cases below expect a multi-term query to return only documents that
    contain every term, and an empty set when no document does.
    NOTE(review): despite the test name, no explicit OR / NOT operator is
    exercised by these queries.
    """
    index = Index()
    index.add_document('doc1', 'this is my first document')
    index.add_document('doc2', 'this is my second document')
    index.add_document('doc3', 'another document')

    # (query, expected result) pairs, checked in this order.
    cases = [
        ('this document', set(['doc1', 'doc2'])),
        ('this another', set()),
        ('a b', set()),
        ('another', set(['doc3'])),
        ('first another', set([])),
    ]
    for query, expected in cases:
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(index.find(query), expected)
示例7: test_should_be_able_to_find_by_term
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
def test_should_be_able_to_find_by_term(self):
    """Check find_by_term() for each word of three small documents.

    Each term is expected to return the names of the documents containing
    it; the upper-case 'DOCUMENT' lookup is expected to match the same
    documents as 'document'.
    """
    index = Index()
    index.add_document('doc1', 'this is my first document')
    index.add_document('doc2', 'this is my second document')
    index.add_document('doc3', 'another document')

    # (term, expected document names) pairs, checked in this order.
    cases = [
        ('document', set(['doc1', 'doc2', 'doc3'])),
        ('DOCUMENT', set(['doc1', 'doc2', 'doc3'])),
        ('this', set(['doc1', 'doc2'])),
        ('is', set(['doc1', 'doc2'])),
        ('my', set(['doc1', 'doc2'])),
        ('first', set(['doc1'])),
        ('second', set(['doc2'])),
        ('another', set(['doc3'])),
    ]
    for term, expected in cases:
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(index.find_by_term(term), expected)
示例8: iterate_over_dir
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
# coding: utf-8
from index import Index
import os
def iterate_over_dir(dir):
    """Lazily yield the path of every file found under *dir*.

    The tree is traversed with ``os.walk``, so files in nested
    sub-directories are included. Directories themselves are not yielded.

    Args:
        dir: Root directory to walk. The name shadows the builtin ``dir``
            but is kept so existing keyword callers keep working.

    Yields:
        ``os.path.join(root, filename)`` for each file, in ``os.walk`` order.
    """
    # The list of sub-directories reported by os.walk is not needed here.
    for root, _subdirs, files in os.walk(dir):
        for filename in files:
            yield os.path.join(root, filename)
import sys  # used only to report files that could not be indexed

# Build an index over every file below the book directory and save it.
ind = Index()
for s in iterate_over_dir('../../books_search_b/not_sort_book/T'):
    try:
        ind.add_document(s)
    except Exception as exc:
        # Indexing stays best-effort: a file that fails is skipped, but the
        # failure is reported instead of being silently discarded. Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit stop the run.
        sys.stderr.write('skipping %r: %r\n' % (s, exc))
ind.save('test.ind')
示例9: test_passing_stopwords_should_remove_these_words_from_token_list
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
def test_passing_stopwords_should_remove_these_words_from_token_list(self):
    """Stopwords given to Index are expected to be left out of the index.

    The stopword list contains two words and three punctuation marks; after
    indexing a sentence made of those plus 'sir' and 'Joyce', only the two
    remaining words (lowercased) are expected as keys.
    """
    stopwords = ['yes', 'no', ',', '.', '!']
    index = Index(stopwords=stopwords)
    index.add_document('coffee', 'Yes, sir! No, Joyce.')

    expected_index = {'sir': set(['coffee']),
                      'joyce': set(['coffee'])}
    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(index._index, expected_index)
示例10: test_should_add_documents_with_name_and_content
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
def test_should_add_documents_with_name_and_content(self):
    """add_document(name, content) is expected to register each document.

    After adding two documents, ``len(index)`` must be 2 and the internal
    ``_documents`` set must hold both names.
    """
    index = Index()
    for name, content in [('test', 'this is my first document'),
                          ('test2', 'this is my second document')]:
        index.add_document(name, content)

    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(len(index), 2)
    self.assertEqual(index._documents, set(['test', 'test2']))
示例11: test_passing_a_stemmer_should_stem_search_term_before_matching
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
def test_passing_a_stemmer_should_stem_search_term_before_matching(self):
    """With a stemmer, a search for the unstemmed word is expected to match.

    The document is indexed through a Porter stemmer; looking up the
    original word 'liked' must still return that document.
    """
    index = Index(stemmer=PorterStemmer())
    index.add_document('coffee', 'I liked it')

    matches = index.find_by_term('liked')
    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(matches, set(['coffee']))
示例12: Index
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import add_document [as 别名]
#!/usr/bin/env python
# coding: utf-8
# Tip: run this script with `python -i example.py`
# (or `ipython -i example.py`), so you can interactively do searches by
# executing: `my_index.find('...search terms...')`
from nltk.corpus import machado
from index import Index
# Single-argument print(...) calls give the same output on Python 2 and are
# also valid Python 3, unlike the print statement.
print('Creating index...')
my_index = Index()
# Index the raw text of each selected file id, using the file id as the
# document name. The [50:] slice skips the first 50 ids of the corpus.
for filename in machado.fileids()[50:]:
    my_index.add_document(filename, machado.raw(filename))

print('Searching...')
print(my_index.find('brasil azul'))