本文整理汇总了Python中whoosh.index.open_dir方法的典型用法代码示例。如果您正苦于以下问题：Python index.open_dir方法的具体用法？Python index.open_dir怎么用？Python index.open_dir使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块whoosh.index的用法示例。
在下文中一共展示了index.open_dir方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _init_index
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import open_dir [as 别名]
def _init_index(self):
    """Open the corpus's Whoosh index, creating it first when none exists.

    Ensures the directory ``self.corpus.path`` exists, then either opens the
    index found there or creates one with a fallback title/content/path
    schema.  On return ``self.analyzer``, ``self.index``, ``self.searcher``
    and ``self.reader`` are set on the instance.
    """
    index_dir = self.corpus.path
    if not os.path.exists(index_dir):
        # makedirs rather than mkdir so a path with missing parents works too.
        os.makedirs(index_dir)

    analyzer = self.corpus.analyzer
    self.analyzer = analyzer

    if exists_in(index_dir):
        ix = open_dir(index_dir)
    else:
        # NOTE(review): this fallback schema is a guess -- the real schema of
        # the corpus is not known at this point; it may need to be removed.
        schema = Schema(
            title=TEXT(stored=True, analyzer=analyzer),
            content=TEXT(analyzer=analyzer),
            path=ID(stored=True),
        )
        ix = create_in(index_dir, schema)
        # NOTE(review): the empty commit is assumed to belong to the creation
        # branch only -- confirm against the upstream file.
        ix.writer().commit()

    self.index = ix
    # The searcher and reader are stored on the instance; nothing in this
    # method closes them.
    self.searcher = ix.searcher()
    self.reader = ix.reader()
示例2: update_card
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import open_dir [as 别名]
def update_card(self, card: Card) -> None:
    """Hand ``card`` to ``update_index`` for the index at ``WhooshConstants.index_dir``.

    The index is opened afresh on every call and the card is passed as a
    one-element list.
    """
    card_index = open_dir(WhooshConstants.index_dir)
    cards = [card]
    update_index(card_index, cards)
示例3: __init__
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import open_dir [as 别名]
def __init__(self) -> None:
    """Open the index at ``WhooshConstants.index_dir``, then call ``initialize_trie``."""
    opened_index = open_dir(WhooshConstants.index_dir)
    self.ix = opened_index
    # initialize_trie() runs only after self.ix is set.
    self.initialize_trie()
示例4: open_index
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import open_dir [as 别名]
def open_index(self, index_folder, create_new=False):
    """Open the Whoosh index in ``index_folder``, creating it if necessary.

    Args:
        index_folder: Directory that holds (or will hold) the index.
        create_new: When true, an existing ``index_folder`` is deleted first
            so the index is rebuilt from scratch.

    Sets ``self.index_folder`` and ``self.ix``.
    """
    self.index_folder = index_folder

    if create_new and os.path.exists(index_folder):
        shutil.rmtree(index_folder)
        # Parenthesised single-argument form prints the same text on
        # Python 2 and is valid syntax on Python 3.
        print("deleted index folder: " + index_folder)

    if not os.path.exists(index_folder):
        # makedirs so that missing parent directories are created as well.
        os.makedirs(index_folder)

    if index.exists_in(index_folder):
        self.ix = index.open_dir(index_folder)
        return

    # The schema is only needed when a new index is created.
    schema = Schema(
        path=ID(stored=True, unique=True),
        filename=TEXT(stored=True, field_boost=100.0),
        tags=KEYWORD(stored=True, scorable=True, field_boost=80.0),
        headlines=KEYWORD(stored=True, scorable=True, field_boost=60.0),
        doubleemphasiswords=KEYWORD(stored=True, scorable=True, field_boost=40.0),
        emphasiswords=KEYWORD(stored=True, scorable=True, field_boost=20.0),
        content=TEXT(stored=True, analyzer=StemmingAnalyzer()),
        time=STORED,
    )
    self.ix = index.create_in(index_folder, schema)
示例5: init
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import open_dir [as 别名]
def init(self):
    """Return the index stored under ``self.path``/``self.name``.

    An existing index is opened; otherwise the directory is created when
    missing and a new index is created there with ``self.schema``.
    """
    index_location = os.path.join(self.path, self.name)

    if not whoosh_index.exists_in(index_location):
        if not os.path.exists(index_location):
            os.makedirs(index_location)
        return whoosh_index.create_in(index_location, self.schema)

    return whoosh_index.open_dir(index_location)
示例6: __init__
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import open_dir [as 别名]
def __init__(self, index_dir, schema=DEFAULT_SCHEMA, force_create=False):
    """Open the index in ``index_dir`` or create a new one there.

    Args:
        index_dir: Directory containing (or to contain) the index.
        schema: Schema passed to both ``open_dir`` and ``create_in``.
        force_create: When true, ``create_in`` is called even if an index
            already exists in ``index_dir``.
    """
    self.schema = schema

    # exists_in is always evaluated first, exactly as in the original test.
    index_present = exists_in(index_dir)
    if force_create or not index_present:
        self.index = create_in(index_dir, schema=schema)
    else:
        self.index = open_dir(index_dir, schema=schema)
示例7: __init__
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import open_dir [as 别名]
def __init__(self):
    """Open the index in ``database/whoosh`` and build a parser for its ``content`` field."""
    self.whbase = open_dir("database/whoosh")
    # The parser uses the schema of the index that was just opened.
    index_schema = self.whbase.schema
    self.parser = QueryParser("content", schema=index_schema)
示例8: __init__
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import open_dir [as 别名]
def __init__(self, db_path):
    """Open the index in ``db_path``, creating it with ``self.schema`` if absent.

    ``ensuredir(db_path)`` is called first; afterwards ``self.index`` and
    ``self.qparser`` (a parser for the ``text`` field) are set.
    """
    ensuredir(db_path)

    if not index.exists_in(db_path):
        self.index = index.create_in(db_path, schema=self.schema)
    else:
        self.index = index.open_dir(db_path)

    self.qparser = QueryParser('text', self.schema)
示例9: get_item_count
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import open_dir [as 别名]
def get_item_count(dirs):
    """Return ``doc_count_all()`` of the index stored at ``baseindexpath``/``dirs``."""
    index_path = os.path.join(baseindexpath, dirs)
    return index.open_dir(index_path).doc_count_all()
示例10: __init__
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import open_dir [as 别名]
def __init__(self, index_path, language):
    """Set up the Whoosh index, schema and query parser for ``index_path``.

    Args:
        index_path: Directory holding the index; created when missing.
        language: Language handed to ``LanguageAnalyzer``.  When Whoosh has
            no stemmer or no stopword list for it, ``SimpleAnalyzer`` is
            used instead.

    Exits the process via ``sys.exit`` when the directory cannot be created
    or an existing index cannot be opened.
    """
    # All dependencies are imported locally; ``sys`` is included so that the
    # error paths below cannot fail with a NameError.
    import os
    import sys

    from whoosh import index as whoosh_index
    from whoosh import qparser
    from whoosh.analysis import LanguageAnalyzer, SimpleAnalyzer
    from whoosh.fields import ID, TEXT, Schema
    from whoosh.highlight import UppercaseFormatter
    from whoosh.lang import has_stemmer, has_stopwords

    if has_stemmer(language) and has_stopwords(language):
        analyzer = LanguageAnalyzer(language)
    else:
        # TODO Display a warning?
        analyzer = SimpleAnalyzer()

    self.schema = Schema(path=ID(unique=True, stored=True), body=TEXT(analyzer=analyzer))
    self.formatter = UppercaseFormatter()
    self.index_path = index_path

    if not os.path.exists(index_path):
        try:
            os.mkdir(index_path)
        except OSError as e:
            sys.exit("Error creating Whoosh index: %s" % e)

    if whoosh_index.exists_in(index_path):
        try:
            self.search_index = whoosh_index.open_dir(index_path)
        except whoosh_index.IndexError as e:
            sys.exit("Error opening whoosh index: {0}".format(e))
    else:
        self.search_index = whoosh_index.create_in(index_path, self.schema)

    # NOTE(review): the parser is built from self.schema, not from the schema
    # of an index that already existed on disk -- confirm they always agree.
    self.query_parser = qparser.MultifieldParser(["body", "path"], schema=self.schema)
    self.query_parser.add_plugin(qparser.FuzzyTermPlugin())
示例11: create_index
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import open_dir [as 别名]
def create_index():
    """Return the Whoosh index stored in ``index_full``, creating it if needed.

    A new index gets a single stored ``sentence`` field tokenized by a regex
    that keeps hyphenated words and ``<TAG>...</TAG>`` spans as single tokens.
    """
    from whoosh.index import exists_in

    index_dir = "index_full"

    # Check for an actual index, not just the directory: an existing but
    # empty directory would make open_dir raise.
    if exists_in(index_dir):
        return open_dir(index_dir)

    # Raw string: '\w' in a plain literal is an invalid escape sequence.
    token_pattern = re.compile(r'\w+(?:-\w+)+|<[A-Z]+>[^<]+</[A-Z]+>|\w+', re.U)
    schema = Schema(sentence=TEXT(stored=True, analyzer=RegexTokenizer(token_pattern)))

    if not os.path.exists(index_dir):
        os.mkdir(index_dir)
    return create_in(index_dir, schema)
示例12: corpus_query
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import open_dir [as 别名]
def corpus_query(self, query, save_file=None, window_size=300, surround_size=50):
    """Send query to a corpus's index and render the hits as an HTML string.

    Args:
        query: Query string, parsed against the index's ``content`` field.
        save_file: Optional file name (without extension).  When given, the
            output is written to ``<cltk data dir>/user_data/search/<save_file>.html``
            and nothing is returned.
        window_size: Value assigned to the fragmenter's ``maxchars``.
        surround_size: Value assigned to the fragmenter's ``surround``.

    Returns:
        The HTML string when ``save_file`` is falsy, otherwise ``None``.

    :type save_file: str
    >>> # cltk_index = CLTKIndex('latin', 'latin_text_latin_library')
    >>> # results = cltk_index.corpus_query('amicitia')
    """
    _index = open_dir(self.index_path)

    # Collect the pieces and join once instead of repeated string +=.
    parts = []

    with _index.searcher() as searcher:
        _query = QueryParser("content", _index.schema).parse(query)
        results = searcher.search(_query, limit=None)
        results.fragmenter.charlimit = None
        # Allow larger fragments
        results.fragmenter.maxchars = window_size
        # Show more context before and after
        results.fragmenter.surround = surround_size

        # NOTE(review): doc_count_all() is what the label below reports; it
        # looks like the total document count, not the number of documents
        # with hits -- confirm which was intended.
        docs_number = searcher.doc_count_all()
        parts.append('Docs containing hits: {}.'.format(docs_number) + '</br></br>')

        for hit in results:
            author = hit['author']
            filepath = hit['path']
            parts.append(author + '</br>')
            parts.append(filepath + '</br>')

            # Highlights are computed from the file on disk, read here.
            with open(filepath) as file_open:
                file_contents = file_open.read()

            highlights = hit.highlights("content", text=file_contents, top=10000000)
            lines = highlights.split('\n')
            # One highlight line is counted as one hit, hence "approximate".
            parts.append('Approximate hits: {}.'.format(len(lines)) + '</br>')
            parts.append('</br>'.join(lines) + '</br></br>')

    output_str = ''.join(parts)

    if not save_file:
        return output_str

    user_dir = os.path.normpath(get_cltk_data_dir() + '/user_data/search')
    output_path = os.path.join(user_dir, save_file + '.html')
    # Create the target directory (and any missing parents) up front instead
    # of retrying the write after a FileNotFoundError.
    os.makedirs(user_dir, exist_ok=True)
    with open(output_path, 'w') as file_open:
        file_open.write(output_str)