本文整理汇总了Python中whoosh.index.create_in方法的典型用法代码示例。如果您正苦于以下问题:Python index.create_in方法的具体用法?Python index.create_in怎么用?Python index.create_in使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块whoosh.index的用法示例。
在下文中一共展示了index.create_in方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _init_index
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def _init_index(self):
    """Open the corpus index, creating the directory and index if missing.

    On return ``self.analyzer``, ``self.index``, ``self.searcher`` and
    ``self.reader`` are set. A newly created index gets the fields
    ``title`` (stored text), ``content`` (text) and ``path`` (stored ID).
    """
    corpus = self.corpus
    if not os.path.exists(corpus.path):
        os.mkdir(corpus.path)

    text_analyzer = corpus.analyzer
    self.analyzer = corpus.analyzer

    if not exists_in(corpus.path):
        # NOTE: the schema is hard-coded here; the original author questioned
        # whether a schema can be defined before the corpus layout is known.
        title_field = TEXT(stored=True, analyzer=text_analyzer)
        content_field = TEXT(analyzer=text_analyzer)
        path_field = ID(stored=True)
        schema = Schema(title=title_field, content=content_field, path=path_field)
        handle = create_in(corpus.path, schema)
        # Commit an empty writer right after creating the index.
        fresh_writer = handle.writer()
        fresh_writer.commit()
    else:
        handle = open_dir(corpus.path)

    self.index = handle
    self.searcher = handle.searcher()
    self.reader = handle.reader()
示例2: __init__
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def __init__(self, root, storeDir, analyzer, args_dir=None):
    """Create an index in *storeDir* and fill it from the documents in *root*.

    Args:
        root: Document location handed to ``self.indexDocs``.
        storeDir: Directory for the index; created when it does not exist.
        analyzer: Callable invoked with no arguments to obtain the analyzer
            used for the ``contents`` field.
        args_dir: Stored unchanged on ``self.args_dir``.

    Sets ``self.index``, ``self.writer`` and ``self.reader``.
    """
    # Local import: needed only to emit a progress message without a newline
    # in a way that works on both Python 2 and Python 3.
    import sys

    self.args_dir = args_dir
    if not os.path.exists(storeDir):
        os.mkdir(storeDir)

    schema = Schema(name=TEXT(stored=True),
                    path=ID(stored=True),
                    txtorg_id=ID(stored=True),
                    contents=TEXT(stored=False, vector=True, analyzer=analyzer()))
    ix = create_in(storeDir, schema)
    writer = ix.writer()

    print('document dir is %s' % (root,))
    self.indexDocs(root, writer)

    # Produces "optimizing index done" on a single line, like the original
    # Python 2 ``print 'optimizing index',`` followed by ``print 'done'``.
    sys.stdout.write('optimizing index ')
    sys.stdout.flush()
    writer.commit(optimize=True)
    print('done')

    self.index = ix
    # NOTE(review): this writer has already been committed above; presumably
    # it cannot accept further documents -- confirm before reusing it.
    self.writer = writer
    self.reader = ix.reader()
示例3: rewrite_index
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def rewrite_index(self, cards: List[Card]) -> None:
    """Create a new index in ``WhooshConstants.index_dir`` and load *cards* into it.

    The target directory is ensured to exist first; the index is built with
    ``self.schema`` and populated through ``update_index``.
    """
    target_dir = WhooshConstants.index_dir
    print('Rewriting index in {d}'.format(d=target_dir))
    ensure_dir_exists(target_dir)
    new_index = create_in(target_dir, self.schema)
    update_index(new_index, cards)
# pylint: disable=no-self-use
示例4: open_index
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def open_index(self, index_folder, create_new=False):
    """Open the index in *index_folder*, creating it when none exists.

    Args:
        index_folder: Directory that holds (or will hold) the index. It is
            remembered on ``self.index_folder``.
        create_new: When true, an existing *index_folder* is deleted first so
            that a fresh index is created.

    The resulting index object is stored on ``self.ix``.
    """
    self.index_folder = index_folder

    if create_new and os.path.exists(index_folder):
        shutil.rmtree(index_folder)
        # Single parenthesised expression: prints identically on Python 2 and 3.
        print("deleted index folder: " + index_folder)

    if not os.path.exists(index_folder):
        os.mkdir(index_folder)

    if index.exists_in(index_folder):
        self.ix = index.open_dir(index_folder)
        return

    # The schema is only needed when a new index has to be created.
    stemming_analyzer = StemmingAnalyzer()
    schema = Schema(
        path=ID(stored=True, unique=True),
        filename=TEXT(stored=True, field_boost=100.0),
        tags=KEYWORD(stored=True, scorable=True, field_boost=80.0),
        headlines=KEYWORD(stored=True, scorable=True, field_boost=60.0),
        doubleemphasiswords=KEYWORD(stored=True, scorable=True, field_boost=40.0),
        emphasiswords=KEYWORD(stored=True, scorable=True, field_boost=20.0),
        content=TEXT(stored=True, analyzer=stemming_analyzer),
        time=STORED,
    )
    self.ix = index.create_in(index_folder, schema)
示例5: init
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def init(self):
    """Return the index located at ``self.path``/``self.name``.

    An existing index is opened; otherwise the directory is created when
    missing and a new index is created there with ``self.schema``.
    """
    location = os.path.join(self.path, self.name)
    if not whoosh_index.exists_in(location):
        if not os.path.exists(location):
            os.makedirs(location)
        return whoosh_index.create_in(location, self.schema)
    return whoosh_index.open_dir(location)
示例6: __init__
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def __init__(self, index_dir, schema=DEFAULT_SCHEMA, force_create=False):
    """Attach to the index in *index_dir*, creating one when required.

    Args:
        index_dir: Directory containing the index.
        schema: Schema stored on ``self.schema`` and passed to Whoosh.
        force_create: When true, a new index is created even if one is
            already present in *index_dir*.
    """
    self.schema = schema
    already_present = exists_in(index_dir)
    if force_create or not already_present:
        self.index = create_in(index_dir, schema=schema)
    else:
        self.index = open_dir(index_dir, schema=schema)
示例7: __init__
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def __init__(self, db_path):
    """Open or create the index under *db_path* and build a query parser.

    ``self.index`` holds the index; ``self.qparser`` parses queries against
    the ``text`` field of ``self.schema``.
    """
    ensuredir(db_path)
    if not index.exists_in(db_path):
        whoosh_ix = index.create_in(db_path, schema=self.schema)
    else:
        whoosh_ix = index.open_dir(db_path)
    self.index = whoosh_ix
    self.qparser = QueryParser('text', self.schema)
示例8: __init__
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def __init__(self, index_path, language):
    """Set up a Whoosh search index at *index_path* for text in *language*.

    Args:
        index_path: Directory of the index; created when it does not exist.
        language: Language name checked with ``has_stemmer`` and
            ``has_stopwords``. ``LanguageAnalyzer`` is used when both are
            available, otherwise ``SimpleAnalyzer``.

    The process exits via ``sys.exit`` with an error message if the
    directory cannot be created or an existing index cannot be opened.
    """
    import os
    # ``sys.exit`` is used below, so import ``sys`` here together with the
    # other function-local imports instead of relying on a module-level one.
    import sys

    from whoosh import index as whoosh_index
    from whoosh.fields import Schema, TEXT, ID
    from whoosh import qparser
    from whoosh.highlight import UppercaseFormatter
    from whoosh.analysis import SimpleAnalyzer, LanguageAnalyzer
    from whoosh.lang import has_stemmer, has_stopwords

    if not has_stemmer(language) or not has_stopwords(language):
        # TODO Display a warning?
        analyzer = SimpleAnalyzer()
    else:
        analyzer = LanguageAnalyzer(language)

    self.schema = Schema(path=ID(unique=True, stored=True), body=TEXT(analyzer=analyzer))
    self.formatter = UppercaseFormatter()
    self.index_path = index_path

    if not os.path.exists(index_path):
        try:
            os.mkdir(index_path)
        except OSError as e:
            sys.exit("Error creating Whoosh index: %s" % e)

    if whoosh_index.exists_in(index_path):
        try:
            self.search_index = whoosh_index.open_dir(index_path)
        except whoosh_index.IndexError as e:
            sys.exit("Error opening whoosh index: {0}".format(e))
    else:
        self.search_index = whoosh_index.create_in(index_path, self.schema)

    # Queries are parsed against both fields, with the fuzzy-term plugin added.
    self.query_parser = qparser.MultifieldParser(["body", "path"], schema=self.schema)
    self.query_parser.add_plugin(qparser.FuzzyTermPlugin())
示例9: delete_index
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def delete_index(self, index):
    """Close the current index and create a new one at ``self.index_path``.

    The *index* argument is not used by this method.
    """
    from whoosh import index as whoosh_index

    self.search_index.close()
    replacement = whoosh_index.create_in(self.index_path, schema=self.schema)
    self.search_index = replacement
示例10: create_index
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def create_index():
    """Return the index stored in the ``index_full`` directory.

    When the directory does not exist it is created together with a new
    index whose single field ``sentence`` is stored text. The field is
    tokenized by a regex matching hyphenated words, ``<TAG>...</TAG>`` spans
    with upper-case tag names, or plain words. When the directory already
    exists, the index inside it is opened instead.
    """
    # Raw string: ``\w`` in a normal literal is an invalid escape sequence.
    regex_tokenize = re.compile(r'\w+(?:-\w+)+|<[A-Z]+>[^<]+</[A-Z]+>|\w+', re.U)
    tokenizer = RegexTokenizer(regex_tokenize)
    schema = Schema(sentence=TEXT(stored=True, analyzer=tokenizer))
    if not os.path.exists("index_full"):
        os.mkdir("index_full")
        idx = create_in("index_full", schema)
    else:
        # NOTE(review): assumes an existing directory already contains an
        # index; an empty directory is not handled here.
        idx = open_dir("index_full")
    return idx
示例11: cal_sim
# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def cal_sim(train_data_path, test_data_path, dst_result_path=None, save_n_best_search=1):
    """Index training pairs, then write the best matches for each test post.

    Each line of both files is tab-separated; the first column is the post
    and, in the training file, the second column is the response.

    Args:
        train_data_path: Training file. The first digit found in this path
            names the index sub-directory under ``../tmp/ix_index/``; a path
            without any digit raises ``IndexError``.
        test_data_path: File with one query post per line.
        dst_result_path: Output file. It is passed straight to ``open``, so
            the default ``None`` is not usable and a real path must be given.
        save_n_best_search: Maximum number of hits written per test line.

    Each output line is the stripped test line, the matched post and the
    matched response, separated by tabs.

    NOTE(review): the ``str.decode('utf-8')`` calls indicate this was written
    for Python 2 byte strings -- confirm before running on Python 3.
    """
    schema = Schema(context=TEXT(stored=True), response=STORED, post=TEXT(stored=True))
    index_i = re.findall(r'\d', train_data_path)[0]
    index_path = "../tmp/ix_index/" + index_i
    if not os.path.exists(index_path):
        os.makedirs(index_path)
    ix = create_in(index_path, schema)
    writer = ix.writer()

    def get_cpr(line):
        # Split one training line into (context, response, post).
        lines = line.lower().strip().split('\t')
        context = ''  # the training data carries no context column
        post = lines[0]
        response = lines[1]
        return context.strip().decode('utf-8'), response.decode('utf-8'), post.decode('utf-8')

    def load_train_data(file_name, writer):
        # Add every training line to the index, then commit once at the end.
        with open(file_name) as f:
            for line in f:
                context, response, post = get_cpr(line)
                if context != '':
                    writer.add_document(context=context, response=response, post=post)
                else:
                    writer.add_document(response=response, post=post)
        writer.commit()

    def get_query(line, ix):
        # Build an OR query over all terms parsed from the post column.
        lines = line.strip().split('\t')
        post = lines[0].decode('utf-8')
        q2 = QueryParser("post", ix.schema).parse(post)
        terms = list(q2.all_terms())
        query = Or([Term(*x) for x in terms])
        return query

    load_train_data(train_data_path, writer)

    # ``with`` closes both files even when a search raises.
    with open(test_data_path, 'r') as f, open(dst_result_path, 'w') as fw_search:
        with ix.searcher(weighting=scoring.TF_IDF()) as searcher:
            c = searcher.collector(limit=10)
            tlc = TimeLimitCollector(c, timelimit=10.0)
            for line in f:
                try:
                    query = get_query(line, ix)
                    searcher.search_with_collector(query, tlc)
                    results = tlc.results()
                    for i in range(min(len(results), save_n_best_search)):
                        fw_search.write(
                            line.strip() + '\t' + str(results[i]["post"]) + '\t'
                            + str(results[i]["response"]) + '\n')
                except Exception:
                    # Best effort: any failure on a line (not only a search
                    # timeout) is reported and the line is skipped.
                    print('TimeLimit, ignore it!')
                    print(line)