This page collects typical usage examples of the query.Term method from Python's whoosh library. If you are unsure what query.Term does or how to use it, the curated code examples below may help. You can also browse the other members of the whoosh.query module.
Six code examples of query.Term are shown below, ordered roughly by popularity.
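Before the numbered examples, here is a minimal, self-contained sketch (written for this page, not taken from any of the projects below) of what a bare query.Term does: it matches documents whose field contains the given analyzed token.

from whoosh.fields import Schema, TEXT, ID
from whoosh.index import create_in
from whoosh.query import Term
import tempfile

# Build a throwaway index with a single document.
schema = Schema(path=ID(stored=True), body=TEXT)
ix = create_in(tempfile.mkdtemp(), schema)
writer = ix.writer()
writer.add_document(path="/a", body="hello whoosh term query")
writer.commit()

# Term matches documents whose field contains the exact (analyzed) token.
with ix.searcher() as searcher:
    results = searcher.search(Term("body", "whoosh"))
    print([hit["path"] for hit in results])  # ['/a']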
Example 1: search
# Required import: from whoosh import query
# Or: from whoosh.query import Term
def search(self, w: str) -> SearchResult:
if not self.ix.up_to_date():
self.initialize_trie() # if the index is not up to date, someone has added cards, so we reinitialize the trie
normalized = list(WhooshConstants.normalized_analyzer(w))[0].text
# If we get matches by prefix, we return that
exact, prefix_whole_word, other_prefixed = self.find_matches_by_prefix(normalized)
if exact or len(prefix_whole_word) > 0 or len(other_prefixed) > 0:
return SearchResult(exact, prefix_whole_word, other_prefixed, [])
# We try fuzzy and stemmed queries
query_normalized = fuzzy_term(normalized, self.DIST, 'name_normalized')
query_stemmed = And([Term('name_stemmed', q.text) for q in WhooshConstants.stem_analyzer(w)])
query_tokenized = And([fuzzy_term(q.text, self.DIST, 'name_tokenized') for q in WhooshConstants.tokenized_analyzer(w)])
if len(query_tokenized) == 0: # This can be empty because some unicode chars are ignored. See #4988
query = Or([query_normalized, query_stemmed])
else:
query = Or([query_normalized, query_tokenized, query_stemmed])
with self.ix.searcher() as searcher:
fuzzy = [(r['canonical_name'], r.score) for r in searcher.search(query, limit=40)]
return SearchResult(exact, prefix_whole_word, other_prefixed, fuzzy)
Example 2: fuzzy_term
# Required import: from whoosh import query
# Or: from whoosh.query import Term
def fuzzy_term(q: str, dist: int, field: str) -> Term:
    # Fuzzy matching on very short strings produces too much noise, so match them exactly.
    if len(q) <= 3:
        return Term(field, q)
    return FuzzyTerm(field, q, maxdist=dist, prefixlength=1)
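A quick illustration of the helper (hypothetical inputs, not from the source project):

q1 = fuzzy_term("ork", 1, "name_normalized")
# -> Term('name_normalized', 'ork')
q2 = fuzzy_term("ornithopter", 1, "name_normalized")
# -> FuzzyTerm('name_normalized', 'ornithopter', maxdist=1, prefixlength=1)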
Example 3: get_all_num
# Required import: from whoosh import query
# Or: from whoosh.query import Term
def get_all_num(self, keyword, catid=''):
    queryit = self.parser.parse(keyword)
    if catid:
        # Restrict the parsed query to a single category.
        queryit = And([Term("catid", catid), queryit])
    with self.whbase.searcher() as searcher:
        results = searcher.search(queryit)
        # len() on a whoosh Results object returns the estimated total
        # number of matching documents, regardless of the search limit.
        return len(results)
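Worth knowing when counting hits: on a whoosh Results object, len() is an estimate of the total number of matches, while scored_length() is the number of hits actually scored under the current limit. A short sketch (ix is an assumed index):

with ix.searcher() as searcher:
    results = searcher.search(queryit, limit=10)
    total = len(results)               # estimated total number of matches
    on_page = results.scored_length()  # hits actually scored, at most 10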
Example 4: search_pager
# Required import: from whoosh import query
# Or: from whoosh.query import Term
def search_pager(self, keyword, catid='', page_index=1, doc_per_page=10):
    queryit = self.parser.parse(keyword)
    if catid:
        queryit = And([Term("catid", catid), queryit])
    # Fetch up to the end of the requested page, then slice that page out.
    # The searcher is left open so the returned hits can still read stored fields.
    queryres = self.whbase.searcher().search(queryit, limit=page_index * doc_per_page)
    return queryres[(page_index - 1) * doc_per_page: page_index * doc_per_page]
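whoosh also ships a built-in pager, Searcher.search_page, which performs the same limit-and-slice internally; an equivalent sketch (ix is an assumed index):

with ix.searcher() as searcher:
    page = searcher.search_page(queryit, page_index, pagelen=doc_per_page)
    # Copy the stored fields out while the searcher is still open.
    hits = [hit.fields() for hit in page]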
Example 5: search_addresses
# Required import: from whoosh import query
# Or: from whoosh.query import Term
def search_addresses(searcher, query):
    # Exclude anything tagged as drafts or trash from the address lookup.
    restrict_q = Term("tag", "drafts") | Term("tag", "trash")
    results = []
    for field in ['to', 'cc', 'bcc', 'sender']:
        query_parser = QueryParser(field, searcher.schema)
        results.append(
            searcher.search(
                # Match both the title-cased and the original spelling.
                query_parser.parse("*%s* OR *%s*" % (query.title(), query)),
                limit=None,
                mask=restrict_q,
                groupedby=sorting.FieldFacet(field, allow_overlap=True),
                terms=True).matched_terms())
    # matched_terms() yields (fieldname, text) pairs; keep only the text.
    return [address[1] for address in flatten(results)]
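The mask= keyword used above excludes every document matching the given query from the results. A minimal sketch of the idea on its own (ix is an assumed index with 'tag' and 'to' fields):

from whoosh.query import Term

exclude = Term("tag", "drafts") | Term("tag", "trash")  # queries compose with |
with ix.searcher() as searcher:
    hits = searcher.search(Term("to", "alice"), mask=exclude, limit=None)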
Example 6: cal_sim
# Required import: from whoosh import query
# Or: from whoosh.query import Term
def cal_sim(train_data_path, test_data_path, dst_result_path=None, save_n_best_search=1):
    schema = Schema(context=TEXT(stored=True), response=STORED, post=TEXT(stored=True))
    # The index directory is named after the first digit found in the training path.
    index_i = re.findall(r'\d', train_data_path)[0]
    index_path = "../tmp/ix_index/" + index_i
    if not os.path.exists(index_path):
        os.makedirs(index_path)
    ix = create_in(index_path, schema)
    writer = ix.writer()

    def get_cpr(line):
        # Each line is "post<TAB>response"; context is unused in this dataset.
        lines = line.lower().strip().split('\t')
        context = ''
        post = lines[0]
        response = lines[1]
        return context.strip(), response, post

    def load_train_data(file_name, writer):
        with open(file_name, encoding='utf-8') as f:
            for line in f:
                context, response, post = get_cpr(line)
                if context != '':
                    writer.add_document(context=context, response=response, post=post)
                else:
                    writer.add_document(response=response, post=post)
        writer.commit()

    def get_query(line, ix):
        # Parse the post, then OR together every term it produced.
        post = line.strip().split('\t')[0]
        q2 = QueryParser("post", ix.schema).parse(post)
        terms = list(q2.all_terms())
        return Or([Term(*x) for x in terms])

    load_train_data(train_data_path, writer)
    f = open(test_data_path, 'r', encoding='utf-8')
    fw_search = open(dst_result_path, 'w', encoding='utf-8')
    with ix.searcher(weighting=scoring.TF_IDF()) as searcher:
        c = searcher.collector(limit=10)
        tlc = TimeLimitCollector(c, timelimit=10.0)
        for line in f:
            try:
                query = get_query(line, ix)
                searcher.search_with_collector(query, tlc)
                results = tlc.results()
                for i in range(min(len(results), save_n_best_search)):
                    fw_search.write(
                        line.strip() + '\t' + str(results[i]["post"]) + '\t' + str(results[i]["response"]) + '\n')
            except Exception:
                # Most commonly a whoosh TimeLimit raised by the collector.
                print('TimeLimit, ignore it!')
                print(line)
    f.close()
    fw_search.close()
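A hypothetical invocation (the paths are placeholders; note that the training path must contain a digit, which names the index directory, and every input line must be a tab-separated post and response):

cal_sim(
    train_data_path="../data/train_1.txt",
    test_data_path="../data/test_1.txt",
    dst_result_path="../tmp/search_result_1.txt",
    save_n_best_search=1,
)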