本文整理汇总了Python中whoosh.qparser.QueryParser方法的典型用法代码示例。如果您正苦于以下问题:Python qparser.QueryParser方法的具体用法?Python qparser.QueryParser怎么用?Python qparser.QueryParser使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类whoosh.qparser
的用法示例。
在下文中一共展示了qparser.QueryParser方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: search
# 需要导入模块: from whoosh import qparser [as 别名]
# 或者: from whoosh.qparser import QueryParser [as 别名]
def search(self, query_list, fields=None):
    """Search the index for the terms in *query_list*.

    :param query_list: list of query tokens; joined with spaces into one
        query string before parsing.
    :param fields: optional list of field names to restrict the search to.
        ``None`` (the default) falls through to the built-in field set.
    :return: tuple ``(parsed_query, search_result, tag_cloud)`` where
        ``parsed_query`` is the string form of the parsed query,
        ``search_result`` comes from ``self.create_search_result`` and
        ``tag_cloud`` lists key terms extracted from the "tags" field.
    """
    with self.ix.searcher() as searcher:
        query_string = " ".join(query_list)
        query = None
        if fields is None:
            # Fix: the len()/indexing checks below raised TypeError when no
            # fields were passed; an empty list falls through to the default
            # field set in the else branch, which is the intended behavior.
            fields = []
        if "\"" in query_string or ":" in query_string:
            # Quoted phrases or explicit "field:value" prefixes are handed to
            # the single-field parser on "content" unchanged.
            query = QueryParser("content", self.schema).parse(query_string)
        elif len(fields) == 1 and fields[0] == "filename":
            pass
        elif len(fields) == 1 and fields[0] == "tags":
            pass
        elif len(fields) == 2:
            pass
        else:
            fields = ["tags", "headlines", "content", "filename", "doubleemphasiswords", "emphasiswords"]
        if not query:
            query = MultifieldParser(fields, schema=self.ix.schema).parse(query_string)
        parsed_query = "%s" % query
        # Parenthesized print: valid on both Python 2 and Python 3.
        print("query: %s" % parsed_query)
        results = searcher.search(query, terms=False, scored=True, groupedby="path")
        key_terms = results.key_terms("tags", docs=100, numterms=100)
        tag_cloud = [keyword for keyword, score in key_terms]
        search_result = self.create_search_result(results)
        return parsed_query, search_result, tag_cloud
示例2: search
# 需要导入模块: from whoosh import qparser [as 别名]
# 或者: from whoosh.qparser import QueryParser [as 别名]
def search(self, query, search_field='content'):
    """Yield decoded documents matching *query* (at most 100 hits).

    *query* may be a pre-built whoosh ``Query`` object or a plain string,
    which is parsed against *search_field* with ``Variations`` term
    expansion before searching.
    """
    parsed = query
    if not isinstance(parsed, Query):
        parser = QueryParser(search_field, self.schema, termclass=Variations)
        parsed = parser.parse(parsed)
    with self.index.searcher() as searcher:
        hits = searcher.search(parsed, limit=100)
        for hit in hits:
            yield self._decode(hit['data'])
示例3: setup
# 需要导入模块: from whoosh import qparser [as 别名]
# 或者: from whoosh.qparser import QueryParser [as 别名]
def setup(self):
    """
    Defers loading until needed.

    Builds the schema from the unified haystack index, prepares a
    ``QueryParser`` bound to the main content field, and creates or opens
    the Whoosh index in file or RAM storage.  Sets ``self.setup_complete``
    when done.

    :raises IOError: if the file-storage path exists but is not writable.
    """
    from haystack import connections
    new_index = False
    # Make sure the index is there.
    if self.use_file_storage and not os.path.exists(self.path):
        os.makedirs(self.path)
        new_index = True
    if self.use_file_storage and not os.access(self.path, os.W_OK):
        raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)
    if self.use_file_storage:
        self.storage = FileStorage(self.path)
    else:
        global LOCALS
        # Fix: use getattr so a thread-local object that has no RAM_STORE
        # attribute yet does not raise AttributeError (a bare
        # LOCALS.RAM_STORE read would); matches the other setup() variants
        # in this file.
        if getattr(LOCALS, 'RAM_STORE', None) is None:
            LOCALS.RAM_STORE = RamStorage()
        self.storage = LOCALS.RAM_STORE
    self.content_field_name, self.schema = self.build_schema(connections[self.connection_alias].get_unified_index().all_searchfields())
    self.parser = QueryParser(self.content_field_name, schema=self.schema)
    if new_index is True:
        self.index = self.storage.create_index(self.schema)
    else:
        try:
            self.index = self.storage.open_index(schema=self.schema)
        except index.EmptyIndexError:
            # An empty/corrupt existing index directory: recreate in place.
            self.index = self.storage.create_index(self.schema)
    self.setup_complete = True
示例4: __init__
# 需要导入模块: from whoosh import qparser [as 别名]
# 或者: from whoosh.qparser import QueryParser [as 别名]
def __init__(self):
    """Open the whoosh index stored under ``database/whoosh`` and prepare
    a query parser for its "content" field."""
    whoosh_index = open_dir("database/whoosh")
    self.whbase = whoosh_index
    self.parser = QueryParser("content", schema=whoosh_index.schema)
示例5: setup
# 需要导入模块: from whoosh import qparser [as 别名]
# 或者: from whoosh.qparser import QueryParser [as 别名]
def setup(self):
    """
    Defers loading until needed.

    Lazily creates/opens the Whoosh index, builds the schema from the
    unified haystack index, and prepares the content-field query parser.
    """
    from haystack import connections
    created_now = False
    # Ensure the on-disk index directory exists before touching it.
    if self.use_file_storage and not os.path.exists(self.path):
        os.makedirs(self.path)
        created_now = True
    if self.use_file_storage and not os.access(self.path, os.W_OK):
        raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)
    if self.use_file_storage:
        self.storage = FileStorage(self.path)
    else:
        global LOCALS
        # getattr guards against the thread-local attribute not existing yet.
        if getattr(LOCALS, 'RAM_STORE', None) is None:
            LOCALS.RAM_STORE = RamStorage()
        self.storage = LOCALS.RAM_STORE
    searchfields = connections[self.connection_alias].get_unified_index().all_searchfields()
    self.content_field_name, self.schema = self.build_schema(searchfields)
    self.parser = QueryParser(self.content_field_name, schema=self.schema)
    if created_now:
        self.index = self.storage.create_index(self.schema)
    else:
        try:
            self.index = self.storage.open_index(schema=self.schema)
        except index.EmptyIndexError:
            self.index = self.storage.create_index(self.schema)
    self.setup_complete = True
示例6: __init__
# 需要导入模块: from whoosh import qparser [as 别名]
# 或者: from whoosh.qparser import QueryParser [as 别名]
def __init__(self, db_path):
    """Open the whoosh index at *db_path*, creating it with ``self.schema``
    when none exists yet, and prepare a parser for the 'text' field."""
    ensuredir(db_path)
    already_exists = index.exists_in(db_path)
    if already_exists:
        self.index = index.open_dir(db_path)
    else:
        self.index = index.create_in(db_path, schema=self.schema)
    self.qparser = QueryParser('text', self.schema)
示例7: setup
# 需要导入模块: from whoosh import qparser [as 别名]
# 或者: from whoosh.qparser import QueryParser [as 别名]
def setup(self):
    """
    Defers loading until needed.

    Opens (or first creates) the Whoosh index backing this connection and
    builds the schema plus content-field query parser from haystack's
    unified index definition.
    """
    from haystack import connections
    must_create = False
    # The index directory has to exist before FileStorage can use it.
    if self.use_file_storage and not os.path.exists(self.path):
        os.makedirs(self.path)
        must_create = True
    if self.use_file_storage and not os.access(self.path, os.W_OK):
        raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)
    if self.use_file_storage:
        self.storage = FileStorage(self.path)
    else:
        global LOCALS
        # Thread-local attribute may not exist yet; getattr avoids
        # AttributeError on first access in a fresh thread.
        if getattr(LOCALS, 'RAM_STORE', None) is None:
            LOCALS.RAM_STORE = RamStorage()
        self.storage = LOCALS.RAM_STORE
    unified = connections[self.connection_alias].get_unified_index()
    self.content_field_name, self.schema = self.build_schema(unified.all_searchfields())
    self.parser = QueryParser(self.content_field_name, schema=self.schema)
    if must_create:
        self.index = self.storage.create_index(self.schema)
    else:
        try:
            self.index = self.storage.open_index(schema=self.schema)
        except index.EmptyIndexError:
            self.index = self.storage.create_index(self.schema)
    self.setup_complete = True
示例8: _search_tag_groups
# 需要导入模块: from whoosh import qparser [as 别名]
# 或者: from whoosh.qparser import QueryParser [as 别名]
def _search_tag_groups(self, is_filtering_tags):
    """Return ``(seen, total)`` per-tag grouped counts.

    ``total`` counts every document grouped by tag; ``seen`` additionally
    restricts to documents whose flags match ``Status.SEEN`` and stays
    ``None`` when *is_filtering_tags* is true.
    """
    parser = QueryParser('tag', self._index.schema)
    tag_facet = sorting.FieldFacet('tag', allow_overlap=True)
    search_opts = {'limit': None, 'groupedby': tag_facet, 'maptype': sorting.Count}
    seen = None
    with self._index.searcher() as searcher:
        total = searcher.search(parser.parse('*'), **search_opts).groups()
        if not is_filtering_tags:
            seen = searcher.search(parser.parse("* AND flags:%s" % Status.SEEN), **search_opts).groups()
    return seen, total
示例9: _search_with_options
# 需要导入模块: from whoosh import qparser [as 别名]
# 或者: from whoosh.qparser import QueryParser [as 别名]
def _search_with_options(self, options, query):
    """Parse *query* against the 'raw' field and run it with ``**options``.

    NOTE(review): the whoosh ``Results`` object is returned after the
    searcher context has exited — presumably callers consume the hits
    eagerly; confirm this is safe for lazily-loaded stored fields.
    """
    parsed = QueryParser('raw', self._index.schema).parse(query)
    with self._index.searcher() as searcher:
        hits = searcher.search(parsed, **options)
    return hits
示例10: search_addresses
# 需要导入模块: from whoosh import qparser [as 别名]
# 或者: from whoosh.qparser import QueryParser [as 别名]
def search_addresses(searcher, query):
    """Collect matched address terms for *query* across the to/cc/bcc/sender
    fields, excluding documents tagged "drafts" or "trash", and return them
    as a flat list of term values."""
    excluded = Term("tag", "drafts") | Term("tag", "trash")
    wildcard_query = "*%s* OR *%s*" % (query.title(), query)
    per_field_matches = []
    for field in ['to', 'cc', 'bcc', 'sender']:
        parsed = QueryParser(field, searcher.schema).parse(wildcard_query)
        hits = searcher.search(
            parsed,
            limit=None,
            mask=excluded,
            groupedby=sorting.FieldFacet(field, allow_overlap=True),
            terms=True)
        per_field_matches.append(hits.matched_terms())
    # matched_terms() yields (fieldname, text) pairs; keep only the text.
    return [term[1] for term in flatten(per_field_matches)]
示例11: run_query_unique
# 需要导入模块: from whoosh import qparser [as 别名]
# 或者: from whoosh.qparser import QueryParser [as 别名]
def run_query_unique(field_name, value, return_fields=None):
    """Perform a search query for a single item using an unique key.

    Parses *value* against *field_name* (with no parser plugins, so the
    value is taken literally) and returns the requested fields of the
    first hit, or ``None`` when the index is unavailable or nothing
    matches.
    """
    ix = open_index()
    if not ix:
        return None
    with ix.searcher() as searcher:
        parser = QueryParser(
            field_name,
            schema=current_app.config['KERKO_COMPOSER'].schema,
            plugins=[])
        hits = searcher.search(parser.parse(value), limit=1)
        if hits:
            return _get_fields(hits[0], return_fields)
    return None
示例12: cal_sim
# 需要导入模块: from whoosh import qparser [as 别名]
# 或者: from whoosh.qparser import QueryParser [as 别名]
def cal_sim(train_data_path, test_data_path, dst_result_path=None, save_n_best_search=1):
    """Index training post/response pairs with whoosh and, for each test
    post, write the top TF-IDF matches to ``dst_result_path``.

    :param train_data_path: tab-separated "post<TAB>response" training file;
        its name must contain a digit, which selects the index directory.
    :param test_data_path: file whose first tab-separated column is the post
        to search for.
    :param dst_result_path: output path (required despite the None default);
        each written line is the test line plus matched post and response.
    :param save_n_best_search: number of best hits kept per test line.

    NOTE(review): this is Python 2 code — ``str.decode('utf-8')`` is applied
    to byte strings read from the files.
    """
    schema = Schema(context=TEXT(stored=True), response=STORED, post=TEXT(stored=True))
    # Fix: raw string — '\d' is an invalid escape sequence in a plain string.
    index_i = re.findall(r'\d', train_data_path)[0]
    index_path = "../tmp/ix_index/" + index_i
    if not os.path.exists(index_path):
        os.makedirs(index_path)
    ix = create_in(index_path, schema)
    writer = ix.writer()

    def get_cpr(line):
        # Split one training line into (context, response, post); context is
        # always empty for this two-column format.
        lines = line.lower().strip().split('\t')
        context = ''
        post = lines[0]
        response = lines[1]
        return context.strip().decode('utf-8'), response.decode('utf-8'), post.decode('utf-8')

    def load_train_data(file_name, writer):
        # Add every training pair to the index, then commit once.
        f = open(file_name)
        try:
            for line in f:
                context, response, post = get_cpr(line)
                if context != '':
                    writer.add_document(context=context, response=response, post=post)
                else:
                    writer.add_document(response=response, post=post)
        finally:
            # Fix: the input file handle was previously leaked.
            f.close()
        writer.commit()

    def get_query(line, ix):
        # Build an OR-of-terms query from the analyzed tokens of the post so
        # partial term overlap still matches.
        lines = line.strip().split('\t')
        post = lines[0].decode('utf-8')
        q2 = QueryParser("post", ix.schema).parse(post)
        terms = list(q2.all_terms())
        query = Or([Term(*x) for x in terms])
        return query

    load_train_data(train_data_path, writer)
    f = open(test_data_path, 'r')
    fw_search = open(dst_result_path, 'w')
    try:
        with ix.searcher(weighting=scoring.TF_IDF()) as searcher:
            c = searcher.collector(limit=10)
            tlc = TimeLimitCollector(c, timelimit=10.0)
            for line in f:
                try:
                    query = get_query(line, ix)
                    searcher.search_with_collector(query, tlc)
                    results = tlc.results()
                    for i in range(min(len(results), save_n_best_search)):
                        fw_search.write(
                            line.strip() + '\t' + str(results[i]["post"]) + '\t' + str(results[i]["response"]) + '\n')
                except Exception:
                    # Best-effort: search timeouts (and any other per-line
                    # failure) are reported and the line is skipped.
                    print('TimeLimit, ignore it!')
                    print(line)
    finally:
        # Fix: close both file handles even if searching fails part-way.
        f.close()
        fw_search.close()
示例13: corpus_query
# 需要导入模块: from whoosh import qparser [as 别名]
# 或者: from whoosh.qparser import QueryParser [as 别名]
def corpus_query(self, query, save_file=None, window_size=300, surround_size=50):
    """Send query to a corpus's index. `save_file` is a filename.

    :param query: whoosh query string, parsed against the "content" field.
    :param save_file: when given, the HTML report is written to
        ``<cltk_data>/user_data/search/<save_file>.html`` and the method
        returns ``None``; otherwise the HTML string is returned.
    :param window_size: maximum characters per highlight fragment.
    :param surround_size: characters of context kept around each hit.
    :type save_file: str

    >>> # cltk_index = CLTKIndex('latin', 'latin_text_latin_library')
    >>> # results = cltk_index.corpus_query('amicitia')
    """
    _index = open_dir(self.index_path)
    output_str = ''
    with _index.searcher() as searcher:
        _query = QueryParser("content", _index.schema).parse(query)
        results = searcher.search(_query, limit=None)
        results.fragmenter.charlimit = None
        # Allow larger fragments
        results.fragmenter.maxchars = window_size
        # Show more context before and after
        results.fragmenter.surround = surround_size
        docs_number = searcher.doc_count_all()
        output_str += 'Docs containing hits: {}.'.format(docs_number) + '</br></br>'
        for hit in results:
            author = hit['author']
            filepath = hit['path']
            output_str += author + '</br>'
            output_str += filepath + '</br>'
            # Highlighting needs the original document text, which is not
            # stored in the index, so re-read it from disk.
            with open(filepath) as file_open:
                file_contents = file_open.read()
            highlights = hit.highlights("content", text=file_contents, top=10000000)
            lines = highlights.split('\n')
            lines_br = '</br>'.join(lines)
            lines_number_approx = len(lines)
            output_str += 'Approximate hits: {}.'.format(lines_number_approx) + '</br>'
            output_str += lines_br + '</br></br>'
    if save_file:
        user_dir = os.path.normpath(get_cltk_data_dir() + '/user_data/search')
        output_path = os.path.join(user_dir, save_file + '.html')
        # Fix: the old mkdir-on-FileNotFoundError retry created only the leaf
        # directory and still failed when a parent (e.g. user_data/) was
        # missing; makedirs creates the whole chain and exist_ok makes it
        # idempotent.
        os.makedirs(user_dir, exist_ok=True)
        with open(output_path, 'w') as file_open:
            file_open.write(output_str)
    else:
        return output_str
示例14: run
# 需要导入模块: from whoosh import qparser [as 别名]
# 或者: from whoosh.qparser import QueryParser [as 别名]
def run(index, searcher, analyzer, reader, command, content_field="contents"):
    # Run a whoosh query (or enumerate every document when command == 'all')
    # and collect, per matching document, its term-frequency vector plus its
    # stored metadata.  Returns a 5-tuple:
    #   scoreDocs   - list of matching whoosh document numbers
    #   allTerms    - set of every utf-8-encoded term seen in any vector
    #   allDicts    - per-document {term: frequency} dicts, each also holding
    #                 the document's stored "txtorg_id"
    #   termsDocs   - {term: count} — NOTE(review): incremented once per
    #                 (document, term) occurrence, so it counts documents
    #                 containing the term only if vectors list each term once
    #   allMetadata - per-document dict of all stored fields except txtorg_id
    # NOTE(review): Python 2 code (print statements, .encode() on str).
    print 'content_field is', content_field
    """check to see whether the user specified a field"""
    print command
    if command == 'all':
        # 'all' bypasses parsing entirely and walks every doc id in the index.
        myresults = reader.all_doc_ids()
        print 'Query Completed'
    else:
        query = QueryParser(content_field,schema=index.schema).parse(command)
        myresults = searcher.docs_for_query(query)
        print 'Query Completed'
    allDicts = []
    allTerms = set()
    allMetadata = []
    termsDocs = dict()
    scoreDocs = []
    for docnum in myresults:
        #doc = searcher.doc(scoreDoc.doc)
        # Term vector for this doc; requires the field to be indexed with
        # vector="frequency" — docs without a stored vector are skipped.
        vector = searcher.vector_as("frequency", docnum, content_field)
        #vector = reader.getTermFreqVector(scoreDoc.doc,content_field)
        if vector is None: continue
        d = dict()  # term -> frequency for this document
        m = dict()  # metadata (stored fields) for this document
        # a vector is a generator of tuples -- convert of list
        # [(u"apple", 3), (u"bear", 2), (u"cab", 2)]
        #vector = [elt for elt in vector]
        #vterms = [elt[0] for elt in vector]
        #vvalues = [elt[1] for elt in vector]
        #allTerms = allTerms.union(map(lambda x: x.encode('utf-8'),vterms))
        # for (t,num) in zip(vterms,vvalues):
        for (t,num) in vector:
            allTerms.add(t.encode('utf-8'))
            d[t.encode('utf-8')] = num
            # Track in how many documents each term appears.
            if t in termsDocs:
                termsDocs[t.encode('utf-8')] += 1
            else:
                termsDocs[t.encode('utf-8')] = 1
        # Every document is expected to carry a stored txtorg_id field.
        d["txtorg_id"] = searcher.stored_fields(docnum)["txtorg_id"].encode('utf-8')
        # Build the metadata
        for k in searcher.stored_fields(docnum):
            if k != 'txtorg_id':
                m[k] = searcher.stored_fields(docnum)[k].encode('utf-8')
        allDicts.append(d)
        allMetadata.append(m)
        scoreDocs.append(docnum)
    # NOTE(review): `names` is computed but never used or returned.
    names = set(allTerms)
    print allMetadata
    return scoreDocs, allTerms, allDicts, termsDocs, allMetadata