本文整理汇总了Python中whoosh.qparser.MultifieldParser.parse方法的典型用法代码示例。如果您正苦于以下问题:Python MultifieldParser.parse方法的具体用法?Python MultifieldParser.parse怎么用?Python MultifieldParser.parse使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类whoosh.qparser.MultifieldParser
的用法示例。
在下文中一共展示了MultifieldParser.parse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: find
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import parse [as 别名]
def find(cmd, criteria, reindex=False):
    """Search the CLI help index for commands matching *criteria*.

    :param cmd: CLI command object (provides ``cli_ctx`` for index access).
    :param criteria: list of search terms supplied by the user.
    :param reindex: when True, rebuild the index before searching.
    """
    from whoosh.qparser import MultifieldParser
    if reindex:
        _create_index(cmd.cli_ctx)
    try:
        ix = _get_index(cmd.cli_ctx)
    except ValueError:
        # got a pickle error because the index was written by a different python version
        # recreate the index and proceed
        _create_index(cmd.cli_ctx)
        ix = _get_index(cmd.cli_ctx)
    qp = MultifieldParser(
        ['cmd_name', 'short_summary', 'long_summary', 'examples'],
        schema=_get_schema()
    )
    if 'OR' in criteria or 'AND' in criteria:
        # looks more advanced, let's trust them to make a great query
        q = qp.parse(" ".join(criteria))
    else:
        # let's help out with some OR's to provide a less restrictive search,
        # plus the whole phrase as one quoted term.
        # BUG FIX: the original formatted the *list object* into the query
        # ("... OR '['foo', 'bar']'"); join the terms instead.
        expanded_query = " OR ".join(criteria) + " OR '{}'".format(" ".join(criteria))
        q = qp.parse(expanded_query)
    with ix.searcher() as searcher:
        from whoosh.highlight import UppercaseFormatter, ContextFragmenter
        results = searcher.search(q)
        # Highlight matches in UPPERCASE within ~300-char context fragments.
        results.fragmenter = ContextFragmenter(maxchars=300, surround=200)
        results.formatter = UppercaseFormatter()
        for hit in results:
            _print_hit(hit)
示例2: search_results
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import parse [as 别名]
def _hit_context(hit, fields):
    """Concatenate de-duplicated highlight snippets for *hit* across *fields*,
    appending a ')' to any snippet whose trailing parenthesis was cut off by
    fragment truncation."""
    context = ""
    for field in fields:
        snippet = hit.highlights(field)
        if len(snippet) > 0 and snippet not in context:
            context += re.sub(r"(\(.*[^\)])", r'\1)', snippet)
    return context


def search_results(ix, search_query, fields):
    """Search *ix* over *fields* with *search_query*, both strictly (all
    terms required) and loosely (any term), returning strict matches first
    followed by loose-only matches.

    Each returned dict holds the hit's stored fields plus a "context" key
    containing concatenated highlight snippets.
    """
    qpo = MultifieldParser(fields, schema=ix.schema, group=qparser.OrGroup)
    qpa = MultifieldParser(fields, schema=ix.schema)
    qo = qpo.parse(search_query)
    qa = qpa.parse(search_query)
    data = []
    with ix.searcher() as s:
        resultsa = s.search(qa)
        resultso = s.search(qo)
        for hit in resultsa:
            entry = dict(**hit)
            entry["context"] = _hit_context(hit, fields)
            data.append(entry)
        # Collect ids already reported by the strict (AND) search once,
        # instead of rescanning resultsa for every OR hit.
        strict_ids = set(hita["id"] for hita in resultsa)
        for hit in resultso:
            if hit["id"] not in strict_ids:
                entry = dict(**hit)
                entry["context"] = _hit_context(hit, fields)
                data.append(entry)
    return data
示例3: search
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import parse [as 别名]
def search(querytext, request, pagenum=1, maxresults=30, staff=False, scope=None,
           orderby='-creation_date'):
    """Search the 'resource' index for *querytext*, with spelling correction.

    :param querytext: raw user query; falsy means "match everything".
    :param request: HTTP request, used for permission filtering and URL building.
    :param pagenum: 1-based page number; values below 1 are clamped to 1.
    :param maxresults: page size.
    :param staff: whether the requesting user has staff privileges.
    :param scope: optional scope restriction forwarded to build_search_kwargs.
    :param orderby: sort specification for the results.
    :returns: dict with paginated 'results' and, when the query was
        auto-corrected, a 'corrected_q' entry.
    """
    search_engine = get_search_engine('resource')
    search_result = {}
    if pagenum < 1:
        pagenum = 1
    with search_engine.searcher() as searcher:
        parser = MultifieldParser(search_engine.default_search_fields, searcher.schema)
        # The original used the fragile `a and x or y` idiom, which would
        # wrongly fall through to Every() if the parsed query were falsy;
        # an explicit conditional expression preserves the intent safely.
        user_q = parser.parse(querytext) if querytext else Every()
        user_q, search_kwargs = build_search_kwargs(user_q, request, scope, staff, orderby)
        # Ask for one extra hit so the caller can detect a following page.
        hits = searcher.search(user_q, limit=(pagenum * maxresults) + 1, **search_kwargs)
        if querytext and hits.is_empty():
            # No hits: try a spelling-corrected variant of the query.
            correction_q = parser.parse(querytext)
            corrected = searcher.correct_query(correction_q, querytext)
            if corrected.query != correction_q:
                querytext = corrected.string
                search_result['corrected_q'] = querytext
                user_q, search_kwargs = build_search_kwargs(corrected.query, request, scope, staff, orderby)
                hits = searcher.search(user_q, limit=(pagenum * maxresults), **search_kwargs)
        search_engine.prepare_search_response(search_result, hits, pagenum, maxresults)
        search_result['results'] = add_other_versions(searcher, search_result['results'], request.user, staff)
        add_absolute_urls(search_result['results'], request)
    return search_result
示例4: Searcher
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import parse [as 别名]
class Searcher(object):
    """
    Assigned to a Model class as ``search_query``, which enables text-querying.
    """

    def __init__(self, model_class, primary, index):
        self.model_class = model_class
        self.primary = primary
        self.index = index
        self.searcher = index.searcher()
        # Query every schema field except the primary key.
        searchable = set(index.schema._fields.keys()) - set([self.primary])
        self.parser = MultifieldParser(list(searchable), index.schema)

    def __call__(self, query, limit=None):
        """API similar to SQLAlchemy's queries.
        """
        session = self.model_class.query.session
        hits = self.index.searcher().search(self.parser.parse(query), limit=limit)
        keys = [hit[self.primary] for hit in hits]
        if not keys:
            # Dummy request...
            return session.query(self.model_class).filter("uid = -1")
        primary_column = getattr(self.model_class, self.primary)
        return session.query(self.model_class).filter(primary_column.in_(keys))

    def search(self, query, limit=None):
        """New API: returns both whoosh records and SA models."""
        # TODO: highly suboptimal
        session = self.model_class.query.session
        for hit in self.index.searcher().search(self.parser.parse(query), limit=limit):
            yield (hit, session.query(self.model_class).get(hit[self.primary]))
示例5: search
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import parse [as 别名]
def search(self, q, tool_name_boost, tool_section_boost, tool_description_boost, tool_label_boost, tool_stub_boost, tool_help_boost, tool_search_limit, tool_enable_ngram_search, tool_ngram_minsize, tool_ngram_maxsize):
    """
    Perform search on the in-memory index. Weight in the given boosts.

    :param q: raw query string.
    :param tool_*_boost: per-field BM25F boost factors (coerced to float).
    :param tool_search_limit: maximum number of tool ids to return.
    :param tool_enable_ngram_search: when True (or the string "True"),
        score aggregation runs over ngram sub-queries of ``q``.
    :param tool_ngram_minsize: minimum ngram length.
    :param tool_ngram_maxsize: maximum ngram length.
    :returns: list of tool ids ordered by decreasing relevance.
    """
    # Change field boosts for searcher
    searcher = self.index.searcher(
        weighting=BM25F(
            field_B={'name_B': float(tool_name_boost),
                     'section_B': float(tool_section_boost),
                     'description_B': float(tool_description_boost),
                     'labels_B': float(tool_label_boost),
                     'stub_B': float(tool_stub_boost),
                     'help_B': float(tool_help_boost)}
        )
    )
    # Set query to search name, description, section, help, and labels.
    parser = MultifieldParser(['name', 'description', 'section', 'help', 'labels', 'stub'], schema=self.schema)
    # Hyphens are wildcards in Whoosh causing bad things
    if q.find('-') != -1:
        q = (' ').join([token.text for token in self.rex(to_unicode(q))])
    # Perform tool search with ngrams if set to true in the config file
    if (tool_enable_ngram_search is True or tool_enable_ngram_search == "True"):
        # Map tool id -> aggregated BM25 score across all ngram queries.
        hits_with_score = {}
        token_analyzer = StandardAnalyzer() | analysis.NgramFilter(minsize=int(tool_ngram_minsize), maxsize=int(tool_ngram_maxsize))
        ngrams = [token.text for token in token_analyzer(q)]
        for query in ngrams:
            # Get the tool list with respective scores for each qgram
            curr_hits = searcher.search(parser.parse('*' + query + '*'), limit=float(tool_search_limit))
            for i, curr_hit in enumerate(curr_hits):
                is_present = False
                for prev_hit in hits_with_score:
                    # Check if the tool appears again for the next qgram search
                    if curr_hit['id'] == prev_hit:
                        is_present = True
                        # Add the current score with the previous one if the
                        # tool appears again for the next qgram
                        hits_with_score[prev_hit] = curr_hits.score(i) + hits_with_score[prev_hit]
                # Add the tool if not present to the collection with its score
                if not is_present:
                    hits_with_score[curr_hit['id']] = curr_hits.score(i)
        # Sort the results based on aggregated BM25 score in decreasing order of scores
        hits_with_score = sorted(hits_with_score.items(), key=lambda x: x[1], reverse=True)
        # Return the tool ids
        return [item[0] for item in hits_with_score[0:int(tool_search_limit)]]
    else:
        # Perform the search
        hits = searcher.search(parser.parse('*' + q + '*'), limit=float(tool_search_limit))
        return [hit['id'] for hit in hits]
示例6: keywords
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import parse [as 别名]
def keywords(request):
    """Django view: search the lagou index for the ``q`` parameter and
    render the matched jobs' key terms.

    Renders 'search/keywords.html' with the query, hit counts, and a list
    of (keyword, score) pairs extracted from the 'desc' field of the hits.
    """
    query = request.GET.get('q', '')
    if not query:
        return render(request, 'search/keywords.html', {'page_name': 'search.keywords'})
    qtext = get_tokenized_query(query)
    # (removed a stray Python-2-only `print qtext` debug statement)
    idx_dir = os.path.join(settings.BASE_DIR, 'search/lagou_idx')
    ix = open_dir(idx_dir)
    searcher = ix.searcher()
    parser = MultifieldParser(["name", "com_name", 'city'], schema=ix.schema)
    q = parser.parse(qtext)
    plen = 100
    results = searcher.search(q, limit=plen)
    total = len(results)
    got = results.scored_length()
    # Scale the number of extracted key terms to the size of the result set.
    numterms = 100
    if got < 10:
        numterms = 10
    elif got < 100:
        numterms = 50
    keywords = [(kw, score) for kw, score in results.key_terms("desc", docs=got, numterms=numterms)]
    return render(request, 'search/keywords.html',
                  {'page_name': 'search.keywords',
                   'query': query,
                   'total': total,
                   'got': got,
                   'keywords': keywords,
                   })
示例7: page
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import parse [as 别名]
def page(self, page, limit):
    """Run this object's query against the engine's index and return one
    page of matching documents.

    :param page: 1-based page number.
    :param limit: maximum number of results per page.
    :returns: list of restored document objects; also sets ``self.rows``
        to the total number of matches.
    """
    with self.engine.index.searcher() as searcher:
        parser = MultifieldParser(
            self.engine.search_fields,
            schema = self.engine.index.schema,
        )
        # Extra query syntax: range operators, quoted phrases,
        # field:value terms, and prefix queries.
        parser.add_plugin(GtLtPlugin())
        parser.add_plugin(PhrasePlugin())
        parser.add_plugin(FieldsPlugin())
        parser.add_plugin(PrefixPlugin())
        whoosh_query = parser.parse(self.query.toString(self.engine))
        results = searcher.search_page(whoosh_query, page, limit, sortedby = self.order)
        self.rows = results.total
        _results = []
        doc_class = self.engine.database.document
        for result in results:
            # Rebuild a document object from the stored fields only.
            doc = doc_class(data = {field: result.get(field, None) for field in self.engine.stored_fields}, restore = True)
            _results.append(doc)
        return _results
示例8: parse
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import parse [as 别名]
def parse(text, schema=SCHEMA):
"""
parse(text[, schema=SCHEMA])
Analisa e trata o texto em ``text`` de acordo com o ``schema``
do índice de documentos.
.. code-block:: python
>>> from storyline.engine.query import parse
>>> from storyline.engine.schema import get_schema
>>>
>>> SCHEMA = get_schema()
>>> parse("Mestre", SCHEMA)
Or([Term('title', u'mestr'), Term('content', u'mestr')])
:param text: Consulta feita pelo usuário.
:type text: str
:param schema: Schema do índice de documentos.
:type schema: Schema
:returns: Query com termos e operadores.
"""
try:
from whoosh.qparser import MultifieldParser
except ImportError:
print "Ocorreu um erro na importação do módulo whoosh.qparser."
qp = MultifieldParser(["title", "content"], schema, None)
return qp.parse(text)
示例9: __init__
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import parse [as 别名]
class Index:
    """Builds and queries a Whoosh full-text index of an iTunes library.

    NOTE(review): Python 2 code (print statements). Relies on helpers not
    visible here — ItunesParser, file.ID3Filter and boolean() — confirm
    they are imported at file level.
    """
    def __init__(self, path='~/Music/iTunes/iTunes Music Library.xml', folder='~/Library/Application Support/Share my tunes'):
        # Location of the exported iTunes XML library.
        self.path = os.path.expanduser(path)
        self.schema = Schema(
            trackId = ID(stored=True),
            name=TEXT(stored=True),
            artist=TEXT(stored=True),
            album=TEXT(stored=True),
            genre=KEYWORD(stored=True),
            location=STORED,
            trackNumber=STORED,
            bitRate=ID(stored=True),
            artwork=KEYWORD(stored=True)
        )
        # Free-text queries match track name, album and artist.
        self.parser = MultifieldParser(["name", "album", "artist"], schema = self.schema)
        self.folder = "%s/index" % os.path.expanduser(folder)
        # True when no index exists yet in the target folder.
        self.empty = not whoosh.index.exists_in(self.folder)
        self.ix = None
    def index(self):
        """Build the index from the iTunes library; no-op when one already exists."""
        if self.empty:
            if not os.path.exists(self.folder):
                os.makedirs(self.folder)
            st = FileStorage(self.folder)
            ix = st.create_index(self.schema)
            w = ix.writer()
            w.add_document(name = u"beuha")
            pipe = file.ID3Filter()
            #[TODO] using itunes info for artwork?
            cpt = 0
            for track in pipe(ItunesParser(self.path)):
                if track['album'] != None :
                    album = track['album'].encode('ascii', 'ignore')
                else:
                    album = ""
                #print track['artwork'], "[%s]" % album, track['name'].encode('ascii', 'ignore')
                # Console progress: running count every 20 tracks, '#' per track.
                if cpt % 20 == 0:
                    print "\n%i " %cpt,
                print '#',
                #print track['album'], track['name']
                w.add_document(
                    trackId = track['trackId'], name=track['name']
                    ,artist=track['artist'], album=track['album'],
                    genre=track['genre'], location=track['location'],
                    artwork=boolean(track['artwork']),
                    trackNumber=track['trackNumber'], bitRate=track['bitRate']
                )
                #if cpt % 100 == 1:
                # w.commit()
                cpt += 1
            print "\n\n%i tracks indexed" % cpt
            w.commit()
            ix.optimize()
            ix.close()
        else :
            print "already indexed"
    def query(self, query):
        """Parse *query* and return hits sorted by album then track name."""
        # Lazily open the on-disk index on first use.
        if self.ix == None:
            self.ix = FileStorage(self.folder).open_index()
        q = self.parser.parse(query)
        return self.ix.searcher().search(q, sortedby=("album", "name"), limit=None)
示例10: search_commodity
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import parse [as 别名]
def search_commodity():
    """Flask view: full-text search over commodities.

    Reads the ``q`` query parameter, searches the whoosh index over the
    content and title fields, and maps hit ids back to Commodity rows;
    without a query every commodity is listed. The ``c`` parameter
    optionally filters by category. (Python 2 code: uses ``unicode``.)
    """
    from shop import app
    ix = open_dir(app.config.get("INDEX_DIR"))
    searcher = ix.searcher()
    mparser = MultifieldParser(["content", "title"], schema=ix.schema)
    query_raw = request.args.get('q', '')
    if query_raw:
        query = mparser.parse(unicode(query_raw.lower()))
        results = searcher.search(query)
        result_id = []
        for result in results:
            result_id.append(int(result['id']))
        # De-duplicate ids (a commodity may match in several fields).
        result_id = list(set(result_id))
        # OR together one Q expression per matching id — presumably an ORM
        # query-builder object; verify against the file's Q import.
        wq = None
        for rid in result_id:
            if not wq:
                wq = Q(id=rid)
            else:
                wq |= Q(id=rid)
        if wq:
            coms = Commodity.select().where(wq)
        else:
            coms = []
    else:
        coms = Commodity.select()
    category = int(request.args.get('c', '0'))
    # Category 1 appears to be treated as "all"; confirm with callers.
    if category and category != 1:
        coms = [c for c in coms if c.is_category(category)]
    return render_template('core/com_list.html', commodities=coms)
示例11: getdocs
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import parse [as 别名]
def getdocs():
    """Flask endpoint: search the "indexQ" index for each NPS search term
    and return (query, title) pairs for every hit as JSON."""
    params = dict(request.args.items())
    search_terms = params['NPS'].split(quails.DELIMITER)
    try:
        ix = index.open_dir("indexQ")
    except Exception:
        # Narrowed from a bare `except:`, which would also swallow
        # SystemExit / KeyboardInterrupt.
        return jsonify(failure="Index not found. Ensure that index exists and tries again.")
    qp = MultifieldParser(["title","body"], schema=ix.schema)
    queries = [qp.parse(term) for term in search_terms]
    # (removed an unused `docs = OrderedDict()` left over from a refactor)
    hit_list = []
    with ix.searcher() as searcher:
        for query in queries:
            results = searcher.search(query)
            for result in results:
                hit_list.append((str(query), result['title']))
    return jsonify(results=hit_list)
示例12: generic
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import parse [as 别名]
def generic(idx, qs=None, q=None, limit=5, parser=None, page=1):
    """Run a generic search against *idx*.

    Provide either *qs* (a query string) or *q* (a pre-built Query).
    Results are ordered by score, then newest-modified, then title, and
    paginated *limit* per page; pagination attributes are copied onto the
    returned result object.
    """
    if qs is None and q is None:
        raise ValueError('cannot have a null querystring and query')
    if parser is None:
        parser = MultifieldParser(
            ['title', 'keywords', 'summary', 'content', 'author'], idx.schema, group=OrGroup)
        # add better date parsing support
        parser.add_plugin(DateParserPlugin())
        parser.remove_plugin_class(WildcardPlugin)
    with idx.searcher() as search:
        # Build the Query object from the string unless one was supplied.
        query = parser.parse(qs) if qs else q
        facet = MultiFacet()
        facet.add_score()
        facet.add_field('modified', reverse=True)
        facet.add_field('title')
        results = search.search_page(query, pagenum=page, sortedby=facet, pagelen=limit)
        res = clean_results(idx, results, query)
        # pagination attributes on `search_page` method
        res.page_number = results.pagenum    # current page number
        res.page_total = results.pagecount   # total pages in results
        res.offset = results.offset          # first result of current page
        res.pagelen = results.pagelen        # the number of max results per page
        return res
示例13: search_documents
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import parse [as 别名]
def search_documents(filter):
    """Search indexed documents matching *filter*.

    A filter beginning with ``tags:`` searches only the tags field;
    anything else searches path and content. Returns whoosh results
    collapsed to the newest revision per path, or None when no index
    exists. (Python 2 code: uses ``unicode``.)

    NOTE: the parameter name shadows the builtin ``filter``; kept as-is
    for backward compatibility with existing callers.
    """
    # Check for existing index
    dir_path = os.path.join(DATA_DIR, 'index')
    if not os.path.exists(dir_path) or not Index.exists_in(dir_path):
        return None
    index = Index.open_dir(dir_path)
    if filter.startswith('tags:'):
        fields = ['tags']
        filter = filter[5:]
    else:
        fields = ['path', 'content']
    parser = MultifieldParser(fields, schema=index.schema)
    search_query = parser.parse(unicode(filter))
    # Try documents search. (Removed an unreachable trailing
    # `return results` — the try block always returns first.)
    try:
        searcher = index.searcher(closereader=False)
        return searcher.search(search_query,
                               collapse=[sorting.FieldFacet('path'), sorting.FieldFacet('content')],
                               collapse_order=sorting.FieldFacet('revision', reverse=True),
                               sortedby=[sorting.FieldFacet('path'), sorting.FieldFacet('date', reverse=True)]
                               )
    finally:
        searcher.close()
示例14: FTSSearcher
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import parse [as 别名]
class FTSSearcher(object):
    """Full-text searcher over the news index (title and content fields)."""

    def __init__(self, storage=default_storage):
        self._fragmenter_maxchars = 70
        self._fragmenter_surround = 70
        self._formatter = MarkFormatter()
        schema = Schema(news_id=ID(unique=True, stored=True),
                        title=TEXT(field_boost=2.0, analyzer=analyzer),
                        content=TEXT(analyzer=analyzer))
        self._ix = storage.open_index(schema=schema)
        self._parser = MultifieldParser(["title", "content"], self._ix.schema)
        self._searcher = self._ix.searcher()

    def search(self, query_string, limit=10):
        """Search the index for *query_string*, returning at most *limit*
        hits with highlighting configured."""
        # refresh searcher
        query_string = util.str2unicode(query_string)
        parsed = self._parser.parse(query_string)
        hits = self._searcher.search(parsed, limit=limit)
        # Configure highlight rendering on the result set.
        hits.formatter = self._formatter
        hits.fragmenter.maxchars = self._fragmenter_maxchars
        hits.fragmenter.surround = self._fragmenter_surround
        return hits

    def close(self):
        """Release the underlying searcher."""
        self._searcher.close()
示例15: search
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import parse [as 别名]
def search(querystring, language_code):
    """Search the per-language article index for *querystring*.

    :param querystring: raw user query.
    :param language_code: which language index to load.
    :returns: dict with a 'results' list of {'score', 'object'} entries,
        where 'object' is the Article looked up from the hit's stored code.
    """
    ix = LanguageIndex(settings.WHOOSH_INDEX_PATH, language_code, _get_schema()).load()
    parser = MultifieldParser(['title', 'keywords', 'content'], ix.schema)  # fieldboosts={'title':5, 'keywords':4, 'content':1})
    parser.remove_plugin_class(WildcardPlugin)  # remove unused feature for better performance
    query = parser.parse(querystring)
    result = {
        'results': [],
    }
    with ix.searcher() as searcher:
        results = searcher.search(query)
        # collect results (removed commented-out prints and pdb breakpoint)
        for hit in results:
            my_hit = {}
            my_hit['score'] = hit.score
            # NOTE(review): assumes every indexed code still maps to an
            # Article row; Article.objects.get raises if it was deleted.
            my_hit['object'] = Article.objects.get(code=hit.fields()['code'])
            result['results'].append(my_hit)
    return result