本文整理汇总了Python中whoosh.qparser.MultifieldParser类的典型用法代码示例。如果您正苦于以下问题:Python MultifieldParser类的具体用法?Python MultifieldParser怎么用?Python MultifieldParser使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了MultifieldParser类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: search_documents
def search_documents(filter):
    """Search indexed documents matching *filter*.

    A filter prefixed with ``'tags:'`` searches only the ``tags`` field;
    otherwise ``path`` and ``content`` are searched.  Hits are collapsed
    on (path, content) keeping the highest revision, and sorted by path
    then date (newest first).

    :param filter: query string (name kept for backward compatibility
        even though it shadows the builtin).
    :returns: a whoosh ``Results`` object, or ``None`` when no index exists.
    """
    # Bail out early when the index directory is absent or holds no index.
    dir_path = os.path.join(DATA_DIR, 'index')
    if not os.path.exists(dir_path) or not Index.exists_in(dir_path):
        return None
    index = Index.open_dir(dir_path)
    if filter.startswith('tags:'):
        fields = ['tags']
        filter = filter[5:]
    else:
        fields = ['path', 'content']
    parser = MultifieldParser(fields, schema=index.schema)
    search_query = parser.parse(unicode(filter))
    # Fix: create the searcher BEFORE the try block.  The original bound
    # `searcher` inside `try`, so a failure in index.searcher() raised a
    # confusing NameError from the finally clause instead of the real error.
    searcher = index.searcher(closereader=False)
    try:
        return searcher.search(
            search_query,
            collapse=[sorting.FieldFacet('path'), sorting.FieldFacet('content')],
            collapse_order=sorting.FieldFacet('revision', reverse=True),
            sortedby=[sorting.FieldFacet('path'), sorting.FieldFacet('date', reverse=True)]
        )
    finally:
        searcher.close()
示例2: find
def find(cmd, criteria, reindex=False):
    """Full-text search over the indexed command help.

    :param cmd: CLI command object carrying ``cli_ctx``.
    :param criteria: list of search terms (may contain 'OR'/'AND').
    :param reindex: when True, rebuild the index before searching.
    """
    from whoosh.qparser import MultifieldParser
    if reindex:
        _create_index(cmd.cli_ctx)
    try:
        ix = _get_index(cmd.cli_ctx)
    except ValueError:
        # got a pickle error because the index was written by a different python version
        # recreate the index and proceed
        _create_index(cmd.cli_ctx)
        ix = _get_index(cmd.cli_ctx)
    qp = MultifieldParser(
        ['cmd_name', 'short_summary', 'long_summary', 'examples'],
        schema=_get_schema()
    )
    if 'OR' in criteria or 'AND' in criteria:
        # looks more advanced, let's trust them to make a great query
        q = qp.parse(" ".join(criteria))
    else:
        # let's help out with some OR's to provide a less restrictive search
        # Fix: the original formatted the *list* itself, injecting its repr
        # (e.g. "['web', 'app']") into the query; quote the joined phrase.
        expanded_query = " OR ".join(criteria) + " OR '{}'".format(" ".join(criteria))
        q = qp.parse(expanded_query)
    with ix.searcher() as searcher:
        from whoosh.highlight import UppercaseFormatter, ContextFragmenter
        results = searcher.search(q)
        results.fragmenter = ContextFragmenter(maxchars=300, surround=200)
        results.formatter = UppercaseFormatter()
        for hit in results:
            _print_hit(hit)
示例3: search
def search(self, query):
    """General search for a query string.

    Searches the Title, Keywords and Abstract fields with query-time
    field boosts {"Title": 1.2, "Keywords": 1.1, "Abstract": 1.0}.

    :param query: user query string.
    :returns: list of stored-field dicts (``hit.fields()``), at most 50.
    """
    hit_docs = []
    index_dir = "D:/bjstinfo_index"  # deprecated. we should use variable or configure file.
    if not os.path.exists(index_dir):
        # Fix: parenthesized print is valid on both Python 2 and 3;
        # the original py2-only print statement is a SyntaxError on py3.
        print("Error: indexer doesn't exist!")
        sys.exit(1)
    ix = index.open_dir(index_dir)
    query_fields = ['Title', 'Keywords', 'Abstract']
    field_boosts = {'Title': 1.2, 'Keywords': 1.1, 'Abstract': 1.0}
    qp = MultifieldParser(query_fields, schema=ix.schema, fieldboosts=field_boosts)
    q = qp.parse(query)
    with ix.searcher() as s:
        results = s.search(q, limit=50, terms=True)
        for hit in results:
            hit_docs.append(hit.fields())
    return hit_docs
示例4: search
def search(q, limit=None):
    """Yield index hits for query string *q*, at most *limit* results."""
    ix = open_dir(DIRECTORY, NAME)
    with ix.searcher() as searcher:
        field_names = ['title', 'author', 'tags', 'notes', 'text', 'source', 'year']
        boosts = {'title': 7, 'year': 6, 'author': 10, 'tags': 4, 'notes': 2, 'text': 1}
        parser = MultifieldParser(fieldnames=field_names,
                                  fieldboosts=boosts,
                                  schema=ix.schema)
        # Whoosh chokes on queries with stop words, so remove them.
        parsed = parser.parse(remove_stopwords(q))
        for hit in searcher.search(parsed, limit=limit):
            yield hit
示例5: search
def search(querystring, language_code):
    """Search the language-specific article index for *querystring*.

    Returns a dict with a 'results' list of {'score', 'object'} entries,
    where 'object' is the matching Article model instance.
    """
    ix = LanguageIndex(settings.WHOOSH_INDEX_PATH, language_code, _get_schema()).load()
    parser = MultifieldParser(['title', 'keywords', 'content'], ix.schema)
    # remove unused feature for better performance
    parser.remove_plugin_class(WildcardPlugin)
    parsed = parser.parse(querystring)
    result = {'results': []}
    with ix.searcher() as searcher:
        for hit in searcher.search(parsed):
            result['results'].append({
                'score': hit.score,
                'object': Article.objects.get(code=hit.fields()['code']),
            })
    return result
示例6: keywords
def keywords(request):
    """Django view: extract key terms from search results for ?q=.

    Searches name/com_name/city, then pulls the most significant terms
    from the 'desc' field of the scored documents.
    """
    query = request.GET.get('q', '')
    if not query:
        return render(request, 'search/keywords.html', {'page_name': 'search.keywords'})
    qtext = get_tokenized_query(query)
    # Fix: dropped the leftover `print qtext` debug statement (py2-only
    # syntax, and it polluted stdout in production).
    idx_dir = os.path.join(settings.BASE_DIR, 'search/lagou_idx')
    ix = open_dir(idx_dir)
    searcher = ix.searcher()
    try:
        parser = MultifieldParser(["name", "com_name", 'city'], schema=ix.schema)
        q = parser.parse(qtext)
        plen = 100
        results = searcher.search(q, limit=plen)
        total = len(results)
        got = results.scored_length()
        # Scale the number of extracted key terms with how many docs matched.
        numterms = 100
        if got < 10:
            numterms = 10
        elif got < 100:
            numterms = 50
        keywords = [(kw, score) for kw, score in results.key_terms("desc", docs=got, numterms=numterms)]
    finally:
        # Fix: the original leaked the searcher.
        searcher.close()
    return render(request, 'search/keywords.html',
                  {'page_name': 'search.keywords',
                   'query': query,
                   'total': total,
                   'got': got,
                   'keywords': keywords,
                   })
示例7: getdocs
def getdocs():
    """Flask view: search "title"/"body" for each delimited term in ?NPS=.

    Returns JSON with (query, title) pairs for every hit, or a failure
    message when the index cannot be opened.
    """
    params = dict(request.args.items())
    search_terms = params['NPS'].split(quails.DELIMITER)
    try:
        ix = index.open_dir("indexQ")
    except Exception:
        # Fix: the original bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt; catch Exception instead.
        return jsonify(failure="Index not found. Ensure that index exists and tries again.")
    qp = MultifieldParser(["title", "body"], schema=ix.schema)
    queries = [qp.parse(term) for term in search_terms]
    # (removed unused `docs = OrderedDict()` local)
    hit_list = []
    with ix.searcher() as searcher:
        for query in queries:
            results = searcher.search(query)
            for result in results:
                hit_list.append((str(query), result['title']))
    return jsonify(results=hit_list)
示例8: Searcher
class Searcher(object):
    """
    Assigned to a Model class as ``search_query``, which enables text-querying.
    """

    def __init__(self, model_class, primary, index):
        # model_class: the SQLAlchemy model being searched
        # primary: name of the primary-key field (excluded from parsed fields)
        # index: whoosh index covering the model's schema
        self.model_class = model_class
        self.primary = primary
        self.index = index
        self.searcher = index.searcher()
        # Parse against every schema field except the primary key.
        fields = set(index.schema._fields.keys()) - set([self.primary])
        self.parser = MultifieldParser(list(fields), index.schema)

    def __call__(self, query, limit=None):
        """API similar to SQLAlchemy's queries.
        """
        session = self.model_class.query.session
        # NOTE(review): opens a fresh searcher per call; self.searcher from
        # __init__ appears unused here — confirm that is intentional.
        results = self.index.searcher().search(self.parser.parse(query), limit=limit)
        keys = [x[self.primary] for x in results]
        if not keys:
            # Dummy request...
            # (textual filter guarantees an empty result set)
            return session.query(self.model_class).filter("uid = -1")
        else:
            primary_column = getattr(self.model_class, self.primary)
            return session.query(self.model_class).filter(primary_column.in_(keys))

    def search(self, query, limit=None):
        """New API: returns both whoosh records and SA models."""
        # TODO: highly suboptimal
        session = self.model_class.query.session
        hits = self.index.searcher().search(self.parser.parse(query), limit=limit)
        for hit in hits:
            yield (hit, session.query(self.model_class).get(hit[self.primary]))
示例9: live_search
def live_search(self, query):
    """live search on ngram field"""
    searchable = self.live_search_field + self.search_field
    with self.ix.searcher(weighting=scoring.BM25F(title_B=2)) as searcher:
        parsed = MultifieldParser(searchable, schema=self.ix.schema).parse(query)
        results = searcher.search(parsed, limit=25).copy()
        payload = {'estimated_length': results.estimated_length(),
                   'scored_length': results.scored_length(),
                   'runtime': results.runtime,
                   'list': []}
        for pos, hit in enumerate(results):
            if 'id' in hit and 'space' in hit:
                link = url_for('document.view', space=hit['space'],
                               doc_id=hit['id'])
            else:
                link = None
            payload['list'].append({'id': hit.get('id', ''),
                                    'space': hit.get('space', ''),
                                    'title': hit.get('title', ''),
                                    'rank': hit.rank,
                                    'url': link,
                                    'score': results.score(pos)})
        return payload
示例10: search
def search(querytext, request, pagenum=1, maxresults=30, staff=False, scope=None,
           orderby='-creation_date'):
    """Paginated search over the 'resource' index with spell correction.

    When *querytext* yields no hits, the query is corrected via
    searcher.correct_query and re-run; the corrected string is reported
    back in search_result['corrected_q'].

    :param querytext: user query; falsy means "match everything".
    :param request: current request (used for per-user filtering/urls).
    :param pagenum: 1-based page number (clamped to >= 1).
    :param maxresults: page size.
    :param staff: include staff-only results.
    :param scope: optional search scope restriction.
    :param orderby: sort specification, default newest first.
    :returns: dict produced by prepare_search_response, with 'results'.
    """
    search_engine = get_search_engine('resource')
    search_result = {}
    if pagenum < 1:
        pagenum = 1
    with search_engine.searcher() as searcher:
        parser = MultifieldParser(search_engine.default_search_fields, searcher.schema)
        # Empty querytext falls through to Every() (match-all).
        user_q = querytext and parser.parse(querytext) or Every()
        user_q, search_kwargs = build_search_kwargs(user_q, request, scope, staff, orderby)
        # +1 so the response can tell whether a further page exists.
        hits = searcher.search(user_q, limit=(pagenum * maxresults) + 1, **search_kwargs)
        if querytext and hits.is_empty():
            # No hits: try a spelling-corrected version of the query.
            correction_q = parser.parse(querytext)
            corrected = searcher.correct_query(correction_q, querytext)
            if corrected.query != correction_q:
                querytext = corrected.string
                search_result['corrected_q'] = querytext
                user_q, search_kwargs = build_search_kwargs(corrected.query, request, scope, staff, orderby)
                # NOTE(review): this retry uses limit without the +1 used
                # above — confirm whether that asymmetry is intentional.
                hits = searcher.search(user_q, limit=(pagenum * maxresults), **search_kwargs)
        search_engine.prepare_search_response(search_result, hits, pagenum, maxresults)
        search_result['results'] = add_other_versions(searcher, search_result['results'], request.user, staff)
        add_absolute_urls(search_result['results'], request)
    return search_result
示例11: parse
def parse(text, schema=SCHEMA):
    """
    parse(text[, schema=SCHEMA])

    Analyse *text* according to the document-index *schema* and build
    a whoosh query over the "title" and "content" fields.

    .. code-block:: python

        >>> from storyline.engine.query import parse
        >>> from storyline.engine.schema import get_schema
        >>>
        >>> SCHEMA = get_schema()
        >>> parse("Mestre", SCHEMA)
        Or([Term('title', u'mestr'), Term('content', u'mestr')])

    :param text: The user's query.
    :type text: str
    :param schema: Schema of the document index.
    :type schema: Schema
    :returns: Query with terms and operators.
    """
    try:
        from whoosh.qparser import MultifieldParser
    except ImportError:
        print("Ocorreu um erro na importação do módulo whoosh.qparser.")
        # Fix: the original fell through and crashed with a NameError on
        # MultifieldParser below; re-raise the real failure instead.
        raise
    qp = MultifieldParser(["title", "content"], schema, None)
    return qp.parse(text)
示例12: search_commodity
def search_commodity():
    """Flask view: list commodities, optionally filtered by full text (?q=)
    and category (?c=).

    Matches from the whoosh index are mapped to peewee Commodity rows via
    an OR of their ids.
    """
    from shop import app
    ix = open_dir(app.config.get("INDEX_DIR"))
    query_raw = request.args.get('q', '')
    if query_raw:
        mparser = MultifieldParser(["content", "title"], schema=ix.schema)
        query = mparser.parse(unicode(query_raw.lower()))
        # Fix: use a context manager so the searcher is closed; the
        # original leaked it on every request.
        with ix.searcher() as searcher:
            results = searcher.search(query)
            result_id = [int(result['id']) for result in results]
        result_id = list(set(result_id))
        # OR together one peewee Q() per matched id.
        wq = None
        for rid in result_id:
            if not wq:
                wq = Q(id=rid)
            else:
                wq |= Q(id=rid)
        if wq:
            coms = Commodity.select().where(wq)
        else:
            coms = []
    else:
        coms = Commodity.select()
    category = int(request.args.get('c', '0'))
    if category and category != 1:
        coms = [c for c in coms if c.is_category(category)]
    return render_template('core/com_list.html', commodities=coms)
示例13: search
def search(self):
    """Controller action: full-text blog search over ?terms=.

    Requires at least 4 characters; otherwise flashes an error and
    redirects to the blog index.
    """
    c.terms = request.GET.get('terms', '')
    c.results = []
    if len(c.terms) < 4:
        h.flash(
            _('Search queries must be at least 4 characters in length.'),
            'error'
        )
        # redirect() raises, so execution stops here for short queries.
        redirect(url(controller='blog', action='index'))
    query = MultifieldParser(
        ['title', 'content', 'summary'],
        schema=index.schema
    ).parse(c.terms)
    results = index.searcher().search(query, limit=10)
    for result in results:
        # Terms that matched in the 'content' field, used for highlighting.
        terms = [v for k, v in query.all_terms() if k == 'content']
        # Stored 'url' field holds JSON kwargs for the url() helper.
        url_kwargs = json.loads(result['url'])
        result['url'] = url(**url_kwargs)
        # NOTE(review): `search.schema` here vs `index.schema` above —
        # confirm `search` is the intended module and not a typo.
        result['highlights'] = highlight(
            result['content'],
            terms,
            search.schema['content'].format.analyzer,
            ContextFragmenter(terms),
            HtmlFormatter(tagname='span', classname='highlight')
        )
        c.results.append(result)
    return render('search.tpl', slacks=True)
示例14: __call__
def __call__(self, query, limit=None, fields=None, or_=False):
    """Run *query* over *fields* (all indexed fields by default);
    OR semantics when *or_* is true, AND otherwise."""
    search_fields = self._all_fields if fields is None else fields
    grouping = OrGroup if or_ else AndGroup
    parsed = MultifieldParser(search_fields, self._index.schema,
                              group=grouping).parse(query)
    return self._index.searcher().search(parsed, limit=limit)
示例15: search_results
def _hit_context(hit, fields):
    """Concatenate non-empty, non-duplicate highlight snippets for *hit*
    across *fields*, appending a ')' to snippets whose trailing parenthesis
    was cut off by fragment truncation."""
    context = str()
    for field in fields:
        snippet = hit.highlights(field)
        if len(snippet) > 0 and snippet not in context:
            context += re.sub(r"(\(.*[^\)])", r'\1)', snippet)
    return context


def search_results(ix, search_query, fields):
    """Search *ix* over *fields*: AND matches first, then any additional
    OR-only matches.  Each returned dict carries the stored fields plus a
    'context' highlight string.

    :param ix: open whoosh index.
    :param search_query: raw query string.
    :param fields: field names to search and highlight.
    :returns: list of hit dicts.
    """
    qpo = MultifieldParser(fields, schema=ix.schema, group=qparser.OrGroup)
    qpa = MultifieldParser(fields, schema=ix.schema)
    qo = qpo.parse(search_query)
    qa = qpa.parse(search_query)
    data = []
    with ix.searcher() as s:
        resultsa = s.search(qa)
        resultso = s.search(qo)
        for hit in resultsa:
            # Fix: deduplicated the context-building code into _hit_context.
            row = dict(**hit)
            row["context"] = _hit_context(hit, fields)
            data.append(row)
        # Fix: O(1) set membership instead of the original O(n*m) inner loop.
        and_ids = set(hita["id"] for hita in resultsa)
        for hit in resultso:
            if hit["id"] not in and_ids:
                row = dict(**hit)
                row["context"] = _hit_context(hit, fields)
                data.append(row)
    return data