本文整理汇总了Python中whoosh.qparser.QueryParser.parse方法的典型用法代码示例。如果您正苦于以下问题:Python QueryParser.parse方法的具体用法?Python QueryParser.parse怎么用?Python QueryParser.parse使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类whoosh.qparser.QueryParser
的用法示例。
在下文中一共展示了QueryParser.parse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_correct_query
# 需要导入模块: from whoosh.qparser import QueryParser [as 别名]
# 或者: from whoosh.qparser.QueryParser import parse [as 别名]
def test_correct_query():
    """correct_query() should respell words in the default field while
    leaving phrase contents and other-field terms in the query string
    untouched."""
    schema = fields.Schema(a=fields.TEXT(), b=fields.TEXT)
    with TempIndex(schema) as ix:
        with ix.writer() as w:
            for line in (u"alfa bravo charlie delta",
                         u"delta echo foxtrot golf",
                         u"golf hotel india juliet",
                         u"juliet kilo lima mike"):
                w.add_document(a=line)

        with ix.searcher() as s:
            qp = QueryParser("a", ix.schema)

            qtext = u'alpha ("brovo november" OR b:dolta) detail'
            q = qp.parse(qtext, ix.schema)
            c = s.correct_query(q, qtext)
            cq = c.query
            assert isinstance(cq, query.And)
            assert cq[0].text == "alfa"
            assert isinstance(cq[1], query.Or)
            # The phrase words themselves are corrected in the query tree.
            assert isinstance(cq[1][0], query.Phrase)
            assert cq[1][0].words == ["bravo", "november"]

            qtext = u'alpha b:("brovo november" a:delta) detail'
            q = qp.parse(qtext, ix.schema)
            c = s.correct_query(q, qtext)
            assert c.query.__unicode__() == '(a:alfa AND b:"brovo november" AND a:delta AND a:detail)'
            # The corrected string keeps the phrase text as typed.
            assert c.string == 'alfa b:("brovo november" a:delta) detail'

            hf = highlight.HtmlFormatter(classname="c")
            assert c.format_string(hf) == '<strong class="c term0">alfa</strong> b:("brovo november" a:delta) detail'
示例2: test_correct_query
# 需要导入模块: from whoosh.qparser import QueryParser [as 别名]
# 或者: from whoosh.qparser.QueryParser import parse [as 别名]
def test_correct_query():
    """correct_query() should respell words only in fields with
    spelling=True, leaving terms in other fields untouched."""
    schema = fields.Schema(a=fields.TEXT(spelling=True), b=fields.TEXT)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    for line in ("alfa bravo charlie delta",
                 "delta echo foxtrot golf",
                 "golf hotel india juliet",
                 "juliet kilo lima mike"):
        w.add_document(a=u(line))
    w.commit()

    s = ix.searcher()
    qp = QueryParser("a", ix.schema)

    qtext = u('alpha ("brovo november" OR b:dolta) detail')
    q = qp.parse(qtext, ix.schema)
    c = s.correct_query(q, qtext)
    # b:dolta is not corrected because field "b" has no spelling data.
    assert c.query.__unicode__() == '(a:alfa AND (a:"bravo november" OR b:dolta) AND a:detail)'
    assert c.string == 'alfa ("bravo november" OR b:dolta) detail'

    qtext = u('alpha b:("brovo november" a:delta) detail')
    q = qp.parse(qtext, ix.schema)
    c = s.correct_query(q, qtext)
    assert c.query.__unicode__() == '(a:alfa AND b:"brovo november" AND a:delta AND a:detail)'
    assert c.string == 'alfa b:("brovo november" a:delta) detail'

    hf = highlight.HtmlFormatter(classname="c")
    assert c.format_string(hf) == '<strong class="c term0">alfa</strong> b:("brovo november" a:delta) detail'
示例3: test_wildcard_existing_terms
# 需要导入模块: from whoosh.qparser import QueryParser [as 别名]
# 或者: from whoosh.qparser.QueryParser import parse [as 别名]
def test_wildcard_existing_terms():
    """existing_terms() on multiterm queries returns nothing unless
    expand=True, in which case the matching index terms are enumerated."""
    s = fields.Schema(key=fields.ID, value=fields.TEXT)
    ix = RamStorage().create_index(s)
    w = ix.writer()
    w.add_document(key=u("a"), value=u("alfa bravo bear charlie delta"))
    w.add_document(key=u("a"), value=u("boggle echo render rendering renders"))
    w.commit()
    r = ix.reader()
    qp = QueryParser("value", ix.schema)

    def words(terms):
        # Flatten (fieldname, text) pairs to a sorted space-joined string.
        texts = []
        for fieldname, text in terms:
            assert fieldname == "value"
            texts.append(text)
        return " ".join(sorted(texts))

    q = qp.parse(u("b*"))
    assert_equal(q.existing_terms(r), set())
    assert_equal(words(q.existing_terms(r, expand=True)), "bear boggle bravo")

    q = qp.parse(u("[a TO f]"))
    assert_equal(q.existing_terms(r), set())
    assert_equal(words(q.existing_terms(r, expand=True)),
                 "alfa bear boggle bravo charlie delta echo")

    q = query.Variations("value", "render")
    assert_equal(q.existing_terms(r, expand=False), set())
    assert_equal(words(q.existing_terms(r, expand=True)),
                 "render rendering renders")
示例4: search
# 需要导入模块: from whoosh.qparser import QueryParser [as 别名]
# 或者: from whoosh.qparser.QueryParser import parse [as 别名]
def search(self, query, page= -1, page_size=10):
    """Search artists, albums and tracks whose title matches *query*.

    :param query: raw query text entered by the user
    :param page: 1-based page number; any value < 1 disables paging
    :param page_size: results per page when paging is enabled
    :returns: list of SearchResult wrapping artist/album/track objects
    """
    # BUG FIX: the original rebound ``query`` to the parsed Query object and
    # then fed unicode(<Query>) back into the next parser for albums and
    # tracks.  Keep the raw text and parse it fresh for each searcher.
    query_text = unicode(query)
    search_results = list()

    def _collect(searcher, from_document, result_type):
        # One pass over a single searcher: parse, search, wrap hits.
        parsed = QueryParser('title', searcher.schema).parse(query_text)
        if page < 1:
            hits = searcher.search(parsed, limit=None, sortedby='title')
        else:
            hits = searcher.search(parsed, page, page_size, sortedby='title')
        for hit in hits:
            search_results.append(SearchResult(from_document(hit), result_type))

    _collect(self.artist_searcher, self._artist_from_document, Type.ARTIST)
    _collect(self.album_searcher, self._album_from_document, Type.ALBUM)
    _collect(self.track_searcher, self._track_from_document, Type.TRACK)
    return search_results
示例5: find
# 需要导入模块: from whoosh.qparser import QueryParser [as 别名]
# 或者: from whoosh.qparser.QueryParser import parse [as 别名]
def find(q):
ix = Index()
parser = QueryParser("content", schema=SCHEMA)
print parser.parse(unicode(q))
results = ix.find(q)
if len(results):
print "Found in %d documents" % len(results)
else:
print "Not found"
示例6: _search_tag_groups
# 需要导入模块: from whoosh.qparser import QueryParser [as 别名]
# 或者: from whoosh.qparser.QueryParser import parse [as 别名]
def _search_tag_groups(self, is_filtering_tags):
    """Return (seen, total) tag-count groupings.

    *total* counts every tag occurrence; *seen* counts only documents
    flagged SEEN, and is None while a tag filter is active.
    """
    parser = QueryParser("tag", self._index.schema)
    facet = sorting.FieldFacet("tag", allow_overlap=True)
    search_kwargs = {"limit": None, "groupedby": facet,
                     "maptype": sorting.Count}
    seen = None
    with self._index.searcher() as searcher:
        total = searcher.search(parser.parse("*"), **search_kwargs).groups()
        if not is_filtering_tags:
            seen_query = parser.parse("* AND flags:%s" % Status.SEEN)
            seen = searcher.search(seen_query, **search_kwargs).groups()
    return seen, total
示例7: WhooshGuess
# 需要导入模块: from whoosh.qparser import QueryParser [as 别名]
# 或者: from whoosh.qparser.QueryParser import parse [as 别名]
class WhooshGuess(object):
    """In-memory whoosh index that guesses the stored key for an input
    sentence via keyword/token overlap, optionally filtered on whether
    the sentence is a question.

    Usage: train() repeatedly, then train_ok() once, then guess().
    """

    def __init__(self):
        self.storage = RamStorage()
        schema = Schema(key=ID(stored=True), \
            ask=BOOLEAN(stored=True), \
            content=TEXT(stored=True, analyzer=RegexTokenizer()))
        self.ix = self.storage.create_index(schema)
        self.writer = self.ix.writer()
        self.is_train = False
        # Seed the index with the canned greeting lines.
        for s in greeting.split('\n'):
            self.train(u'matchinggreeting', s)

    @property
    def is_ok(self):
        # True once train_ok() has committed the index.
        return self.is_train

    def train(self, key, line):
        """Add one (key, line) pair to the pending index writer."""
        splits = u' '.join(list(lang.tokenizezh(line)))
        # NOTE: the "ask" flag is derived from the *key*, not the line.
        ask = lang.is_question(key)
        self.writer.add_document(key=key, content=splits, ask=ask)

    def train_ok(self):
        """Commit all trained documents and switch to query mode."""
        self.writer.commit(optimize=True)
        self.searcher = self.ix.searcher()
        self.parser = QueryParser("content", schema=self.ix.schema)
        self.is_train = True

    def guess(self, s, is_ask = None):
        """Return the key of the best match for sentence *s*, or ''.

        :param is_ask: explicit question-flag filter; when None it is
            derived from *s* via lang.is_question().
        """
        assert(self.is_train)
        keys = list(lang.keyword(s))
        if len(keys) == 0:
            return ''
        # MUST contain the keys
        keys = u' '.join(keys)
        splits = u' '.join(list(lang.tokenizezh(s)))
        q1 = self.parser.parse(keys)
        q2 = self.parser.parse(splits)
        q = q1 | q2
        # BUG FIX: use an explicit None check so a caller passing
        # is_ask=False filters on ask=False instead of silently
        # re-deriving the flag from *s* (the old `if not is_ask:`).
        if is_ask is None:
            ask = query.Term(u"ask", lang.is_question(s))
        else:
            ask = query.Term(u"ask", is_ask)
        results = self.searcher.search(q, filter=ask)
        for hit in results:
            return hit['key']
        return ''
示例8: update_changeset_index
# 需要导入模块: from whoosh.qparser import QueryParser [as 别名]
# 或者: from whoosh.qparser.QueryParser import parse [as 别名]
def update_changeset_index(self):
    """Incrementally index new changesets for every configured repository.

    For each repo, look up the changeset previously marked ``last:t`` in
    the index; if newer revisions exist, delete the stale "last" docs and
    index everything from that point on.  The writer is committed only
    when something actually changed, otherwise it is cancelled.
    """
    idx = open_dir(self.index_location, indexname=CHGSET_IDX_NAME)

    with idx.searcher() as searcher:
        writer = idx.writer()
        writer_is_dirty = False
        try:
            indexed_total = 0
            repo_name = None
            for repo_name, repo in self.repo_paths.items():
                # skip indexing if there aren't any revs in the repo
                num_of_revs = len(repo)
                if num_of_revs < 1:
                    continue

                qp = QueryParser('repository', schema=CHGSETS_SCHEMA)
                q = qp.parse(u"last:t AND %s" % repo_name)
                results = searcher.search(q)

                # default to scanning the entire repo
                last_rev = 0
                start_id = None

                if len(results) > 0:
                    # assuming that there is only one result, if not this
                    # may require a full re-index.
                    start_id = results[0]['raw_id']
                    last_rev = repo.get_changeset(revision=start_id).revision

                # there are new changesets to index or a new repo to index
                if last_rev == 0 or num_of_revs > last_rev + 1:
                    # delete the docs in the index for the previous
                    # last changeset(s)
                    for hit in results:
                        q = qp.parse(u"last:t AND %s AND raw_id:%s" %
                                     (repo_name, hit['raw_id']))
                        writer.delete_by_query(q)

                    # index from the previous last changeset + all new ones
                    indexed_total += self.index_changesets(writer,
                                                          repo_name, repo,
                                                          start_id)
                    writer_is_dirty = True
                    log.debug('indexed %s changesets for repo %s' % (
                        indexed_total, repo_name)
                    )
        finally:
            if writer_is_dirty:
                log.debug('>> COMMITING CHANGES TO CHANGESET INDEX<<')
                writer.commit(merge=True)
                log.debug('>>> FINISHED REBUILDING CHANGESET INDEX <<<')
            else:
                # BUG FIX: the original referenced ``writer.cancel`` without
                # calling it, leaving the writer (and its lock) open on the
                # nothing-to-do path.
                writer.cancel()
                log.debug('>> NOTHING TO COMMIT TO CHANGESET INDEX<<')
示例9: GET
# 需要导入模块: from whoosh.qparser import QueryParser [as 别名]
# 或者: from whoosh.qparser.QueryParser import parse [as 别名]
def GET(self):
    """Render the stored document for the ``url`` request parameter,
    together with up to 25 documents referring to it."""
    url = web.input().get('url')

    # Look up the document itself by its URL field.
    doc_query = QueryParser('url', schema=ix.schema).parse(url)
    doc = list(searcher.search(doc_query, limit=1))[0]

    # Then find pages whose refers_to field points at this URL.
    ref_query = QueryParser('refers_to', schema=ix.schema).parse(url)
    refs = searcher.search(ref_query, limit=25)

    return render.show(doc, refs, DocumentSearcher(ix))
示例10: test_query_terms
# 需要导入模块: from whoosh.qparser import QueryParser [as 别名]
# 或者: from whoosh.qparser.QueryParser import parse [as 别名]
def test_query_terms():
    """iter_all_terms() yields only concrete (field, text) pairs;
    all_tokens() additionally carries boosts through grouped sub-queries."""
    qp = QueryParser("a", None)

    parsed = qp.parse("alfa b:(bravo OR c:charlie) delta")
    assert sorted(parsed.iter_all_terms()) == [("a", "alfa"), ("a", "delta"),
                                               ("b", "bravo"), ("c", "charlie")]

    # A wildcard is not a concrete term, so only "alfa" remains.
    parsed = qp.parse("alfa brav*")
    assert sorted(parsed.iter_all_terms()) == [("a", "alfa")]

    # The ^2 boost on the group propagates to every token inside it.
    parsed = qp.parse('a b:("b c" d)^2 e')
    tokens = [(t.fieldname, t.text, t.boost) for t in parsed.all_tokens()]
    assert tokens == [('a', 'a', 1.0), ('b', 'b', 2.0), ('b', 'c', 2.0),
                      ('b', 'd', 2.0), ('a', 'e', 1.0)]
示例11: bm25_retrieve
# 需要导入模块: from whoosh.qparser import QueryParser [as 别名]
# 或者: from whoosh.qparser.QueryParser import parse [as 别名]
def bm25_retrieve (query, num_res):
    """Score documents matching *query* with a hand-rolled BM25 variant.

    The query terms are OR-ed together, the top-10 hits are assumed
    relevant (R), and ri/ni are each term's document frequency within the
    relevant set and the whole result set respectively.

    :param query: whitespace-separated query string
    :param num_res: maximum number of results to score
    :returns: dict mapping document id -> BM25 score
    """
    ix = open_dir('index')
    searcher = ix.searcher()
    query_terms = query.split(' ')
    # BUG FIX: join the terms instead of appending ' OR ' after every one,
    # which left a dangling OR operator at the end of the query string.
    bool_query = ' OR '.join(query_terms)
    parser = QueryParser("content", ix.schema)
    real_query = parser.parse(bool_query)
    results = searcher.search(real_query, limit = num_res)

    #assume that top 10 results is relevant
    R = 10
    N = len(results)
    ri = dict.fromkeys(query_terms, 0)  # term freq within relevant docs
    ni = dict.fromkeys(query_terms, 0)  # term doc freq overall
    #for each term in the query, calculate its ri and ni
    for term in query_terms:
        for res in searcher.search(real_query):
            if term in res['content']:
                ri[term] += 1
        term_query = parser.parse(term)
        ni[term] = len(searcher.search(term_query, limit = 500))

    # BUG FIX: always initialise the score accumulator; the original only
    # did so when num_res > 10, so smaller requests hit a KeyError below.
    new_results = dict((res['id'], 0) for res in results)

    #for each document, calculate its bm25 score
    k1 = 1.5
    b = 0.75
    avdl = 200  # assumed average document length
    for res in results:
        for term in query_terms:
            reg = re.compile(term)
            #fi is the i's term's frequency in the document
            fi = len(reg.findall(res['content']))
            # BUG FIX: use avdl here; the original declared avdl but then
            # hard-coded the literal 200 in the formula (same value).
            K = k1 * (1 - b + b * len(res['content']) / avdl)
            new_results[res['id']] += math.log(
                (ri[term] + 0.5) * (N - ni[term] - R + ri[term] + 0.5)
                / (R - ri[term] + 0.5) / (ni[term] - ri[term] + 0.5)
            ) * (k1 + 1) * fi / (K + fi)
    return new_results
示例12: contacts
# 需要导入模块: from whoosh.qparser import QueryParser [as 别名]
# 或者: from whoosh.qparser.QueryParser import parse [as 别名]
def contacts(self, query):
    """Return all to/cc/bcc addresses matching *query* as a substring,
    or an empty list for an empty query."""
    if not query:
        return []

    pattern = "*%s*" % query
    groupings = []
    with self._index.searcher() as searcher:
        # Run the same wildcard search against each recipient field and
        # collect the per-address groupings.
        for field in ('to', 'cc', 'bcc'):
            parsed = QueryParser(field, self._index.schema).parse(pattern)
            facet = sorting.FieldFacet(field, allow_overlap=True)
            hits = searcher.search(parsed, limit=None, groupedby=facet)
            groupings.append(hits.groups())
    return flatten(groupings)
示例13: test_correct_spell_field
# 需要导入模块: from whoosh.qparser import QueryParser [as 别名]
# 或者: from whoosh.qparser.QueryParser import parse [as 别名]
def test_correct_spell_field():
    """A TEXT field with spelling=True keeps the unstemmed words in a
    parallel spell_* field, and correct_query() suggests from there."""
    ana = analysis.StemmingAnalyzer()
    schema = fields.Schema(text=fields.TEXT(analyzer=ana, spelling=True))
    with TempIndex(schema) as ix:
        with ix.writer() as w:
            w.add_document(text=u"rendering shading modeling reactions")

        with ix.searcher() as s:
            text = s.schema["text"]
            spell_text = s.schema["spell_text"]

            r = s.reader()
            # The indexed field holds the stemmed forms...
            words = [text.from_bytes(t) for t in r.lexicon("text")]
            assert words == ["model", "reaction", "render", "shade"]
            # ...while the spelling field keeps the original words.
            words = [spell_text.from_bytes(t) for t in r.lexicon("spell_text")]
            assert words == ["modeling", "reactions", "rendering", "shading"]

            qp = QueryParser("text", s.schema)
            qtext = u"renderink"
            q = qp.parse(qtext, s.schema)
            assert len(s.search(q)) == 0

            c = s.correct_query(q, qtext)
            assert c.string == "rendering"
            assert c.query == query.Term("text", "rendering")

            hf = highlight.HtmlFormatter(classname="c")
            assert c.format_string(hf) == '<strong class="c term0">rendering</strong>'
示例14: __call__
# 需要导入模块: from whoosh.qparser import QueryParser [as 别名]
# 或者: from whoosh.qparser.QueryParser import parse [as 别名]
def __call__(self, query):
    """Search the index for *query* and return matching package names.

    The parsed description query is OR-ed, per whitespace-separated term,
    with literal Term matches against the description, name and every
    configured keyword field; the per-term alternatives are then AND-ed
    together so every term must match somewhere.
    """
    query = unicode(query)
    query_parser = QueryParser("description", schema=self.ix.schema)
    myquery = query_parser.parse(query)

    # Boolean operators are query syntax, not search terms.
    excluded = set(["AND", "OR", "NOT"])
    terms = [i for i in query.split() if i not in excluded]

    extendedquery = And(
        [
            Or(
                [myquery]
                + [Term("description", term), Term("name", term)]
                + [Term(field, term) for field in self.keywords]
            )
            for term in terms
        ]
    )

    # BUG FIX: close the searcher when done instead of leaking it
    # (the original opened it and never released it).
    with self.ix.searcher() as searcher:
        return [i["name"] for i in searcher.search(extendedquery, limit=None)]
示例15: search_files
# 需要导入模块: from whoosh.qparser import QueryParser [as 别名]
# 或者: from whoosh.qparser.QueryParser import parse [as 别名]
def search_files(index_dir, content):
    """
    search file content in index
    if not hit: return False
    if hit: return results
    """
    if not index.exists_in(index_dir):
        print ("index not exist")
        return False

    ix = index.open_dir(index_dir)
    content = unicode(content)
    with ix.searcher() as searcher:
        parser = QueryParser("content", ix.schema)
        query = parser.parse(content)
        # whoosh.searching.Results
        results = searcher.search(query)
        print (type(results))
        # FIX: use the function-call print form consistently; the original
        # mixed Python 2 print statements (`print l`) with print(...).
        print (len(results))
        for h in results:
            # whoosh.searching.Hit
            print (type(h))
            print (h)
        # NOTE: the original had an unreachable `return False` after this
        # return inside the with-block; it has been removed.
        return results