本文整理汇总了Python中whoosh.qparser.MultifieldParser.add_plugin方法的典型用法代码示例。如果您正苦于以下问题:Python MultifieldParser.add_plugin方法的具体用法?Python MultifieldParser.add_plugin怎么用?Python MultifieldParser.add_plugin使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类whoosh.qparser.MultifieldParser
的用法示例。
在下文中一共展示了MultifieldParser.add_plugin方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: generic
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
def generic(idx, qs=None, q=None, limit=5, parser=None, page=1):
    """Run a paginated search against *idx*.

    Either ``qs`` (a raw query string) or ``q`` (a pre-built whoosh Query)
    must be supplied.  When no *parser* is given, a multi-field OR parser
    with date support and wildcards disabled is built from the index schema.
    Returns the cleaned results annotated with pagination attributes.
    """
    if qs is q is None:
        raise ValueError('cannot have a null querystring and query')
    if parser is None:
        parser = MultifieldParser(
            ['title', 'keywords', 'summary', 'content', 'author'],
            idx.schema, group=OrGroup)
        # better date parsing support; drop wildcard expansion
        parser.add_plugin(DateParserPlugin())
        parser.remove_plugin_class(WildcardPlugin)
    with idx.searcher() as searcher:
        # build the Query object from the string, or use the one supplied
        whoosh_query = parser.parse(qs) if qs else q
        # rank by score, then most-recently-modified, then title
        facet = MultiFacet()
        facet.add_score()
        facet.add_field('modified', reverse=True)
        facet.add_field('title')
        results = searcher.search_page(
            whoosh_query, pagenum=page, sortedby=facet, pagelen=limit)
        res = clean_results(idx, results, whoosh_query)
        # expose pagination attributes of the `search_page` result
        res.page_number = results.pagenum    # current page number
        res.page_total = results.pagecount   # total pages in results
        res.offset = results.offset          # first result of current page
        res.pagelen = results.pagelen        # max results per page
        return res
示例2: query_parser
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
def query_parser(self, default_fields, idx_name=LATEST_REVS):
    """
    Build a query parser for a list of default fields.

    A MultifieldParser is used for several fields, a plain QueryParser
    for exactly one.  Regex query support is enabled, and pseudo fields
    are installed that translate a user *name* into the stored user *id*.
    """
    schema = self.schemas[idx_name]
    if not default_fields:
        raise ValueError("default_fields list must at least contain one field name")
    if len(default_fields) == 1:
        qp = QueryParser(default_fields[0], schema=schema)
    else:
        qp = MultifieldParser(default_fields, schema=schema)
    qp.add_plugin(RegexPlugin())

    def userid_pseudo_field_factory(fieldname):
        """generate a translator function, that searches for the userid
        in the given fieldname when provided with the username
        """
        def userid_pseudo_field(node):
            matches = user.search_users(**{NAME_EXACT: node.text})
            if not matches:
                # unknown user name: leave the query node untouched
                return node
            replacement = WordNode(matches[0].meta[ITEMID])
            replacement.set_fieldname(fieldname)
            return replacement
        return userid_pseudo_field

    qp.add_plugin(PseudoFieldPlugin(dict(
        # username:JoeDoe searches for revisions modified by JoeDoe
        username=userid_pseudo_field_factory(USERID),
        # assigned:JoeDoe searches for tickets assigned to JoeDoe
        assigned=userid_pseudo_field_factory(ASSIGNED_TO),
    )))
    return qp
示例3: get_whoosh_parser
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
def get_whoosh_parser(index):
    """Build a MultifieldParser over the searchable term fields of *index*.

    The GtLt plugin enables range syntax such as ``field:>value``.
    """
    from whoosh.qparser import MultifieldParser, GtLtPlugin
    # TODO: only active columns
    searchable_fields = ['content', 'unitid']
    qp = MultifieldParser(searchable_fields, index.schema)
    # whoosh's add_plugin accepts a plugin class as well as an instance
    qp.add_plugin(GtLtPlugin)
    return qp
示例4: answer_query
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
def answer_query(query):
    """Fuzzy-search article titles and summaries for *query*.

    Title matches are boosted well above summary matches; returns the
    ``tag`` field of up to 100 hits.
    """
    with main_index.searcher() as searcher:
        parser = MultifieldParser(
            ['title', 'summary'], main_index.schema,
            fieldboosts={'title': 5.0, 'summary': 0.2})
        parser.add_plugin(FuzzyTermPlugin())
        # tilde adds fuzzy parsing for 1 character and /1 requires the
        # first letter to match
        parsed = parser.parse(unicode(query) + '~/1')
        hits = searcher.search(parsed, limit=100)
        return [hit['tag'] for hit in hits]
示例5: render_GET
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
def render_GET(self, request):
# NOTE(review): this generator uses `yield` together with defer.returnValue,
# so it is presumably decorated with twisted's @defer.inlineCallbacks at the
# definition site (decorator not visible in this view) -- confirm.
# Original indentation was lost in extraction; lines are byte-identical.
# Derive the index path from the URL; the root cannot be searched.
section_path = '/'.join(request.postpath).strip('/')
if not section_path:
defer.returnValue(json.dumps({'status': 'error', 'message': 'unable to search root'}))
# first path component selects the section config used later
section_name = request.postpath[0]
ix = self._get_index(section_path)
if not ix:
defer.returnValue(json.dumps({'status': 'error', 'message': 'unknown index for %s' % section_path}))
schema_settings = self._get_schema_settings(section_path)
schema = schema_settings['schema']
# '?schema' request: return the KEYWORD fields and their value choices
if 'schema' in request.args:
# wait for an in-flight indexing run on this path before answering
if section_path in self.currently_indexing:
yield self.currently_indexing[section_path]
field_choices = schema_settings.get('field_choices', {})
fields = {}
for field in set(schema.names()):
# only KEYWORD fields with configured choices are exposed
if isinstance(schema[field], KEYWORD) and field in field_choices:
fields[field] = sorted(x for x in field_choices[field] if x)
defer.returnValue(json.dumps({'status': 'ok', 'schema': fields, 'type': schema_settings['type']}))
# normal search request: 'q' is mandatory
if 'q' not in request.args:
defer.returnValue(json.dumps({'status': 'error', 'message': 'missing q argument in url'}))
q = unicode(request.args['q'][0])
# parse against the single combined search field, with range syntax
parser = MultifieldParser(['search_field'], schema=schema)
parser.add_plugin(GtLtPlugin())
query = parser.parse(q)
with ix.searcher() as searcher:
# run the (potentially slow) search off the reactor thread
results = yield threads.deferToThread(searcher.search, query, limit=10000)
#corrected = searcher.correct_query(query, q) # jesus this is bad for titles
results = [x['linkitem'] for x in results]
section = settings.SECTIONS[section_name]
# wrap the hits in a virtual root folder for serialization
rootfolder = RootFolder(parent_path='', name='Search result for: %s' % q, urlname=self.name, date=0)
rootfolder['content_type'] = section.levels[0].content_type
for result in results:
rootfolder.add_item(result)
#if corrected.query != query:
# retval['suggestion'] = {
# 'rel': 'suggested_query',
# 'href': urlparse.urljoin(settings.BASE_URL, '/search/%s' % urllib.quote(section_path)) + '?%s' % urllib.urlencode({'q': corrected.string}),
# 'suggested_query': corrected.string,
# }
defer.returnValue(rootfolder.serialize())
示例6: _create_parser
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
def _create_parser(self, context):
    """Return a MultifieldParser over the backend's boosted fields,
    with meta-keyword expansion enabled for the given *context*."""
    boosts = self.field_boosts
    parser = MultifieldParser(boosts.keys(), WhooshBackend.SCHEMA,
                              fieldboosts=boosts)
    parser.add_plugin(MetaKeywordPlugin(
        meta_keyword_parsers=self.meta_keyword_parsers, context=context))
    return parser
示例7: query
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
def query(self, s=None, is_curated=True, is_fiction=True, pagenum=1, allpages=False):
    '''
    Search for books using whoosh, or return first page from all
    '''
    if self.whoosh is None:
        return
    if not s:
        # no search terms: default to listing every author
        query = Every('author')
    else:
        # match against both author and title, OR-ing the terms
        qp = MultifieldParser(['author', 'title'], self.whoosh.schema, group=OrGroup)
        if '*' not in s:
            # no wildcard present, so fall back to fuzzy matching by
            # suffixing every word with '~'
            qp.add_plugin(FuzzyTermPlugin())
            s = ' '.join('{}~'.format(word) for word in s.split())
        query = qp.parse(s)
    # only filter is_fiction / is_curated when true
    filters = []
    if is_curated is True:
        filters.append(Term('is_curated', is_curated))
    if is_fiction is True:
        filters.append(Term('is_fiction', is_fiction))
    qfilter = And(filters)
    with self.whoosh.searcher() as searcher:
        pagecount = None
        if allpages:
            # one search returning all pages up to pagenum
            results = searcher.search(query, filter=qfilter, limit=(self.pagelen * pagenum))
        else:
            # paginated search for a specific page (feeds infinite scroll)
            results = searcher.search_page(query, int(pagenum), filter=qfilter, pagelen=self.pagelen)
            pagecount = results.pagecount
        output = [item.fields() for item in results]
        if pagecount is None:
            # allpages branch: derive the page count from the hit count
            pagecount = int(math.ceil(float(len(output)) / self.pagelen))
        return {'results': output, 'pagecount': pagecount}
示例8: search_for_track
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
def search_for_track(self, querystring):
    """Search artist/album/title for *querystring* and return tracks.

    Queries shorter than 3 characters yield an empty list.  When the
    literal query matches nothing, a fuzzy retry (each word suffixed
    with '~2') is attempted.  Collection is time-limited to 1.4s.
    """
    if len(querystring) < 3:
        return []
    with self.ix.searcher() as searcher:
        collector = searcher.collector(limit=20)
        tlc = TimeLimitCollector(collector, timelimit=1.4, use_alarm=False)
        parser = MultifieldParser(["artist", "album", "title"], self.ix.schema)
        parser.add_plugin(qparser.FuzzyTermPlugin())
        myquery = parser.parse(querystring)
        try:
            searcher.search_with_collector(myquery, tlc)
            if len(tlc.results()) == 0:
                # nothing matched literally: retry with fuzzy terms
                myquery = parser.parse(" ".join(word + "~2" for word in querystring.split()))
                searcher.search_with_collector(myquery, tlc)
        except TimeLimit:
            logging.info("Time Limit for query reached!")
            logging.debug("czas zapytania: ", collector.runtime)
        # map hit ids back onto the in-memory track list
        return [self.__tracks[int(result["id"])] for result in tlc.results()]
示例9: page
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
def page(self, page, limit):
    """Return one page of matching documents.

    Parses the stored query against the engine's search fields, runs a
    sorted paginated search, records the total hit count on
    ``self.rows`` and returns the page's rows restored as engine
    document instances.
    """
    with self.engine.index.searcher() as searcher:
        parser = MultifieldParser(
            self.engine.search_fields,
            schema = self.engine.index.schema,
        )
        # enable range (< >), quoted-phrase, explicit-field and prefix syntax
        for plugin in (GtLtPlugin(), PhrasePlugin(), FieldsPlugin(), PrefixPlugin()):
            parser.add_plugin(plugin)
        # NOTE: swapping in an explicit WildcardPlugin was tried and
        # left at the parser default
        whoosh_query = parser.parse(self.query.toString(self.engine))
        results = searcher.search_page(whoosh_query, page, limit, sortedby = self.order)
        self.rows = results.total
        doc_class = self.engine.database.document
        docs = []
        for hit in results:
            data = {field: hit.get(field, None) for field in self.engine.stored_fields}
            docs.append(doc_class(data = data, restore = True))
        return docs
示例10: query_parser
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
def query_parser(self, default_fields, idx_name=LATEST_REVS):
    """
    Build a query parser for a list of default fields.

    Enables regex queries and a ``username:`` pseudo field that
    rewrites a user name into the matching stored userid.
    """
    schema = self.schemas[idx_name]
    if not default_fields:
        raise ValueError("default_fields list must at least contain one field name")
    if len(default_fields) > 1:
        qp = MultifieldParser(default_fields, schema=schema)
    else:
        qp = QueryParser(default_fields[0], schema=schema)
    qp.add_plugin(RegexPlugin())

    def username_pseudo_field(node):
        # look the user up by exact name; on a hit, replace the query
        # node with one that matches the stored userid field instead
        found = user.search_users(**{NAME_EXACT: node.text})
        if not found:
            return node
        replacement = WordNode(found[0].meta['userid'])
        replacement.set_fieldname("userid")
        return replacement

    qp.add_plugin(PseudoFieldPlugin({'username': username_pseudo_field}))
    return qp
示例11: search
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
def search(self, queries, fuzzy = True, default_fields = [], max_results = None):
    """Generator yielding the oid of every document matching *queries*.

    Accepts a single query or a list of queries; a lone bare term (no
    field or wildcard syntax) is wrapped in wildcards when *fuzzy* is
    on.  At most *max_results* oids are yielded per query when set.
    """
    if type(queries) != list:
        queries = [queries]
    if type(default_fields) != list:
        default_fields = [default_fields]
    # single one-word query without ':' or '*' -> substring match
    if fuzzy and len(queries) == 1 and len(queries[0].split()) == 1 and ':' not in queries[0] and '*' not in queries[0]:
        queries = ['*%s*' % (queries[0])]
    for raw in queries:
        if type(raw) != unicode:
            raw = raw.decode('utf-8')
        log.msg('search query: %s' % (raw))
        with self.ix.searcher() as searcher:
            parser = MultifieldParser(default_fields, self.ix.schema)
            # replace whoosh's stock wildcard plugin with the local one
            parser.remove_plugin_class(plugins.WildcardPlugin)
            parser.add_plugin(WildcardPlugin)
            parsed = parser.parse(raw)
            log.msg('search query parsed: %s' % (parsed))
            hits = searcher.search(parsed, limit = None)
            yielded = 0
            for hit in hits:
                yield hit['oid']
                yielded += 1
                if max_results and yielded >= max_results:
                    break
示例12: open_dir
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
# coding=utf-8
# Example: fuzzy full-text search over the "lagou_idx" job index,
# printing the first result page.
from whoosh.index import open_dir
from whoosh.qparser import MultifieldParser
from whoosh.qparser import FuzzyTermPlugin

index_dir = 'lagou_idx'
ix = open_dir(index_dir)
searcher = ix.searcher()
# parse against both the job name and description fields
parser = MultifieldParser(["name", "desc"], schema=ix.schema)
parser.add_plugin(FuzzyTermPlugin())
# Single field parser.
keyword = u'搜索 OR Pythn~2 city:上海'
q = parser.parse(keyword)
results = searcher.search_page(q, 1, pagelen=5)
print(u'{0} results found for keyword {1}, {2} returned: '.format(len(results), keyword, results.scored_length()))
for hit in results[:50]:
    print(hit['id'])
    print(hit['name'])
    # print(hit['city'])
    print(hit['com_name'])
    print('************')
示例13: BockCore
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
class BockCore():
def __init__(self, articles_path):
"""Attempt to initialize a folder with Markdown articles. If a git
repo, create a search index and populate.
Markdown Extension References
* http://facelessuser.github.io/pymdown-extensions
* https://pythonhosted.org/Markdown/extensions
"""
# Repo() raises if articles_path is not a git repository
self.article_repo = Repo(articles_path)
self.articles_path = articles_path
# Markdown/pymdownx extensions applied when rendering articles; the
# order here is the order they are handed to markdown.markdown()
self.markdown_extensions = [
'markdown.extensions.abbr',
'markdown.extensions.attr_list',
'markdown.extensions.def_list',
'markdown.extensions.fenced_code',
'markdown.extensions.footnotes',
'markdown.extensions.tables',
'markdown.extensions.smart_strong',
'markdown.extensions.admonition',
'markdown.extensions.codehilite',
'markdown.extensions.headerid',
'markdown.extensions.sane_lists',
'markdown.extensions.smarty',
'markdown.extensions.toc',
'markdown.extensions.wikilinks',
'pymdownx.betterem',
'pymdownx.caret',
'pymdownx.githubemoji',
'pymdownx.headeranchor',
'pymdownx.magiclink',
'pymdownx.mark',
'pymdownx.smartsymbols',
'pymdownx.tasklist',
'pymdownx.tilde',
'pymdownx.critic',
]
# per-extension settings; only codehilite needs a custom CSS class
self.markdown_extensions_config = {
'markdown.extensions.codehilite': {
'css_class': 'code-highlight'
}
}
# whoosh schema: title is the unique key, path is stored for lookup,
# content is indexed but not stored
self.__search_schema = Schema(
title=ID(stored=True, unique=True),
path=ID(stored=True),
content=TEXT,
)
# search titles and bodies together, with fuzzy-term (~) support
self.__search_parser = MultifieldParser(
['title', 'content'],
schema=self.__search_schema,
)
self.__search_parser.add_plugin(FuzzyTermPlugin())
self.__search_index = self.create_search_index()
self.populate_search_index()
# ------------------------ Article Functions ------------------------
def markdown_to_html(self, text):
    """Render a Markdown string to HTML5 using the configured
    extensions and their settings."""
    html = markdown.markdown(
        text=text,
        output_format='html5',
        extensions=self.markdown_extensions,
        extension_configs=self.markdown_extensions_config,
    )
    return html
def raw_article(self, article_path):
    """Read and return the Markdown source text of an article."""
    with open(self.full_article_path(article_path)) as handle:
        return handle.read()
def processed_article(self, article_path):
    """Return the rendered ('marked-down') HTML version of an article."""
    source = self.raw_article(article_path)
    return self.markdown_to_html(source)
def article_last_modified(self, article_path):
    """Return the article file's last-modified time as an ISO8601 string."""
    mtime = os.stat(self.full_article_path(article_path)).st_mtime
    return str(arrow.get(mtime))
def article_last_modified_human(self, article_path):
    """Return the article's last-modified time as human-readable
    relative text (e.g. '2 days ago')."""
    stamp = self.article_last_modified(article_path)
    return arrow.get(stamp).humanize()
def is_article_modified(self, article_path):
#.........这里部分代码省略.........
示例14: fetcher
# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
class fetcher(object):
def __init__(self, path):
    """Open the whoosh index at *path* and prepare query/sort helpers."""
    self.idxpath = path
    self.ix = open_dir(self.idxpath)
    # query over tweet text and creation time, with date syntax support
    self.query = MultifieldParser(['content','ctime'], schema=self.ix.schema)
    self.query.add_plugin(DateParserPlugin())
    # order hits by creation time first, relevance score second
    self.sorter = MultiFacet(["ctime", ScoreFacet()])
    self.parser = ttp.Parser()
    self.dateparser = parser.parser()
def fetch_thread_by_tid(self, retid):
t1 = int(round(time.time() * 1000))
tweets = []
try :
searcher = self.ix.searcher()
results = searcher.documents(retweetid=retid)
for r in results:
tweet = json.loads(r['json'])
tweet['created_at'] = self.dateparser.parse(tweet['created_at'])
tweets.append(tweet)
except Exception as e:
print 'fetch_tweets error' + str(e)
finally:
searcher.close()
t2 = int(round(time.time() * 1000))
tweets = sorted(tweets, key=lambda x: x['created_at'], reverse=False)
print '----> fetch tweets by retweet id ' + str(t2 - t1) + ' ms'
return tweets
def fetch_tweets_by_uid(self, uid):
t1 = int(round(time.time() * 1000))
try :
searcher = self.ix.searcher()
results = searcher.documents(ownerid=uid)
tweets = []
for r in results:
tweet = json.loads(r['json'])
tweet['user']['retweet_at'] = self.dateparser.parse(tweet['created_at'])
tweet['created_at'] = self.dateparser.parse(tweet['created_at'])
tweets.append(tweet)
except Exception as e:
print 'fetch_tweets error' + str(e)
finally:
searcher.close()
t2 = int(round(time.time() * 1000))
print '----> fetch tweets for the specified user costs ' + str(t2 - t1) + ' ms'
return tweets
def fetch_tweets_by_keyword(self, keyword, start, topk):
print 'thread : ' + keyword
t1 = int(round(time.time() * 1000))
tweets = []
users = []
tweetids = {}
qtext = unicode('ctime:[' + str(start) + ' to] AND ' + 'content:(' + keyword + ')')
try :
searcher = self.ix.searcher()
q = self.query.parse(qtext)
results = searcher.search(q)
for r in results:
t = json.loads(r['json'])
tt = t;
if 'retweeted_status' in t and t['retweeted_status'] is not None:
t = t['retweeted_status']
tid = t['id_str']
if tid not in tweetids:
user = {
"id":tt['user']['id_str'],
"retweet_time":self.dateparser.parse(tt['created_at']).strftime('%Y%m%d%H%M%S'),
"screen_name":tt['user']['screen_name'],
"profile_image_url":tt['user']['profile_image_url'],
"followers_count":tt['user']['followers_count']
};
users.append(user)
tweet = {}
tweet['id'] = tid
tweet['text'] = t['text']
tweet['creator'] = {}
tweet['creator']['id'] = t['user']['id_str']
tweet['creator']['creator'] = t['user']['screen_name']
tweet['creator']['creator_img'] = t['user']['profile_image_url']
tweet['retweet_count'] = t['retweet_count']
tweet['created_at'] = self.dateparser.parse(t['created_at']).strftime('%Y%m%d%H%M%S')
tweet['retweet_history'] = [user]
tweet['rank'] = max(t['user']['followers_count'], tt['user']['followers_count']) * t['retweet_count']
tweetids[tid] = tweet
tweets.append(tweet)
else :
user = {
"id":tt['user']['id_str'],
"retweet_time":self.dateparser.parse(tt['created_at']).strftime('%Y%m%d%H%M%S'),
"screen_name":tt['user']['screen_name'],
"profile_image_url":tt['user']['profile_image_url'],
#.........这里部分代码省略.........