当前位置: 首页>>代码示例>>Python>>正文


Python MultifieldParser.add_plugin方法代码示例

本文整理汇总了Python中whoosh.qparser.MultifieldParser.add_plugin方法的典型用法代码示例。如果您正苦于以下问题:Python MultifieldParser.add_plugin方法的具体用法?Python MultifieldParser.add_plugin怎么用?Python MultifieldParser.add_plugin使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在whoosh.qparser.MultifieldParser的用法示例。


在下文中一共展示了MultifieldParser.add_plugin方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: generic

# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
def generic(idx, qs=None, q=None, limit=5, parser=None, page=1):
    """Run a paginated search against ``idx`` and return the cleaned results.

    Either a query string (``qs``) or a ready-made query object (``q``) must
    be supplied; a truthy ``qs`` takes precedence and is parsed with
    ``parser``.  When no parser is given, a multi-field OR-grouped parser over
    title/keywords/summary/content/author is built from the index schema.

    Results are sorted by score, then by ``modified`` (descending), then by
    ``title``.  Pagination details from the result page are copied onto the
    returned object.

    Raises:
        ValueError: if both ``qs`` and ``q`` are ``None``.
    """
    if qs is None and q is None:
        raise ValueError('cannot have a null querystring and query')

    if parser is None:
        default_fields = ['title', 'keywords', 'summary', 'content', 'author']
        parser = MultifieldParser(default_fields, idx.schema, group=OrGroup)

    # Date parsing is enabled and wildcard syntax disabled on every call,
    # including when the caller supplied the parser.
    parser.add_plugin(DateParserPlugin())
    parser.remove_plugin_class(WildcardPlugin)

    with idx.searcher() as searcher:
        query = parser.parse(qs) if qs else q

        ordering = MultiFacet()
        ordering.add_score()
        ordering.add_field('modified', reverse=True)
        ordering.add_field('title')

        hits = searcher.search_page(
            query, pagenum=page, sortedby=ordering, pagelen=limit)
        res = clean_results(idx, hits, query)

        # Mirror the pagination attributes of the result page:
        # current page number, total pages, offset of the first result on
        # this page, and the maximum number of results per page.
        pagination = (
            ('page_number', 'pagenum'),
            ('page_total', 'pagecount'),
            ('offset', 'offset'),
            ('pagelen', 'pagelen'),
        )
        for target, source in pagination:
            setattr(res, target, getattr(hits, source))

    return res
开发者ID:blakev,项目名称:sowing-seasons,代码行数:36,代码来源:queries.py

示例2: query_parser

# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
    def query_parser(self, default_fields, idx_name=LATEST_REVS):
        """
        Build a query parser for a list of default fields.

        A single default field yields a ``QueryParser``; several yield a
        ``MultifieldParser``.  Regex syntax and the ``username:`` /
        ``assigned:`` pseudo fields are enabled on the returned parser.

        Raises ValueError when ``default_fields`` is empty.
        """
        schema = self.schemas[idx_name]
        field_count = len(default_fields)
        if field_count < 1:
            raise ValueError("default_fields list must at least contain one field name")
        if field_count == 1:
            qp = QueryParser(default_fields[0], schema=schema)
        else:
            qp = MultifieldParser(default_fields, schema=schema)
        qp.add_plugin(RegexPlugin())

        def userid_pseudo_field_factory(fieldname):
            """Return a translator that rewrites a username node into a
               userid search on the given fieldname
            """
            def userid_pseudo_field(node):
                matches = user.search_users(**{NAME_EXACT: node.text})
                if not matches:
                    # unknown username: leave the node untouched
                    return node
                replacement = WordNode(matches[0].meta[ITEMID])
                replacement.set_fieldname(fieldname)
                return replacement
            return userid_pseudo_field

        pseudo_fields = {
            # username:JoeDoe searches for revisions modified by JoeDoe
            'username': userid_pseudo_field_factory(USERID),
            # assigned:JoeDoe searches for tickets assigned to JoeDoe
            'assigned': userid_pseudo_field_factory(ASSIGNED_TO),
        }
        qp.add_plugin(PseudoFieldPlugin(pseudo_fields))
        return qp
开发者ID:denedios,项目名称:moin-2.0,代码行数:36,代码来源:indexing.py

示例3: get_whoosh_parser

# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
def get_whoosh_parser(index):
    """Return a whoosh parser over 'content' and 'unitid' with the
    greater-than/less-than plugin registered, using the schema of ``index``.
    """
    from whoosh.qparser import GtLtPlugin, MultifieldParser

    # TODO: only active columns
    whoosh_parser = MultifieldParser(['content', 'unitid'], index.schema)
    # The plugin class (not an instance) is handed to add_plugin, exactly as
    # the original code did.
    whoosh_parser.add_plugin(GtLtPlugin)
    return whoosh_parser
开发者ID:kcl-ddh,项目名称:digipal,代码行数:10,代码来源:viewer.py

示例4: answer_query

# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
def answer_query(query):
    """Search ``main_index`` for ``query`` and return the 'tag' of each hit.

    Titles are boosted (5.0) over summaries (0.2); at most 100 hits are
    considered.
    """
    boosts = {'title': 5.0, 'summary': 0.2}
    with main_index.searcher() as searcher:
        parser = MultifieldParser(
            ['title', 'summary'], main_index.schema, fieldboosts=boosts)
        parser.add_plugin(FuzzyTermPlugin())
        # The '~' suffix asks for a fuzzy match within one edit, and '/1'
        # requires the first letter to match.
        fuzzy_query = parser.parse(unicode(query) + '~/1')
        hits = searcher.search(fuzzy_query, limit=100)
        tags = [hit['tag'] for hit in hits]
    return tags
开发者ID:Fangang,项目名称:metacademy-application,代码行数:12,代码来源:search.py

示例5: render_GET

# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
 def render_GET(self, request):
     """Handle a search (or schema) request for the section named in the URL path.

     The section path is built from ``request.postpath``.  Three outcomes are
     possible, each delivered through ``defer.returnValue``:

     * a JSON error object when the path is empty, the index is unknown, or
       the ``q`` argument is missing;
     * when ``schema`` is among the request arguments, a JSON object listing
       the KEYWORD fields that have configured choices;
     * otherwise, the serialized ``RootFolder`` holding the ``linkitem`` of
       every hit for the parsed query.

     NOTE(review): the body uses ``yield`` together with ``defer.returnValue``,
     so it is presumably decorated with ``@defer.inlineCallbacks`` outside the
     visible snippet -- confirm.
     """
     section_path = '/'.join(request.postpath).strip('/')
     if not section_path:
         defer.returnValue(json.dumps({'status': 'error', 'message': 'unable to search root'}))
     
     # The first path segment selects the section configuration used below.
     section_name = request.postpath[0]
     
     ix = self._get_index(section_path)
     if not ix:
         defer.returnValue(json.dumps({'status': 'error', 'message': 'unknown index for %s' % section_path}))
     
     schema_settings = self._get_schema_settings(section_path)
     schema = schema_settings['schema']
     
     if 'schema' in request.args:
         # If this section has an entry in currently_indexing, wait on it
         # before describing the schema.
         if section_path in self.currently_indexing:
             yield self.currently_indexing[section_path]
         
         field_choices = schema_settings.get('field_choices', {})
         fields = {}
         
         # Only KEYWORD fields that have configured choices are reported;
         # falsy choices are dropped and the rest sorted.
         for field in set(schema.names()):
             if isinstance(schema[field], KEYWORD) and field in field_choices:
                 fields[field] = sorted(x for x in field_choices[field] if x)
         
         defer.returnValue(json.dumps({'status': 'ok', 'schema': fields, 'type': schema_settings['type']}))
     
     if 'q' not in request.args:
         defer.returnValue(json.dumps({'status': 'error', 'message': 'missing q argument in url'}))
     # Only the first value of the q argument is used.
     q = unicode(request.args['q'][0])
     
     parser = MultifieldParser(['search_field'], schema=schema)
     parser.add_plugin(GtLtPlugin())
     query = parser.parse(q)
     
     with ix.searcher() as searcher:
         # The search itself runs via deferToThread; up to 10000 hits are fetched.
         results = yield threads.deferToThread(searcher.search, query, limit=10000)
         #corrected = searcher.correct_query(query, q) # jesus this is bad for titles
         results = [x['linkitem'] for x in results]
     
     section = settings.SECTIONS[section_name]
     rootfolder = RootFolder(parent_path='', name='Search result for: %s' % q, urlname=self.name, date=0)
     rootfolder['content_type'] = section.levels[0].content_type
     
     for result in results:
         rootfolder.add_item(result)
     
     #if corrected.query != query:
     #    retval['suggestion'] = {
     #        'rel': 'suggested_query',
     #        'href': urlparse.urljoin(settings.BASE_URL, '/search/%s' % urllib.quote(section_path)) + '?%s' % urllib.urlencode({'q': corrected.string}),
     #        'suggested_query': corrected.string,
     #    }
     
     defer.returnValue(rootfolder.serialize())
开发者ID:JohnDoee,项目名称:tidalstream-apiserver-plugins,代码行数:57,代码来源:search.py

示例6: _create_parser

# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
 def _create_parser(self, context):
     """Build a multi-field parser over the boosted fields of this object.

     The parser covers every key of ``self.field_boosts`` using the
     ``WhooshBackend.SCHEMA`` schema, and has a ``MetaKeywordPlugin``
     (configured with this object's meta keyword parsers and ``context``)
     registered on it.
     """
     boosts = self.field_boosts
     parser = MultifieldParser(
         boosts.keys(), WhooshBackend.SCHEMA, fieldboosts=boosts)
     keyword_plugin = MetaKeywordPlugin(
         meta_keyword_parsers=self.meta_keyword_parsers,
         context=context,
     )
     parser.add_plugin(keyword_plugin)
     return parser
开发者ID:Stackato-Apps,项目名称:bloodhound,代码行数:13,代码来源:query_parser.py

示例7: query

# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
    def query(self, s=None, is_curated=True, is_fiction=True, pagenum=1, allpages=False):
        '''
        Search for books using whoosh, or return first page from all

        s          -- search string matched against author and title; when
                      empty, every document with an author is listed
        is_curated -- when exactly True, restrict to curated books
        is_fiction -- when exactly True, restrict to fiction books
        pagenum    -- 1-based page number (anything int() accepts)
        allpages   -- when true, return everything up to and including
                      page `pagenum` in one result list

        Returns a dict with 'results' (list of stored-field dicts) and
        'pagecount', or None when no whoosh index is attached.
        '''
        if self.whoosh is None:
            return

        if not s:
            # default to list all authors
            query = Every('author')
        else:
            # create a search by author and title
            qp = MultifieldParser(['author', 'title'], self.whoosh.schema, group=OrGroup)

            # fuzzy query only if wildcard not present
            if '*' not in s:
                qp.add_plugin(FuzzyTermPlugin())

                # append '~' to every word so each term is matched fuzzily
                s = ' '.join('{}~'.format(w) for w in s.split())

            # parse the search terms
            query = qp.parse(s)

        # only filter is_fiction / is_curated when true
        filters = []
        if is_curated is True:
            filters.append(Term('is_curated', is_curated))
        if is_fiction is True:
            filters.append(Term('is_fiction', is_fiction))
        # An And() with no subqueries matches no documents, so using it as a
        # filter would hide every result; pass no filter at all in that case.
        qfilter = And(filters) if filters else None

        # Coerce once so both branches below treat pagenum the same way
        # (previously only the paginated branch converted it).
        pagenum = int(pagenum)

        with self.whoosh.searcher() as searcher:
            pagecount = None

            if allpages:
                # special search returning all pages upto pagenum
                results = searcher.search(query, filter=qfilter, limit=(self.pagelen * pagenum))
            else:
                # paginated search for specific page, or to feed infinite scroll
                results = searcher.search_page(query, pagenum, filter=qfilter, pagelen=self.pagelen)
                pagecount = results.pagecount

            # stored fields must be read while the searcher is still open
            output = [item.fields() for item in results]

            if pagecount is None:
                # the all-pages search has no page count; derive it from the
                # number of rows actually returned
                pagecount = int(math.ceil(float(len(output)) / self.pagelen))

        return {'results': output, 'pagecount': pagecount}
开发者ID:oii,项目名称:ogre,代码行数:54,代码来源:search.py

示例8: search_for_track

# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
 def search_for_track(self, querystring):
     """Return the tracks matching ``querystring`` (at most 20).

     Query strings shorter than three characters return an empty list without
     touching the index.  Otherwise artist, album and title are searched under
     a 1.4 second time limit; if the plain query finds nothing, every word is
     retried as a fuzzy term (``word~2``).  When the time limit is hit the
     results collected so far are returned.
     """
     if len(querystring) < 3:
         return []

     with self.ix.searcher() as searcher:
         collector = searcher.collector(limit=20)
         tlc = TimeLimitCollector(collector, timelimit=1.4, use_alarm=False)
         parser = MultifieldParser(["artist", "album", "title"], self.ix.schema)
         parser.add_plugin(qparser.FuzzyTermPlugin())
         myquery = parser.parse(querystring)
         try:
             searcher.search_with_collector(myquery, tlc)
             if len(tlc.results()) == 0:
                 # nothing matched exactly: retry with every word made fuzzy
                 myquery = parser.parse(" ".join(word + "~2" for word in querystring.split()))
                 searcher.search_with_collector(myquery, tlc)
         except TimeLimit:
             logging.info("Time Limit for query reached!")
         # The message needs a %s placeholder: without one, logging fails to
         # format the record and the runtime is never written to the log.
         logging.debug("czas zapytania: %s", collector.runtime)
         return [self.__tracks[int(result["id"])] for result in tlc.results()]
开发者ID:mRokita,项目名称:sMusic-core,代码行数:22,代码来源:music_library.py

示例9: page

# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
    def page(self, page, limit):
        """Return the documents on result page ``page`` (``limit`` per page).

        The stored query is rendered with ``self.query.toString`` and parsed
        over the engine's search fields, then run sorted by ``self.order``.
        ``self.rows`` is set to the total reported by the result page.  Each
        hit is wrapped in the engine's document class, built from the
        engine's stored fields.
        """
        engine = self.engine
        with engine.index.searcher() as searcher:
            parser = MultifieldParser(engine.search_fields, schema=engine.index.schema)
            # The wildcard plugin is left as the parser provides it; only
            # these four plugins are added on top, in this order.
            for plugin_class in (GtLtPlugin, PhrasePlugin, FieldsPlugin, PrefixPlugin):
                parser.add_plugin(plugin_class())

            whoosh_query = parser.parse(self.query.toString(engine))
            hits = searcher.search_page(whoosh_query, page, limit, sortedby=self.order)
            self.rows = hits.total

            doc_class = engine.database.document
            documents = [
                doc_class(
                    data={name: hit.get(name, None) for name in engine.stored_fields},
                    restore=True,
                )
                for hit in hits
            ]

        return documents
开发者ID:toudi,项目名称:django-search-nonmodel,代码行数:29,代码来源:__init__.py

示例10: query_parser

# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
 def query_parser(self, default_fields, idx_name=LATEST_REVS):
     """
     Build a query parser for a list of default fields.

     A single default field yields a ``QueryParser``; several yield a
     ``MultifieldParser``.  Regex syntax and the ``username:`` pseudo field
     are enabled on the returned parser.

     Raises ValueError when ``default_fields`` is empty.
     """
     schema = self.schemas[idx_name]
     field_count = len(default_fields)
     if field_count < 1:
         raise ValueError("default_fields list must at least contain one field name")
     if field_count == 1:
         qp = QueryParser(default_fields[0], schema=schema)
     else:
         qp = MultifieldParser(default_fields, schema=schema)
     qp.add_plugin(RegexPlugin())

     def username_pseudo_field(node):
         # Rewrite username:<name> into a userid search when the user is
         # found; otherwise hand the node back unchanged.
         matches = user.search_users(**{NAME_EXACT: node.text})
         if not matches:
             return node
         replacement = WordNode(matches[0].meta['userid'])
         replacement.set_fieldname("userid")
         return replacement

     qp.add_plugin(PseudoFieldPlugin({'username': username_pseudo_field}))
     return qp
开发者ID:pombredanne,项目名称:moin2,代码行数:25,代码来源:indexing.py

示例11: search

# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
 def search(self, queries, fuzzy = True, default_fields = [], max_results = None):
     """Yield the 'oid' of every hit for each query, in order.

     ``queries`` and ``default_fields`` may each be a single value or a list.
     With ``fuzzy`` set, a lone one-word query containing neither ':' nor '*'
     is wrapped in wildcards.  ``max_results``, when truthy, caps the number
     of hits yielded per query.
     """
     if type(queries) is not list:
         queries = [queries]
     if type(default_fields) is not list:
         default_fields = [default_fields]

     if fuzzy and len(queries) == 1:
         only = queries[0]
         if len(only.split()) == 1 and ':' not in only and '*' not in only:
             queries = ['*%s*' % (only)]

     for raw_query in queries:
         if type(raw_query) is not unicode:
             raw_query = raw_query.decode('utf-8')
         log.msg('search query: %s' % (raw_query))
         with self.ix.searcher() as searcher:
             parser = MultifieldParser(default_fields, self.ix.schema)
             # swap the stock wildcard plugin for the WildcardPlugin in scope here
             parser.remove_plugin_class(plugins.WildcardPlugin)
             parser.add_plugin(WildcardPlugin)
             parsed = parser.parse(raw_query)
             log.msg('search query parsed: %s' % (parsed))
             hits = searcher.search(parsed, limit = None)
             for count, hit in enumerate(hits, 1):
                 yield hit['oid']
                 if max_results and count >= max_results:
                     break
开发者ID:sii,项目名称:siptrackd,代码行数:26,代码来源:search.py

示例12: open_dir

# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
# coding=utf-8
from whoosh.index import open_dir
from whoosh.qparser import MultifieldParser
from whoosh.qparser import FuzzyTermPlugin

idx_dir = 'lagou_idx'
ix = open_dir(idx_dir)

parser = MultifieldParser(["name", "desc"], schema=ix.schema)
parser.add_plugin(FuzzyTermPlugin())

# Multi-field query: unqualified terms are searched in "name" and "desc";
# "Pythn~2" is a fuzzy term and "city:" targets the city field explicitly.
k = u'搜索 OR Pythn~2 city:上海'
q = parser.parse(k)

# Open the searcher as a context manager so it is closed once the hits have
# been printed (it was previously left open for the life of the process).
with ix.searcher() as searcher:
    results = searcher.search_page(q, 1, pagelen=5)

    print(u'{0} results found for keyword {1}, {2} returned: '.format(len(results), k, results.scored_length()))
    for hit in results[:50]:
        print(hit['id'])
        print(hit['name'])
        # print(hit['city'])
        print(hit['com_name'])
        print('************')
开发者ID:anderscui,项目名称:nlpy,代码行数:27,代码来源:lagou_query_fuzzy.py

示例13: BockCore

# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
class BockCore():
    def __init__(self, articles_path):
        """Set up the article repository, Markdown configuration and search.

        ``articles_path`` is opened with ``Repo`` and remembered.  The search
        schema (title, path, content) and a fuzzy-enabled parser over title
        and content are created, then the search index is created and
        populated.

        Markdown Extension References
        * http://facelessuser.github.io/pymdown-extensions
        * https://pythonhosted.org/Markdown/extensions
        """
        self.article_repo = Repo(articles_path)
        self.articles_path = articles_path

        # Extensions are listed in the order they are handed to Markdown.
        self.markdown_extensions = [
            # extensions from the markdown package
            'markdown.extensions.abbr', 'markdown.extensions.attr_list',
            'markdown.extensions.def_list', 'markdown.extensions.fenced_code',
            'markdown.extensions.footnotes', 'markdown.extensions.tables',
            'markdown.extensions.smart_strong', 'markdown.extensions.admonition',
            'markdown.extensions.codehilite', 'markdown.extensions.headerid',
            'markdown.extensions.sane_lists', 'markdown.extensions.smarty',
            'markdown.extensions.toc', 'markdown.extensions.wikilinks',
            # extensions from pymdownx
            'pymdownx.betterem', 'pymdownx.caret',
            'pymdownx.githubemoji', 'pymdownx.headeranchor',
            'pymdownx.magiclink', 'pymdownx.mark',
            'pymdownx.smartsymbols', 'pymdownx.tasklist',
            'pymdownx.tilde', 'pymdownx.critic',
        ]
        codehilite_options = {'css_class': 'code-highlight'}
        self.markdown_extensions_config = {
            'markdown.extensions.codehilite': codehilite_options,
        }

        schema = Schema(
            title=ID(stored=True, unique=True),
            path=ID(stored=True),
            content=TEXT,
        )
        self.__search_schema = schema

        search_parser = MultifieldParser(['title', 'content'], schema=schema)
        self.__search_parser = search_parser
        search_parser.add_plugin(FuzzyTermPlugin())

        self.__search_index = self.create_search_index()
        self.populate_search_index()

    # ------------------------ Article Functions ------------------------

    def markdown_to_html(self, text):
        """Render a Markdown string as HTML5 using this object's configured
        extensions and extension settings
        """
        render_options = {
            'output_format': 'html5',
            'extensions': self.markdown_extensions,
            'extension_configs': self.markdown_extensions_config,
        }
        return markdown.markdown(text=text, **render_options)

    def raw_article(self, article_path):
        """Read an article from disk and return its text unchanged
        """
        location = self.full_article_path(article_path)
        with open(location) as handle:
            return handle.read()

    def processed_article(self, article_path):
        """Load an article and return it converted from Markdown to HTML
        """
        source_text = self.raw_article(article_path)
        return self.markdown_to_html(source_text)

    def article_last_modified(self, article_path):
        """Return the article file's modification time as the string form of
        the ``arrow`` object built from its ``st_mtime``
        """
        location = self.full_article_path(article_path)
        modified_at = os.stat(location).st_mtime
        return str(arrow.get(modified_at))

    def article_last_modified_human(self, article_path):
        """Return the article's last-modified time in humanized form, as
        produced by ``arrow``'s ``humanize()``
        """
        timestamp = self.article_last_modified(article_path)
        return arrow.get(timestamp).humanize()

    def is_article_modified(self, article_path):
#.........这里部分代码省略.........
开发者ID:afreeorange,项目名称:bock,代码行数:103,代码来源:core.py

示例14: fetcher

# 需要导入模块: from whoosh.qparser import MultifieldParser [as 别名]
# 或者: from whoosh.qparser.MultifieldParser import add_plugin [as 别名]
class fetcher(object):
    
    def __init__(self, path):
        """Open the index stored at ``path`` and prepare the query helpers.

        Sets up a parser over 'content' and 'ctime' with date parsing
        enabled, a sorter on 'ctime' then score, and the ``ttp`` and date
        parsers used by the fetch methods.
        """
        self.idxpath = path
        self.ix = open_dir(path)

        query_parser = MultifieldParser(['content', 'ctime'], schema=self.ix.schema)
        self.query = query_parser
        query_parser.add_plugin(DateParserPlugin())

        self.sorter = MultiFacet(["ctime", ScoreFacet()])
        self.parser = ttp.Parser()
        # `parser` here is the module-level name, not the query parser above
        self.dateparser = parser.parser()
        
    def fetch_thread_by_tid(self, retid):
        """Return the tweets whose ``retweetid`` equals ``retid``, oldest first.

        Each stored document's 'json' payload is decoded and its
        'created_at' value replaced by the result of
        ``self.dateparser.parse``.  Failures are reported on stdout and
        whatever was collected before the failure is returned (an empty list
        if the searcher could not be opened).
        """
        t1 = int(round(time.time() * 1000))
        tweets = []
        try:
            # The context manager closes the searcher on exit.  Previously
            # the searcher was bound inside the try and closed in a finally
            # clause, which raised UnboundLocalError if opening it failed.
            with self.ix.searcher() as searcher:
                for r in searcher.documents(retweetid=retid):
                    tweet = json.loads(r['json'])
                    tweet['created_at'] = self.dateparser.parse(tweet['created_at'])
                    tweets.append(tweet)
        except Exception as e:
            # deliberate best-effort: report and fall through with partial data
            # (single-argument print() behaves the same on Python 2 and 3)
            print('fetch_tweets error' + str(e))
        t2 = int(round(time.time() * 1000))
        tweets = sorted(tweets, key=lambda x: x['created_at'], reverse=False)
        print('----> fetch tweets by retweet id ' + str(t2 - t1) + ' ms')
        return tweets
     
    def fetch_tweets_by_uid(self, uid):
        """Return the tweets whose ``ownerid`` equals ``uid``.

        Each stored document's 'json' payload is decoded; the parsed
        'created_at' value is stored both as ``tweet['user']['retweet_at']``
        and as ``tweet['created_at']``.  Failures are reported on stdout and
        whatever was collected before the failure is returned (an empty list
        if the searcher could not be opened).
        """
        t1 = int(round(time.time() * 1000))
        # Bound before the try so the return below always has a value, even
        # when the searcher cannot be opened.
        tweets = []
        try:
            # The context manager closes the searcher on exit.  Previously
            # the searcher was bound inside the try and closed in a finally
            # clause, which raised UnboundLocalError if opening it failed.
            with self.ix.searcher() as searcher:
                for r in searcher.documents(ownerid=uid):
                    tweet = json.loads(r['json'])
                    tweet['user']['retweet_at'] = self.dateparser.parse(tweet['created_at'])
                    tweet['created_at'] = self.dateparser.parse(tweet['created_at'])
                    tweets.append(tweet)
        except Exception as e:
            # deliberate best-effort: report and fall through with partial data
            # (single-argument print() behaves the same on Python 2 and 3)
            print('fetch_tweets error' + str(e))
        t2 = int(round(time.time() * 1000))
        print('----> fetch tweets for the specified user costs ' + str(t2 - t1) + ' ms')
        return tweets
    
    def fetch_tweets_by_keyword(self, keyword, start, topk):
        
        print 'thread : '  + keyword
        
        t1 = int(round(time.time() * 1000))
        tweets = []
        users = []
        tweetids = {}
        qtext = unicode('ctime:[' + str(start) + ' to] AND ' + 'content:(' + keyword + ')')
        try :
            searcher = self.ix.searcher()
            q = self.query.parse(qtext)
            results = searcher.search(q)
            
            for r in results:
                t = json.loads(r['json'])
                tt = t;
                if 'retweeted_status' in t and t['retweeted_status'] is not None:
                    t = t['retweeted_status']
                tid = t['id_str']
                if tid not in tweetids:
                    
                    user = {
                        "id":tt['user']['id_str'], 
                        "retweet_time":self.dateparser.parse(tt['created_at']).strftime('%Y%m%d%H%M%S'),
                        "screen_name":tt['user']['screen_name'], 
                        "profile_image_url":tt['user']['profile_image_url'],
                        "followers_count":tt['user']['followers_count']
                    };
                    
                    users.append(user)
                    
                    tweet = {}
                    tweet['id'] = tid
                    tweet['text'] = t['text']
                    tweet['creator'] = {}
                    tweet['creator']['id'] = t['user']['id_str']
                    tweet['creator']['creator'] = t['user']['screen_name']
                    tweet['creator']['creator_img'] = t['user']['profile_image_url']
                    tweet['retweet_count'] = t['retweet_count']
                    tweet['created_at'] = self.dateparser.parse(t['created_at']).strftime('%Y%m%d%H%M%S')
                    tweet['retweet_history'] = [user]
                    tweet['rank'] = max(t['user']['followers_count'], tt['user']['followers_count']) * t['retweet_count']
                    tweetids[tid] = tweet
                    tweets.append(tweet)
                else :
                    user = {
                        "id":tt['user']['id_str'], 
                        "retweet_time":self.dateparser.parse(tt['created_at']).strftime('%Y%m%d%H%M%S'),
                        "screen_name":tt['user']['screen_name'], 
                        "profile_image_url":tt['user']['profile_image_url'],
#.........这里部分代码省略.........
开发者ID:opmiss,项目名称:ActivityVis,代码行数:103,代码来源:fetcher.py


注:本文中的whoosh.qparser.MultifieldParser.add_plugin方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。