当前位置: 首页>>代码示例>>Python>>正文


Python RamStorage.create_index方法代码示例

本文整理汇总了Python中whoosh.filedb.filestore.RamStorage.create_index方法的典型用法代码示例。如果您正苦于以下问题:Python RamStorage.create_index方法的具体用法是什么?Python RamStorage.create_index怎么用?有哪些Python RamStorage.create_index的使用例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类whoosh.filedb.filestore.RamStorage的用法示例。


在下文中一共展示了RamStorage.create_index方法的15个代码示例,这些示例默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: ToolBoxSearch

# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
class ToolBoxSearch( object ):
    """
    Support searching tools in a toolbox. This implementation uses
    the Whoosh search library.

    The index lives in memory (``RamStorage``) and is built when the
    searcher is constructed.
    """

    def __init__( self, toolbox, index_help=True ):
        """
        Create a searcher for `toolbox`.

        :param toolbox: object whose ``tools()`` method yields ``(id, tool)`` pairs.
        :param index_help: when true, rendered tool help is indexed as well.
        """
        self.toolbox = toolbox
        self.build_index( index_help )

    def build_index( self, index_help=True ):
        """
        (Re)build the in-memory index from every tool in the toolbox.

        Tools whose ``tool_type`` is ``'manage_data'`` are skipped. A help
        page that fails to render is logged and indexed as empty text, so a
        single broken tool never prevents the index from being built.

        :param index_help: when true, rendered tool help is indexed as well.
        """
        log.debug( 'Starting to build toolbox index.' )
        self.storage = RamStorage()
        self.index = self.storage.create_index( schema )
        writer = self.index.writer()
        for tool_id, tool in self.toolbox.tools():
            #  Do not add data managers to the public index
            if tool.tool_type == 'manage_data':
                continue
            # Look the panel section up once; its second item is indexed as
            # the section text when the value has exactly two items.
            panel_section = tool.get_panel_section()
            section_name = panel_section[1] if len( panel_section ) == 2 else ''
            add_doc_kwds = {
                "id": tool_id,
                "name": to_unicode( tool.name ),
                "description": to_unicode( tool.description ),
                "section": to_unicode( section_name ),
                "help": to_unicode( "" )
            }
            if tool.labels:
                add_doc_kwds['labels'] = to_unicode( " ".join( tool.labels ) )
            if index_help and tool.help:
                try:
                    add_doc_kwds['help'] = to_unicode( tool.help.render( host_url="", static_path="" ) )
                except Exception:
                    # Don't fail to build index just because a help message
                    # won't render -- but leave a trace so the broken help
                    # can be found and fixed.
                    log.exception( "Could not render help for tool '%s'; indexing it without help text.", tool_id )
            writer.add_document( **add_doc_kwds )
        writer.commit()
        log.debug( 'Toolbox index finished.' )

    def search( self, q, tool_name_boost, tool_section_boost, tool_description_boost, tool_help_boost, tool_search_limit ):
        """
        Perform search on the in-memory index. Weight in the given boosts.

        :param q: user query text; it is wrapped in ``*`` wildcards before parsing.
        :param tool_name_boost: weight for the ``name`` field (converted with ``float``).
        :param tool_section_boost: weight for the ``section`` field (converted with ``float``).
        :param tool_description_boost: weight for the ``description`` field (converted with ``float``).
        :param tool_help_boost: weight for the ``help`` field (converted with ``float``).
        :param tool_search_limit: maximum number of hits (converted with ``float``).
        :returns: list of the stored ``id`` values of the matching tools.
        """
        # Change field boosts for searcher
        # NOTE(review): Whoosh documents per-field B values for BM25F as plain
        # keyword arguments (e.g. ``content_B=1.0``); confirm that wrapping them
        # in a ``field_B`` dict has the intended effect.
        field_boosts = { 'name_B': float( tool_name_boost ),
                         'section_B': float( tool_section_boost ),
                         'description_B': float( tool_description_boost ),
                         'help_B': float( tool_help_boost ) }
        searcher = self.index.searcher( weighting=BM25F( field_B=field_boosts ) )
        # Set query to search name, description, section, help, and labels.
        parser = MultifieldParser( [ 'name', 'description', 'section', 'help', 'labels' ], schema=schema )
        # Perform the search
        # NOTE(review): Whoosh describes ``limit`` as a number of documents; the
        # float conversion is kept as-is for compatibility -- confirm an int is
        # not required.
        hits = searcher.search( parser.parse( '*' + q + '*' ), limit=float( tool_search_limit ) )
        return [ hit[ 'id' ] for hit in hits ]
开发者ID:AbhishekKumarSingh,项目名称:galaxy,代码行数:62,代码来源:__init__.py

示例2: ToolBoxSearch

# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
class ToolBoxSearch( object ):
    """
    Support searching tools in a toolbox. This implementation uses
    the "whoosh" search library.

    Whether searching is available is decided once, at construction time,
    from the module-level ``tool_search_enabled`` flag and remembered in
    ``self.enabled``.
    """

    def __init__( self, toolbox ):
        """
        Create a searcher for `toolbox`.

        The index is only built when tool search is enabled.
        """
        self.toolbox = toolbox
        self.enabled = tool_search_enabled
        if self.enabled:
            self.build_index()

    def build_index( self ):
        """
        Build the in-memory index with one document per entry of
        ``toolbox.tools_by_id`` (id, title, description and help).
        """
        self.storage = RamStorage()
        self.index = self.storage.create_index( schema )
        writer = self.index.writer()
        ## TODO: would also be nice to search section headers.
        # ``items()`` is available on both Python 2 and Python 3 mappings.
        for tool_id, tool in self.toolbox.tools_by_id.items():
            writer.add_document( id=tool_id,
                                 title=to_unicode( tool.name ),
                                 description=to_unicode( tool.description ),
                                 help=to_unicode( tool.help ) )
        writer.commit()

    def search( self, query, return_attribute='id' ):
        """
        Search title, description and help for `query`.

        :param query: query string handed to the multi-field parser.
        :param return_attribute: stored field to collect from every hit.
        :returns: list of `return_attribute` values; empty when search is disabled.
        """
        # Rely on the instance flag: it is what decided whether ``self.index``
        # exists, so the two can never disagree.
        if not self.enabled:
            return []
        # Change field boosts for searcher to place more weight on title, description than help.
        weighting = BM25F( field_B={ 'title_B' : 3, 'description_B' : 2, 'help_B' : 1 } )
        searcher = self.index.searcher( weighting=weighting )
        # Set query to search title, description, and help.
        parser = MultifieldParser( [ 'title', 'description', 'help' ], schema = schema )
        results = searcher.search( parser.parse( query ), minscore=2.0 )
        return [ result[ return_attribute ] for result in results ]
开发者ID:agbiotec,项目名称:galaxy-tools-vcr,代码行数:37,代码来源:__init__.py

示例3: test_weighting

# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_weighting():
    """Rank hits with a custom Weighting that scores by the stored comment count."""
    from whoosh.scoring import Weighting, BaseScorer

    schema = fields.Schema(id=fields.ID(stored=True),
                           n_comments=fields.STORED)
    index = RamStorage().create_index(schema)

    writer = index.writer()
    for doc_id, comment_count in (("1", 5), ("2", 12), ("3", 2), ("4", 7)):
        writer.add_document(id=u(doc_id), n_comments=comment_count)
    writer.commit()

    # Fake Weighting implementation: the score of a posting is simply the
    # "n_comments" value stored with the matched document.
    class CommentWeighting(Weighting):
        class CommentScorer(BaseScorer):
            def __init__(self, stored_fields):
                self.stored_fields = stored_fields

            def score(self, matcher):
                stored = self.stored_fields(matcher.id())
                return stored.get("n_comments", 0)

        def scorer(self, searcher, fieldname, text, qf=1):
            return self.CommentScorer(searcher.stored_fields)

    with index.searcher(weighting=CommentWeighting()) as searcher:
        id_range = TermRange("id", u("1"), u("4"), constantscore=False)
        results = searcher.search(id_range)
        found_ids = [hit["id"] for hit in results]
        assert_equal(found_ids, ["2", "4", "1", "3"])
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:36,代码来源:test_searching.py

示例4: test_finalweighting

# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_finalweighting():
    """Order hits through a ``final`` hook that returns the stored comment count."""
    from whoosh.scoring import Frequency

    schema = fields.Schema(id=fields.ID(stored=True),
                           summary=fields.TEXT,
                           n_comments=fields.STORED)
    index = RamStorage().create_index(schema)

    documents = (
        ("1", "alfa bravo", 5),
        ("2", "alfa", 12),
        ("3", "bravo", 2),
        ("4", "bravo bravo", 7),
    )
    writer = index.writer()
    for doc_id, text, comment_count in documents:
        writer.add_document(id=u(doc_id), summary=u(text),
                            n_comments=comment_count)
    writer.commit()

    class CommentWeighting(Frequency):
        # Ask the searcher to run final() on every hit.
        use_final = True

        def final(self, searcher, docnum, score):
            stored = searcher.stored_fields(docnum)
            return stored.get("n_comments", 0)

    with index.searcher(weighting=CommentWeighting()) as searcher:
        parsed = qparser.QueryParser("summary", None).parse("alfa OR bravo")
        results = searcher.search(parsed)
        found_ids = [hit["id"] for hit in results]
        assert_equal(["2", "4", "1", "3"], found_ids)
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:29,代码来源:test_searching.py

示例5: test_merged

# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_merged():
    """Search returns the same single hit before and after a second commit."""
    schema = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT)
    index = RamStorage().create_index(schema)

    def add_words(words):
        # One document per word, using the word as both id and content.
        writer = index.writer()
        for word in words:
            writer.add_document(id=u(word), content=u(word))
        writer.commit()

    def check_bravo_found():
        with index.searcher() as searcher:
            results = searcher.search(Term("content", u("bravo")))
            assert_equal(len(results), 1)
            assert_equal(results[0]["id"], "bravo")

    add_words(("alfa", "bravo", "charlie", "delta"))
    check_bravo_found()

    add_words(("echo",))
    # The second commit is expected to leave a single segment behind.
    assert_equal(len(index._segments()), 1)
    check_bravo_found()
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:27,代码来源:test_searching.py

示例6: test_missing_field_scoring

# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_missing_field_scoring():
    """A document lacking a field must not break field lengths or searching."""
    schema = fields.Schema(name=fields.TEXT(stored=True),
                           hobbies=fields.TEXT(stored=True))
    index = RamStorage().create_index(schema)

    # First document has both fields.
    first_writer = index.writer()
    first_writer.add_document(name=u('Frank'), hobbies=u('baseball, basketball'))
    first_writer.commit()

    reader = index.reader()
    assert_equal(reader.field_length("hobbies"), 2)
    assert_equal(reader.field_length("name"), 1)
    reader.close()

    # Second document has no "hobbies" value at all.
    second_writer = index.writer()
    second_writer.add_document(name=u('Jonny'))
    second_writer.commit()

    with index.searcher() as searcher:
        merged_reader = searcher.reader()
        assert_equal(len(index._segments()), 1)
        assert_equal(merged_reader.field_length("hobbies"), 2)
        assert_equal(merged_reader.field_length("name"), 2)

        multi_parser = qparser.MultifieldParser(['name', 'hobbies'], schema)
        hits = searcher.search(multi_parser.parse(u("baseball")))
        assert_equal(len(hits), 1)
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:29,代码来源:test_searching.py

示例7: test_term_inspection

# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_term_inspection():
    """Exercise the reader's term-inspection API on a two-document index.

    Covers ``field_terms``, ``expand_prefix``, ``all_terms``, ``iter_field``,
    ``most_frequent_terms`` and ``most_distinctive_terms``.
    """
    schema = fields.Schema(title=fields.TEXT(stored=True),
                           content=fields.TEXT)
    st = RamStorage()
    ix = st.create_index(schema)
    writer = ix.writer()
    writer.add_document(title=u("My document"),
                        content=u("AA AA BB BB CC AA AA AA BB BB CC DD EE EE"))
    writer.add_document(title=u("My other document"),
                        content=u("AA AB BB CC EE EE AX AX DD"))
    writer.commit()

    # Use the reader as a context manager so it is closed even when one of
    # the assertions below fails.
    with ix.reader() as reader:
        assert " ".join(reader.field_terms("content")) == "aa ab ax bb cc dd ee"
        assert list(reader.expand_prefix("content", "a")) == [b('aa'), b('ab'), b('ax')]
        assert set(reader.all_terms()) == set([('content', b('aa')), ('content', b('ab')),
                                               ('content', b('ax')), ('content', b('bb')),
                                               ('content', b('cc')), ('content', b('dd')),
                                               ('content', b('ee')), ('title', b('document')),
                                               ('title', b('my')), ('title', b('other'))])
        # (text, doc_freq, index_freq)
        assert _fstats(reader.iter_field("content")) == [(b('aa'), 2, 6), (b('ab'), 1, 1), (b('ax'), 1, 2),
                                                         (b('bb'), 2, 5), (b('cc'), 2, 3), (b('dd'), 2, 2),
                                                         (b('ee'), 2, 4)]
        assert _fstats(reader.iter_field("content", prefix="c")) == [(b('cc'), 2, 3), (b('dd'), 2, 2), (b('ee'), 2, 4)]
        # (frequency, text) pairs, most frequent first
        assert list(reader.most_frequent_terms("content")) == [(6, b('aa')), (5, b('bb')), (4, b('ee')), (3, b('cc')), (2, b('dd'))]
        assert list(reader.most_frequent_terms("content", prefix="a")) == [(6, b('aa')), (2, b('ax')), (1, b('ab'))]
        assert list(reader.most_distinctive_terms("content", 3)) == [(1.3862943611198906, b('ax')), (0.6931471805599453, b('ab')), (0.0, b('ee'))]
开发者ID:JunjieHu,项目名称:dl,代码行数:30,代码来源:test_reading.py

示例8: test_not2

# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_not2():
    """NOT queries exclude the right documents, also after a delete-by-term."""
    schema = fields.Schema(name=fields.ID(stored=True), value=fields.TEXT)
    index = RamStorage().create_index(schema)

    documents = (
        ("a", "alfa bravo charlie delta echo"),
        ("b", "bravo charlie delta echo foxtrot"),
        ("c", "charlie delta echo foxtrot golf"),
        ("d", "delta echo golf hotel india"),
        ("e", "echo golf hotel india juliet"),
    )
    writer = index.writer()
    for name, text in documents:
        writer.add_document(name=u(name), value=u(text))
    writer.commit()

    parser = qparser.QueryParser("value", None)

    def matching_names(searcher, query_text):
        # Sorted "name" values of every hit for the parsed query.
        hits = searcher.search(parser.parse(query_text))
        return sorted([hit["name"] for hit in hits])

    with index.searcher() as searcher:
        assert_equal(matching_names(searcher, "echo NOT golf"), ["a", "b"])
        assert_equal(matching_names(searcher, "echo NOT bravo"), ["c", "d", "e"])

    # Remove every document containing "bravo" (documents "a" and "b").
    index.delete_by_term("value", u("bravo"))

    with index.searcher() as searcher:
        assert_equal(matching_names(searcher, "echo NOT charlie"), ["d", "e"])
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:27,代码来源:test_searching.py

示例9: test_pages

# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
 def test_pages(self):
     """Check plain and paged results over six frequency-ranked documents."""
     from whoosh.scoring import Frequency

     schema = fields.Schema(id=fields.ID(stored=True), c=fields.TEXT)
     index = RamStorage().create_index(schema)

     # Document "1" holds six occurrences of "alfa", document "6" just one.
     writer = index.writer()
     for number in range(1, 7):
         repeated = u" ".join([u"alfa"] * (7 - number))
         writer.add_document(id=u"%d" % number, c=repeated)
     writer.commit()

     searcher = index.searcher(weighting=Frequency)
     alfa = query.Term("c", u"alfa")

     everything = searcher.search(alfa)
     self.assertEqual([hit["id"] for hit in everything],
                      ["1", "2", "3", "4", "5", "6"])

     second_page = searcher.search_page(alfa, 2, pagelen=2)
     self.assertEqual([hit["id"] for hit in second_page], ["3", "4"])

     # Asking for a page far past the end yields the last available page.
     past_end = searcher.search_page(alfa, 10, pagelen=4)
     self.assertEqual(past_end.total, 6)
     self.assertEqual(past_end.pagenum, 2)
     self.assertEqual(past_end.pagelen, 2)
开发者ID:SpaceAppsXploration,项目名称:whoosh,代码行数:29,代码来源:test_searching.py

示例10: test_add_sortable

# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_add_sortable():
    """Columns are absent until ``sorting.add_sortable`` adds them to the index."""
    storage = RamStorage()
    schema = fields.Schema(chapter=fields.ID(stored=True), price=fields.NUMERIC)
    index = storage.create_index(schema)

    first_batch = (("alfa", 100), ("bravo", 200), ("charlie", 300), ("delta", 400))
    second_batch = (("bravo", 500), ("alfa", 600), ("delta", 100), ("charlie", 200))

    with index.writer() as writer:
        for chapter, price in first_batch:
            writer.add_document(chapter=u(chapter), price=price)

    with index.writer() as writer:
        for chapter, price in second_batch:
            writer.add_document(chapter=u(chapter), price=price)
        # Set on the writer before the block exits.
        writer.merge = False

    with index.reader() as reader:
        for column in ("chapter", "price"):
            assert not reader.has_column(column)

    with index.writer() as writer:
        sorting.add_sortable(writer, "chapter", sorting.StoredFieldFacet("chapter"))
        sorting.add_sortable(writer, "price", sorting.FieldFacet("price"))
        writer.schema.test = 100

    with index.reader() as reader:
        for column in ("chapter", "price"):
            assert reader.has_column(column)

        chapter_column = reader.column_reader("chapter")
        price_column = reader.column_reader("price")
        assert chapter_column[0] == "alfa"
        assert price_column[0] == 100
开发者ID:JunjieHu,项目名称:dl,代码行数:35,代码来源:test_sorting.py

示例11: test_finalweighting

# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
 def test_finalweighting(self):
     """Order hits through a ``final`` hook that reads the stored comment count."""
     from whoosh.scoring import Weighting

     schema = fields.Schema(id=fields.ID(stored=True),
                            summary=fields.TEXT,
                            n_comments=fields.ID(stored=True))
     index = RamStorage().create_index(schema)

     documents = (
         (u"1", u"alfa bravo", u"5"),
         (u"2", u"alfa", u"12"),
         (u"3", u"bravo", u"2"),
         (u"4", u"bravo bravo", u"7"),
     )
     writer = index.writer()
     for doc_id, text, comment_count in documents:
         writer.add_document(id=doc_id, summary=text, n_comments=comment_count)
     writer.commit()

     class CommentWeighting(Weighting):
         def score(self, *args, **kwargs):
             return 0

         def final(self, searcher, docnum, score):
             # The count is stored as text, so convert it back to a number.
             stored = searcher.stored_fields(docnum)
             return int(stored.get("n_comments"))

     searcher = index.searcher(weighting=CommentWeighting())
     parsed = qparser.QueryParser("summary").parse("alfa OR bravo")
     found_ids = [hit["id"] for hit in searcher.search(parsed)]
     self.assertEqual(found_ids, ["2", "4", "1", "3"])
开发者ID:SpaceAppsXploration,项目名称:whoosh,代码行数:30,代码来源:test_searching.py

示例12: test_merged

# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
 def test_merged(self):
     """Search returns the same single hit before and after a second commit."""
     schema = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT)
     index = RamStorage().create_index(schema)

     def add_words(words):
         # One document per word, using the word as both id and content.
         writer = index.writer()
         for word in words:
             writer.add_document(id=word, content=word)
         writer.commit()

     def check_bravo_found():
         searcher = index.searcher()
         results = searcher.search(query.Term("content", u"bravo"))
         self.assertEqual(len(results), 1)
         self.assertEqual(results[0]["id"], "bravo")

     add_words((u"alfa", u"bravo", u"charlie", u"delta"))
     check_bravo_found()

     add_words((u"echo",))
     # The second commit is expected to leave a single segment behind.
     self.assertEqual(len(index.segments), 1)
     check_bravo_found()
开发者ID:SpaceAppsXploration,项目名称:whoosh,代码行数:27,代码来源:test_searching.py

示例13: test_intersection

# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_intersection():
    """An And matcher yields only documents containing both terms."""
    schema = fields.Schema(key=fields.ID(stored=True),
                           value=fields.TEXT(stored=True))
    index = RamStorage().create_index(schema)

    # Two separate commits.
    batches = (
        (("a", "alpha bravo charlie delta"),
         ("b", "echo foxtrot alpha bravo"),
         ("c", "charlie delta golf hotel")),
        (("d", "india alpha bravo charlie"),
         ("e", "delta bravo india bravo")),
    )
    for batch in batches:
        writer = index.writer()
        for key, text in batch:
            writer.add_document(key=u(key), value=u(text))
        writer.commit()

    with index.searcher() as searcher:
        def keys_with_both(first, second):
            both = And([Term("value", u(first)), Term("value", u(second))])
            matcher = both.matcher(searcher)
            return _keys(searcher, matcher.all_ids())

        assert keys_with_both("bravo", "delta") == ["a", "e"]
        assert keys_with_both("bravo", "alpha") == ["a", "b", "d"]
开发者ID:wdv4758h,项目名称:ZipPy,代码行数:27,代码来源:test_matching.py

示例14: test_midlevel_writing

# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_midlevel_writing():
    """Term statistics for "alfa" after one commit and again after a second."""
    storage = RamStorage()
    schema = fields.Schema(t=fields.TEXT(phrase=False))
    index = storage.create_index(schema)

    def commit_text(text):
        # Add a single document and commit it straight away.
        writer = index.writer()
        writer.add_document(t=u(text))
        writer.commit()

    commit_text("alfa bravo charlie delta alfa bravo alfa")

    with index.reader() as reader:
        info = reader.termsindex["t", u("alfa")]
        assert_equal(info.weight(), 3.0)
        assert_equal(info.doc_frequency(), 1)
        assert_equal(info.min_length(), 7)
        assert_equal(info.max_length(), 7)
        assert_equal(info.max_weight(), 3.0)
        assert_almost_equal(info.max_wol(), 3.0 / 7)
        expected_postings = ((0,), (3.0,), (b('\x00\x00\x00\x03'),))
        assert_equal(info.postings, expected_postings)

    commit_text("alfa charlie alfa")

    with index.reader() as reader:
        info = reader.termsindex["t", u("alfa")]
        assert_equal(info.weight(), 5.0)
        assert_equal(info.doc_frequency(), 2)
        assert_equal(info.min_length(), 3)
        assert_equal(info.max_length(), 7)
        assert_equal(info.max_weight(), 3.0)
        assert_almost_equal(info.max_wol(), 2.0 / 3)
        assert_equal(info.postings, 0)
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:33,代码来源:test_quality.py

示例15: test_intersection

# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
 def test_intersection(self):
     """An And scorer yields only documents containing both terms."""
     schema = fields.Schema(key = fields.ID(stored=True), value = fields.TEXT(stored=True))
     index = RamStorage().create_index(schema)

     # Two separate commits.
     batches = (
         ((u"a", u"alpha bravo charlie delta"),
          (u"b", u"echo foxtrot alpha bravo"),
          (u"c", u"charlie delta golf hotel")),
         ((u"d", u"india alpha bravo charlie"),
          (u"e", u"delta bravo india bravo")),
     )
     for batch in batches:
         writer = index.writer()
         for key, text in batch:
             writer.add_document(key=key, value=text)
         writer.commit()

     searcher = index.searcher()

     def keys_with_both(first, second):
         both = And([Term("value", first), Term("value", second)])
         scorer = both.scorer(searcher)
         return self._keys(searcher, scorer.all_ids())

     self.assertEqual(keys_with_both(u"bravo", u"delta"), ["a", "e"])
     self.assertEqual(keys_with_both(u"bravo", u"alpha"), ["a", "b", "d"])
开发者ID:SpaceAppsXploration,项目名称:whoosh,代码行数:27,代码来源:test_scorers.py


注:本文中的whoosh.filedb.filestore.RamStorage.create_index方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。