本文整理汇总了Python中whoosh.filedb.filestore.RamStorage.create_index方法的典型用法代码示例。如果您正苦于以下问题:Python RamStorage.create_index方法的具体用法?Python RamStorage.create_index怎么用?Python RamStorage.create_index使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类whoosh.filedb.filestore.RamStorage
的用法示例。
在下文中一共展示了RamStorage.create_index方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: ToolBoxSearch
# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
class ToolBoxSearch( object ):
    """
    Support searching tools in a toolbox. This implementation uses
    the Whoosh search library.
    """

    def __init__( self, toolbox, index_help=True ):
        """
        Create a searcher for `toolbox`.
        """
        self.toolbox = toolbox
        self.build_index( index_help )

    def build_index( self, index_help=True ):
        """Index every public tool into an in-memory Whoosh index."""
        log.debug( 'Starting to build toolbox index.' )
        self.storage = RamStorage()
        self.index = self.storage.create_index( schema )
        writer = self.index.writer()
        for tool_id, tool in self.toolbox.tools():
            # Do not add data managers to the public index
            if tool.tool_type == 'manage_data':
                continue
            panel_section = tool.get_panel_section()
            doc = {
                "id": tool_id,
                "name": to_unicode( tool.name ),
                "description": to_unicode( tool.description ),
                "section": to_unicode( panel_section[1] if len( panel_section ) == 2 else '' ),
                "help": to_unicode( "" ),
            }
            if tool.labels:
                doc['labels'] = to_unicode( " ".join( tool.labels ) )
            if index_help and tool.help:
                try:
                    doc['help'] = to_unicode( tool.help.render( host_url="", static_path="" ) )
                except Exception:
                    # Don't fail to build index just because a help message
                    # won't render.
                    pass
            writer.add_document( **doc )
        writer.commit()
        log.debug( 'Toolbox index finished.' )

    def search( self, q, tool_name_boost, tool_section_boost, tool_description_boost, tool_help_boost, tool_search_limit ):
        """
        Perform search on the in-memory index. Weight in the given boosts.
        """
        # Apply per-field boosts for this searcher via BM25F.
        boosts = { 'name_B': float( tool_name_boost ),
                   'section_B': float( tool_section_boost ),
                   'description_B': float( tool_description_boost ),
                   'help_B': float( tool_help_boost ) }
        searcher = self.index.searcher( weighting=BM25F( field_B=boosts ) )
        # Query across name, description, section, help, and labels.
        parser = MultifieldParser( [ 'name', 'description', 'section', 'help', 'labels' ], schema=schema )
        hits = searcher.search( parser.parse( '*' + q + '*' ), limit=float( tool_search_limit ) )
        return [ hit[ 'id' ] for hit in hits ]
示例2: ToolBoxSearch
# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
class ToolBoxSearch( object ):
    """
    Support searching tools in a toolbox. This implementation uses
    the "whoosh" search library.
    """

    def __init__( self, toolbox ):
        """
        Create a searcher for `toolbox`.
        """
        self.toolbox = toolbox
        self.enabled = tool_search_enabled
        if tool_search_enabled:
            self.build_index()

    def build_index( self ):
        """Index each tool's id, title, description, and help text."""
        self.storage = RamStorage()
        self.index = self.storage.create_index( schema )
        writer = self.index.writer()
        # TODO: would also be nice to search section headers.
        # Use items() instead of Python-2-only iteritems() so this code
        # also runs under Python 3; iteration results are identical.
        for id, tool in self.toolbox.tools_by_id.items():
            writer.add_document( id=id, title=to_unicode(tool.name), description=to_unicode(tool.description), help=to_unicode(tool.help) )
        writer.commit()

    def search( self, query, return_attribute='id' ):
        """Search the index, returning `return_attribute` of each hit."""
        if not tool_search_enabled:
            return []
        # Change field boosts for searcher to place more weight on title, description than help.
        searcher = self.index.searcher(
            weighting=BM25F( field_B={ 'title_B': 3, 'description_B': 2, 'help_B': 1 } ) )
        # Set query to search title, description, and help.
        parser = MultifieldParser( [ 'title', 'description', 'help' ], schema=schema )
        results = searcher.search( parser.parse( query ), minscore=2.0 )
        return [ result[ return_attribute ] for result in results ]
示例3: test_weighting
# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_weighting():
    """A custom Weighting/Scorer pair should order hits by a stored field."""
    from whoosh.scoring import Weighting, BaseScorer

    schema = fields.Schema(id=fields.ID(stored=True),
                           n_comments=fields.STORED)
    ix = RamStorage().create_index(schema)

    writer = ix.writer()
    for doc_id, ncom in [("1", 5), ("2", 12), ("3", 2), ("4", 7)]:
        writer.add_document(id=u(doc_id), n_comments=ncom)
    writer.commit()

    # Fake Weighting implementation: the score is simply the stored
    # comment count, so results come back sorted by n_comments.
    class CommentWeighting(Weighting):
        def scorer(self, searcher, fieldname, text, qf=1):
            return self.CommentScorer(searcher.stored_fields)

        class CommentScorer(BaseScorer):
            def __init__(self, stored_fields):
                self.stored_fields = stored_fields

            def score(self, matcher):
                return self.stored_fields(matcher.id()).get("n_comments", 0)

    with ix.searcher(weighting=CommentWeighting()) as s:
        q = TermRange("id", u("1"), u("4"), constantscore=False)
        ids = [fs["id"] for fs in s.search(q)]
        assert_equal(ids, ["2", "4", "1", "3"])
示例4: test_finalweighting
# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_finalweighting():
    """A Weighting with use_final should rescore every hit via final()."""
    from whoosh.scoring import Frequency

    schema = fields.Schema(id=fields.ID(stored=True),
                           summary=fields.TEXT,
                           n_comments=fields.STORED)
    ix = RamStorage().create_index(schema)

    writer = ix.writer()
    docs = [("1", "alfa bravo", 5), ("2", "alfa", 12),
            ("3", "bravo", 2), ("4", "bravo bravo", 7)]
    for doc_id, summary, ncom in docs:
        writer.add_document(id=u(doc_id), summary=u(summary), n_comments=ncom)
    writer.commit()

    class CommentWeighting(Frequency):
        use_final = True

        # Replace the frequency score with the stored comment count.
        def final(self, searcher, docnum, score):
            return searcher.stored_fields(docnum).get("n_comments", 0)

    with ix.searcher(weighting=CommentWeighting()) as s:
        r = s.search(qparser.QueryParser("summary", None).parse("alfa OR bravo"))
        ids = [fs["id"] for fs in r]
        assert_equal(["2", "4", "1", "3"], ids)
示例5: test_merged
# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_merged():
    """Searching must still find documents after a second commit merges segments."""
    sc = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT)
    ix = RamStorage().create_index(sc)

    w = ix.writer()
    for word in ("alfa", "bravo", "charlie", "delta"):
        w.add_document(id=u(word), content=u(word))
    w.commit()

    with ix.searcher() as s:
        hits = s.search(Term("content", u("bravo")))
        assert_equal(len(hits), 1)
        assert_equal(hits[0]["id"], "bravo")

    w = ix.writer()
    w.add_document(id=u("echo"), content=u("echo"))
    w.commit()
    # The second commit should have merged everything into one segment.
    assert_equal(len(ix._segments()), 1)

    with ix.searcher() as s:
        hits = s.search(Term("content", u("bravo")))
        assert_equal(len(hits), 1)
        assert_equal(hits[0]["id"], "bravo")
示例6: test_missing_field_scoring
# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_missing_field_scoring():
    """Field lengths must stay correct when a document omits a field."""
    schema = fields.Schema(name=fields.TEXT(stored=True),
                           hobbies=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)

    writer = ix.writer()
    writer.add_document(name=u('Frank'), hobbies=u('baseball, basketball'))
    writer.commit()

    reader = ix.reader()
    assert_equal(reader.field_length("hobbies"), 2)
    assert_equal(reader.field_length("name"), 1)
    reader.close()

    # Second document has no "hobbies" value at all.
    writer = ix.writer()
    writer.add_document(name=u('Jonny'))
    writer.commit()

    with ix.searcher() as s:
        reader = s.reader()
        assert_equal(len(ix._segments()), 1)
        # "hobbies" length unchanged; "name" grew by one token.
        assert_equal(reader.field_length("hobbies"), 2)
        assert_equal(reader.field_length("name"), 2)

        parser = qparser.MultifieldParser(['name', 'hobbies'], schema)
        result = s.search(parser.parse(u("baseball")))
        assert_equal(len(result), 1)
示例7: test_term_inspection
# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_term_inspection():
    """Exercise the reader's term-inspection API on a tiny two-doc index."""
    schema = fields.Schema(title=fields.TEXT(stored=True),
                           content=fields.TEXT)
    ix = RamStorage().create_index(schema)

    writer = ix.writer()
    writer.add_document(title=u("My document"),
                        content=u("AA AA BB BB CC AA AA AA BB BB CC DD EE EE"))
    writer.add_document(title=u("My other document"),
                        content=u("AA AB BB CC EE EE AX AX DD"))
    writer.commit()

    reader = ix.reader()
    assert " ".join(reader.field_terms("content")) == "aa ab ax bb cc dd ee"
    assert list(reader.expand_prefix("content", "a")) == [b('aa'), b('ab'), b('ax')]

    expected_terms = set([('content', b('aa')), ('content', b('ab')),
                          ('content', b('ax')), ('content', b('bb')),
                          ('content', b('cc')), ('content', b('dd')),
                          ('content', b('ee')), ('title', b('document')),
                          ('title', b('my')), ('title', b('other'))])
    assert set(reader.all_terms()) == expected_terms

    # Each _fstats tuple is (text, doc_freq, index_freq).
    assert _fstats(reader.iter_field("content")) == [(b('aa'), 2, 6), (b('ab'), 1, 1), (b('ax'), 1, 2),
                                                     (b('bb'), 2, 5), (b('cc'), 2, 3), (b('dd'), 2, 2),
                                                     (b('ee'), 2, 4)]
    assert _fstats(reader.iter_field("content", prefix="c")) == [(b('cc'), 2, 3), (b('dd'), 2, 2), (b('ee'), 2, 4)]
    assert list(reader.most_frequent_terms("content")) == [(6, b('aa')), (5, b('bb')), (4, b('ee')), (3, b('cc')), (2, b('dd'))]
    assert list(reader.most_frequent_terms("content", prefix="a")) == [(6, b('aa')), (2, b('ax')), (1, b('ab'))]
    assert list(reader.most_distinctive_terms("content", 3)) == [(1.3862943611198906, b('ax')), (0.6931471805599453, b('ab')), (0.0, b('ee'))]
示例8: test_not2
# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_not2():
    """NOT queries must exclude matching docs, including after deletions."""
    schema = fields.Schema(name=fields.ID(stored=True), value=fields.TEXT)
    ix = RamStorage().create_index(schema)

    docs = [("a", "alfa bravo charlie delta echo"),
            ("b", "bravo charlie delta echo foxtrot"),
            ("c", "charlie delta echo foxtrot golf"),
            ("d", "delta echo golf hotel india"),
            ("e", "echo golf hotel india juliet")]
    writer = ix.writer()
    for name, value in docs:
        writer.add_document(name=u(name), value=u(value))
    writer.commit()

    p = qparser.QueryParser("value", None)
    with ix.searcher() as s:
        results = s.search(p.parse("echo NOT golf"))
        assert_equal(sorted([d["name"] for d in results]), ["a", "b"])
        results = s.search(p.parse("echo NOT bravo"))
        assert_equal(sorted([d["name"] for d in results]), ["c", "d", "e"])

    # Deleting every document containing "bravo" removes "a" and "b".
    ix.delete_by_term("value", u("bravo"))
    with ix.searcher() as s:
        results = s.search(p.parse("echo NOT charlie"))
        assert_equal(sorted([d["name"] for d in results]), ["d", "e"])
示例9: test_pages
# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_pages(self):
    """search_page should clamp past-the-end page requests to the last page."""
    from whoosh.scoring import Frequency

    schema = fields.Schema(id=fields.ID(stored=True), c=fields.TEXT)
    ix = RamStorage().create_index(schema)

    # Six docs with strictly decreasing term frequency for "alfa".
    docs = [(u"1", u"alfa alfa alfa alfa alfa alfa"),
            (u"2", u"alfa alfa alfa alfa alfa"),
            (u"3", u"alfa alfa alfa alfa"),
            (u"4", u"alfa alfa alfa"),
            (u"5", u"alfa alfa"),
            (u"6", u"alfa")]
    w = ix.writer()
    for doc_id, text in docs:
        w.add_document(id=doc_id, c=text)
    w.commit()

    searcher = ix.searcher(weighting=Frequency)
    q = query.Term("c", u"alfa")

    r = searcher.search(q)
    self.assertEqual([d["id"] for d in r], ["1", "2", "3", "4", "5", "6"])

    r = searcher.search_page(q, 2, pagelen=2)
    self.assertEqual([d["id"] for d in r], ["3", "4"])

    # Requesting page 10 of 4-per-page clamps to the last page (2 of 2).
    r = searcher.search_page(q, 10, pagelen=4)
    self.assertEqual(r.total, 6)
    self.assertEqual(r.pagenum, 2)
    self.assertEqual(r.pagelen, 2)
示例10: test_add_sortable
# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_add_sortable():
    """add_sortable should retrofit column storage onto existing fields."""
    st = RamStorage()
    schema = fields.Schema(chapter=fields.ID(stored=True), price=fields.NUMERIC)
    ix = st.create_index(schema)

    with ix.writer() as w:
        w.add_document(chapter=u("alfa"), price=100)
        w.add_document(chapter=u("bravo"), price=200)
        w.add_document(chapter=u("charlie"), price=300)
        w.add_document(chapter=u("delta"), price=400)

    with ix.writer() as w:
        w.add_document(chapter=u("bravo"), price=500)
        w.add_document(chapter=u("alfa"), price=600)
        w.add_document(chapter=u("delta"), price=100)
        w.add_document(chapter=u("charlie"), price=200)
        # Keep the two segments separate so add_sortable must handle both.
        w.merge = False

    with ix.reader() as r:
        # No column data yet for either field.
        assert not r.has_column("chapter")
        assert not r.has_column("price")

    with ix.writer() as w:
        sorting.add_sortable(w, "chapter", sorting.StoredFieldFacet("chapter"))
        sorting.add_sortable(w, "price", sorting.FieldFacet("price"))
        w.schema.test = 100

    with ix.reader() as r:
        assert r.has_column("chapter")
        assert r.has_column("price")
        chapr = r.column_reader("chapter")
        pricer = r.column_reader("price")
        assert chapr[0] == "alfa"
        assert pricer[0] == 100
示例11: test_finalweighting
# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_finalweighting(self):
    """A Weighting whose final() overrides scores should control ordering."""
    from whoosh.scoring import Weighting

    schema = fields.Schema(id=fields.ID(stored=True),
                           summary=fields.TEXT,
                           n_comments=fields.ID(stored=True))
    ix = RamStorage().create_index(schema)

    docs = [(u"1", u"alfa bravo", u"5"), (u"2", u"alfa", u"12"),
            (u"3", u"bravo", u"2"), (u"4", u"bravo bravo", u"7")]
    w = ix.writer()
    for doc_id, summary, ncom in docs:
        w.add_document(id=doc_id, summary=summary, n_comments=ncom)
    w.commit()

    class CommentWeighting(Weighting):
        def score(self, *args, **kwargs):
            return 0

        # Final score is the stored comment count, parsed from its ID field.
        def final(self, searcher, docnum, score):
            return int(searcher.stored_fields(docnum).get("n_comments"))

    s = ix.searcher(weighting=CommentWeighting())
    r = s.search(qparser.QueryParser("summary").parse("alfa OR bravo"))
    self.assertEqual([fs["id"] for fs in r], ["2", "4", "1", "3"])
示例12: test_merged
# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_merged(self):
    """Results must be unchanged after a merge-triggering second commit."""
    sc = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT)
    ix = RamStorage().create_index(sc)

    w = ix.writer()
    for word in (u"alfa", u"bravo", u"charlie", u"delta"):
        w.add_document(id=word, content=word)
    w.commit()

    s = ix.searcher()
    r = s.search(query.Term("content", u"bravo"))
    self.assertEqual(len(r), 1)
    self.assertEqual(r[0]["id"], "bravo")

    w = ix.writer()
    w.add_document(id=u"echo", content=u"echo")
    w.commit()
    # The second commit should merge all segments down to one.
    self.assertEqual(len(ix.segments), 1)

    s = ix.searcher()
    r = s.search(query.Term("content", u"bravo"))
    self.assertEqual(len(r), 1)
    self.assertEqual(r[0]["id"], "bravo")
示例13: test_intersection
# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_intersection():
    """And-query matchers should yield exactly the docs containing all terms."""
    schema = fields.Schema(key=fields.ID(stored=True),
                           value=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)

    # First segment.
    w = ix.writer()
    w.add_document(key=u("a"), value=u("alpha bravo charlie delta"))
    w.add_document(key=u("b"), value=u("echo foxtrot alpha bravo"))
    w.add_document(key=u("c"), value=u("charlie delta golf hotel"))
    w.commit()

    # Second segment, so the matcher must intersect across segments.
    w = ix.writer()
    w.add_document(key=u("d"), value=u("india alpha bravo charlie"))
    w.add_document(key=u("e"), value=u("delta bravo india bravo"))
    w.commit()

    with ix.searcher() as s:
        q = And([Term("value", u("bravo")), Term("value", u("delta"))])
        assert _keys(s, q.matcher(s).all_ids()) == ["a", "e"]

        q = And([Term("value", u("bravo")), Term("value", u("alpha"))])
        assert _keys(s, q.matcher(s).all_ids()) == ["a", "b", "d"]
示例14: test_midlevel_writing
# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_midlevel_writing():
    """Inspect low-level term-info statistics before and after a second segment."""
    st = RamStorage()
    schema = fields.Schema(t=fields.TEXT(phrase=False))
    ix = st.create_index(schema)

    w = ix.writer()
    w.add_document(t=u("alfa bravo charlie delta alfa bravo alfa"))
    w.commit()

    with ix.reader() as r:
        info = r.termsindex["t", u("alfa")]
        # Single doc: weight 3, doc freq 1, doc length 7 tokens.
        assert_equal(info.weight(), 3.0)
        assert_equal(info.doc_frequency(), 1)
        assert_equal(info.min_length(), 7)
        assert_equal(info.max_length(), 7)
        assert_equal(info.max_weight(), 3.0)
        assert_almost_equal(info.max_wol(), 3.0 / 7)
        assert_equal(info.postings, ((0,), (3.0,), (b('\x00\x00\x00\x03'),)))

    w = ix.writer()
    w.add_document(t=u("alfa charlie alfa"))
    w.commit()

    with ix.reader() as r:
        info = r.termsindex["t", u("alfa")]
        # Two docs now; postings are stored out-of-line (offset 0).
        assert_equal(info.weight(), 5.0)
        assert_equal(info.doc_frequency(), 2)
        assert_equal(info.min_length(), 3)
        assert_equal(info.max_length(), 7)
        assert_equal(info.max_weight(), 3.0)
        assert_almost_equal(info.max_wol(), 2.0 / 3)
        assert_equal(info.postings, 0)
示例15: test_intersection
# 需要导入模块: from whoosh.filedb.filestore import RamStorage [as 别名]
# 或者: from whoosh.filedb.filestore.RamStorage import create_index [as 别名]
def test_intersection(self):
    """The scorer of an And query must iterate exactly the intersecting doc ids."""
    schema = fields.Schema(key=fields.ID(stored=True), value=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)

    w = ix.writer()
    w.add_document(key=u"a", value=u"alpha bravo charlie delta")
    w.add_document(key=u"b", value=u"echo foxtrot alpha bravo")
    w.add_document(key=u"c", value=u"charlie delta golf hotel")
    w.commit()

    # A second commit creates another segment to intersect across.
    w = ix.writer()
    w.add_document(key=u"d", value=u"india alpha bravo charlie")
    w.add_document(key=u"e", value=u"delta bravo india bravo")
    w.commit()

    searcher = ix.searcher()
    q = And([Term("value", u"bravo"), Term("value", u"delta")])
    self.assertEqual(self._keys(searcher, q.scorer(searcher).all_ids()), ["a", "e"])

    q = And([Term("value", u"bravo"), Term("value", u"alpha")])
    self.assertEqual(self._keys(searcher, q.scorer(searcher).all_ids()), ["a", "b", "d"])