This article collects typical usage examples of the IndexSearcher.getTopReaderContext method from org.apache.lucene.search in Python. If you have been wondering what exactly Python's IndexSearcher.getTopReaderContext does, or how to use it, then congratulations: the selected method code examples here may help you. You can also read further usage examples of the class this method belongs to, org.apache.lucene.search.IndexSearcher.

The text below shows 1 code example of the IndexSearcher.getTopReaderContext method; by default the examples are sorted by popularity. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
Example 1: Lucene
# Required module import: from org.apache.lucene.search import IndexSearcher [as alias]
# Or: from org.apache.lucene.search.IndexSearcher import getTopReaderContext [as alias]
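# Note: besides IndexSearcher, this fragment also appears to rely on the following
# imports (assuming the Lucene 4.x package layout exposed by PyLucene; treat the
# exact packages as an assumption, not part of the original example):
#   from org.apache.lucene.search.spans import SpanTermQuery, SpanNearQuery
#   from org.apache.lucene.index import Term, TermContext
#   from org.apache.lucene.search import FieldValueFilter
#   from java.util import HashMap, TreeSet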
#......... part of the code is omitted here .........
        return phq

    def get_span_query(self, terms, field, slop, ordered=True):
        """
        Creates near span query
        :param terms: list of terms
        :param field: field name
        :param slop: number of terms between the query terms
        :param ordered: If true, ordered search; otherwise unordered search
        :return: lucene span near query
        """
        span_queries = []
        for term in terms:
            span_queries.append(SpanTermQuery(Term(field, term)))
        span_near_query = SpanNearQuery(span_queries, slop, ordered)
        return span_near_query

    def get_doc_phrase_freq(self, phrase, field, slop, ordered):
        """
        Returns the phrase frequency for each document, for a given phrase and field.
        :param phrase: str
        :param field: field name
        :param slop: number of terms in between
        :param ordered: If true, term occurrences should be ordered
        :return: dictionary {doc: freq, ...}
        """
        # creates span near query
        span_near_query = self.get_span_query(phrase.split(" "), field, slop=slop, ordered=ordered)
        # opens the searcher and gets the top-level reader context of its IndexReader
        self.open_searcher()
        index_reader_context = self.searcher.getTopReaderContext()
        # builds a TermContext for each term of the span query
        term_contexts = HashMap()
        terms = TreeSet()
        span_near_query.extractTerms(terms)
        for term in terms:
            term_contexts.put(term, TermContext.build(index_reader_context, term))
        leaves = index_reader_context.leaves()
        doc_phrase_freq = {}
        # iterates over all atomic (per-segment) readers
        for atomic_reader_context in leaves:
            bits = atomic_reader_context.reader().getLiveDocs()  # live (non-deleted) docs
            spans = span_near_query.getSpans(atomic_reader_context, bits, term_contexts)
            # counts every phrase match, grouped by the document's ID field
            while spans.next():
                lucene_doc_id = spans.doc()
                doc_id = atomic_reader_context.reader().document(lucene_doc_id).get(self.FIELDNAME_ID)
                if doc_id not in doc_phrase_freq:
                    doc_phrase_freq[doc_id] = 1
                else:
                    doc_phrase_freq[doc_id] += 1
        return doc_phrase_freq
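    # A minimal usage sketch (not part of the original example; the field name
    # "contents" and the constructor arguments are hypothetical -- the actual
    # setup lives in the omitted part of this Lucene class, which is assumed to
    # provide open_searcher() and self.searcher over an existing index):
    #
    #   lucene = Lucene(...)  # constructor elided above
    #   freqs = lucene.get_doc_phrase_freq("web search", "contents", slop=0, ordered=True)
    #   # freqs maps each document's FIELDNAME_ID value to the number of phrase matches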
    def get_id_filter(self):
        # filter that accepts only documents that have a value in the ID field
        return FieldValueFilter(self.FIELDNAME_ID)

    def __to_retrieval_results(self, scoredocs, field_id=FIELDNAME_ID):
        """Converts Lucene scoreDocs results to RetrievalResults format."""
        rr = RetrievalResults()
        if scoredocs is not None:
            for i in xrange(len(scoredocs)):
                score = scoredocs[i].score
                lucene_doc_id = scoredocs[i].doc  # internal doc_id
                doc_id = self.reader.document(lucene_doc_id).get(field_id)
                rr.append(doc_id, score, lucene_doc_id)