本文整理汇总了Python中org.apache.lucene.search.IndexSearcher.explain方法的典型用法代码示例。如果您正在寻找以下问题的答案:Python IndexSearcher.explain方法的具体用法?Python IndexSearcher.explain怎么用?Python IndexSearcher.explain使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.search.IndexSearcher的用法示例。
在下文中一共展示了IndexSearcher.explain方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: LuceneRetrieval
# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import explain [as 别名]
class LuceneRetrieval(BaseRetrieval):
"""
Encapsulates the Lucene retrieval engine
"""
def __init__(self, index_path, method, logger=None, use_default_similarity=False):
    """
    Open the Lucene index at *index_path* and prepare a searcher.

    :param index_path: filesystem path of the Lucene index directory
    :param method: retrieval method label (stored but not used here)
    :param logger: optional logger instance
    :param use_default_similarity: if True, use the stock QueryParser and
        DefaultSimilarity; otherwise use the field-agnostic variants and
        route queries through explain() (see runQuery)
    """
    self.index_path = index_path
    index_dir = SimpleFSDirectory(File(self.index_path))
    self.analyzer = StandardAnalyzer(LuceneVersion.LUCENE_CURRENT)
    self.reader = DirectoryReader.open(index_dir)
    self.searcher = IndexSearcher(self.reader)
    # Parser class, similarity and the explain-based scoring flag all
    # hinge on the same switch, so pick them together.
    if use_default_similarity:
        self.query_parser = QueryParser
        similarity = DefaultSimilarity()
        self.useExplainQuery = False
    else:
        self.query_parser = FieldAgnosticQueryParser
        similarity = FieldAgnosticSimilarity()
        self.useExplainQuery = True
    # NOTE: FieldAgnosticSimilarity uses the coord factor by default;
    # it can be disabled via `similarity.useCoord = False` if desired.
    self.searcher.setSimilarity(similarity)
    self.method = method  # kept for interface compatibility; unused in this class
    self.logger = logger
def runQueryViaExplain(self, query, max_results):
    """
    Score every document in the index via IndexSearcher.explain() and
    return the best hits.

    Workaround to make sure that explanations and searches produce the
    same scores while the Lucene bulkScorer is being fixed: instead of
    searching, we ask the searcher to explain the query against each
    document and read the score off the Explanation.

    :param query: a parsed Lucene Query object
    :param max_results: maximum number of hits to return
    :returns: list of ``Hit(doc, score)`` namedtuples, highest score first
    """
    # Create the record type ONCE. The original code called
    # namedtuple("Hit", ...) inside the loop, building a brand-new class
    # per document and then setting *class* attributes on it instead of
    # instantiating it.
    Hit = namedtuple("Hit", ["doc", "score"])
    results = []
    for doc_id in range(self.reader.numDocs()):
        explanation = self.searcher.explain(query, doc_id)
        results.append(Hit(doc=doc_id, score=explanation.getValue()))
    results.sort(key=lambda hit: hit.score, reverse=True)
    # Slicing past the end is a no-op, so no length check is needed.
    return results[:max_results]
def runQuery(self, structured_query, max_results=MAX_RESULTS_RECALL):
    """
    Parse and execute a structured query against the Lucene index.

    :param structured_query: dict holding at least a "structured_query"
        entry; on success a "lucene_query" entry with the rewritten query
        text is added to it (mutates the argument)
    :param max_results: maximum number of hits to return
    :returns: list of (score, metadata_dict) tuples, [] for an empty
        query, or None if the query text fails to parse
    """
    if not structured_query:
        return []
    self.last_query = structured_query
    query_text = self.rewriteQuery(structured_query["structured_query"], ["text"])
    try:
        parser = self.query_parser(lucene.Version.LUCENE_CURRENT, "text", self.analyzer)
        query = parser.parse(query_text)
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
    # are no longer swallowed; parse failures still return None.
    except Exception:
        print("Lucene exception:", sys.exc_info()[:2])
        return None
    structured_query["lucene_query"] = query_text
    if self.useExplainQuery:
        # Workaround: score via explain() until the Lucene bulkScorer
        # gives results consistent with explanations.
        hits = self.runQueryViaExplain(query, max_results)
    else:
        collector = TopScoreDocCollector.create(max_results, True)
        self.searcher.search(query, collector)
        hits = collector.topDocs().scoreDocs
    res = []
    for hit in hits:
        doc = self.searcher.doc(hit.doc)
        metadata = json.loads(doc.get("metadata"))
        res.append((hit.score, metadata))
    return res
#.........这里部分代码省略.........
示例2: LuceneAnnotator
# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import explain [as 别名]
#.........这里部分代码省略.........
logging.debug("For " + str(query) + " : " + str(result.totalHits))
freq = result.totalHits
if freq > 0:
result = self._searcher.search(query, freq)
hits = pyJava.JArray2List(result.scoreDocs)
logging.debug("For " + str(query) + " : " + str(result.totalHits))
if freq <= 0:
# search lowercased exact
lowerCasedParser = QueryParser(Version.LUCENE_CURRENT, FreyaConstants.FIELD_EXACT_LOWERCASED_CONTENT, analyser)
query = lowerCasedParser.parse(preparePocStringLowercase)
# logging.info("Searching for: " + query.toString());
result = self._searcher.search(query, 1)
freq = result.totalHits
if freq > 0:
result = self._searcher.search(query, freq)
hits = pyJava.JArray2List(result.scoreDocs)
logging.debug("For " + str(query) + " : " + str(result.totalHits))
if len(hits) == 0 and preparePocStringLowercase.index(" ") < 0:
# search stemmed
stemParser = QueryParser(Version.LUCENE_CURRENT, FreyaConstants.FIELD_STEMMED_CONTENT, stemAnalyser)
query = stemParser.parse(preparePocStringLowercase)
# logging.info("Searching for: " + query.toString());
result = self._searcher.search(query, 1)
freq = result.totalHits
if freq > 0:
result = self._searcher.search(query, freq)
hits = pyJava.JArray2List(result.scoreDocs)
logging.info("For " + str(query) + " : " + str(result.totalHits))
# for (ScoreDoc hit : hits) {
indexus = 0
while indexus < len(hits):
hit = hits[indexus]
doc = self._searcher.doc(hit.doc)
self._searcher.explain(query, hit.doc)
ann = Annotation()
features = dict()
features[FreyaConstants.CLASS_FEATURE_LKB]=doc.get(FreyaConstants.CLASS_FEATURE_LKB)
features[FreyaConstants.INST_FEATURE_LKB]=doc.get(FreyaConstants.INST_FEATURE_LKB)
features[FreyaConstants.PROPERTY_FEATURE_LKB]=doc.get(FreyaConstants.PROPERTY_FEATURE_LKB)
features["string"]=doc.get(FreyaConstants.FIELD_EXACT_CONTENT)
features[FreyaConstants.SCORE]=hit.score
ann.setFeatures(features)
ann.setEndOffset(annotation.getEndOffset())
ann.setStartOffset(annotation.getStartOffset())
ann.setSyntaxTree(annotation.getSyntaxTree())
ann.setText(annotation.getText())
annotations.append(ann)
indexus += 1
except Exception as e:#CorruptIndexException(e):
print e.message
logging.error("Error")
return annotations
# *
# * this method now search both stem and lowercase
# *
# * @param annotation
# * @return
#
def searchStemFirst(self, annotation):
annotations = list()
pocString = QueryParser.escape(annotation.getText())
preparePocStringOriginal = "\"" + pocString + "\""
preparePocStringLowercase = "\"" + pocString.lower() + "\""
try:
maxSynonyms = 0