当前位置: 首页>>代码示例>>Python>>正文


Python IndexSearcher.explain方法代码示例

本文整理汇总了Python中org.apache.lucene.search.IndexSearcher.explain方法的典型用法代码示例。如果您正苦于以下问题：Python IndexSearcher.explain方法的具体用法？Python IndexSearcher.explain怎么用？Python IndexSearcher.explain使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.lucene.search.IndexSearcher的用法示例。


在下文中一共展示了IndexSearcher.explain方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: LuceneRetrieval

# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import explain [as 别名]
class LuceneRetrieval(BaseRetrieval):
    """
        Encapsulates the Lucene retrieval engine
    """
    def __init__(self, index_path, method, logger=None, use_default_similarity=False):
        self.index_path=index_path
        directory = SimpleFSDirectory(File(self.index_path))
        self.analyzer = StandardAnalyzer(LuceneVersion.LUCENE_CURRENT)
        self.reader=DirectoryReader.open(directory)
        self.searcher = IndexSearcher(self.reader)

        # uncomment one of these lines to change the type of parser, query and weight used
        if use_default_similarity:
            self.query_parser=QueryParser
        else:
            self.query_parser=FieldAgnosticQueryParser

        if use_default_similarity:
            similarity=DefaultSimilarity()
            self.useExplainQuery=False
        else:
            similarity=FieldAgnosticSimilarity()
            self.useExplainQuery=True
        # by default, FieldAgnosticSimilarity uses coord factor, can be disabled
##        similarity.useCoord=False

        self.searcher.setSimilarity(similarity)
        self.method=method # never used?
        self.logger=logger

    def runQueryViaExplain(self,query, max_results):
        """
            Score documents one by one through IndexSearcher.explain() and
            return the highest-scoring ones.

            Stop-gap so that the scores used for ranking are exactly the ones
            reported by explanations.

            Every document id in range(numDocs()) gets an entry, whatever its
            score is.

            :param query: parsed Lucene query handed to explain()
            :param max_results: maximum number of hits to return
            :return: list of Hit(doc, score) records sorted by descending
                score, cut to `max_results` when that is smaller than the
                number of documents
        """
        # Build the record type once; the previous code created a brand new
        # class for every document and stored the values as class attributes.
        Hit = namedtuple("Hit", ["doc", "score"])
        num_docs = self.reader.numDocs()

        # NOTE(review): ids are taken from range(numDocs()); presumably the
        # index contains no deleted documents -- confirm against the indexer.
        results = [
            Hit(doc=doc_id, score=self.searcher.explain(query, doc_id).getValue())
            for doc_id in range(num_docs)
        ]

        # list.sort is stable, so equally scored documents stay in id order
        results.sort(key=lambda hit: hit.score, reverse=True)

        if max_results < num_docs:
            results = results[:max_results]

        return results

    def runQuery(self, structured_query, max_results=MAX_RESULTS_RECALL):
        """
            Run a structured query against the index.

            :param structured_query: mapping-like query object; its
                "structured_query" entry is rewritten into Lucene query text
                and the resulting text is stored back under "lucene_query"
            :param max_results: maximum number of hits to retrieve
            :return: list of (score, metadata) tuples, where metadata is the
                JSON-decoded "metadata" field of each hit document; an empty
                list when the query is empty; None when the query text
                cannot be parsed
        """
        # Nothing to search for
        if not structured_query:
            return []

        self.last_query = structured_query
        query_text = self.rewriteQuery(structured_query["structured_query"], ["text"])

        try:
            query = self.query_parser(lucene.Version.LUCENE_CURRENT, "text", self.analyzer).parse(query_text)
        except Exception:
            # Only real errors are reported here; KeyboardInterrupt and
            # SystemExit are no longer swallowed as they were by a bare except.
            print("Lucene exception:",sys.exc_info()[:2])
            return None

        structured_query["lucene_query"] = query_text

        if self.useExplainQuery:
            # this should only exist until I fix the lucene bulkScorer to give the same results
            hits = self.runQueryViaExplain(query, max_results)
        else:
            collector = TopScoreDocCollector.create(max_results, True)
            self.searcher.search(query, collector)
            hits = collector.topDocs().scoreDocs

        # Pair each score with the decoded metadata of its stored document
        return [
            (hit.score, json.loads(self.searcher.doc(hit.doc).get("metadata")))
            for hit in hits
        ]


#.........这里部分代码省略.........
开发者ID:danieldmm,项目名称:minerva,代码行数:103,代码来源:lucene_retrieval.py

示例2: LuceneAnnotator

# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import explain [as 别名]

#.........这里部分代码省略.........
            logging.debug("For " + str(query) + " : " + str(result.totalHits))
            freq = result.totalHits
            if freq > 0:
                result = self._searcher.search(query, freq)
            hits = pyJava.JArray2List(result.scoreDocs)
            logging.debug("For " + str(query) + " : " + str(result.totalHits))
            if freq <= 0:
                # search lowercased exact
                lowerCasedParser = QueryParser(Version.LUCENE_CURRENT, FreyaConstants.FIELD_EXACT_LOWERCASED_CONTENT, analyser)
                query = lowerCasedParser.parse(preparePocStringLowercase)
                # logging.info("Searching for: " + query.toString());
                result = self._searcher.search(query, 1)
                freq = result.totalHits
                if freq > 0:
                    result = self._searcher.search(query, freq)
                hits = pyJava.JArray2List(result.scoreDocs)
                logging.debug("For " + str(query) + " : " + str(result.totalHits))
            if len(hits) == 0 and preparePocStringLowercase.index(" ") < 0:
                # search stemmed
                stemParser = QueryParser(Version.LUCENE_CURRENT, FreyaConstants.FIELD_STEMMED_CONTENT, stemAnalyser)
                query = stemParser.parse(preparePocStringLowercase)
                # logging.info("Searching for: " + query.toString());
                result = self._searcher.search(query, 1)
                freq = result.totalHits
                if freq > 0:
                    result = self._searcher.search(query, freq)
                hits = pyJava.JArray2List(result.scoreDocs)
                logging.info("For " + str(query) + " : " + str(result.totalHits))
            # for (ScoreDoc hit : hits) {
            indexus = 0
            while indexus < len(hits):
                hit = hits[indexus]
                doc = self._searcher.doc(hit.doc)
                self._searcher.explain(query, hit.doc)
                ann = Annotation()
                features = dict()
                features[FreyaConstants.CLASS_FEATURE_LKB]=doc.get(FreyaConstants.CLASS_FEATURE_LKB)
                features[FreyaConstants.INST_FEATURE_LKB]=doc.get(FreyaConstants.INST_FEATURE_LKB)
                features[FreyaConstants.PROPERTY_FEATURE_LKB]=doc.get(FreyaConstants.PROPERTY_FEATURE_LKB)
                features["string"]=doc.get(FreyaConstants.FIELD_EXACT_CONTENT)
                features[FreyaConstants.SCORE]=hit.score
                ann.setFeatures(features)
                ann.setEndOffset(annotation.getEndOffset())
                ann.setStartOffset(annotation.getStartOffset())
                ann.setSyntaxTree(annotation.getSyntaxTree())
                ann.setText(annotation.getText())
                annotations.append(ann)
                indexus += 1
        except Exception as e:#CorruptIndexException(e):
            print e.message
            logging.error("Error")
        return annotations

    # *
    # * this method now search both stem and lowercase
    # *
    # * @param annotation
    # * @return
    #
    def searchStemFirst(self, annotation):
        annotations = list()
        pocString = QueryParser.escape(annotation.getText())
        preparePocStringOriginal = "\"" + pocString + "\""
        preparePocStringLowercase = "\"" + pocString.lower() + "\""
        try:
            maxSynonyms = 0
开发者ID:gitter-badger,项目名称:pyFreya,代码行数:70,代码来源:LuceneAnnotator.py


注:本文中的org.apache.lucene.search.IndexSearcher.explain方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。