当前位置: 首页>>代码示例>>Python>>正文


Python IndexSearcher.doc方法代码示例

本文整理汇总了Python中org.apache.lucene.search.IndexSearcher.doc方法的典型用法代码示例。如果您正苦于以下问题:Python IndexSearcher.doc方法的具体用法?Python IndexSearcher.doc怎么用?Python IndexSearcher.doc使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.lucene.search.IndexSearcher的用法示例。


在下文中一共展示了IndexSearcher.doc方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_candidates

# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import doc [as 别名]
def get_candidates(qatp):

    if prm.create_index:
        create_index()

    lucene.initVM()
    analyzer = StandardAnalyzer(Version.LUCENE_4_10_1)
    reader = IndexReader.open(SimpleFSDirectory(File(prm.index_folder)))
    searcher = IndexSearcher(reader)
    candidates = []
    n = 0
    for q,a,t,p in qatp:
        if n % 100 == 0:
            print 'finding candidates sample', n
        n+=1

        q = q.replace('AND','\\AND').replace('OR','\\OR').replace('NOT','\\NOT')
        query = QueryParser(Version.LUCENE_4_10_1, "text", analyzer).parse(QueryParser.escape(q))
        hits = searcher.search(query, prm.max_candidates)
        c = []
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            c.append(doc.get("id"))

        candidates.append(c)
        
    return candidates
开发者ID:domarps,项目名称:WebNav,代码行数:29,代码来源:lucene_search.py

示例2: search

# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import doc [as 别名]
	def search(self, input_query=None, max_answers=10):
		''' Searches the given query in the index '''
		if input_query is None:
			return None

		base_dir = '.'
		directory = SimpleFSDirectory(File(os.path.join(base_dir, self.index_dir)))
		searcher = IndexSearcher(DirectoryReader.open(directory))
		analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
		
		
		# query = QueryParser(Version.LUCENE_CURRENT, "title", analyzer).parse(input_query)
		parser = MultiFieldQueryParser(Version.LUCENE_CURRENT, (self._posts_fields + self._answer_fields), analyzer)
		query = MultiFieldQueryParser.parse(parser, input_query)

		scoreDocs = searcher.search(query, max_answers).scoreDocs
		print "%s total matching documents." % len(scoreDocs)

		docs = []
		for scoreDoc in scoreDocs:
			doc = searcher.doc(scoreDoc.doc)
			doc_dict = dict((field.name(), field.stringValue()) for field in doc.getFields())
			docs.append(doc_dict)
			# print doc
		return docs
开发者ID:devs4v,项目名称:devs4v-information-retrieval15,代码行数:27,代码来源:QA_Searcher.py

示例3: Searcher

# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import doc [as 别名]
class Searcher(object):
    def __init__(self, **kwargs):
        """ Initialize a new instance of the Searcher

        :param count: The number of counts to return from a query
        :param output: The output directory of the underlying index
        """
        self.count = kwargs.get("count", 100)
        self.output = kwargs.get("root", "index")
        self.store = SimpleFSDirectory(File(self.output))
        self.analyzer = StandardAnalyzer(Version.LUCENE_30)
        self.searcher = IndexSearcher(DirectoryReader.open(self.store))

    def search(self, query):
        """ Given a query, apply it against the existing index.

        :param query: The query to apply to the index
        :returns: A generator of the matching documents
        """
        query = QueryParser(Version.LUCENE_30, "data", self.analyzer).parse(query)
        results = self.searcher.search(query, self.count)
        for result in results.scoreDocs or []:
            # logger.debug("%s %s %s", hit.score, hit.doc, hit.toString())
            document = self.searcher.doc(result.doc)
            yield document.get("path"), result.score
开发者ID:bashwork,项目名称:common,代码行数:27,代码来源:filesearch.py

示例4: LuceneSearcher

# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import doc [as 别名]
class LuceneSearcher(object):
    fields = ['id', 'text', 'types']

    def __init__(self, db_path):
        directory = SimpleFSDirectory(File(db_path))
        reader = DirectoryReader.open(directory)
        self.searcher = IndexSearcher(reader)
        self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        logger.info("Loaded DB from %s with %d documents: ",
                    db_path, reader.numDocs())
        
    def search(self, query, max_matches=1000):
        query = VALID_CHARS_PATTERN.sub(' ', query)
        logger.debug("Searching for %s", query)
        query = QueryParser(Version.LUCENE_CURRENT, "text",
                            self.analyzer).parse(query)
        score_docs = self.searcher.search(query, max_matches).scoreDocs
        logger.debug("%s total matching documents.",
                     len(score_docs))
        
        docs = [self.searcher.doc(d.doc) for d in score_docs]
        return [self.convert_to_dict(doc) for doc in docs]
        
    def convert_to_dict(self, doc):
        return {field: doc.get(field) for field in self.fields}
开发者ID:daoudclarke,项目名称:fbsearch,代码行数:27,代码来源:lucenesearch.py

示例5: search

# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import doc [as 别名]
	def search(self):
		''' Searches the given query in the index '''

		lucene.initVM(vmargs=['-Djava.awt.headless=true'])
		# print 'lucene', lucene.VERSION
		# base_dir = os.path.dirname(os.path.abspath('.'))
		base_dir = '.'
		directory = SimpleFSDirectory(File(os.path.join(base_dir, self.index_dir)))
		searcher = IndexSearcher(DirectoryReader.open(directory))
		analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
		

		while True:
			print
			print "Hit enter with no input to quit."
			command = raw_input("Query:")
			if command == '':
				return

			print
			print "Searching for:", command

			query = QueryParser(Version.LUCENE_CURRENT, "title",
								analyzer).parse(command)
			scoreDocs = searcher.search(query, 50).scoreDocs
			print "%s total matching documents." % len(scoreDocs)

			for scoreDoc in scoreDocs:
				doc = searcher.doc(scoreDoc.doc)
				# print 'path:', doc.get("path"), 'name:', doc.get("name")
				print doc
开发者ID:devs4v,项目名称:devs4v-information-retrieval15,代码行数:33,代码来源:YahooManners_QA.py

示例6: get_image_pmcid

# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import doc [as 别名]
def get_image_pmcid(pmcid, classes = ""):
    fields = ["pmcid", "class"]
    docs = []
    location = web.__path__[0] + "/static/web/files/index/index.figures"
    #lucene.initVM()
    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()
    analyzer = StandardAnalyzer(Version.LUCENE_4_10_1)
    reader = IndexReader.open(SimpleFSDirectory(File(location)))
    searcher = IndexSearcher(reader)
    # multi field query: http://stackoverflow.com/questions/2005084/how-to-specify-two-fields-in-lucene-queryparser
    
    #query = MultiFieldQueryParser(Version.LUCENE_4_10_1, fields, analyzer)
    # query.setDefaultOperator(QueryParserBase.AND_OPERATOR)
    
    #query = query.parse(query, ('4175339','1'))
    # query.parse(queryString)#"Shigella sonnei"
    # query = QueryParser(Version.LUCENE_4_10_1, "abstract", analyzer).parse(queryString)#"Shigella sonnei"

    MAX = 10000
    #hits = searcher.search(query, MAX)
    if classes == "all":
        queryStr = "pmcid:(" + ' '.join(pmcid) +")"
    else:
        queryStr = "pmcid:(" + ' '.join(pmcid) +")" + " AND class:" + classes
    query = QueryParser(Version.LUCENE_4_10_1, "pmcid",analyzer)#needed to build a custom query
    q = query.parse(queryStr) 
    hits = searcher.search(q, MAX)
    for hit in hits.scoreDocs:#should only be one
        #print hit.score, hit.doc, hit.toString()
        docs.append(searcher.doc(hit.doc))
    return docs #This will return the image documents that belong to a pmcid(article)
开发者ID:kevkid,项目名称:YIF,代码行数:34,代码来源:retriever.py

示例7: search

# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import doc [as 别名]
    def search(self, field, text):
        """
        search text within indexed data

        input:
            field   fieldname of the value that will be indexed
            text    text to search

        output:
            hits    return a list of hits

        """
        results = []
        idx_reader = DirectoryReader.open(self.directory)
        idx_searcher = IndexSearcher(idx_reader)

        # parse query
        parser = AnalyzingQueryParser(Version.LUCENE_CURRENT, field, self.analyser)
        query = parser.parse(text)

        # search
        hits = idx_searcher.search(query, 1000).scoreDocs.tolist()
        for hit in hits:
            doc = idx_searcher.doc(hit.doc)
            score = hit.score
            title = doc.get(field)
            url = doc.get("url")
            results.append((score, url, title))

        return results
开发者ID:mkind,项目名称:crawler,代码行数:32,代码来源:idx.py

示例8: query

# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import doc [as 别名]
    def query(self, data):
        if self.fil.exists():
            searcher = IndexSearcher(DirectoryReader.open(self.d))
            query = QueryParser(
                Version.LUCENE_30,
                "id",
                self.analyzer).parse(
                data['query'])
            hits = searcher.search(query, 100000)

            results = {}

            results['totalHits'] = hits.totalHits
            results['hits'] = {}

            for hit in hits.scoreDocs:
                record = {}
                doc = searcher.doc(hit.doc)
                fields = doc.getFields()
                record['score'] = hit.score
                for field in fields:
                    if field.name() != "id":
                        record[field.name()] = field.stringValue()
                results['hits'][doc.get('id')] = record

            searcher.getIndexReader().close()
            return results
开发者ID:bradleyjones,项目名称:apiary,代码行数:29,代码来源:lucenedriver.py

示例9: get_query_results

# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import doc [as 别名]
def get_query_results(reader,query,n,field):
    searcher = IndexSearcher(reader)
    hits = searcher.search(query, n).scoreDocs
    print("Found %d hits:" % len(hits))
    for i, hit in enumerate(hits):
        doc = searcher.doc(hit.doc)
        print("%d. %s" % (i + 1, doc.get(field)))
开发者ID:msnyder,项目名称:wikipedia-indexing,代码行数:9,代码来源:wikipedia_topic_modeling.py

示例10: search

# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import doc [as 别名]
def search():

	lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    
	args = []
	if request.method == 'POST':
		if request.form['ies']:
			args.append('+ies:'+request.form['ies'])
		if request.form['area']:
			args.append('+area:'+request.form['area'])
		if request.form['professor']:
			args.append('+professor:'+request.form['professor'])
		if request.form['conceito']:
			#args.append('m:'+request.form['conceito']+'d:'+request.form['conceito']+'f:'+request.form['conceito'])
			args.append('m:'+request.form['conceito'])
			args.append('d:'+request.form['conceito'])
			args.append('f:'+request.form['conceito'])

	table = []
	if(len(args) > 0): 
		scoreDocs = mansearch.buscar('indexer/',args)
		fsDir = SimpleFSDirectory(File(indexDir))
		searcher = IndexSearcher(DirectoryReader.open(fsDir))
		for scoreDoc in scoreDocs:
			doc = searcher.doc(scoreDoc.doc)
			table.append(dict((field.name(), field.stringValue()) for field in doc.getFields()))
	return render_template('busca.html',table = table)
	
	pass
开发者ID:nancibonfim,项目名称:information_retrieval_20132,代码行数:31,代码来源:page.py

示例11: search_docs

# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import doc [as 别名]
 def search_docs(self, value, field="general_info"):
     MAX_RESULTS = 1000
     searcher = IndexSearcher(DirectoryReader.open(self.store))
     query = QueryParser(Version.LUCENE_CURRENT, field,
                         self.analyzer).parse(value)
     topDocs = searcher.search(query, MAX_RESULTS)
     
     return [searcher.doc(hit.doc) for hit in topDocs.scoreDocs]
开发者ID:inteligencia-coletiva-lsd,项目名称:app-tabletranscriber,代码行数:10,代码来源:pylucene.py

示例12: perform_search

# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import doc [as 别名]
    def perform_search(self, searchterm, results_per_page, page):
        # if there is a field in the searchterm
        """if ":" in searchterm:
            # processing a query
            parser = QueryParser(Version.LUCENE_CURRENT, "content", self.analyzer)
            parser.setDefaultOperator(QueryParser.Operator.AND)

            query = parser.parse(searchterm)

        else:
            query = BooleanQuery()
            query_title = TermQuery(Term("title", searchterm))
            query_description = TermQuery(Term("description", searchterm))
            query_content = TermQuery(Term("content", searchterm))

            #  BooleanClause.Occur.MUST for AND queries
            query.add(query_title, BooleanClause.Occur.SHOULD)
            query.add(query_description, BooleanClause.Occur.SHOULD)
            query.add(query_content, BooleanClause.Occur.SHOULD)"""

        # create QueryParser for each field to be searched
        parser_title = QueryParser(Version.LUCENE_CURRENT, "title", self.analyzer)
        parser_description = QueryParser(Version.LUCENE_CURRENT, "description", self.analyzer)
        parser_content = QueryParser(Version.LUCENE_CURRENT, "content", self.analyzer)

        # put fields together
        query = BooleanQuery()
        query.add(parser_title.parse(searchterm), BooleanClause.Occur.SHOULD)
        query.add(parser_description.parse(searchterm), BooleanClause.Occur.SHOULD)
        query.add(parser_content.parse(searchterm), BooleanClause.Occur.SHOULD)

        # conducting search
        searcher = IndexSearcher(DirectoryReader.open(self.store))

        start = datetime.now()
        hits = searcher.search(query, results_per_page + (results_per_page * page))
        score_docs = hits.scoreDocs
        count_results = hits.totalHits
        duration = datetime.now() - start

        # results to return
        results = []
        count = 0

        for scoreDoc in score_docs:

            # skip offset
            if count < results_per_page * page:
                count += 1
                continue
            count += 1


            doc = searcher.doc(scoreDoc.doc)
            table = dict((field.name(), field.stringValue()) for field in doc.getFields())
            results.append(table)

        return results, duration, count_results
开发者ID:martinjuhasz,项目名称:HSRM_KI_Projekt,代码行数:60,代码来源:LuceneSearcher.py

示例13: SearchQuery

# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import doc [as 别名]
def SearchQuery(queryString, fields, classification): 
    #if __name__ == "__main__":
    #if __name__ == "retriever":
    location = web.__path__[0] + "/static/web/files/index/index.articles"
    #lucene.initVM()
    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()
    analyzer = StandardAnalyzer(Version.LUCENE_4_10_1)
    reader = IndexReader.open(SimpleFSDirectory(File(location)))
    searcher = IndexSearcher(reader)
    #multi field query: http://stackoverflow.com/questions/2005084/how-to-specify-two-fields-in-lucene-queryparser
    
    query = MultiFieldQueryParser(Version.LUCENE_4_10_1, fields, analyzer)
    #query.setDefaultOperator(QueryParserBase.AND_OPERATOR)
    query = MultiFieldQueryParser.parse(query, queryString)
    #query.parse(queryString)#"Shigella sonnei"
    #query = QueryParser(Version.LUCENE_4_10_1, "abstract", analyzer).parse(queryString)#"Shigella sonnei"

    MAX = 10000
    hits = searcher.search(query, MAX)
 
    print "Found %d document(s) that matched query '%s':" % (hits.totalHits, query)
    paths = []
    pmcids = []
    documentDict = {}
    for hit in hits.scoreDocs:
        doc = searcher.doc(hit.doc)
        pmcids.append(doc.get("pmcid"))
        docDict = {"title" : doc.get("title")}#we can add any other field we want...
        documentDict[doc.get("pmcid")] = docDict 
    
    #Where we get the images for all the pmcids    
    images = get_image_pmcid(pmcids, classification)#should take in pmcids and class
    #create dictionary of images with pmcid being their key
    imagesDict = {}
    for img in images:
        img_pmcid = img.get("pmcid") 
        if img_pmcid in imagesDict.keys():
            imagesDict[img_pmcid].append(img.get("filepath") + "/" + img.get("figureid"))
            
        else:
            imagesDict[img_pmcid] = [(img.get("filepath") + "/" + img.get("figureid"))]
            
    #for each pmcid, we will assign an image to it for the search results
    for pmcid in pmcids:
        if imagesDict:
            docDict = documentDict[pmcid]
            docDict["imgURL"] = imagesDict[pmcid][0] 
            documentDict[pmcid] = docDict 
        else:
            docDict = documentDict[pmcid]
            docDict["imgURL"] = "images/NoImageAvailable.jpg"
            documentDict[pmcid] = docDict
    
    #END - Where we get the images for all the pmcids
    
    
    return documentDict
开发者ID:kevkid,项目名称:YIF,代码行数:60,代码来源:retriever.py

示例14: SearchIndex

# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import doc [as 别名]
class SearchIndex(object):

    def __init__(self):
        vm_env = lucene.getVMEnv()
        vm_env.attachCurrentThread()

        indexDir = SimpleFSDirectory(File(app.config['INDEX_PATH']))
        self.searcher = IndexSearcher(DirectoryReader.open(indexDir))

        self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)

        self.parser = QueryParser(Version.LUCENE_CURRENT, "contents", self.analyzer)


    def search(self, q, page = 1, duplicates = False):
        query = self.parser.parse(q)

        if not duplicates:
            query = self.addDuplicatesQuery(query)
        
        perPage = 10
        start = (page - 1) * perPage

        results = TopScoreDocCollector.create(1000, True)
        self.searcher.search(query, results)

        highlighter = Highlighter(QueryScorer(query))
        highlighter.setTextFragmenter(SimpleFragmenter(40))

        docs = []
        for scoreDoc in results.topDocs(start, perPage).scoreDocs:
            doc = self.searcher.doc(scoreDoc.doc)
            tokenStream = self.analyzer.tokenStream("contents", StringReader(doc['contents']))
            highlight = highlighter.getBestFragments(tokenStream, doc['contents'], 3, "...")
            
            docs.append({
                'title': doc['title'],
                'url': doc['url'],
                'duplicate': doc['duplicate'],
                'highlight': highlight}
            )

        del self.searcher
        
        totalPages = int(math.ceil(results.getTotalHits()/float(perPage)))

        return totalPages, docs

    def addDuplicatesQuery(self, query):
        not_duplicate = TermQuery(Term('duplicate', 'false'))
        booleanQuery = BooleanQuery()
        booleanQuery.add(not_duplicate, BooleanClause.Occur.MUST)
        booleanQuery.add(query, BooleanClause.Occur.MUST)
        return booleanQuery
开发者ID:thoughts1053,项目名称:search,代码行数:56,代码来源:searcher.py

示例15: search

# 需要导入模块: from org.apache.lucene.search import IndexSearcher [as 别名]
# 或者: from org.apache.lucene.search.IndexSearcher import doc [as 别名]
def search(term, n_docs=10, index='index'):
    store = SimpleFSDirectory(File(index))
    searcher = IndexSearcher(DirectoryReader.open(store))
    analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)

    query = QueryParser(Version.LUCENE_CURRENT, 'art_body', analyzer).parse(term)

    # str(query.getClass().toString()) == "class org.apache.lucene.search.TermQuery"

    score_docs = searcher.search(query, n_docs).scoreDocs

    return [(score_doc.score, unicode(searcher.doc(score_doc.doc).get('art_body'))) for score_doc in score_docs]
开发者ID:andrely,项目名称:vg-pipeline,代码行数:14,代码来源:indexing.py


注:本文中的org.apache.lucene.search.IndexSearcher.doc方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。