当前位置: 首页>>代码示例>>Python>>正文


Python IndexSearcher.doc方法代码示例

本文整理汇总了Python中lucene.IndexSearcher.doc方法的典型用法代码示例。如果您正苦于以下问题:Python IndexSearcher.doc方法的具体用法?Python IndexSearcher.doc怎么用?Python IndexSearcher.doc使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在lucene.IndexSearcher的用法示例。


在下文中一共展示了IndexSearcher.doc方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_indexed_file_details

# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def get_indexed_file_details(ts_results, lucene_index_dir):
    '''
    This function gets each files details from the lucene 
    index. 
    
    Arguments: 
        ts_results - topic search results, each item contains 
                     [file id, root, file name, similarity score]
        lucene_index_dir - lucene index directory 
    
    Returns: 
        file details in a list 
    '''
    
    store = SimpleFSDirectory(File(lucene_index_dir))
    searcher = IndexSearcher(store, True)
    
    rows = []
    for rs in ts_results:
        doc = searcher.doc(rs[0])
        table = dict((field.name(), field.stringValue())
                     for field in doc.getFields())
        row = []
        metadata = MetadataType._types
        for field in metadata:
            if table.get(field,'empty') != 'empty' :
                row.append(table.get(field,'empty'))
            else: 
                row.append('')
        row.append(str(table.get(MetadataType.FILE_ID,'empty')))
        row.append(str(rs[3])) # similarity score
        
        rows.append(row)
    
    return rows
开发者ID:clintpgeorge,项目名称:ediscovery,代码行数:37,代码来源:lucene_index_dir.py

示例2: run

# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def run(writer, analyzer):
	while True:
		print 
		print "Hit enter with no input to quit."
		command = raw_input("Query:")
		if command == '':
			return

		print "Searching for:", command
		IndexReader = writer.getReader()
		searcher = IndexSearcher(IndexReader)
		#query = QueryParser(Version.LUCENE_CURRENT, "hashtag", analyzer).parse(command)
		#scoreDocs = searcher.search(query, 50).scoreDocs
		wildquery = command + "*"
		term = Term("hashtag", wildquery)
		query = WildcardQuery(term)
		scoreDocs = searcher.search(query, 5).scoreDocs
		print "%s total matching documents." % len(scoreDocs)
		
		for scoreDoc in scoreDocs:
			doc = searcher.doc(scoreDoc.doc)
			
			score = ( len(command) / len(doc.get("hashtag")) ) * scoreDoc.score
			print 'tweet:', doc.get("contents")
			print 'user_name:', doc.get("user_name")
			print 'when', doc.get("creation_date")
开发者ID:greedo,项目名称:TweetCrowdRanking,代码行数:28,代码来源:streamingIndexer.py

示例3: get_doc_details

# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def get_doc_details(doc_id, lucene_index_dir):
    '''
    This function gets a file's details from 
    the lucene index. 
    
    Arguments: 
        doc_id - file id
        lucene_index_dir - lucene index directory 
    
    Returns: 
        file details as a list 
    '''
    
    store = SimpleFSDirectory(File(lucene_index_dir))
    searcher = IndexSearcher(store, True)
    
    doc = searcher.doc(doc_id)
    table = dict((field.name(), field.stringValue())
                 for field in doc.getFields())
    row = []
    metadata = MetadataType._types
    for field in metadata:
        if table.get(field,'empty') != 'empty' :
            row.append(table.get(field,'empty'))
        else: 
            row.append('')
    row.append(str(table.get(MetadataType.FILE_ID,'empty')))

    return row 
开发者ID:clintpgeorge,项目名称:ediscovery,代码行数:31,代码来源:lucene_index_dir.py

示例4: displayResults

# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
    def displayResults(self, query, sort):

        searcher = IndexSearcher(self.directory, True)

        fillFields = False
        computeMaxScore = False
        docsScoredInOrder = False
        computeScores = True

        collector = TopFieldCollector.create(sort, 20,
                                             fillFields,
                                             computeScores,
                                             computeMaxScore,
                                             docsScoredInOrder)

        searcher.search(query, None, collector)
        scoreDocs = collector.topDocs().scoreDocs

        print "\nResults for:", query, "sorted by", sort
        print "Title".rjust(30), "pubmonth".rjust(10), \
              "id".center(4), "score".center(15)

        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            title = doc["title"]
            if len(title) > 30:
                title = title[:30]
            print title.encode('ascii', 'replace').rjust(30), \
                  doc["pubmonth"].rjust(10), \
                  str(scoreDoc.doc).center(4), \
                  ("%06f" % (scoreDoc.score)).rjust(12)
            print "  ", doc["category"]
            # print searcher.explain(query, scoreDoc.doc)

        searcher.close()
开发者ID:pombredanne,项目名称:python-lucenepp,代码行数:37,代码来源:SortingExample.py

示例5: retrieve

# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
	def retrieve( self, query, max_res = 10 ):
		lucene.initVM()
		inDir = SimpleFSDirectory( File( self.INDEX_DIR ) )
		lucene_analyzer = StandardAnalyzer( Version.LUCENE_30 )
		lucene_searcher = IndexSearcher( inDir )
		my_query = QueryParser( Version.LUCENE_30, 'content' , lucene_analyzer ).parse( query )
		MAX = max_res
		total_hits = lucene_searcher.search( my_query, MAX )
		res_head = '{"query":"' + query + '","results":['
		res_tail = ']}'
		result = res_head
		hits = total_hits.totalHits
		if ( hits > 0 ):
			res_body = ''
			it = 0
			for hit in total_hits.scoreDocs:
				it += 1
				doc = lucene_searcher.doc( hit.doc )
				res_body += '{"rank":' +\
							str( it ) +\
							',"score":"' +\
							str( hit.score ) +\
							'","title":"' +\
							doc.get( 'title' ).encode('utf-8') +\
							'","id":"' +\
							doc.get( 'id' ).encode('utf-8') +\
							'"}'
				if ( it < hits ):
					res_body += ','
			result += res_body
		result += res_tail
		return result
开发者ID:farbod-s,项目名称:Noormags,代码行数:34,代码来源:retrieval.py

示例6: document

# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
	def document( self, docId, max_res = 1 ):
		lucene.initVM()
		inDir = SimpleFSDirectory( File( self.INDEX_DIR ) )
		lucene_analyzer = StandardAnalyzer( Version.LUCENE_30 )
		lucene_searcher = IndexSearcher( inDir )
		my_query = QueryParser( Version.LUCENE_30, 'id' , lucene_analyzer ).parse( docId )
		MAX = max_res
		total_hits = lucene_searcher.search( my_query, MAX )
		result = '{'
		hits = total_hits.totalHits
		if ( hits == 1 ):
			for hit in total_hits.scoreDocs:
				doc = lucene_searcher.doc( hit.doc )
				result += '"id":"' +\
						  doc.get( 'id' ) +\
						  '","title":"' +\
						  doc.get( 'title' ) +\
						  '","abstract":"' +\
						  doc.get( 'abstract' ) +\
						  '","keyword":"' +\
						  doc.get( 'keyword' ) +\
						  '","content":"' +\
						  doc.get( 'content' ) +\
						  '","authors":"' +\
						  doc.get( 'authors' ) +\
						  '"'
		result += '}'
		return result
开发者ID:farbod-s,项目名称:Noormags,代码行数:30,代码来源:retrieval.py

示例7: __init__

# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
class OccuredCandidates:
	indexDir = 'data/index'
	max_candidates = 30

	def __init__(self):
		lucene.initVM()
		self._lversion = Version.LUCENE_30
		self._analyzer = EnglishAnalyzer(self._lversion)
		self._searcher = IndexSearcher(SimpleFSDirectory(File(self.indexDir)))

		self._translation = loadTranslation()
		self._links = loadLinks()

	def find(self, phrase):
		phrase = phrase.lower().encode('utf8')
		query = ' '.join(['+'+ word for word in phrase.split(' ')]);
		query = QueryParser(self._lversion, 'contents', self._analyzer).parse(query)
		hits = self._searcher.search(query, self.max_candidates)

		# if not hits.totalHits: print "%d documents for '%s'" % (hits.totalHits, str(query)) # potential bug

		# todo put article_id in lucene index instead of translating document title

		links = {}
		for hit in hits.scoreDocs:
			title = quote(self._searcher.doc(hit.doc).get("title").encode('utf-8').replace(' ', '_')).replace('%28', '(').replace('%29', ')')
			if title in self._translation:
				links[self._translation[title]] = hit.score
			# else: print title # potential bug

		return self._links[phrase].get(-1, 0), links

	def clear_links(self, annotations):
		return filter(lambda annotation: annotation['links'] and max(annotation['links'].values()) > 1, annotations)
开发者ID:nournia,项目名称:wikifier,代码行数:36,代码来源:candidates.py

示例8: lucene_search

# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def lucene_search(index_dir, limit, query_text):
    '''
    lucene_search: Search a built index and return upto limit number of responses 
    Arguments: Input index folder, limit value of results returned, query(as string)
    Returns: paths of responsive files as list
    '''
    
    logging.basicConfig(file=os.path.join(index_dir,"lucene_search.log"))
    logger.info("Initializing search....")
    lucene.initVM()
    logger.info("Reading index from "+index_dir)
    index = SimpleFSDirectory(File(index_dir))
    analyzer = StandardAnalyzer(Version.LUCENE_30) #Lucene version used to generate index
    searcher = IndexSearcher(index)
    
    logger.info("Parsing query :"+ query_text)
    query = QueryParser(Version.LUCENE_30, "text", analyzer).parse(query_text)
    hits = searcher.search(query, limit)

    logger.info("Found %d document(s) that matched query '%s':" % (hits.totalHits, query))
    hit_paths = []

    for hit in hits.scoreDocs:
        # The following code also generates score for responsive/found documents and the 
        # content index which matched
        # print hit.score, hit.doc, hit.toString()
        doc = searcher.doc(hit.doc)
        hit_paths.append(doc.get("path"))
    
    return hit_paths 
开发者ID:clintpgeorge,项目名称:ediscovery,代码行数:32,代码来源:lucene_search.py

示例9: luceneRetriver

# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def luceneRetriver(query):

	lucene.initVM()

	indir = SimpleFSDirectory(File(INDEXDIR))

	lucene_analyzer = StandardAnalyzer(Version.LUCENE_30)

	lucene_searcher = IndexSearcher(indir)

	my_query = QueryParser(Version.LUCENE_30,"text",\

	lucene_analyzer).parse(query)

	MAX = 1000

	total_hits = lucene_searcher.search(my_query,MAX)

	print "Hits: ",total_hits.totalHits

	for hit in total_hits.scoreDocs:

		print "Hit Score: ",hit.score, "Hit Doc:",hit.doc, "Hit String:",hit.toString()

		doc = lucene_searcher.doc(hit.doc)

		print doc.get("text").encode("utf-8")
开发者ID:ri0day,项目名称:gangster,代码行数:29,代码来源:pylucene_search.py

示例10: query

# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
 def query(self,title):
     self._th.attachCurrentThread()
     searcher = IndexSearcher(self._dir)
     query=QueryParser(Version.LUCENE_30, "title", self._analyzer).parse(title)
     total_hits = searcher.search(query, 10)
     for hit in total_hits.scoreDocs:
         doc = (searcher.doc(hit.doc))
         return doc.get("title")+"\n"+doc.get("content")+"--------------------------------"
     return "None"
开发者ID:MarekGrznar,项目名称:wikipedia,代码行数:11,代码来源:statisticscats.py

示例11: search

# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def search(command=command1):
    searcher = IndexSearcher(reader)
    analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
    query = QueryParser(Version.LUCENE_CURRENT, "contents",
                        analyzer).parse(command)
    res = searcher.search(query, 1000000)
    print 'Total hits:', res.totalHits
#    return searcher, res
    return [searcher.doc(doc.doc) for doc in res.scoreDocs[:20]]
开发者ID:TalLinzen,项目名称:hebrew-blog-corpus,代码行数:11,代码来源:build.py

示例12: getCrowds

# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
 def getCrowds(self, query, field = CrowdFields.text): 
     searcher = IndexSearcher(self.index, True)
     q = QueryParser(Version.LUCENE_CURRENT, field, self.analyzer).parse(query)
     collector = TopScoreDocCollector.create(hitsPerPage, True)
     searcher.search(q, collector)
     hits = collector.topDocs().scoreDocs
     
     return [
         searcher.doc(scoreDoc.doc).get(CrowdFields.id)
         for scoreDoc in hits]
开发者ID:JeffAMcGee,项目名称:crowdy,代码行数:12,代码来源:search.py

示例13: LuceneSearch

# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
class LuceneSearch(object):
    def __init__(self):
        STORE_DIR = "index"
        initVM()
        print 'lucene', VERSION
        self.directory = SimpleFSDirectory(File(STORE_DIR))
        print self.directory
        self.searcher = IndexSearcher(self.directory, True)
        self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)

    def close(self):
        self.searcher.close()
    
    def raw_search(self, query_string):
        query = QueryParser(Version.LUCENE_CURRENT, "contents",
                            self.analyzer).parse(query_string)
        scoreDocs = self.searcher.search(query, 50).scoreDocs
        print "%s total matching documents." % len(scoreDocs)
        matches = []
        for scoreDoc in scoreDocs:
            doc = self.searcher.doc(scoreDoc.doc)
            #print 'doc matched = ', dir(doc)
            contents = LuceneDoc.load(doc.get('name'))
            matches.append({'contents' : contents, 'doc' : doc})
        return matches
           
    def search(self, query):
        matches = self.raw_search(query)
        results = ''
        if len(matches) > 0:
            results += str(len(matches))+" results <br/>"
            for match in matches:
                results += '<a href='+str(match['contents']['dealUrl'])+'>'+str(match['contents']['merchant'])+'</a><br />'
                results += '<p>'+str(match['contents']['shortAnnouncementTitle'])+','+str(match['contents']['redemptionLocation'])+'</p><br/>'
        else:
            results = "0 results <br/>"
        return results
        
    def cli_search(self):
        while True:
            print
            print "Hit enter with no input to quit."
            command = raw_input("Query:")
            if command == '':
                return
            matches = self.raw_search(command)
            print
            print "Searching for:", command
            
            for match in matches:
                print match['contents']['dealUrl']
                print match['contents']['merchant'], ',', match['contents']['redemptionLocation'], ', ', match['contents']['div']
                print match['contents']['shortAnnouncementTitle']
                print '-'*80
开发者ID:jpercent,项目名称:syndeticlogic,代码行数:56,代码来源:lucene_search.py

示例14: pesquisar_com_lucene

# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def pesquisar_com_lucene():
    initVM()
    #print 'lucene', VERSION

    # Get handle to index directory
    directory = SimpleFSDirectory(File(STORE_DIR))

    # Creates a searcher searching the provided index.
    ireader  = IndexReader.open(directory, True)

    # Implements search over a single IndexReader.
    # Use a single instance and use it across queries
    # to improve performance.
    searcher = IndexSearcher(ireader)

    # Get the analyzer
    analyzer = EnglishAnalyzer(Version.LUCENE_CURRENT)

    for query in querys:
        query_number =  query.query_number
        # Constructs a query parser. We specify what field to search into.
        query.query_text = query.query_text.replace('?','')
        query.query_text = query.query_text.replace('*','')
        queryParser = QueryParser(Version.LUCENE_CURRENT,
                                  FIELD_CONTENTS, analyzer)

        # Create the query
        query = queryParser.parse(query.query_text)

        # Run the query and get top 50 results
        topDocs = searcher.search(query,50000)

        # Get top hits
        scoreDocs = topDocs.scoreDocs

        r = resultado_query(query_number,scoreDocs)
        resultados.append(r)
        #print "%s total matching documents." % len(scoreDocs)
        #for scoreDoc in scoreDocs:
        #    doc = searcher.doc(scoreDoc.doc)
        #    print doc.get(FIELD_PATH)

    with open('resultados_da_busca/resultados.csv', 'w') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=';',
                                quotechar='|', quoting=csv.QUOTE_MINIMAL)
        for row in resultados:
            resultados_da_row = []
            i = 1
            for resultado_da_query in row.query_results:
                doc = searcher.doc(resultado_da_query.doc)
                resultados_da_row.append((i,int(doc.get(FIELD_PATH))))
                i = i + 1
            spamwriter.writerow([row.query_number,resultados_da_row])
开发者ID:mandoju,项目名称:trabalho_bri_pylucene,代码行数:55,代码来源:search.py

示例15: retrieve_document_details

# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def retrieve_document_details(docid, index_dir):
    '''
    This method will be used to retrieve a single document associated with the docid 
    that is passed to it as parameter. 
    The document will be searched in the directory referred by index_dir.
    
    If you want to access a specific field's value you can access that using the instance 
    of this document class as document.get(<field_name>). Here <field_name> is a string.
    '''
    
    store = SimpleFSDirectory(File(index_dir))
    searcher = IndexSearcher(store, True)
    document = searcher.doc(int(docid))
    return document
开发者ID:clintpgeorge,项目名称:ediscovery,代码行数:16,代码来源:lucene_index_dir.py


注:本文中的lucene.IndexSearcher.doc方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。