当前位置: 首页>>代码示例>>Python>>正文


Python lucene.IndexSearcher类代码示例

本文整理汇总了Python中lucene.IndexSearcher的典型用法代码示例。如果您正苦于以下问题：Python IndexSearcher类的具体用法？Python IndexSearcher怎么用？Python IndexSearcher使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了IndexSearcher类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: retrieve

	def retrieve( self, query, max_res = 10 ):
		"""Search the 'content' field and return the ranked hits as a JSON string.

		Arguments:
			query   - query string, parsed with QueryParser and a StandardAnalyzer
			max_res - maximum number of hits to return

		Returns:
			A string of the form
			{"query":"...","results":[{"rank":1,"score":"...","title":"...","id":"..."},...]}
			A stored field that is missing from a document is emitted as null.
		"""
		import json  # local import: the file's import header is not visible from this block

		lucene.initVM()
		inDir = SimpleFSDirectory( File( self.INDEX_DIR ) )
		lucene_analyzer = StandardAnalyzer( Version.LUCENE_30 )
		lucene_searcher = IndexSearcher( inDir )
		try:
			my_query = QueryParser( Version.LUCENE_30, 'content', lucene_analyzer ).parse( query )
			top_docs = lucene_searcher.search( my_query, max_res )
			items = []
			for rank, hit in enumerate( top_docs.scoreDocs, 1 ):
				doc = lucene_searcher.doc( hit.doc )
				# json.dumps quotes and escapes each value, so titles or ids
				# containing '"' or '\' cannot break the surrounding document.
				items.append( '{"rank":%d,"score":"%s","title":%s,"id":%s}' % (
					rank,
					hit.score,
					json.dumps( doc.get( 'title' ) ),
					json.dumps( doc.get( 'id' ) ),
				) )
		finally:
			lucene_searcher.close()
		# Joining the items places commas only between entries; counting against
		# totalHits would leave a trailing comma whenever more documents match
		# than max_res allows to be returned.
		return '{"query":' + json.dumps( query ) + ',"results":[' + ','.join( items ) + ']}'
开发者ID:farbod-s,项目名称:Noormags,代码行数:32,代码来源:retrieval.py

示例2: document

	def document( self, docId, max_res = 1 ):
		"""Look a document up by its 'id' field and return its stored fields as JSON.

		Arguments:
			docId   - value to search for in the 'id' field (parsed as a query)
			max_res - maximum number of hits requested from the searcher

		Returns:
			A JSON object string with the keys id, title, abstract, keyword,
			content and authors when exactly one document matches, otherwise '{}'.
			A stored field that is missing from the document is emitted as null.
		"""
		import json  # local import: the file's import header is not visible from this block

		lucene.initVM()
		inDir = SimpleFSDirectory( File( self.INDEX_DIR ) )
		lucene_analyzer = StandardAnalyzer( Version.LUCENE_30 )
		lucene_searcher = IndexSearcher( inDir )
		field_names = ( 'id', 'title', 'abstract', 'keyword', 'content', 'authors' )
		pairs = []
		try:
			my_query = QueryParser( Version.LUCENE_30, 'id', lucene_analyzer ).parse( docId )
			top_docs = lucene_searcher.search( my_query, max_res )
			if top_docs.totalHits == 1:
				for hit in top_docs.scoreDocs:
					doc = lucene_searcher.doc( hit.doc )
					# json.dumps escapes quotes, backslashes and newlines, which
					# free-text fields such as 'content' are bound to contain.
					pairs.extend(
						'"%s":%s' % ( name, json.dumps( doc.get( name ) ) )
						for name in field_names
					)
		finally:
			lucene_searcher.close()
		return '{' + ','.join( pairs ) + '}'
开发者ID:farbod-s,项目名称:Noormags,代码行数:28,代码来源:retrieval.py

示例3: lucene_search

def lucene_search(index_dir, limit, query_text):
    '''
    lucene_search: Search a built index and return up to limit responses.

    Arguments:
        index_dir  - folder holding the Lucene index; the log file
                     lucene_search.log is also configured inside it
        limit      - maximum number of results returned
        query_text - query string, parsed against the "text" field

    Returns:
        the stored "path" value of each matching document, as a list
    '''

    # The keyword is `filename`; `file` is not an option basicConfig knows,
    # so the log file was never actually configured.
    logging.basicConfig(filename=os.path.join(index_dir, "lucene_search.log"))
    logger.info("Initializing search....")
    lucene.initVM()
    logger.info("Reading index from %s", index_dir)
    index = SimpleFSDirectory(File(index_dir))
    analyzer = StandardAnalyzer(Version.LUCENE_30)  # Lucene version used to generate index
    searcher = IndexSearcher(index)
    try:
        logger.info("Parsing query :%s", query_text)
        query = QueryParser(Version.LUCENE_30, "text", analyzer).parse(query_text)
        hits = searcher.search(query, limit)

        logger.info("Found %d document(s) that matched query '%s':", hits.totalHits, query)

        # hit.score and hit.doc are also available on each entry if the
        # caller ever needs the relevance score or the internal doc number.
        hit_paths = [searcher.doc(hit.doc).get("path") for hit in hits.scoreDocs]
    finally:
        searcher.close()

    return hit_paths
开发者ID:clintpgeorge,项目名称:ediscovery,代码行数:30,代码来源:lucene_search.py

示例4: __init__

class OccuredCandidates:
	"""Finds candidate link targets for a phrase by searching a Lucene index.

	Hits are matched on the 'contents' field; each hit's stored title is
	mapped through the translation table to the key used in the result.
	"""

	# Location of the Lucene index, relative to the working directory.
	indexDir = 'data/index'
	# Upper bound on the number of hits requested per phrase.
	max_candidates = 30

	def __init__(self):
		"""Start the JVM, open the index and load the translation/link tables."""
		lucene.initVM()
		self._lversion = Version.LUCENE_30
		self._analyzer = EnglishAnalyzer(self._lversion)
		self._searcher = IndexSearcher(SimpleFSDirectory(File(self.indexDir)))

		self._translation = loadTranslation()
		self._links = loadLinks()

	def find(self, phrase):
		"""Return (link value, {translated title: score}) for the given phrase.

		Every word of the phrase is required ('+word') in the 'contents'
		field. The first element of the result is self._links[phrase].get(-1, 0),
		where phrase is the lower-cased, utf8-encoded input.
		NOTE(review): this subscript raises KeyError for an unknown phrase
		unless loadLinks() returns a defaulting mapping - confirm.
		"""
		phrase = phrase.lower().encode('utf8')

		# split() without an argument drops the empty tokens that repeated
		# spaces would produce (a bare '+' is not a valid clause), and
		# escape() keeps characters such as ':' or '(' from being read as
		# query syntax.
		words = phrase.split()

		links = {}
		if words:
			query_text = ' '.join('+' + QueryParser.escape(word) for word in words)
			query = QueryParser(self._lversion, 'contents', self._analyzer).parse(query_text)
			hits = self._searcher.search(query, self.max_candidates)

			# TODO: put article_id in the lucene index instead of translating the document title
			for hit in hits.scoreDocs:
				raw_title = self._searcher.doc(hit.doc).get("title").encode('utf-8')
				# Titles are keyed in URL form: underscores for spaces,
				# percent-quoted, but with literal parentheses.
				title = quote(raw_title.replace(' ', '_')).replace('%28', '(').replace('%29', ')')
				if title in self._translation:
					links[self._translation[title]] = hit.score

		return self._links[phrase].get(-1, 0), links

	def clear_links(self, annotations):
		"""Keep only annotations that have links and whose best link value exceeds 1."""
		return filter(lambda annotation: annotation['links'] and max(annotation['links'].values()) > 1, annotations)
开发者ID:nournia,项目名称:wikifier,代码行数:34,代码来源:candidates.py

示例5: testKeyword

    def testKeyword(self):
        """Expect exactly one document whose "isbn" field holds the term 1930110995."""

        searcher = IndexSearcher(self.directory, True)
        try:
            t = Term("isbn", "1930110995")
            query = TermQuery(t)
            scoreDocs = searcher.search(query, 50).scoreDocs
            self.assertEqual(1, len(scoreDocs), "JUnit in Action")
        finally:
            # Release the searcher even when the assertion fails.
            searcher.close()
开发者ID:bpgriner01,项目名称:pylucene,代码行数:7,代码来源:BasicSearchingTest.py

示例6: testChinese

    def testChinese(self):
        """Expect exactly one document whose "contents" field holds the term "道"."""

        searcher = IndexSearcher(self.directory, True)
        try:
            query = TermQuery(Term("contents", "道"))
            scoreDocs = searcher.search(query, 50).scoreDocs

            self.assertEqual(1, len(scoreDocs), "tao")
        finally:
            # Release the searcher even when the assertion fails.
            searcher.close()
开发者ID:bpgriner01,项目名称:pylucene,代码行数:7,代码来源:ChineseTest.py

示例7: luceneRetriver

def luceneRetriver(query):

	lucene.initVM()

	indir = SimpleFSDirectory(File(INDEXDIR))

	lucene_analyzer = StandardAnalyzer(Version.LUCENE_30)

	lucene_searcher = IndexSearcher(indir)

	my_query = QueryParser(Version.LUCENE_30,"text",\

	lucene_analyzer).parse(query)

	MAX = 1000

	total_hits = lucene_searcher.search(my_query,MAX)

	print "Hits: ",total_hits.totalHits

	for hit in total_hits.scoreDocs:

		print "Hit Score: ",hit.score, "Hit Doc:",hit.doc, "Hit String:",hit.toString()

		doc = lucene_searcher.doc(hit.doc)

		print doc.get("text").encode("utf-8")
开发者ID:ri0day,项目名称:gangster,代码行数:27,代码来源:pylucene_search.py

示例8: get_doc_details

def get_doc_details(doc_id, lucene_index_dir):
    '''
    This function gets a file's details from
    the lucene index.

    Arguments:
        doc_id - file id, passed to IndexSearcher.doc()
        lucene_index_dir - lucene index directory

    Returns:
        file details as a list: one entry per name in MetadataType._types
        ('' when the document has no such field), followed by the
        MetadataType.FILE_ID value as a string ('empty' when it is missing)
    '''

    store = SimpleFSDirectory(File(lucene_index_dir))
    searcher = IndexSearcher(store, True)
    try:
        doc = searcher.doc(doc_id)
        # Copy the values out so nothing refers to the index after close().
        table = dict((field.name(), field.stringValue())
                     for field in doc.getFields())
    finally:
        searcher.close()

    # A plain default replaces the old 'empty' sentinel comparison, which
    # also blanked out any field whose real value happened to be "empty".
    row = [table.get(field, '') for field in MetadataType._types]
    row.append(str(table.get(MetadataType.FILE_ID, 'empty')))

    return row
开发者ID:clintpgeorge,项目名称:ediscovery,代码行数:29,代码来源:lucene_index_dir.py

示例9: run

def run(writer, analyzer):
	while True:
		print 
		print "Hit enter with no input to quit."
		command = raw_input("Query:")
		if command == '':
			return

		print "Searching for:", command
		IndexReader = writer.getReader()
		searcher = IndexSearcher(IndexReader)
		#query = QueryParser(Version.LUCENE_CURRENT, "hashtag", analyzer).parse(command)
		#scoreDocs = searcher.search(query, 50).scoreDocs
		wildquery = command + "*"
		term = Term("hashtag", wildquery)
		query = WildcardQuery(term)
		scoreDocs = searcher.search(query, 5).scoreDocs
		print "%s total matching documents." % len(scoreDocs)
		
		for scoreDoc in scoreDocs:
			doc = searcher.doc(scoreDoc.doc)
			
			score = ( len(command) / len(doc.get("hashtag")) ) * scoreDoc.score
			print 'tweet:', doc.get("contents")
			print 'user_name:', doc.get("user_name")
			print 'when', doc.get("creation_date")
开发者ID:greedo,项目名称:TweetCrowdRanking,代码行数:26,代码来源:streamingIndexer.py

示例10: get_indexed_file_details

def get_indexed_file_details(ts_results, lucene_index_dir):
    '''
    This function gets each file's details from the lucene
    index.

    Arguments:
        ts_results - topic search results, each item contains
                     [file id, root, file name, similarity score]
        lucene_index_dir - lucene index directory

    Returns:
        file details in a list; each row holds one entry per name in
        MetadataType._types ('' when the document has no such field),
        then the MetadataType.FILE_ID value as a string ('empty' when it
        is missing), then the similarity score as a string
    '''

    store = SimpleFSDirectory(File(lucene_index_dir))
    searcher = IndexSearcher(store, True)
    metadata = MetadataType._types

    rows = []
    try:
        for rs in ts_results:
            doc = searcher.doc(rs[0])
            table = dict((field.name(), field.stringValue())
                         for field in doc.getFields())
            # A plain default replaces the old 'empty' sentinel comparison,
            # which also blanked out fields whose real value was "empty".
            row = [table.get(field, '') for field in metadata]
            row.append(str(table.get(MetadataType.FILE_ID, 'empty')))
            row.append(str(rs[3]))  # similarity score
            rows.append(row)
    finally:
        searcher.close()

    return rows
开发者ID:clintpgeorge,项目名称:ediscovery,代码行数:35,代码来源:lucene_index_dir.py

示例11: search

def search(r, keyword=""):
    """Phrase-search the "context" field and render the first 100 hits.

    Arguments:
        r       - request object; r.GET["keyword"] is used when keyword is empty
        keyword - phrase to search for

    Returns:
        the response rendered from robot_search_result.html with the result
        list, the total hit count and the elapsed time reported by the benchmark
    """
    import logging

    logger = logging.getLogger("search")
    bench = Benchmark(logger)
    from lucene import IndexSearcher, StandardAnalyzer, FSDirectory, QueryParser, File, Hit
    import lucene
    import os

    # NOTE(review): hard-coded JDK location, overwritten on every call -
    # confirm whether this should come from configuration instead.
    os.environ["JAVA_HOME"] = "/usr/local/jdk1.6.0_17"
    lucene.initVM(lucene.CLASSPATH)

    directory = FSDirectory.open(File(CONFIG.INDEX_PATH))
    ROBOT_INDEX = IndexSearcher(directory, True)
    try:
        ROBOT_ANALYZER = StandardAnalyzer()

        keyword = keyword or r.GET["keyword"]
        parser = QueryParser("context", ROBOT_ANALYZER)
        # The keyword comes from the request; escaping keeps an embedded
        # '"' or '\' from terminating the phrase or breaking the parse.
        query = parser.parse('"%s"' % QueryParser.escape(keyword))

        bench.start_mark("search")
        hits = ROBOT_INDEX.search(query)
        count = len(hits)
        result = []
        for i, hit in enumerate(hits, 1):
            if i > 100:
                break
            doc = Hit.cast_(hit).getDocument()
            result.append(SearchResult(doc, i, keyword))
    finally:
        # Close the searcher on every path, including parse/search failures.
        ROBOT_INDEX.close()

    et = bench.stop_mark()

    return render_to_response("robot_search_result.html", {"result": result, "count": count, "elaspe": et})
开发者ID:dalinhuang,项目名称:demodemo,代码行数:35,代码来源:views.py

示例12: query

    def query(indexName, queryString):
        """Return the stored 'id' of up to 100 documents matching queryString.

        Arguments:
            indexName   - path of the Lucene index directory
            queryString - query text; every '-' is replaced by '_' before
                          parsing, and all terms are required (AND)

        Returns:
            list of 'id' field values, in the order the hits were returned
        """

        indSearcher = IndexSearcher(SimpleFSDirectory(File(indexName)))
        try:
            qp = QueryParser(Version.LUCENE_CURRENT, "content", StandardAnalyzer(Version.LUCENE_CURRENT))
            qp.setDefaultOperator(qp.Operator.AND)

            parsed = qp.parse(queryString.replace("-", "_"))
            top_docs = indSearcher.search(parsed, 100)

            # Each hit carries its own document number in hit.doc; using the
            # loop position instead would return the first documents of the
            # index regardless of what actually matched.
            return [indSearcher.doc(hit.doc).get('id') for hit in top_docs.scoreDocs]
        finally:
            indSearcher.close()
开发者ID:KasperBrandt,项目名称:UvA-AIR,代码行数:25,代码来源:query.py

示例13: testTermRangeQuery

    def testTermRangeQuery(self):
        """Expect 3 hits for the "title2" term range from "d" to "j" (both flags True)."""
        index_searcher = IndexSearcher(self.directory, True)
        range_query = TermRangeQuery("title2", "d", "j", True, True)
        hit_total = index_searcher.search(range_query, 100).totalHits

        self.assertEqual(3, hit_total)
        index_searcher.close()
开发者ID:bpgriner01,项目名称:pylucene,代码行数:8,代码来源:TermRangeQueryTest.py

示例14: testExactPhrase

    def testExactPhrase(self):
        """Expect no hits for the quoted phrase "over the lazy" in "contents".

        The phrase is parsed with self.porterAnalyzer.
        """

        searcher = IndexSearcher(self.directory, True)
        try:
            query = QueryParser(Version.LUCENE_24, "contents",
                                self.porterAnalyzer).parse('"over the lazy"')
            topDocs = searcher.search(query, 50)

            self.assertEqual(0, topDocs.totalHits, "exact match not found!")
        finally:
            # Release the searcher even when the assertion fails.
            searcher.close()
开发者ID:bpgriner01,项目名称:pylucene,代码行数:8,代码来源:PositionalPorterStopAnalyzerTest.py

示例15: main

    def main(cls, argv):
        """Search a Berkeley DB backed index for "fox" and print the hit count.

        argv[1] is used as the home directory of the Berkeley DB environment.
        The '__index__' and '__blocks__' databases are opened inside one
        transaction, then a second transaction backs a DbDirectory on which
        a TermQuery for the term "fox" in the "contents" field is run.

        Prints a usage line and returns when argv does not have exactly two
        entries. Any exception raised while opening or searching is re-raised
        after the active transaction has been aborted.
        """

        if len(argv) != 2:
            print "Usage: BerkeleyDbSearcher <index dir>"
            return

        dbHome = argv[1]

        env = DBEnv()
        env.set_flags(DB_LOG_INMEMORY, 1);
        # The cache size is only set on Windows and on Linux; other posix
        # systems keep the environment's default.
        # 0x4000000 bytes is 64 MiB; presumably the arguments are
        # (gbytes, bytes, ncache) - confirm against the bsddb documentation.
        if os.name == 'nt':
            env.set_cachesize(0, 0x4000000, 1)
        elif os.name == 'posix':
            from commands import getstatusoutput
            if getstatusoutput('uname') == (0, 'Linux'):
                env.set_cachesize(0, 0x4000000, 1)

        env.open(dbHome, (DB_THREAD |
                          DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_TXN), 0)

        index = DB(env)
        blocks = DB(env)
        txn = None

        # Phase 1: open both databases under a single transaction, which is
        # committed on success and aborted on any failure.
        try:
            txn = env.txn_begin(None)
            index.open(filename = '__index__', dbtype = DB_BTREE,
                       flags = DB_THREAD, txn = txn)
            blocks.open(filename = '__blocks__', dbtype = DB_BTREE,
                        flags = DB_THREAD, txn = txn)
        except:
            # Bare except is deliberate here: the exception is re-raised
            # below once the transaction has been rolled back.
            if txn is not None:
                txn.abort()
                txn = None
            raise
        else:
            txn.commit()
            txn = None

        # Phase 2: run the search inside a second transaction.
        try:
            txn = env.txn_begin(None)
            directory = DbDirectory(txn, index, blocks, 0)
            searcher = IndexSearcher(directory, True)

            topDocs = searcher.search(TermQuery(Term("contents", "fox")), 50)
            print topDocs.totalHits, "document(s) found"
            searcher.close()
        except:
            if txn is not None:
                txn.abort()
                txn = None
            raise
        else:
            # The transaction is aborted rather than committed even on
            # success - presumably because the search writes nothing.
            txn.abort()

            # NOTE(review): the databases and the environment are closed only
            # on this success path; after an exception they stay open.
            index.close()
            blocks.close()
            env.close()
开发者ID:bpgriner01,项目名称:pylucene,代码行数:58,代码来源:BerkeleyDbSearcher.py


注:本文中的lucene.IndexSearcher类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。