本文整理汇总了Python中lucene.IndexSearcher类的典型用法代码示例。如果您正苦于以下问题:Python IndexSearcher类的具体用法?Python IndexSearcher怎么用?Python IndexSearcher使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了IndexSearcher类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: retrieve
def retrieve(self, query, max_res=10):
    """Search the index's 'content' field and return results as JSON.

    Arguments:
        query   -- free-text query string
        max_res -- maximum number of hits to return (default 10)

    Returns:
        A JSON string of the form
        {"query": ..., "results": [{"rank":1,"score":"...","title":...,"id":...}, ...]}
    """
    import json  # local import: build a correctly escaped JSON response

    lucene.initVM()
    index_dir = SimpleFSDirectory(File(self.INDEX_DIR))
    analyzer = StandardAnalyzer(Version.LUCENE_30)
    searcher = IndexSearcher(index_dir)
    parsed = QueryParser(Version.LUCENE_30, 'content', analyzer).parse(query)
    top_docs = searcher.search(parsed, max_res)

    results = []
    for rank, hit in enumerate(top_docs.scoreDocs, 1):
        doc = searcher.doc(hit.doc)
        results.append({
            'rank': rank,
            'score': str(hit.score),
            'title': doc.get('title'),
            'id': doc.get('id'),
        })
    # BUG FIX: json.dumps escapes quotes/backslashes inside titles and ids;
    # the original manual string concatenation emitted invalid JSON for any
    # such value (and also failed with a TypeError on a missing field).
    return json.dumps({'query': query, 'results': results}, ensure_ascii=False)
示例2: document
def document(self, docId, max_res=1):
    """Fetch a single document by its 'id' field and return it as JSON.

    Arguments:
        docId   -- document identifier (parsed as a query on the 'id' field)
        max_res -- maximum hits requested from the searcher (default 1)

    Returns:
        A JSON object string with id/title/abstract/keyword/content/authors,
        or '{}' when the id does not match exactly one document.
    """
    import json  # local import: build a correctly escaped JSON response

    lucene.initVM()
    index_dir = SimpleFSDirectory(File(self.INDEX_DIR))
    analyzer = StandardAnalyzer(Version.LUCENE_30)
    searcher = IndexSearcher(index_dir)
    parsed = QueryParser(Version.LUCENE_30, 'id', analyzer).parse(docId)
    top_docs = searcher.search(parsed, max_res)

    if top_docs.totalHits == 1:
        doc = searcher.doc(top_docs.scoreDocs[0].doc)
        fields = ('id', 'title', 'abstract', 'keyword', 'content', 'authors')
        # BUG FIX: json.dumps escapes embedded quotes/newlines in field values
        # (e.g. 'abstract' and 'content'); the original manual concatenation
        # produced invalid JSON for such values.
        return json.dumps(dict((f, doc.get(f)) for f in fields),
                          ensure_ascii=False)
    return '{}'
示例3: lucene_search
def lucene_search(index_dir, limit, query_text):
    '''
    lucene_search: Search a built index and return up to `limit` responses.

    Arguments:
        index_dir  -- folder containing the Lucene index
        limit      -- maximum number of results returned
        query_text -- query, as a string

    Returns:
        Paths (the stored "path" field) of responsive files, as a list.
    '''
    # BUG FIX: logging.basicConfig takes `filename=`, not `file=`; the bogus
    # keyword was silently ignored on old Pythons, so no log file was written.
    logging.basicConfig(filename=os.path.join(index_dir, "lucene_search.log"))
    logger.info("Initializing search....")
    lucene.initVM()
    logger.info("Reading index from " + index_dir)
    index = SimpleFSDirectory(File(index_dir))
    analyzer = StandardAnalyzer(Version.LUCENE_30)  # Lucene version used to generate index
    searcher = IndexSearcher(index)
    logger.info("Parsing query :" + query_text)
    query = QueryParser(Version.LUCENE_30, "text", analyzer).parse(query_text)
    hits = searcher.search(query, limit)
    logger.info("Found %d document(s) that matched query '%s':" % (hits.totalHits, query))
    # hit.score / hit.doc are also available per scoreDoc if scoring detail
    # is ever needed by callers.
    return [searcher.doc(hit.doc).get("path") for hit in hits.scoreDocs]
示例4: __init__
class OccuredCandidates:
    """Looks up candidate articles for a phrase in a Lucene index and maps
    their titles to article ids via a preloaded translation table."""

    indexDir = 'data/index'
    max_candidates = 30

    def __init__(self):
        lucene.initVM()
        self._lversion = Version.LUCENE_30
        self._analyzer = EnglishAnalyzer(self._lversion)
        self._searcher = IndexSearcher(SimpleFSDirectory(File(self.indexDir)))
        self._translation = loadTranslation()
        self._links = loadLinks()

    def find(self, phrase):
        """Return (link count for the phrase itself, {article_id: score})."""
        phrase = phrase.lower().encode('utf8')
        # Require every word of the phrase to be present (+word +word ...).
        required = ' '.join('+' + word for word in phrase.split(' '))
        parsed = QueryParser(self._lversion, 'contents', self._analyzer).parse(required)
        matches = self._searcher.search(parsed, self.max_candidates)
        # todo put article_id in lucene index instead of translating document title
        links = {}
        for match in matches.scoreDocs:
            raw_title = self._searcher.doc(match.doc).get("title")
            title = quote(raw_title.encode('utf-8').replace(' ', '_'))
            title = title.replace('%28', '(').replace('%29', ')')
            if title in self._translation:
                links[self._translation[title]] = match.score
        return self._links[phrase].get(-1, 0), links

    def clear_links(self, annotations):
        """Drop annotations whose best link score does not exceed 1."""
        return filter(lambda annotation: annotation['links'] and
                      max(annotation['links'].values()) > 1, annotations)
示例5: testKeyword
def testKeyword(self):
    """A known ISBN keyword term must match exactly one document."""
    searcher = IndexSearcher(self.directory, True)
    isbn_query = TermQuery(Term("isbn", "1930110995"))
    hits = searcher.search(isbn_query, 50).scoreDocs
    self.assertEqual(1, len(hits), "JUnit in Action")
示例6: testChinese
def testChinese(self):
    """A single CJK character term must be indexed and findable."""
    searcher = IndexSearcher(self.directory, True)
    tao_query = TermQuery(Term("contents", "道"))
    hits = searcher.search(tao_query, 50).scoreDocs
    self.assertEqual(1, len(hits), "tao")
示例7: luceneRetriver
def luceneRetriver(query):
lucene.initVM()
indir = SimpleFSDirectory(File(INDEXDIR))
lucene_analyzer = StandardAnalyzer(Version.LUCENE_30)
lucene_searcher = IndexSearcher(indir)
my_query = QueryParser(Version.LUCENE_30,"text",\
lucene_analyzer).parse(query)
MAX = 1000
total_hits = lucene_searcher.search(my_query,MAX)
print "Hits: ",total_hits.totalHits
for hit in total_hits.scoreDocs:
print "Hit Score: ",hit.score, "Hit Doc:",hit.doc, "Hit String:",hit.toString()
doc = lucene_searcher.doc(hit.doc)
print doc.get("text").encode("utf-8")
示例8: get_doc_details
def get_doc_details(doc_id, lucene_index_dir):
    '''
    Get a file's stored metadata fields from the Lucene index.

    Arguments:
        doc_id           -- file id (Lucene internal document number)
        lucene_index_dir -- lucene index directory

    Returns:
        File details as a list: one value per MetadataType._types entry
        ('' when the field is absent), followed by the FILE_ID field.
    '''
    store = SimpleFSDirectory(File(lucene_index_dir))
    searcher = IndexSearcher(store, True)  # True = open read-only
    doc = searcher.doc(doc_id)
    table = dict((field.name(), field.stringValue())
                 for field in doc.getFields())
    # BUG FIX: use dict.get with a '' default instead of the 'empty' sentinel;
    # the sentinel misclassified any field whose stored value was literally
    # the string 'empty'.
    row = [table.get(field, '') for field in MetadataType._types]
    # FILE_ID keeps the original 'empty'-when-missing behavior for callers.
    row.append(str(table.get(MetadataType.FILE_ID, 'empty')))
    return row
示例9: run
def run(writer, analyzer):
while True:
print
print "Hit enter with no input to quit."
command = raw_input("Query:")
if command == '':
return
print "Searching for:", command
IndexReader = writer.getReader()
searcher = IndexSearcher(IndexReader)
#query = QueryParser(Version.LUCENE_CURRENT, "hashtag", analyzer).parse(command)
#scoreDocs = searcher.search(query, 50).scoreDocs
wildquery = command + "*"
term = Term("hashtag", wildquery)
query = WildcardQuery(term)
scoreDocs = searcher.search(query, 5).scoreDocs
print "%s total matching documents." % len(scoreDocs)
for scoreDoc in scoreDocs:
doc = searcher.doc(scoreDoc.doc)
score = ( len(command) / len(doc.get("hashtag")) ) * scoreDoc.score
print 'tweet:', doc.get("contents")
print 'user_name:', doc.get("user_name")
print 'when', doc.get("creation_date")
示例10: get_indexed_file_details
def get_indexed_file_details(ts_results, lucene_index_dir):
    '''
    Get each file's stored metadata fields from the Lucene index.

    Arguments:
        ts_results       -- topic search results; each item contains
                            [file id, root, file name, similarity score]
        lucene_index_dir -- lucene index directory

    Returns:
        One row (list) per result: metadata field values ('' when absent),
        then the FILE_ID field, then the similarity score.
    '''
    store = SimpleFSDirectory(File(lucene_index_dir))
    searcher = IndexSearcher(store, True)  # True = open read-only
    rows = []
    for rs in ts_results:
        doc = searcher.doc(rs[0])
        table = dict((field.name(), field.stringValue())
                     for field in doc.getFields())
        # BUG FIX: use dict.get with a '' default instead of the 'empty'
        # sentinel, which misclassified fields whose stored value was
        # literally the string 'empty' (same fix as get_doc_details).
        row = [table.get(field, '') for field in MetadataType._types]
        row.append(str(table.get(MetadataType.FILE_ID, 'empty')))
        row.append(str(rs[3]))  # similarity score
        rows.append(row)
    return rows
示例11: search
def search(r, keyword=""):
    """Search the robot index for *keyword* and render up to 100 hits.

    `r` is the incoming HTTP request; when `keyword` is empty it is read
    from r.GET["keyword"]. Returns the rendered search-result page with
    the hit list, total count, and elapsed time.
    """
    import logging
    logger = logging.getLogger("search")
    bench = Benchmark(logger)
    from lucene import IndexSearcher, StandardAnalyzer, FSDirectory, QueryParser, File, Hit
    import lucene, os
    # NOTE(review): hard-coded JAVA_HOME — environment-specific; confirm this
    # matches the deployment host.
    os.environ["JAVA_HOME"] = "/usr/local/jdk1.6.0_17"
    lucene.initVM(lucene.CLASSPATH)
    directory = FSDirectory.open(File(CONFIG.INDEX_PATH))
    ROBOT_INDEX = IndexSearcher(directory, True)  # True = read-only
    ROBOT_ANALYZER = StandardAnalyzer()
    keyword = keyword or r.GET["keyword"]
    query = QueryParser("context", ROBOT_ANALYZER)
    # Quoted keyword -> exact-phrase query on the "context" field.
    query = query.parse('"%s"' % keyword)
    bench.start_mark("search")
    # Legacy Hits API: no result limit is passed; all hits are retrieved lazily.
    hits = ROBOT_INDEX.search(query)
    count = len(hits)
    result = []
    i = 0
    for hit in hits:
        i += 1
        if i > 100:  # render at most the first 100 hits
            break
        doc = Hit.cast_(hit).getDocument()
        result.append(SearchResult(doc, i, keyword))
    ROBOT_INDEX.close()
    et = bench.stop_mark()  # elapsed time for the search phase
    return render_to_response("robot_search_result.html", {"result": result, "count": count, "elaspe": et})
示例12: query
def query(indexName, queryString):
    """Run an AND query over the 'content' field of the given index.

    Arguments:
        indexName   -- path of the index directory
        queryString -- query text; '-' is normalized to '_' to match indexing

    Returns:
        The 'id' field of each matching document (up to 100), best-first.
    """
    searcher = IndexSearcher(SimpleFSDirectory(File(indexName)))
    qp = QueryParser(Version.LUCENE_CURRENT, "content",
                     StandardAnalyzer(Version.LUCENE_CURRENT))
    qp.setDefaultOperator(qp.Operator.AND)  # all terms required
    parsed = qp.parse(queryString.replace("-", "_"))
    score_docs = searcher.search(parsed, 100).scoreDocs
    reader = searcher.getIndexReader()
    res = []
    for sd in score_docs:
        # BUG FIX: fetch the matched document via sd.doc; the original used
        # the loop counter, so it returned the ids of the first N documents
        # in index order instead of the ids of the actual hits.
        res.append(reader.document(sd.doc).get('id'))
    return res
示例13: testTermRangeQuery
def testTermRangeQuery(self):
    """An inclusive d..j range on 'title2' must match three documents."""
    searcher = IndexSearcher(self.directory, True)
    range_query = TermRangeQuery("title2", "d", "j", True, True)
    results = searcher.search(range_query, 100)
    self.assertEqual(3, results.totalHits)
    searcher.close()
示例14: testExactPhrase
def testExactPhrase(self):
    """A quoted phrase with a stop word must not match after stemming."""
    searcher = IndexSearcher(self.directory, True)
    parser = QueryParser(Version.LUCENE_24, "contents", self.porterAnalyzer)
    phrase_query = parser.parse('"over the lazy"')
    results = searcher.search(phrase_query, 50)
    self.assertEqual(0, results.totalHits, "exact match not found!")
示例15: main
def main(cls, argv):
    """Open a Berkeley DB backed Lucene directory and run a sample query.

    argv[1] is the DB home directory. Opens the '__index__' and '__blocks__'
    B-tree databases transactionally, searches for contents:"fox", prints
    the hit count, and tears everything down.
    """
    if len(argv) != 2:
        print "Usage: BerkeleyDbSearcher <index dir>"
        return
    dbHome = argv[1]
    env = DBEnv()
    # Keep transaction logs in memory: this tool only reads the index.
    env.set_flags(DB_LOG_INMEMORY, 1);
    # 64 MB cache on Windows and on Linux (detected via uname).
    if os.name == 'nt':
        env.set_cachesize(0, 0x4000000, 1)
    elif os.name == 'posix':
        from commands import getstatusoutput
        if getstatusoutput('uname') == (0, 'Linux'):
            env.set_cachesize(0, 0x4000000, 1)
    env.open(dbHome, (DB_THREAD |
                      DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_TXN), 0)
    index = DB(env)
    blocks = DB(env)
    txn = None
    try:
        # First transaction: open both databases; commit only if both succeed.
        txn = env.txn_begin(None)
        index.open(filename = '__index__', dbtype = DB_BTREE,
                   flags = DB_THREAD, txn = txn)
        blocks.open(filename = '__blocks__', dbtype = DB_BTREE,
                    flags = DB_THREAD, txn = txn)
    except:
        if txn is not None:
            txn.abort()
            txn = None
        raise
    else:
        txn.commit()
        txn = None
    try:
        # Second transaction: wrap the search in its own transaction.
        txn = env.txn_begin(None)
        directory = DbDirectory(txn, index, blocks, 0)
        searcher = IndexSearcher(directory, True)
        topDocs = searcher.search(TermQuery(Term("contents", "fox")), 50)
        print topDocs.totalHits, "document(s) found"
        searcher.close()
    except:
        if txn is not None:
            txn.abort()
            txn = None
        raise
    else:
        # NOTE(review): the success path aborts rather than commits — nothing
        # was written during the search, so abort is the cheap way to end the
        # read-only transaction; confirm this is intentional.
        txn.abort()
    index.close()
    blocks.close()
    env.close()