本文整理汇总了 Python 中 lucene.IndexSearcher.search 方法的典型用法代码示例。如果您正苦于以下问题：Python IndexSearcher.search 方法的具体用法？Python IndexSearcher.search 怎么用？Python IndexSearcher.search 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 lucene.IndexSearcher 的用法示例。
在下文中一共展示了IndexSearcher.search方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: displayResults
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import search [as 别名]
def displayResults(self, query, sort):
searcher = IndexSearcher(self.directory, True)
fillFields = False
computeMaxScore = False
docsScoredInOrder = False
computeScores = True
collector = TopFieldCollector.create(sort, 20,
fillFields,
computeScores,
computeMaxScore,
docsScoredInOrder)
searcher.search(query, None, collector)
scoreDocs = collector.topDocs().scoreDocs
print "\nResults for:", query, "sorted by", sort
print "Title".rjust(30), "pubmonth".rjust(10), \
"id".center(4), "score".center(15)
for scoreDoc in scoreDocs:
doc = searcher.doc(scoreDoc.doc)
title = doc["title"]
if len(title) > 30:
title = title[:30]
print title.encode('ascii', 'replace').rjust(30), \
doc["pubmonth"].rjust(10), \
str(scoreDoc.doc).center(4), \
("%06f" % (scoreDoc.score)).rjust(12)
print " ", doc["category"]
# print searcher.explain(query, scoreDoc.doc)
searcher.close()
示例2: MultiPhraseQueryTest
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import search [as 别名]
class MultiPhraseQueryTest(TestCase):
def setUp(self):
directory = RAMDirectory()
writer = IndexWriter(directory, WhitespaceAnalyzer(), True,
IndexWriter.MaxFieldLength.UNLIMITED)
doc1 = Document()
doc1.add(Field("field", "the quick brown fox jumped over the lazy dog",
Field.Store.YES, Field.Index.ANALYZED))
writer.addDocument(doc1)
doc2 = Document()
doc2.add(Field("field", "the fast fox hopped over the hound",
Field.Store.YES, Field.Index.ANALYZED))
writer.addDocument(doc2)
writer.close()
self.searcher = IndexSearcher(directory, True)
def testBasic(self):
query = MultiPhraseQuery()
query.add([Term("field", "quick"),
Term("field", "fast")])
query.add(Term("field", "fox"))
print query
topDocs = self.searcher.search(query, 10)
self.assertEqual(1, topDocs.totalHits, "fast fox match")
query.setSlop(1);
topDocs = self.searcher.search(query, 10)
self.assertEqual(2, topDocs.totalHits, "both match");
def testAgainstOR(self):
quickFox = PhraseQuery()
quickFox.setSlop(1)
quickFox.add(Term("field", "quick"))
quickFox.add(Term("field", "fox"))
fastFox = PhraseQuery()
fastFox.add(Term("field", "fast"))
fastFox.add(Term("field", "fox"))
query = BooleanQuery()
query.add(quickFox, BooleanClause.Occur.SHOULD)
query.add(fastFox, BooleanClause.Occur.SHOULD)
topDocs = self.searcher.search(query, 10)
self.assertEqual(2, topDocs.totalHits)
def debug(self, hits):
for i, doc in hits:
print "%s: %s" %(hits.score(i), doc['field'])
示例3: getCrowds
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import search [as 别名]
def getCrowds(self, query, field = CrowdFields.text):
    """Return the CrowdFields.id value of every document matching *query*.

    query -- query string, parsed with self.analyzer.
    field -- default field for terms in *query* that name no field.

    At most hitsPerPage results are returned, in the order produced by
    the TopScoreDocCollector.
    """
    searcher = IndexSearcher(self.index, True)
    try:
        parsed = QueryParser(Version.LUCENE_CURRENT, field,
                             self.analyzer).parse(query)
        collector = TopScoreDocCollector.create(hitsPerPage, True)
        searcher.search(parsed, collector)
        # Stored fields must be read while the searcher is still open,
        # so the list is built before the finally clause runs.
        return [searcher.doc(scoreDoc.doc).get(CrowdFields.id)
                for scoreDoc in collector.topDocs().scoreDocs]
    finally:
        # The original never closed the searcher it opened per call.
        searcher.close()
示例4: testTerm
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import search [as 别名]
def testTerm(self):
    """TermQuery on "subject": one hit for "ant", two for "junit"."""
    searcher = IndexSearcher(self.directory, True)
    try:
        antQuery = TermQuery(Term("subject", "ant"))
        scoreDocs = searcher.search(antQuery, 50).scoreDocs
        self.assertEqual(1, len(scoreDocs), "JDwA")

        junitQuery = TermQuery(Term("subject", "junit"))
        scoreDocs = searcher.search(junitQuery, 50).scoreDocs
        self.assertEqual(2, len(scoreDocs))
    finally:
        # Close the searcher even when an assertion above fails.
        searcher.close()
示例5: testStems
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import search [as 别名]
def testStems(self):
    """Queries analyzed with self.porterAnalyzer each match one document.

    Checks the single word "laziness" and the quoted phrase "fox jumped"
    against the "contents" field.
    """
    searcher = IndexSearcher(self.directory)
    try:
        query = QueryParser(Version.LUCENE_CURRENT, "contents",
                            self.porterAnalyzer).parse("laziness")
        topDocs = searcher.search(query, 50)
        self.assertEqual(1, topDocs.totalHits, "lazi")

        query = QueryParser(Version.LUCENE_CURRENT, "contents",
                            self.porterAnalyzer).parse('"fox jumped"')
        topDocs = searcher.search(query, 50)
        self.assertEqual(1, topDocs.totalHits, "jump jumps jumped jumping")
    finally:
        # The original never closed the searcher it opened.
        searcher.close()
示例6: testQueryParser
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import search [as 别名]
def testQueryParser(self):
    """Parsed boolean queries on "contents" return the expected books.

    "+JUNIT +ANT -MOCK" must match only "Java Development with Ant";
    "mock OR junit" must match two documents.
    """
    searcher = IndexSearcher(self.directory, True)
    try:
        query = QueryParser(Version.LUCENE_CURRENT, "contents",
                            SimpleAnalyzer()).parse("+JUNIT +ANT -MOCK")
        scoreDocs = searcher.search(query, 50).scoreDocs
        self.assertEqual(1, len(scoreDocs))
        d = searcher.doc(scoreDocs[0].doc)
        self.assertEqual("Java Development with Ant", d.get("title"))

        query = QueryParser(Version.LUCENE_CURRENT, "contents",
                            SimpleAnalyzer()).parse("mock OR junit")
        scoreDocs = searcher.search(query, 50).scoreDocs
        self.assertEqual(2, len(scoreDocs), "JDwA and JIA")
    finally:
        # The original never closed the searcher it opened.
        searcher.close()
示例7: testSecurityFilter
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import search [as 别名]
def testSecurityFilter(self):
    """A QueryWrapperFilter on "owner" restricts hits to that owner.

    Unfiltered, the "keywords:info" query matches two documents; with
    the owner=jake filter only jake's document is returned.
    """
    query = TermQuery(Term("keywords", "info"))
    searcher = IndexSearcher(self.directory, True)
    try:
        topDocs = searcher.search(query, 50)
        self.assertEqual(2, topDocs.totalHits, "Both documents match")

        jakeFilter = QueryWrapperFilter(TermQuery(Term("owner", "jake")))
        scoreDocs = searcher.search(query, jakeFilter, 50).scoreDocs
        self.assertEqual(1, len(scoreDocs))
        self.assertEqual("jakes sensitive info",
                         searcher.doc(scoreDocs[0].doc).get("keywords"),
                         "elwood is safe")
    finally:
        # The original never closed the searcher it opened.
        searcher.close()
示例8: query
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import search [as 别名]
def query(indexName, queryFile, runName):
indReader = IndexReader.open(SimpleFSDirectory(File(indexName)))
indSearcher = IndexSearcher(indReader)
ir = indSearcher.getIndexReader()
qp = QueryParser(Version.LUCENE_CURRENT, "content", StandardAnalyzer(Version.LUCENE_CURRENT))
f = open('results-'+runName, 'w')
while(True):
id = queryFile.readline()
if id == "":
break
id = id.replace("C","")
id = id.replace("\n","")
queryString = queryFile.readline()
queryString = queryString.replace("?","")
queryString = queryString.replace("*","")
queryString = queryString.replace("-","_")
queryString = queryString.replace("\n","")
query = qp.parse(queryString)
queryFile.readline()
returnedDocs = 1000
collector = TopScoreDocCollector.create(returnedDocs, True)
indSearcher.search(query, collector)
hits = collector.topDocs().scoreDocs
size = len(hits)
print "Total hits for query " +id+ ": "+str(size)
i = 0
for hit in hits:
docId = hits[i].doc
score = hits[i].score
doc = ir.document(docId)
j = i + 1
f.write(id + " 0 " + doc.get('id') + " " + str(j) + " " + str(score) +" " + runName +"\n")
i+=1
f.close()
示例9: luceneRetriver
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import search [as 别名]
def luceneRetriver(query):
lucene.initVM()
indir = SimpleFSDirectory(File(INDEXDIR))
lucene_analyzer = StandardAnalyzer(Version.LUCENE_30)
lucene_searcher = IndexSearcher(indir)
my_query = QueryParser(Version.LUCENE_30,"text",\
lucene_analyzer).parse(query)
MAX = 1000
total_hits = lucene_searcher.search(my_query,MAX)
print "Hits: ",total_hits.totalHits
for hit in total_hits.scoreDocs:
print "Hit Score: ",hit.score, "Hit Doc:",hit.doc, "Hit String:",hit.toString()
doc = lucene_searcher.doc(hit.doc)
print doc.get("text").encode("utf-8")
示例10: run
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import search [as 别名]
def run(writer, analyzer):
while True:
print
print "Hit enter with no input to quit."
command = raw_input("Query:")
if command == '':
return
print "Searching for:", command
IndexReader = writer.getReader()
searcher = IndexSearcher(IndexReader)
#query = QueryParser(Version.LUCENE_CURRENT, "hashtag", analyzer).parse(command)
#scoreDocs = searcher.search(query, 50).scoreDocs
wildquery = command + "*"
term = Term("hashtag", wildquery)
query = WildcardQuery(term)
scoreDocs = searcher.search(query, 5).scoreDocs
print "%s total matching documents." % len(scoreDocs)
for scoreDoc in scoreDocs:
doc = searcher.doc(scoreDoc.doc)
score = ( len(command) / len(doc.get("hashtag")) ) * scoreDoc.score
print 'tweet:', doc.get("contents")
print 'user_name:', doc.get("user_name")
print 'when', doc.get("creation_date")
示例11: testPrefix
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import search [as 别名]
def testPrefix(self):
    """PrefixQuery on "category" matches more than the exact TermQuery."""
    searcher = IndexSearcher(self.directory, True)
    try:
        term = Term("category", "/technology/computers/programming")

        # Programming books, including subcategories.
        programmingAndBelow = searcher.search(PrefixQuery(term), 50).totalHits

        # Only programming books, not subcategories.
        justProgramming = searcher.search(TermQuery(term), 50).totalHits

        self.assertTrue(programmingAndBelow > justProgramming)
    finally:
        # The original never closed the searcher it opened.
        searcher.close()
示例12: query
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import search [as 别名]
def query(indexName, queryString):
    """Return the stored 'id' of the top 100 documents for *queryString*.

    indexName   -- path of the Lucene index directory.
    queryString -- query text; "-" is replaced by "_" before parsing and
                   all terms are AND-ed together by default.

    Returns a list of 'id' field values in ranking order.
    """
    indSearcher = IndexSearcher(SimpleFSDirectory(File(indexName)))
    try:
        qp = QueryParser(Version.LUCENE_CURRENT, "content",
                         StandardAnalyzer(Version.LUCENE_CURRENT))
        qp.setDefaultOperator(qp.Operator.AND)
        parsed = qp.parse(queryString.replace("-","_"))

        scoreDocs = indSearcher.search(parsed, 100).scoreDocs
        ir = indSearcher.getIndexReader()
        # Fetch each hit by its own document number. The original passed
        # the loop counter, which returned the first documents of the
        # index instead of the ones that matched.
        return [ir.document(scoreDoc.doc).get('id') for scoreDoc in scoreDocs]
    finally:
        # The original never closed the searcher it opened per call.
        indSearcher.close()
示例13: testCollecting
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import search [as 别名]
def testCollecting(self):
    """BookLinkCollector maps book URLs to titles for matching documents.

    Searches "contents:junit" with the custom collector, checks the link
    collected for the Ant book, then dumps the top 10 hits of the same
    query via self.dumpHits.
    """
    query = TermQuery(Term("contents", "junit"))
    searcher = IndexSearcher(self.directory, True)
    try:
        collector = BookLinkCollector(searcher)
        searcher.search(query, collector)

        links = collector.getLinks()
        self.assertEqual("java development with ant",
                         links["http://www.manning.com/antbook"])

        scoreDocs = searcher.search(query, 10).scoreDocs
        self.dumpHits(searcher, scoreDocs)
    finally:
        # Close the searcher even when the assertion above fails.
        searcher.close()
示例14: document
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import search [as 别名]
def document( self, docId, max_res = 1 ):
    """Return the document whose 'id' field matches *docId* as JSON text.

    docId   -- query text, parsed against the 'id' field.
    max_res -- maximum number of hits requested from the searcher.

    Returns a JSON object with the keys id, title, abstract, keyword,
    content and authors (in that order) when exactly one document
    matches, and '{}' otherwise. Values are JSON-escaped, so quotes and
    backslashes in a field no longer break the output; non-ASCII
    characters are emitted as \\uXXXX escapes. A field missing from the
    document is emitted as an empty string instead of raising TypeError.
    """
    import json  # local import: only this function needs JSON escaping

    lucene.initVM()
    inDir = SimpleFSDirectory( File( self.INDEX_DIR ) )
    lucene_analyzer = StandardAnalyzer( Version.LUCENE_30 )
    lucene_searcher = IndexSearcher( inDir )
    try:
        my_query = QueryParser( Version.LUCENE_30, 'id' , lucene_analyzer ).parse( docId )
        total_hits = lucene_searcher.search( my_query, max_res )

        members = []
        if ( total_hits.totalHits == 1 ):
            for hit in total_hits.scoreDocs:
                doc = lucene_searcher.doc( hit.doc )
                # Keys are emitted in a fixed order so that output for
                # plain ASCII values matches the original concatenation.
                members = [ json.dumps( name ) + ':' + json.dumps( doc.get( name ) or '' )
                            for name in ( 'id', 'title', 'abstract',
                                          'keyword', 'content', 'authors' ) ]
    finally:
        # The original never closed the searcher it opened per call.
        lucene_searcher.close()

    return '{' + ','.join( members ) + '}'
示例15: retrieve
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import search [as 别名]
def retrieve( self, query, max_res = 10 ):
    """Search the 'content' field and return the ranked hits as JSON text.

    query   -- query string, parsed with a StandardAnalyzer.
    max_res -- maximum number of hits to return.

    Returns a JSON object of the form
    {"query":"...","results":[{"rank":1,"score":"...","title":"...","id":"..."},...]}.
    Rank starts at 1 and score is the string form of the hit score.

    All strings are JSON-escaped (non-ASCII as \\uXXXX), and a missing
    title or id is emitted as an empty string. Result entries are joined
    with commas. The original compared the rank against totalHits and so
    left a trailing comma whenever more documents matched than were
    returned.
    """
    import json  # local import: only this function needs JSON escaping

    lucene.initVM()
    inDir = SimpleFSDirectory( File( self.INDEX_DIR ) )
    lucene_analyzer = StandardAnalyzer( Version.LUCENE_30 )
    lucene_searcher = IndexSearcher( inDir )
    try:
        my_query = QueryParser( Version.LUCENE_30, 'content' , lucene_analyzer ).parse( query )
        total_hits = lucene_searcher.search( my_query, max_res )

        entries = []
        for position, hit in enumerate( total_hits.scoreDocs ):
            doc = lucene_searcher.doc( hit.doc )
            entries.append( '{"rank":%d,"score":%s,"title":%s,"id":%s}' % (
                position + 1,
                json.dumps( str( hit.score ) ),
                json.dumps( doc.get( 'title' ) or '' ),
                json.dumps( doc.get( 'id' ) or '' ) ) )
    finally:
        # The original never closed the searcher it opened per call.
        lucene_searcher.close()

    return '{"query":%s,"results":[%s]}' % ( json.dumps( query ), ','.join( entries ) )