本文整理汇总了 Python 中 lucene.IndexSearcher.doc 方法的典型用法代码示例。如果您正苦于以下问题:Python IndexSearcher.doc 方法的具体用法?Python IndexSearcher.doc 怎么用?Python IndexSearcher.doc 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 lucene.IndexSearcher 的用法示例。
在下文中一共展示了IndexSearcher.doc方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_indexed_file_details
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def get_indexed_file_details(ts_results, lucene_index_dir):
    '''
    Look up the stored fields of every topic-search hit in the lucene
    index.

    Arguments:
        ts_results - topic search results, each item contains
                     [file id, root, file name, similarity score]
        lucene_index_dir - lucene index directory
    Returns:
        A list with one row per item of ts_results. Each row holds the
        value of every field named in MetadataType._types ('' when the
        document has no such field), followed by the FILE_ID field as a
        string ('empty' when absent) and the similarity score as a string.
    '''
    store = SimpleFSDirectory(File(lucene_index_dir))
    searcher = IndexSearcher(store, True)
    try:
        rows = []
        for rs in ts_results:
            doc = searcher.doc(rs[0])  # rs[0] is used as the document number
            table = dict((field.name(), field.stringValue())
                         for field in doc.getFields())
            # A missing field becomes ''. No in-band sentinel is used, so a
            # field whose stored value happens to be 'empty' is kept as is.
            row = [table.get(name, '') for name in MetadataType._types]
            row.append(str(table.get(MetadataType.FILE_ID, 'empty')))
            row.append(str(rs[3]))  # similarity score
            rows.append(row)
        return rows
    finally:
        # Release the index reader held by the searcher even when a lookup
        # fails part-way through.
        searcher.close()
示例2: run
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def run(writer, analyzer):
    '''
    Interactive prompt that prefix-searches the "hashtag" field.

    Reads queries from stdin until an empty line is entered. For each
    query, up to 5 documents whose "hashtag" term starts with the typed
    text are fetched and their "contents", "user_name" and
    "creation_date" fields are printed.

    Arguments:
        writer - index writer; a fresh reader is obtained from it via
                 getReader() for every query
        analyzer - not used by the wildcard search; kept so existing
                   callers keep working
    '''
    while True:
        print("")
        print("Hit enter with no input to quit.")
        command = raw_input("Query:")
        if command == '':
            return

        print("Searching for: %s" % (command,))
        # Named `reader` (not `IndexReader`) so the class name is not shadowed.
        reader = writer.getReader()
        searcher = IndexSearcher(reader)
        try:
            # Trailing '*' turns the typed text into a prefix match.
            query = WildcardQuery(Term("hashtag", command + "*"))
            scoreDocs = searcher.search(query, 5).scoreDocs
            print("%s total matching documents." % len(scoreDocs))
            for scoreDoc in scoreDocs:
                doc = searcher.doc(scoreDoc.doc)
                print('tweet: %s' % (doc.get("contents"),))
                print('user_name: %s' % (doc.get("user_name"),))
                print('when %s' % (doc.get("creation_date"),))
        finally:
            # Closing the searcher does not close a reader that was passed
            # in, so both are released explicitly on every iteration.
            searcher.close()
            reader.close()
示例3: get_doc_details
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def get_doc_details(doc_id, lucene_index_dir):
    '''
    Fetch one document's stored fields from the lucene index.

    Arguments:
        doc_id - file id, passed to the searcher as the document number
        lucene_index_dir - lucene index directory
    Returns:
        A list holding the value of every field named in
        MetadataType._types ('' when the document has no such field),
        followed by the FILE_ID field as a string ('empty' when absent).
    '''
    store = SimpleFSDirectory(File(lucene_index_dir))
    searcher = IndexSearcher(store, True)
    try:
        doc = searcher.doc(doc_id)
        table = dict((field.name(), field.stringValue())
                     for field in doc.getFields())
    finally:
        # Release the index reader held by the searcher even when the
        # lookup fails.
        searcher.close()

    # A missing field becomes ''. No in-band sentinel is used, so a field
    # whose stored value happens to be 'empty' is kept as is.
    row = [table.get(name, '') for name in MetadataType._types]
    row.append(str(table.get(MetadataType.FILE_ID, 'empty')))
    return row
示例4: displayResults
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def displayResults(self, query, sort):
    '''
    Run query against self.directory ordered by sort and print the
    first 20 hits as a small table (title, pubmonth, doc id, score),
    each followed by a line with the document's category.
    '''
    searcher = IndexSearcher(self.directory, True)

    # Collector flags, in positional order:
    #   fillFields=False, computeScores=True,
    #   computeMaxScore=False, docsScoredInOrder=False
    collector = TopFieldCollector.create(sort, 20, False, True, False, False)
    searcher.search(query, None, collector)
    hits = collector.topDocs().scoreDocs

    print("\nResults for: %s sorted by %s" % (query, sort))
    header = ("Title".rjust(30), "pubmonth".rjust(10),
              "id".center(4), "score".center(15))
    print(" ".join(header))

    for hit in hits:
        doc = searcher.doc(hit.doc)
        title = doc["title"][:30]  # keep the title column at most 30 wide
        print("%s %s %s %s" % (
            title.encode('ascii', 'replace').rjust(30),
            doc["pubmonth"].rjust(10),
            str(hit.doc).center(4),
            ("%06f" % (hit.score)).rjust(12)))
        print("  %s" % (doc["category"],))
        # print searcher.explain(query, hit.doc)

    searcher.close()
示例5: retrieve
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def retrieve(self, query, max_res=10):
    '''
    Search the 'content' field of the index at self.INDEX_DIR and return
    the hits as a JSON string.

    Arguments:
        query - query string in QueryParser syntax
        max_res - maximum number of hits to return
    Returns:
        A JSON object string of the form
        {"query": <query>, "results": [{"rank": <1-based int>,
        "score": <score as string>, "title": ..., "id": ...}, ...]}
        "results" is empty when nothing matches.
    '''
    import json  # local import: only this method needs it

    lucene.initVM()
    inDir = SimpleFSDirectory(File(self.INDEX_DIR))
    lucene_analyzer = StandardAnalyzer(Version.LUCENE_30)
    lucene_searcher = IndexSearcher(inDir)
    try:
        my_query = QueryParser(Version.LUCENE_30, 'content',
                               lucene_analyzer).parse(query)
        top_docs = lucene_searcher.search(my_query, max_res)

        entries = []
        for rank, hit in enumerate(top_docs.scoreDocs, 1):
            doc = lucene_searcher.doc(hit.doc)
            # json.dumps quotes and escapes each value, so titles, ids or
            # queries containing '"', '\\' or control characters still
            # produce valid JSON.
            entries.append('{"rank":%d,"score":%s,"title":%s,"id":%s}' % (
                rank,
                json.dumps(str(hit.score)),
                json.dumps(doc.get('title')),
                json.dumps(doc.get('id'))))
    finally:
        lucene_searcher.close()

    # Joining the entries puts commas only between them. Comparing the rank
    # with totalHits would leave a trailing comma whenever the index holds
    # more matches than max_res.
    return '{"query":%s,"results":[%s]}' % (json.dumps(query),
                                            ','.join(entries))
示例6: document
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def document(self, docId, max_res=1):
    '''
    Look a document up by its 'id' field and return its stored fields as
    a JSON string.

    Arguments:
        docId - value to search for in the 'id' field (parsed with
                QueryParser)
        max_res - maximum number of hits requested from the searcher
    Returns:
        A JSON object string with the keys id, title, abstract, keyword,
        content and authors when exactly one document matches; '{}'
        otherwise. A field the document does not store is emitted as null.
    '''
    import json  # local import: only this method needs it

    field_names = ('id', 'title', 'abstract', 'keyword', 'content', 'authors')

    lucene.initVM()
    inDir = SimpleFSDirectory(File(self.INDEX_DIR))
    lucene_analyzer = StandardAnalyzer(Version.LUCENE_30)
    lucene_searcher = IndexSearcher(inDir)
    try:
        my_query = QueryParser(Version.LUCENE_30, 'id',
                               lucene_analyzer).parse(docId)
        top_docs = lucene_searcher.search(my_query, max_res)

        members = []
        # An ambiguous id (more than one match) yields an empty object.
        if top_docs.totalHits == 1:
            for hit in top_docs.scoreDocs:
                doc = lucene_searcher.doc(hit.doc)
                # json.dumps escapes quotes, backslashes and newlines that
                # abstracts and full-text content routinely contain.
                members.extend(
                    '%s:%s' % (json.dumps(name), json.dumps(doc.get(name)))
                    for name in field_names)
    finally:
        lucene_searcher.close()

    return '{' + ','.join(members) + '}'
示例7: __init__
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
class OccuredCandidates:
    '''
    Finds link candidates for a phrase by searching the 'contents' field
    of the lucene index at indexDir and mapping hit titles through the
    loaded translation table.
    '''
    # Location of the lucene index opened in __init__.
    indexDir = 'data/index'
    # Maximum number of hits requested per phrase.
    max_candidates = 30

    def __init__(self):
        lucene.initVM()
        self._lversion = Version.LUCENE_30
        self._analyzer = EnglishAnalyzer(self._lversion)
        self._searcher = IndexSearcher(SimpleFSDirectory(File(self.indexDir)))
        self._translation = loadTranslation()
        self._links = loadLinks()

    @staticmethod
    def _required_terms(phrase):
        '''Return a query string that marks every word of phrase as required.'''
        # split() without an argument drops empty words, so repeated,
        # leading or trailing whitespace cannot produce a bare '+' clause.
        return ' '.join('+' + word for word in phrase.split())

    def find(self, phrase):
        '''
        Search for documents containing every word of phrase.

        Returns a tuple (self._links[phrase].get(-1, 0), links) where links
        maps the translated title of each hit to its score. Hits whose
        title is not in the translation table are skipped.
        '''
        phrase = phrase.lower().encode('utf8')
        query = QueryParser(self._lversion, 'contents',
                            self._analyzer).parse(self._required_terms(phrase))
        hits = self._searcher.search(query, self.max_candidates)

        # todo put article_id in lucene index instead of translating document title
        links = {}
        for hit in hits.scoreDocs:
            raw_title = self._searcher.doc(hit.doc).get("title")
            # Translation keys use '_' for spaces and literal parentheses.
            title = quote(raw_title.encode('utf-8').replace(' ', '_'))
            title = title.replace('%28', '(').replace('%29', ')')
            if title in self._translation:
                links[self._translation[title]] = hit.score
        return self._links[phrase].get(-1, 0), links

    def clear_links(self, annotations):
        '''Keep only annotations that have links and whose best link score exceeds 1.'''
        return filter(lambda annotation: annotation['links'] and max(annotation['links'].values()) > 1, annotations)
示例8: lucene_search
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def lucene_search(index_dir, limit, query_text):
    '''
    lucene_search: Search a built index and return up to limit responses

    Arguments:
        index_dir - index folder; the log file lucene_search.log is also
                    placed here
        limit - maximum number of results returned
        query_text - query (as string), parsed against the "text" field
    Returns:
        paths of responsive files as list
    '''
    # basicConfig takes the log file under the keyword `filename`; any other
    # spelling is ignored on Python 2 and rejected on Python 3.
    # NOTE(review): the messages below are logged at INFO level, so they only
    # reach the file if the logging level is set to INFO or lower elsewhere.
    logging.basicConfig(filename=os.path.join(index_dir, "lucene_search.log"))
    logger.info("Initializing search....")
    lucene.initVM()
    logger.info("Reading index from %s", index_dir)
    index = SimpleFSDirectory(File(index_dir))
    analyzer = StandardAnalyzer(Version.LUCENE_30)  # Lucene version used to generate index
    searcher = IndexSearcher(index)
    try:
        logger.info("Parsing query :%s", query_text)
        query = QueryParser(Version.LUCENE_30, "text", analyzer).parse(query_text)
        hits = searcher.search(query, limit)
        logger.info("Found %d document(s) that matched query '%s':",
                    hits.totalHits, query)
        # hit.score and hit.toString() are also available per hit; only the
        # stored "path" field is returned here.
        return [searcher.doc(hit.doc).get("path") for hit in hits.scoreDocs]
    finally:
        searcher.close()
示例9: luceneRetriver
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def luceneRetriver(query):
    '''
    Search the "text" field of the index at INDEXDIR for query and print
    the total hit count, then score, document number, string form and
    UTF-8 encoded text of each of the (at most 1000) returned hits.
    '''
    lucene.initVM()
    index_dir = SimpleFSDirectory(File(INDEXDIR))
    analyzer = StandardAnalyzer(Version.LUCENE_30)
    searcher = IndexSearcher(index_dir)
    parser = QueryParser(Version.LUCENE_30, "text", analyzer)

    max_hits = 1000
    top_docs = searcher.search(parser.parse(query), max_hits)

    print("Hits:  %s" % (top_docs.totalHits,))
    for hit in top_docs.scoreDocs:
        print("Hit Score:  %s Hit Doc: %s Hit String: %s"
              % (hit.score, hit.doc, hit.toString()))
        stored = searcher.doc(hit.doc)
        print(stored.get("text").encode("utf-8"))
示例10: query
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def query(self, title):
    '''
    Return the title and content of the best match for title.

    Arguments:
        title - query string, parsed against the "title" field
    Returns:
        "<title>\\n<content>" followed by a dashed separator for the
        highest-ranked hit, or the string "None" when nothing matches.
    '''
    # presumably registers the calling thread with the JVM - TODO confirm
    self._th.attachCurrentThread()
    searcher = IndexSearcher(self._dir)
    try:
        parsed = QueryParser(Version.LUCENE_30, "title", self._analyzer).parse(title)
        total_hits = searcher.search(parsed, 10)
        for hit in total_hits.scoreDocs:
            # Only the first (best) hit is used.
            doc = searcher.doc(hit.doc)
            return doc.get("title") + "\n" + doc.get("content") + "--------------------------------"
        return "None"
    finally:
        # A searcher is opened on every call, so it must be released on
        # every exit path or each query leaks an index reader.
        searcher.close()
示例11: search
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def search(command=command1):
    '''
    Search the "contents" field through the module-level reader.

    Prints the total number of matching documents and returns the stored
    documents of the 20 best hits as a list.
    '''
    max_docs = 20
    searcher = IndexSearcher(reader)
    analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
    query = QueryParser(Version.LUCENE_CURRENT, "contents",
                        analyzer).parse(command)
    # totalHits counts every match regardless of how many hits are
    # requested, so only the 20 that are returned need to be collected.
    res = searcher.search(query, max_docs)
    print('Total hits: %s' % (res.totalHits,))
    return [searcher.doc(hit.doc) for hit in res.scoreDocs]
示例12: getCrowds
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def getCrowds(self, query, field = CrowdFields.text):
    '''
    Parse query against field, collect the top hitsPerPage hits from
    self.index and return the CrowdFields.id value of each hit as a list.
    '''
    searcher = IndexSearcher(self.index, True)
    parser = QueryParser(Version.LUCENE_CURRENT, field, self.analyzer)
    parsed = parser.parse(query)

    collector = TopScoreDocCollector.create(hitsPerPage, True)
    searcher.search(parsed, collector)

    crowd_ids = []
    for hit in collector.topDocs().scoreDocs:
        stored = searcher.doc(hit.doc)
        crowd_ids.append(stored.get(CrowdFields.id))
    return crowd_ids
示例13: LuceneSearch
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
class LuceneSearch(object):
    '''
    Searches the "contents" field of the lucene index stored in the
    "index" directory and renders the matches as HTML or console output.
    '''

    def __init__(self):
        store_dir = "index"
        initVM()
        print('lucene %s' % (VERSION,))
        self.directory = SimpleFSDirectory(File(store_dir))
        print(self.directory)
        self.searcher = IndexSearcher(self.directory, True)
        self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)

    def close(self):
        '''Close the underlying searcher.'''
        self.searcher.close()

    def raw_search(self, query_string):
        '''
        Run query_string and return up to 50 matches as a list of dicts
        with the keys 'contents' (loaded through LuceneDoc.load from the
        hit's 'name' field) and 'doc' (the lucene document).
        '''
        parser = QueryParser(Version.LUCENE_CURRENT, "contents", self.analyzer)
        hits = self.searcher.search(parser.parse(query_string), 50).scoreDocs
        print("%s total matching documents." % len(hits))

        found = []
        for hit in hits:
            stored = self.searcher.doc(hit.doc)
            loaded = LuceneDoc.load(stored.get('name'))
            found.append({'contents' : loaded, 'doc' : stored})
        return found

    def search(self, query):
        '''Return the matches for query rendered as an HTML fragment.'''
        matches = self.raw_search(query)
        if len(matches) == 0:
            return "0 results <br/>"

        pieces = [str(len(matches))+" results <br/>"]
        for match in matches:
            deal = match['contents']
            pieces.append('<a href='+str(deal['dealUrl'])+'>'+str(deal['merchant'])+'</a><br />')
            pieces.append('<p>'+str(deal['shortAnnouncementTitle'])+','+str(deal['redemptionLocation'])+'</p><br/>')
        return ''.join(pieces)

    def cli_search(self):
        '''Prompt for queries on stdin and print each match until an empty line is entered.'''
        while True:
            print("")
            print("Hit enter with no input to quit.")
            command = raw_input("Query:")
            if command == '':
                return

            matches = self.raw_search(command)
            print("")
            print("Searching for: %s" % (command,))
            for match in matches:
                deal = match['contents']
                print(deal['dealUrl'])
                print('%s , %s ,  %s' % (deal['merchant'],
                                         deal['redemptionLocation'],
                                         deal['div']))
                print(deal['shortAnnouncementTitle'])
                print('-'*80)
示例14: pesquisar_com_lucene
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def pesquisar_com_lucene():
    '''
    Run every query in the module-level `querys` against the index in
    STORE_DIR and write the ranked results to
    resultados_da_busca/resultados.csv.

    For each query, '?' and '*' are removed from its query_text (the
    attribute is updated in place), the text is parsed against
    FIELD_CONTENTS and up to 50000 hits are collected. One
    resultado_query is appended to the module-level `resultados` per
    query. The CSV then gets one row per entry of `resultados`: the query
    number and a list of (rank, FIELD_PATH value as int) tuples, with
    ranks starting at 1.
    '''
    max_hits = 50000

    initVM()
    # Get handle to index directory
    directory = SimpleFSDirectory(File(STORE_DIR))
    ireader = IndexReader.open(directory, True)
    # A single searcher over one reader is shared by all queries.
    searcher = IndexSearcher(ireader)
    try:
        analyzer = EnglishAnalyzer(Version.LUCENE_CURRENT)
        # One parser serves every query; FIELD_CONTENTS is the field
        # searched.
        queryParser = QueryParser(Version.LUCENE_CURRENT,
                                  FIELD_CONTENTS, analyzer)

        for query in querys:
            # Drop the '?' and '*' characters from the text before parsing.
            query.query_text = query.query_text.replace('?', '')
            query.query_text = query.query_text.replace('*', '')
            # Kept in its own name so the loop variable is not rebound.
            parsed = queryParser.parse(query.query_text)
            scoreDocs = searcher.search(parsed, max_hits).scoreDocs
            resultados.append(resultado_query(query.query_number, scoreDocs))

        with open('resultados_da_busca/resultados.csv', 'w') as csvfile:
            spamwriter = csv.writer(csvfile, delimiter=';',
                                    quotechar='|', quoting=csv.QUOTE_MINIMAL)
            for row in resultados:
                resultados_da_row = [
                    (rank, int(searcher.doc(hit.doc).get(FIELD_PATH)))
                    for rank, hit in enumerate(row.query_results, 1)]
                spamwriter.writerow([row.query_number, resultados_da_row])
    finally:
        # The searcher was built on an explicit reader, so both are closed.
        searcher.close()
        ireader.close()
示例15: retrieve_document_details
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 或者: from lucene.IndexSearcher import doc [as 别名]
def retrieve_document_details(docid, index_dir):
    '''
    Retrieve the single document associated with docid from the index
    stored in the directory referred to by index_dir.

    docid is converted with int() before the lookup. A specific field's
    value can be read from the returned document with
    document.get(<field_name>), where <field_name> is a string.
    '''
    store = SimpleFSDirectory(File(index_dir))
    searcher = IndexSearcher(store, True)
    try:
        return searcher.doc(int(docid))
    finally:
        # The searcher is private to this call; close it so each lookup
        # does not leave an index reader open.
        searcher.close()