本文整理汇总了 Python 中 org.apache.lucene.search.IndexSearcher 类的典型用法代码示例。如果您正苦于以下问题：Python IndexSearcher 类的具体用法？Python IndexSearcher 怎么用？Python IndexSearcher 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了IndexSearcher类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: Searcher
class Searcher(object):
    """Run query-parser queries against an on-disk Lucene index."""

    def __init__(self, **kwargs):
        """Open the index and build the analyzer and searcher.

        :param count: Maximum number of hits requested per query
            (defaults to 100).
        :param root: Directory of the underlying index (defaults to
            ``"index"``). The value is stored on ``self.output``.
        """
        self.count = kwargs.get("count", 100)
        # The keyword argument is named "root" even though the attribute
        # that keeps it is called ``output``.
        self.output = kwargs.get("root", "index")
        self.store = SimpleFSDirectory(File(self.output))
        self.analyzer = StandardAnalyzer(Version.LUCENE_30)
        self.searcher = IndexSearcher(DirectoryReader.open(self.store))

    def search(self, query):
        """Parse ``query`` against the "data" field and yield the hits.

        :param query: The query string to apply to the index.
        :returns: A generator of ``(path, score)`` tuples, where ``path``
            is the stored "path" field of the matching document and
            ``score`` is the hit's score. At most ``self.count`` hits are
            requested from the index.
        """
        parser = QueryParser(Version.LUCENE_30, "data", self.analyzer)
        results = self.searcher.search(parser.parse(query), self.count)
        # ``scoreDocs`` may be falsy; fall back to an empty sequence so the
        # generator simply finishes without yielding.
        for result in results.scoreDocs or []:
            document = self.searcher.doc(result.doc)
            yield document.get("path"), result.score
示例2: search
def search():
    """Build a Lucene query from the submitted form and render the hits.

    On a POST request every non-empty form field contributes one or more
    query clauses; the clauses are handed to ``mansearch.buscar`` and each
    returned hit is turned into a ``{field name: string value}`` dict.

    :returns: The rendered ``busca.html`` template with ``table`` set to
        the list of hit dicts (empty when no clause was submitted).
    """
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    args = []
    if request.method == 'POST':
        # NOTE(review): form values are concatenated into the query string
        # unescaped, so query-parser syntax typed by the user is honoured.
        for name in ('ies', 'area', 'professor'):
            value = request.form[name]
            if value:
                args.append('+' + name + ':' + value)
        conceito = request.form['conceito']
        if conceito:
            # The same value is searched in the m, d and f fields.
            for prefix in ('m:', 'd:', 'f:'):
                args.append(prefix + conceito)

    table = []
    if args:
        scoreDocs = mansearch.buscar('indexer/', args)
        # NOTE(review): ``indexDir`` is not defined in this function; it is
        # presumably a module-level name pointing at the same index that
        # ``buscar`` searched ('indexer/') -- confirm, otherwise the doc
        # ids below refer to a different index.
        reader = DirectoryReader.open(SimpleFSDirectory(File(indexDir)))
        try:
            searcher = IndexSearcher(reader)
            for scoreDoc in scoreDocs:
                doc = searcher.doc(scoreDoc.doc)
                table.append(dict((field.name(), field.stringValue())
                                  for field in doc.getFields()))
        finally:
            # Release the index files even if loading a document fails.
            reader.close()
    return render_template('busca.html', table=table)
示例3: search
def search(self):
    """Interactively read queries from stdin and print matching documents.

    Opens the index found at ``self.index_dir`` (relative to the current
    directory), then loops: prompt for a query, parse it against the
    "title" field, and print up to 50 matching documents. An empty input
    line ends the loop. The printed "total" is the number of hits actually
    returned, so it never exceeds 50.
    """
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    base_dir = '.'
    directory = SimpleFSDirectory(File(os.path.join(base_dir, self.index_dir)))
    reader = DirectoryReader.open(directory)
    try:
        searcher = IndexSearcher(reader)
        analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        # The parser configuration never changes, so build it once.
        parser = QueryParser(Version.LUCENE_CURRENT, "title", analyzer)
        while True:
            print("")
            print("Hit enter with no input to quit.")
            command = raw_input("Query:")
            if command == '':
                return
            print("")
            print("Searching for: %s" % command)
            scoreDocs = searcher.search(parser.parse(command), 50).scoreDocs
            print("%s total matching documents." % len(scoreDocs))
            for scoreDoc in scoreDocs:
                print(searcher.doc(scoreDoc.doc))
    finally:
        # Runs on normal exit and on errors alike, so the reader is
        # always released.
        reader.close()
示例4: buscar
def buscar(indexDir, args, options=None):
    """Search the index at ``indexDir`` for documents matching ``args``.

    :param indexDir: Directory of the Lucene index to search.
    :param args: Sequence of query clauses (strings); they are joined with
        ``' +'`` into a single query string.
    :param options: Unused; kept for interface compatibility.
    :returns: The ``scoreDocs`` array of the top 200 hits.
    """
    # Searcher over the index directory supplied by the caller.
    reader = DirectoryReader.open(SimpleFSDirectory(File(indexDir)))
    try:
        searcher = IndexSearcher(reader)
        # Analyzer used to filter the query tokens.
        analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        # Clauses without an explicit field are matched against "keywords".
        parser = QueryParser(Version.LUCENE_CURRENT, "keywords", analyzer)
        parser.setDefaultOperator(QueryParser.Operator.AND)
        # NOTE(review): a clause that already starts with '+' ends up as
        # '++clause' when it is not the first element -- confirm the
        # parser accepts that.
        command = ' +'.join(args)
        query = parser.parse(command)
        print(query)
        return searcher.search(query, 200).scoreDocs
    finally:
        # The returned hits only carry doc ids and scores, so the reader
        # can be released before returning.
        reader.close()
示例5: search
def search(self, field, text):
    """Search ``text`` within the indexed data.

    :param field: Name of the field to search; its stored value is also
        returned as the hit's title.
    :param text: Query text, parsed with ``self.analyser``.
    :returns: A list of ``(score, url, title)`` tuples for up to 1000
        hits, in the order returned by the searcher.
    """
    idx_reader = DirectoryReader.open(self.directory)
    try:
        idx_searcher = IndexSearcher(idx_reader)
        parser = AnalyzingQueryParser(Version.LUCENE_CURRENT, field,
                                      self.analyser)
        query = parser.parse(text)

        results = []
        for hit in idx_searcher.search(query, 1000).scoreDocs.tolist():
            doc = idx_searcher.doc(hit.doc)
            results.append((hit.score, doc.get("url"), doc.get(field)))
        return results
    finally:
        # A new reader is opened on every call, so release it every time.
        idx_reader.close()
示例6: search
def search(self, input_query=None, max_answers=10):
    """Search the index for ``input_query`` across several fields.

    :param input_query: Query string; when ``None`` nothing is searched.
    :param max_answers: Maximum number of hits to return.
    :returns: ``None`` when ``input_query`` is ``None``; otherwise a list
        with one ``{field name: string value}`` dict per hit.
    """
    if input_query is None:
        return None

    base_dir = '.'
    directory = SimpleFSDirectory(File(os.path.join(base_dir, self.index_dir)))
    reader = DirectoryReader.open(directory)
    try:
        searcher = IndexSearcher(reader)
        analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        parser = MultiFieldQueryParser(
            Version.LUCENE_CURRENT,
            (self._posts_fields + self._answer_fields),
            analyzer)
        # Called through the class, with the parser instance passed
        # explicitly, exactly as the original code did.
        query = MultiFieldQueryParser.parse(parser, input_query)
        scoreDocs = searcher.search(query, max_answers).scoreDocs
        print("%s total matching documents." % len(scoreDocs))

        docs = []
        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            docs.append(dict((field.name(), field.stringValue())
                             for field in doc.getFields()))
        return docs
    finally:
        # A new reader is opened on every call, so release it every time.
        reader.close()
示例7: get_candidates
def get_candidates(qatp):
    """Retrieve candidate document ids for every sample in ``qatp``.

    :param qatp: Iterable of 4-tuples whose first element is the question
        text; the remaining three elements are not used here.
    :returns: A list with one entry per sample, each entry being the list
        of stored "id" values of up to ``prm.max_candidates`` hits.
    """
    if prm.create_index:
        create_index()

    lucene.initVM()
    analyzer = StandardAnalyzer(Version.LUCENE_4_10_1)
    reader = IndexReader.open(SimpleFSDirectory(File(prm.index_folder)))
    try:
        searcher = IndexSearcher(reader)
        # The parser configuration is the same for every sample.
        parser = QueryParser(Version.LUCENE_4_10_1, "text", analyzer)

        candidates = []
        for n, (q, _a, _t, _p) in enumerate(qatp):
            if n % 100 == 0:
                print('finding candidates sample %s' % n)

            # Prefix the upper-case operator words with a backslash and
            # then escape the whole string, so the question is parsed as
            # plain terms rather than as query syntax.
            q = (q.replace('AND', '\\AND')
                  .replace('OR', '\\OR')
                  .replace('NOT', '\\NOT'))
            query = parser.parse(QueryParser.escape(q))
            hits = searcher.search(query, prm.max_candidates)
            candidates.append([searcher.doc(hit.doc).get("id")
                               for hit in hits.scoreDocs])
        return candidates
    finally:
        # Release the index files once all samples are processed or on
        # error.
        reader.close()
示例8: query
def query(self, data):
    """Run ``data['query']`` against the index and collect the hits.

    :param data: Mapping with a ``'query'`` entry holding the query
        string; terms without an explicit field are matched against "id".
    :returns: ``None`` when ``self.fil`` does not exist. Otherwise a dict
        with ``'totalHits'`` and ``'hits'``, where ``'hits'`` maps each
        document's stored "id" to a record containing its ``'score'`` and
        every stored field other than "id".
    """
    # Without this guard ``results`` would never be bound when the index
    # is missing.
    if not self.fil.exists():
        return None

    reader = DirectoryReader.open(self.d)
    try:
        searcher = IndexSearcher(reader)
        parser = QueryParser(Version.LUCENE_30, "id", self.analyzer)
        hits = searcher.search(parser.parse(data['query']), 100000)

        results = {'totalHits': hits.totalHits, 'hits': {}}
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            record = {'score': hit.score}
            for field in doc.getFields():
                if field.name() != "id":
                    record[field.name()] = field.stringValue()
            results['hits'][doc.get('id')] = record
        return results
    finally:
        # Close the reader on errors too, not only on the success path.
        reader.close()
示例9: get_image_pmcid
def get_image_pmcid(pmcid, classes=""):
    """Return the image documents that belong to the given article ids.

    :param pmcid: Iterable of PMCID strings; they are joined with spaces
        into one ``pmcid:(...)`` clause. A plain string would be split
        into its individual characters by the join.
    :param classes: ``"all"`` to skip class filtering; any other value is
        appended as ``AND class:<classes>``.
        NOTE(review): the default ``""`` produces a clause with an empty
        class value -- confirm callers always pass a class or ``"all"``.
    :returns: A list of the matching documents (at most 10000).
    """
    location = web.__path__[0] + "/static/web/files/index/index.figures"
    # The JVM is expected to be running already; attach this thread to it.
    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()

    analyzer = StandardAnalyzer(Version.LUCENE_4_10_1)
    reader = IndexReader.open(SimpleFSDirectory(File(location)))
    try:
        searcher = IndexSearcher(reader)
        MAX = 10000

        queryStr = "pmcid:(" + ' '.join(pmcid) + ")"
        if classes != "all":
            queryStr = queryStr + " AND class:" + classes

        # A parser is needed because the query is built as a string.
        parser = QueryParser(Version.LUCENE_4_10_1, "pmcid", analyzer)
        hits = searcher.search(parser.parse(queryStr), MAX)
        return [searcher.doc(hit.doc) for hit in hits.scoreDocs]
    finally:
        # Documents are loaded before this point, so the reader can go.
        reader.close()
示例10: getIndexSearcher
def getIndexSearcher(self):
    """Create a searcher over ``self.mIndexReader``.

    When ``self.mSimilarity`` is set it is installed on the new searcher;
    otherwise the searcher keeps its default similarity.

    :returns: A new ``IndexSearcher``.
    """
    indexSearcher = IndexSearcher(self.mIndexReader)
    # Identity test: None is a singleton, and ``!=`` could be overridden.
    if self.mSimilarity is not None:
        indexSearcher.setSimilarity(self.mSimilarity)
    return indexSearcher
示例11: get_query_results
def get_query_results(reader, query, n, field):
    """Print the top ``n`` hits of ``query``, one numbered line per hit.

    :param reader: Open index reader to search; it is not closed here.
    :param query: Already-built query object.
    :param n: Maximum number of hits to fetch.
    :param field: Stored field whose value is printed for each hit.
    """
    searcher = IndexSearcher(reader)
    hits = searcher.search(query, n).scoreDocs
    print("Found %d hits:" % len(hits))
    # Ranks are 1-based in the output.
    for rank, hit in enumerate(hits, 1):
        doc = searcher.doc(hit.doc)
        print("%d. %s" % (rank, doc.get(field)))
示例12: retrieve
def retrieve(indexdir, queries):
    """Run every query against the index and write a ranked results file.

    Each query text is tokenized on word characters, stemmed, and searched
    with OR semantics across the "title", "abstract" and "authors" fields.
    One line per hit is written to ``results_lucene.txt`` in the form
    ``<query id> Q0 <doc number + 1> <rank> <score> G17R3``.

    :param indexdir: Directory of the Lucene index.
    :param queries: Mapping of query id to query text.
    """
    lucene.initVM()
    # ``with`` guarantees the results file is flushed and closed even when
    # a query fails half-way through.
    with open("results_lucene.txt", "w") as out:
        analyzer = StandardAnalyzer(Version.LUCENE_4_10_1)
        reader = IndexReader.open(SimpleFSDirectory(File(indexdir)))
        try:
            searcher = IndexSearcher(reader)
            fields = ["title", "abstract", "authors"]
            stemmer = PorterStemmer()
            # These helpers do not depend on the query; build them once.
            tokenizer = RegexpTokenizer(r'\w+')
            parser = MultiFieldQueryParser(Version.LUCENE_CURRENT, fields,
                                           analyzer)
            parser.setDefaultOperator(QueryParserBase.OR_OPERATOR)
            max_hits = 1000

            for query_id, text in queries.items():
                stemmed = " ".join(stemmer.stem(word)
                                   for word in tokenizer.tokenize(text))
                query = MultiFieldQueryParser.parse(parser, stemmed)
                hits = searcher.search(query, max_hits)
                # Document numbers and ranks are written 1-based.
                for rank, hit in enumerate(hits.scoreDocs, 1):
                    out.write("%s Q0 %s %s %s G17R3\n"
                              % (query_id, hit.doc + 1, rank, hit.score))
        finally:
            reader.close()
示例13: LuceneSearcher
class LuceneSearcher(object):
    """Search a Lucene index and return hits as plain dicts."""

    # Stored fields copied into every result dict.
    fields = ['id', 'text', 'types']

    def __init__(self, db_path):
        """Open the index at ``db_path`` and prepare searcher and analyzer.

        :param db_path: Directory of the Lucene index.
        """
        directory = SimpleFSDirectory(File(db_path))
        reader = DirectoryReader.open(directory)
        self.searcher = IndexSearcher(reader)
        self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        logger.info("Loaded DB from %s with %d documents: ",
                    db_path, reader.numDocs())

    def search(self, query, max_matches=1000):
        """Search the "text" field for ``query``.

        Every match of ``VALID_CHARS_PATTERN`` in the query is replaced
        by a space before parsing.

        :param query: Raw query string.
        :param max_matches: Maximum number of hits to return.
        :returns: A list of dicts, one per hit, as built by
            :meth:`convert_to_dict`.
        """
        query = VALID_CHARS_PATTERN.sub(' ', query)
        logger.debug("Searching for %s", query)
        parser = QueryParser(Version.LUCENE_CURRENT, "text", self.analyzer)
        score_docs = self.searcher.search(parser.parse(query),
                                          max_matches).scoreDocs
        logger.debug("%s total matching documents.", len(score_docs))
        return [self.convert_to_dict(self.searcher.doc(hit.doc))
                for hit in score_docs]

    def convert_to_dict(self, doc):
        """Map each name in ``self.fields`` to ``doc.get(name)``.

        :param doc: Object exposing ``get(field_name)``.
        :returns: A dict with exactly the keys listed in ``self.fields``.
        """
        return {field: doc.get(field) for field in self.fields}
示例14: IndexAndTaxonomy
class IndexAndTaxonomy(object):
    """Hold an index reader, a taxonomy reader and a lazily built searcher.

    The searcher is rebuilt on next access whenever the index reader is
    swapped by :meth:`reopen` or the read settings change.
    """

    def __init__(self, settings, indexDirectory=None, taxoDirectory=None):
        """Open both readers and wire up the read-settings wrapper.

        :param settings: Object providing ``similarity``,
            ``numberOfConcurrentTasks`` and ``multithreaded``.
        :param indexDirectory: Directory of the main index.
        :param taxoDirectory: Directory of the taxonomy index.
        """
        self._settings = settings
        self._similarity = settings.similarity
        self._numberOfConcurrentTasks = settings.numberOfConcurrentTasks
        self._reader = DirectoryReader.open(indexDirectory)
        self.taxoReader = DirectoryTaxonomyReader(taxoDirectory)
        self._readerSettingsWrapper = ReaderSettingsWrapper()
        # The getter reports the similarity of the live searcher, so it
        # triggers a (re)build of the searcher when one is pending.
        self._readerSettingsWrapper.get = lambda: {
            "similarity": self.searcher.getSimilarity().toString(),
            "numberOfConcurrentTasks": self._numberOfConcurrentTasks,
        }
        self._readerSettingsWrapper.set = self._setReadSettings
        self._searcher = None
        self._executor = None
        self._reopenSearcher = True

    def reopen(self):
        """Swap in fresh readers when the underlying indexes changed.

        The taxonomy reader is only checked when the index reader itself
        was replaced.
        """
        reader = DirectoryReader.openIfChanged(self._reader)
        if reader is None:
            return
        self._reader.close()
        self._reader = reader
        self._reopenSearcher = True

        taxoReader = DirectoryTaxonomyReader.openIfChanged(self.taxoReader)
        if taxoReader is None:
            return
        self.taxoReader.close()
        self.taxoReader = taxoReader

    @property
    def searcher(self):
        """Return the current searcher, rebuilding it when flagged stale."""
        if not self._reopenSearcher:
            return self._searcher

        if self._settings.multithreaded:
            # Replace the thread pool so a changed task count takes effect.
            if self._executor:
                self._executor.shutdown()
            self._executor = Executors.newFixedThreadPool(
                self._numberOfConcurrentTasks)
            self._searcher = SuperIndexSearcher(
                self._reader, self._executor, self._numberOfConcurrentTasks)
        else:
            self._searcher = IndexSearcher(self._reader)
        self._searcher.setSimilarity(self._similarity)
        self._reopenSearcher = False
        return self._searcher

    def _setReadSettings(self, similarity=None, numberOfConcurrentTasks=None):
        """Update similarity and task count; ``None`` restores the default.

        :param similarity: Mapping with ``"k1"`` and ``"b"`` used to build
            a ``BM25Similarity``, or ``None`` for the configured default.
        :param numberOfConcurrentTasks: New task count, or ``None`` for
            the configured default.
        """
        # This method must be thread-safe
        if similarity is None:
            self._similarity = self._settings.similarity
        else:
            self._similarity = BM25Similarity(similarity["k1"],
                                              similarity["b"])

        if numberOfConcurrentTasks is None:
            self._numberOfConcurrentTasks = \
                self._settings.numberOfConcurrentTasks
        else:
            self._numberOfConcurrentTasks = numberOfConcurrentTasks
        self._reopenSearcher = True

    def close(self):
        """Close both readers and stop the search thread pool, if any."""
        self.taxoReader.close()
        self._reader.close()
        # The pool created by ``searcher`` would otherwise keep its worker
        # threads alive after the readers are gone.
        if self._executor:
            self._executor.shutdown()
            self._executor = None
示例15: search_docs
def search_docs(self, value, field="general_info"):
    """Return the documents matching ``value`` in ``field``.

    :param value: Query string, parsed with ``self.analyzer``.
    :param field: Field searched by terms that carry no explicit field.
    :returns: A list of up to 1000 matching documents.
    """
    MAX_RESULTS = 1000
    reader = DirectoryReader.open(self.store)
    try:
        searcher = IndexSearcher(reader)
        parser = QueryParser(Version.LUCENE_CURRENT, field, self.analyzer)
        topDocs = searcher.search(parser.parse(value), MAX_RESULTS)
        return [searcher.doc(hit.doc) for hit in topDocs.scoreDocs]
    finally:
        # A new reader is opened on every call, so release it every time.
        reader.close()