本文整理汇总了Python中org.apache.lucene.index.DirectoryReader类的典型用法代码示例。如果您正苦于以下问题:Python DirectoryReader类的具体用法?Python DirectoryReader怎么用?Python DirectoryReader使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了DirectoryReader类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: open_reader
def open_reader(self):
    """Lazily open the index reader and cache it on ``self.reader``.

    Does nothing when ``self.reader`` is already set. Otherwise the reader
    is opened over ``self.ram_dir`` when ``self.use_ram`` is truthy and over
    ``self.dir`` when it is not.
    """
    if self.reader is not None:
        return
    if not self.use_ram:
        self.reader = DirectoryReader.open(self.dir)
        return
    print("reading from ram directory ...")
    self.reader = DirectoryReader.open(self.ram_dir)
示例2: search
def search(self, input_query=None, max_answers=10):
    """Search the index for ``input_query`` over the post and answer fields.

    Args:
        input_query: Query string handed to the multi-field query parser.
            ``None`` short-circuits the search.
        max_answers: Maximum number of hits requested from the searcher.

    Returns:
        ``None`` when ``input_query`` is ``None``; otherwise a list holding
        one ``{field name: string value}`` dict per hit.
    """
    if input_query is None:
        return None

    base_dir = '.'
    directory = SimpleFSDirectory(File(os.path.join(base_dir, self.index_dir)))
    reader = DirectoryReader.open(directory)
    try:
        searcher = IndexSearcher(reader)
        analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        parser = MultiFieldQueryParser(
            Version.LUCENE_CURRENT,
            (self._posts_fields + self._answer_fields),
            analyzer)
        # Kept as a class-level call with the parser passed explicitly --
        # presumably to reach the instance parse() past the static
        # overloads; confirm before simplifying to parser.parse(...).
        query = MultiFieldQueryParser.parse(parser, input_query)
        scoreDocs = searcher.search(query, max_answers).scoreDocs
        print("%s total matching documents." % len(scoreDocs))

        docs = []
        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            # Copy the field values into plain Python data so nothing
            # returned depends on the reader that is closed below.
            docs.append(dict((field.name(), field.stringValue())
                             for field in doc.getFields()))
        return docs
    finally:
        # The reader is opened per call; close it on every exit path.
        reader.close()
示例3: __init__
def __init__(self, db_path):
    """Open the index stored at ``db_path`` and prepare it for searching.

    Sets ``self.searcher`` (an ``IndexSearcher`` over the opened reader) and
    ``self.analyzer`` (a ``StandardAnalyzer``), then logs the path together
    with the reader's document count.
    """
    index_reader = DirectoryReader.open(SimpleFSDirectory(File(db_path)))
    self.searcher = IndexSearcher(index_reader)
    self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)

    doc_count = index_reader.numDocs()
    logger.info("Loaded DB from %s with %d documents: ", db_path, doc_count)
示例4: __init__
def __init__(self):
    """Set up paths, the optional index reader, indexing settings and loggers.

    The index reader is opened only when the index directory already exists
    on disk; otherwise ``self.mIndexReader`` stays ``None``. The two logging
    engines are created and started only when ``self.mIsDebug`` is true,
    which this constructor hard-codes to ``False``.
    """
    self.mDocumentDirectory = "/home/hnguyen/Projects/CLIFinder/operations.sub"
    self.mIndexDirectory = "/home/hnguyen/Projects/CLIFinder/cli.index"

    # The reader starts out unset and is filled in only for an existing index.
    self.mIndexReader = None
    if self.mIndexReader is None and os.path.isdir(self.mIndexDirectory):
        index_dir = SimpleFSDirectory(File(self.mIndexDirectory))
        self.mIndexReader = DirectoryReader.open(index_dir)

    # --- IndexingEngine settings ---
    self.mSimilarity = DecreaseLengthNormSimilarity()
    self.mOpenMode = IndexWriterConfig.OpenMode.CREATE

    self.mIsDebug = False
    if self.mIsDebug:
        # --- Logger setup (debug mode only) ---
        self.mIndexingLogPath = "/home/hnguyen/Projects/CLIFinder/logs/indexing.log"
        self.mSearchingLogPath = "/home/hnguyen/Projects/CLIFinder/logs/searching.log"
        self.mIndexingLogger = LoggingEngine(
            self.mIndexingLogPath, "IndexingLogger", Queue.Queue())
        self.mSearchingLogger = LoggingEngine(
            self.mSearchingLogPath, "SearchingLogger", Queue.Queue())
        self.mIndexingLogger.start()
        self.mSearchingLogger.start()

    # NOTE(review): registered unconditionally here -- confirm the cleanup
    # hook was not meant to be installed in debug mode only.
    atexit.register(self.clear)
示例5: search
def search(self, field, text):
    """Search ``text`` within the indexed data.

    Args:
        field: Name of the field to query; its stored value is also returned
            as the title of each hit.
        text: Query text handed to the query parser.

    Returns:
        A list of ``(score, url, title)`` tuples, one per hit, for at most
        1000 hits.
    """
    results = []
    idx_reader = DirectoryReader.open(self.directory)
    try:
        idx_searcher = IndexSearcher(idx_reader)

        # Parse the query against the requested field.
        parser = AnalyzingQueryParser(Version.LUCENE_CURRENT, field, self.analyser)
        query = parser.parse(text)

        # Run the search and pull out plain values for each hit.
        hits = idx_searcher.search(query, 1000).scoreDocs.tolist()
        for hit in hits:
            doc = idx_searcher.doc(hit.doc)
            results.append((hit.score, doc.get("url"), doc.get(field)))
    finally:
        # A reader is opened on every call; close it even when parsing or
        # searching raises so repeated searches do not leak file handles.
        idx_reader.close()
    return results
示例6: buscar
def buscar(indexDir, args, options=None):
    """Run an AND query against the ``keywords`` field of an index.

    Args:
        indexDir: Filesystem path of the index directory to search.
        args: Sequence of query fragments; they are joined with ``' +'``
            into a single query string.
        options: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        The ``scoreDocs`` array of the top 200 hits. Callers resolve the
        documents through their own reader on the same index.
    """
    fsDir = SimpleFSDirectory(File(indexDir))
    # Searcher over the index directory supplied by the caller.
    reader = DirectoryReader.open(fsDir)
    try:
        searcher = IndexSearcher(reader)
        # Analyzer used to tokenize the query text.
        analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        # Query parser defaulting to the "keywords" field; every clause is
        # required because the default operator is AND.
        parser = QueryParser(Version.LUCENE_CURRENT, "keywords", analyzer)
        parser.setDefaultOperator(QueryParser.Operator.AND)
        # Combine the fragments passed in into one query string.
        command = ' +'.join(args)
        query = parser.parse(command)
        print(query)
        # Array holding the result of the query.
        return searcher.search(query, 200).scoreDocs
    finally:
        # The reader is private to this call; release it on every exit path.
        reader.close()
示例7: query
def query(self, data):
    """Look up ``data['query']`` in the index, defaulting to the ``id`` field.

    Args:
        data: Mapping with a ``'query'`` entry holding the query string.

    Returns:
        ``None`` when ``self.fil`` does not exist. Otherwise a dict with
        ``'totalHits'`` and ``'hits'``; ``'hits'`` maps each document's
        ``id`` value to a record containing ``'score'`` plus every field
        other than ``id``.
    """
    if not self.fil.exists():
        return None

    reader = DirectoryReader.open(self.d)
    try:
        searcher = IndexSearcher(reader)
        parsed = QueryParser(
            Version.LUCENE_30, "id", self.analyzer).parse(data['query'])
        hits = searcher.search(parsed, 100000)

        results = {'totalHits': hits.totalHits, 'hits': {}}
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            # Score goes in first; a field that happens to be named
            # "score" still overrides it, as before.
            record = {'score': hit.score}
            for field in doc.getFields():
                if field.name() != "id":
                    record[field.name()] = field.stringValue()
            results['hits'][doc.get('id')] = record
        return results
    finally:
        # Close the reader even when parsing or searching raises; it was
        # previously closed on the success path only.
        reader.close()
示例8: __init__
def __init__(self, index_path, method, logger=None, use_default_similarity=False):
    """Open the index at ``index_path`` and configure parser and similarity.

    Args:
        index_path: Filesystem path of the index directory.
        method: Stored on ``self.method`` (not used inside this constructor).
        logger: Optional logger stored on ``self.logger``.
        use_default_similarity: When true, use ``QueryParser`` with
            ``DefaultSimilarity`` and turn ``useExplainQuery`` off;
            otherwise use the field-agnostic parser and similarity and turn
            ``useExplainQuery`` on.
    """
    self.index_path = index_path
    fs_directory = SimpleFSDirectory(File(self.index_path))
    self.analyzer = StandardAnalyzer(LuceneVersion.LUCENE_CURRENT)
    self.reader = DirectoryReader.open(fs_directory)
    self.searcher = IndexSearcher(self.reader)

    # Parser class, similarity and explain-mode are all chosen by the flag.
    if use_default_similarity:
        self.query_parser = QueryParser
        scoring = DefaultSimilarity()
        self.useExplainQuery = False
    else:
        self.query_parser = FieldAgnosticQueryParser
        scoring = FieldAgnosticSimilarity()
        self.useExplainQuery = True

    self.searcher.setSimilarity(scoring)
    self.method = method
    self.logger = logger
示例9: search
def search(self):
    """Interactive console search against the ``title`` field of the index.

    Prompts for queries until an empty line is entered. For each query it
    prints the number of hits (at most 50 are fetched) followed by every
    matching document. Returns ``None``.
    """
    # initVM() with VM options fails once a JVM is already running in this
    # process, so only start one when none exists yet.
    if lucene.getVMEnv() is None:
        lucene.initVM(vmargs=['-Djava.awt.headless=true'])

    base_dir = '.'
    directory = SimpleFSDirectory(File(os.path.join(base_dir, self.index_dir)))
    reader = DirectoryReader.open(directory)
    try:
        searcher = IndexSearcher(reader)
        analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        while True:
            print("")
            print("Hit enter with no input to quit.")
            command = raw_input("Query:")
            if command == '':
                return

            print("")
            print("Searching for: %s" % (command,))
            query = QueryParser(Version.LUCENE_CURRENT, "title",
                                analyzer).parse(command)
            scoreDocs = searcher.search(query, 50).scoreDocs
            print("%s total matching documents." % len(scoreDocs))

            for scoreDoc in scoreDocs:
                print(searcher.doc(scoreDoc.doc))
    finally:
        # Release the reader when the session ends or a query blows up.
        reader.close()
示例10: search
def search():
    """Handle the search form and render ``busca.html`` with the hits.

    On POST, each non-empty form value is turned into a query fragment:
    ``ies``, ``area`` and ``professor`` become required (``+``) clauses,
    while ``conceito`` is matched against the ``m``, ``d`` and ``f`` fields.
    The template receives ``table``, a list with one
    ``{field name: string value}`` dict per hit; it is empty when nothing
    was searched.
    """
    # initVM() with VM options fails once a JVM is running, which made every
    # request after the first one fail. Reuse the running VM and attach the
    # thread serving this request.
    vm_env = lucene.getVMEnv() or lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    vm_env.attachCurrentThread()

    # Doc ids returned by buscar() are only meaningful for the index they
    # came from, so the same path is used to load the documents.
    index_path = 'indexer/'

    args = []
    table = []
    if request.method == 'POST':
        if request.form['ies']:
            args.append('+ies:' + request.form['ies'])
        if request.form['area']:
            args.append('+area:' + request.form['area'])
        if request.form['professor']:
            args.append('+professor:' + request.form['professor'])
        if request.form['conceito']:
            args.append('m:' + request.form['conceito'])
            args.append('d:' + request.form['conceito'])
            args.append('f:' + request.form['conceito'])

    if args:
        scoreDocs = mansearch.buscar(index_path, args)
        reader = DirectoryReader.open(SimpleFSDirectory(File(index_path)))
        try:
            searcher = IndexSearcher(reader)
            for scoreDoc in scoreDocs:
                doc = searcher.doc(scoreDoc.doc)
                table.append(dict((field.name(), field.stringValue())
                                  for field in doc.getFields()))
        finally:
            # One reader per request; close it so handles do not pile up.
            reader.close()

    return render_template('busca.html', table=table)
示例11: config
def config():
    """Build the searcher/analyzer pair used for querying.

    Opens the index at ``INDEX_DIR`` relative to the directory of the
    running script (``sys.argv[0]``) and switches the searcher's scoring to
    BM25.

    Returns:
        A ``(searcher, analyzer)`` tuple; the analyzer is a
        ``SmartChineseAnalyzer``.
    """
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    index_path = os.path.join(script_dir, INDEX_DIR)
    reader = DirectoryReader.open(SimpleFSDirectory(File(index_path)))
    searcher = IndexSearcher(reader)

    # BM25 with k1 = 2.0 and b = 0.75 (the library defaults would be
    # k1 = 1.2, b = 0.75).
    searcher.setSimilarity(BM25Similarity(2.0, 0.75))

    return searcher, SmartChineseAnalyzer(Version.LUCENE_CURRENT)
示例12: search_docs
def search_docs(self, value, field="general_info"):
    """Return the documents matching ``value`` in ``field``.

    Args:
        value: Query text handed to the query parser.
        field: Default field the query is parsed against.

    Returns:
        A list with the document of each hit, for at most 1000 hits.
    """
    MAX_RESULTS = 1000
    reader = DirectoryReader.open(self.store)
    try:
        searcher = IndexSearcher(reader)
        query = QueryParser(Version.LUCENE_CURRENT, field,
                            self.analyzer).parse(value)
        topDocs = searcher.search(query, MAX_RESULTS)
        # Documents are loaded here, before the reader is released below.
        return [searcher.doc(hit.doc) for hit in topDocs.scoreDocs]
    finally:
        # A new reader is opened on every call; close it so repeated
        # searches do not leak open index files.
        reader.close()
示例13: _maybeReopen
def _maybeReopen(self):
    """Swap in a fresh reader and searcher once many modifications piled up.

    Nothing happens until ``self._latestModifications`` holds more than
    10000 entries. Past that point a changed reader is requested from the
    writer; when one is returned, the old reader is closed, the reader and
    searcher are replaced, and the modification log is cleared.
    """
    if len(self._latestModifications) <= 10000:
        return

    refreshed = DirectoryReader.openIfChanged(self._reader, self._writer, True)
    if refreshed is None:
        return

    self._reader.close()
    self._reader = refreshed
    self._searcher = IndexSearcher(self._reader)
    # NOTE(review): the log is cleared only after a successful reopen --
    # confirm that matches the intended nesting of this call.
    self._latestModifications.clear()
示例14: __init__
def __init__(self, base_dir, index_dir, index_file, queryDict):
    """Open the index and immediately run the BM25 retrieval over it.

    Args:
        base_dir: Stored on ``self.baseDir``.
        index_dir: Directory that contains the index.
        index_file: Name of the index inside ``index_dir``; the joined path
            is stored on ``self.indexFile``.
        queryDict: Queries passed through to ``self.BM25``.
    """
    self.baseDir = base_dir
    self.indexFile = os.path.join(index_dir, index_file)

    # initVM() with VM options fails once a JVM is already running, so a
    # second instance (or use from a program that already started the VM)
    # used to break here. Start the VM only when none exists.
    if lucene.getVMEnv() is None:
        lucene.initVM(vmargs=['-Djava.awt.headless=true'])

    directory = SimpleFSDirectory(File(self.indexFile))
    searcher = IndexSearcher(DirectoryReader.open(directory))
    # NOTE(review): the reader is left open because self.BM25 may keep a
    # reference to the searcher -- confirm, then close it here if not.
    self.BM25(searcher, queryDict)
示例15: build_corpus
def build_corpus(n=0):
    """Build the corpus for the ``n``-th test sub-corpus.

    Args:
        n: Index into ``texeval_corpus.test_subcorpora``.

    The sub-corpus name selects a dedicated index at
    ``wiki_index + '-' + name``; a searcher over that index and a
    ``StandardAnalyzer`` are handed to ``build_corpus_from_terms_with_wiki``.
    """
    subcorpus = texeval_corpus.test_subcorpora[n]

    # Hack for parallelizing queries: one index per domain.
    index_location = File(wiki_index + '-' + subcorpus)
    reader = DirectoryReader.open(FSDirectory.open(index_location))
    searcher = IndexSearcher(reader)
    analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)

    build_corpus_from_terms_with_wiki(subcorpus, searcher, analyzer)