本文整理汇总了Python中org.apache.lucene.index.DirectoryReader.open方法的典型用法代码示例。如果您正苦于以下问题：Python DirectoryReader.open方法的具体用法？Python DirectoryReader.open怎么用？Python DirectoryReader.open使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.index.DirectoryReader的用法示例。
在下文中一共展示了DirectoryReader.open方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: open_reader
# 需要导入模块: from org.apache.lucene.index import DirectoryReader [as 别名]
# 或者: from org.apache.lucene.index.DirectoryReader import open [as 别名]
def open_reader(self):
    """Open the index reader on first use.

    Does nothing when ``self.reader`` is already set. Otherwise the reader
    is opened on ``self.ram_dir`` when ``self.use_ram`` is truthy, and on
    ``self.dir`` when it is not.
    """
    if self.reader is not None:
        return
    if self.use_ram:
        # Single-argument print call: same output on Python 2 and Python 3.
        print("reading from ram directory ...")
        source = self.ram_dir
    else:
        source = self.dir
    self.reader = DirectoryReader.open(source)
示例2: search
# 需要导入模块: from org.apache.lucene.index import DirectoryReader [as 别名]
# 或者: from org.apache.lucene.index.DirectoryReader import open [as 别名]
def search(self):
    """Run an interactive query loop against the index in ``self.index_dir``.

    Each line read from the console is parsed against the "title" field and
    up to 50 matching documents are printed. An empty line ends the loop.
    The index reader is closed when the loop ends, including on error.
    """
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    base_dir = '.'
    directory = SimpleFSDirectory(File(os.path.join(base_dir, self.index_dir)))
    reader = DirectoryReader.open(directory)
    try:
        searcher = IndexSearcher(reader)
        analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        while True:
            # print("") emits one blank line on both Python 2 and Python 3.
            print("")
            print("Hit enter with no input to quit.")
            command = raw_input("Query:")
            if command == '':
                return
            print("")
            print("Searching for: %s" % command)
            query = QueryParser(Version.LUCENE_CURRENT, "title",
                                analyzer).parse(command)
            scoreDocs = searcher.search(query, 50).scoreDocs
            print("%s total matching documents." % len(scoreDocs))
            for scoreDoc in scoreDocs:
                print(searcher.doc(scoreDoc.doc))
    finally:
        # Release the index files held by the reader.
        reader.close()
示例3: search
# 需要导入模块: from org.apache.lucene.index import DirectoryReader [as 别名]
# 或者: from org.apache.lucene.index.DirectoryReader import open [as 别名]
def search():
    """Handle the search form and render ``busca.html`` with the matches.

    On POST, every non-empty form value is turned into a ``field:value``
    query term. The terms are passed to ``mansearch.buscar`` and each hit is
    converted to a ``{field name: string value}`` dict for the template.
    With no terms (or a non-POST request) the template gets an empty table.
    """
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    args = []
    if request.method == 'POST':
        # NOTE(review): these terms already carry a '+' prefix; if
        # mansearch.buscar also joins terms with ' +', the combined query
        # will contain '++' -- confirm against that helper.
        for name in ('ies', 'area', 'professor'):
            if request.form[name]:
                args.append('+' + name + ':' + request.form[name])
        if request.form['conceito']:
            # The same value is searched in the m, d and f fields.
            for prefix in ('m:', 'd:', 'f:'):
                args.append(prefix + request.form['conceito'])
    table = []
    if args:
        scoreDocs = mansearch.buscar('indexer/', args)
        reader = DirectoryReader.open(SimpleFSDirectory(File(indexDir)))
        try:
            searcher = IndexSearcher(reader)
            for scoreDoc in scoreDocs:
                doc = searcher.doc(scoreDoc.doc)
                table.append(dict((field.name(), field.stringValue())
                                  for field in doc.getFields()))
        finally:
            # Field values are already copied out; release the reader.
            reader.close()
    return render_template('busca.html', table=table)
示例4: __init__
# 需要导入模块: from org.apache.lucene.index import DirectoryReader [as 别名]
# 或者: from org.apache.lucene.index.DirectoryReader import open [as 别名]
def __init__(self, db_path):
    """Open the index at ``db_path`` and set up ``searcher`` and ``analyzer``.

    Logs the number of documents reported by the reader once it is open.
    """
    index_reader = DirectoryReader.open(SimpleFSDirectory(File(db_path)))
    self.searcher = IndexSearcher(index_reader)
    self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
    doc_count = index_reader.numDocs()
    logger.info("Loaded DB from %s with %d documents: ", db_path, doc_count)
示例5: __init__
# 需要导入模块: from org.apache.lucene.index import DirectoryReader [as 别名]
# 或者: from org.apache.lucene.index.DirectoryReader import open [as 别名]
def __init__(self):
    """Set up paths, the index reader, indexing settings and debug loggers.

    The index reader is opened only when the index directory exists;
    otherwise ``mIndexReader`` stays ``None``. Logging engines are created
    and started only when ``mIsDebug`` is true.
    """
    self.mDocumentDirectory = "/home/hnguyen/Projects/CLIFinder/operations.sub"
    self.mIndexDirectory = "/home/hnguyen/Projects/CLIFinder/cli.index"
    self.mIndexReader = None
    # mIndexReader was set to None on the line above, so the former
    # `self.mIndexReader == None` test was always true and is dropped.
    if os.path.isdir(self.mIndexDirectory):
        directory = SimpleFSDirectory(File(self.mIndexDirectory))
        self.mIndexReader = DirectoryReader.open(directory)

    # IndexingEngine settings
    self.mSimilarity = DecreaseLengthNormSimilarity()
    self.mOpenMode = IndexWriterConfig.OpenMode.CREATE

    self.mIsDebug = False
    if self.mIsDebug:
        # Setting up loggers
        self.mIndexingLogPath = "/home/hnguyen/Projects/CLIFinder/logs/indexing.log"
        self.mSearchingLogPath = "/home/hnguyen/Projects/CLIFinder/logs/searching.log"
        self.mIndexingLogger = LoggingEngine(self.mIndexingLogPath, "IndexingLogger", Queue.Queue())
        self.mSearchingLogger = LoggingEngine(self.mSearchingLogPath, "SearchingLogger", Queue.Queue())
        self.mIndexingLogger.start()
        self.mSearchingLogger.start()
        # NOTE(review): the extracted source lost its indentation; this
        # registration is assumed to belong to the debug branch -- confirm.
        atexit.register(self.clear)
示例6: buscar
# 需要导入模块: from org.apache.lucene.index import DirectoryReader [as 别名]
# 或者: from org.apache.lucene.index.DirectoryReader import open [as 别名]
def buscar(indexDir, args, options=None):
    """Search the index in ``indexDir`` and return the matching score docs.

    Args:
        indexDir: path of the index directory to open.
        args: list of query terms; they are joined with ``' +'`` into one
            query string.
        options: accepted but not used by this function.

    Returns:
        The ``scoreDocs`` array of the top 200 hits.
    """
    reader = DirectoryReader.open(SimpleFSDirectory(File(indexDir)))
    try:
        # Searcher over the index directory supplied by the caller.
        searcher = IndexSearcher(reader)
        # Analyzer used to tokenize the query.
        analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        # Terms without an explicit field are matched against "keywords".
        parser = QueryParser(Version.LUCENE_CURRENT, "keywords", analyzer)
        parser.setDefaultOperator(QueryParser.Operator.AND)
        # Join the caller's terms into one query string.
        command = ' +'.join(args)
        query = parser.parse(command)
        print(query)
        return searcher.search(query, 200).scoreDocs
    finally:
        # The returned hits only carry doc ids and scores, so the reader
        # can be released here.
        reader.close()
示例7: __init__
# 需要导入模块: from org.apache.lucene.index import DirectoryReader [as 别名]
# 或者: from org.apache.lucene.index.DirectoryReader import open [as 别名]
def __init__(self, index_path, method, logger=None, use_default_similarity=False):
    """Open the index at ``index_path`` and configure parser and similarity.

    With ``use_default_similarity`` the stock ``QueryParser`` and
    ``DefaultSimilarity`` are used and ``useExplainQuery`` is False.
    Otherwise the field-agnostic parser and similarity are used and
    ``useExplainQuery`` is True.
    """
    self.index_path = index_path
    index_store = SimpleFSDirectory(File(self.index_path))
    self.analyzer = StandardAnalyzer(LuceneVersion.LUCENE_CURRENT)
    self.reader = DirectoryReader.open(index_store)
    self.searcher = IndexSearcher(self.reader)

    # One switch selects the parser class, the similarity and the
    # explain-query flag together.
    if use_default_similarity:
        self.query_parser = QueryParser
        scoring = DefaultSimilarity()
        self.useExplainQuery = False
    else:
        self.query_parser = FieldAgnosticQueryParser
        scoring = FieldAgnosticSimilarity()
        self.useExplainQuery = True

    # Original author's note: by default FieldAgnosticSimilarity uses the
    # coord factor; it can be disabled via `similarity.useCoord = False`.
    self.searcher.setSimilarity(scoring)

    self.method = method  # never used?
    self.logger = logger
示例8: search
# 需要导入模块: from org.apache.lucene.index import DirectoryReader [as 别名]
# 或者: from org.apache.lucene.index.DirectoryReader import open [as 别名]
def search(self, field, text):
    """Search ``text`` within the indexed data.

    Args:
        field: name of the field to search; its stored value is also
            returned as the title of each hit.
        text: query text to parse and search for.

    Returns:
        A list of ``(score, url, title)`` tuples, at most 1000 entries.
    """
    idx_reader = DirectoryReader.open(self.directory)
    try:
        idx_searcher = IndexSearcher(idx_reader)
        # parse query
        parser = AnalyzingQueryParser(Version.LUCENE_CURRENT, field, self.analyser)
        query = parser.parse(text)
        # search
        results = []
        for hit in idx_searcher.search(query, 1000).scoreDocs.tolist():
            doc = idx_searcher.doc(hit.doc)
            title = doc.get(field)
            url = doc.get("url")
            results.append((hit.score, url, title))
        return results
    finally:
        # A reader is opened per call; close it so repeated searches do
        # not accumulate open index files.
        idx_reader.close()
示例9: query
# 需要导入模块: from org.apache.lucene.index import DirectoryReader [as 别名]
# 或者: from org.apache.lucene.index.DirectoryReader import open [as 别名]
def query(self, data):
    """Run ``data['query']`` against the index and return the hits as dicts.

    Args:
        data: mapping with a ``'query'`` entry holding the query string;
            terms without an explicit field are matched against "id".

    Returns:
        ``None`` when ``self.fil`` does not exist. Otherwise a dict with
        ``'totalHits'`` and ``'hits'``; ``'hits'`` maps each document's
        "id" value to a record holding ``'score'`` plus every stored field
        other than "id".
    """
    if not self.fil.exists():
        return None
    reader = DirectoryReader.open(self.d)
    try:
        searcher = IndexSearcher(reader)
        parsed = QueryParser(Version.LUCENE_30, "id",
                             self.analyzer).parse(data['query'])
        hits = searcher.search(parsed, 100000)
        results = {'totalHits': hits.totalHits, 'hits': {}}
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            record = {'score': hit.score}
            for field in doc.getFields():
                if field.name() != "id":
                    record[field.name()] = field.stringValue()
            results['hits'][doc.get('id')] = record
        return results
    finally:
        # Close the reader even when parsing or searching raises.
        reader.close()
示例10: search
# 需要导入模块: from org.apache.lucene.index import DirectoryReader [as 别名]
# 或者: from org.apache.lucene.index.DirectoryReader import open [as 别名]
def search(self, input_query=None, max_answers=10):
    """Search the given query in the index.

    Args:
        input_query: query string; ``None`` short-circuits the search.
        max_answers: maximum number of hits to return.

    Returns:
        ``None`` when ``input_query`` is ``None``; otherwise a list with
        one ``{field name: string value}`` dict per matching document.
    """
    if input_query is None:
        return None
    base_dir = '.'
    directory = SimpleFSDirectory(File(os.path.join(base_dir, self.index_dir)))
    reader = DirectoryReader.open(directory)
    try:
        searcher = IndexSearcher(reader)
        analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        parser = MultiFieldQueryParser(Version.LUCENE_CURRENT,
                                       (self._posts_fields + self._answer_fields),
                                       analyzer)
        # The unbound-call form of parse is kept from the original code.
        query = MultiFieldQueryParser.parse(parser, input_query)
        scoreDocs = searcher.search(query, max_answers).scoreDocs
        print("%s total matching documents." % len(scoreDocs))
        docs = []
        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            docs.append(dict((field.name(), field.stringValue())
                             for field in doc.getFields()))
        return docs
    finally:
        # Field values are copied into plain dicts; release the reader.
        reader.close()
示例11: search_docs
# 需要导入模块: from org.apache.lucene.index import DirectoryReader [as 别名]
# 或者: from org.apache.lucene.index.DirectoryReader import open [as 别名]
def search_docs(self, value, field="general_info"):
    """Return the documents matching ``value``.

    Args:
        value: query text to parse.
        field: field used for terms without an explicit field.

    Returns:
        A list of the matching documents, at most 1000.
    """
    MAX_RESULTS = 1000
    reader = DirectoryReader.open(self.store)
    try:
        searcher = IndexSearcher(reader)
        query = QueryParser(Version.LUCENE_CURRENT, field,
                            self.analyzer).parse(value)
        topDocs = searcher.search(query, MAX_RESULTS)
        # Documents are fetched before the reader is closed below.
        return [searcher.doc(hit.doc) for hit in topDocs.scoreDocs]
    finally:
        reader.close()
示例12: __init__
# 需要导入模块: from org.apache.lucene.index import DirectoryReader [as 别名]
# 或者: from org.apache.lucene.index.DirectoryReader import open [as 别名]
def __init__(self, base_dir, index_dir, index_file, queryDict):
    """Open the index and immediately run ``self.BM25`` over ``queryDict``.

    The index location is ``index_dir`` joined with ``index_file``;
    ``base_dir`` is only stored on the instance.
    """
    self.baseDir = base_dir
    self.indexFile = os.path.join(index_dir, index_file)
    # Original author's note: "uncomment when run Retrieve separately" --
    # the call is nevertheless active here.
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    index_store = SimpleFSDirectory(File(self.indexFile))
    bm25_searcher = IndexSearcher(DirectoryReader.open(index_store))
    self.BM25(bm25_searcher, queryDict)
    del bm25_searcher
示例13: build_corpus
# 需要导入模块: from org.apache.lucene.index import DirectoryReader [as 别名]
# 或者: from org.apache.lucene.index.DirectoryReader import open [as 别名]
def build_corpus(n=0):
    """Build the corpus for the ``n``-th test subcorpus.

    Opens the index named ``wiki_index + '-' + <subcorpus>`` and passes a
    searcher and analyzer to ``build_corpus_from_terms_with_wiki``.
    """
    subcorpus = texeval_corpus.test_subcorpora[n]
    # Hack for parallelizing queries, uses one index per domain.
    index_path = wiki_index + '-' + subcorpus
    index_store = FSDirectory.open(File(index_path))
    searcher = IndexSearcher(DirectoryReader.open(index_store))
    analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
    build_corpus_from_terms_with_wiki(subcorpus, searcher, analyzer)
示例14: config
# 需要导入模块: from org.apache.lucene.index import DirectoryReader [as 别名]
# 或者: from org.apache.lucene.index.DirectoryReader import open [as 别名]
def config():
    """Create the BM25 searcher and Chinese analyzer for ``INDEX_DIR``.

    The index directory is resolved relative to the directory of the
    running script (``sys.argv[0]``).

    Returns:
        A ``(searcher, analyzer)`` tuple.
    """
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    index_store = SimpleFSDirectory(File(os.path.join(script_dir, INDEX_DIR)))
    searcher = IndexSearcher(DirectoryReader.open(index_store))
    # BM25 with k1 = 2.0, b = 0.75. The original note lists k1 = 1.2,
    # b = 0.75 as the defaults, so k1 is deliberately not the default here.
    searcher.setSimilarity(BM25Similarity(2.0, 0.75))
    analyzer = SmartChineseAnalyzer(Version.LUCENE_CURRENT)
    return searcher, analyzer
示例15: perform_search
# 需要导入模块: from org.apache.lucene.index import DirectoryReader [as 别名]
# 或者: from org.apache.lucene.index.DirectoryReader import open [as 别名]
def perform_search(self, searchterm, results_per_page, page):
    """Search title, description and content and return one page of hits.

    ``searchterm`` is parsed once per field; the three queries are combined
    so that a match in any field is enough.

    Args:
        searchterm: query text entered by the user.
        results_per_page: number of hits on one page.
        page: zero-based page index; the first
            ``results_per_page * page`` hits are skipped.

    Returns:
        A ``(results, duration, count_results)`` tuple: the page's hits as
        ``{field name: string value}`` dicts, the time spent in the search
        call, and the total number of hits reported by the searcher.
    """
    # One QueryParser per searched field, OR-ed together.
    query = BooleanQuery()
    for field_name in ("title", "description", "content"):
        parser = QueryParser(Version.LUCENE_CURRENT, field_name, self.analyzer)
        query.add(parser.parse(searchterm), BooleanClause.Occur.SHOULD)

    offset = results_per_page * page
    reader = DirectoryReader.open(self.store)
    try:
        searcher = IndexSearcher(reader)
        start = datetime.now()
        # Fetch enough hits to cover the skipped pages plus this one.
        hits = searcher.search(query, results_per_page + offset)
        score_docs = hits.scoreDocs
        count_results = hits.totalHits
        duration = datetime.now() - start

        results = []
        for position, score_doc in enumerate(score_docs):
            if position < offset:
                # Hit belongs to an earlier page.
                continue
            doc = searcher.doc(score_doc.doc)
            results.append(dict((field.name(), field.stringValue())
                                for field in doc.getFields()))
        return results, duration, count_results
    finally:
        # Results are plain dicts by now; release the reader.
        reader.close()