本文整理汇总了 Python 中 lucene.QueryParser.setDefaultOperator 方法的典型用法代码示例。如果您正苦于以下问题：Python QueryParser.setDefaultOperator 方法的具体用法？Python QueryParser.setDefaultOperator 怎么用？Python QueryParser.setDefaultOperator 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 lucene.QueryParser 的用法示例。
在下文中一共展示了QueryParser.setDefaultOperator方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: searchXYPair
# 需要导入模块: from lucene import QueryParser [as 别名]
# 或者: from lucene.QueryParser import setDefaultOperator [as 别名]
def searchXYPair(self,x,y):
    """
    Return all sentences that are tagged with the two given entities (x, y).

    The "X" field is queried for ``x`` with AND as the default operator;
    a hit is kept only when its stored "Y" value equals ``y`` exactly.
    Each distinct "Sentence" value is reported once, in hit order, after
    being passed through ``IndexUtils.sentence_wrapper``.

    Returns an empty list when either entity is empty/None or when the
    lookup fails for any reason (the failure is printed, not raised).
    """
    if not x or not y:
        return []
    try:
        # Only word characters and whitespace are kept before parsing;
        # presumably to keep Lucene query syntax out of the parser.
        x = "".join(re.findall(r'[\w\s]+', x))
        qp = QueryParser(Version.LUCENE_35, "X", analyzer)
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parse(x)
        MAX = 100000
        hits = searcher.search(query, MAX)

        seen = set()
        sentences = []
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            if doc["Y"] != y:
                continue
            sentence = doc["Sentence"]
            if sentence not in seen:
                seen.add(sentence)
                sentences.append(sentence)
        return [IndexUtils.sentence_wrapper(sentence) for sentence in sentences]
    except Exception:
        # %-formatting instead of "+" so that reporting the failure cannot
        # itself raise when x or y is not a string.
        print("Fail (search XYPair) in x:%s y:%s" % (x, y))
        print("Unexpected error: %s" % (sys.exc_info()[0],))
        print("")
        return []
示例2: searchKey
# 需要导入模块: from lucene import QueryParser [as 别名]
# 或者: from lucene.QueryParser import setDefaultOperator [as 别名]
def searchKey(self, key , rank = None):
    """
    Return the evaluated "sentence" values of all documents matching ``key``.

    The "key" field is queried with AND as the default operator. A stored
    sentence that cannot be evaluated is printed and skipped. ``rank`` is
    accepted but not used by this method.

    Returns an empty list if the query or the search itself fails (the
    failure is printed, not raised).
    """
    query = ""
    try:
        MAX = 100000
        qp = QueryParser(Version.LUCENE_35, "key", analyzer)
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parse(key)
        hits = searcher.search(query, MAX)
        sentence_list = []
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            try:
                # SECURITY NOTE(review): eval() executes whatever text is
                # stored in the index. Only safe if the index content is
                # fully trusted; consider ast.literal_eval if the stored
                # values are plain literals.
                sentence_list.append(eval(doc.get("sentence").encode("utf-8")))
            except Exception:
                print(doc.get("sentence"))
        return sentence_list
    except Exception:
        print("Fail in receiving sentence with term %s" % (key,))
        # Printed as a tuple, exactly as the original Python 2 statement did.
        print(("query", query))
        print("Unexpected error: %s" % (sys.exc_info()[0],))
        print("")
        return []
示例3: does_line_existNew
# 需要导入模块: from lucene import QueryParser [as 别名]
# 或者: from lucene.QueryParser import setDefaultOperator [as 别名]
def does_line_existNew(self,line,x,y):
    """
    Report whether the parsed sentence is already present in the index.

    The words of ``line`` are queried against the "Sentence" field with
    AND as the default operator. ``x`` and ``y`` are accepted but not used.

    Returns True if at least one document matches, or if the query is
    rejected with "too many boolean clauses"; False otherwise, including
    on any other error.
    """
    try:
        words = re.findall(r'[\w]+',line)
        string = "".join(word + " " for word in words)
        sentence_parser = QueryParser(Version.LUCENE_35, "Sentence", analyzer)
        sentence_parser.setDefaultOperator(sentence_parser.Operator.AND)
        parsed = sentence_parser.parse(string)
        MAX = 10
        found = searcher.search(parsed, MAX)
        return len(found.scoreDocs) > 0
    except Exception:
        details = str(sys.exc_info())
        if "too many boolean clauses" not in details:
            print("Unexpected error: %s" % (sys.exc_info()[0],))
            print("in does line exist")
            print(details)
            return False
        print("too many boolean clauses")
        # Report True so that the sentence is not added each time, which
        # avoids further error messages. Only occurs with very large
        # sentences.
        return True
示例4: query
# 需要导入模块: from lucene import QueryParser [as 别名]
# 或者: from lucene.QueryParser import setDefaultOperator [as 别名]
def query(indexName, queryString):
    """
    Search the index at ``indexName`` and return the 'id' of each hit.

    ``queryString`` is parsed against the "content" field with AND as the
    default operator, after every "-" has been replaced by "_". At most
    100 hits are returned, in the order the searcher reports them.

    The searcher opened here is closed before returning, also on error.
    """
    indSearcher = IndexSearcher(SimpleFSDirectory(File(indexName)))
    try:
        qp = QueryParser(Version.LUCENE_CURRENT, "content", StandardAnalyzer(Version.LUCENE_CURRENT))
        qp.setDefaultOperator(qp.Operator.AND)
        parsed = qp.parse(queryString.replace("-","_"))
        top_docs = indSearcher.search(parsed, 100)
        # Load each document by the hit's own document number. The previous
        # code used a running counter and so returned documents 0..n-1 of
        # the index regardless of what actually matched.
        return [indSearcher.doc(hit.doc).get('id') for hit in top_docs.scoreDocs]
    finally:
        indSearcher.close()
示例5: searchString
# 需要导入模块: from lucene import QueryParser [as 别名]
# 或者: from lucene.QueryParser import setDefaultOperator [as 别名]
def searchString(self, string):
    """
    Search for a string and return an array of POS-tagged sentences.

    Dots are replaced by spaces (for dates such as 1931.08.06) and only
    word characters and whitespace are kept before the text is parsed
    against the "sentence" field with AND as the default operator. Each
    hit's stored "sentence" value is evaluated and collected.

    Returns an empty list if anything fails, including the evaluation of
    a single stored sentence (the failure is printed, not raised).
    """
    query = ""
    try:
        MAX = 100000
        # for dates such as 1931.08.06
        string = string.replace("."," ")
        string = "".join(re.findall(r'[\w\s]+',string))
        qp = QueryParser(Version.LUCENE_35, "sentence", analyzer)
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parse(string)
        hits = searcher.search(query, MAX)
        sentence_list = []
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            # SECURITY NOTE(review): eval() executes whatever text is stored
            # in the index. Only safe if the index content is fully trusted;
            # consider ast.literal_eval if the values are plain literals.
            sentence_list.append(eval(doc.get("sentence").encode("utf-8")))
        return sentence_list
    except Exception:
        # %-formatting so that a non-string argument cannot make the error
        # report itself raise.
        print("Fail in receiving sentence with term %s in search term" % (string,))
        # Printed as a tuple, exactly as the original Python 2 statement did.
        print(("query", query))
        print("Unexpected error: %s" % (sys.exc_info()[0],))
        print("")
        return []
示例6: search
# 需要导入模块: from lucene import QueryParser [as 别名]
# 或者: from lucene.QueryParser import setDefaultOperator [as 别名]
def search(self, string ,special = None):
    """
    Return the UTF-8 encoded "title" of every document matching ``string``.

    Dots are replaced by spaces (for dates such as 1931.08.06) and only
    word characters and whitespace are kept before the text is parsed
    against the "title" field with AND as the default operator.
    ``special`` is accepted but not used by this method.

    Returns an empty list if anything fails (the failure is printed, not
    raised).
    """
    query = ""
    try:
        MAX = 100000
        # for dates such as 1931.08.06
        string = string.replace("."," ")
        string = "".join(re.findall(r'[\w\s]+',string))
        qp = QueryParser(Version.LUCENE_35, "title", analyzer)
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parse(string)
        hits = searcher.search(query, MAX)
        return [searcher.doc(hit.doc).get("title").encode("utf-8")
                for hit in hits.scoreDocs]
    except Exception:
        # %-formatting so that a non-string argument cannot make the error
        # report itself raise.
        print("Fail in receiving sentence with term %s" % (string,))
        # Printed as a tuple, exactly as the original Python 2 statement did.
        print(("query", query))
        print("Unexpected error: %s" % (sys.exc_info()[0],))
        print("")
        return []
示例7: searchForDbpediaURI
# 需要导入模块: from lucene import QueryParser [as 别名]
# 或者: from lucene.QueryParser import setDefaultOperator [as 别名]
def searchForDbpediaURI(self, uri):
    """
    Return all sentences that are tagged with the given DBpedia URI.

    The DBpedia ontology/property/resource prefix is stripped and only
    word characters and whitespace are kept; the remainder is parsed
    against the "URI" field with AND as the default operator. A hit is
    kept only when its stored "URI" equals the original, unmodified
    ``uri``.

    Returns a list of ``[wrapped sentence, X, Y, URI]`` entries. If an
    error occurs, it is printed and whatever was collected up to that
    point (possibly nothing) is returned.
    """
    print("in searchForDbpediaURI")
    uri_old = uri
    for prefix in ("http://dbpedia.org/ontology/",
                   "http://dbpedia.org/property/",
                   "http://dbpedia.org/resource/"):
        uri = uri.replace(prefix, "")
    uri = "".join(re.findall(r'[\w\s]+',uri))
    # Bound before the try block so the error path always has a list to
    # return; previously a failure while building or parsing the query
    # raised NameError from inside the handler.
    result = []
    try:
        qp = QueryParser(Version.LUCENE_35, "URI", analyzer)
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parse(uri)
        print("query: "+str(query))
        MAX = 500000
        hits = searcher.search(query, MAX)
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            dbpedia_uri = doc["URI"]
            if dbpedia_uri == uri_old:
                result.append([IndexUtils.sentence_wrapper(doc["Sentence"]), doc["X"], doc["Y"],dbpedia_uri])
    except Exception:
        print("Fail in uri: "+uri)
        print("Unexpected error: %s" % (sys.exc_info()[0],))
    return result
示例8: searchForDbpediaURI
# 需要导入模块: from lucene import QueryParser [as 别名]
# 或者: from lucene.QueryParser import setDefaultOperator [as 别名]
def searchForDbpediaURI(self, uri):
    """
    Return all anchor texts that are related to the given DBpedia URI.

    The DBpedia resource prefix is stripped and only word characters and
    whitespace are kept; the remainder is parsed against the
    "dbpedia_uri" field with AND as the default operator. A hit is kept
    only when its stored "dbpedia_uri" equals the original ``uri``.

    Returns a list of ``[anchor, anchor_uri, dbpedia_uri, number]``
    entries (all UTF-8 encoded), where "number" is how often the anchor
    appears on the English Wikipedia. Returns an empty list if anything
    fails (the failure is printed, not raised).
    """
    uri_old = uri
    uri = uri.replace("http://dbpedia.org/resource/","")
    uri = "".join(re.findall(r'[\w\s]+',uri))
    try:
        qp = QueryParser(Version.LUCENE_35, "dbpedia_uri", analyzer)
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parse(uri)
        MAX = 10000
        result = []
        hits = searcher.search(query, MAX)
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            dbpedia_uri = doc["dbpedia_uri"].encode("utf-8")
            if dbpedia_uri == uri_old:
                result.append([doc["anchor"].encode("utf-8"), doc["anchor_uri"].encode("utf-8"), dbpedia_uri, doc["number"].encode("utf-8")])
        return result
    except Exception as err:
        print("searchForDbpediaURI - Fail in uri: "+uri)
        # Report the cause as well; the original message hid it.
        print("Unexpected error: %s" % (type(err),))
        return []
示例9: getResultScoreDocs
# 需要导入模块: from lucene import QueryParser [as 别名]
# 或者: from lucene.QueryParser import setDefaultOperator [as 别名]
def getResultScoreDocs(query):
    """
    Parse the user-submitted ``query`` against the "title" field and
    return the scoreDocs of the top 50 hits.

    A StandardAnalyzer is used and AND is the parser's default operator.
    """
    title_analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
    title_parser = QueryParser(Version.LUCENE_CURRENT, "title", title_analyzer)
    title_parser.setDefaultOperator(QueryParser.Operator.AND)
    top_docs = searcher.search(title_parser.parse(query), 50)
    return top_docs.scoreDocs
示例10: does_line_exist
# 需要导入模块: from lucene import QueryParser [as 别名]
# 或者: from lucene.QueryParser import setDefaultOperator [as 别名]
def does_line_exist(self,line,x,y):
    """
    Report whether a sentence already exists in the index.

    Kept for callers of the old name; the check is delegated unchanged to
    ``does_line_existNew``. The older, more complex implementation that
    used to follow the return statement could never execute and has been
    removed.
    """
    return self.does_line_existNew(line, x, y)
示例11: run
# 需要导入模块: from lucene import QueryParser [as 别名]
# 或者: from lucene.QueryParser import setDefaultOperator [as 别名]
def run(command):
    """
    Run ``command`` as a query against the "contents" field of the index
    stored in the "index" directory.

    AND is the default operator and the fuzzy minimum similarity is 0.2.
    Every hit is passed through ``transform``.

    Returns None for an empty command, otherwise the list of transformed
    hits. The searcher is closed before returning, also on error.
    """
    if command == '':
        return None
    STORE_DIR = "index"
    initVM(CLASSPATH)
    directory = FSDirectory.getDirectory(STORE_DIR, False)
    searcher = IndexSearcher(directory)
    try:
        analyzer = StandardAnalyzer()
        parser = QueryParser("contents", analyzer)
        parser.setDefaultOperator(QueryParser.Operator.AND)
        parser.setFuzzyMinSim(0.2)
        query = parser.parse(command)
        # Build the list eagerly so that no hit is read after the searcher
        # has been closed.
        return [transform(hit) for hit in searcher.search(query)]
    finally:
        searcher.close()
示例12: searchForDbpediaURImax
# 需要导入模块: from lucene import QueryParser [as 别名]
# 或者: from lucene.QueryParser import setDefaultOperator [as 别名]
def searchForDbpediaURImax(self, uri, number):
    """
    Return at most ``number`` anchor texts related to the given DBpedia URI.

    The DBpedia resource prefix is stripped and only word characters and
    whitespace are kept; the remainder is parsed against the
    "dbpedia_uri" field with AND as the default operator. A hit is kept
    only when its stored "dbpedia_uri" equals the original ``uri``.

    Returns a list of ``[anchor, anchor_uri, dbpedia_uri, number]``
    entries, sorted by the integer "number" (how often the anchor appears
    on the English Wikipedia) in descending order. Returns an empty list
    if anything fails (the failure is printed, not raised).
    """
    uri_old = uri
    uri = uri.replace("http://dbpedia.org/resource/","")
    uri = "".join(re.findall(r'[\w\s]+',uri))
    try:
        qp = QueryParser(Version.LUCENE_35, "dbpedia_uri", analyzer)
        qp.setDefaultOperator(qp.Operator.AND)
        query = qp.parse(uri)
        MAX = 10000
        result = []
        hits = searcher.search(query, MAX)
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            dbpedia_uri = doc["dbpedia_uri"].encode("utf-8")
            if dbpedia_uri == uri_old:
                result.append([doc["anchor"].encode("utf-8"), doc["anchor_uri"].encode("utf-8"), dbpedia_uri, int(doc["number"].encode("utf-8"))])
        result.sort(key = itemgetter(3), reverse=True)
        # Slicing already returns the whole list when it is shorter than
        # `number`, so no length check is needed.
        return result[:number]
    except Exception:
        print("searchForDbpediaURImax - Fail in uri: "+uri)
        print("Unexpected error: %s" % (sys.exc_info()[0],))
        print("")
        return []
示例13: test_search
# 需要导入模块: from lucene import QueryParser [as 别名]
# 或者: from lucene.QueryParser import setDefaultOperator [as 别名]
def test_search(index_dir):
    '''
    Smoke-test the index created under ``index_dir``.

    Runs the fixed query 'email_subject:Training' (default field
    "keywords", AND as the default operator), prints how many of the top
    50 hits were found and how long the search took, then prints the
    score and a name -> value table of the fields of every hit.
    '''
    searcher = IndexSearcher(SimpleFSDirectory(File(index_dir)), True)
    parser = QueryParser(Version.LUCENE_CURRENT, "keywords", STD_ANALYZER)
    parser.setDefaultOperator(QueryParser.Operator.AND)
    query = parser.parse('email_subject:Training')

    started = datetime.datetime.now()
    scoreDocs = searcher.search(query, 50).scoreDocs
    duration = datetime.datetime.now() - started

    print("Found %d document(s) (in %s) that matched query '%s':" %(len(scoreDocs), duration, query))
    for scoreDoc in scoreDocs:
        doc = searcher.doc(scoreDoc.doc)
        print(scoreDoc.score)
        table = {}
        for field in doc.getFields():
            table[field.name()] = field.stringValue()
        print(table)
示例14: search
# 需要导入模块: from lucene import QueryParser [as 别名]
# 或者: from lucene.QueryParser import setDefaultOperator [as 别名]
def search(self, query,category_id=None):
    """
    Search the 'summary' and 'title' fields for ``query``.

    The query is parsed once per field with AND as the default operator,
    and the two parsed queries are combined as SHOULD clauses of one
    BooleanQuery. When ``category_id`` is truthy, ``self.catfilter`` is
    given the query and the category id and passed to the search as a
    filter.

    Returns whatever ``self.searcher.search`` returns.
    """
    should = BooleanClause.Occur.SHOULD

    field_queries = []
    for field_name in ('summary', 'title'):
        field_parser = QueryParser(field_name, self.analyzer)
        field_parser.setDefaultOperator(QueryParser.AND_OPERATOR)
        field_queries.append(field_parser.parse(query))

    combined = BooleanQuery()
    for field_query in field_queries:
        combined.add(field_query, should)

    if not category_id:
        return self.searcher.search(combined)

    self.catfilter.query = query
    self.catfilter.category_id = category_id
    return self.searcher.search(combined, self.catfilter)
示例15: similar
# 需要导入模块: from lucene import QueryParser [as 别名]
# 或者: from lucene.QueryParser import setDefaultOperator [as 别名]
def similar(command, docno):
    """
    Find documents similar to one hit of the query ``command``.

    ``command`` is searched in the "contents" field of the index stored
    in the "index" directory (AND as the default operator, fuzzy minimum
    similarity 0.2). The id of hit number ``docno`` is then handed to
    MoreLikeThis (fields 'name' and 'contents', minimum word length 2,
    boosting enabled), and the resulting query is searched again.

    Returns the list of hits of that second search, each passed through
    ``transform``. The searcher and the index reader opened here are
    closed before returning, also on error.
    """
    STORE_DIR = "index"
    initVM(CLASSPATH)
    directory = FSDirectory.getDirectory(STORE_DIR, False)
    searcher = IndexSearcher(directory)
    try:
        analyzer = StandardAnalyzer()
        parser = QueryParser("contents", analyzer)
        parser.setDefaultOperator(QueryParser.Operator.AND)
        parser.setFuzzyMinSim(0.2)
        query = parser.parse(command)
        hits = searcher.search(query)
        document = hits.id(docno)

        ir = IndexReader.open(STORE_DIR)
        try:
            mlt = MoreLikeThis(ir)
            mlt.setFieldNames(['name', 'contents'])
            mlt.setMinWordLen(2)
            mlt.setBoost(True)
            query = mlt.like(document)
            # Build the list eagerly so that no hit is read after the
            # searcher has been closed.
            return [transform(hit) for hit in searcher.search(query)]
        finally:
            ir.close()
    finally:
        searcher.close()