This page collects typical usage examples of the Python class lucene.QueryParser. If you are wondering what QueryParser does, how to use it, or what working code looks like, the curated examples below may help.
Fifteen QueryParser code examples are shown, ordered by popularity.
Example 1: searchXYPair
def searchXYPair(self, x, y):
    """
    Returns all sentences that are tagged with the given two entities (x, y).
    """
tmp_hm = {}
if x == "" or y == "":
return []
try:
        # keep only word characters and whitespace so that Lucene query
        # syntax characters in the input cannot break the parser
        x = "".join(re.findall(r'[\w\s]+', x))
qp = QueryParser(Version.LUCENE_35, "X", analyzer)
qp.setDefaultOperator(qp.Operator.AND)
query = qp.parse(x)
MAX = 100000
result_list = []
hits = searcher.search(query, MAX)
for hit in hits.scoreDocs:
doc = searcher.doc(hit.doc)
y_entry = doc["Y"]
if y_entry == y:
tmp_hm[doc["Sentence"]]=""
for key in tmp_hm:
result_list.append(IndexUtils.sentence_wrapper(key))
tmp_hm = {}
return result_list
    except Exception:
        print "Fail (searchXYPair) in x: " + x + " y: " + y
        print "Unexpected error:", sys.exc_info()[0]
print
return []
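Several of the snippets on this page (Examples 1, 4, 5, 8, 9, 10 and 11) rely on module-level searcher and analyzer objects that the excerpts do not show. A minimal setup sketch of what they appear to assume follows; the index path "./index" is a placeholder, and the analyzer must match whatever was used at indexing time.

import lucene
from lucene import File, IndexSearcher, SimpleFSDirectory, StandardAnalyzer, Version

lucene.initVM()  # start the embedded JVM once per process

# open an existing on-disk index read-only ("./index" is a placeholder)
directory = SimpleFSDirectory(File("./index"))
searcher = IndexSearcher(directory, True)
analyzer = StandardAnalyzer(Version.LUCENE_35)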
Example 2: main
def main(cls, argv):
allBooks = MatchAllDocsQuery()
parser = QueryParser(Version.LUCENE_CURRENT, "contents",
StandardAnalyzer(Version.LUCENE_CURRENT))
query = BooleanQuery()
query.add(allBooks, BooleanClause.Occur.SHOULD)
query.add(parser.parse("java OR action"), BooleanClause.Occur.SHOULD)
indexDir = System.getProperty("index.dir")
directory = SimpleFSDirectory(File(indexDir))
example = SortingExample(directory)
example.displayResults(query, Sort.RELEVANCE)
example.displayResults(query, Sort.INDEXORDER)
example.displayResults(query,
Sort(SortField("category", SortField.STRING)))
example.displayResults(query,
Sort(SortField("pubmonth", SortField.INT, True)))
example.displayResults(query,
Sort([SortField("category", SortField.STRING),
SortField.FIELD_SCORE,
SortField("pubmonth", SortField.INT, True)]))
example.displayResults(query,
Sort([SortField.FIELD_SCORE,
SortField("category", SortField.STRING)]))
directory.close()
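Example 2 reads the index location from the "index.dir" JVM system property (via System.getProperty). One way to supply it from Python before main() runs, as a sketch with a placeholder path:

import lucene
from lucene import System

lucene.initVM()
# set the JVM system property that the example reads back
System.setProperty("index.dir", "/path/to/index")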
Example 3: search
def search(r, keyword=""):
import logging
logger = logging.getLogger("search")
bench = Benchmark(logger)
from lucene import IndexSearcher, StandardAnalyzer, FSDirectory, QueryParser, File, Hit
import lucene, os
os.environ["JAVA_HOME"] = "/usr/local/jdk1.6.0_17"
lucene.initVM(lucene.CLASSPATH)
directory = FSDirectory.open(File(CONFIG.INDEX_PATH))
ROBOT_INDEX = IndexSearcher(directory, True)
ROBOT_ANALYZER = StandardAnalyzer()
keyword = keyword or r.GET["keyword"]
query = QueryParser("context", ROBOT_ANALYZER)
query = query.parse('"%s"' % keyword)
bench.start_mark("search")
hits = ROBOT_INDEX.search(query)
count = len(hits)
result = []
i = 0
for hit in hits:
i += 1
if i > 100:
break
doc = Hit.cast_(hit).getDocument()
result.append(SearchResult(doc, i, keyword))
ROBOT_INDEX.close()
et = bench.stop_mark()
return render_to_response("robot_search_result.html", {"result": result, "count": count, "elaspe": et})
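This snippet uses the Lucene 2.x Hits/Hit API (ROBOT_INDEX.search(query) with Hit.cast_), which was removed in Lucene 3.0. Under PyLucene 3.x the same loop would go through TopDocs instead; a sketch of the equivalent iteration, reusing the names from the example:

# Lucene 3.x style: search() takes an explicit result limit and
# returns a TopDocs object instead of the removed Hits class
top_docs = ROBOT_INDEX.search(query, 100)
result = []
for rank, score_doc in enumerate(top_docs.scoreDocs, 1):
    doc = ROBOT_INDEX.doc(score_doc.doc)
    result.append(SearchResult(doc, rank, keyword))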
Example 4: searchForDbpediaURI
def searchForDbpediaURI(self, uri):
    """
    Returns all anchor texts related to the given DBpedia URI, together with
    each anchor's target URI and the number of times the anchor text appears
    in the English Wikipedia.
    """
uri_old = uri
uri = uri.replace("http://dbpedia.org/resource/","")
    uri = "".join(re.findall(r'[\w\s]+', uri))
try:
qp = QueryParser(Version.LUCENE_35, "dbpedia_uri", analyzer)
qp.setDefaultOperator(qp.Operator.AND)
query = qp.parse(uri)
MAX = 10000
result = []
hits = searcher.search(query, MAX)
for hit in hits.scoreDocs:
doc = searcher.doc(hit.doc)
dbpedia_uri = doc["dbpedia_uri"].encode("utf-8")
if dbpedia_uri == uri_old:
result.append([doc["anchor"].encode("utf-8"), doc["anchor_uri"].encode("utf-8"), dbpedia_uri, doc["number"].encode("utf-8")])
return result
    except Exception:
        print "searchForDbpediaURI - Fail in uri: " + uri
return []
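Stripping non-word characters from the URI also deletes parentheses and commas that distinguish DBpedia resources such as Paris_(Texas) from Paris. If the goal is only to keep Lucene query syntax from breaking the parser, escaping is an alternative; a sketch using the static QueryParser.escape, with the same field and analyzer as above:

local_name = uri.replace("http://dbpedia.org/resource/", "")
# escape Lucene operators (: ( ) ^ * ? etc.) instead of deleting them
qp = QueryParser(Version.LUCENE_35, "dbpedia_uri", analyzer)
query = qp.parse(QueryParser.escape(local_name))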
Example 5: searchKey
def searchKey(self, key, rank=None):
query = ""
try:
MAX = 100000
qp = QueryParser(Version.LUCENE_35, "key", analyzer)
qp.setDefaultOperator(qp.Operator.AND)
query = qp.parse(key)
# print ("query",query)
hits = searcher.search(query, MAX)
sentence_list = []
for hit in hits.scoreDocs:
doc = searcher.doc(hit.doc)
try:
sentence_list.append(eval(doc.get("sentence").encode("utf-8")))
            except Exception:
print doc.get("sentence")
return sentence_list
    except Exception:
        print "Fail in receiving sentence with term " + key
        print "query:", query
        print "Unexpected error:", sys.exc_info()[0]
# raw_input("wait")
print
return []
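Examples 5 and 10 rebuild the stored POS-tagged sentence with eval, which will execute arbitrary code if the index ever contains a hostile string. When the stored value is a plain Python literal (for instance a list of (word, tag) tuples), ast.literal_eval is a safer drop-in; a sketch using the loop variables from Example 5:

import ast

# safely parse a stored literal such as "[('word', 'NN'), ('runs', 'VBZ')]"
sentence_list.append(ast.literal_eval(doc.get("sentence").encode("utf-8")))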
Example 6: query
def query(indexName, queryString):
indSearcher = IndexSearcher(SimpleFSDirectory(File(indexName)))
qp = QueryParser(Version.LUCENE_CURRENT, "content", StandardAnalyzer(Version.LUCENE_CURRENT))
qp.setDefaultOperator(qp.Operator.AND)
query = qp.parse(queryString.replace("-","_"))
aux = indSearcher.search(query, 100)
results = aux.scoreDocs
hits = aux.totalHits
ir = indSearcher.getIndexReader()
#results = collector.topDocs()
    res = []
    for r in results:
        # fetch each hit by its Lucene document id; the original loop used
        # the running counter i, which returns the wrong documents
        doc = ir.document(r.doc)
        res.append(doc.get('id'))
return res
Example 7: testPrefixQuery
def testPrefixQuery(self):
parser = QueryParser(Version.LUCENE_CURRENT, "category",
StandardAnalyzer(Version.LUCENE_CURRENT))
parser.setLowercaseExpandedTerms(False)
print parser.parse("/Computers/technology*").toString("category")
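Prefix and wildcard terms are not run through the analyzer, and by default the parser lowercases them, so /Computers/technology* would silently become /computers/technology* and miss terms indexed with capital letters. A sketch contrasting the two settings:

parser = QueryParser(Version.LUCENE_CURRENT, "category",
                     StandardAnalyzer(Version.LUCENE_CURRENT))

parser.setLowercaseExpandedTerms(True)   # the default behaviour
print parser.parse("/Computers/technology*").toString("category")
# -> /computers/technology*

parser.setLowercaseExpandedTerms(False)
print parser.parse("/Computers/technology*").toString("category")
# -> /Computers/technology*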
Example 8: search
def search(self, string, special=None):
query = ""
try:
MAX = 100000
#for dates such as 1931.08.06
string = string.replace("."," ")
        string = "".join(re.findall(r'[\w\s]+', string))
qp = QueryParser(Version.LUCENE_35, "title", analyzer)
qp.setDefaultOperator(qp.Operator.AND)
query = qp.parse(string)
# print ("query",query)
hits = searcher.search(query, MAX)
sentence_list = []
for hit in hits.scoreDocs:
doc = searcher.doc(hit.doc)
sentence_list.append(doc.get("title").encode("utf-8"))
return sentence_list
    except Exception:
        print "Fail in receiving sentence with term " + string
        print "query:", query
        print "Unexpected error:", sys.exc_info()[0]
# raw_input("wait")
print
return []
Example 9: searchForDbpediaURI
def searchForDbpediaURI(self, uri):
    """
    Returns all sentences that are tagged with the given DBpedia URI.
    """
print "in searchForDbpediaURI"
uri_old = uri
uri = uri.replace("http://dbpedia.org/ontology/","")
uri = uri.replace("http://dbpedia.org/property/","")
uri = uri.replace("http://dbpedia.org/resource/","")
    uri = "".join(re.findall(r'[\w\s]+', uri))
    # initialize result before the try block so the except handler can
    # return whatever was collected before the failure
    result = []
    try:
qp = QueryParser(Version.LUCENE_35, "URI", analyzer)
qp.setDefaultOperator(qp.Operator.AND)
query = qp.parse(uri)
print "query: "+str(query)
MAX = 500000
hits = searcher.search(query, MAX)
for hit in hits.scoreDocs:
doc = searcher.doc(hit.doc)
dbpedia_uri = doc["URI"]
if dbpedia_uri == uri_old:
result.append([IndexUtils.sentence_wrapper(doc["Sentence"]), doc["X"], doc["Y"],dbpedia_uri])
return result
    except Exception:
        print "Fail in uri: " + uri
        print "Unexpected error:", sys.exc_info()[0]
return result
Example 10: searchString
def searchString(self, string):
    'Searches for a string and returns a list of POS-tagged sentences.'
query = ""
#print("Input String: ",string)
try:
MAX = 100000
#for dates such as 1931.08.06
string = string.replace("."," ")
        string = "".join(re.findall(r'[\w\s]+', string))
#print("Input String2: ",string)
qp = QueryParser(Version.LUCENE_35, "sentence", analyzer)
qp.setDefaultOperator(qp.Operator.AND)
query = qp.parse(string)
#print ("query",query)
hits = searcher.search(query, MAX)
#print len(hits)
sentence_list = []
for hit in hits.scoreDocs:
doc = searcher.doc(hit.doc)
#print doc.get("sentence")
sentence_list.append(eval(doc.get("sentence").encode("utf-8")))
return sentence_list
    except Exception:
        print "Fail in receiving sentence with term " + string + " in search term"
        print "query:", query
        print "Unexpected error:", sys.exc_info()[0]
# raw_input("wait")
print
return []
Example 11: does_line_existNew
def does_line_existNew(self, line, x, y):
    """
    Checks whether the parsed sentence already exists in the index.
    """
query = ""
try:
        string = " ".join(re.findall(r'[\w]+', line))
qp = QueryParser(Version.LUCENE_35, "Sentence", analyzer)
qp.setDefaultOperator(qp.Operator.AND)
query = qp.parse(string)
MAX = 10
hits = searcher.search(query, MAX)
        return len(hits.scoreDocs) > 0
except Exception:
s_tmp = str(sys.exc_info())
if "too many boolean clauses" in s_tmp:
print "too many boolean clauses"
"""
Returns true, so that the sentence is not added each time, to avoid further error messages.
Only occours with very large sentences.
"""
return True
else:
print "Unexpected error:", sys.exc_info()[0]
print "in does line exist"
print s_tmp
return False
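The "too many boolean clauses" failure is Lucene's guard against oversized queries: a very long sentence parsed with the AND operator can exceed the default limit of 1024 clauses. Instead of swallowing the error, the limit can also be raised once at startup; a minimal sketch for PyLucene 3.x:

from lucene import BooleanQuery

# raise the global clause limit from its default of 1024
BooleanQuery.setMaxClauseCount(4096)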
Example 12: testPhraseQuery
def testPhraseQuery(self):
analyzer = StandardAnalyzer(Version.LUCENE_24)
q = QueryParser(Version.LUCENE_24, "field", analyzer).parse('"This is Some Phrase*"')
self.assertEqual('"some phrase"', q.toString("field"), "analyzed")
q = QueryParser(Version.LUCENE_CURRENT, "field", self.analyzer).parse('"term"')
self.assert_(TermQuery.instance_(q), "reduced to TermQuery")
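The first assertion shows that quoted text is analyzed before the PhraseQuery is built: the stop words "This" and "is" drop out, the remaining terms are lowercased, and the trailing * is ignored (wildcards are not supported inside phrases). A related knob, sketched below, is setPhraseSlop, which lets quoted phrases tolerate intervening words:

parser = QueryParser(Version.LUCENE_CURRENT, "field",
                     StandardAnalyzer(Version.LUCENE_CURRENT))
parser.setPhraseSlop(2)  # allow up to two positions of slack inside phrases
query = parser.parse('"some phrase"')
print query.toString("field")  # -> "some phrase"~2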
Example 13: main
def main(cls):
query = QueryParser(Version.LUCENE_CURRENT, "content",
cls.synonymAnalyzer).parse('"fox jumps"')
print "\"fox jumps\" parses to ", query.toString("content")
print "From AnalyzerUtils.tokensFromAnalysis: "
AnalyzerUtils.displayTokens(cls.synonymAnalyzer, "\"fox jumps\"")
print ''
Example 14: __init__
def __init__(self, emoticon, searcher, analyzer, english_only=False):
super(PMICalculator, self).__init__()
self.field = "emoticons"
self.emoticon = emoticon
self.searcher = searcher
self.analyzer = analyzer
self.escaped_emoticon = QueryParser.escape(self.emoticon)
self.query = QueryParser("emoticons", self.analyzer).parse(self.escaped_emoticon)
self.raw_stats_dir = "/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/"
if english_only:
country = "United States"
country_prefix = "US"
else:
country = None
country_prefix = ""
self.pmi_file_name = (
self.raw_stats_dir
+ normalizeEmoticonName(self.emoticon).rstrip("_")
+ ("_%s" % (country_prefix)) * english_only
+ ".pmidata"
)
self.sample_tweets_name = (
self.raw_stats_dir
+ normalizeEmoticonName(self.emoticon).rstrip("_")
+ ("_%s" % (country_prefix)) * english_only
+ ".samptweets"
)
self.sample_tweets_file = codecs.open(self.sample_tweets_name, encoding="utf-8", mode="w")
self.term_count_collector = TermCountCollector(searcher, emoticon, country)
print "starting query at: ", time.time()
hits = self.searcher.search(self.query, self.term_count_collector)
# print "terms: ", self.terms
if emoticon == ":P":
ee_two = QueryParser.escape(":p")
elif emoticon == "T_T":
ee_two = QueryParser.escape("TT")
elif emoticon == "^_^":
ee_two = QueryParser.escape("^^")
if emoticon in [":P", "T_T", "^_^"]:
q_two = QueryParser("emoticons", self.analyzer).parse(ee_two)
hits_two = self.searcher.search(q_two, self.term_count_collector)
self.terms = self.term_count_collector.getTerms()
self.query_result_count = self.term_count_collector.getDocCount()
for p_term, p_term_tweets in self.term_count_collector.popular_terms_hash.items():
for p_term_tweet in p_term_tweets:
self.sample_tweets_file.write("term: " + p_term + " tweet: " + p_term_tweet + "\n")
self.sample_tweets_file.close()
self.base_stats_file = open(
"/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/emoticon_pmi_stats.txt", "r"
)
self.n = int(self.base_stats_file.read().strip().split(":")[1])
print "computing PMI for query: ", self.emoticon, " at: ", time.time()
self.p_query_result = self.query_result_count * 1.0 / self.n
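Emoticons consist almost entirely of characters the query parser treats as syntax (:, ^, parentheses), which is why every query string above goes through the static QueryParser.escape before parsing. A minimal illustration of what it produces:

from lucene import QueryParser

print QueryParser.escape(":P")   # -> \:P
print QueryParser.escape("^_^")  # -> \^_\^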
Example 15: extractFeatureQueryWords
def extractFeatureQueryWords(query):
import string
from lucene import Document, TermQuery, Term
# create analyzer
aux_analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
try:
file = open('../features.txt', 'r')
featurelist = []
for line in file.readlines():
words_in_line = line.split()
featurelist += words_in_line
querywordlist = query.split()
featureQueryList = []
productQueryList = []
for word in querywordlist:
if word in featurelist:
featureQueryList.append(word)
else:
# create parser for word
aux_parser = QueryParser(Version.LUCENE_CURRENT, "title", aux_analyzer)
aux_query = aux_parser.parse(word)
scoreDocs = searcher.search(aux_query, 50).scoreDocs
if scoreDocs:
productQueryList.append(word)
featureQuery = ""
if featureQueryList:
featureQuery = "("
for i in range(len(featureQueryList)):
if i == len(featureQueryList) - 1:
featureQuery += featureQueryList[i] + ")"
else:
featureQuery += featureQueryList[i] + " AND "
print featureQuery
productQuery = ""
if productQueryList:
productQuery = "("
for i in range(len(productQueryList)):
if i == len(productQueryList) - 1:
productQuery += productQueryList[i] + ")"
else:
productQuery += productQueryList[i] + " AND "
return (featureQuery, productQuery, featureQueryList, productQueryList)
except Exception, ex:
print "Could not separate feature query words. Reason: ", ex
return ("", "(" + query + ")", [], querywordlist)