This article collects typical usage examples of the lucene.QueryParser.escape method in Python. If you are wondering what exactly QueryParser.escape does and how to use it, the curated code examples below may help. You can also explore further usage of the containing class, lucene.QueryParser.
Shown below are 9 code examples of QueryParser.escape, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
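Before the examples, a minimal sketch of what escape does: it backslash-escapes Lucene's query-syntax metacharacters (such as + - ! ( ) : ^ [ ] " { } ~ * ?) so arbitrary user input is parsed as literal text rather than as query syntax. The field name "text" and the Lucene 3.x-style calls are illustrative assumptions, mirroring Example 4 below:

import lucene
from lucene import QueryParser, StandardAnalyzer, Version

lucene.initVM()  # the JVM must be running before any Lucene class is used
raw = 'C++ AND (rust OR go?)'    # contains the +, (, ) and ? metacharacters
safe = QueryParser.escape(raw)   # -> 'C\+\+ AND \(rust OR go\?\)'
analyzer = StandardAnalyzer(Version.LUCENE_34)
query = QueryParser(Version.LUCENE_34, "text", analyzer).parse(safe)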
Example 1: __init__
# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
def __init__(self, emoticon, searcher, analyzer, english_only=False):
super(PMICalculator, self).__init__()
self.field = "emoticons"
self.emoticon = emoticon
self.searcher = searcher
self.analyzer = analyzer
self.escaped_emoticon = QueryParser.escape(self.emoticon)
self.query = QueryParser("emoticons", self.analyzer).parse(self.escaped_emoticon)
self.raw_stats_dir = "/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/"
if english_only:
country = "United States"
country_prefix = "US"
else:
country = None
country_prefix = ""
self.pmi_file_name = (
self.raw_stats_dir
+ normalizeEmoticonName(self.emoticon).rstrip("_")
+ ("_%s" % (country_prefix)) * english_only
+ ".pmidata"
)
self.sample_tweets_name = (
self.raw_stats_dir
+ normalizeEmoticonName(self.emoticon).rstrip("_")
+ ("_%s" % (country_prefix)) * english_only
+ ".samptweets"
)
self.sample_tweets_file = codecs.open(self.sample_tweets_name, encoding="utf-8", mode="w")
self.term_count_collector = TermCountCollector(searcher, emoticon, country)
print "starting query at: ", time.time()
hits = self.searcher.search(self.query, self.term_count_collector)
# print "terms: ", self.terms
if emoticon == ":P":
ee_two = QueryParser.escape(":p")
elif emoticon == "T_T":
ee_two = QueryParser.escape("TT")
elif emoticon == "^_^":
ee_two = QueryParser.escape("^^")
if emoticon in [":P", "T_T", "^_^"]:
q_two = QueryParser("emoticons", self.analyzer).parse(ee_two)
hits_two = self.searcher.search(q_two, self.term_count_collector)
self.terms = self.term_count_collector.getTerms()
self.query_result_count = self.term_count_collector.getDocCount()
for p_term, p_term_tweets in self.term_count_collector.popular_terms_hash.items():
for p_term_tweet in p_term_tweets:
self.sample_tweets_file.write("term: " + p_term + " tweet: " + p_term_tweet + "\n")
self.sample_tweets_file.close()
self.base_stats_file = open(
"/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/emoticon_pmi_stats.txt", "r"
)
self.n = int(self.base_stats_file.read().strip().split(":")[1])
print "computing PMI for query: ", self.emoticon, " at: ", time.time()
self.p_query_result = self.query_result_count * 1.0 / self.n
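The constructor ends by estimating the query prior P(emoticon) = query_result_count / n. The snippet stops before the PMI computation itself; for reference, a minimal sketch of the standard pointwise mutual information formula the class name implies (this helper is hypothetical, not part of the original class):

import math

def pmi(joint_count, term_count, query_count, n):
    # PMI(term, emoticon) = log2(P(term, emoticon) / (P(term) * P(emoticon))),
    # with each probability estimated as a count over the n indexed documents.
    p_joint = joint_count * 1.0 / n
    p_term = term_count * 1.0 / n
    p_query = query_count * 1.0 / n
    return math.log(p_joint / (p_term * p_query), 2)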
Example 2: run
# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
def run(searcher, analyzer):
while True:
print
print "Hit enter with no input to quit."
command = raw_input("Query:")
if command == '':
return
print
print "Searching for:", command
parsed_command = QueryParser.escape(command)
query = QueryParser("text", analyzer).parse(parsed_command)
hits = searcher.search(query)
print "%s total matching documents." % hits.length()
try:
hctr = 0
for hit in hits:
                hit_id = hits.id(hctr)
hit_tv = searcher.getIndexReader().getTermFreqVector(hits.id(hctr), "text")
trm_str = ""
for trm in hit_tv.getTerms(): trm_str += " " + trm
print "term string: ", trm_str.encode("ascii","ignore")
hctr += 1
if hctr > hits.length()-2 or hctr > 100: break
print 'uid:', hit.get("user_id"), 'timestamp: ', hit.get("timestamp"), "country: ", hit.get('country'), "emoticons: ", hit.get('emoticons')
except Exception, e:
print "failed to list hit: ", e
print
command = raw_input("Query:")
parsed_command = QueryParser.escape(command)
print "Searching for emoticon:", parsed_command
query = QueryParser("emoticons", analyzer).parse(parsed_command)
hits = searcher.search(query)
print "%s total matching documents." % hits.length()
try:
hctr = 0
for hit in hits:
                hit_id = hits.id(hctr)
hit_tv = searcher.getIndexReader().getTermFreqVector(hits.id(hctr), "text")
trm_str = ""
for trm in hit_tv.getTerms(): trm_str += " " + trm
print "term string: ", trm_str.encode("ascii","ignore")
hctr += 1
if hctr > hits.length()-2 or hctr > 100: break
print 'uid:', hit.get("user_id"), 'timestamp: ', hit.get("timestamp"), "country: ", hit.get('country'), "emoticons: ", hit.get('emoticons')
except Exception, e:
print "failed to list hit: ", e
Example 3: calculateEmoticonDiffusion
# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
def calculateEmoticonDiffusion(emoticon, searcher, analyzer, user_location_hash, usage_threshold = 1, comm_threshold = 1):
raw_stats_dir = "/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/"
emoticon_stats_file = open("/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/emoticon_diffusion_stats.txt","r")
total_users = int(emoticon_stats_file.read().strip())
emoticon_stats_file.close()
emoticon_file_name = raw_stats_dir + normalizeEmoticonName(emoticon).rstrip('_')+".diffusion_bidir"
print "Calculating Diffusion for: ", emoticon, " at: ", time.time()
escaped_emoticon = QueryParser.escape(emoticon)
query = QueryParser("emoticons", analyzer).parse(escaped_emoticon)
hits = searcher.search(query)
print "%s total matching documents." % hits.length()
if hits.length() == 0: return
print "compiling diffusion stats at: ", time.time()
emoticon_users_by_time_hash = {}
emoticon_users_adopters_hash = {}
emoticon_users_non_adopters_hash = {}
users_exposure_hash = {}
reverse_users_exposure_hash = {}
try:
hctr = 0
for hit in hits:
hctr += 1
if hctr%100000==0: print "on hit: ", hctr
#if hctr > 100000: break
if hctr == hits.length(): break
uid, timestamp, country, emoticons, user_id_replied = hit.get("user_id"), int(hit.get("timestamp")), hit.get('country'), hit.get('emoticons'), hit.get('user_id_replied')
emoticon_users_by_time_hash[uid] = emoticon_users_by_time_hash.get(uid,[])+[timestamp]
except Exception, e:
pass
Example 4: handle
# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
def handle(self):
# self.request is the TCP socket connected to the client
# self.rfile is a file-like object created by the handler;
# we can now use e.g. readline() instead of raw recv() calls
self.data = self.request.recv(1024).strip()
# print "{} wrote:".format(self.client_address[0])
# print self.data
    # look up the received query in the index and send back the serialized hits
MAX = 50
analyzer = StandardAnalyzer(Version.LUCENE_34)
self.data = QueryParser.escape(self.data)
query = QueryParser(Version.LUCENE_34, "contents", analyzer).parse(self.data)
hits = searcher.search(query, MAX)
if settings.DEBUG:
print "Found %d document(s) that matched query '%s':" % (hits.totalHits, query)
serialized = self.serialize(hits)
self.request.send(serialized)
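handle() follows Python 2's SocketServer.BaseRequestHandler protocol and relies on a module-level searcher (plus a Django-style settings.DEBUG flag). A minimal sketch of how such a handler might be served; the class name, index path, and port are illustrative assumptions:

import lucene, SocketServer
from lucene import IndexSearcher, SimpleFSDirectory, File

class LuceneRequestHandler(SocketServer.BaseRequestHandler):
    pass  # the handle() method above would live here

lucene.initVM()
# One searcher shared by all requests, matching the global used in handle().
searcher = IndexSearcher(SimpleFSDirectory(File("/path/to/index")))
server = SocketServer.TCPServer(("localhost", 9999), LuceneRequestHandler)
server.serve_forever()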
Example 5: getEmoticonPropagationCurves
# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
def getEmoticonPropagationCurves(emoticon, searcher, analyzer):
raw_stats_dir = "/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/"
emoticon_file_name = raw_stats_dir + normalizeEmoticonName(emoticon).rstrip('_')+".timehash"
emoticon_stats_file = open("/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/emoticon_stats.json","r")
emoticon_stats_hash = json.loads(emoticon_stats_file.read())
print "Searching for: ", emoticon, " at: ", time.time()
escaped_emoticon = QueryParser.escape(emoticon)
query = QueryParser("emoticons", analyzer).parse(escaped_emoticon)
hits = searcher.search(query)
print "%s total matching documents." % hits.length()
if hits.length() == 0: return
print " compiling propagation curve at: ", time.time()
emoticon_propagation_hash = {}
countryset = set()
daytshash = {}
try:
hctr = 0
for hit in hits:
hctr += 1
if hctr%100000==0: print "on hit: ", hctr
if hctr == hits.length(): break
uid, timestamp, country, emoticons, user_id_replied = hit.get("user_id"), hit.get("timestamp"), hit.get('country'), hit.get('emoticons'), hit.get('user_id_replied')
num_replies = int(user_id_replied != '0')
countryset.add(country)
timestruct = time.gmtime(int(timestamp))
daysincestart = (timestruct[0]-2005)*365+timestruct[7]
daystartts = int(timestamp)-60*60*timestruct[3]-60*timestruct[4]-timestruct[5]
nextdaystartts = daystartts+86400
daytshash[daystartts] = {'days since start':daysincestart, 'next day ts':nextdaystartts}
total_emoticon_count = string.count(emoticons, emoticon)
if daysincestart in emoticon_propagation_hash:
#emoticon_propagation_hash[daysincestart]['total'] += total_emoticon_count
emoticon_propagation_hash[daysincestart]['total'] += 1
#emoticon_propagation_hash[daysincestart][country] = emoticon_propagation_hash[daysincestart].get(country,0) + total_emoticon_count
emoticon_propagation_hash[daysincestart][country] = emoticon_propagation_hash[daysincestart].get(country,0) + 1
emoticon_propagation_hash[daysincestart]['total_in_replies'] += num_replies
else:
emoticon_propagation_hash[daysincestart] = {'total':total_emoticon_count, 'total_in_replies':num_replies, country:total_emoticon_count, \
'total tweets':0, 'total emoticon tweets':0, 'total http emoticons':0}
except Exception, e:
print "failed to list hit: ", e
Example 6: getBaselineStatistics
# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
def getBaselineStatistics(searcher, analyzer):
baseline_stats_hash = {}
day_one = time.strptime("01 01 2005", "%d %m %Y")
day_one_ts = int(time.mktime(day_one))
max_day_ctr = 1830
day_ctr = 0
while day_ctr < max_day_ctr:
if day_ctr%100 == 0: print "on day ctr: ", day_ctr, " at time: ", time.time()
curr_day_ts = day_one_ts + 86400*day_ctr
next_day_ts = day_one_ts + 86400*(day_ctr+1)
day_ctr+=1
range_filter = NumericRangeFilter.newIntRange("timestamp", Integer(curr_day_ts), Integer(next_day_ts), True, True)
#all tweets in day range
all_docs_query = MatchAllDocsQuery()
tweets_in_range_search = searcher.search(all_docs_query, range_filter)
num_tweets_in_range = tweets_in_range_search.length()
#all tweets in day range US
US_tweets_base_query = MatchAllDocsQuery()
#us_escape_one = QueryParser("country", analyzer).escape("United")
#us_escape_two =
us_query = TermQuery(Term("country", "United States"))
#us_query.add(Term("country","United"))
#us_query.add(Term("country","States"))
US_tweets_country_query = us_query
#US_tweets_country_query = QueryParser("country", analyzer).parse(us_query)
US_tweets_query_filter = QueryFilter(US_tweets_country_query)
compound_filter_US_tweets = BooleanFilter()
compound_filter_US_tweets.add(FilterClause(range_filter, BooleanClause.Occur.MUST))
compound_filter_US_tweets.add(FilterClause(US_tweets_query_filter, BooleanClause.Occur.MUST))
US_tweets_in_range_search = searcher.search(US_tweets_base_query, compound_filter_US_tweets)
num_US_tweets_in_range = US_tweets_in_range_search.length()
#all tweets in day range japan
JP_tweets_base_query = MatchAllDocsQuery()
JP_tweets_country_query = QueryParser("country", analyzer).parse("Japan")
JP_tweets_query_filter = QueryFilter(JP_tweets_country_query)
compound_filter_JP_tweets = BooleanFilter()
compound_filter_JP_tweets.add(FilterClause(range_filter, BooleanClause.Occur.MUST))
compound_filter_JP_tweets.add(FilterClause(JP_tweets_query_filter, BooleanClause.Occur.MUST))
JP_tweets_in_range_search = searcher.search(JP_tweets_base_query, compound_filter_JP_tweets)
num_JP_tweets_in_range = JP_tweets_in_range_search.length()
        #if day_ctr%10 == 0: print "US tweets: ", num_US_tweets_in_range, " JP tweets: ", num_JP_tweets_in_range
#all tweets containing emoticons
empty_term = Term("emoticons")
empty_term_prefix = PrefixQuery(empty_term)
all_emoticons_docs_query_filter = QueryFilter(empty_term_prefix)
compound_filter = BooleanFilter()
compound_filter.add(FilterClause(range_filter, BooleanClause.Occur.MUST))
compound_filter.add(FilterClause(all_emoticons_docs_query_filter, BooleanClause.Occur.MUST))
emoticon_tweets_in_range_search = searcher.search(all_docs_query, compound_filter)
num_emoticon_tweets_in_range = emoticon_tweets_in_range_search.length()
#all tweets containing "http" or "https"
bq = BooleanQuery()
http_str = QueryParser.escape("http://")
http_query = QueryParser("emoticons", analyzer).parse(http_str)
https_str = QueryParser.escape("https://")
https_query = QueryParser("emoticons", analyzer).parse(https_str)
bq.add(http_query, BooleanClause.Occur.SHOULD)
bq.add(https_query, BooleanClause.Occur.SHOULD)
bq_search = searcher.search(bq, range_filter)
num_http_emoticons = bq_search.length()
baseline_stats_hash[day_ctr] = {'total tweets':num_tweets_in_range, 'emoticons':num_emoticon_tweets_in_range, 'http':num_http_emoticons, 'US tweets':num_US_tweets_in_range, \
'JP tweets':num_JP_tweets_in_range}
baseline_stats_text_file = open("/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/emoticon_stats.txt","w")
raw_stats_list = sorted(baseline_stats_hash.items(), key = lambda x: int(x[0]))
baseline_stats_text_file.write("day total emoticons http US JP\n")
for rs in raw_stats_list: baseline_stats_text_file.write("%s %s %s %s %s %s\n" %(rs[0], rs[1]["total tweets"], rs[1]["emoticons"], rs[1]["http"], rs[1]['US tweets'], \
rs[1]['JP tweets']))
baseline_stats_text_file.close()
baseline_stats_file = open("/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/emoticon_stats.json","w")
baseline_stats_file.write(json.dumps(baseline_stats_hash))
baseline_stats_file.close()
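The filter plumbing above is the densest part of these examples: each count is a MatchAllDocsQuery restricted by a BooleanFilter that ANDs a NumericRangeFilter (the day window) with a QueryFilter (the country or prefix predicate). The same pattern distilled, with a hypothetical day window and the Lucene 3.x API used throughout:

t0, t1 = 1104537600, 1104624000  # one illustrative day window (epoch seconds)
day_filter = NumericRangeFilter.newIntRange("timestamp", Integer(t0), Integer(t1), True, True)
jp_filter = QueryFilter(TermQuery(Term("country", "Japan")))
both = BooleanFilter()
both.add(FilterClause(day_filter, BooleanClause.Occur.MUST))
both.add(FilterClause(jp_filter, BooleanClause.Occur.MUST))
num_jp = searcher.search(MatchAllDocsQuery(), both).length()  # docs passing both filters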
Example 7: raw_input
# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
try:
    hctr = 0
for hit in hits:
        hit_id = hits.id(hctr)
hit_tv = searcher.getIndexReader().getTermFreqVector(hits.id(hctr), "text")
trm_str = ""
for trm in hit_tv.getTerms(): trm_str += " " + trm
print "term string: ", trm_str.encode("ascii","ignore")
hctr += 1
if hctr > hits.length()-2 or hctr > 100: break
print 'uid:', hit.get("user_id"), 'timestamp: ', hit.get("timestamp"), "country: ", hit.get('country'), "emoticons: ", hit.get('emoticons')
except Exception, e:
print "failed to list hit: ", e
print
command = raw_input("Query:")
parsed_command = QueryParser.escape(command)
print "Searching for uid:", parsed_command
query = QueryParser("user_id", analyzer).parse(parsed_command)
hits = searcher.search(query)
print "%s total matching documents." % hits.length()
try:
hctr = 0
for hit in hits:
        hit_id = hits.id(hctr)
hit_tv = searcher.getIndexReader().getTermFreqVector(hits.id(hctr), "text")
trm_str = ""
for trm in hit_tv.getTerms(): trm_str += " " + trm
print "term string: ", trm_str.encode("ascii","ignore")
hctr += 1
if hctr > hits.length()-2 or hctr > 100: break
Example 8: process_query_param
# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
def process_query_param(param):
"""
Escapes and lowercases all query params for searching in the lucene index.
"""
processed_param = QueryParser.escape(param)
return processed_param.lower()
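A quick interactive check of the helper (the query string is illustrative): escape backslash-protects the metacharacters, and lower() normalizes case so lookups match a lowercased index:

>>> process_query_param("Rust (2021): why?")
'rust \\(2021\\)\\: why\\?'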
Example 9: int
# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
reverse_users_exposure_hash = {}
try:
hctr = 0
for hit in hits:
hctr += 1
if hctr%100000==0: print "on hit: ", hctr
#if hctr > 100000: break
if hctr == hits.length(): break
uid, timestamp, country, emoticons, user_id_replied = hit.get("user_id"), int(hit.get("timestamp")), hit.get('country'), hit.get('emoticons'), hit.get('user_id_replied')
emoticon_users_by_time_hash[uid] = emoticon_users_by_time_hash.get(uid,[])+[timestamp]
except Exception, e:
pass
#print "failed to list hit: ", e
if emoticon == ":P":
ee_two = QueryParser.escape(":p")
elif emoticon == "T_T":
ee_two = QueryParser.escape("TT")
elif emoticon == "^_^":
ee_two = QueryParser.escape("^^")
if emoticon in [":P","T_T","^_^"]:
q_two = QueryParser("emoticons",analyzer).parse(ee_two)
hits_two = searcher.search(q_two)
try:
hctr_two = 0
for hit_two in hits_two:
hctr_two += 1
if hctr_two%100000==0: print "on hit: ", hctr_two
#if hctr > 100000: break
if hctr_two == hits_two.length(): break
uid, timestamp, country, emoticons, user_id_replied = hit_two.get("user_id"), int(hit_two.get("timestamp")), hit_two.get('country'), hit_two.get('emoticons'), hit_two.get('user_id_replied')