

Python QueryParser.escape Method Code Examples

This article collects typical usage examples of the Python method lucene.QueryParser.escape. If you are wondering what QueryParser.escape does, how to call it, or what real-world usage looks like, the curated examples here should help. You can also explore further usage examples of the containing class, lucene.QueryParser.


The sections below present 9 code examples of QueryParser.escape, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code examples.
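
Before the individual examples, here is a minimal sketch of the escape-then-parse pattern they all share. QueryParser.escape is a static method that backslash-escapes Lucene query syntax characters such as + - && || ! ( ) { } [ ] ^ " ~ * ? : \ so that user input is parsed as literal text rather than as query operators. Note that this sketch assumes the older PyLucene API whose QueryParser constructor takes no Version argument, as in most examples below (Example 4 shows the Version-taking variant); constructor signatures vary across PyLucene releases.

from lucene import initVM, QueryParser, StandardAnalyzer

initVM()                                   # start the JVM first; some older builds require a classpath argument
analyzer = StandardAnalyzer()
raw = "^_^ (happy)"                        # contains the special characters ^, (, and )
escaped = QueryParser.escape(raw)          # -> "\^_\^ \(happy\)"
query = QueryParser("emoticons", analyzer).parse(escaped)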

Example 1: __init__

# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
    def __init__(self, emoticon, searcher, analyzer, english_only=False):
        super(PMICalculator, self).__init__()

        self.field = "emoticons"
        self.emoticon = emoticon
        self.searcher = searcher
        self.analyzer = analyzer
        self.escaped_emoticon = QueryParser.escape(self.emoticon)
        self.query = QueryParser("emoticons", self.analyzer).parse(self.escaped_emoticon)
        self.raw_stats_dir = "/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/"
        if english_only:
            country = "United States"
            country_prefix = "US"
        else:
            country = None
            country_prefix = ""
        self.pmi_file_name = (
            self.raw_stats_dir
            + normalizeEmoticonName(self.emoticon).rstrip("_")
            + ("_%s" % (country_prefix)) * english_only
            + ".pmidata"
        )
        self.sample_tweets_name = (
            self.raw_stats_dir
            + normalizeEmoticonName(self.emoticon).rstrip("_")
            + ("_%s" % (country_prefix)) * english_only
            + ".samptweets"
        )
        self.sample_tweets_file = codecs.open(self.sample_tweets_name, encoding="utf-8", mode="w")
        self.term_count_collector = TermCountCollector(searcher, emoticon, country)
        print "starting query at: ", time.time()
        hits = self.searcher.search(self.query, self.term_count_collector)
        # print "terms: ", self.terms
        if emoticon == ":P":
            ee_two = QueryParser.escape(":p")
        elif emoticon == "T_T":
            ee_two = QueryParser.escape("TT")
        elif emoticon == "^_^":
            ee_two = QueryParser.escape("^^")
        if emoticon in [":P", "T_T", "^_^"]:
            q_two = QueryParser("emoticons", self.analyzer).parse(ee_two)
            hits_two = self.searcher.search(q_two, self.term_count_collector)
        self.terms = self.term_count_collector.getTerms()
        self.query_result_count = self.term_count_collector.getDocCount()
        for p_term, p_term_tweets in self.term_count_collector.popular_terms_hash.items():
            for p_term_tweet in p_term_tweets:
                self.sample_tweets_file.write("term: " + p_term + " tweet: " + p_term_tweet + "\n")
        self.sample_tweets_file.close()
        self.base_stats_file = open(
            "/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/emoticon_pmi_stats.txt", "r"
        )
        self.n = int(self.base_stats_file.read().strip().split(":")[1])

        print "computing PMI for query: ", self.emoticon, " at: ", time.time()

        self.p_query_result = self.query_result_count * 1.0 / self.n
Author: vlad43210, Project: emoticons, Lines: 58, Source: calculate_emoticon_pmi.py
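
The excerpt above stops after computing only the marginal probability p_query_result = query_result_count / n. The pointwise mutual information the class is named for also needs per-term and joint counts, which the truncated code does not show; a hedged sketch of that final step, with hypothetical variables co_count (tweets containing both the term and the emoticon) and term_count (tweets containing the term), might be:

import math

def pmi(co_count, term_count, query_result_count, n):
    # PMI(term, emoticon) = log( p(term, emoticon) / (p(term) * p(emoticon)) )
    p_joint = co_count * 1.0 / n            # p(term, emoticon)
    p_term = term_count * 1.0 / n           # p(term)
    p_query = query_result_count * 1.0 / n  # p(emoticon), i.e. self.p_query_result
    return math.log(p_joint / (p_term * p_query))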

Example 2: run

# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
def run(searcher, analyzer):
    while True:
        print
        print "Hit enter with no input to quit."
        command = raw_input("Query:")
        if command == '':
            return

        print
        print "Searching for:", command
        parsed_command = QueryParser.escape(command)
        query = QueryParser("text", analyzer).parse(parsed_command)
        hits = searcher.search(query)
        print "%s total matching documents." % hits.length()

        try:
            hctr = 0
            for hit in hits:
                hit_id = hits.id(hctr)
                hit_tv = searcher.getIndexReader().getTermFreqVector(hits.id(hctr), "text")
                trm_str = ""
                for trm in hit_tv.getTerms(): trm_str += " " + trm
                print "term string: ", trm_str.encode("ascii","ignore")
                hctr += 1
                if hctr > hits.length()-2 or hctr > 100: break
                print 'uid:', hit.get("user_id"), 'timestamp: ', hit.get("timestamp"), "country: ", hit.get('country'), "emoticons: ", hit.get('emoticons')
        except Exception, e: 
            print "failed to list hit: ", e

        print
        command = raw_input("Query:")
        parsed_command = QueryParser.escape(command)
        print "Searching for emoticon:", parsed_command
        query = QueryParser("emoticons", analyzer).parse(parsed_command)
        hits = searcher.search(query)
        print "%s total matching documents." % hits.length()

        try:
            hctr = 0
            for hit in hits:
                hit_id = hits.id(hctr)
                hit_tv = searcher.getIndexReader().getTermFreqVector(hits.id(hctr), "text")
                trm_str = ""
                for trm in hit_tv.getTerms(): trm_str += " " + trm
                print "term string: ", trm_str.encode("ascii","ignore")
                hctr += 1
                if hctr > hits.length()-2 or hctr > 100: break
                print 'uid:', hit.get("user_id"), 'timestamp: ', hit.get("timestamp"), "country: ", hit.get('country'), "emoticons: ", hit.get('emoticons')
        except Exception, e: 
            print "failed to list hit: ", e
Author: vlad43210, Project: emoticons, Lines: 52, Source: search_files.py

Example 3: calculateEmoticonDiffusion

# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
def calculateEmoticonDiffusion(emoticon, searcher, analyzer, user_location_hash, usage_threshold = 1, comm_threshold = 1):
    raw_stats_dir = "/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/"
    emoticon_stats_file = open("/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/emoticon_diffusion_stats.txt","r") 
    total_users = int(emoticon_stats_file.read().strip())
    emoticon_stats_file.close()

    emoticon_file_name = raw_stats_dir + normalizeEmoticonName(emoticon).rstrip('_')+".diffusion_bidir"
    print "Calculating Diffusion for: ", emoticon, " at: ", time.time()
    escaped_emoticon = QueryParser.escape(emoticon)
    query = QueryParser("emoticons", analyzer).parse(escaped_emoticon)
    hits = searcher.search(query)
    print "%s total matching documents." % hits.length()
    if hits.length() == 0: return

    print "compiling diffusion stats at: ", time.time()
    emoticon_users_by_time_hash = {}
    emoticon_users_adopters_hash = {}
    emoticon_users_non_adopters_hash = {}
    users_exposure_hash = {}
    reverse_users_exposure_hash = {}
    try:
        hctr = 0
        for hit in hits:
            hctr += 1
            if hctr%100000==0: print "on hit: ", hctr
            #if hctr > 100000: break
            if hctr == hits.length(): break
            uid, timestamp, country, emoticons, user_id_replied = hit.get("user_id"), int(hit.get("timestamp")), hit.get('country'), hit.get('emoticons'), hit.get('user_id_replied')
            emoticon_users_by_time_hash[uid] = emoticon_users_by_time_hash.get(uid,[])+[timestamp]
    except Exception, e:
        pass
Author: vlad43210, Project: emoticons, Lines: 33, Source: calculate_emoticon_diffusion_bidir.py

Example 4: handle

# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
    def handle(self):
        # self.request is the TCP socket connected to the client
        # self.rfile is a file-like object created by the handler;
        # we can now use e.g. readline() instead of raw recv() calls
        self.data = self.request.recv(1024).strip()
        # print "{} wrote:".format(self.client_address[0])
        # print self.data
        # just send back the same data, but upper-cased

        MAX = 50
        analyzer = StandardAnalyzer(Version.LUCENE_34)
        self.data = QueryParser.escape(self.data)
        query = QueryParser(Version.LUCENE_34, "contents", analyzer).parse(self.data)

        hits = searcher.search(query, MAX)
        if settings.DEBUG:
            print "Found %d document(s) that matched query '%s':" % (hits.totalHits, query)
        serialized = self.serialize(hits)
        self.request.send(serialized)
Author: paulmaki, Project: diffindexer, Lines: 21, Source: server.py
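
Example 4 shows only the handle method; the enclosing request-handler class and server loop are not part of the excerpt. A hypothetical Python 2 wiring (the class name SearchRequestHandler, the host, the port, and the module-level searcher are illustrative assumptions, not from the original server.py):

import SocketServer  # Python 2 module name; renamed socketserver in Python 3

class SearchRequestHandler(SocketServer.BaseRequestHandler):
    # ... the handle() and serialize() methods from Example 4 go here ...
    pass

if __name__ == "__main__":
    HOST, PORT = "localhost", 9999
    SocketServer.TCPServer((HOST, PORT), SearchRequestHandler).serve_forever()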

Example 5: getEmoticonPropagationCurves

# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
def getEmoticonPropagationCurves(emoticon, searcher, analyzer):
    raw_stats_dir = "/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/"
    emoticon_file_name = raw_stats_dir + normalizeEmoticonName(emoticon).rstrip('_')+".timehash"
    emoticon_stats_file = open("/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/emoticon_stats.json","r") 
    emoticon_stats_hash = json.loads(emoticon_stats_file.read())
    print "Searching for: ", emoticon, " at: ", time.time()
    escaped_emoticon = QueryParser.escape(emoticon)
    query = QueryParser("emoticons", analyzer).parse(escaped_emoticon)
    hits = searcher.search(query)
    print "%s total matching documents." % hits.length()
    if hits.length() == 0: return

    print " compiling propagation curve at: ", time.time()
    emoticon_propagation_hash = {}
    countryset = set()
    daytshash = {}
    try:
        hctr = 0
        for hit in hits:
            hctr += 1
            if hctr%100000==0: print "on hit: ", hctr
            if hctr == hits.length(): break
            uid, timestamp, country, emoticons, user_id_replied = hit.get("user_id"), hit.get("timestamp"), hit.get('country'), hit.get('emoticons'), hit.get('user_id_replied')
            num_replies = int(user_id_replied != '0')
            countryset.add(country)
            timestruct = time.gmtime(int(timestamp))
            daysincestart = (timestruct[0]-2005)*365+timestruct[7]  # (years since 2005)*365 + day of year (tm_yday); ignores leap days
            daystartts = int(timestamp)-60*60*timestruct[3]-60*timestruct[4]-timestruct[5]  # timestamp rounded down to the start of its UTC day
            nextdaystartts = daystartts+86400
            daytshash[daystartts] = {'days since start':daysincestart, 'next day ts':nextdaystartts}
            total_emoticon_count = string.count(emoticons, emoticon)
            if daysincestart in emoticon_propagation_hash:
                #emoticon_propagation_hash[daysincestart]['total'] += total_emoticon_count
                emoticon_propagation_hash[daysincestart]['total'] += 1
                #emoticon_propagation_hash[daysincestart][country] = emoticon_propagation_hash[daysincestart].get(country,0) + total_emoticon_count
                emoticon_propagation_hash[daysincestart][country] = emoticon_propagation_hash[daysincestart].get(country,0) + 1
                emoticon_propagation_hash[daysincestart]['total_in_replies'] += num_replies
            else:
                emoticon_propagation_hash[daysincestart] = {'total':total_emoticon_count, 'total_in_replies':num_replies, country:total_emoticon_count, \
                                                            'total tweets':0, 'total emoticon tweets':0, 'total http emoticons':0}
    except Exception, e: 
        print "failed to list hit: ", e
Author: vlad43210, Project: emoticons, Lines: 44, Source: get_emoticon_propagation_curves.py

Example 6: getBaselineStatistics

# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
def getBaselineStatistics(searcher, analyzer):
    baseline_stats_hash = {}
    day_one = time.strptime("01 01 2005", "%d %m %Y")
    day_one_ts = int(time.mktime(day_one))
    max_day_ctr = 1830
    day_ctr = 0
    while day_ctr < max_day_ctr:
        if day_ctr%100 == 0: print "on day ctr: ", day_ctr, " at time: ", time.time()
        curr_day_ts = day_one_ts + 86400*day_ctr
        next_day_ts = day_one_ts + 86400*(day_ctr+1)
        day_ctr+=1

        range_filter = NumericRangeFilter.newIntRange("timestamp", Integer(curr_day_ts), Integer(next_day_ts), True, True)
        
        #all tweets in day range
        all_docs_query = MatchAllDocsQuery()
        tweets_in_range_search = searcher.search(all_docs_query, range_filter)
        num_tweets_in_range = tweets_in_range_search.length()

        #all tweets in day range US
        US_tweets_base_query = MatchAllDocsQuery()
        #us_escape_one = QueryParser("country", analyzer).escape("United")
        #us_escape_two = 
        us_query = TermQuery(Term("country", "United States"))
        #us_query.add(Term("country","United"))
        #us_query.add(Term("country","States"))
        US_tweets_country_query = us_query
        #US_tweets_country_query = QueryParser("country", analyzer).parse(us_query)
        US_tweets_query_filter = QueryFilter(US_tweets_country_query)
        compound_filter_US_tweets = BooleanFilter()
        compound_filter_US_tweets.add(FilterClause(range_filter, BooleanClause.Occur.MUST))
        compound_filter_US_tweets.add(FilterClause(US_tweets_query_filter, BooleanClause.Occur.MUST))
        US_tweets_in_range_search = searcher.search(US_tweets_base_query, compound_filter_US_tweets)
        num_US_tweets_in_range = US_tweets_in_range_search.length()
        
        #all tweets in day range japan
        JP_tweets_base_query = MatchAllDocsQuery()
        JP_tweets_country_query = QueryParser("country", analyzer).parse("Japan")
        JP_tweets_query_filter = QueryFilter(JP_tweets_country_query)
        compound_filter_JP_tweets = BooleanFilter()
        compound_filter_JP_tweets.add(FilterClause(range_filter, BooleanClause.Occur.MUST))
        compound_filter_JP_tweets.add(FilterClause(JP_tweets_query_filter, BooleanClause.Occur.MUST))
        JP_tweets_in_range_search = searcher.search(JP_tweets_base_query, compound_filter_JP_tweets)
        num_JP_tweets_in_range = JP_tweets_in_range_search.length()
        #day_ctr%10 == 0: print "US tweets: ", num_US_tweets_in_range, " JP tweets: ", num_JP_tweets_in_range
        
        #all tweets containing emoticons
        empty_term = Term("emoticons")  # empty-text term; a PrefixQuery over it matches any document with an "emoticons" value
        empty_term_prefix = PrefixQuery(empty_term)
        all_emoticons_docs_query_filter = QueryFilter(empty_term_prefix)
        compound_filter = BooleanFilter()
        compound_filter.add(FilterClause(range_filter, BooleanClause.Occur.MUST))
        compound_filter.add(FilterClause(all_emoticons_docs_query_filter, BooleanClause.Occur.MUST))
        emoticon_tweets_in_range_search = searcher.search(all_docs_query, compound_filter)
        num_emoticon_tweets_in_range = emoticon_tweets_in_range_search.length()

        #all tweets containing "http" or "https"
        bq = BooleanQuery()
        http_str = QueryParser.escape("http://")
        http_query = QueryParser("emoticons", analyzer).parse(http_str)
        https_str = QueryParser.escape("https://")
        https_query = QueryParser("emoticons", analyzer).parse(https_str)
        bq.add(http_query, BooleanClause.Occur.SHOULD)
        bq.add(https_query, BooleanClause.Occur.SHOULD)
        bq_search = searcher.search(bq, range_filter)
        num_http_emoticons = bq_search.length()
        
        baseline_stats_hash[day_ctr] = {'total tweets':num_tweets_in_range, 'emoticons':num_emoticon_tweets_in_range, 'http':num_http_emoticons, 'US tweets':num_US_tweets_in_range, \
                                        'JP tweets':num_JP_tweets_in_range}

    baseline_stats_text_file = open("/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/emoticon_stats.txt","w")
    raw_stats_list = sorted(baseline_stats_hash.items(), key = lambda x: int(x[0]))
    baseline_stats_text_file.write("day total emoticons http US JP\n")
    for rs in raw_stats_list: baseline_stats_text_file.write("%s %s %s %s %s %s\n" %(rs[0], rs[1]["total tweets"], rs[1]["emoticons"], rs[1]["http"], rs[1]['US tweets'], \
                                                             rs[1]['JP tweets']))
    baseline_stats_text_file.close()
    baseline_stats_file = open("/Volumes/TerraFirma/SharedData/vdb5/emoticons_raw_files/emoticon_stats.json","w")
    baseline_stats_file.write(json.dumps(baseline_stats_hash))
    baseline_stats_file.close()
Author: vlad43210, Project: emoticons, Lines: 81, Source: get_emoticon_propagation_curves.py

Example 7: raw_input

# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
            hctr = 0
            for hit in hits:
                hit_id = hits.id(hctr)
                hit_tv = searcher.getIndexReader().getTermFreqVector(hits.id(hctr), "text")
                trm_str = ""
                for trm in hit_tv.getTerms(): trm_str += " " + trm
                print "term string: ", trm_str.encode("ascii","ignore")
                hctr += 1
                if hctr > hits.length()-2 or hctr > 100: break
                print 'uid:', hit.get("user_id"), 'timestamp: ', hit.get("timestamp"), "country: ", hit.get('country'), "emoticons: ", hit.get('emoticons')
        except Exception, e: 
            print "failed to list hit: ", e

        print
        command = raw_input("Query:")
        parsed_command = QueryParser.escape(command)
        print "Searching for uid:", parsed_command
        query = QueryParser("user_id", analyzer).parse(parsed_command)
        hits = searcher.search(query)
        print "%s total matching documents." % hits.length()

        try:
            hctr = 0
            for hit in hits:
                hit_id = hits.id(hctr)
                hit_tv = searcher.getIndexReader().getTermFreqVector(hits.id(hctr), "text")
                trm_str = ""
                for trm in hit_tv.getTerms(): trm_str += " " + trm
                print "term string: ", trm_str.encode("ascii","ignore")
                hctr += 1
                if hctr > hits.length()-2 or hctr > 100: break
Author: vlad43210, Project: emoticons, Lines: 33, Source: search_files.py

Example 8: process_query_param

# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
def process_query_param(param):
    """
    Escapes and lowercases all query params for searching in the lucene index.
    """
    processed_param = QueryParser.escape(param)
    return processed_param.lower()
Author: bethune, Project: disease-ontology, Lines: 8, Source: do_lucene_search.py
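
A hypothetical call site for Example 8 (the field name "name", the analyzer, and the input string are illustrative assumptions). Because the parameter is escaped first and lowercased second, the inserted backslashes survive the lowercasing:

term = process_query_param("Lung Cancer (NSCLC)")            # -> "lung cancer \(nsclc\)"
query = QueryParser("name", StandardAnalyzer()).parse(term)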

Example 9: int

# Required import: from lucene import QueryParser [as alias]
# Or: from lucene.QueryParser import escape [as alias]
    reverse_users_exposure_hash = {}
    try:
        hctr = 0
        for hit in hits:
            hctr += 1
            if hctr%100000==0: print "on hit: ", hctr
            #if hctr > 100000: break
            if hctr == hits.length(): break
            uid, timestamp, country, emoticons, user_id_replied = hit.get("user_id"), int(hit.get("timestamp")), hit.get('country'), hit.get('emoticons'), hit.get('user_id_replied')
            emoticon_users_by_time_hash[uid] = emoticon_users_by_time_hash.get(uid,[])+[timestamp]
    except Exception, e:
        pass
        #print "failed to list hit: ", e

    if emoticon == ":P":
        ee_two = QueryParser.escape(":p")
    elif emoticon == "T_T":
        ee_two = QueryParser.escape("TT")
    elif emoticon == "^_^":
        ee_two = QueryParser.escape("^^")
    if emoticon in [":P","T_T","^_^"]:
        q_two = QueryParser("emoticons",analyzer).parse(ee_two)
        hits_two = searcher.search(q_two)
        try:
            hctr_two = 0
            for hit_two in hits_two:
                hctr_two += 1
                if hctr_two%100000==0: print "on hit: ", hctr_two
                #if hctr > 100000: break
                if hctr_two == hits_two.length(): break
                uid, timestamp, country, emoticons, user_id_replied = hit_two.get("user_id"), int(hit_two.get("timestamp")), hit_two.get('country'), hit_two.get('emoticons'), hit_two.get('user_id_replied')
Author: vlad43210, Project: emoticons, Lines: 33, Source: calculate_emoticon_diffusion_bidir.py


Note: The lucene.QueryParser.escape examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their original developers; copyright in the source code remains with its authors, and distribution and use should follow the corresponding project's license. Please do not reproduce without permission.