当前位置: 首页>>代码示例>>Python>>正文


Python R_CLUSTER_FLOW2.hmget方法代码示例

本文整理汇总了Python中global_utils.R_CLUSTER_FLOW2.hmget方法的典型用法代码示例。如果您正苦于以下问题:Python R_CLUSTER_FLOW2.hmget方法的具体用法?Python R_CLUSTER_FLOW2.hmget怎么用?Python R_CLUSTER_FLOW2.hmget使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在global_utils.R_CLUSTER_FLOW2的用法示例。


在下文中一共展示了R_CLUSTER_FLOW2.hmget方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_attr_geo_track

# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def get_attr_geo_track(uid_list):
    """Build a seven-day geo track for a group of users.

    For each of the seven days before the reference date, the per-user IP
    counters stored in the ``ip_<timestamp>`` hash are summed over all users,
    converted with ``ip2geo`` and the two entries with the highest counts
    are kept.

    :param uid_list: user ids, passed as the field list to ``r_cluster.hmget``.
    :return: ``{'geo_track': <json string>}``; the JSON is a list of
        ``[date, [[geo, count], ...]]`` entries, oldest day first.
    """
    date_results = []
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    # NOTE(review): the reference date is pinned to a fixed test day, which
    # overrides the wall-clock value computed above -- confirm before relying
    # on this for live data.
    now_date = '2013-09-08'
    ts = datetime2ts(now_date)
    for i in range(7, 0, -1):
        timestamp = ts - i*24*3600
        ip_dict = {}
        results = r_cluster.hmget('ip_'+str(timestamp), uid_list)
        for item in results:
            if not item:
                # user has no IP record for this day
                continue
            item_dict = json.loads(item)
            for ip_item in item_dict:
                # explicit default instead of a bare ``except`` so that real
                # errors (e.g. malformed counters) are not silently swallowed
                ip_dict[ip_item] = ip_dict.get(ip_item, 0) + item_dict[ip_item]
        geo_dict = ip2geo(ip_dict)
        sort_geo_dict = sorted(geo_dict.items(), key=lambda x: x[1], reverse=True)
        date_key = ts2datetime(timestamp)
        # keep only the two most frequent entries of the day
        date_results.append([date_key, sort_geo_dict[:2]])
    return {'geo_track': json.dumps(date_results)}
开发者ID:taozhiiq,项目名称:user_portrait,代码行数:30,代码来源:cron_group.py

示例2: update_day_hashtag

# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def update_day_hashtag(uid_list):
    """Collect the hashtags each user used during the last ``WEEK`` days.

    One ``hashtag_<ts>`` hash is read per day. For every user the number of
    days on which a hashtag appears is counted (the per-day usage count
    stored in the hash is not summed).

    :param uid_list: user ids, passed as the field list to ``r_cluster.hmget``.
    :return: ``{uid: {'hashtag': 'tag1&tag2', 'hashtag_dict': {tag: days}}}``.
    """
    now_ts = time.time()
    # run_type: live runs start from today's midnight, test runs from test_ts
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = test_ts

    results = {}
    for uid in uid_list:
        results[uid] = {}

    for i in range(WEEK, 0, -1):
        ts = now_date_ts - DAY*i
        hashtag_results = r_cluster.hmget('hashtag_'+str(ts), uid_list)
        # hmget returns one value per requested field, in request order, so
        # pairing by position gives each user their own record. Reading a
        # fixed index here would hand every user the first user's hashtags.
        for uid, hashtag_item in zip(uid_list, hashtag_results):
            if not hashtag_item:
                continue
            user_counts = results[uid]
            for hashtag in json.loads(hashtag_item):
                user_counts[hashtag] = user_counts.get(hashtag, 0) + 1

    all_results = {}
    for uid in uid_list:
        user_hashtag_dict = results[uid]
        hashtag_string = '&'.join(user_hashtag_dict.keys())
        all_results[uid] = {'hashtag': hashtag_string, 'hashtag_dict': user_hashtag_dict}
    return all_results
开发者ID:SwoJa,项目名称:ruman,代码行数:34,代码来源:update_day.py

示例3: update_day_geo

# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def update_day_geo(uid_list, user_info_list):
    """Append yesterday's geo activity to each user's stored geo history.

    Reads the ``new_ip_<ts>`` hash of the previous day, maps every IP to a
    geo label with ``ip2city`` and merges the resulting day dictionary into
    the JSON history found under ``user_info_list[uid]['activity_geo_dict']``.

    :param uid_list: user ids, passed as the field list to ``r_cluster.hmget``.
    :param user_info_list: mapping ``uid -> {'activity_geo_dict': <json list>}``.
    :return: ``{uid: {'activity_geo_dict': <json of the last 30 days>,
        'activity_geo': <'&'-joined labels of the last 7 days>,
        'activity_geo_aggs': <'&'-joined last tab-field of each label>}}``.
    """
    current_ts = time.time()
    #run_type
    if RUN_TYPE == 1:
        day_start_ts = datetime2ts(ts2datetime(current_ts))
    else:
        day_start_ts = test_ts
    raw_ip_values = r_cluster.hmget('new_ip_'+str(day_start_ts - DAY), uid_list)

    results = {}
    for position, uid in enumerate(uid_list):
        if uid not in results:
            results[uid] = {'activity_geo':{}, 'activity_geo_dict':[]}
        raw_value = raw_ip_values[position]
        ip_to_stamps = json.loads(raw_value) if raw_value else {}

        # geo label -> number of '&'-separated entries recorded for its IPs
        geo_counts = {}
        for ip in ip_to_stamps:
            hits = len(ip_to_stamps[ip].split('&'))
            geo, school = ip2city(ip)
            if not geo:
                continue
            geo = geo.decode('utf-8')
            geo_counts[geo] = geo_counts.get(geo, 0) + hits

        # extend the stored history and keep at most 30 days of it
        history = json.loads(user_info_list[uid]['activity_geo_dict'])
        history.append(geo_counts)
        results[uid]['activity_geo_dict'] = json.dumps(history[-30:])

        # distinct geo labels seen during the last 7 recorded days
        recent_labels = [label for day_geo in history[-7:] for label in day_geo.keys()]
        recent_labels = list(set(recent_labels))
        joined_labels = '&'.join(['&'.join(label.split('\t')) for label in recent_labels])
        try:
            joined_aggs = '&'.join([label.split('\t')[-1] for label in recent_labels])
        except:
            joined_aggs = ''

        results[uid]['activity_geo'] = joined_labels
        results[uid]['activity_geo_aggs'] = joined_aggs
    return results
开发者ID:huxiaoqian,项目名称:user_portrait_ending2,代码行数:52,代码来源:update_day.py

示例4: get_activity_time

# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def get_activity_time(uid_list):
    """Compute status count and dominant activity frequency per user.

    For each of the ``WEEK`` days before the reference day the
    ``activity_<ts>`` hash is read; every user record found contributes 96
    slot counts to that user's signal (which starts with 96 zeros). The
    positive frequency with the largest FFT power is then reported.

    :param uid_list: user ids, passed as the field list to ``r_cluster.hmget``.
    :return: ``{uid: {'statusnum': <sum of counts>,
        'activity_time': log(dominant_frequency + 1)}}``.
    """
    current_ts = time.time()
    today = ts2datetime(current_ts)
    #run_type
    if RUN_TYPE == 1:
        base_ts = datetime2ts(today)
    else:
        base_ts = datetime2ts(RUN_TEST_TIME)

    slots_by_uid = {} # {uid: [slot counts], ...}
    for day_offset in range(1, WEEK+1):
        ts = base_ts - DAY*day_offset
        print(ts)
        day_values = r_cluster.hmget('activity_'+str(ts), uid_list)
        if not day_values:
            continue
        for position, uid in enumerate(uid_list):
            if uid not in slots_by_uid:
                slots_by_uid[uid] = [0] * 96
            raw_record = day_values[position]
            if not raw_record:
                continue
            day_activity = json.loads(raw_record)
            for slot in range(96):
                try:
                    slot_count = day_activity[str(slot)]
                except:
                    slot_count = 0
                slots_by_uid[uid].append(slot_count)

    results = {}
    for uid in uid_list:
        activity_list = slots_by_uid[uid]
        statusnum = sum(activity_list)
        signal = np.array(activity_list)
        power = np.abs(np.fft.fft(signal))**2
        freq = np.fft.fftfreq(signal.size, d=1)
        # scan for the positive frequency carrying the most power
        best_power = 0
        best_freq = 0
        for idx, val in enumerate(power):
            if val > best_power and freq[idx] > 0:
                best_power = val
                best_freq = freq[idx]
        results[uid] = {'statusnum': statusnum, 'activity_time': math.log(best_freq + 1)}

    return results
开发者ID:SwoJa,项目名称:ruman,代码行数:49,代码来源:evaluate_index.py

示例5: get_attr_trend

# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def get_attr_trend(uid_list):
    """Aggregate a week of group activity into 4-hour buckets.

    Each ``activity_<ts>`` hash value is a JSON object mapping a segment
    index to a count. Segment indexes are grouped 16 at a time into buckets
    of ``15*60*16`` seconds (presumably 16 quarter-hour slots -- TODO
    confirm against the writer of these hashes).

    :param uid_list: user ids, passed as the field list to ``r_cluster.hmget``.
    :return: dict with two JSON strings: ``'activity_trend'`` is a list of
        ``[bucket_start_ts, count]`` sorted by timestamp (6 buckets for each
        of the 7 days), ``'activity_time'`` maps the bucket offset within a
        day to the count summed over all days and users.
    """
    bucket_span = 15*60*16  # seconds covered by one bucket
    result = {}
    now_ts = time.time()
    date = ts2datetime(now_ts - 24*3600)
    timestamp = datetime2ts(date)
    # NOTE(review): the reference day is pinned to a fixed test date, which
    # overrides the wall-clock value computed above -- confirm before relying
    # on this for live data.
    timestamp = datetime2ts('2013-09-08')
    time_result = {}
    segment_result = {}
    for i in range(1, 8):
        ts = timestamp - i*24*3600
        r_result = r_cluster.hmget('activity_'+str(ts), uid_list)
        for item in r_result:
            if not item:
                continue
            item = json.loads(item)
            for segment in item:
                # floor division keeps the bucket an integer on every Python
                # version; true division would yield float keys that never
                # match the bucket timestamps looked up below
                offset = int(segment) // 16 * bucket_span
                time_result[offset + ts] = time_result.get(offset + ts, 0) + item[segment]
                segment_result[offset] = segment_result.get(offset, 0) + item[segment]
    trend_list = []
    for i in range(1, 8):
        ts = timestamp - i*24*3600
        for j in range(0, 6):
            time_seg = ts + j*bucket_span
            # buckets without any activity are reported with a zero count
            trend_list.append((time_seg, time_result.get(time_seg, 0)))
    sort_trend_list = sorted(trend_list, key=lambda x: x[0], reverse=False)
    result['activity_trend'] = json.dumps(sort_trend_list)
    result['activity_time'] = json.dumps(segment_result)
    return result
开发者ID:taozhiiq,项目名称:user_portrait,代码行数:43,代码来源:cron_group.py

示例6: update_day_sensitive

# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def update_day_sensitive(uid_list):
    """Collect the sensitive words of each user over the last ``WEEK`` days.

    One ``sensitive_<ts>`` hash is read per day. For every user the number
    of days on which a word appears is counted, then a score is computed by
    weighting each count with ``sensitive_score_dict`` according to the
    stage stored for the word in the ``sensitive_words`` hash.

    :param uid_list: user ids, passed as the field list to ``r_cluster.hmget``.
    :return: ``{uid: {'sensitive_string': 'w1&w2',
        'sensitive_dict': {word: days}, 'sensitive': <score>}}``.
    """
    now_ts = time.time()
    # run_type: live runs start from today's midnight, test runs from test_ts
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = test_ts

    results = {}
    for uid in uid_list:
        results[uid] = {}

    for i in range(WEEK, 0, -1):
        ts = now_date_ts - DAY*i
        sensitive_results = r_cluster.hmget('sensitive_'+str(ts), uid_list)
        # hmget returns one value per requested field, in request order.
        # Pairing by position makes every user read their own record on
        # every day, rather than a stale value or the first user's value.
        for uid, sensitive_item in zip(uid_list, sensitive_results):
            if not sensitive_item:
                continue
            user_counts = results[uid]
            for sensitive in json.loads(sensitive_item):
                user_counts[sensitive] = user_counts.get(sensitive, 0) + 1

    all_results = {}
    for uid in uid_list:
        user_sensitive_dict = results[uid]
        sensitive_score = 0
        for word in user_sensitive_dict:
            # words without a stage in the lookup hash do not add to the score
            tmp_stage = r_sensitive.hget('sensitive_words', word)
            if tmp_stage:
                sensitive_score += user_sensitive_dict[word] * sensitive_score_dict[str(tmp_stage)]
        sensitive_string = '&'.join(user_sensitive_dict.keys())
        all_results[uid] = {'sensitive_string': sensitive_string, 'sensitive_dict': user_sensitive_dict,
                            'sensitive': sensitive_score}
    return all_results
开发者ID:ferrero-zhang,项目名称:user_portrait_0324,代码行数:42,代码来源:update_day.py

示例7: get_school

# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def get_school(uid_list):
    """Count, per user, the schools matched by their IPs in the last ``WEEK`` days.

    Every IP found in the daily ``new_ip_<ts>`` hashes is passed to
    ``ip2school``; matches are weighted by the number of '&'-separated
    entries recorded for that IP.

    :param uid_list: user ids, passed as the field list to ``r_cluster.hmget``.
    :return: ``{uid: {'is_school': '1' or '0', 'school_string': 's1&s2',
        'school_dict': <json of {school: count}>}}``.
    """
    today_start_ts = datetime2ts(ts2datetime(time.time()))
    counts_by_uid = {}
    for day_offset in range(WEEK, 0, -1):
        day_ts = today_start_ts - DAY * day_offset
        day_values = r_cluster.hmget('new_ip_'+str(day_ts), uid_list)
        for position, uid in enumerate(uid_list):
            if uid not in counts_by_uid:
                counts_by_uid[uid] = {}
            raw_record = day_values[position]
            ip_to_stamps = json.loads(raw_record) if raw_record else {}
            for ip in ip_to_stamps:
                hits = len(ip_to_stamps[ip].split('&'))
                school = ip2school(ip)
                if not school:
                    continue
                user_counts = counts_by_uid[uid]
                user_counts[school] = user_counts.get(school, 0) + hits

    results = {}
    for uid in uid_list:
        school_dict = counts_by_uid[uid]
        # '1' flags users with at least one school match during the window
        is_school = '1' if school_dict != {} else '0'
        results[uid] = {
            'is_school': is_school,
            'school_string': '&'.join(school_dict.keys()),
            'school_dict': json.dumps(school_dict),
        }
    return results
开发者ID:lcwy220,项目名称:revised_user_portrait,代码行数:39,代码来源:update_week.py

示例8: update_flow_information

# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def update_flow_information(user_info):
    """Aggregate a week of hashtag (and, nominally, geo) data per user.

    NOTE(review): for a non-empty ``user_info`` this function cannot run to
    completion as written -- the final loop uses several names that are
    never assigned here (see the inline review notes). Only the hashtag
    aggregation is actually reachable.

    :param user_info: mapping keyed by uid; the geo branch reads
        ``user_info[uid]['activity_geo_dict']`` as a JSON list.
    :return: ``result`` -- see the review note on the last loop.
    """
    results = {} # results ={uid: {'activity_geo_dict':'', 'activity_geo':'', 'hashtag_dict':'', 'hashtag':'', 'online_pattern_dict':'', 'online_pattern':''}}
    # indexed by position below, so this relies on keys() returning a list
    uid_list = user_info.keys()
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    timestamp = datetime2ts(now_date)
    #test
    # the fixed test date overrides the wall-clock value computed above
    timestamp = datetime2ts('2013-09-08')
    user_hashtag_dict = dict()
    user_online_dict = dict()
    ip_user_count_dict = {}
    new_day_ip_dict = dict()
    for i in range(7,0,-1):
        ts = timestamp - 24*3600*i
        print 'iter date:', ts2date(ts)
        # NOTE(review): rebinding `results` replaces the dict created at the
        # top of the function with the hmget reply
        results = r_cluster.hmget('hashtag_'+str(ts), uid_list)
        # fetched but only consumed by the disabled (string-literal) block below
        online_pattern_results = r_cluster.hmget('online_'+str(ts), uid_list)

        # NOTE(review): `i` only takes the values 7..1, so this branch never
        # runs; `r_cluater` is not assigned in this function (looks like a
        # typo for r_cluster) and the key prefix is 'hashtag_' although the
        # value is later parsed as IP data -- confirm the intent
        if i==0:
            ip_result = r_cluater.hmget('hashtag_'+str(ts), uid_list)

        for j in range(0, len(uid_list)):
            uid = uid_list[j]
            #attr: hashtag
            # sum the per-day hashtag counts of this user
            if results[j]:
                hashtag_dict = json.loads(results[j])
                for hashtag in hashtag_dict:
                    if uid in user_hashtag_dict:
                        try:
                            user_hashtag_dict[uid][hashtag] += hashtag_dict[hashtag]
                        except:
                            user_hashtag_dict[uid][hashtag] = hashtag_dict[hashtag]
                    else:
                        user_hashtag_dict[uid] = {hashtag: hashtag_dict[hashtag]}
            '''
            #attr: online_pattern
            if online_pattern_results[j]:
                online_pattern_dict = json.loads(online_pattern_results[j])
                for online_pattern in online_pattern_dict:
                    if uid in user_online_dict:
                        try:
                            user_online_dict[uid][online_pattern] += online_pattern_dict[online_pattern]
                        except:
                            user_online_dict[uid][online_pattern] = online_pattern_dict[online_pattern]
                    else:
                        user_online_dict[uid] = {online_pattern: online_pattern_dict[online_pattern]}
            '''
            
            #attr: activity_geo by ip-timestamp
            # NOTE(review): unreachable for the same reason as above (`i` is
            # never 0). If it were reached: new_day_ip_dict[uid] is never
            # initialised, list.append() returns None so new_day_geo_list
            # would be None, and `new_day_geo` is not assigned anywhere.
            if i==0 and ip_result[j]:
                ip_timestamp_dict = json.loads(ip_result[j])
                old_flow_information = user_info[uid]
                old_day_geo_list = json.loads(old_flow_information['activity_geo_dict'])
                for ip in ip_timestamp_dict:
                    ip_count = len(ip_timestamp_dict[ip].split('&'))
                    new_day_ip_dict[uid][ip] = ip_count
                geo_dict = ip2city(new_day_ip_dict[uid])
                if len(old_day_geo_list)>=30:
                    new_day_geo_list = old_day_geo_list[1:].append(geo_dict)
                else:
                    new_day_geo_list = old_day_geo_list.append(geo_dict)
                week_geo_list = []
                week_day_geo_list = new_day_geo[-7:]
                for day_geo_dict in week_day_geo_list:
                    week_geo_list.extend(day_geo_dict.keys())
                week_geo_list = list(set(week_geo_list))
                activity_geo_string = ''
                new_week_geo_list = []
                for geo_string in week_geo_list:
                    day_geo_string = '&'.join(geo_string.split('\t'))
                    new_week_geo_list.append(day_geo_string)
                activity_geo_string = '&'.join(new_week_geo_list)
                print 'activity_geo_string:', activity_geo_string
                

    for uid in uid_list:
        #attr: hashtag
        # users without any hashtag record get empty strings
        try:
            hashtag_dict = user_hashtag_dict[uid]
            hashtag_string = json.dumps(hashtag_dict)
            hashtag_list = '&'.join(hashtag_dict.keys())
        except KeyError:
            hashtag_string = ''
            hashtag_list = ''
        '''
        #attr: online_pattern
        try:
            online_dict = user_online_dict[uid]
            online_string = json.dumps(online_dict)
            online_list = '&'.join(online_dict.keys())
        except KeyError:
            online_string = ''
            online_list = ''
        '''
        # NOTE(review): `result`, `online_pattern_string` and
        # `online_pattern_list` are not assigned in this function, and
        # `new_day_geo_list` / `activity_geo_string` are only assigned in the
        # unreachable geo branch -- unless module-level names exist this
        # raises NameError. json.loads() on new_day_geo_list also looks
        # suspicious (json.dumps intended?) -- confirm.
        result[uid] = {'hashtag_dict':hashtag_string, 'hashtag':hashtag_list, \
                       'activity_geo_dict': json.loads(new_day_geo_list), 'activity_geo': activity_geo_string, \
                       'online_pattern_dict': online_pattern_string, 'online_pattern': online_pattern_list}
    return result
开发者ID:huxiaoqian,项目名称:user_portrait_ending2,代码行数:100,代码来源:flow_information.py

示例9: get_flow_information

# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def get_flow_information(uid_list):
    """Aggregate one week of hashtag, sensitive-word, geo and keyword data.

    The visible part of the function fills ``iter_results`` per user from
    three redis hashes per day and one text-index search per day, then
    starts deriving the output fields in ``results``.

    :param uid_list: user ids, used as the hmget field list and as the
        ``uid`` terms filter of the text search.
    """
    results = {}      
    #results = {uid:{'hashtag_dict':{},'hashtag':'', 'keywords_dict':{}, 'keywords_string':'', 'activity_geo':'', 'activity_geo_dict':dict}}
    iter_results = {} # iter_results = {uid:{'hashtag': hashtag_dict, 'geo':geo_dict, 'keywords':keywords_dict}}
    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    #test
    # the test timestamp overrides the wall-clock value computed above
    now_date_ts = test_ts
    for i in range(7,0,-1):
        ts = now_date_ts - DAY*i
        iter_date = ts2datetime(ts)
        flow_text_index_name = flow_text_index_name_pre + iter_date
        # geo counts of this single day, appended to each user's geo_track
        uid_day_geo = {}
        #compute hashtag and geo
        # hashtag and sensitive data come from r_cluster_3, IP data from r_cluster
        hashtag_results = r_cluster_3.hmget('hashtag_'+str(ts), uid_list)
        ip_results = r_cluster.hmget('new_ip_'+str(ts), uid_list)
        #compute sensitive_words
        sensitive_results = r_cluster_3.hmget('sensitive_'+str(ts), uid_list)
        # position of the current uid inside the three hmget replies
        count = 0 
        for uid in uid_list:
            #init iter_results[uid]
            if uid not in iter_results:
                iter_results[uid] = {'hashtag':{}, 'geo':{},'geo_track':[],'keywords':{}, 'sensitive':{}}
            #compute hashtag
            hashtag_item = hashtag_results[count]
            if hashtag_item:
                uid_hashtag_dict = json.loads(hashtag_item)
            else:
                uid_hashtag_dict = {}
            # sum the stored counts over the week
            for hashtag in uid_hashtag_dict:
                try:
                    iter_results[uid]['hashtag'][hashtag] += uid_hashtag_dict[hashtag]
                except:
                    iter_results[uid]['hashtag'][hashtag] = uid_hashtag_dict[hashtag]
            #compute sensitive
            sensitive_item = sensitive_results[count]
            if sensitive_item:
                uid_sensitive_dict = json.loads(sensitive_item)
            else:
                uid_sensitive_dict = {}
            for sensitive_word in uid_sensitive_dict:
                try:
                    iter_results[uid]['sensitive'][sensitive_word] += uid_sensitive_dict[sensitive_word]
                except:
                    iter_results[uid]['sensitive'][sensitive_word] = uid_sensitive_dict[sensitive_word]
            #compute geo
            uid_day_geo[uid] = {}
            ip_item = ip_results[count]
            if ip_item:
                uid_ip_dict = json.loads(ip_item)
            else:
                uid_ip_dict = {}
            for ip in uid_ip_dict:
                # each IP maps to an '&'-joined string; its entry count is the weight
                ip_count = len(uid_ip_dict[ip].split('&'))
                geo = ip2city(ip)
                if geo:
                    #print 'geo:', geo
                    # weekly total per geo label
                    try:
                        iter_results[uid]['geo'][geo] += ip_count
                    except:
                        iter_results[uid]['geo'][geo] = ip_count
                    # total for this day only
                    try:
                        uid_day_geo[uid][geo] += ip_count
                    except:
                        uid_day_geo[uid][geo] = ip_count
            iter_results[uid]['geo_track'].append(uid_day_geo[uid])
            count += 1
        
        #compute keywords:        
        # any failure of the search is treated as "no documents for this day"
        try:
            text_results = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type, \
                                               body={'query':{'filtered':{'filter':{'terms':{'uid': uid_list}}}}, 'size':MAX_VALUE}, _source=True, fields=['uid', 'keywords_dict'])['hits']['hits']
        except:
            text_results = {}
        for item in text_results:
            #print 'keywords item:', item
            uid = item['fields']['uid'][0]
            uid_keywords_dict = json.loads(item['fields']['keywords_dict'][0])
            for keywords in uid_keywords_dict:
                try:
                    iter_results[uid]['keywords'][keywords] += uid_keywords_dict[keywords]
                except:
                    iter_results[uid]['keywords'][keywords] = uid_keywords_dict[keywords]

       
    #get keywords top
    for uid in uid_list:
        results[uid] = {}
        hashtag_dict = iter_results[uid]['hashtag']
        results[uid]['hashtag_dict'] = json.dumps(hashtag_dict)
        results[uid]['hashtag'] = '&'.join(hashtag_dict.keys())
        #sensitive words
        sensitive_word_dict = iter_results[uid]['sensitive']
        results[uid]['sensitive_dict'] = json.dumps(sensitive_word_dict)
        results[uid]['sensitive_string'] = '&'.join(sensitive_word_dict.keys())
        sensitive_score = 0
        for item in sensitive_word_dict:
            k = item
            v = sensitive_word_dict[k]
            # stage stored for the word in the 'sensitive_words' hash
            tmp_stage = r_sensitive.hget('sensitive_words', k)
#.........这里部分代码省略.........
开发者ID:huxiaoqian,项目名称:user_portrait_ending2,代码行数:103,代码来源:flow_information.py

示例10: get_flow_information_v2

# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def get_flow_information_v2(uid_list, all_user_keywords_dict):
    """Aggregate one week of hashtag, sensitive-word, geo and school data.

    The visible part of the function fills ``iter_results`` per user from
    three redis hashes per day and derives the hashtag and sensitive-word
    output fields; the sensitive score only uses the most recent day.

    :param uid_list: user ids, passed as the field list to every hmget call.
    :param all_user_keywords_dict: not used in the visible part of the
        function.
    """
    results = {}      
    #results = {uid:{'hashtag_dict':{},'hashtag':'', 'keywords_dict':{}, 'keywords_string':'', 'activity_geo':'', 'activity_geo_dict':dict, 'activity_geo_aggs':''}}
    iter_results = {} # iter_results = {uid:{'hashtag': hashtag_dict, 'geo':geo_dict, 'keywords':keywords_dict}}
    now_ts = time.time()
    #run_type
    # sensitive words of the most recent day only (ts == now_date_ts - DAY)
    today_sensitive_results = {}
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = test_ts
    for i in range(WEEK,0,-1):
        ts = now_date_ts - DAY*i
        # geo counts of this single day, appended to each user's geo_track
        uid_day_geo = {}
        #compute hashtag and geo
        # hashtag and sensitive data come from r_cluster_3, IP data from r_cluster
        hashtag_results = r_cluster_3.hmget('hashtag_'+str(ts), uid_list)
        ip_results = r_cluster.hmget('new_ip_'+str(ts), uid_list)
        #compute sensitive_words
        sensitive_results = r_cluster_3.hmget('sensitive_'+str(ts), uid_list)
        # position of the current uid inside the three hmget replies
        count = 0 
        for uid in uid_list:
            #init iter_results[uid]
            if uid not in iter_results:
                iter_results[uid] = {'hashtag':{}, 'geo':{},'geo_track':[],'keywords':{}, 'sensitive':{}, 'school':{}}
            if uid not in today_sensitive_results:
                today_sensitive_results[uid] = {}
            #compute hashtag
            hashtag_item = hashtag_results[count]
            if hashtag_item:
                uid_hashtag_dict = json.loads(hashtag_item)
            else:
                uid_hashtag_dict = {}
            # sum the stored counts over the week
            for hashtag in uid_hashtag_dict:
                try:
                    iter_results[uid]['hashtag'][hashtag] += uid_hashtag_dict[hashtag]
                except:
                    iter_results[uid]['hashtag'][hashtag] = uid_hashtag_dict[hashtag]
            #compute sensitive
            sensitive_item = sensitive_results[count]
            if sensitive_item:
                uid_sensitive_dict = json.loads(sensitive_item)
            else:
                uid_sensitive_dict = {}
            for sensitive_word in uid_sensitive_dict:
                try:
                    iter_results[uid]['sensitive'][sensitive_word] += uid_sensitive_dict[sensitive_word]
                except:
                    iter_results[uid]['sensitive'][sensitive_word] = uid_sensitive_dict[sensitive_word]
                # additionally track the last day of the window on its own
                if ts == now_date_ts - DAY:
                    try:
                        today_sensitive_results[uid][sensitive_word] += uid_sensitive_dict[sensitive_word]
                    except:
                        today_sensitive_results[uid][sensitive_word] = uid_sensitive_dict[sensitive_word]
            #compute geo
            uid_day_geo[uid] = {}
            ip_item = ip_results[count]
            if ip_item:
                uid_ip_dict = json.loads(ip_item)
            else:
                uid_ip_dict = {}
            for ip in uid_ip_dict:
                # each IP maps to an '&'-joined string; its entry count is the weight
                ip_count = len(uid_ip_dict[ip].split('&'))
                geo, school = ip2city(ip)
                if geo:
                    # weekly total per geo label
                    try:
                        iter_results[uid]['geo'][geo] += ip_count
                    except:
                        iter_results[uid]['geo'][geo] = ip_count
                    # total for this day only
                    try:
                        uid_day_geo[uid][geo] += ip_count
                    except:
                        uid_day_geo[uid][geo] = ip_count
                if school:
                    try:
                        iter_results[uid]['school'][school] += ip_count
                    except:
                        iter_results[uid]['school'][school] = ip_count
            iter_results[uid]['geo_track'].append(uid_day_geo[uid])
            count += 1
               
    #get keywords top
    for uid in uid_list:
        results[uid] = {}
        #hashtag
        hashtag_dict = iter_results[uid]['hashtag']
        results[uid]['hashtag_dict'] = json.dumps(hashtag_dict)
        results[uid]['hashtag'] = '&'.join(hashtag_dict.keys())
        #sensitive words
        sensitive_word_dict = iter_results[uid]['sensitive']
        results[uid]['sensitive_dict'] = json.dumps(sensitive_word_dict)
        results[uid]['sensitive_string'] = '&'.join(sensitive_word_dict.keys())
        sensitive_score = 0
        today_sensitive_results_user = today_sensitive_results[uid]
        # score = sum(count * weight of the word's stage) over the last day's words
        for sensitive_item in today_sensitive_results_user:
            k = sensitive_item
            v = today_sensitive_results_user[sensitive_item]
            tmp_stage = r_sensitive.hget('sensitive_words', k)
            # words without a stored stage do not contribute
            if tmp_stage:
                sensitive_score += v * sensitive_score_dict[str(tmp_stage)]
        results[uid]['sensitive'] = sensitive_score
#.........这里部分代码省略.........
开发者ID:huxiaoqian,项目名称:user_portrait_ending2,代码行数:103,代码来源:flow_information.py

示例11: get_flow_information

# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def get_flow_information(uid_list):
    """Collect seven days of hashtag, IP and sensitive-word data per user.

    For each day five redis hashes (``hashtag_``, ``sensitive_hashtag_``,
    ``ip_``, ``sensitive_ip_`` and ``sensitive_`` followed by the date as
    ``YYYYMMDD``) are read, and every non-empty value is stored per user
    and per date.

    :param uid_list: user ids, passed as the field list to ``r_cluster.hmget``.
    :return: ``{uid: {...}}`` with the keys ``hashtag_string``,
        ``hashtag_dict``, ``sensitive_hashtag_string``,
        ``sensitive_hashtag_dict``, ``geo_activity``, ``geo_string``, ``ip``,
        ``sensitive_geo_activity``, ``sensitive_geo_string``,
        ``sensitive_words_string`` and ``sensitive_words_dict``. The
        ``*_dict`` / ``*_activity`` values are JSON strings when data exists
        and an empty dict otherwise; string values default to ``''``.
    """
    # Run daily: the window is anchored on the previous day.
    now_ts = time.time()-3600*24
    # `ts` must be initialised before the loop decrements it; without this
    # the first iteration raises UnboundLocalError.
    # NOTE(review): because the decrement happens before the first read, the
    # anchor day itself is not queried -- confirm that this is intended.
    ts = now_ts

    user_hashtag_result = {}
    user_sensitive_hashtag = {}
    sensitive_words = {}
    user_ip_result = {}
    user_sensitive_ip = {}
    # (redis key prefix, per-user accumulator), in query order
    sources = (
        ('hashtag_', user_hashtag_result),
        ('sensitive_hashtag_', user_sensitive_hashtag),
        ('ip_', user_ip_result),
        ('sensitive_ip_', user_sensitive_ip),
        ('sensitive_', sensitive_words),
    )
    for i in range(1, 8):
        ts = ts - 3600*24
        date = ts2datetime(ts).replace('-', '')
        for prefix, per_user in sources:
            day_values = r_cluster.hmget(prefix+str(date), uid_list)
            # hmget returns one value per requested field, in request order
            for uid, raw in zip(uid_list, day_values):
                if raw:
                    per_user.setdefault(uid, {})[date] = json.loads(raw)

    result_dict = {}
    for uid in uid_list:
        hashtag_string = ''
        sensitive_hashtag_string = ''
        ip_string = ''
        ip_all = ""
        sensitive_ip_string = ''
        hashtag_dict = {}
        sensitive_hashtag_dict = {}
        ip_dict = {}
        sensitive_ip_dict = {}
        sensitive_words_string = ''
        sensitive_words_dict = {}

        if uid in sensitive_words:
            sensitive_words_string = extract_string(sensitive_words[uid])
            sensitive_words_dict = json.dumps(sensitive_words[uid])
        if uid in user_hashtag_result:
            hashtag_string = extract_string(user_hashtag_result[uid])
            hashtag_dict = json.dumps(user_hashtag_result[uid])
        if uid in user_sensitive_hashtag:
            sensitive_hashtag_string = extract_string(user_sensitive_hashtag[uid])
            sensitive_hashtag_dict = json.dumps(user_sensitive_hashtag[uid])
        if uid in user_ip_result:
            ip_string = extract_geo(user_ip_result[uid])
            ip_dict = json.dumps(ip_to_geo(user_ip_result[uid]))
            ip_all = json.dumps(user_ip_result[uid])
        if uid in user_sensitive_ip:
            sensitive_ip_string = extract_geo(user_sensitive_ip[uid])
            sensitive_ip_dict = json.dumps(ip_to_geo(user_sensitive_ip[uid]))

        result_dict[uid] = {"hashtag_string": hashtag_string, "hashtag_dict": hashtag_dict,
                            "sensitive_hashtag_string": sensitive_hashtag_string, "sensitive_hashtag_dict": sensitive_hashtag_dict,
                            "geo_activity": ip_dict, "geo_string": ip_string, 'ip': ip_all,
                            "sensitive_geo_activity": sensitive_ip_dict, "sensitive_geo_string": sensitive_ip_string,
                            'sensitive_words_string': sensitive_words_string, 'sensitive_words_dict': sensitive_words_dict}
    return result_dict
开发者ID:jianjian0dandan,项目名称:sensitive_user_portrait,代码行数:96,代码来源:attribute_from_flow.py

示例12: zip

# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
   uhlist = zip(uidlist, hashtag_list)
   uhtlist = []
   for uh in uhlist:
       uh = list(uh)
       uh.append(ts)
       uhtlist.append(uh)
   data.extend(uhtlist)

# Write one JSON line per [uid, hashtags, ts] record that carries data.
with open("hashtag_0521.txt", "w") as fw:
    for d in data:
        if d[1] is not None:
            fw.write("%s\n" % json.dumps(d))

# Same extraction for the "at_<ts>" hashes: one [uid, at_value, ts] record
# per user and timestamp; users without a value get None.
at_data = []
for ts in tss:
    ns = "at_" + str(ts)
    at_list = R_CLUSTER_FLOW2.hmget(ns, uidlist)
    at_list = [json.loads(a) if a else None for a in at_list]
    at_data.extend([uid, at_item, ts] for uid, at_item in zip(uidlist, at_list))

# Records without data (None in the second position) are skipped.
with open("at_0521.txt", "w") as fw:
    for a in at_data:
        if a[1] is not None:
            fw.write("%s\n" % json.dumps(a))
开发者ID:SwoJa,项目名称:ruman,代码行数:32,代码来源:extract_at_data.py

示例13: get_flow_information_v2

# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def _add_count(counter, key, amount):
    """Add amount to counter[key], creating the entry when it is missing."""
    try:
        counter[key] += amount
    except KeyError:
        counter[key] = amount


def get_flow_information_v2(uid_list, all_user_keywords_dict):
    """Aggregate a week of hashtag, geo and keyword data for each uid.

    For every one of the WEEK days preceding the reference timestamp, the
    'hashtag_<ts>' and 'new_ip_<ts>' hashes are read from r_cluster for all
    uids at once. Hashtag counts are summed over the week; each IP is mapped
    to a location with ip2city and counted both over the week and per day.

    Args:
        uid_list: uids to process; used as hash field names and as keys into
            all_user_keywords_dict.
        all_user_keywords_dict: mapping uid -> {keyword: weight}. It must
            contain every uid in uid_list, otherwise KeyError is raised.

    Returns:
        dict mapping each uid to a dict with these keys:
            'hashtag_dict': JSON string of {hashtag: summed count}
            'hashtag': the hashtags joined with '&'
            'activity_geo_dict': JSON string of a list with one
                {geo: count} dict per day, oldest day first
            'activity_geo': the weekly geo names joined with '&', with the
                tabs inside each name also replaced by '&'
            'activity_geo_aggs': the last tab-separated component of each
                weekly geo name, joined with '&'
            'keywords': JSON string of the 50 highest-weighted
                (keyword, weight) pairs, heaviest first
            'keywords_string': those keywords joined with '&'
    """
    # run_type: 1 uses the current time passed through
    # ts2datetime/datetime2ts; any other value uses the fixed test_ts.
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(time.time()))
    else:
        now_date_ts = test_ts

    # iter_results = {uid: {'hashtag': {...}, 'geo': {...}, 'geo_track': [...]}}
    iter_results = {}
    for i in range(WEEK, 0, -1):
        ts = now_date_ts - DAY * i
        hashtag_results = r_cluster.hmget('hashtag_' + str(ts), uid_list)
        ip_results = r_cluster.hmget('new_ip_' + str(ts), uid_list)
        # hmget replies are positional, so walk them in step with uid_list.
        for uid, hashtag_item, ip_item in zip(uid_list, hashtag_results, ip_results):
            if uid not in iter_results:
                iter_results[uid] = {'hashtag': {}, 'geo': {}, 'geo_track': []}
            user_totals = iter_results[uid]

            # compute hashtag
            uid_hashtag_dict = json.loads(hashtag_item) if hashtag_item else {}
            for hashtag in uid_hashtag_dict:
                _add_count(user_totals['hashtag'], hashtag, uid_hashtag_dict[hashtag])

            # compute geo
            day_geo = {}
            uid_ip_dict = json.loads(ip_item) if ip_item else {}
            for ip in uid_ip_dict:
                # The stored value is an '&'-separated string; the number of
                # entries is used as the count for this IP.
                # NOTE(review): presumably one entry per occurrence - confirm.
                ip_count = len(uid_ip_dict[ip].split('&'))
                geo = ip2city(ip)
                if geo:
                    _add_count(user_totals['geo'], geo, ip_count)
                    _add_count(day_geo, geo, ip_count)
            # One entry per day, even when the day has no geo data.
            user_totals['geo_track'].append(day_geo)

    # get keywords top
    results = {}
    for uid in uid_list:
        user_totals = iter_results[uid]
        hashtag_dict = user_totals['hashtag']
        geo_dict = user_totals['geo']

        keywords_dict = all_user_keywords_dict[uid]
        keywords_top50 = sorted(keywords_dict.items(), key=lambda x: x[1], reverse=True)[:50]

        results[uid] = {
            'hashtag_dict': json.dumps(hashtag_dict),
            'hashtag': '&'.join(hashtag_dict),
            'activity_geo_dict': json.dumps(user_totals['geo_track']),
            'activity_geo': '&'.join([geo.replace('\t', '&') for geo in geo_dict]),
            # split() always yields at least one element, so [-1] cannot
            # fail here and no exception guard is needed.
            'activity_geo_aggs': '&'.join([geo.split('\t')[-1] for geo in geo_dict]),
            'keywords': json.dumps(keywords_top50),
            'keywords_string': '&'.join([keyword_item[0] for keyword_item in keywords_top50]),
        }

    return results
开发者ID:huxiaoqian,项目名称:user_portrait,代码行数:79,代码来源:flow_information.py


注:本文中的global_utils.R_CLUSTER_FLOW2.hmget方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。