当前位置: 首页>>代码示例>>Python>>正文


Python global_utils.R_CLUSTER_FLOW2类代码示例

本文整理汇总了Python中global_utils.R_CLUSTER_FLOW2的典型用法代码示例。如果您正苦于以下问题：Python R_CLUSTER_FLOW2类的具体用法？Python R_CLUSTER_FLOW2怎么用？Python R_CLUSTER_FLOW2使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了R_CLUSTER_FLOW2类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: cal_text_sensitive

def cal_text_sensitive(item):
    """Accumulate per-user sensitive-word counts for one message.

    Reads ``item['text']``, ``item['uid']`` and ``item['timestamp']``.
    Every entry of ``SENSITIVE_WORD`` found in the text is counted and
    merged into the JSON dict stored under field ``str(uid)`` of the hash
    ``'sensitive_' + str(ts)``, where ``ts`` is the timestamp passed
    through ``ts2datetime`` and then ``datetime2ts``.

    Nothing is written when the text contains no sensitive word.
    Returns None.
    """
    text = item['text']
    uid = item['uid']
    ts = datetime2ts(ts2datetime(item['timestamp']))
    if isinstance(text, str):
        text = text.decode('utf-8', 'ignore')

    sensitive_result = [word for word in SENSITIVE_WORD if word in text]
    if not sensitive_result:
        return

    sensitive_dict = {}
    for word in sensitive_result:
        sensitive_dict[word] = sensitive_dict.get(word, 0) + 1

    hash_name = 'sensitive_' + str(ts)
    field = str(uid)
    stored = r_cluster.hget(hash_name, field)
    try:
        sensitive_count_dict = json.loads(stored)
    except (TypeError, ValueError):
        # Missing field (None) or undecodable JSON: start from scratch.
        sensitive_count_dict = None
    if not isinstance(sensitive_count_dict, dict):
        sensitive_count_dict = {}

    for word, count in sensitive_dict.items():
        try:
            sensitive_count_dict[word] += count
        except (KeyError, TypeError):
            # New word, or a stored value that is not a number.
            sensitive_count_dict[word] = count

    # Single write: a failing hset now propagates instead of triggering a
    # second hset that would replace the accumulated counts.
    r_cluster.hset(hash_name, field, json.dumps(sensitive_count_dict))
开发者ID:lcwy220,项目名称:deployed_user_portrait,代码行数:28,代码来源:zmq_work_weibo_flow4.py

示例2: cal_text_work

def cal_text_work(item):
    """Accumulate per-user hashtag counts for one message.

    Reads ``item['uid']``, ``item['timestamp']`` and ``item['text']``.
    Hashtags of the form ``#tag#`` are extracted from the text and their
    counts merged into the JSON dict stored under field ``str(uid)`` of the
    hash ``'hashtag_' + str(ts)``, where ``ts`` is the timestamp passed
    through ``ts2datetime`` and then ``datetime2ts``.

    Nothing is written when the text contains no hashtag. Returns None.
    """
    uid = item['uid']
    ts = datetime2ts(ts2datetime(item['timestamp']))
    text = item['text']
    if isinstance(text, str):
        text = text.decode('utf-8', 'ignore')

    RE = re.compile(u'#([a-zA-Z-_⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]+)#', re.UNICODE)
    hashtag_list = RE.findall(text)
    if not hashtag_list:
        return

    # Everything from here on works with unicode hashtags.
    hashtag_dict = {}
    for hashtag in hashtag_list:
        hashtag_dict[hashtag] = hashtag_dict.get(hashtag, 0) + 1

    hash_name = 'hashtag_' + str(ts)
    field = str(uid)
    stored = r_cluster.hget(hash_name, field)
    try:
        hashtag_count_dict = json.loads(stored)
    except (TypeError, ValueError):
        # Missing field (None) or undecodable JSON: start from scratch.
        hashtag_count_dict = None
    if not isinstance(hashtag_count_dict, dict):
        hashtag_count_dict = {}

    for hashtag, count in hashtag_dict.items():
        try:
            hashtag_count_dict[hashtag] += count
        except (KeyError, TypeError):
            # New hashtag, or a stored value that is not a number.
            hashtag_count_dict[hashtag] = count

    # Single write: a failing hset now propagates instead of triggering a
    # second hset that would replace the accumulated counts.
    r_cluster.hset(hash_name, field, json.dumps(hashtag_count_dict))
开发者ID:ferrero-zhang,项目名称:user_portrait_0324,代码行数:30,代码来源:zmq_work_weibo_flow4.py

示例3: save_city

def save_city(uid, ip, timestamp, sensitive):
    """Increment the per-user counter of ``ip`` for the day of ``timestamp``.

    The counter lives in a JSON dict stored under field ``str(uid)`` of the
    hash ``'sensitive_ip_<day>'`` when ``sensitive`` is truthy, otherwise
    ``'ip_<day>'``. ``<day>`` is ``ts2datetime(timestamp)`` with every
    ``'-'`` removed. Returns None.
    """
    ts = ts2datetime(timestamp).replace('-', '')
    prefix = 'sensitive_ip_' if sensitive else 'ip_'
    hash_name = prefix + str(ts)
    field = str(uid)
    ip_key = str(ip)

    stored = r_cluster.hget(hash_name, field)
    try:
        ip_count_dict = json.loads(stored)
    except (TypeError, ValueError):
        # Missing field (None) or undecodable JSON: start from scratch.
        ip_count_dict = None
    if not isinstance(ip_count_dict, dict):
        ip_count_dict = {}

    try:
        ip_count_dict[ip_key] += 1
    except (KeyError, TypeError):
        # First sighting of this IP, or a stored value that is not a number.
        ip_count_dict[ip_key] = 1

    # Single write: a failing hset now propagates instead of triggering a
    # second hset that would reset the user's counters to {ip: 1}.
    r_cluster.hset(hash_name, field, json.dumps(ip_count_dict))
开发者ID:huxiaoqian,项目名称:sensitive_user_portrait,代码行数:26,代码来源:test_save_attribute.py

示例4: cal_sensitive_words_work

def cal_sensitive_words_work(item, sw_list):
    """Merge the sensitive words found in one message into the user's counts.

    ``sw_list`` is iterated; each element is itself iterated, its items are
    passed to ``chr`` and the joined result is decoded as UTF-8 to obtain
    the word (presumably each element is a sequence of byte values --
    confirm against the caller). Counts are merged into the JSON dict
    stored under field ``str(item['uid'])`` of the hash
    ``'sensitive_<day>'``, where ``<day>`` is
    ``ts2datetime(item['timestamp'])`` with every ``'-'`` removed.

    The hash field is written even when ``sw_list`` is empty. Returns None.
    """
    ts = ts2datetime(item['timestamp']).replace('-', '')
    uid = item['uid']

    # Named word_counts rather than ``map`` to avoid shadowing the builtin.
    word_counts = {}
    for w in sw_list:
        word = "".join([chr(x) for x in w]).decode('utf-8')
        word_counts[word] = word_counts.get(word, 0) + 1

    hash_name = 'sensitive_' + str(ts)
    field = str(uid)
    stored = r_cluster.hget(hash_name, field)
    try:
        sensitive_count_dict = json.loads(stored)
    except (TypeError, ValueError):
        # Missing field (None) or undecodable JSON: start from scratch.
        sensitive_count_dict = None
    if not isinstance(sensitive_count_dict, dict):
        sensitive_count_dict = {}

    for word, count in word_counts.items():
        try:
            sensitive_count_dict[word] += count
        except (KeyError, TypeError):
            # New word, or a stored value that is not a number.
            sensitive_count_dict[word] = count

    # Single write: a failing hset now propagates instead of triggering a
    # second hset that would replace the accumulated counts.
    r_cluster.hset(hash_name, field, json.dumps(sensitive_count_dict))
开发者ID:huxiaoqian,项目名称:sensitive_user_portrait,代码行数:25,代码来源:zmq_work_weibo_flow3.py

示例5: cal_class_ratio

def cal_class_ratio():
    ratio_results = {}
    date = '2013-09-07'
    ts = datetime2ts(date)
    scan_count = 0
    scan_cursor = 0
    all_count = 0
    while 1:
        if scan_count == 1000000:
            break
        results = r_cluster.hscan('activity_'+str(ts), scan_cursor, count=1000)
        scan_cursor = results[0]
        scan_count += 1000
        for uid in results[1]:
            activity_dict_string = r_cluster.hget('activity_'+str(ts), uid)
            activity_dict = json.loads(activity_dict_string)
            weibo_count = 0
            for time_seg in activity_dict:
                weibo_count += int(activity_dict[time_seg])
            if weibo_count >= 6:
                indic_3 = '1'
            else:
                indic_3 = '0'
            retweet_results = r.hgetall('retweet_'+str(uid))
            retweet_count = len(retweet_results)
            if retweet_count >= 8:
                indic_1 = '1'
            else:
                indic_1 = '0'
            be_retweet_results = r.hgetall('be_retweet_'+str(uid))
            be_retweet_count = len(be_retweet_results)
            #print 'be_retweet_count:', be_retweet_count
            if be_retweet_count >= 9:
                indic_2 = '1'
            else:
                indic_2 = '0'
            #print 'indic_2:', indic_2
            key = indic_1 + indic_2 + indic_3
            try:
                ratio_results[key] += 1
            except:
                ratio_results[key] = 1
            # write eight type users
            '''
            if key=='001':
                writer1.writerow([uid, retweet_count, be_retweet_count, weibo_count])
            elif key=='111':
                writer2.writerow([uid, retweet_count, be_retweet_count, weibo_count])
            elif key=='101':
                writer3.writerow([uid, retweet_count, be_retweet_count, weibo_count])
            elif key=='011':
                writer4.writerow([uid, retweet_count, be_retweet_count, weibo_count])
            elif key=='110':
                writer5.writerow([uid, retweet_count, be_retweet_count, weibo_count])
            if key=='010':
                writer6.writerow([uid, retweet_count, be_retweet_count, weibo_count])
            '''
    print 'ratio_results:', ratio_results
开发者ID:ferrero-zhang,项目名称:user_portrait_0324,代码行数:58,代码来源:test_recommentation.py

示例6: save_activity

def save_activity(uid, ts, time_segment):
    """Increment the user's activity counter for ``time_segment``.

    The counter lives in a JSON dict stored under field ``str(uid)`` of the
    hash ``'activity_' + str(ts)``, keyed by ``str(time_segment)``.
    Returns None.
    """
    hash_name = 'activity_' + str(ts)
    field = str(uid)
    segment_key = str(time_segment)

    stored = r_cluster.hget(hash_name, field)
    try:
        activity_count_dict = json.loads(stored)
    except (TypeError, ValueError):
        # Missing field (None) or undecodable JSON: start from scratch.
        activity_count_dict = None
    if not isinstance(activity_count_dict, dict):
        activity_count_dict = {}

    try:
        activity_count_dict[segment_key] += 1
    except (KeyError, TypeError):
        # First hit in this segment, or a stored value that is not a number.
        activity_count_dict[segment_key] = 1

    # Single write: a failing hset now propagates instead of triggering a
    # second hset that would reset the user's counters.
    r_cluster.hset(hash_name, field, json.dumps(activity_count_dict))
开发者ID:taozhiiq,项目名称:user_portrait,代码行数:12,代码来源:test_save_attribute.py

示例7: save_at

def save_at(uid, at_uid, timestamp):
    """Increment how often ``uid`` mentioned ``at_uid`` on the timestamp's day.

    The counter lives in a JSON dict stored under field ``str(uid)`` of the
    hash ``'at_' + str(ts)``, keyed by ``str(at_uid)``; ``ts`` is the
    timestamp passed through ``ts2datetime`` and then ``datetime2ts``.
    Returns None.
    """
    ts = datetime2ts(ts2datetime(timestamp))
    hash_name = 'at_' + str(ts)
    field = str(uid)
    target_key = str(at_uid)

    stored = r_cluster.hget(hash_name, field)
    try:
        ruid_count_dict = json.loads(stored)
    except (TypeError, ValueError):
        # Missing field (None) or undecodable JSON: start from scratch.
        ruid_count_dict = None
    if not isinstance(ruid_count_dict, dict):
        ruid_count_dict = {}

    try:
        ruid_count_dict[target_key] += 1
    except (KeyError, TypeError):
        # First mention of this user, or a stored value that is not a number.
        ruid_count_dict[target_key] = 1

    # Single write: a failing hset now propagates instead of triggering a
    # second hset that would reset the user's counters.
    r_cluster.hset(hash_name, field, json.dumps(ruid_count_dict))
开发者ID:taozhiiq,项目名称:user_portrait,代码行数:14,代码来源:test_save_attribute.py

示例8: save_city

def save_city(uid, ip, timestamp):
    """Increment the per-user counter of ``ip`` for the timestamp's day.

    The counter lives in a JSON dict stored under field ``str(uid)`` of the
    hash ``'ip_' + str(ts)``, keyed by ``str(ip)``; ``ts`` is the timestamp
    passed through ``ts2datetime`` and then ``datetime2ts``. Returns None.
    """
    ts = datetime2ts(ts2datetime(timestamp))
    hash_name = 'ip_' + str(ts)
    field = str(uid)
    ip_key = str(ip)

    stored = r_cluster.hget(hash_name, field)
    try:
        ip_count_dict = json.loads(stored)
    except (TypeError, ValueError):
        # Missing field (None) or undecodable JSON: start from scratch.
        ip_count_dict = None
    if not isinstance(ip_count_dict, dict):
        ip_count_dict = {}

    try:
        ip_count_dict[ip_key] += 1
    except (KeyError, TypeError):
        # First sighting of this IP, or a stored value that is not a number.
        ip_count_dict[ip_key] = 1

    # Single write: a failing hset now propagates instead of triggering a
    # second hset that would reset the user's counters to {ip: 1}.
    r_cluster.hset(hash_name, field, json.dumps(ip_count_dict))
开发者ID:taozhiiq,项目名称:user_portrait,代码行数:14,代码来源:test_save_attribute.py

示例9: save_city_timestamp

def save_city_timestamp(uid, ip, timestamp):
    """Record that ``uid`` was seen from ``ip`` at ``timestamp``.

    The user's entry is a JSON dict stored under field ``str(uid)`` of the
    hash ``'new_ip_' + str(ts)``, where ``ts`` is the timestamp passed
    through ``ts2datetime`` and then ``datetime2ts``. Each value is a
    ``'&'``-joined string of the timestamps at which that IP was seen.
    Returns None.
    """
    ts = datetime2ts(ts2datetime(timestamp))
    hash_name = 'new_ip_' + str(ts)
    field = str(uid)
    ip_key = str(ip)

    stored = r_cluster.hget(hash_name, field)
    try:
        ip_timestamp_string_dict = json.loads(stored)
    except (TypeError, ValueError):
        # Missing field (None) or undecodable JSON: start from scratch.
        ip_timestamp_string_dict = None
    if not isinstance(ip_timestamp_string_dict, dict):
        ip_timestamp_string_dict = {}

    try:
        ip_timestamp_string_dict[ip_key] += '&' + str(timestamp)
    except (KeyError, TypeError):
        # First sighting of this IP, or a stored value that is not a string.
        ip_timestamp_string_dict[ip_key] = str(timestamp)

    # Single write: a failing hset now propagates instead of triggering a
    # second hset that would discard the timestamps recorded so far.
    r_cluster.hset(hash_name, field, json.dumps(ip_timestamp_string_dict))
开发者ID:ferrero-zhang,项目名称:user_portrait_0324,代码行数:15,代码来源:test_save_attribute.py

示例10: get_attr_geo_track

def get_attr_geo_track(uid_list):
    """Build a seven-day geo track for a group of users.

    For each of the seven days before the reference date, the per-user IP
    count dicts are read with ``hmget`` from the hash
    ``'ip_' + str(day_timestamp)``, summed across ``uid_list``, passed to
    ``ip2geo`` and sorted by value in descending order.

    Returns ``{'geo_track': <JSON string>}`` where the JSON encodes a list
    of ``[date, top_two]`` pairs, oldest day first; ``top_two`` holds at
    most the two highest-valued ``(key, value)`` items of the ``ip2geo``
    result.

    NOTE(review): the reference date is pinned to '2013-09-08'. The
    original computed the current date from ``time.time()`` and then
    immediately overrode it under a ``#test`` comment -- confirm whether
    the clock should be used in production.
    """
    date_results = []
    now_date = '2013-09-08'
    ts = datetime2ts(now_date)
    for i in range(7, 0, -1):
        timestamp = ts - i*24*3600
        ip_dict = {}
        results = r_cluster.hmget('ip_' + str(timestamp), uid_list)
        for item in results:
            if not item:
                # User has no IP record for this day.
                continue
            item_dict = json.loads(item)
            for ip_item in item_dict:
                try:
                    ip_dict[ip_item] += item_dict[ip_item]
                except (KeyError, TypeError):
                    # First sighting of this IP, or values that cannot be
                    # added: keep the latest value, as the original did.
                    ip_dict[ip_item] = item_dict[ip_item]
        geo_dict = ip2geo(ip_dict)
        sort_geo_dict = sorted(geo_dict.items(), key=lambda x: x[1], reverse=True)
        date_key = ts2datetime(timestamp)
        date_results.append([date_key, sort_geo_dict[:2]])
    return {'geo_track': json.dumps(date_results)}
开发者ID:taozhiiq,项目名称:user_portrait,代码行数:28,代码来源:cron_group.py

示例11: update_day_hashtag

def update_day_hashtag(uid_list):
    """Collect the hashtags each user used during the past ``WEEK`` days.

    For every day offset ``i`` in ``WEEK..1`` the hash
    ``'hashtag_' + str(now_date_ts - DAY*i)`` is read with ``hmget`` for
    all of ``uid_list``. The reference timestamp is today's date (via
    ``ts2datetime``/``datetime2ts``) when ``RUN_TYPE == 1`` and ``test_ts``
    otherwise.

    Returns a dict mapping each uid to
    ``{'hashtag': '&'.join(hashtags), 'hashtag_dict': {hashtag: n_days}}``,
    where ``n_days`` is the number of scanned days on which the hashtag
    appears in that user's record. An empty ``uid_list`` yields ``{}``.
    """
    if not uid_list:
        return {}

    #run_type
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(time.time()))
    else:
        now_date_ts = test_ts

    # Pre-seed every uid so users without any record still get an entry.
    results = dict((uid, {}) for uid in uid_list)
    for i in range(WEEK, 0, -1):
        ts = now_date_ts - DAY*i
        hashtag_results = r_cluster.hmget('hashtag_' + str(ts), uid_list)
        # Pair each uid with its own hmget result. The original indexed the
        # results with a counter that was never incremented, so every user
        # was given the first user's hashtags.
        for uid, hashtag_item in zip(uid_list, hashtag_results):
            if not hashtag_item:
                continue
            user_hashtag_dict = results[uid]
            for hashtag in json.loads(hashtag_item):
                user_hashtag_dict[hashtag] = user_hashtag_dict.get(hashtag, 0) + 1

    all_results = {}
    for uid in uid_list:
        user_hashtag_dict = results[uid]
        hashtag_string = '&'.join(user_hashtag_dict.keys())
        all_results[uid] = {'hashtag': hashtag_string, 'hashtag_dict': user_hashtag_dict}
    return all_results
开发者ID:SwoJa,项目名称:ruman,代码行数:32,代码来源:update_day.py

示例12: filter_activity

def filter_activity(user_set):
    results = []
    now_date = ts2datetime(time.time())
    # test
    now_date = '2013-09-08'
    ts = datetime2ts(now_date) - 24*3600
    date = ts2datetime(ts)
    #print 'date:', date
    timestamp = datetime2ts(date)
    for user in user_set:
        over_count = 0
        for i in range(0,7):
            ts = timestamp - 3600*24*i
            result = r_cluster.hget('activity_'+str(ts), str(user))
            if result:
                items_dict = json.loads(result)
                for item in items_dict:
                    weibo_count = items_dict[item]
                    if weibo_count > activity_threshold:
                        over_count += 1
        if over_count == 0:
            results.append(user)
        else:
            writer.writerow([user, 'activity'])
            
    print 'after filter activity:', len(results)    
    return results
开发者ID:taozhiiq,项目名称:user_portrait,代码行数:27,代码来源:filter_rules.py

示例13: filter_activity

def filter_activity(user_set):
    """Return the users whose activity never exceeded ``activity_threshold``.

    The reference date is today when ``RUN_TYPE == 1`` and
    ``RUN_TEST_TIME`` otherwise. For each user the seven daily hashes
    ``'activity_' + str(day_ts)`` are read, starting from the day before
    the reference date and going backwards in steps of ``DAY``. Users with
    any stored count above the threshold are reported through
    ``writer.writerow([user, 'activity'])`` and left out of the result.
    """
    #run_type
    now_date = ts2datetime(time.time()) if RUN_TYPE == 1 else RUN_TEST_TIME
    timestamp = datetime2ts(ts2datetime(datetime2ts(now_date) - DAY))
    # The seven hash names depend only on the reference day, so build them once.
    day_hashes = ['activity_' + str(timestamp - DAY*i) for i in range(0, 7)]

    results = []
    for user in user_set:
        field = str(user)
        over_count = 0
        for hash_name in day_hashes:
            result = r_cluster.hget(hash_name, field)
            if not result:
                continue
            items_dict = json.loads(result)
            for item in items_dict:
                if items_dict[item] > activity_threshold:
                    over_count += 1
        if over_count:
            writer.writerow([user, 'activity'])
        else:
            results.append(user)

    return results
开发者ID:huxiaoqian,项目名称:revised_user_portrait,代码行数:27,代码来源:filter_rules.py

示例14: filter_activity

def filter_activity(user_set):
    results = []
    now_date = ts2datetime(time.time())
    now_date = '2013-09-08'
    ts = datetime2ts(now_date) - 24*3600
    date = ts2datetime(ts)
    timestamp = datetime2ts(date)
    ts = ts.replace('-','')
    for user in user_set:
        over_count = 0
        for i in range(0,7):
            ts = timestamp - 3600*24*i
            result = r_cluster.hget('activity_'+str(ts), str(user))
            if result:
                item_dict = json.loads(result)
                sorted_dict = sorted(item_dict.iteritems(), key=lambda asd:asd[1], reverse=True)
                if sorted_dict[0][1] > activity_threshold:
                    over_count = 1
        if over_count == 0:
            results.append(user)
        else:
            writer.writerow([user, 'activity'])

    print 'after filter activity: ', len(results)
    return results
开发者ID:huxiaoqian,项目名称:sensitive_user_portrait,代码行数:25,代码来源:filter_rules.py

示例15: cal_hashtag_work

def cal_hashtag_work(item, sensitive):
    """Accumulate per-user hashtag counts for one message.

    Reads ``item['text']``, ``item['uid']`` and ``item['timestamp']``.
    Hashtags of the form ``#tag#`` are extracted from the text and their
    counts merged into the JSON dict stored under field ``str(uid)`` of the
    hash ``'sensitive_hashtag_<day>'`` when ``sensitive`` is truthy,
    otherwise ``'hashtag_<day>'``. ``<day>`` is
    ``ts2datetime(timestamp)`` with every ``'-'`` removed.

    Nothing is written when the text contains no hashtag. Returns None.
    """
    text = item['text']
    uid = item['uid']
    ts = ts2datetime(item['timestamp']).replace('-', '')

    if isinstance(text, str):
        text = text.decode('utf-8', 'ignore')
    RE = re.compile(u'#([a-zA-Z-_⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]+)#', re.UNICODE)
    hashtag_list = RE.findall(text)
    if not hashtag_list:
        return

    hashtag_dict = {}
    for hashtag in hashtag_list:
        hashtag_dict[hashtag] = hashtag_dict.get(hashtag, 0) + 1

    prefix = 'sensitive_hashtag_' if sensitive else 'hashtag_'
    hash_name = prefix + str(ts)
    field = str(uid)

    stored = r_cluster.hget(hash_name, field)
    try:
        hashtag_count_dict = json.loads(stored)
    except (TypeError, ValueError):
        # Missing field (None) or undecodable JSON: start from scratch.
        hashtag_count_dict = None
    if not isinstance(hashtag_count_dict, dict):
        hashtag_count_dict = {}

    for hashtag, count in hashtag_dict.items():
        try:
            hashtag_count_dict[hashtag] += count
        except (KeyError, TypeError):
            # New hashtag, or a stored value that is not a number.
            hashtag_count_dict[hashtag] = count

    # Single write: a failing hset now propagates instead of triggering a
    # second hset that would replace the accumulated counts.
    r_cluster.hset(hash_name, field, json.dumps(hashtag_count_dict))
开发者ID:huxiaoqian,项目名称:sensitive_user_portrait,代码行数:39,代码来源:zmq_work_weibo_flow3.py


注:本文中的global_utils.R_CLUSTER_FLOW2类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。