本文整理汇总了Python中 global_utils.R_CLUSTER_FLOW2.hscan 方法的典型用法代码示例。如果您正苦于以下问题:Python R_CLUSTER_FLOW2.hscan 方法的具体用法?Python R_CLUSTER_FLOW2.hscan 怎么用?Python R_CLUSTER_FLOW2.hscan 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 global_utils.R_CLUSTER_FLOW2 的用法示例。
在下文中一共展示了R_CLUSTER_FLOW2.hscan方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: cal_class_ratio
# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hscan [as 别名]
def cal_class_ratio():
ratio_results = {}
date = '2013-09-07'
ts = datetime2ts(date)
scan_count = 0
scan_cursor = 0
all_count = 0
while 1:
if scan_count == 1000000:
break
results = r_cluster.hscan('activity_'+str(ts), scan_cursor, count=1000)
scan_cursor = results[0]
scan_count += 1000
for uid in results[1]:
activity_dict_string = r_cluster.hget('activity_'+str(ts), uid)
activity_dict = json.loads(activity_dict_string)
weibo_count = 0
for time_seg in activity_dict:
weibo_count += int(activity_dict[time_seg])
if weibo_count >= 6:
indic_3 = '1'
else:
indic_3 = '0'
retweet_results = r.hgetall('retweet_'+str(uid))
retweet_count = len(retweet_results)
if retweet_count >= 8:
indic_1 = '1'
else:
indic_1 = '0'
be_retweet_results = r.hgetall('be_retweet_'+str(uid))
be_retweet_count = len(be_retweet_results)
#print 'be_retweet_count:', be_retweet_count
if be_retweet_count >= 9:
indic_2 = '1'
else:
indic_2 = '0'
#print 'indic_2:', indic_2
key = indic_1 + indic_2 + indic_3
try:
ratio_results[key] += 1
except:
ratio_results[key] = 1
# write eight type users
'''
if key=='001':
writer1.writerow([uid, retweet_count, be_retweet_count, weibo_count])
elif key=='111':
writer2.writerow([uid, retweet_count, be_retweet_count, weibo_count])
elif key=='101':
writer3.writerow([uid, retweet_count, be_retweet_count, weibo_count])
elif key=='011':
writer4.writerow([uid, retweet_count, be_retweet_count, weibo_count])
elif key=='110':
writer5.writerow([uid, retweet_count, be_retweet_count, weibo_count])
if key=='010':
writer6.writerow([uid, retweet_count, be_retweet_count, weibo_count])
'''
print 'ratio_results:', ratio_results
示例2: cal_ave_weibo
# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hscan [as 别名]
def cal_ave_weibo():
date = '2013-09-07'
timestamp = datetime2ts(date)
scan_count = 0
scan_cursor = 0
all_count = 0
while 1:
if scan_count == 1000000:
break
results = r_cluster.hscan('activity_'+str(timestamp), scan_cursor, count=1000)
scan_cursor = results[0]
scan_count += 1000
for uid in results[1]:
for i in range(0,1):
ts = timestamp - 24*3600*i
activity_dict_string = r_cluster.hget('activity_'+str(ts), uid)
if activity_dict_string:
activity_dict = json.loads(activity_dict_string)
weibo_count = 0
for time_seg in activity_dict:
weibo_count += int(activity_dict[time_seg])
all_count += weibo_count
ave_count = float(all_count) / scan_count
print 'ave_count:', ave_count
示例3: main
# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hscan [as 别名]
def main():
if RUN_TYPE:
now_ts = time.time()-DAY # 前一天
ts = str(datetime2ts(ts2datetime(now_ts)))
else:
ts = str(datetime2ts('2013-09-07'))
now_ts = int(ts)
sensitive_string = "sensitive_" + ts
date_string = ts
update_sensitive_key = "sensitive_score_" + ts # 更新的键
sensitive_dict_key = "sensitive_dict_" + ts
sensitive_string_key = "sensitive_string_" + ts
sensitive_day_change_key = "sensitive_" + ts +"_day_change"
del_month = datetime2ts(ts2datetime(now_ts - MONTH))
del_sensitive_key = "sensitive_score_"+str(del_month) # 要删除的键
former_ts = int(ts) - DAY
former_date = str(datetime2ts(ts2datetime(former_ts)))
former_sensitive_key = "sensitive_score_" + former_date
iter_count = 0
bulk_action = []
mappings(ES_SENSITIVE_INDEX)
total_number = r.hlen(sensitive_string)
scan_cursor = 0
print total_number
while 1:
re_scan = r.hscan(sensitive_string, scan_cursor, count=1000)
scan_cursor = re_scan[0]
if len(re_scan[1]) != 0:
sensitive_info = re_scan[1] # 字典形式,uid:sensitive_words_dict
uid_list = sensitive_info.keys()
sensitive_results = es.mget(index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX, body={"ids":uid_list})['docs']
if sensitive_results:
for item in sensitive_results:
uid = item['_id']
sensitive_words_dict = json.loads(sensitive_info[uid]) # json.loads
current_sensitive_score = 0
for k,v in sensitive_words_dict.iteritems():
tmp_stage = r_sensitive.hget("sensitive_words", k)
if tmp_stage:
current_sensitive_score += v*sensitive_score_dict[str(tmp_stage)]
if item['found']: # 之前存在相关信息
revise_item = item["_source"]
if del_sensitive_key in revise_item:
item.pop(del_sensitive_key)
revise_item['uid'] = uid
# 新更新的敏感度
revise_item[update_sensitive_key] = current_sensitive_score
# 新更新的敏感词
revise_item[sensitive_dict_key] = sensitive_info[uid]
# 新更新的string
revise_item[sensitive_string_key] = "&".join(sensitive_words_dict.keys())
# 当天和之前一天、一周和一月均值的差异
revise_item['sensitive_day_change'] = current_sensitive_score - revise_item.get(former_sensitive_key, 0)
revise_item['sensitive_week_change'] = current_sensitive_score - revise_item.get('sensitive_week_ave', 0)
revise_item['sensitive_month_change'] = current_sensitive_score - revise_item.get('sensitive_month_ave', 0)
# 更新后week、month的均值和方差
revise_item['sensitive_week_ave'], revise_item['sensitive_week_var'], revise_item['sensitive_week_sum'] = compute_week(revise_item, now_ts)
revise_item['senstiive_month_ave'], revise_item['sensitive_month_var'], revise_item['sensitive_month_sum'] = compute_month(revise_item, now_ts)
else:
revise_item = dict()
revise_item['uid'] = uid
revise_item[update_sensitive_key] = current_sensitive_score
revise_item[sensitive_dict_key] = sensitive_info[uid]
revise_item[sensitive_string_key] = "&".join(sensitive_words_dict.keys())
revise_item['sensitive_day_change'] = current_sensitive_score
revise_item['sensitive_week_change'] = current_sensitive_score
revise_item['sensitive_month_change'] = current_sensitive_score
revise_item['sensitive_week_ave'], revise_item['sensitive_week_var'], revise_item['sensitive_week_sum'] = compute_week(revise_item, now_ts)
revise_item['senstiive_month_ave'], revise_item['sensitive_month_var'], revise_item['sensitive_month_sum'] = compute_month(revise_item, now_ts)
action = {'index':{'_id': uid}}
bulk_action.extend([action, revise_item])
iter_count += 1
if iter_count % 1000 == 0:
es.bulk(bulk_action, index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX)
bulk_action = []
print iter_count
if int(scan_cursor) == 0:
break
if bulk_action:
es.bulk(bulk_action, index=ES_SENSITIVE_INDEX, doc_type=DOCTYPE_SENSITIVE_INDEX)