本文整理汇总了Python中global_utils.R_CLUSTER_FLOW2.hmget方法的典型用法代码示例。如果您正苦于以下问题:Python R_CLUSTER_FLOW2.hmget方法的具体用法?Python R_CLUSTER_FLOW2.hmget怎么用?Python R_CLUSTER_FLOW2.hmget使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类global_utils.R_CLUSTER_FLOW2
的用法示例。
在下文中一共展示了R_CLUSTER_FLOW2.hmget方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_attr_geo_track
# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def get_attr_geo_track(uid_list):
    """Build a 7-day geo track for the given uids.

    For each of the 7 days preceding the (hard-coded test) reference date,
    sum the per-uid ip counters stored in the redis hash 'ip_<ts>', map the
    ips to geo locations with ip2geo, and keep the top-2 locations per day.

    Returns {'geo_track': json string of [[date, [(geo, count), ...]], ...]}.
    """
    date_results = []  # [[date, [(geo1, count1), (geo2, count2)]], ...] for 7 days
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    #test
    now_date = '2013-09-08'
    ts = datetime2ts(now_date)
    for i in range(7, 0, -1):
        timestamp = ts - i*24*3600
        ip_dict = dict()
        results = r_cluster.hmget('ip_'+str(timestamp), uid_list)
        for item in results:
            if item:
                item_dict = json.loads(item)
                for ip_item in item_dict:
                    # dict.get replaces the old bare `except:` accumulation,
                    # which silently swallowed every kind of error
                    ip_dict[ip_item] = ip_dict.get(ip_item, 0) + item_dict[ip_item]
        geo_dict = ip2geo(ip_dict)
        sort_geo_dict = sorted(geo_dict.items(), key=lambda x: x[1], reverse=True)
        date_key = ts2datetime(timestamp)
        date_results.append([date_key, sort_geo_dict[:2]])
    return {'geo_track': json.dumps(date_results)}
示例2: update_day_hashtag
# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def update_day_hashtag(uid_list):
    """Count, per uid, how many days of the last WEEK each hashtag appeared.

    Reads the per-day redis hash 'hashtag_<ts>' for the WEEK days before
    the reference date (today for a live run, test_ts otherwise).

    Returns {uid: {'hashtag': '&'-joined hashtags, 'hashtag_dict': {hashtag: day_count}}}.
    """
    results = {}
    all_results = {}
    now_ts = time.time()
    #run_type
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = test_ts
    for i in range(WEEK, 0, -1):
        ts = now_date_ts - DAY*i
        hashtag_results = r_cluster.hmget('hashtag_'+str(ts), uid_list)
        # BUGFIX: the original indexed hashtag_results[count] but never
        # incremented count, so every uid read the first uid's hashtags.
        # zip pairs each uid with its own hmget slot.
        for uid, hashtag_item in zip(uid_list, hashtag_results):
            if uid not in results:
                results[uid] = {}
            hashtag_dict = json.loads(hashtag_item) if hashtag_item else {}
            for hashtag in hashtag_dict:
                results[uid][hashtag] = results[uid].get(hashtag, 0) + 1
    for uid in uid_list:
        user_hashtag_dict = results[uid]
        hashtag_string = '&'.join(user_hashtag_dict.keys())
        all_results[uid] = {'hashtag': hashtag_string, 'hashtag_dict': user_hashtag_dict}
    return all_results
示例3: update_day_geo
# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def update_day_geo(uid_list, user_info_list):
    """Update per-uid activity-geo attributes from yesterday's ip counters.

    Reads the redis hash 'new_ip_<yesterday>' for each uid, maps ips to
    cities, appends the day's geo counts to the uid's stored 30-day history
    (user_info_list[uid]['activity_geo_dict']) and recomputes the weekly
    geo strings from the last 7 days.

    Returns {uid: {'activity_geo', 'activity_geo_dict', 'activity_geo_aggs'}}.
    """
    results = {}
    now_ts = time.time()
    # run_type: live runs key off today's date, test runs off a fixed ts
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = test_ts
    # yesterday's per-uid ip counters, one slot per uid
    ip_results = r_cluster.hmget('new_ip_'+str(now_date_ts - DAY), uid_list)
    for uid, ip_item in zip(uid_list, ip_results):
        if uid not in results:
            results[uid] = {'activity_geo': {}, 'activity_geo_dict': []}
        uid_ip_dict = json.loads(ip_item) if ip_item else {}
        day_results = {}
        for ip, ts_string in uid_ip_dict.items():
            # the stored value is '&'-joined timestamps; its length is the visit count
            ip_count = len(ts_string.split('&'))
            geo, school = ip2city(ip)
            if geo:
                geo = geo.decode('utf-8')
                day_results[geo] = day_results.get(geo, 0) + ip_count
        # append today's geo dict to the history, keeping at most 30 days
        activity_geo_history_list = json.loads(user_info_list[uid]['activity_geo_dict'])
        activity_geo_history_list.append(day_results)
        results[uid]['activity_geo_dict'] = json.dumps(activity_geo_history_list[-30:])
        # rebuild the weekly geo strings from the last 7 days of history
        week_geo_list = []
        for day_geo_dict in activity_geo_history_list[-7:]:
            week_geo_list.extend(day_geo_dict.keys())
        week_geo_list = list(set(week_geo_list))
        week_geo_string = '&'.join('&'.join(item.split('\t')) for item in week_geo_list)
        try:
            week_geo_aggs_string = '&'.join(item.split('\t')[-1] for item in week_geo_list)
        except:
            week_geo_aggs_string = ''
        results[uid]['activity_geo'] = week_geo_string
        results[uid]['activity_geo_aggs'] = week_geo_aggs_string
    return results
示例4: get_activity_time
# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def get_activity_time(uid_list):
    """Compute per-uid activity statistics from the 'activity_<ts>' redis hashes.

    For the WEEK days before the reference date, collects each uid's 96-slot
    (15-minute) activity counters, then derives:
      - statusnum: total activity count over the window
      - activity_time: log of the strongest positive frequency in the FFT
        power spectrum of the activity series, a crude periodicity score.
    Returns {uid: {'statusnum': int, 'activity_time': float}}.
    NOTE: Python 2 code (print statement).
    """
    results = {}
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    # run_type: live runs use today, test runs a fixed date
    if RUN_TYPE == 1:
        timestamp = datetime2ts(now_date)
    else:
        timestamp = datetime2ts(RUN_TEST_TIME)
    activity_list_dict = {} # {uid:[activity_list], uid:[]}
    for i in range(1,WEEK+1):
        ts = timestamp - DAY*i
        print ts
        r_result = r_cluster.hmget('activity_'+str(ts), uid_list)
        if r_result:
            for j in range(0, len(uid_list)):
                uid = uid_list[j]
                # NOTE(review): each uid's series starts with 96 zeros and the
                # daily counts are appended after them, so the signal begins
                # with one all-zero day — presumably intentional padding; confirm.
                if uid not in activity_list_dict:
                    activity_list_dict[uid] = [0 for i in range(0, 96)]
                user_r_result = r_result[j]
                if user_r_result:
                    user_activity_dict = json.loads(user_r_result)
                    # NOTE(review): this inner `i` shadows the day-loop index;
                    # harmless because the outer loop reassigns i, but confusing.
                    for i in range(0, 96):
                        try:
                            count = user_activity_dict[str(i)]
                        except:
                            count = 0
                        activity_list_dict[uid].append(count)
    for uid in uid_list:
        activity_list = activity_list_dict[uid]
        statusnum = sum(activity_list)
        # find the strongest strictly-positive frequency in the power spectrum
        signal = np.array(activity_list)
        fftResult = np.abs(np.fft.fft(signal))**2
        n = signal.size
        freq = np.fft.fftfreq(n, d=1)
        i = 0
        max_val = 0
        max_freq = 0
        for val in fftResult:
            if val>max_val and freq[i]>0:
                max_val = val
                max_freq = freq[i]
            i += 1
        results[uid] = {'statusnum': statusnum, 'activity_time': math.log(max_freq + 1)}
    return results
示例5: get_attr_trend
# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def get_attr_trend(uid_list):
    """Build a 7-day activity trend for the given uids.

    Sums the per-15-minute activity counters ('activity_<ts>' redis hashes)
    across all uids, bucketing the 96 daily slots into 4-hour segments
    (16 slots x 15 min). Returns:
      - activity_trend: time-ordered [(segment_ts, count), ...] as json
      - activity_time:  {day-relative segment offset: count} as json
    NOTE(review): the bucket arithmetic relies on Python 2 integer division
    (int(segment)/16); under Python 3 `/` would yield floats and break the keys.
    """
    result = {}
    now_ts = time.time()
    date = ts2datetime(now_ts - 24*3600)
    timestamp = datetime2ts(date)
    #test
    timestamp = datetime2ts('2013-09-08')
    time_result = dict()     # absolute 4-hour bucket ts -> summed count
    segment_result = dict()  # day-relative 4-hour bucket offset -> summed count
    for i in range(1, 8):
        ts = timestamp - i*24*3600
        r_result = r_cluster.hmget('activity_'+str(ts), uid_list)
        for item in r_result:
            if item:
                item = json.loads(item)
                for segment in item:
                    # int(segment)/16*15*60*16 floors the 15-min slot index to
                    # its 4-hour bucket (16 slots of 15*60 seconds)
                    try:
                        time_result[int(segment)/16*15*60*16+ts] += item[segment]
                    except:
                        time_result[int(segment)/16*15*60*16+ts] = item[segment]
                    try:
                        segment_result[int(segment)/16*15*60*16] += item[segment]
                    except:
                        segment_result[int(segment)/16*15*60*16] = item[segment]
    # fill all 6 four-hour buckets of each of the 7 days, defaulting to 0
    trend_list = []
    for i in range(1, 8):
        ts = timestamp - i*24*3600
        for j in range(0, 6):
            time_seg = ts + j*15*60*16
            if time_seg in time_result:
                trend_list.append((time_seg, time_result[time_seg]))
            else:
                trend_list.append((time_seg, 0))
    sort_trend_list = sorted(trend_list, key=lambda x:x[0], reverse=False)
    result['activity_trend'] = json.dumps(sort_trend_list)
    result['activity_time'] = json.dumps(segment_result)
    return result
示例6: update_day_sensitive
# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def update_day_sensitive(uid_list):
    """Count, per uid, how many days of the last WEEK each sensitive word appeared,
    and score the words against the 'sensitive_words' stage table.

    Reads the per-day redis hash 'sensitive_<ts>' for the WEEK days before the
    reference date (today for a live run, test_ts otherwise).

    Returns {uid: {'sensitive_string': '&'-joined words,
                   'sensitive_dict': {word: day_count},
                   'sensitive': weighted score}}.
    """
    results = {}
    all_results = {}
    now_ts = time.time()
    #run_type
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = test_ts
    for i in range(WEEK, 0, -1):
        ts = now_date_ts - DAY*i
        sensitive_results = r_cluster.hmget('sensitive_'+str(ts), uid_list)
        # BUGFIX: the original indexed sensitive_results[count] but never
        # incremented count, so every uid read the first uid's data.
        # zip pairs each uid with its own hmget slot.
        for uid, sensitive_item in zip(uid_list, sensitive_results):
            if uid not in results:
                results[uid] = {}
            sensitive_dict = json.loads(sensitive_item) if sensitive_item else {}
            for sensitive in sensitive_dict:
                results[uid][sensitive] = results[uid].get(sensitive, 0) + 1
    for uid in uid_list:
        user_sensitive_dict = results[uid]
        sensitive_score = 0
        for k, v in user_sensitive_dict.items():
            # weight each word by its stage from the sensitive-words table
            tmp_stage = r_sensitive.hget('sensitive_words', k)
            if tmp_stage:
                sensitive_score += v * sensitive_score_dict[str(tmp_stage)]
        sensitive_string = '&'.join(user_sensitive_dict.keys())
        all_results[uid] = {'sensitive_string': sensitive_string, 'sensitive_dict': user_sensitive_dict,
                            'sensitive': sensitive_score}
    return all_results
示例7: get_school
# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def get_school(uid_list):
    """Decide from a week of ip activity whether each uid looks like a campus user.

    For the WEEK days before today, maps each uid's ips ('new_ip_<ts>' redis
    hashes) to schools via ip2school and accumulates per-school counts.

    Returns {uid: {'is_school': '1'/'0', 'school_string': '&'-joined names,
                   'school_dict': json of {school: count}}}.
    """
    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    school_results = {}
    for day in range(WEEK, 0, -1):
        ts = now_date_ts - DAY * day
        ip_results = r_cluster.hmget('new_ip_'+str(ts), uid_list)
        for uid, ip_item in zip(uid_list, ip_results):
            school_results.setdefault(uid, {})
            uid_ip_dict = json.loads(ip_item) if ip_item else {}
            for ip, ts_string in uid_ip_dict.items():
                # '&'-joined timestamps: length == visit count for this ip
                ip_count = len(ts_string.split('&'))
                school = ip2school(ip)
                if school:
                    school_results[uid][school] = school_results[uid].get(school, 0) + ip_count
    results = {}
    for uid in uid_list:
        school_dict = school_results[uid]
        is_school = '1' if school_dict else '0'
        results[uid] = {'is_school': is_school,
                        'school_string': '&'.join(school_dict.keys()),
                        'school_dict': json.dumps(school_dict)}
    return results
示例8: update_flow_information
# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def update_flow_information(user_info):
    """Update per-uid flow attributes (hashtag, activity geo) over 7 days.

    NOTE(review): this function appears broken in several places (flagged
    inline below); it is documented as-is rather than rewritten because the
    intended behavior cannot be reconstructed with confidence.
    """
    results = {} # results ={uid: {'activity_geo_dict':'', 'activity_geo':'', 'hashtag_dict':'', 'hashtag':'', 'online_pattern_dict':'', 'online_pattern':''}}
    uid_list = user_info.keys()
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    timestamp = datetime2ts(now_date)
    #test
    timestamp = datetime2ts('2013-09-08')
    user_hashtag_dict = dict()
    user_online_dict = dict()
    ip_user_count_dict = {}
    new_day_ip_dict = dict()
    for i in range(7,0,-1):
        ts = timestamp - 24*3600*i
        print 'iter date:', ts2date(ts)
        # NOTE(review): this clobbers the `results` dict initialized above
        # with the hmget list; the function later returns `result` (singular)
        # which is never initialized — NameError at the bottom.
        results = r_cluster.hmget('hashtag_'+str(ts), uid_list)
        online_pattern_results = r_cluster.hmget('online_'+str(ts), uid_list)
        # NOTE(review): range(7,0,-1) never yields 0, so this branch (and
        # every `i==0` block below) is dead code.
        if i==0:
            # NOTE(review): `r_cluater` is a typo (undefined name), and the
            # key prefix 'hashtag_' here looks like it should be an ip key.
            ip_result = r_cluater.hmget('hashtag_'+str(ts), uid_list)
        for j in range(0, len(uid_list)):
            uid = uid_list[j]
            #attr: hashtag
            if results[j]:
                hashtag_dict = json.loads(results[j])
                for hashtag in hashtag_dict:
                    if uid in user_hashtag_dict:
                        try:
                            user_hashtag_dict[uid][hashtag] += hashtag_dict[hashtag]
                        except:
                            user_hashtag_dict[uid][hashtag] = hashtag_dict[hashtag]
                    else:
                        user_hashtag_dict[uid] = {hashtag: hashtag_dict[hashtag]}
            '''
            #attr: online_pattern
            if online_pattern_results[j]:
                online_pattern_dict = json.loads(online_pattern_results[j])
                for online_pattern in online_pattern_dict:
                    if uid in user_online_dict:
                        try:
                            user_online_dict[uid][online_pattern] += online_pattern_dict[online_pattern]
                        except:
                            user_online_dict[uid][online_pattern] = online_pattern_dict[online_pattern]
                    else:
                        user_online_dict[uid] = {online_pattern: online_pattern_dict[online_pattern]}
            '''
            #attr: activity_geo by ip-timestamp
            if i==0 and ip_result[j]:
                ip_timestamp_dict = json.loads(ip_result[j])
                old_flow_information = user_info[uid]
                old_day_geo_list = json.loads(old_flow_information['activity_geo_dict'])
                for ip in ip_timestamp_dict:
                    ip_count = len(ip_timestamp_dict[ip].split('&'))
                    # NOTE(review): new_day_ip_dict[uid] is never initialized
                    # — this would raise KeyError on first use.
                    new_day_ip_dict[uid][ip] = ip_count
                geo_dict = ip2city(new_day_ip_dict[uid])
                # NOTE(review): list.append returns None, so new_day_geo_list
                # is always None on both branches.
                if len(old_day_geo_list)>=30:
                    new_day_geo_list = old_day_geo_list[1:].append(geo_dict)
                else:
                    new_day_geo_list = old_day_geo_list.append(geo_dict)
                week_geo_list = []
                # NOTE(review): `new_day_geo` is an undefined name (probably
                # meant new_day_geo_list).
                week_day_geo_list = new_day_geo[-7:]
                for day_geo_dict in week_day_geo_list:
                    week_geo_list.extend(day_geo_dict.keys())
                week_geo_list = list(set(week_geo_list))
                activity_geo_string = ''
                new_week_geo_list = []
                for geo_string in week_geo_list:
                    day_geo_string = '&'.join(geo_string.split('\t'))
                    new_week_geo_list.append(day_geo_string)
                activity_geo_string = '&'.join(new_week_geo_list)
                print 'activity_geo_string:', activity_geo_string
    for uid in uid_list:
        #attr: hashtag
        try:
            hashtag_dict = user_hashtag_dict[uid]
            hashtag_string = json.dumps(hashtag_dict)
            hashtag_list = '&'.join(hashtag_dict.keys())
        except KeyError:
            hashtag_string = ''
            hashtag_list = ''
        '''
        #attr: online_pattern
        try:
            online_dict = user_online_dict[uid]
            online_string = json.dumps(online_dict)
            online_list = '&'.join(online_dict.keys())
        except KeyError:
            online_string = ''
            online_list = ''
        '''
        # NOTE(review): `result` is never initialized (the dict at the top is
        # `results`), and online_pattern_string / online_pattern_list are only
        # defined in the commented-out block above — both are NameErrors.
        result[uid] = {'hashtag_dict':hashtag_string, 'hashtag':hashtag_list, \
                'activity_geo_dict': json.loads(new_day_geo_list), 'activity_geo': activity_geo_string, \
                'online_pattern_dict': online_pattern_string, 'online_pattern': online_pattern_list}
    return result
示例9: get_flow_information
# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def get_flow_information(uid_list):
    """Aggregate weekly hashtag / geo / sensitive-word / keyword counters per uid.

    For the 7 days before the (test) reference date, reads the per-day redis
    hashes 'hashtag_<ts>', 'new_ip_<ts>' and 'sensitive_<ts>' plus the per-day
    flow-text Elasticsearch index, and accumulates counters in iter_results.
    NOTE(review): this example is truncated — the tail of the function is
    omitted below, so the final shape of `results` cannot be fully verified.
    """
    results = {}
    #results = {uid:{'hashtag_dict':{},'hashtag':'', 'keywords_dict':{}, 'keywords_string':'', 'activity_geo':'', 'activity_geo_dict':dict}}
    iter_results = {} # iter_results = {uid:{'hashtag': hashtag_dict, 'geo':geo_dict, 'keywords':keywords_dict}}
    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    #test
    now_date_ts = test_ts
    for i in range(7,0,-1):
        ts = now_date_ts - DAY*i
        iter_date = ts2datetime(ts)
        flow_text_index_name = flow_text_index_name_pre + iter_date
        uid_day_geo = {}
        #compute hashtag and geo
        hashtag_results = r_cluster_3.hmget('hashtag_'+str(ts), uid_list)
        ip_results = r_cluster.hmget('new_ip_'+str(ts), uid_list)
        #compute sensitive_words
        sensitive_results = r_cluster_3.hmget('sensitive_'+str(ts), uid_list)
        count = 0  # parallel index into the hmget result lists
        for uid in uid_list:
            #init iter_results[uid]
            if uid not in iter_results:
                iter_results[uid] = {'hashtag':{}, 'geo':{},'geo_track':[],'keywords':{}, 'sensitive':{}}
            #compute hashtag
            hashtag_item = hashtag_results[count]
            if hashtag_item:
                uid_hashtag_dict = json.loads(hashtag_item)
            else:
                uid_hashtag_dict = {}
            for hashtag in uid_hashtag_dict:
                try:
                    iter_results[uid]['hashtag'][hashtag] += uid_hashtag_dict[hashtag]
                except:
                    iter_results[uid]['hashtag'][hashtag] = uid_hashtag_dict[hashtag]
            #compute sensitive
            sensitive_item = sensitive_results[count]
            if sensitive_item:
                uid_sensitive_dict = json.loads(sensitive_item)
            else:
                uid_sensitive_dict = {}
            for sensitive_word in uid_sensitive_dict:
                try:
                    iter_results[uid]['sensitive'][sensitive_word] += uid_sensitive_dict[sensitive_word]
                except:
                    iter_results[uid]['sensitive'][sensitive_word] = uid_sensitive_dict[sensitive_word]
            #compute geo
            uid_day_geo[uid] = {}
            ip_item = ip_results[count]
            if ip_item:
                uid_ip_dict = json.loads(ip_item)
            else:
                uid_ip_dict = {}
            for ip in uid_ip_dict:
                # '&'-joined timestamps: length == visit count for this ip
                ip_count = len(uid_ip_dict[ip].split('&'))
                geo = ip2city(ip)
                if geo:
                    try:
                        iter_results[uid]['geo'][geo] += ip_count
                    except:
                        iter_results[uid]['geo'][geo] = ip_count
                    try:
                        uid_day_geo[uid][geo] += ip_count
                    except:
                        uid_day_geo[uid][geo] = ip_count
            iter_results[uid]['geo_track'].append(uid_day_geo[uid])
            count += 1
        #compute keywords:
        try:
            text_results = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type, \
                    body={'query':{'filtered':{'filter':{'terms':{'uid': uid_list}}}}, 'size':MAX_VALUE}, _source=True, fields=['uid', 'keywords_dict'])['hits']['hits']
        except:
            text_results = {}
        for item in text_results:
            uid = item['fields']['uid'][0]
            uid_keywords_dict = json.loads(item['fields']['keywords_dict'][0])
            for keywords in uid_keywords_dict:
                try:
                    iter_results[uid]['keywords'][keywords] += uid_keywords_dict[keywords]
                except:
                    iter_results[uid]['keywords'][keywords] = uid_keywords_dict[keywords]
    #get keywords top
    for uid in uid_list:
        results[uid] = {}
        hashtag_dict = iter_results[uid]['hashtag']
        results[uid]['hashtag_dict'] = json.dumps(hashtag_dict)
        results[uid]['hashtag'] = '&'.join(hashtag_dict.keys())
        #sensitive words
        sensitive_word_dict = iter_results[uid]['sensitive']
        results[uid]['sensitive_dict'] = json.dumps(sensitive_word_dict)
        results[uid]['sensitive_string'] = '&'.join(sensitive_word_dict.keys())
        sensitive_score = 0
        for item in sensitive_word_dict:
            k = item
            v = sensitive_word_dict[k]
            tmp_stage = r_sensitive.hget('sensitive_words', k)
#......... remainder of this example omitted in the excerpt .........
示例10: get_flow_information_v2
# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def get_flow_information_v2(uid_list, all_user_keywords_dict):
    """Aggregate weekly hashtag / geo / school / sensitive attributes per uid.

    Like get_flow_information, but also tracks schools (via ip2city's second
    return value) and keeps a separate sensitive-word tally for yesterday
    (today_sensitive_results) used for scoring.
    NOTE(review): this example is truncated — the tail of the function is
    omitted below, so the keywords/geo output section is not visible here.
    """
    results = {}
    #results = {uid:{'hashtag_dict':{},'hashtag':'', 'keywords_dict':{}, 'keywords_string':'', 'activity_geo':'', 'activity_geo_dict':dict, 'activity_geo_aggs':''}}
    iter_results = {} # iter_results = {uid:{'hashtag': hashtag_dict, 'geo':geo_dict, 'keywords':keywords_dict}}
    now_ts = time.time()
    #run_type
    today_sensitive_results = {}
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = test_ts
    for i in range(WEEK,0,-1):
        ts = now_date_ts - DAY*i
        uid_day_geo = {}
        #compute hashtag and geo
        hashtag_results = r_cluster_3.hmget('hashtag_'+str(ts), uid_list)
        ip_results = r_cluster.hmget('new_ip_'+str(ts), uid_list)
        #compute sensitive_words
        sensitive_results = r_cluster_3.hmget('sensitive_'+str(ts), uid_list)
        count = 0  # parallel index into the hmget result lists
        for uid in uid_list:
            #init iter_results[uid]
            if uid not in iter_results:
                iter_results[uid] = {'hashtag':{}, 'geo':{},'geo_track':[],'keywords':{}, 'sensitive':{}, 'school':{}}
            if uid not in today_sensitive_results:
                today_sensitive_results[uid] = {}
            #compute hashtag
            hashtag_item = hashtag_results[count]
            if hashtag_item:
                uid_hashtag_dict = json.loads(hashtag_item)
            else:
                uid_hashtag_dict = {}
            for hashtag in uid_hashtag_dict:
                try:
                    iter_results[uid]['hashtag'][hashtag] += uid_hashtag_dict[hashtag]
                except:
                    iter_results[uid]['hashtag'][hashtag] = uid_hashtag_dict[hashtag]
            #compute sensitive
            sensitive_item = sensitive_results[count]
            if sensitive_item:
                uid_sensitive_dict = json.loads(sensitive_item)
            else:
                uid_sensitive_dict = {}
            for sensitive_word in uid_sensitive_dict:
                try:
                    iter_results[uid]['sensitive'][sensitive_word] += uid_sensitive_dict[sensitive_word]
                except:
                    iter_results[uid]['sensitive'][sensitive_word] = uid_sensitive_dict[sensitive_word]
                # yesterday only: keep a separate per-day sensitive tally
                if ts == now_date_ts - DAY:
                    try:
                        today_sensitive_results[uid][sensitive_word] += uid_sensitive_dict[sensitive_word]
                    except:
                        today_sensitive_results[uid][sensitive_word] = uid_sensitive_dict[sensitive_word]
            #compute geo
            uid_day_geo[uid] = {}
            ip_item = ip_results[count]
            if ip_item:
                uid_ip_dict = json.loads(ip_item)
            else:
                uid_ip_dict = {}
            for ip in uid_ip_dict:
                # '&'-joined timestamps: length == visit count for this ip
                ip_count = len(uid_ip_dict[ip].split('&'))
                geo, school = ip2city(ip)
                if geo:
                    try:
                        iter_results[uid]['geo'][geo] += ip_count
                    except:
                        iter_results[uid]['geo'][geo] = ip_count
                    try:
                        uid_day_geo[uid][geo] += ip_count
                    except:
                        uid_day_geo[uid][geo] = ip_count
                if school:
                    try:
                        iter_results[uid]['school'][school] += ip_count
                    except:
                        iter_results[uid]['school'][school] = ip_count
            iter_results[uid]['geo_track'].append(uid_day_geo[uid])
            count += 1
    #get keywords top
    for uid in uid_list:
        results[uid] = {}
        #hashtag
        hashtag_dict = iter_results[uid]['hashtag']
        results[uid]['hashtag_dict'] = json.dumps(hashtag_dict)
        results[uid]['hashtag'] = '&'.join(hashtag_dict.keys())
        #sensitive words
        sensitive_word_dict = iter_results[uid]['sensitive']
        results[uid]['sensitive_dict'] = json.dumps(sensitive_word_dict)
        results[uid]['sensitive_string'] = '&'.join(sensitive_word_dict.keys())
        sensitive_score = 0
        # score only yesterday's sensitive words against the stage table
        today_sensitive_results_user = today_sensitive_results[uid]
        for sensitive_item in today_sensitive_results_user:
            k = sensitive_item
            v = today_sensitive_results_user[sensitive_item]
            tmp_stage = r_sensitive.hget('sensitive_words', k)
            if tmp_stage:
                sensitive_score += v * sensitive_score_dict[str(tmp_stage)]
        results[uid]['sensitive'] = sensitive_score
#......... remainder of this example omitted in the excerpt .........
示例11: get_flow_information
# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def get_flow_information(uid_list):
    """Daily update: aggregate the previous 7 days of per-uid flow data.

    Reads per-day redis hashes keyed by compact date strings (YYYYMMDD):
    hashtag_/sensitive_hashtag_/ip_/sensitive_ip_/sensitive_, groups each
    uid's daily dicts by date, then flattens them into the string/json
    attributes stored on the user.

    Returns {uid: {'hashtag_string', 'hashtag_dict',
                   'sensitive_hashtag_string', 'sensitive_hashtag_dict',
                   'geo_activity', 'geo_string', 'ip',
                   'sensitive_geo_activity', 'sensitive_geo_string',
                   'sensitive_words_string', 'sensitive_words_dict'}}.
    """
    result_dict = {}
    now_ts = time.time() - 3600*24  # updated daily; only yesterday's window
    now_date = ts2datetime(now_ts)  # date: 2013-09-01
    #now_date = "2013-09-08"
    user_hashtag_result = {}
    user_sensitive_hashtag = {}
    sensitive_words = {}
    user_ip_result = {}
    user_sensitive_ip = {}
    # BUGFIX: ts must be initialized before the loop decrements it; the
    # original left this assignment commented out, raising NameError on
    # the first iteration.
    ts = datetime2ts(now_date)
    for i in range(1, 8):
        ts = ts - 3600*24
        date = ts2datetime(ts).replace('-', '')
        hashtag_results = r_cluster.hmget('hashtag_'+str(date), uid_list)
        sensitive_hashtag = r_cluster.hmget('sensitive_hashtag_'+str(date), uid_list)
        ip_results = r_cluster.hmget('ip_'+str(date), uid_list)
        sensitive_ip = r_cluster.hmget('sensitive_ip_'+str(date), uid_list)
        sensitive_results = r_cluster.hmget('sensitive_'+str(date), uid_list)
        for j in range(0, len(uid_list)):
            uid = uid_list[j]
            # setdefault replaces the py2-only has_key update/insert dance
            if sensitive_results[j]:
                sensitive_words.setdefault(uid, {})[date] = json.loads(sensitive_results[j])
            if hashtag_results[j]:
                user_hashtag_result.setdefault(uid, {})[date] = json.loads(hashtag_results[j])
            if sensitive_hashtag[j]:
                user_sensitive_hashtag.setdefault(uid, {})[date] = json.loads(sensitive_hashtag[j])
            if ip_results[j]:
                user_ip_result.setdefault(uid, {})[date] = json.loads(ip_results[j])
            if sensitive_ip[j]:
                user_sensitive_ip.setdefault(uid, {})[date] = json.loads(sensitive_ip[j])
    for uid in uid_list:
        # default every attribute to empty, then fill from the collected data
        hashtag_string = ''
        sensitive_hashtag_string = ''
        ip_string = ''
        ip_all = ""
        sensitive_ip_string = ''
        hashtag_dict = {}
        sensitive_hashtag_dict = {}
        ip_dict = {}
        sensitive_ip_dict = {}
        sensitive_words_string = ''
        sensitive_words_dict = {}
        if uid in sensitive_words:
            sensitive_words_string = extract_string(sensitive_words[uid])
            sensitive_words_dict = json.dumps(sensitive_words[uid])
        if uid in user_hashtag_result:
            hashtag_string = extract_string(user_hashtag_result[uid])
            hashtag_dict = json.dumps(user_hashtag_result[uid])
        if uid in user_sensitive_hashtag:
            sensitive_hashtag_string = extract_string(user_sensitive_hashtag[uid])
            sensitive_hashtag_dict = json.dumps(user_sensitive_hashtag[uid])
        if uid in user_ip_result:
            ip_string = extract_geo(user_ip_result[uid])
            ip_dict = json.dumps(ip_to_geo(user_ip_result[uid]))
            ip_all = json.dumps(user_ip_result[uid])
        if uid in user_sensitive_ip:
            sensitive_ip_string = extract_geo(user_sensitive_ip[uid])
            sensitive_ip_dict = json.dumps(ip_to_geo(user_sensitive_ip[uid]))
        result_dict[uid] = {"hashtag_string": hashtag_string, "hashtag_dict": hashtag_dict, \
                "sensitive_hashtag_string": sensitive_hashtag_string, "sensitive_hashtag_dict": sensitive_hashtag_dict, \
                "geo_activity": ip_dict, "geo_string": ip_string, 'ip': ip_all, \
                "sensitive_geo_activity": sensitive_ip_dict, "sensitive_geo_string":sensitive_ip_string, \
                'sensitive_words_string': sensitive_words_string, 'sensitive_words_dict': sensitive_words_dict}
    return result_dict
示例12: zip
# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
# NOTE(review): script fragment — uidlist, hashtag_list, tss, data and ts are
# defined earlier, outside this excerpt. The first chunk appears to be the
# tail of a `for ts in tss:` loop mirroring the explicit one below; confirm
# against the full script.
# Pair each uid with its hashtag dict and stamp the day's timestamp on it.
uhlist = zip(uidlist, hashtag_list)
uhtlist = []
for uh in uhlist:
    uh = list(uh)
    uh.append(ts)
    uhtlist.append(uh)
data.extend(uhtlist)
# Dump (uid, hashtag_dict, ts) triples, skipping uids with no hashtag data.
with open("hashtag_0521.txt", "w") as fw:
    for d in data:
        if d[1] != None:
            fw.write("%s\n" % json.dumps(d))
# Same export for the per-day 'at_<ts>' (mentions) hashes.
at_data = []
for ts in tss:
    ns = "at_" + str(ts)
    hashtag_list = R_CLUSTER_FLOW2.hmget(ns, uidlist)
    hashtag_list = [json.loads(h) if h else None for h in hashtag_list]
    uhlist = zip(uidlist, hashtag_list)
    uhtlist = []
    for uh in uhlist:
        uh = list(uh)
        uh.append(ts)
        uhtlist.append(uh)
    at_data.extend(uhtlist)
with open("at_0521.txt", "w") as fw:
    for a in at_data:
        if a[1] != None:
            fw.write("%s\n" % json.dumps(a))
示例13: get_flow_information_v2
# 需要导入模块: from global_utils import R_CLUSTER_FLOW2 [as 别名]
# 或者: from global_utils.R_CLUSTER_FLOW2 import hmget [as 别名]
def get_flow_information_v2(uid_list, all_user_keywords_dict):
    """Collect weekly hashtag / geo / keyword attributes for each uid.

    Aggregates the per-day redis hashes 'hashtag_<ts>' and 'new_ip_<ts>'
    over the WEEK days before the reference date and combines them with the
    precomputed keyword counters in all_user_keywords_dict.

    Returns {uid: {'hashtag_dict', 'hashtag', 'activity_geo_dict',
                   'activity_geo', 'activity_geo_aggs', 'keywords',
                   'keywords_string'}}.
    """
    iter_results = {}  # {uid: {'hashtag': {...}, 'geo': {...}, 'geo_track': [...], 'keywords': {...}}}
    # reference day: today for a live run, a fixed test day otherwise
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(time.time()))
    else:
        now_date_ts = test_ts
    for day_offset in range(WEEK, 0, -1):
        ts = now_date_ts - DAY * day_offset
        day_geo = {}
        hashtag_results = r_cluster.hmget('hashtag_' + str(ts), uid_list)
        ip_results = r_cluster.hmget('new_ip_' + str(ts), uid_list)
        for uid, hashtag_item, ip_item in zip(uid_list, hashtag_results, ip_results):
            user_stats = iter_results.setdefault(
                uid, {'hashtag': {}, 'geo': {}, 'geo_track': [], 'keywords': {}})
            # accumulate the day's hashtag counters
            uid_hashtag_dict = json.loads(hashtag_item) if hashtag_item else {}
            for hashtag, num in uid_hashtag_dict.items():
                user_stats['hashtag'][hashtag] = user_stats['hashtag'].get(hashtag, 0) + num
            # map the day's ips to cities and count activity per city
            day_geo[uid] = {}
            uid_ip_dict = json.loads(ip_item) if ip_item else {}
            for ip, ts_string in uid_ip_dict.items():
                # '&'-joined timestamps: length == visit count for this ip
                ip_count = len(ts_string.split('&'))
                geo = ip2city(ip)
                if geo:
                    user_stats['geo'][geo] = user_stats['geo'].get(geo, 0) + ip_count
                    day_geo[uid][geo] = day_geo[uid].get(geo, 0) + ip_count
            user_stats['geo_track'].append(day_geo[uid])
    results = {}
    for uid in uid_list:
        hashtag_dict = iter_results[uid]['hashtag']
        geo_dict = iter_results[uid]['geo']
        geo_keys = geo_dict.keys()
        try:
            geo_aggs = '&'.join([item.split('\t')[-1] for item in geo_keys])
        except:
            geo_aggs = ''
        # keep the 50 most frequent keywords per uid
        keywords_top50 = sorted(all_user_keywords_dict[uid].items(),
                                key=lambda x: x[1], reverse=True)[:50]
        results[uid] = {
            'hashtag_dict': json.dumps(hashtag_dict),
            'hashtag': '&'.join(hashtag_dict.keys()),
            'activity_geo_dict': json.dumps(iter_results[uid]['geo_track']),
            'activity_geo': '&'.join(['&'.join(item.split('\t')) for item in geo_keys]),
            'activity_geo_aggs': geo_aggs,
            'keywords': json.dumps(keywords_top50),
            'keywords_string': '&'.join([keyword_item[0] for keyword_item in keywords_top50]),
        }
    return results