This article collects typical usage examples of the Python class global_utils.R_CLUSTER_FLOW2. If you have been asking yourself what R_CLUSTER_FLOW2 does, how to use it, or where to find examples of it in use, the curated class examples here may help.
Fifteen code examples of the R_CLUSTER_FLOW2 class are shown below, ordered by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
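Note that none of the examples construct R_CLUSTER_FLOW2 directly; they all go through a module-level client named r_cluster plus a pair of date helpers, and some also rely on project globals (DAY, WEEK, RUN_TYPE, activity_threshold, writer, ip2geo, and so on) that are not shown. The sketch below is a minimal, assumed version of that shared context; the import alias, the helper definitions, and the SENSITIVE_WORD list are assumptions made for illustration, not something the examples themselves confirm.

# Sketch of the shared context assumed by the examples below (assumptions marked).
import json
import re
import time

# Assumption: r_cluster is the R_CLUSTER_FLOW2 Redis-like client exposed by global_utils.
from global_utils import R_CLUSTER_FLOW2 as r_cluster

# Assumption: these helpers convert between unix timestamps and 'YYYY-MM-DD' dates.
def ts2datetime(ts):
    return time.strftime('%Y-%m-%d', time.localtime(int(ts)))

def datetime2ts(date):
    return int(time.mktime(time.strptime(date, '%Y-%m-%d')))

# Assumption: a global list of sensitive words, used by Example 1.
SENSITIVE_WORD = []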
Example 1: cal_text_sensitive
def cal_text_sensitive(item):
    # count occurrences of sensitive words in one weibo and accumulate them
    # into the per-day hash 'sensitive_<day_ts>', keyed by uid
    text = item['text']
    uid = item['uid']
    timestamp = item['timestamp']
    date = ts2datetime(timestamp)
    ts = datetime2ts(date)
    if isinstance(text, str):
        text = text.decode('utf-8', 'ignore')
    sensitive_result = [word for word in SENSITIVE_WORD if word in text]
    if sensitive_result:
        sensitive_dict = dict()
        for word in sensitive_result:
            try:
                sensitive_dict[word] += 1
            except KeyError:
                sensitive_dict[word] = 1
        try:
            sensitive_count_string = r_cluster.hget('sensitive_'+str(ts), str(uid))
            sensitive_count_dict = json.loads(sensitive_count_string)
            for word in sensitive_dict:
                count = sensitive_dict[word]
                try:
                    sensitive_count_dict[word] += count
                except KeyError:
                    sensitive_count_dict[word] = count
            r_cluster.hset('sensitive_'+str(ts), str(uid), json.dumps(sensitive_count_dict))
        except:
            # no existing counts for this uid today
            r_cluster.hset('sensitive_'+str(ts), str(uid), json.dumps(sensitive_dict))
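A quick call sketch for Example 1; the item fields and the word list are made up for illustration, only the key/field layout follows the code above.

# Hypothetical input; real items come from the weibo message stream.
SENSITIVE_WORD = [u'foo', u'bar']
item = {'uid': '1234567890', 'timestamp': 1378512000, 'text': u'... foo ...'}
cal_text_sensitive(item)
# The hash 'sensitive_<midnight_ts>' now maps field '1234567890'
# to a JSON string such as '{"foo": 1}'.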
Example 2: cal_text_work
def cal_text_work(item):
    # extract #hashtags# from one weibo and accumulate per-day counts
    # into the hash 'hashtag_<day_ts>', keyed by uid
    uid = item['uid']
    timestamp = item['timestamp']
    date = ts2datetime(timestamp)
    ts = datetime2ts(date)
    text = item['text']
    if isinstance(text, str):
        text = text.decode('utf-8', 'ignore')
    RE = re.compile(u'#([a-zA-Z-_⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]+)#', re.UNICODE)
    hashtag_list = RE.findall(text)
    if hashtag_list:
        # hashtags are handled as unicode here
        hashtag_dict = dict()
        for hashtag in hashtag_list:
            try:
                hashtag_dict[hashtag] += 1
            except KeyError:
                hashtag_dict[hashtag] = 1
        try:
            hashtag_count_string = r_cluster.hget('hashtag_'+str(ts), str(uid))
            hashtag_count_dict = json.loads(hashtag_count_string)
            for hashtag in hashtag_dict:
                count = hashtag_dict[hashtag]
                try:
                    hashtag_count_dict[hashtag] += count
                except KeyError:
                    hashtag_count_dict[hashtag] = count
            r_cluster.hset('hashtag_'+str(ts), str(uid), json.dumps(hashtag_count_dict))
        except:
            # no existing counts for this uid today
            r_cluster.hset('hashtag_'+str(ts), str(uid), json.dumps(hashtag_dict))
Example 3: save_city
def save_city(uid, ip, timestamp, sensitive):
    # accumulate per-day IP counts for a user; sensitive posts go into a separate hash
    ts = ts2datetime(timestamp).replace('-', '')   # date string, e.g. '20130907'
    key = str(uid)
    try:
        if sensitive:
            ip_count_string = r_cluster.hget('sensitive_ip_'+str(ts), str(uid))
        else:
            ip_count_string = r_cluster.hget('ip_'+str(ts), str(uid))
        ip_count_dict = json.loads(ip_count_string)
        try:
            ip_count_dict[str(ip)] += 1
        except KeyError:
            ip_count_dict[str(ip)] = 1
        if sensitive:
            r_cluster.hset('sensitive_ip_'+str(ts), str(uid), json.dumps(ip_count_dict))
        else:
            r_cluster.hset('ip_'+str(ts), str(uid), json.dumps(ip_count_dict))
    except:
        # no existing counts for this uid today
        if sensitive:
            r_cluster.hset('sensitive_ip_'+str(ts), str(uid), json.dumps({str(ip): 1}))
        else:
            r_cluster.hset('ip_'+str(ts), str(uid), json.dumps({str(ip): 1}))
Example 4: cal_sensitive_words_work
def cal_sensitive_words_work(item, sw_list):
    # sw_list holds sensitive words as lists of UTF-8 byte values;
    # rebuild them into unicode strings and accumulate per-day counts
    timestamp = item['timestamp']
    uid = item['uid']
    timestamp = ts2datetime(timestamp).replace('-', '')
    ts = timestamp   # date string, e.g. '20130907'
    word_count = {}
    for w in sw_list:
        word = "".join([chr(x) for x in w])
        word = word.decode('utf-8')
        if word not in word_count:
            word_count[word] = 1
        else:
            word_count[word] += 1
    try:
        sensitive_count_string = r_cluster.hget('sensitive_'+str(ts), str(uid))
        sensitive_count_dict = json.loads(sensitive_count_string)
        for word in word_count:
            count = word_count[word]
            if word in sensitive_count_dict:
                sensitive_count_dict[word] += count
            else:
                sensitive_count_dict[word] = count
        r_cluster.hset('sensitive_'+str(ts), str(uid), json.dumps(sensitive_count_dict))
    except:
        # no existing counts for this uid today
        r_cluster.hset('sensitive_'+str(ts), str(uid), json.dumps(word_count))
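Example 4 expects sw_list to contain each sensitive word as a list of UTF-8 byte values rather than as a string. A sketch of building such an argument (the word and uid are hypothetical):

word = u'敏感词'                               # hypothetical sensitive word
w = [ord(b) for b in word.encode('utf-8')]     # its UTF-8 bytes as a list of ints
cal_sensitive_words_work({'uid': '1234567890', 'timestamp': 1378512000}, [w])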
Example 5: cal_class_ratio
def cal_class_ratio():
    # scan one day of per-user activity counts and bucket users by three
    # binary indicators: retweet count, be-retweeted count, and weibo count
    ratio_results = {}
    date = '2013-09-07'
    ts = datetime2ts(date)
    scan_count = 0
    scan_cursor = 0
    all_count = 0
    while 1:
        if scan_count == 1000000:
            break
        results = r_cluster.hscan('activity_'+str(ts), scan_cursor, count=1000)
        scan_cursor = results[0]
        scan_count += 1000
        for uid in results[1]:
            activity_dict_string = r_cluster.hget('activity_'+str(ts), uid)
            activity_dict = json.loads(activity_dict_string)
            weibo_count = 0
            for time_seg in activity_dict:
                weibo_count += int(activity_dict[time_seg])
            if weibo_count >= 6:
                indic_3 = '1'
            else:
                indic_3 = '0'
            retweet_results = r.hgetall('retweet_'+str(uid))
            retweet_count = len(retweet_results)
            if retweet_count >= 8:
                indic_1 = '1'
            else:
                indic_1 = '0'
            be_retweet_results = r.hgetall('be_retweet_'+str(uid))
            be_retweet_count = len(be_retweet_results)
            #print 'be_retweet_count:', be_retweet_count
            if be_retweet_count >= 9:
                indic_2 = '1'
            else:
                indic_2 = '0'
            #print 'indic_2:', indic_2
            key = indic_1 + indic_2 + indic_3
            try:
                ratio_results[key] += 1
            except KeyError:
                ratio_results[key] = 1
            # write eight type users
            '''
            if key=='001':
                writer1.writerow([uid, retweet_count, be_retweet_count, weibo_count])
            elif key=='111':
                writer2.writerow([uid, retweet_count, be_retweet_count, weibo_count])
            elif key=='101':
                writer3.writerow([uid, retweet_count, be_retweet_count, weibo_count])
            elif key=='011':
                writer4.writerow([uid, retweet_count, be_retweet_count, weibo_count])
            elif key=='110':
                writer5.writerow([uid, retweet_count, be_retweet_count, weibo_count])
            if key=='010':
                writer6.writerow([uid, retweet_count, be_retweet_count, weibo_count])
            '''
        if int(scan_cursor) == 0:
            # HSCAN has wrapped around: the whole hash has been visited
            break
    print 'ratio_results:', ratio_results
Example 6: save_activity
def save_activity(uid, ts, time_segment):
    # increment the per-day activity counter for one time segment of a user
    key = str(ts)
    try:
        activity_count_dict = r_cluster.hget('activity_' + key, str(uid))
        activity_count_dict = json.loads(activity_count_dict)
        try:
            activity_count_dict[str(time_segment)] += 1
        except KeyError:
            activity_count_dict[str(time_segment)] = 1
        r_cluster.hset('activity_' + key, str(uid), json.dumps(activity_count_dict))
    except:
        # no existing counts for this uid today
        r_cluster.hset('activity_' + key, str(uid), json.dumps({str(time_segment): 1}))
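All of the save_* helpers share the same storage pattern: one Redis hash per day ('activity_<ts>', 'ip_<ts>', 'at_<ts>', ...), one field per uid, and a JSON-encoded counter dict as the value. Reading a counter back is just the reverse of the hset calls; a sketch (the date and uid are hypothetical):

day_ts = datetime2ts('2013-09-07')
raw = r_cluster.hget('activity_' + str(day_ts), '1234567890')
activity_dict = json.loads(raw) if raw else {}   # e.g. {'10': 3, '11': 1} per time segment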
Example 7: save_at
def save_at(uid, at_uid, timestamp):
    # count how often uid @-mentions at_uid on a given day
    date = ts2datetime(timestamp)
    ts = datetime2ts(date)
    key = str(uid)
    try:
        ruid_count_string = r_cluster.hget('at_'+str(ts), str(uid))
        ruid_count_dict = json.loads(ruid_count_string)
        try:
            ruid_count_dict[str(at_uid)] += 1
        except KeyError:
            ruid_count_dict[str(at_uid)] = 1
        r_cluster.hset('at_'+str(ts), str(uid), json.dumps(ruid_count_dict))
    except:
        # no existing counts for this uid today
        r_cluster.hset('at_'+str(ts), str(uid), json.dumps({str(at_uid): 1}))
Example 8: save_city
def save_city(uid, ip, timestamp):
    # variant of save_city without the sensitive flag: per-day IP counts only
    date = ts2datetime(timestamp)
    ts = datetime2ts(date)
    key = str(uid)
    try:
        ip_count_string = r_cluster.hget('ip_'+str(ts), str(uid))
        ip_count_dict = json.loads(ip_count_string)
        try:
            ip_count_dict[str(ip)] += 1
        except KeyError:
            ip_count_dict[str(ip)] = 1
        r_cluster.hset('ip_'+str(ts), str(uid), json.dumps(ip_count_dict))
    except:
        # no existing counts for this uid today
        r_cluster.hset('ip_'+str(ts), str(uid), json.dumps({str(ip): 1}))
Example 9: save_city_timestamp
def save_city_timestamp(uid, ip, timestamp):
    # record, per day and per IP, an '&'-joined string of the timestamps at which the IP was seen
    date = ts2datetime(timestamp)
    ts = datetime2ts(date)
    try:
        ip_timestamp_string = r_cluster.hget('new_ip_'+str(ts), str(uid))
        ip_timestamp_string_dict = json.loads(ip_timestamp_string)
        try:
            add_string = '&' + str(timestamp)
            ip_timestamp_string_dict[str(ip)] += add_string
        except KeyError:
            ip_timestamp_string_dict[str(ip)] = str(timestamp)
        r_cluster.hset('new_ip_'+str(ts), str(uid), json.dumps(ip_timestamp_string_dict))
    except:
        # no existing record for this uid today
        r_cluster.hset('new_ip_'+str(ts), str(uid), json.dumps({str(ip): str(timestamp)}))
Example 10: get_attr_geo_track
def get_attr_geo_track(uid_list):
    # for each of the past 7 days, merge the users' per-day IP counts,
    # map them to geo locations, and keep the top two locations per day
    date_results = []   # e.g. [['2013-09-01', [(geo1, count1), (geo2, count2)]], ...]
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    #test
    now_date = '2013-09-08'
    ts = datetime2ts(now_date)
    for i in range(7, 0, -1):
        timestamp = ts - i*24*3600
        #print 'timestamp:', ts2datetime(timestamp)
        ip_dict = dict()
        results = r_cluster.hmget('ip_'+str(timestamp), uid_list)
        #print 'results:', results
        for item in results:
            if item:
                item_dict = json.loads(item)
                #print 'item_dict:', item_dict
                for ip_item in item_dict:
                    try:
                        ip_dict[ip_item] += item_dict[ip_item]
                    except KeyError:
                        ip_dict[ip_item] = item_dict[ip_item]
        geo_dict = ip2geo(ip_dict)
        sort_geo_dict = sorted(geo_dict.items(), key=lambda x: x[1], reverse=True)
        date_key = ts2datetime(timestamp)
        date_results.append([date_key, sort_geo_dict[:2]])
    #print 'results:', date_results
    return {'geo_track': json.dumps(date_results)}
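A call sketch for Example 10; the uids are hypothetical, and ip2geo is assumed to map an {ip: count} dict to a {geo: count} dict, as the code above implies.

track = get_attr_geo_track(['1234567890', '2345678901'])
# track['geo_track'] is a JSON string such as
# '[["2013-09-01", [["beijing", 12], ["shanghai", 3]]], ...]' covering 7 days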
Example 11: update_day_hashtag
def update_day_hashtag(uid_list):
    # count, for each user, on how many of the past WEEK days each hashtag appeared
    results = {}
    all_results = {}
    now_ts = time.time()
    #run_type
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = test_ts
    for i in range(WEEK, 0, -1):
        ts = now_date_ts - DAY*i
        count = 0
        hashtag_results = r_cluster.hmget('hashtag_'+str(ts), uid_list)
        for uid in uid_list:
            if uid not in results:
                results[uid] = {}
            hashtag_item = hashtag_results[count]
            count += 1   # hmget results are positional, so advance with the uid list
            if hashtag_item:
                hashtag_dict = json.loads(hashtag_item)
            else:
                hashtag_dict = {}
            for hashtag in hashtag_dict:
                try:
                    results[uid][hashtag] += 1
                except KeyError:
                    results[uid][hashtag] = 1
    for uid in uid_list:
        user_hashtag_dict = results[uid]
        hashtag_string = '&'.join(user_hashtag_dict.keys())
        all_results[uid] = {'hashtag': hashtag_string, 'hashtag_dict': user_hashtag_dict}
    return all_results
Example 12: filter_activity
def filter_activity(user_set):
    # drop users whose per-segment weibo count ever exceeds activity_threshold
    # during the past 7 days; the remaining users pass the filter
    results = []
    now_date = ts2datetime(time.time())
    # test
    now_date = '2013-09-08'
    ts = datetime2ts(now_date) - 24*3600
    date = ts2datetime(ts)
    #print 'date:', date
    timestamp = datetime2ts(date)
    for user in user_set:
        over_count = 0
        for i in range(0, 7):
            ts = timestamp - 3600*24*i
            result = r_cluster.hget('activity_'+str(ts), str(user))
            if result:
                items_dict = json.loads(result)
                for item in items_dict:
                    weibo_count = items_dict[item]
                    if weibo_count > activity_threshold:
                        over_count += 1
        if over_count == 0:
            results.append(user)
        else:
            writer.writerow([user, 'activity'])
    print 'after filter activity:', len(results)
    return results
Example 13: filter_activity
def filter_activity(user_set):
    # same filter as Example 12, but the reference date depends on RUN_TYPE
    results = []
    #run_type
    if RUN_TYPE == 1:
        now_date = ts2datetime(time.time())
    else:
        now_date = RUN_TEST_TIME
    ts = datetime2ts(now_date) - DAY
    date = ts2datetime(ts)
    timestamp = datetime2ts(date)
    for user in user_set:
        over_count = 0
        for i in range(0, 7):
            ts = timestamp - DAY*i
            result = r_cluster.hget('activity_'+str(ts), str(user))
            if result:
                items_dict = json.loads(result)
                for item in items_dict:
                    weibo_count = items_dict[item]
                    if weibo_count > activity_threshold:
                        over_count += 1
        if over_count == 0:
            results.append(user)
        else:
            writer.writerow([user, 'activity'])
    return results
Example 14: filter_activity
def filter_activity(user_set):
    # variant that only inspects the busiest time segment of each day
    results = []
    now_date = ts2datetime(time.time())
    now_date = '2013-09-08'   # test date
    ts = datetime2ts(now_date) - 24*3600
    date = ts2datetime(ts)
    timestamp = datetime2ts(date)
    for user in user_set:
        over_count = 0
        for i in range(0, 7):
            ts = timestamp - 3600*24*i
            result = r_cluster.hget('activity_'+str(ts), str(user))
            if result:
                item_dict = json.loads(result)
                sorted_dict = sorted(item_dict.iteritems(), key=lambda asd: asd[1], reverse=True)
                if sorted_dict[0][1] > activity_threshold:
                    over_count = 1
        if over_count == 0:
            results.append(user)
        else:
            writer.writerow([user, 'activity'])
    print 'after filter activity: ', len(results)
    return results
Example 15: cal_hashtag_work
def cal_hashtag_work(item, sensitive):
    # like Example 2, but sensitive posts are accumulated in a separate
    # 'sensitive_hashtag_<date>' hash, and the key uses a 'YYYYMMDD' date string
    text = item['text']
    uid = item['uid']
    timestamp = item['timestamp']
    ts = ts2datetime(timestamp).replace('-', '')
    if isinstance(text, str):
        text = text.decode('utf-8', 'ignore')
    RE = re.compile(u'#([a-zA-Z-_⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]+)#', re.UNICODE)
    hashtag_list = RE.findall(text)
    if hashtag_list:
        hashtag_dict = {}
        for hashtag in hashtag_list:
            try:
                hashtag_dict[hashtag] += 1
            except KeyError:
                hashtag_dict[hashtag] = 1
        try:
            if sensitive:
                hashtag_count_string = r_cluster.hget('sensitive_hashtag_'+str(ts), str(uid))
            else:
                hashtag_count_string = r_cluster.hget('hashtag_'+str(ts), str(uid))
            hashtag_count_dict = json.loads(hashtag_count_string)
            for hashtag in hashtag_dict:
                count = hashtag_dict[hashtag]
                try:
                    hashtag_count_dict[hashtag] += count
                except KeyError:
                    hashtag_count_dict[hashtag] = count
            if sensitive:
                r_cluster.hset('sensitive_hashtag_'+str(ts), str(uid), json.dumps(hashtag_count_dict))
            else:
                r_cluster.hset('hashtag_'+str(ts), str(uid), json.dumps(hashtag_count_dict))
        except:
            # no existing counts for this uid today
            if sensitive:
                r_cluster.hset('sensitive_hashtag_'+str(ts), str(uid), json.dumps(hashtag_dict))
            else:
                r_cluster.hset('hashtag_'+str(ts), str(uid), json.dumps(hashtag_dict))