本文整理汇总了 Python 中 utilities.Utilities.clean_tweet 方法的典型用法代码示例。如果您正苦于以下问题:Python Utilities.clean_tweet 方法的具体用法?Python Utilities.clean_tweet 怎么用?Python Utilities.clean_tweet 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 utilities.Utilities 的用法示例。
下文共展示了 Utilities.clean_tweet 方法的 1 个代码示例,示例默认按受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: on_data
# 需要导入模块: from utilities import Utilities [as 别名]
# 或者: from utilities.Utilities import clean_tweet [as 别名]
def on_data(self,data):
try:
data = json.loads(data)
newd = {}
# Get Tweet
tweet = Utilities.clean_tweet(data['text'])
for key in self.recent_tweets:
#print Utilities.similarity(key,tweet)
if Utilities.similarity(key,tweet) > 70:
return
'''
if tweet in self.recent_tweets:
return
else:
'''
if len(self.recent_tweets) > 50:
self.recent_tweets.popitem(last=False)
self.recent_tweets[tweet] = True
#print tweet
# Get Redirected url
try:
url_name = Utilities.get_redirected_url(str(data['entities']['urls'][0]['expanded_url']))
except:
return
raise BaseException("Url for tweet did not exist")
# Get shortened url for key --> Upto 5th '/' or entire address (whichever is shorter)
url_name = Utilities.get_shortened_url(url_name).lower()
#Get timestamp
timestamp = str(data['created_at'])
# Verify authenticity of website by checking if it has the word coupon
# If it does , assume it is not a vendor site. Maybe blog, maybe coupon site
try:
Utilities.check_url_validity(url_name)
except:
return
raise BaseException("Url was not a valid site")
with open("x.txt","a") as f:
f.write(tweet + '\n')
f.write("--------------------" + '\n')
# Code to extract important information from this tweet
#self.tweets += 1
#print tweet
#print "Tweet Number : " + str(self.tweets)
e = Extraction()
code,date = e.extract_all(tweet)
if not code:
#print " --------------- "
return
raise BaseException("Did not have coupon code information")
if not date :
date = 183600
else :
self.tweets_with_dates += 1
self.total_expiry_time += date
self.exp_time.append(date/3600)
print self.tweets_with_dates, int(numpy.median(self.exp_time))
#print date
#print self.tweets_with_dates
print tweet
#print " ----------------------------------- "
#print "Tweet : ",
#print "Url : ",
#print url_name
#print "Date : "
#print "Coupons : " + str(self.tweets_with_coupons)
#print "Dates : " + str(self.tweets_with_dates)
#print "Total Expiry Time :" + str(self.total_expiry_time/3600) + "hours"
#print "Avg Expiry Time :" + str((self.total_expiry_time/(self.tweets_with_dates+1))/3600) + "hours"
print '--------------------------------------'
#print "CODE : " + code
key = url_name + ':::' + code
#print "KEY : " + key
#print "Tweet : "
#print tweet
#print "Url : ",
#print url_name
#print " ----------------------------------- "
ds = DataStore()
#print url_name,code,date
#get outer url - url uptil 3 '/'s . eg - http://www.etsy.com/
outer_url = "parent::"+Utilities.get_shortened_url(url_name,3)
ds.insert(key,url_name,code,tweet,date,outer_url)
#.........这里部分代码省略.........