This article collects typical usage examples of the tweepy.API.statuses_lookup method in Python. If you have been wondering what API.statuses_lookup does, how to call it, or what real code that uses it looks like, the curated examples below may help. You can also read further about the containing class, tweepy.API.
Shown below are 4 code examples of the API.statuses_lookup method, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
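Before the examples, a minimal sketch of the method itself, assuming tweepy 3.x with placeholder credentials and IDs (later tweepy versions rename the method to lookup_statuses):

import tweepy

# Placeholder credentials; substitute your own application's keys.
auth = tweepy.OAuthHandler("CONSUMER_KEY", "CONSUMER_SECRET")
auth.set_access_token("ACCESS_TOKEN", "ACCESS_TOKEN_SECRET")
api = tweepy.API(auth)

# statuses_lookup accepts up to 100 tweet IDs per call; deleted or
# inaccessible tweets are silently omitted from the result.
tweet_ids = [20, 21]  # placeholder IDs
for status in api.statuses_lookup(tweet_ids):
    print(status.id, status.text)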
Example 1: len
# Required import: from tweepy import API [as alias]
# Or: from tweepy.API import statuses_lookup [as alias]
n = last_batch
if n != 0:
    max_index = cut_off_rt_index[(retweets_per_batch * n) - 1]
else:
    max_index = -1
while n < len(id_lists):
    m = 0
    start_index = max_index + 1
    while m < retweets_per_batch / 100:
        tweets = api.statuses_lookup(id_lists[n].values.tolist())
        for tweet in tweets:
            index = tweettable[(tweettable['id'] == tweet.id)].index.values[0]
            # print(index)
            if hasattr(tweet, 'retweeted_status'):
                # restore the untruncated "RT @user: text" form of the retweet
                tweettable.loc[index, 'text'] = (
                    'RT @' + tweet.retweeted_status.user.screen_name + ': ' +
                    tweet.retweeted_status.text)
                untruncated_number += 1
                # find out whether this tweet has the highest index so far
                # (order was not preserved in the process),
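Example 1 hinges on one detail of the Twitter data model: a Status object carries a retweeted_status attribute only when it is a retweet, and that nested status holds the untruncated original text. A standalone sketch of the check, assuming api is an authenticated tweepy.API and tweet_ids holds at most 100 IDs:

# Sketch: recover the full 'RT @user: text' form for retweets.
for tweet in api.statuses_lookup(tweet_ids):
    if hasattr(tweet, 'retweeted_status'):
        original = tweet.retweeted_status
        text = 'RT @' + original.user.screen_name + ': ' + original.text
    else:
        text = tweet.text
    print(text)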
Example 2: __init__
# Required import: from tweepy import API [as alias]
# Or: from tweepy.API import statuses_lookup [as alias]
import time

import pymongo
from tweepy import API, OAuthHandler, TweepError

# TwitterAuth is an external module/class holding the app credentials.

class RetweetUpdater:
    # perform authentication
    def __init__(self):
        auth = OAuthHandler(TwitterAuth.consumer_key, TwitterAuth.consumer_secret)
        auth.set_access_token(TwitterAuth.access_token, TwitterAuth.access_token_secret)
        self.api = API(auth)

    def statusLookup(self, tweetIDs, collection):
        for attempt in range(10):
            try:
                tweets = self.api.statuses_lookup(tweetIDs)
                # extract features of each tweet
                for tweet in tweets:
                    tweetID = tweet.id
                    doc = collection.find_one({"id": tweetID})
                    doc['retweet_count'] = tweet.retweet_count
                    collection.save(doc)
                break
            except TweepError:
                print("tweepy error - try again")

    def updateCount(self, outputDatabaseName, collectionName):
        # refresh the retweet count of every tweet stored in the collection
        try:
            print("Connecting to database")
            conn = pymongo.MongoClient()
            outputDB = conn[outputDatabaseName]
            collection = outputDB[collectionName]
            apiCalls = 0
            tweetCount = 0
            tweetIDs = []
            docs = []
            print("Start Twitter API calls")
            # Iterate over all documents
            for doc in collection.find(no_cursor_timeout=True):
                tweetID = doc['id']
                doc['retweet_count'] = -1
                collection.save(doc)
                tweetIDs.append(tweetID)
                docs.append(doc)
                tweetCount += 1
                # when 100 tweet IDs have been collected, make API call
                if len(tweetIDs) == 100:
                    self.statusLookup(tweetIDs, collection)
                    tweetIDs = []
                    docs = []
                    apiCalls += 1
                    print("Progress: " + str(tweetCount))
                    # after 180 API calls, wait for 15 minutes (Twitter rate limit)
                    if apiCalls == 180:
                        print("Sleep for 15 minutes - " + time.strftime('%X') + "\n")
                        time.sleep(15 * 60)
                        apiCalls = 0
            # Lookup remaining tweets
            if len(tweetIDs) != 0:
                self.statusLookup(tweetIDs, collection)
                print("Progress: " + str(tweetCount))
            # Remove tweets that were deleted / accounts suspended
            collection.delete_many({'retweet_count': -1})
        except pymongo.errors.ConnectionFailure as e:
            print("Could not connect to MongoDB: %s" % e)
Example 3: __init__
# Required import: from tweepy import API [as alias]
# Or: from tweepy.API import statuses_lookup [as alias]
import time

import pymongo
from tweepy import API, OAuthHandler

# TwitterAuth is an external module/class holding the app credentials.

class TwitterCrawler:
    # perform authentication
    def __init__(self):
        auth = OAuthHandler(TwitterAuth.consumer_key, TwitterAuth.consumer_secret)
        auth.set_access_token(TwitterAuth.access_token, TwitterAuth.access_token_secret)
        self.api = API(auth)

    # search for given query
    def querySearch(self, query):
        tweets = self.api.search(q=query, count=50, result_type="recent")
        # print features of each tweet
        for tweet in tweets:
            self.printFeatures(tweet)

    def statusLookup(self, tweetIDs, collection):
        tweets = self.api.statuses_lookup(tweetIDs)
        # store the raw API payload of each tweet (_json is already a dict)
        for tweet in tweets:
            collection.insert(tweet._json)

    # print features of a given tweet
    def printFeatures(self, tweet):
        # general tweet information
        print("text: " + tweet.text)
        print("retweeted: " + str(tweet.retweet_count))
        print("favorited: " + str(tweet.favorite_count))
        print("reply: " + str(tweet.in_reply_to_status_id_str is not None))
        print("created_at: " + str(tweet.created_at))
        print("language: " + tweet.lang)
        # if tweet has retweeted_status attribute, tweet is a retweet
        if hasattr(tweet, 'retweeted_status'):
            print("is a retweet: True")
        else:
            print("is a retweet: False")
        # entities information (URLs, hashtags, media, etc.);
        # .get() returns None for absent keys, so coerce with bool()
        print("hashtag: " + str(bool(tweet.entities.get('hashtags'))))
        print("media: " + str(bool(tweet.entities.get('media'))))
        print("user mentions: " + str(bool(tweet.entities.get('user_mentions'))))
        print("urls: " + str(bool(tweet.entities.get('urls'))))
        # user details
        print("followers: " + str(tweet.user.followers_count))
        print("following: " + str(tweet.user.friends_count))
        print("list appearances: " + str(tweet.user.listed_count))
        print("number of tweets: " + str(tweet.user.statuses_count))
        print("verified: " + str(tweet.user.verified) + "\n")

    def crawl(self, inputFilename, outputDatabaseName, collectionName):
        # open file containing list of tweet-IDs from 2013
        try:
            print("Connecting to database")
            conn = pymongo.MongoClient()
            outputDB = conn[outputDatabaseName]
            collection = outputDB[collectionName]
            print("Start Twitter API calls")
            with open(inputFilename) as f:
                apiCalls = 0
                tweetCount = 0
                tweetIDs = []
                for line in f:
                    # extract and collect tweet ID
                    parts = line.split()
                    tweetID = parts[0]
                    tweetIDs.append(tweetID)
                    tweetCount += 1
                    # when 100 tweet IDs have been collected, make API call
                    if len(tweetIDs) == 100:
                        self.statusLookup(tweetIDs, collection)
                        tweetIDs = []
                        apiCalls += 1
                        print("Progress: " + str(tweetCount))
                        # after 180 API calls, wait for 15 minutes (Twitter rate limit)
                        if apiCalls == 180:
                            print("Sleep for 15 minutes - " + time.strftime('%X') + "\n")
                            time.sleep(15 * 60)
                            apiCalls = 0
        except pymongo.errors.ConnectionFailure as e:
            print("Could not connect to MongoDB: %s" % e)
Example 4: open
# Required import: from tweepy import API [as alias]
# Or: from tweepy.API import statuses_lookup [as alias]
import tweepy
from avro import datafile, io

# Assumed to be defined earlier in the original script: tweet_schema (a parsed
# Avro schema), tweets (the list of tweet IDs), logger, api (an authenticated
# tweepy.API), and the helpers chunks and process_tweet.

tweet_writer = io.DatumWriter(tweet_schema)
avro_writer = datafile.DataFileWriter(
    open("../data/tweets.avro", 'wb'),
    tweet_writer,
    tweet_schema)
if avro_writer:
    logger.info("Avro schema loaded.")

# API limit on GET statuses/lookup is 100 per call.
# Rate limit is currently 180 per fifteen minutes.
# That means a delay of about five seconds per call.
# I make it six for safety.
for chunk, index in chunks(tweets, 100):
    try:
        tweet_objects = api.statuses_lookup(chunk)
    except tweepy.TweepError as e:
        logger.error("Error from tweepy: " + str(e))
        continue  # skip this chunk rather than reuse a stale/unbound result
    if tweet_objects:
        logger.info("Chunk %d (%d/%d,%f%%): %d fully hydrated tweets loaded." %
                    (index // 100, index // 100, (len(tweets) // 100) + 1,
                     float(index) / (len(tweets) / 100), len(tweet_objects)))
        for tweet in tweet_objects:
            # logger.info("Processing tweet %d" % tweet._json["id"])
            try:
                avro_ = process_tweet(tweet._json)
                avro_writer.append(avro_)
            except Exception:
                logger.error("Error processing tweet: %d" % tweet._json["id"])
                with open("error.list", "a") as f:
                    f.write(str(tweet._json["id"]) + '\n')
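Example 4 depends on a chunks helper that the excerpt does not show; judging from the call site, it yields (chunk, start_index) pairs of at most 100 IDs. One plausible implementation, inferred rather than taken from the original source:

def chunks(items, size):
    # Inferred from the call site above: yield (slice, start_index)
    # pairs of at most `size` items each.
    for start in range(0, len(items), size):
        yield items[start:start + size], start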