本文整理汇总了Python中library.file_io.FileIO.getFileByDay方法的典型用法代码示例。如果您正苦于以下问题：Python FileIO.getFileByDay方法的具体用法？Python FileIO.getFileByDay怎么用？Python FileIO.getFileByDay使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类library.file_io.FileIO的用法示例。
在下文中一共展示了FileIO.getFileByDay方法的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: writeTweetsForDay
# 需要导入模块: from library.file_io import FileIO [as 别名]
# 或者: from library.file_io.FileIO import getFileByDay [as 别名]
def writeTweetsForDay(currentDay):
    """Export the tweets of one day to a JSON file, then gzip that file.

    Queries the ``tweets`` collection for documents whose ``ca`` value falls
    in the 24 hours starting at ``currentDay``. Tweets for which
    ``GenerateHoustonTweetsData.getScreenName`` returns ``None`` are skipped;
    every other tweet is written through ``FileIO.writeToFileAsJson`` to
    ``houston_data_folder + FileIO.getFileByDay(currentDay)``.

    Args:
        currentDay: datetime marking the start of the day to export
            (presumably midnight -- confirm against callers).
    """
    fileName = houston_data_folder + FileIO.getFileByDay(currentDay)
    # Half-open window [currentDay, currentDay + 1 day). The previous bounds
    # ($gt currentDay, $lt currentDay + 86399s) silently dropped tweets stamped
    # exactly at the start of the day and those in its final second.
    dayWindow = {'$gte': currentDay, '$lt': currentDay + timedelta(days=1)}
    for tweet in tweets.find({'ca': dayWindow}, fields=['ca', 'tx', 'uid']):
        # Look the screen name up once and reuse it for the output record.
        screenName = GenerateHoustonTweetsData.getScreenName(tweet['uid'])
        if screenName is None:
            continue
        data = {
            'id': tweet['_id'],
            'text': tweet['tx'],
            'created_at': getStringRepresentationForTweetTimestamp(tweet['ca']),
            'user': {'screen_name': screenName},
        }
        FileIO.writeToFileAsJson(data, fileName)
    # NOTE(review): fileName is interpolated into a shell command; this is only
    # safe while the folder and file name contain no shell metacharacters.
    os.system('gzip %s' % fileName)
示例2: writeClusters
# 需要导入模块: from library.file_io import FileIO [as 别名]
# 或者: from library.file_io.FileIO import getFileByDay [as 别名]
def writeClusters(hdStreamClusteringObject, currentMessageTime):
print '\n\n\nEntering:', currentMessageTime, len(hdStreamClusteringObject.phraseTextAndDimensionMap), len(hdStreamClusteringObject.phraseTextToPhraseObjectMap), len(hdStreamClusteringObject.clusters)
iterationData = {'time_stamp': getStringRepresentationForTweetTimestamp(currentMessageTime),
'clusters': map(TwitterCrowdsSpecificMethods.getClusterInMapFormat, [cluster for cluster, _ in sorted(StreamCluster.iterateByAttribute(hdStreamClusteringObject.clusters.values(), 'length'), key=itemgetter(1), reverse=True)]),
'settings': Settings.getSerialzedObject(hdStreamClusteringObject.stream_settings)
}
FileIO.writeToFileAsJson(iterationData, hdStreamClusteringObject.stream_settings['lsh_clusters_folder']+FileIO.getFileByDay(currentMessageTime))
print 'Leaving: ', currentMessageTime, len(hdStreamClusteringObject.phraseTextAndDimensionMap), len(hdStreamClusteringObject.phraseTextToPhraseObjectMap), len(hdStreamClusteringObject.clusters)
示例3: iterateHoustonClusters
# 需要导入模块: from library.file_io import FileIO [as 别名]
# 或者: from library.file_io.FileIO import getFileByDay [as 别名]
def iterateHoustonClusters(startingDay=datetime(2010,11,1), endingDay=datetime(2010,11,19)):
    """Yield ``(time, cluster)`` pairs from the stored Houston cluster files.

    Steps one day at a time from ``startingDay`` through ``endingDay``
    (inclusive). For each day it reads the JSON records of the file named by
    ``FileIO.getFileByDay`` inside
    ``houston_twitter_stream_settings.lsh_clusters_folder``. Every entry of a
    record's ``'clusters'`` list is converted with
    ``TwitterCrowdsSpecificMethods.getClusterFromMapFormat`` and paired with
    the record's parsed ``'time_stamp'``.

    Nothing is yielded when ``startingDay`` is later than ``endingDay``.
    """
    day = startingDay
    while day <= endingDay:
        clustersFile = houston_twitter_stream_settings.lsh_clusters_folder + FileIO.getFileByDay(day)
        for record in FileIO.iterateJsonFromFile(clustersFile):
            recordTime = getDateTimeObjectFromTweetTimestamp(record['time_stamp'])
            for clusterMap in record['clusters']:
                yield (recordTime, TwitterCrowdsSpecificMethods.getClusterFromMapFormat(clusterMap))
        day += timedelta(days=1)
示例4: iterateTweetsFromHouston
# 需要导入模块: from library.file_io import FileIO [as 别名]
# 或者: from library.file_io.FileIO import getFileByDay [as 别名]
def iterateTweetsFromHouston(houstonDataStartTime=datetime(2010,11,1), houstonDataEndTime=datetime(2011,5,30)):
    """Yield every tweet stored in the daily Houston ``.gz`` files.

    Steps one day at a time from ``houstonDataStartTime`` through
    ``houstonDataEndTime`` (inclusive). For each day it yields, unchanged,
    whatever ``TwitterIterators.iterateFromFile`` produces for
    ``<twitter_users_tweets_folder><FileIO.getFileByDay(day)>.gz``.

    Nothing is yielded when the start time is later than the end time.
    """
    day = houstonDataStartTime
    while day <= houstonDataEndTime:
        dayFile = houston_twitter_stream_settings.twitter_users_tweets_folder + '%s.gz' % FileIO.getFileByDay(day)
        for tweet in TwitterIterators.iterateFromFile(dayFile):
            yield tweet
        day += timedelta(days=1)
示例5: iterateTweetsFromExperts
# 需要导入模块: from library.file_io import FileIO [as 别名]
# 或者: from library.file_io.FileIO import getFileByDay [as 别名]
def iterateTweetsFromExperts(expertsDataStartTime=datetime(2011,3,19), expertsDataEndTime=datetime(2011,4,12)):
    """Yield tweets written by known experts from the daily ``.gz`` files.

    Steps one day at a time from ``expertsDataStartTime`` through
    ``expertsDataEndTime`` (inclusive), reading each day's file with
    ``TwitterIterators.iterateFromFile``. A tweet is considered only when its
    ``['user']['id_str']`` is contained in the value returned by
    ``getExperts()``. The generator stops completely at the first expert
    tweet whose parsed ``'created_at'`` is later than ``expertsDataEndTime``.
    """
    experts = getExperts()
    day = expertsDataStartTime
    while day <= expertsDataEndTime:
        dayFile = experts_twitter_stream_settings.twitter_users_tweets_folder + '%s.gz' % FileIO.getFileByDay(day)
        for tweet in TwitterIterators.iterateFromFile(dayFile):
            # Tweets from non-experts are skipped without ending the iteration.
            if tweet['user']['id_str'] not in experts:
                continue
            if getDateTimeObjectFromTweetTimestamp(tweet['created_at']) <= expertsDataEndTime:
                yield tweet
            else:
                # An expert tweet past the end time terminates the generator.
                return
        day += timedelta(days=1)