本文整理汇总了Python中DBManager.DBManager.insert_tweet_objects方法的典型用法代码示例。如果您正苦于以下问题:Python DBManager.insert_tweet_objects方法的具体用法?Python DBManager.insert_tweet_objects怎么用?Python DBManager.insert_tweet_objects使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类DBManager.DBManager的用法示例。
在下文中一共展示了DBManager.insert_tweet_objects方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from DBManager import DBManager [as 别名]
# 或者: from DBManager.DBManager import insert_tweet_objects [as 别名]
class ImportManager:
"""
This class handles importing tweets to the database from various sources such as text files
"""
__file_path = None
__components_in_a_line = None
def __init__(self):
    """
    Builds the collaborators and default settings used while importing.
    Takes no arguments; the file to import is passed to run() instead.
    :return: ImportManager instance
    """
    # collaborators used by the other methods of this class
    self.__db_manager = DBManager()
    self.__helper = GeneralHelpers()
    self.__preprocess_manager = PreprocessManager()
    # tweet id -> class lookup, filled in while a file is parsed
    self.__tweets_classes_dictionary = dict()
    # magic numbers: parts expected in each line of the input file, and a
    # per-request tweet limit (presumably for batching; its use is not
    # visible in this part of the file)
    self.__components_in_a_line, self.__max_num_of_tweets_at_once = 2, 100
def run(self, file_path_to_import):
    """
    Runs the whole import for one file: parses it, sets duplicates aside,
    retrieves the tweets from Twitter, builds db model objects, inserts them
    and prints a summary of the counts.
    :param file_path_to_import: path of the txt file to import
    :return: void
    """
    self.__file_path = file_path_to_import

    # [tweet_id, class] pairs read from the file, also kept on the instance
    parsed_pairs = self._parse_tweets_from_file()
    self.__tweets_with_classes = parsed_pairs

    # separating unique entries from duplicated ones
    unique_tweets, duplicate_tweets = self._find_duplicates(parsed_pairs)
    duplicate_count = len(duplicate_tweets)
    print("Found "+str(duplicate_count)+" duplicate tweets.")
    self.__helper.pretty_print_list(duplicate_tweets, "Duplicate tweets:")
    print("Continuing with unique ones.")

    # only the ids are needed to ask Twitter for the tweets
    ids_to_fetch = self._get_tweets_ids(unique_tweets)
    fetched_information = self._retrieve_tweets_from_twitter(ids_to_fetch)

    # some tweets may not be found on Twitter
    missing_on_twitter = self._find_not_found_tweets_on_twitter(fetched_information)

    # db model objects, then the actual insert
    tweet_objects = self._create_tweet_objects(fetched_information)
    success_count, not_imported_tweets = self.__db_manager.insert_tweet_objects(tweet_objects)

    # counts for the summary, taken after every step has finished
    unique_count = len(unique_tweets)
    missing_count = len(missing_on_twitter)
    rejected_count = len(not_imported_tweets)
    accounted_for = missing_count + rejected_count + success_count

    print("\n")
    print('-'*10)
    print('Total Math:')
    print('Unique tweets:'+str(unique_count))
    print('Tweets not found:'+str(missing_count))
    print('Tweets not inserted:'+str(rejected_count))
    print('Tweets OK:'+str(success_count))
    # both sides should match when every unique tweet is accounted for
    print(str(unique_count)+"=="+str(accounted_for))
def _parse_tweets_from_file(self):
"""
Parses tweet ids and classes from txt file
:return: list, holds [[124214124, positive],...]
"""
characters_to_remove = ["'", '"', '\n', ' ']
with open(self.__file_path, 'r') as tweets_ids_file:
tweets_with_classes = []
self.tweets_classes_dictionary = {}
# Iterating over lines in txt file
for line in tweets_ids_file:
line_components = line.split(",")
# if there are two components in a line. E.g. "121412412412", "positive"
if self.__components_in_a_line == len(line_components):
# iterating over components
for index, component in enumerate(line_components):
# removing unnecessary characters
line_components[index] = self.__preprocess_manager.remove_characters_in_string(component,
characters_to_remove)
tweets_with_classes.append(line_components)
self.__tweets_classes_dictionary.update({line_components[0]:line_components[1]})
return tweets_with_classes
def _find_duplicates(self, tweets_with_classes):
"""
#.........这里部分代码省略.........