本文整理汇总了 Python 中 DataModel.addProcessedFile 方法的典型用法代码示例。如果您正苦于以下问题：Python DataModel.addProcessedFile 方法的具体用法？Python DataModel.addProcessedFile 怎么用？Python DataModel.addProcessedFile 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 DataModel 的用法示例。
在下文中一共展示了DataModel.addProcessedFile方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: process_files
# 需要导入模块: import DataModel [as 别名]
# 或者: from DataModel import addProcessedFile [as 别名]
def process_files(global_config, attr_definitions, input_dir, recursive=True):
    """Process every pending ``Team*.txt`` data file found via ``get_files``.

    Opens a database session, reads the ignore list, collects candidate files,
    and for each file that is neither ignored nor already processed calls
    ``process_file`` and then records it with ``DataModel.addProcessedFile``.
    The session is committed at the end.

    Args:
        global_config: Mapping read for the ``"db_name"``, ``"this_season"``
            and ``"logger"`` entries; also forwarded to the helper functions.
        attr_definitions: Passed through unchanged to ``process_file``.
        input_dir: Directory prefix forwarded to ``get_files``. It is
            concatenated directly with ``"IgnoreFiles.txt"``, so it must
            already end with a path separator.
        recursive: Forwarded unchanged to ``get_files``.
    """
    start_time = datetime.datetime.now()

    # Initialize the database session connection
    db_name = global_config["db_name"] + global_config["this_season"]
    session = DbSession.open_db_session(db_name)

    # NOTE(review): this flag is set below but never read in the visible code;
    # presumably the rest of the original function consumes it - confirm.
    some_files_processed = False

    # Read the ignore file list config on every call. Any files in the ignore
    # list will be skipped.
    ignore_filelist = read_ignore_filelist_cfg(input_dir + "IgnoreFiles.txt")

    # Select all files that conform to the file naming format Team*.txt. The
    # dot is escaped so that only a literal ".txt" is accepted; unescaped it
    # would match any character before "txt".
    file_regex = re.compile(r"Team[a-zA-Z0-9_]+\.txt")
    files = get_files(global_config, session, db_name, input_dir, file_regex, recursive)

    logger = global_config["logger"]
    func_name = process_files.__name__

    if files:
        log_msg = "files retrieved, elapsed time - %s" % (str(datetime.datetime.now() - start_time))
        # Parenthesized single-argument print behaves the same on Python 2 and 3.
        print(log_msg)
        logger.debug("%s - %s" % (func_name, log_msg))
        logger.debug("%s - %d Files to be processed" % (func_name, len(files)))

    # Process data files
    for data_filename in files:
        # If the file is on the ignore list (quarantined), then skip it
        if data_filename.split("/")[-1] in ignore_filelist:
            logger.debug("%s - Ignoring file: %s" % (func_name, data_filename))
            continue

        # Make sure that the data file has not already been processed. We have
        # seen cases where the data file gets inserted into the list of files
        # to be processed more than once.
        if isFileProcessed(global_config, session, db_name, data_filename):
            logger.debug("%s - Skipping file: %s, already processed" % (func_name, data_filename))
            continue

        try:
            logger.debug("%s - Processing file: %s" % (func_name, data_filename))
            process_file(global_config, session, attr_definitions, data_filename)
        except Exception as e:
            logger.debug("%s - Error processing file: %s" % (func_name, data_filename))
            # log the exception but continue processing other files
            log_exception(logger, e)

        # Add the file to the set of processed files so that we don't process
        # it again. Do it outside the try/except block so that we don't try to
        # process a bogus file over and over again.
        DataModel.addProcessedFile(session, data_filename)
        some_files_processed = True

    # Commit all updates to the database.
    # NOTE(review): the excerpt's indentation was lost; the commit is placed
    # once after the loop - confirm it was not meant to run per file.
    session.commit()
示例2: process_files
# 需要导入模块: import DataModel [as 别名]
# 或者: from DataModel import addProcessedFile [as 别名]
def process_files(global_config, attr_definitions, input_dir, recursive=True):
    """Run ``process_file`` on every ``Team*.txt`` file returned by ``get_files``.

    Opens a database session, collects the candidate files, processes each one
    (logging and swallowing per-file errors), records every file with
    ``DataModel.addProcessedFile`` and finally commits the session.

    Args:
        global_config: Mapping read for the ``'db_name'`` and ``'logger'``
            entries; also forwarded to the helper functions.
        attr_definitions: Passed through unchanged to ``process_file``.
        input_dir: Forwarded unchanged to ``get_files``.
        recursive: Forwarded unchanged to ``get_files``.
    """
    start_time = datetime.datetime.now()

    # Initialize the database session connection
    db_name = global_config['db_name']
    session = DbSession.open_db_session(db_name)

    # NOTE(review): this flag is set below but never read in the visible code;
    # presumably the rest of the original function consumes it - confirm.
    some_files_processed = False

    # Select all files that conform to the file naming format Team*.txt. The
    # dot is escaped so that only a literal ".txt" is accepted; unescaped it
    # would match any character before "txt".
    file_regex = re.compile(r'Team[a-zA-Z0-9_]+\.txt')
    files = get_files(global_config, session, db_name, input_dir, file_regex, recursive)

    # Parenthesized single-argument print behaves the same on Python 2 and 3.
    elapsed = datetime.datetime.now() - start_time
    print('files retrieved, elapsed time - %s' % (str(elapsed)))

    # Process data files
    for data_filename in files:
        try:
            process_file(global_config, session, attr_definitions, data_filename)
        except Exception as e:
            # log the exception but continue processing other files
            log_exception(global_config['logger'], e)

        # Add the file to the set of processed files so that we don't process
        # it again. Do it outside the try/except block so that we don't try to
        # process a bogus file over and over again.
        DataModel.addProcessedFile(session, data_filename)
        some_files_processed = True

    # Commit all updates to the database.
    # NOTE(review): the excerpt's indentation was lost; the commit is placed
    # once after the loop - confirm it was not meant to run per file.
    session.commit()
示例3: process_files
# 需要导入模块: import DataModel [as 别名]
# 或者: from DataModel import addProcessedFile [as 别名]
def process_files(session, db_name, attr_definitions, input_dir, recursive, test):
    """Run ``process_file`` on every ``Team*.txt`` file returned by ``get_files``.

    Each file is processed (per-file errors are logged and swallowed), then
    recorded with ``DataModel.addProcessedFile``; the session is committed at
    the end.

    Args:
        session: Database session passed to ``get_files``, ``process_file``
            and ``DataModel.addProcessedFile``, and committed before returning.
        db_name: Forwarded unchanged to ``get_files``.
        attr_definitions: Passed through unchanged to ``process_file``.
        input_dir: Forwarded unchanged to ``get_files``.
        recursive: Forwarded unchanged to ``get_files``.
        test: Forwarded unchanged to ``get_files``.
    """
    # Select all files that conform to the file naming format Team*.txt. The
    # dot is escaped so that only a literal ".txt" is accepted; unescaped it
    # would match any character before "txt".
    file_regex = re.compile(r'Team[a-zA-Z0-9_]+\.txt')
    files = get_files(session, db_name, input_dir, file_regex, recursive, test)

    # Process data files
    for data_filename in files:
        try:
            process_file(session, attr_definitions, data_filename)
        except Exception as e:
            # log the exception but continue processing other files
            log_exception(e)

        # Add the file to the set of processed files so that we don't process
        # it again. Do it outside the try/except block so that we don't try to
        # process a bogus file over and over again.
        DataModel.addProcessedFile(session, data_filename)

    # Commit all updates to the database.
    # NOTE(review): the excerpt's indentation was lost; the commit is placed
    # once after the loop - confirm it was not meant to run per file.
    session.commit()