This page collects typical code examples of the Python method WMCore.DataStructs.LumiList.LumiList.getRuns. If you are wondering what LumiList.getRuns does, how to call it, or want worked examples, the curated code samples below may help. You can also explore the usage of the containing class, WMCore.DataStructs.LumiList.LumiList.
Below are 7 code examples of LumiList.getRuns, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python examples.
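Before the full examples, here is a minimal, hedged sketch of the method itself. It assumes a WMCore installation is available; the exact element type returned by getRuns (string vs. integer run numbers) may vary between WMCore versions.

# Minimal usage sketch (assumes WMCore is installed; return types may
# differ between WMCore versions).
from WMCore.DataStructs.LumiList import LumiList

# Build a LumiList from a compact {run: [[first, last], ...]} mapping.
lumi_list = LumiList(compactList={'1': [[1, 33], [35, 35]], '2': [[1, 45]]})

# getRuns() returns the run numbers covered by the mask.
print(lumi_list.getRuns())  # e.g. ['1', '2']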
Example 1: adjustLumisForCompletion
# Required import module: from WMCore.DataStructs.LumiList import LumiList [as alias]
# Or: from WMCore.DataStructs.LumiList.LumiList import getRuns [as alias]
def adjustLumisForCompletion(self, task, unprocessed):
"""Sets the run, lumi information in the task information for the
completion jobs. Returns True if completion jobs are needed,
otherwise False.
"""
missingDir = "automatic_splitting/missing_lumis/" #TODO in ServerUtilities to be shared with PJ
try:
available = set(os.listdir(missingDir)) & unprocessed
except OSError:
available = set()
failed = set(self.failedJobs) & unprocessed
if len(available) == 0 and len(failed) == 0:
return False
missing = LumiList()
for missingFile in available:
with open(os.path.join(missingDir, missingFile)) as fd:
self.logger.info("Adding missing lumis from job %s", missingFile)
missing = missing + LumiList(compactList=literal_eval(fd.read()))
for failedId in failed:
f = None
try:
tmpdir = tempfile.mkdtemp()
f = tarfile.open("run_and_lumis.tar.gz")
fn = "job_lumis_{0}.json".format(failedId)
f.extract(fn, path=tmpdir)
with open(os.path.join(tmpdir, fn)) as fd:
injson = json.load(fd)
missing = missing + LumiList(compactList=injson)
self.logger.info("Adding lumis from failed job %s", failedId)
finally:
if f:
f.close()
shutil.rmtree(tmpdir)
missing_compact = missing.getCompactList()
runs = missing.getRuns()
# Compact list is like
# {
# '1': [[1, 33], [35, 35], [37, 47], [49, 75], [77, 130], [133, 136]],
# '2':[[1,45],[50,80]]
# }
# Now we turn it into something like:
# lumis = ['1,33,35,35,37,47,49,75,77,130,133,136', '1,45,50,80']
# which is the format expected by buildLumiMask in the splitting algorithm
lumis = [",".join(str(l) for l in functools.reduce(lambda x, y:x + y, missing_compact[run])) for run in runs]
task['tm_split_args']['runs'] = runs
task['tm_split_args']['lumis'] = lumis
return True
Example 2: run
# Required import module: from WMCore.DataStructs.LumiList import LumiList [as alias]
# Or: from WMCore.DataStructs.LumiList.LumiList import getRuns [as alias]
#......... part of this code omitted here .........
## handled, we are sure that if we reached this point it will not raise EnvironmentException.
## But otherwise we should take this into account.
with UserTarball(name=tarFilename, logger=self.logger, config=self.config) as tb:
inputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'inputFiles', [])]
tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
configArguments['adduserfiles'] = [os.path.basename(f) for f in inputFiles]
try:
uploadResult = tb.upload(filecacheurl = filecacheurl)
except HTTPException as hte:
if 'X-Error-Info' in hte.headers:
reason = hte.headers['X-Error-Info']
reason_re = re.compile(r'\AFile size is ([0-9]*)B\. This is bigger than the maximum allowed size of ([0-9]*)B\.$')
re_match = reason_re.match(reason)
if re_match:
ISBSize = int(re_match.group(1))
ISBSizeLimit = int(re_match.group(2))
reason = "%sError%s:" % (colors.RED, colors.NORMAL)
reason += " Input sanbox size is ~%sMB. This is bigger than the maximum allowed size of %sMB." % (ISBSize/1024/1024, ISBSizeLimit/1024/1024)
ISBContent = sorted(tb.content, reverse=True)
biggestFileSize = ISBContent[0][0]
ndigits = int(math.ceil(math.log(biggestFileSize+1, 10)))
reason += "\nInput sanbox content sorted by size[Bytes]:"
for (size, name) in ISBContent:
reason += ("\n%" + str(ndigits) + "s\t%s") % (size, name)
raise ClientException(reason)
raise hte
except Exception as e:
msg = ("Impossible to calculate the checksum of the sandbox tarball.\nError message: %s.\n"
"More details can be found in %s" % (e, self.logger.logfile))
LOGGERS['CRAB3'].exception(msg) #the traceback is only printed into the logfile
raise ClientException(msg)
debugFilesUploadResult = None
with UserTarball(name=debugTarFilename, logger=self.logger, config=self.config) as dtb:
dtb.addMonFiles()
try:
debugFilesUploadResult = dtb.upload(filecacheurl = filecacheurl)
except Exception as e:
msg = ("Problem uploading debug_files.tar.gz.\nError message: %s.\n"
"More details can be found in %s" % (e, self.logger.logfile))
LOGGERS['CRAB3'].exception(msg) #the traceback is only printed into the logfile
configArguments['cacheurl'] = filecacheurl
configArguments['cachefilename'] = "%s.tar.gz" % uploadResult
if debugFilesUploadResult is not None:
configArguments['debugfilename'] = "%s.tar.gz" % debugFilesUploadResult
self.logger.debug("Result uploading input files: %(cachefilename)s " % configArguments)
# Upload list of user-defined input files to process as the primary input
userFilesList = getattr(self.config.Data, 'userInputFiles', None)
if userFilesList:
self.logger.debug("Attaching list of user-specified primary input files.")
userFilesList = map(string.strip, userFilesList)
userFilesList = [file for file in userFilesList if file]
if len(userFilesList) != len(set(userFilesList)):
msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
msg += " Duplicated entries will be removed."
self.logger.warning(msg)
configArguments['userfiles'] = set(userFilesList)
configArguments['primarydataset'] = getattr(self.config.Data, 'outputPrimaryDataset', 'CRAB_UserFiles')
lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
lumi_list = None
if lumi_mask_name:
self.logger.debug("Attaching lumi mask %s to the request" % (lumi_mask_name))
try:
lumi_list = getLumiList(lumi_mask_name, logger = self.logger)
except ValueError as ex:
msg = "%sError%s:" % (colors.RED, colors.NORMAL)
msg += " Failed to load lumi mask %s : %s" % (lumi_mask_name, ex)
raise ConfigurationException(msg)
run_ranges = getattr(self.config.Data, 'runRange', None)
if run_ranges:
run_ranges_is_valid = re.match('^\d+((?!(-\d+-))(\,|\-)\d+)*$', run_ranges)
if run_ranges_is_valid:
run_list = getRunList(run_ranges)
if lumi_list:
lumi_list.selectRuns(run_list)
if not lumi_list:
msg = "Invalid CRAB configuration: The intersection between the lumi mask and the run range is null."
raise ConfigurationException(msg)
else:
if len(run_list) > 50000:
msg = "CRAB configuration parameter Data.runRange includes %s runs." % str(len(run_list))
msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
raise ConfigurationException(msg)
lumi_list = LumiList(runs = run_list)
else:
msg = "Invalid CRAB configuration: Parameter Data.runRange should be a comma separated list of integers or (inclusive) ranges. Example: '12345,99900-99910'"
raise ConfigurationException(msg)
if lumi_list:
configArguments['runs'] = lumi_list.getRuns()
## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
lumi_mask = lumi_list.getCompactList()
configArguments['lumis'] = [str(reduce(lambda x,y: x+y, lumi_mask[run]))[1:-1].replace(' ','') for run in configArguments['runs']]
configArguments['jobtype'] = 'Analysis'
return tarFilename, configArguments
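The Data.runRange validation above hinges on a single regular expression. The following sketch isolates it so its behavior can be tested directly; the pattern is copied from the example, while getRunList itself belongs to the CRAB client and is not reproduced here.

import re

# Accepts comma-separated integers and simple "a-b" ranges; the negative
# lookahead rejects chained ranges such as "1-2-3".
RUN_RANGES_RE = re.compile(r'^\d+((?!(-\d+-))(\,|\-)\d+)*$')

for candidate in ('12345,99900-99910', '123', '1-2-3', '12,'):
    print(candidate, bool(RUN_RANGES_RE.match(candidate)))
# 12345,99900-99910 True | 123 True | 1-2-3 False | 12, False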
Example 3: run
# Required import module: from WMCore.DataStructs.LumiList import LumiList [as alias]
# Or: from WMCore.DataStructs.LumiList.LumiList import getRuns [as alias]
def run(self, filecacheurl = None):
"""
Override run() for JobType
"""
taskDict, webdir = self.getTaskDict()
addoutputfiles = literal_eval(getColumn(taskDict, 'tm_outfiles'))
tfileoutfiles = literal_eval(getColumn(taskDict, 'tm_tfile_outfiles'))
edmoutfiles = literal_eval(getColumn(taskDict, 'tm_edm_outfiles'))
jobarch = getColumn(taskDict, 'tm_job_arch')
jobsw = getColumn(taskDict, 'tm_job_sw')
sandboxFilename = os.path.join(self.workdir, 'sandbox.tar.gz')
getFileFromURL(webdir + '/sandbox.tar.gz', sandboxFilename, self.proxyfilename)
configArguments = {'addoutputfiles' : addoutputfiles,
'tfileoutfiles' : tfileoutfiles,
'edmoutfiles' : edmoutfiles,
'jobarch' : jobarch,
'jobsw' : jobsw,
}
# Maybe the user wants to change the dataset
if getattr(self.config.Data, 'inputDataset', None):
configArguments['inputdata'] = self.config.Data.inputDataset
ufc = CRABClient.Emulator.getEmulator('ufc')({'endpoint' : filecacheurl, "pycurl": True})
result = ufc.upload(sandboxFilename, excludeList = NEW_USER_SANDBOX_EXCLUSIONS)
if 'hashkey' not in result:
self.logger.error("Failed to upload source files: %s" % str(result))
raise CachefileNotFoundException
configArguments['cacheurl'] = filecacheurl
configArguments['cachefilename'] = "%s.tar.gz" % str(result['hashkey'])
# Upload list of user-defined input files to process as the primary input
userFilesList = getattr(self.config.Data, 'userInputFiles', None)
if userFilesList:
self.logger.debug("Attaching list of user-specified primary input files.")
userFilesList = map(string.strip, userFilesList)
userFilesList = [file for file in userFilesList if file]
if len(userFilesList) != len(set(userFilesList)):
msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
msg += " Duplicated entries will be removed."
self.logger.warning(msg)
configArguments['userfiles'] = set(userFilesList)
configArguments['primarydataset'] = getattr(self.config.Data, 'outputPrimaryDataset', 'CRAB_UserFiles')
lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
lumi_list = None
if lumi_mask_name:
self.logger.debug("Attaching lumi mask %s to the request" % (lumi_mask_name))
try:
lumi_list = getLumiList(lumi_mask_name, logger = self.logger)
except ValueError as ex:
msg = "%sError%s:" % (colors.RED, colors.NORMAL)
msg += " Failed to load lumi mask %s : %s" % (lumi_mask_name, ex)
raise ConfigurationException(msg)
run_ranges = getattr(self.config.Data, 'runRange', None)
if run_ranges:
run_ranges_is_valid = re.match('^\d+((?!(-\d+-))(\,|\-)\d+)*$', run_ranges)
if run_ranges_is_valid:
run_list = getRunList(run_ranges)
if lumi_list:
lumi_list.selectRuns(run_list)
if not lumi_list:
msg = "Invalid CRAB configuration: The intersection between the lumi mask and the run range is null."
raise ConfigurationException(msg)
else:
if len(run_list) > 50000:
msg = "CRAB configuration parameter Data.runRange includes %s runs." % str(len(run_list))
msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
raise ConfigurationException(msg)
lumi_list = LumiList(runs = run_list)
else:
msg = "Invalid CRAB configuration: Parameter Data.runRange should be a comma separated list of integers or (inclusive) ranges. Example: '12345,99900-99910'"
raise ConfigurationException(msg)
if lumi_list:
configArguments['runs'] = lumi_list.getRuns()
## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
lumi_mask = lumi_list.getCompactList()
configArguments['lumis'] = [str(reduce(lambda x,y: x+y, lumi_mask[run]))[1:-1].replace(' ','') for run in configArguments['runs']]
configArguments['jobtype'] = 'Analysis'
return sandboxFilename, configArguments
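The one-liner that builds configArguments['lumis'] is dense; this sketch unpacks it step by step. Note that the examples above assume the Python 2 builtin reduce, while on Python 3 it must be imported from functools.

from functools import reduce  # builtin on Python 2, functools on Python 3

lumi_mask = {'1': [[1, 2], [5, 5]]}
flat = reduce(lambda x, y: x + y, lumi_mask['1'])  # [1, 2, 5, 5]
as_text = str(flat)                                # '[1, 2, 5, 5]'
lumis = [as_text[1:-1].replace(' ', '')]           # strip brackets and spaces
print(lumis)                                       # ['1,2,5,5']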
Example 4: run
# Required import module: from WMCore.DataStructs.LumiList import LumiList [as alias]
# Or: from WMCore.DataStructs.LumiList.LumiList import getRuns [as alias]
#......... part of this code omitted here .........
# Get SCRAM environment
scram = ScramEnvironment(logger=self.logger)
configArguments.update({'jobarch' : scram.scramArch,
'jobsw' : scram.cmsswVersion, })
# Build tarball
if self.workdir:
tarUUID = PandaInterface.wrappedUuidGen()
self.logger.debug('UNIQUE NAME: tarUUID %s ' % tarUUID)
if len(tarUUID):
tarFilename = os.path.join(self.workdir, tarUUID +'default.tgz')
cfgOutputName = os.path.join(self.workdir, 'CMSSW_cfg.py')
else:
raise EnvironmentException('Problem with uuidgen while preparing for Sandbox upload.')
else:
_dummy, tarFilename = tempfile.mkstemp(suffix='.tgz')
_dummy, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')
#configArguments['userisburl'] = 'https://'+ self.config.General.ufccacheUrl + '/crabcache/file?hashkey=' + uploadResults['hashkey']#XXX hardcoded
#configArguments['userisburl'] = 'INSERTuserisburl'#XXX hardcoded
if getattr(self.config.Data, 'inputDataset', None):
configArguments['inputdata'] = self.config.Data.inputDataset
# configArguments['ProcessingVersion'] = getattr(self.config.Data, 'processingVersion', None)
# Create CMSSW config
self.logger.debug("self.config: %s" % self.config)
self.logger.debug("self.config.JobType.psetName: %s" % self.config.JobType.psetName)
cmsswCfg = CMSSWConfig(config=self.config, logger=self.logger,
userConfig=self.config.JobType.psetName)
## Interrogate CMSSW config and user config for output file names. For now no use for EDM files or TFiles here.
edmfiles, tfiles = cmsswCfg.outputFiles()
addoutputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'outputFiles', []) if re.sub(r'^file:', '', file) not in edmfiles+tfiles]
self.logger.debug("The following EDM output files will be collected: %s" % edmfiles)
self.logger.debug("The following TFile output files will be collected: %s" % tfiles)
self.logger.debug("The following user output files will be collected: %s" % addoutputFiles)
configArguments['edmoutfiles'] = edmfiles
configArguments['tfileoutfiles'] = tfiles
configArguments['addoutputfiles'].extend(addoutputFiles)
# Write out CMSSW config
cmsswCfg.writeFile(cfgOutputName)
with UserTarball(name=tarFilename, logger=self.logger, config=self.config) as tb:
inputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'inputFiles', [])]
tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
configArguments['adduserfiles'] = [os.path.basename(f) for f in inputFiles]
uploadResults = tb.upload()
self.logger.debug("Result uploading input files: %s " % str(uploadResults))
configArguments['cachefilename'] = uploadResults[1]
configArguments['cacheurl'] = uploadResults[0]
isbchecksum = uploadResults[2]
# Upload list of user-defined input files to process as the primary input
userFileName = getattr(self.config.Data, 'userInputFile', None)
if userFileName:
self.logger.debug("Attaching a list of user-specified primary input files from %s." % userFileName)
fnames = []
for fname in open(userFileName).readlines():
fnames.append(fname.strip())
configArguments['userfiles'] = filter(lambda x: x, fnames) # remove blank lines and empty entries
primDS = getattr(self.config.Data, 'primaryDataset', None)
if primDS:
# Normalizes "foo/bar" and "/foo/bar" to "/foo/bar"
primDS = "/" + os.path.join(*primDS.split("/"))
if not re.match("/%(primDS)s.*" % lfnParts, primDS):
self.logger.warning("Invalid primary dataset name %s for private MC; publishing may fail" % primDS)
configArguments['inputdata'] = primDS
else:
configArguments['inputdata'] = getattr(self.config.Data, 'inputDataset', '/CRAB_UserFiles')
lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
lumi_list = None
if lumi_mask_name:
self.logger.debug("Attaching lumi mask %s to the request" % lumi_mask_name)
lumi_list = getLumiList(lumi_mask_name, logger = self.logger)
run_ranges = getattr(self.config.Data, 'runRange', None)
run_ranges_is_valid = run_ranges is not None and isinstance(run_ranges, str) and re.match('^\d+((?!(-\d+-))(\,|\-)\d+)*$', run_ranges)
if run_ranges_is_valid:
run_list = getRunList(run_ranges)
if lumi_list:
lumi_list.selectRuns(run_list)
else:
if len(run_list) > 50000:
msg = "Data.runRange includes %s runs." % str(len(run_list))
msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
raise ConfigurationException(msg)
lumi_list = LumiList(runs = run_list)
if lumi_list:
configArguments['runs'] = lumi_list.getRuns()
## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
lumi_mask = lumi_list.getCompactList()
configArguments['lumis'] = [str(reduce(lambda x,y: x+y, lumi_mask[run]))[1:-1].replace(' ','') for run in configArguments['runs']]
configArguments['jobtype'] = 'Analysis'
return tarFilename, configArguments, isbchecksum
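The primary-dataset normalization trick used in this example is easy to test on its own. A minimal sketch, assuming POSIX path semantics:

import os

def normalize_primary_dataset(name):
    # os.path.join drops the empty leading component produced by
    # splitting "/foo/bar", so both spellings normalize the same way.
    return "/" + os.path.join(*name.split("/"))

print(normalize_primary_dataset("foo/bar"))   # /foo/bar
print(normalize_primary_dataset("/foo/bar"))  # /foo/bar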
Example 5: run
# Required import module: from WMCore.DataStructs.LumiList import LumiList [as alias]
# Or: from WMCore.DataStructs.LumiList.LumiList import getRuns [as alias]
#......... part of this code omitted here .........
configArguments["addoutputfiles"].extend(addoutputFiles)
## Give warning message in case no output file was detected in the CMSSW pset
## nor was any specified in the CRAB configuration.
if (
not configArguments["edmoutfiles"]
and not configArguments["tfileoutfiles"]
and not configArguments["addoutputfiles"]
):
msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
if getattr(
self.config.JobType,
"disableAutomaticOutputCollection",
getParamDefaultValue("JobType.disableAutomaticOutputCollection"),
):
msg += " Automatic detection of output files in the CMSSW configuration is disabled from the CRAB configuration"
msg += " and no output file was explicitly specified in the CRAB configuration."
else:
msg += " CRAB could not detect any output file in the CMSSW configuration"
msg += " nor was any explicitly specified in the CRAB configuration."
msg += " Hence CRAB will not collect any output file from this task."
self.logger.warning(msg)
## UserTarball calls ScramEnvironment which can raise EnvironmentException.
## Since ScramEnvironment is already called above and the exception is not
## handled, we are sure that if we reached this point it will not raise EnvironmentException.
## But otherwise we should take this into account.
with UserTarball(name=tarFilename, logger=self.logger, config=self.config) as tb:
inputFiles = [re.sub(r"^file:", "", file) for file in getattr(self.config.JobType, "inputFiles", [])]
tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
configArguments["adduserfiles"] = [os.path.basename(f) for f in inputFiles]
uploadResults = tb.upload(filecacheurl=filecacheurl)
self.logger.debug("Result uploading input files: %s " % str(uploadResults))
configArguments["cacheurl"] = filecacheurl
configArguments["cachefilename"] = uploadResults[0]
isbchecksum = uploadResults[1]
# Upload list of user-defined input files to process as the primary input
userFilesList = getattr(self.config.Data, "userInputFiles", None)
if userFilesList:
self.logger.debug("Attaching list of user-specified primary input files.")
userFilesList = map(string.strip, userFilesList)
userFilesList = [file for file in userFilesList if file]
if len(userFilesList) != len(set(userFilesList)):
msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
msg += " Duplicated entries will be removed."
self.logger.warning(msg)
configArguments["userfiles"] = set(userFilesList)
## Get the user-specified primary dataset name.
primaryDataset = getattr(self.config.Data, "primaryDataset", "CRAB_UserFiles")
# Normalizes "foo/bar" and "/foo/bar" to "/foo/bar"
primaryDataset = "/" + os.path.join(*primaryDataset.split("/"))
if not re.match("/%(primDS)s.*" % (lfnParts), primaryDataset):
self.logger.warning("Invalid primary dataset name %s; publication may fail." % (primaryDataset))
configArguments["inputdata"] = primaryDataset
lumi_mask_name = getattr(self.config.Data, "lumiMask", None)
lumi_list = None
if lumi_mask_name:
self.logger.debug("Attaching lumi mask %s to the request" % (lumi_mask_name))
try:
lumi_list = getLumiList(lumi_mask_name, logger=self.logger)
except ValueError as ex:
msg = "%sError%s:" % (colors.RED, colors.NORMAL)
msg += " Failed to load lumi mask %s : %s" % (lumi_mask_name, ex)
raise ConfigurationException(msg)
run_ranges = getattr(self.config.Data, "runRange", None)
if run_ranges:
run_ranges_is_valid = re.match("^\d+((?!(-\d+-))(\,|\-)\d+)*$", run_ranges)
if run_ranges_is_valid:
run_list = getRunList(run_ranges)
if lumi_list:
lumi_list.selectRuns(run_list)
if not lumi_list:
msg = "Invalid CRAB configuration: The intersection between the lumi mask and the run range is null."
raise ConfigurationException(msg)
else:
if len(run_list) > 50000:
msg = "CRAB configuration parameter Data.runRange includes %s runs." % str(len(run_list))
msg += (
" When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
)
raise ConfigurationException(msg)
lumi_list = LumiList(runs=run_list)
else:
msg = "Invalid CRAB configuration: Parameter Data.runRange should be a comma separated list of integers or (inclusive) ranges. Example: '12345,99900-99910'"
raise ConfigurationException(msg)
if lumi_list:
configArguments["runs"] = lumi_list.getRuns()
## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
lumi_mask = lumi_list.getCompactList()
configArguments["lumis"] = [
str(reduce(lambda x, y: x + y, lumi_mask[run]))[1:-1].replace(" ", "")
for run in configArguments["runs"]
]
configArguments["jobtype"] = "Analysis"
return tarFilename, configArguments, isbchecksum
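When both a lumi mask and a run range are given, selectRuns keeps only their intersection. A hedged sketch follows; it assumes WMCore is installed, and whether string and integer run numbers are matched interchangeably may depend on the WMCore version.

from WMCore.DataStructs.LumiList import LumiList

lumi_list = LumiList(compactList={'1': [[1, 10]], '2': [[1, 5]]})
lumi_list.selectRuns([2, 3])   # keep only runs present in the range
print(lumi_list.getRuns())     # expected: ['2']
if not lumi_list:              # an empty mask is falsy -> the error path above
    print("null intersection")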
Example 6: run
# Required import module: from WMCore.DataStructs.LumiList import LumiList [as alias]
# Or: from WMCore.DataStructs.LumiList.LumiList import getRuns [as alias]
#......... part of this code omitted here .........
configArguments.update({'jobarch' : scram.scramArch,
'jobsw' : scram.cmsswVersion, })
# Build tarball
if self.workdir:
tarUUID = PandaInterface.wrappedUuidGen()
self.logger.debug('UNIQUE NAME: tarUUID %s ' % tarUUID)
if len(tarUUID):
tarFilename = os.path.join(self.workdir, tarUUID +'default.tgz')
cfgOutputName = os.path.join(self.workdir, 'CMSSW_cfg.py')
else:
raise EnvironmentException('Problem with uuidgen while preparing for Sandbox upload.')
else:
_dummy, tarFilename = tempfile.mkstemp(suffix='.tgz')
_dummy, cfgOutputName = tempfile.mkstemp(suffix='_cfg.py')
if getattr(self.config.Data, 'inputDataset', None):
configArguments['inputdata'] = self.config.Data.inputDataset
# configArguments['ProcessingVersion'] = getattr(self.config.Data, 'processingVersion', None)
# Create CMSSW config
self.logger.debug("self.config: %s" % self.config)
self.logger.debug("self.config.JobType.psetName: %s" % self.config.JobType.psetName)
cmsswCfg = CMSSWConfig(config=self.config, logger=self.logger,
userConfig=self.config.JobType.psetName)
## Interrogate CMSSW config and user config for output file names. For now no use for EDM files or TFiles here.
edmfiles, tfiles = cmsswCfg.outputFiles()
addoutputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'outputFiles', []) if re.sub(r'^file:', '', file) not in edmfiles+tfiles]
self.logger.debug("The following EDM output files will be collected: %s" % edmfiles)
self.logger.debug("The following TFile output files will be collected: %s" % tfiles)
self.logger.debug("The following user output files will be collected: %s" % addoutputFiles)
configArguments['edmoutfiles'] = edmfiles
configArguments['tfileoutfiles'] = tfiles
configArguments['addoutputfiles'].extend(addoutputFiles)
# Write out CMSSW config
cmsswCfg.writeFile(cfgOutputName)
## UserTarball calls ScramEnvironment which can raise EnvironmentException.
## Since ScramEnvironment is already called above and the exception is not
## handled, we are sure that if we reached this point it will not raise EnvironmentException.
## But otherwise we should take this into account.
with UserTarball(name=tarFilename, logger=self.logger, config=self.config) as tb:
inputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'inputFiles', [])]
tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
configArguments['adduserfiles'] = [os.path.basename(f) for f in inputFiles]
uploadResults = tb.upload(filecacheurl = filecacheurl)
self.logger.debug("Result uploading input files: %s " % str(uploadResults))
configArguments['cacheurl'] = filecacheurl
configArguments['cachefilename'] = uploadResults[0]
isbchecksum = uploadResults[1]
# Upload list of user-defined input files to process as the primary input
userFilesList = getattr(self.config.Data, 'userInputFiles', None)
if userFilesList:
self.logger.debug("Attaching list of user-specified primary input files.")
userFilesList = map(string.strip, userFilesList)
userFilesList = [file for file in userFilesList if file]
if len(userFilesList) != len(set(userFilesList)):
msg = "%sWarning%s: CRAB configuration parameter Data.userInputFiles contains duplicated entries." % (colors.RED, colors.NORMAL)
msg += " Duplicated entries will be removed."
self.logger.warning(msg)
configArguments['userfiles'] = set(userFilesList)
## Get the user-specified primary dataset name.
primaryDataset = getattr(self.config.Data, 'primaryDataset', 'CRAB_UserFiles')
# Normalizes "foo/bar" and "/foo/bar" to "/foo/bar"
primaryDataset = "/" + os.path.join(*primaryDataset.split("/"))
if not re.match("/%(primDS)s.*" % (lfnParts), primaryDataset):
self.logger.warning("Invalid primary dataset name %s; publication may fail." % (primaryDataset))
configArguments['inputdata'] = primaryDataset
lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
lumi_list = None
if lumi_mask_name:
self.logger.debug("Attaching lumi mask %s to the request" % lumi_mask_name)
lumi_list = getLumiList(lumi_mask_name, logger = self.logger)
run_ranges = getattr(self.config.Data, 'runRange', None)
run_ranges_is_valid = run_ranges is not None and isinstance(run_ranges, str) and re.match('^\d+((?!(-\d+-))(\,|\-)\d+)*$', run_ranges)
if run_ranges_is_valid:
run_list = getRunList(run_ranges)
if lumi_list:
lumi_list.selectRuns(run_list)
else:
if len(run_list) > 50000:
msg = "Data.runRange includes %s runs." % str(len(run_list))
msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
raise ConfigurationException(msg)
lumi_list = LumiList(runs = run_list)
if lumi_list:
configArguments['runs'] = lumi_list.getRuns()
## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
lumi_mask = lumi_list.getCompactList()
configArguments['lumis'] = [str(reduce(lambda x,y: x+y, lumi_mask[run]))[1:-1].replace(' ','') for run in configArguments['runs']]
configArguments['jobtype'] = 'Analysis'
return tarFilename, configArguments, isbchecksum
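The duplicate handling for Data.userInputFiles follows a simple strip/filter/set pattern; a standalone sketch of the same logic (the Py2 original uses map(string.strip, ...) for the first step):

user_files = ['a.root', 'b.root ', 'a.root', '']
user_files = [f.strip() for f in user_files]   # strip whitespace
user_files = [f for f in user_files if f]      # drop empty entries
if len(user_files) != len(set(user_files)):
    print("Warning: duplicated entries will be removed.")
unique_files = set(user_files)                 # what ends up in configArguments['userfiles']
print(sorted(unique_files))                    # ['a.root', 'b.root']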
Example 7: run
# Required import module: from WMCore.DataStructs.LumiList import LumiList [as alias]
# Or: from WMCore.DataStructs.LumiList.LumiList import getRuns [as alias]
#......... part of this code omitted here .........
## in the sense that a second loading of the same pset may not produce the same
## result. Therefore there is a cache in CMSSWConfig to avoid loading any CMSSW
## pset twice. However, some "complicated" psets seem to evade the caching.
## Thus, to be safe, keep the CMSSWConfig instance in a class variable, so that
## it can be reused later if wanted (for example, in PrivateMC when checking if
## the pset has an LHE source) instead of having to load the pset again.
## As for what does "complicated" psets mean, Daniel Riley said that there are
## some psets where one module modifies the configuration from another module.
self.cmsswCfg = CMSSWConfig(config=self.config, logger=self.logger,
userConfig=self.config.JobType.psetName)
## Interrogate the CMSSW pset for output files (only output files produced by
## PoolOutputModule or TFileService are identified automatically). Do this
## automatic detection even if JobType.disableAutomaticOutputCollection = True,
## so that we can still classify the output files in EDM, TFile and additional
## output files in the Task DB (and the job ad).
## TODO: Do we really need this classification at all? cmscp and PostJob read
## the FJR to know if an output file is EDM, TFile or other.
edmfiles, tfiles = self.cmsswCfg.outputFiles()
## If JobType.disableAutomaticOutputCollection = True, ignore the EDM and TFile
## output files that are not listed in JobType.outputFiles.
if getattr(self.config.JobType, 'disableAutomaticOutputCollection', getParamDefaultValue('JobType.disableAutomaticOutputCollection')):
outputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'outputFiles', [])]
edmfiles = [file for file in edmfiles if file in outputFiles]
tfiles = [file for file in tfiles if file in outputFiles]
## Get the list of additional output files that have to be collected as given
## in JobType.outputFiles, but remove duplicates listed already as EDM files or
## TFiles.
addoutputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'outputFiles', []) if re.sub(r'^file:', '', file) not in edmfiles+tfiles]
self.logger.debug("The following EDM output files will be collected: %s" % edmfiles)
self.logger.debug("The following TFile output files will be collected: %s" % tfiles)
self.logger.debug("The following user output files will be collected: %s" % addoutputFiles)
configArguments['edmoutfiles'] = edmfiles
configArguments['tfileoutfiles'] = tfiles
configArguments['addoutputfiles'].extend(addoutputFiles)
# Write out CMSSW config
self.cmsswCfg.writeFile(cfgOutputName)
## UserTarball calls ScramEnvironment which can raise EnvironmentException.
## Since ScramEnvironment is already called above and the exception is not
## handled, we are sure that if we reached this point it will not raise EnvironmentException.
## But otherwise we should take this into account.
with UserTarball(name=tarFilename, logger=self.logger, config=self.config) as tb:
inputFiles = [re.sub(r'^file:', '', file) for file in getattr(self.config.JobType, 'inputFiles', [])]
tb.addFiles(userFiles=inputFiles, cfgOutputName=cfgOutputName)
configArguments['adduserfiles'] = [os.path.basename(f) for f in inputFiles]
uploadResults = tb.upload(filecacheurl = filecacheurl)
self.logger.debug("Result uploading input files: %s " % str(uploadResults))
configArguments['cacheurl'] = filecacheurl
configArguments['cachefilename'] = uploadResults[0]
isbchecksum = uploadResults[1]
# Upload list of user-defined input files to process as the primary input
userFilesList = getattr(self.config.Data, 'userInputFiles', None)
if userFilesList:
self.logger.debug("Attaching list of user-specified primary input files.")
userFilesList = map(string.strip, userFilesList)
userFilesList = [file for file in userFilesList if file]
if len(userFilesList) != len(set(userFilesList)):
msg = "%sWarning%s:" % (colors.RED, colors.NORMAL)
msg += " CRAB configuration parameter Data.userInputFiles contains duplicated entries."
msg += " Duplicated entries will be removed."
self.logger.warning(msg)
configArguments['userfiles'] = set(userFilesList)
## Get the user-specified primary dataset name.
primaryDataset = getattr(self.config.Data, 'primaryDataset', 'CRAB_UserFiles')
# Normalizes "foo/bar" and "/foo/bar" to "/foo/bar"
primaryDataset = "/" + os.path.join(*primaryDataset.split("/"))
if not re.match("/%(primDS)s.*" % (lfnParts), primaryDataset):
self.logger.warning("Invalid primary dataset name %s; publication may fail." % (primaryDataset))
configArguments['inputdata'] = primaryDataset
lumi_mask_name = getattr(self.config.Data, 'lumiMask', None)
lumi_list = None
if lumi_mask_name:
self.logger.debug("Attaching lumi mask %s to the request" % (lumi_mask_name))
lumi_list = getLumiList(lumi_mask_name, logger = self.logger)
run_ranges = getattr(self.config.Data, 'runRange', None)
run_ranges_is_valid = run_ranges is not None and isinstance(run_ranges, str) and re.match('^\d+((?!(-\d+-))(\,|\-)\d+)*$', run_ranges)
if run_ranges_is_valid:
run_list = getRunList(run_ranges)
if lumi_list:
lumi_list.selectRuns(run_list)
else:
if len(run_list) > 50000:
msg = "CRAB configuration parameter Data.runRange includes %s runs." % str(len(run_list))
msg += " When Data.lumiMask is not specified, Data.runRange can not include more than 50000 runs."
raise ConfigurationException(msg)
lumi_list = LumiList(runs = run_list)
if lumi_list:
configArguments['runs'] = lumi_list.getRuns()
## For each run we encode the lumis as a string representing a list of integers: [[1,2],[5,5]] ==> '1,2,5,5'
lumi_mask = lumi_list.getCompactList()
configArguments['lumis'] = [str(reduce(lambda x,y: x+y, lumi_mask[run]))[1:-1].replace(' ','') for run in configArguments['runs']]
configArguments['jobtype'] = 'Analysis'
return tarFilename, configArguments, isbchecksum
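Finally, a hedged round-trip sketch tying getRuns back to the other LumiList accessors used throughout these examples. It assumes WMCore is installed; the exact compact representation of whole-run masks is version dependent, so no output is asserted.

from WMCore.DataStructs.LumiList import LumiList

run_only = LumiList(runs=[12345, 12346])   # whole-run mask, as in the runRange branch
print(run_only.getRuns())                  # run numbers in the mask
print(run_only.getCompactList())           # {run: [[first, last], ...]} mapping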