This page collects typical usage examples of the Python method WMCore.DataStructs.Fileset.Fileset.addFile. If you have been wondering what Fileset.addFile does, how to use it, or what working code looks like, the curated examples below should help. You can also explore the containing class, WMCore.DataStructs.Fileset.Fileset, for further context.
Below are 15 code examples of Fileset.addFile, sorted by popularity by default. Upvoting the examples you like or find useful helps the system recommend better Python samples.
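Before the examples, here is a minimal sketch of the method's basic contract, pieced together from the examples below (the LFN and numbers are made up for illustration): addFile() stages a File in the fileset's newfiles set, and commit() moves it into files; Example 7 exercises exactly this behaviour.

from WMCore.DataStructs.Fileset import Fileset
from WMCore.DataStructs.File import File

fileset = Fileset(name="DemoFileset")
demoFile = File(lfn="/store/demo/file.root", size=1000, events=100)  # hypothetical file
fileset.addFile(demoFile)  # staged in fileset.newfiles
fileset.commit()           # moved into fileset.files; newfiles is emptied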
Example 1: algorithm
# Required import: from WMCore.DataStructs.Fileset import Fileset [as alias]
# Or: from WMCore.DataStructs.Fileset.Fileset import addFile [as alias]
def algorithm(self, *args, **kwargs):
    """
    _algorithm_

    Implement merge algorithm for the subscription provided
    """
    fileset = list(self.subscription.availableFiles())

    mergeSize = int(kwargs['merge_size'])
    overflow = bool(kwargs.get('all_files', False))
    fileset.sort()

    accumSize = 0
    jobFiles = Fileset()
    locationDict = self.sortByLocation()
    for location in locationDict:
        baseName = makeUUID()
        self.newGroup()
        for f in locationDict[location]:
            accumSize += f['size']
            jobFiles.addFile(f)
            if accumSize >= mergeSize:
                self.newJob(name = '%s-%s' % (baseName, len(self.currentGroup.jobs) + 1),
                            files = jobFiles)
                self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)
                accumSize = 0
                jobFiles = Fileset()

        # Flush any leftover files into a final (undersized) job, but only
        # when the caller asked for all files to be consumed
        if len(jobFiles) > 0:
            if overflow:
                self.newJob(name = '%s-%s' % (baseName, len(self.currentGroup.jobs) + 1),
                            files = jobFiles)
                self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)
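The kwargs read at the top of algorithm hint at how it is driven: following the factory pattern in Example 3, a caller obtains a job factory for the subscription and passes merge_size (bytes of input per merged job) and all_files through the factory call. A rough sketch, assuming the subscription's split_algo points at this algorithm and with illustrative sizes:

splitter = SplitterFactory()
jobFactory = splitter(subscription)               # subscription configured with this merge algorithm
jobGroups = jobFactory(merge_size=4 * 1024 ** 3,  # ~4 GB of input per merged job
                       all_files=True)            # also flush leftover files into a final job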
Example 2: createSubscription
# Required import: from WMCore.DataStructs.Fileset import Fileset [as alias]
# Or: from WMCore.DataStructs.Fileset.Fileset import addFile [as alias]
def createSubscription(self, nFiles, lumisPerFile, twoSites = False, nEventsPerFile = 100):
    """
    _createSubscription_

    Create a subscription for testing
    """
    baseName = makeUUID()
    testFileset = Fileset(name = baseName)
    for i in range(nFiles):
        newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile,
                                  i, lumisPerFile, 'blenheim')
        testFileset.addFile(newFile)
    if twoSites:
        for i in range(nFiles):
            newFile = self.createFile('%s_%i_2' % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, 'malpaquet')
            testFileset.addFile(newFile)
    testSubscription = Subscription(fileset = testFileset,
                                    workflow = self.testWorkflow,
                                    split_algo = "EventAwareLumiBased",
                                    type = "Processing")

    return testSubscription
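In the surrounding tests, this helper would typically feed a splitter directly; a sketch with illustrative parameters (createFile and testWorkflow come from the test fixture, and the factory call mirrors Examples 8 and 9):

testSubscription = self.createSubscription(nFiles=5, lumisPerFile=10, twoSites=True)
jobFactory = SplitterFactory()(package="WMCore.DataStructs", subscription=testSubscription)
jobGroups = jobFactory(events_per_job=200, performance=self.performanceParams)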
Example 3: oneHundredFiles
# Required import: from WMCore.DataStructs.Fileset import Fileset [as alias]
# Or: from WMCore.DataStructs.Fileset.Fileset import addFile [as alias]
def oneHundredFiles(self, splittingAlgo="EventBased", jobType="Processing"):
    """
    _oneHundredFiles_

    Generate a WMBS data stack representing 100 files for job splitter
    testing
    """
    fileset1 = Fileset(name="EventBasedFiles1")
    for i in range(0, 100):
        f = File(
            "/store/MultipleFileSplit%s.root" % i,  # lfn
            1000,                                   # size
            100,                                    # events
            10 + i,                                 # run
            12312                                   # lumi
        )
        f["locations"].add("BULLSHIT")
        fileset1.addFile(f)
    work = Workflow()
    subscription1 = Subscription(fileset=fileset1, workflow=work, split_algo=splittingAlgo, type=jobType)
    splitter = SplitterFactory()
    jobfactory = splitter(subscription1)
    jobs = jobfactory(events_per_job=100)
    # for jobGroup in jobs:
    #     yield jobGroup
    self.manager.addSeeder("RandomSeeder", **self.seedlistForRandom)
    self.manager.addSeeder("RunAndLumiSeeder")
    return jobs
Example 4: processDataset
# Required import: from WMCore.DataStructs.Fileset import Fileset [as alias]
# Or: from WMCore.DataStructs.Fileset.Fileset import addFile [as alias]
def processDataset(self):
    """
    _processDataset_

    Import the Dataset contents and create a set of jobs from it
    """
    #  //
    # // Now create the job definitions
    #//
    logging.debug("SplitSize = %s" % self.splitSize)
    logging.debug("AllowedSites = %s" % self.allowedSites)
    thefiles = Fileset(name='FilesToSplit')
    reader = DBSReader(self.dbsUrl)
    fileList = reader.dbs.listFiles(analysisDataset = self.inputDataset(),
                                    retriveList = [ 'retrive_block',
                                                    'retrive_run'])

    blocks = {}
    for f in fileList:
        block = f['Block']['Name']
        if not blocks.has_key(block):  # Python 2 idiom; `block not in blocks` in Python 3
            blocks[block] = reader.listFileBlockLocation(block)
        f['Block']['StorageElementList'].extend(blocks[block])
        wmbsFile = File(f['LogicalFileName'])
        [ wmbsFile['locations'].add(x) for x in blocks[block] ]
        wmbsFile['block'] = block
        thefiles.addFile(wmbsFile)

    work = Workflow()
    subs = Subscription(fileset = thefiles,
                        workflow = work,
                        split_algo = 'FileBased',
                        type = "Processing")
    splitter = SplitterFactory()
    jobfactory = splitter(subs)
    jobs = jobfactory(files_per_job = self.splitSize)

    jobDefs = []
    for job in jobs.jobs:
        #job.mask.setMaxAndSkipEvents(-1, 0)
        jobDef = JobDefinition()
        jobDef['LFNS'].extend(job.listLFNs())
        jobDef['SkipEvents'] = 0
        jobDef['MaxEvents'] = -1
        [ jobDef['SENames'].extend(list(x['locations']))
          for x in job.listFiles() ]
        jobDefs.append(jobDef)

    return jobDefs
Example 5: testMetaData
# Required import: from WMCore.DataStructs.Fileset import Fileset [as alias]
# Or: from WMCore.DataStructs.Fileset.Fileset import addFile [as alias]
def testMetaData(self):
    """
    _testMetaData_

    Make sure that the workflow name, task, owner and white and black lists
    make it into each job object.
    """
    testWorkflow = Workflow(spec = "spec.pkl", owner = "Steve",
                            name = "TestWorkflow", task = "TestTask")

    testFileset = Fileset(name = "TestFileset")
    testFile = File(lfn = "someLFN")
    testFileset.addFile(testFile)
    testFileset.commit()

    testSubscription = Subscription(fileset = testFileset,
                                    workflow = testWorkflow,
                                    split_algo = "FileBased")

    myJobFactory = JobFactory(subscription = testSubscription)
    testJobGroups = myJobFactory(siteWhitelist = ["site1"], siteBlacklist = ["site2"])
    self.assertTrue(len(testJobGroups) > 0)

    for testJobGroup in testJobGroups:
        self.assertTrue(len(testJobGroup.jobs) > 0)
        for job in testJobGroup.jobs:
            self.assertEqual(job["task"], "TestTask", "Error: Task is wrong.")
            self.assertEqual(job["workflow"], "TestWorkflow", "Error: Workflow is wrong.")
            self.assertEqual(job["owner"], "Steve", "Error: Owner is wrong.")

    return
Example 6: execute
# Required import: from WMCore.DataStructs.Fileset import Fileset [as alias]
# Or: from WMCore.DataStructs.Fileset.Fileset import addFile [as alias]
def execute(self, *args, **kwargs):  #pylint: disable=unused-argument
    # since https://github.com/dmwm/CRABServer/issues/5633 totalunits can be a float
    # but that would confuse WMCore, therefore cast to int
    totalevents = int(kwargs['task']['tm_totalunits'])
    firstEvent = 1
    lastEvent = totalevents
    firstLumi = 1
    lastLumi = 10

    # Set a default of 100 events per lumi. This is set as a task
    # property, as the splitting considers it independently of the file
    # information provided by the fake dataset.
    if not kwargs['task']['tm_events_per_lumi']:
        kwargs['task']['tm_events_per_lumi'] = 100

    # MC comes with only one MCFakeFile
    singleMCFileset = Fileset(name = "MCFakeFileSet")
    newFile = File("MCFakeFile", size = 1000, events = totalevents)
    newFile.setLocation(self.getListOfSites())
    newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
    newFile["block"] = 'MCFakeBlock'
    newFile["first_event"] = firstEvent
    newFile["last_event"] = lastEvent
    singleMCFileset.addFile(newFile)

    return Result(task=kwargs['task'], result=singleMCFileset)
Example 7: testCommit
# Required import: from WMCore.DataStructs.Fileset import Fileset [as alias]
# Or: from WMCore.DataStructs.Fileset.Fileset import addFile [as alias]
def testCommit(self):
    """
    Testcase for the commit method of the Fileset class
    """
    localTestFileSet = Fileset('LocalTestFileset', self.initialSet)
    fsSize = len(localTestFileSet.getFiles(type = "lfn"))

    # Dummy file to test
    fileTestCommit = File('/tmp/filetestcommit', 0000, 1, 1)
    # File is added to the newfiles attribute of localTestFileSet
    localTestFileSet.addFile(fileTestCommit)
    assert fsSize == len(localTestFileSet.getFiles(type = "lfn")) - 1, \
        'file not added correctly to test fileset'
    newfilestemp = localTestFileSet.newfiles
    assert fileTestCommit in newfilestemp, 'test file not in the new files list'

    # After commit, the dummy file is supposed to move from newfiles to files
    localTestFileSet.commit()

    # First, test that the new file is now present in the files attribute of the Fileset object
    assert newfilestemp.issubset(localTestFileSet.files), \
        'Test file not present at fileset.files - fileset.commit not working properly'
    # Second, test that the newfiles attribute has been emptied
    assert localTestFileSet.newfiles == set(), \
        'fileset.newfiles not emptied - fileset.commit not working properly'
Example 8: testHardLimitSplittingOnly
# Required import: from WMCore.DataStructs.Fileset import Fileset [as alias]
# Or: from WMCore.DataStructs.Fileset.Fileset import addFile [as alias]
def testHardLimitSplittingOnly(self):
    """
    _testHardLimitSplittingOnly_

    Checks that we can split a set of files where every file has a single
    lumi too big to fit in a runnable job
    """
    splitter = SplitterFactory()

    # Create 3 single-big-lumi files
    testFileset = Fileset(name="FilesetA")
    testFileA = self.createFile("/this/is/file1", 1000, 0, 1, "somese.cern.ch")
    testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "somese.cern.ch")
    testFileC = self.createFile("/this/is/file3", 1000, 2, 1, "somese.cern.ch")
    testFileset.addFile(testFileA)
    testFileset.addFile(testFileB)
    testFileset.addFile(testFileC)

    testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiByWork", type="Processing")
    jobFactory = splitter(package="WMCore.DataStructs", subscription=testSubscription)

    # Fail single lumis with more than 800 events and put 550 events per job
    jobGroups = jobFactory(halt_job_on_file_boundaries=True, splitOnRun=True, events_per_job=550,
                           max_events_per_lumi=800, performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 1)
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 3)
    for job in jobs:
        self.assertTrue(job['failedOnCreation'])
        self.assertIn("Too many (estimated) events (1000.0) in", job['failedReason'])

    return
Example 9: testG_LumiMask
# Required import: from WMCore.DataStructs.Fileset import Fileset [as alias]
# Or: from WMCore.DataStructs.Fileset.Fileset import addFile [as alias]
def testG_LumiMask(self):
    """
    _testG_LumiMask_

    Test that we can use a lumi-mask to filter good runs/lumis.
    """
    splitter = SplitterFactory()

    # Create 3 files with 100 events per lumi:
    # - file1 with 1 run  of 8 lumis
    # - file2 with 2 runs of 2 lumis each
    # - file3 with 1 run  of 5 lumis
    fileA = File(lfn = "/this/is/file1", size = 1000, events = 800)
    fileB = File(lfn = "/this/is/file2", size = 1000, events = 400)
    fileC = File(lfn = "/this/is/file3", size = 1000, events = 500)

    lumiListA = []
    for lumi in range(8):
        lumiListA.append(10 + lumi)
    fileA.addRun(Run(1, *lumiListA))
    fileA.setLocation("somese.cern.ch")

    lumiListB1 = []
    lumiListB2 = []
    for lumi in range(2):
        lumiListB1.append(20 + lumi)
        lumiListB2.append(30 + lumi)
    fileB.addRun(Run(2, *lumiListB1))
    fileB.addRun(Run(3, *lumiListB2))
    fileB.setLocation("somese.cern.ch")

    lumiListC = []
    for lumi in range(5):
        lumiListC.append(40 + lumi)
    fileC.addRun(Run(4, *lumiListC))
    fileC.setLocation("somese.cern.ch")

    testFileset = Fileset(name = 'Fileset')
    testFileset.addFile(fileA)
    testFileset.addFile(fileB)
    testFileset.addFile(fileC)

    testSubscription = Subscription(fileset = testFileset,
                                    workflow = self.testWorkflow,
                                    split_algo = "EventAwareLumiBased",
                                    type = "Processing")
    jobFactory = splitter(package = "WMCore.DataStructs",
                          subscription = testSubscription)

    # Use a lumi-mask = {1: [[10, 14]], 2: [[20, 21]], 4: [[40, 41]]}
    jobGroups = jobFactory(halt_job_on_file_boundaries = False,
                           splitOnRun = False,
                           events_per_job = 850,
                           runs = ['1', '2', '4'],
                           lumis = ['10,14', '20,21', '40,41'],
                           performance = self.performanceParams)

    self.assertEqual(len(jobGroups), 1, "There should be only one job group")
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 2, "Two jobs must be in the jobgroup")
    self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {1: [[10, 14]], 2: [[20, 21]], 4: [[40, 40]]})
    self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {4: [[41, 41]]})
Example 10: execute
# Required import: from WMCore.DataStructs.Fileset import Fileset [as alias]
# Or: from WMCore.DataStructs.Fileset.Fileset import addFile [as alias]
def execute(self, *args, **kwargs):
    totalevents = kwargs['task']['tm_totalunits']
    firstEvent = 1
    lastEvent = totalevents
    firstLumi = 1
    lastLumi = 10

    # Set a default of 100 events per lumi. This is set as a task
    # property, as the splitting considers it independently of the file
    # information provided by the fake dataset.
    if not kwargs['task']['tm_events_per_lumi']:
        kwargs['task']['tm_events_per_lumi'] = 100

    # MC comes with only one MCFakeFile
    singleMCFileset = Fileset(name = "MCFakeFileSet")
    newFile = File("MCFakeFile", size = 1000, events = totalevents)
    sbj = SiteDBJSON({"key": self.config.TaskWorker.cmskey,
                      "cert": self.config.TaskWorker.cmscert})
    newFile.setLocation(sbj.getAllCMSNames())
    newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
    newFile["block"] = 'MCFakeBlock'
    newFile["first_event"] = firstEvent
    newFile["last_event"] = lastEvent
    singleMCFileset.addFile(newFile)

    return Result(task=kwargs['task'], result=singleMCFileset)
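Note that this is an older variant of Example 6: it resolves the fake file's locations by querying SiteDB directly via SiteDBJSON(...).getAllCMSNames() rather than through the getListOfSites() helper, and it does not yet cast tm_totalunits to int (the float issue referenced in Example 6's leading comment).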
Example 11: testF_HardLimitSplittingOnly
# Required import: from WMCore.DataStructs.Fileset import Fileset [as alias]
# Or: from WMCore.DataStructs.Fileset.Fileset import addFile [as alias]
def testF_HardLimitSplittingOnly(self):
    """
    _testF_HardLimitSplittingOnly_

    Checks that we can split a set of files where every file has a single
    lumi too big to fit in a runnable job
    """
    splitter = SplitterFactory()

    # Create 3 single-big-lumi files
    testFileset = Fileset(name="FilesetA")
    testFileA = self.createFile("/this/is/file1", 1000, 0, 1, "somese.cern.ch")
    testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "somese.cern.ch")
    testFileC = self.createFile("/this/is/file3", 1000, 2, 1, "somese.cern.ch")
    testFileset.addFile(testFileA)
    testFileset.addFile(testFileB)
    testFileset.addFile(testFileC)

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiBased",
                                    type="Processing")
    jobFactory = splitter(package="WMCore.DataStructs",
                          subscription=testSubscription)

    # Settings are to split on job boundaries, to fail single lumis with more
    # than 800 events and to put 550 events per job
    jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                           splitOnRun=True,
                           events_per_job=550,
                           job_time_limit=9600,
                           performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 1, "There should be only one job group")
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 3, "Three jobs must be in the jobgroup")
    for i in range(0, 3):
        self.assertTrue(jobs[i]['failedOnCreation'], "It should have been marked as failed")

        runNums = jobs[i]['mask']['runAndLumis'].keys()
        self.assertEqual(len(runNums), 1)

        lumiNums = jobs[i]['mask']['runAndLumis'].values()[0]  # Python 2: values() returns a list
        self.assertEqual(len(lumiNums), 1)

        finalLumi = []
        for pair in lumiNums:
            finalLumi.extend(range(pair[0], pair[1] + 1))
        self.assertEqual(len(finalLumi), 1)

        # "woud" below is verbatim: the test matches the splitter's message exactly
        self.assertEqual(jobs[i]['failedReason'],
                         "File /this/is/file%d has a single lumi %s, in run %s with too many events 1000 and it woud take 12000 sec to run" % (
                             i + 1, finalLumi[0], runNums[0]))

    return
Example 12: generateFakeMCFile
# Required import: from WMCore.DataStructs.Fileset import Fileset [as alias]
# Or: from WMCore.DataStructs.Fileset.Fileset import addFile [as alias]
def generateFakeMCFile(self, numEvents=100, firstEvent=1, lastEvent=100, firstLumi=1, lastLumi=10):
    # MC comes with only one MCFakeFile
    singleMCFileset = Fileset(name="MCTestFileset")
    newFile = File("MCFakeFileTest", size=1000, events=numEvents)
    newFile.setLocation("se01")
    newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
    newFile["first_event"] = firstEvent
    newFile["last_event"] = lastEvent
    testWorkflow = Workflow()
    singleMCFileset.addFile(newFile)
    singleMCFileSubscription = Subscription(
        fileset=singleMCFileset, workflow=testWorkflow, split_algo="EventBased", type="Production"
    )
    return singleMCFileSubscription
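The returned subscription is ready for the splitter; a minimal usage sketch with an illustrative events_per_job (100 fake events split 50 per job should yield two jobs), following the factory-call pattern of the other examples:

subscription = self.generateFakeMCFile(numEvents=100, firstLumi=1, lastLumi=10)
jobFactory = SplitterFactory()(package="WMCore.DataStructs", subscription=subscription)
jobGroups = jobFactory(events_per_job=50)  # EventBased splitting of the single fake MC file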
Example 13: execute
# Required import: from WMCore.DataStructs.Fileset import Fileset [as alias]
# Or: from WMCore.DataStructs.Fileset.Fileset import addFile [as alias]
def execute(self, *args, **kwargs):
    self.logger.info("Data discovery and splitting for %s using user-provided files" % kwargs['task']['tm_taskname'])

    if 'tm_user_files' in kwargs['task'] and kwargs['task']['tm_user_files']:
        userfiles = kwargs['task']['tm_user_files']
    else:  ## For backward compatibility only.
        userfiles = kwargs['task']['tm_arguments'].get('userfiles')

    splitting = kwargs['task']['tm_split_algo']
    total_units = kwargs['task']['tm_totalunits']
    if not userfiles or splitting != 'FileBased':
        if not userfiles:
            msg = "No files specified to process for task %s." % kwargs['task']['tm_taskname']
        if splitting != 'FileBased':
            msg = "Data.splitting must be set to 'FileBased' when using a custom set of files."
        self.logger.error("Setting %s as failed: %s" % (kwargs['task']['tm_taskname'], msg))
        configreq = {'workflow': kwargs['task']['tm_taskname'],
                     'status': "FAILED",
                     'subresource': 'failure',
                     'failure': b64encode(msg)}
        self.server.post(self.resturi, data = urllib.urlencode(configreq))
        raise StopHandler(msg)

    if hasattr(self.config.Sites, 'available'):
        locations = self.config.Sites.available
    else:
        sbj = SiteDBJSON({"key": self.config.TaskWorker.cmskey,
                          "cert": self.config.TaskWorker.cmscert})
        locations = sbj.getAllCMSNames()

    userFileset = Fileset(name = kwargs['task']['tm_taskname'])
    self.logger.info("There are %d files specified by the user." % len(userfiles))
    if total_units > 0:
        self.logger.info("Will run over the first %d files." % total_units)
    file_counter = 0
    for userfile, idx in zip(userfiles, range(len(userfiles))):
        newFile = File(userfile, size = 1000, events = 1)
        newFile.setLocation(locations)
        newFile.addRun(Run(1, idx))
        newFile["block"] = 'UserFilesFakeBlock'
        newFile["first_event"] = 1
        newFile["last_event"] = 2
        userFileset.addFile(newFile)
        file_counter += 1
        if total_units > 0 and file_counter >= total_units:
            break

    return Result(task = kwargs['task'], result = userFileset)
Example 14: testProductionRunNumber
# Required import: from WMCore.DataStructs.Fileset import Fileset [as alias]
# Or: from WMCore.DataStructs.Fileset.Fileset import addFile [as alias]
def testProductionRunNumber(self):
    """
    _testProductionRunNumber_

    Verify that jobs created by production subscriptions have the correct
    run number in their job mask. Also verify that non-production
    subscriptions don't have modified run numbers.
    """
    testWorkflow = Workflow(spec = "spec.pkl", owner = "Steve",
                            name = "TestWorkflow", task = "TestTask")

    testFileset = Fileset(name = "TestFileset")
    testFile = File(lfn = "someLFN")
    testFileset.addFile(testFile)
    testFileset.commit()

    testSubscription = Subscription(fileset = testFileset,
                                    workflow = testWorkflow,
                                    split_algo = "FileBased",
                                    type = "Production")
    myJobFactory = JobFactory(subscription = testSubscription)

    testJobGroups = myJobFactory()
    for testJobGroup in testJobGroups:
        for job in testJobGroup.jobs:
            assert job["mask"]["FirstRun"] == 1, \
                "Error: First run is wrong."
            assert job["mask"]["LastRun"] == 1, \
                "Error: Last run is wrong."

    testSubscription = Subscription(fileset = testFileset,
                                    workflow = testWorkflow,
                                    split_algo = "FileBased",
                                    type = "Processing")
    myJobFactory = JobFactory(subscription = testSubscription)

    testJobGroups = myJobFactory()
    for testJobGroup in testJobGroups:
        for job in testJobGroup.jobs:
            assert job["mask"]["FirstRun"] == None, \
                "Error: First run is wrong."
            assert job["mask"]["LastRun"] == None, \
                "Error: Last run is wrong."

    return
Example 15: getFileset
# Required import: from WMCore.DataStructs.Fileset import Fileset [as alias]
# Or: from WMCore.DataStructs.Fileset.Fileset import addFile [as alias]
def getFileset(self):
    """
    Get a fileset based on the task
    """
    # Note: `type` here is the Python builtin (likely a leftover), so the
    # fileset name is effectively constant
    fileset = Fileset(name='Merge%s' % (type))

    for i in range(0, random.randint(15, 25)):
        # Use the testDir to generate a random lfn
        inpFile = File(lfn="%s/%s.root" % (self.testDir, makeUUID()),
                       size=random.randint(200000, 1000000),
                       events=random.randint(1000, 2000))
        inpFile.setLocation('Megiddo')
        fileset.addFile(inpFile)

    return fileset