This article collects typical usage examples of the Python class WMCore.DataStructs.Fileset.Fileset. If you are unsure what Fileset is for, or how it is commonly called, the curated class examples here should help.
Shown below are 15 code examples of the Fileset class, sorted by popularity by default.
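For orientation, here is a minimal sketch of the Fileset API as it appears throughout the examples below (assuming WMCore is importable; the file name and sizes are illustrative):

from WMCore.DataStructs.Fileset import Fileset
from WMCore.DataStructs.File import File

fileset = Fileset(name="DemoFileset")            # a named collection of File objects
newFile = File("/store/demo/file.root", size=1000, events=100)
fileset.addFile(newFile)                         # stages the file as newly added
fileset.commit()                                 # promotes newly added files into the fileset
print(len(fileset.getFiles(type="list")))        # -> 1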
Example 1: createSubscription
def createSubscription(self, nFiles, lumisPerFile, twoSites = False, nEventsPerFile = 100):
    """
    _createSubscription_

    Create a subscription for testing
    """
    baseName = makeUUID()
    testFileset = Fileset(name = baseName)
    for i in range(nFiles):
        newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile,
                                  i, lumisPerFile, 'blenheim')
        testFileset.addFile(newFile)
    if twoSites:
        for i in range(nFiles):
            newFile = self.createFile('%s_%i_2' % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, 'malpaquet')
            testFileset.addFile(newFile)
    testSubscription = Subscription(fileset = testFileset,
                                    workflow = self.testWorkflow,
                                    split_algo = "EventAwareLumiBased",
                                    type = "Processing")

    return testSubscription
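A hypothetical caller would hand the returned subscription straight to WMCore's splitter factory. A sketch under the same assumptions as the test class (SplitterFactory and self.performanceParams come from the surrounding tests; argument values are illustrative):

sub = self.createSubscription(nFiles=5, lumisPerFile=2, twoSites=True)
splitter = SplitterFactory()
jobFactory = splitter(package="WMCore.DataStructs", subscription=sub)
jobGroups = jobFactory(events_per_job=200,
                       performance=self.performanceParams)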
Example 2: processDataset
def processDataset(self):
    """
    _processDataset_

    Import the Dataset contents and create a set of jobs from it
    """
    #  //
    # // Now create the job definitions
    #//
    logging.debug("SplitSize = %s" % self.splitSize)
    logging.debug("AllowedSites = %s" % self.allowedSites)
    thefiles = Fileset(name='FilesToSplit')
    reader = DBSReader(self.dbsUrl)
    fileList = reader.dbs.listFiles(analysisDataset = self.inputDataset(),
                                    retriveList = [ 'retrive_block',
                                                    'retrive_run'])

    blocks = {}
    for f in fileList:
        block = f['Block']['Name']
        if block not in blocks:
            # look up each block's locations only once
            blocks[block] = reader.listFileBlockLocation(block)
        f['Block']['StorageElementList'].extend(blocks[block])
        wmbsFile = File(f['LogicalFileName'])
        for x in blocks[block]:
            wmbsFile['locations'].add(x)
        wmbsFile['block'] = block
        thefiles.addFile(wmbsFile)

    work = Workflow()
    subs = Subscription(fileset = thefiles,
                        workflow = work,
                        split_algo = 'FileBased',
                        type = "Processing")
    splitter = SplitterFactory()
    jobfactory = splitter(subs)
    jobs = jobfactory(files_per_job = self.splitSize)

    jobDefs = []
    for job in jobs.jobs:
        #job.mask.setMaxAndSkipEvents(-1, 0)
        jobDef = JobDefinition()
        jobDef['LFNS'].extend(job.listLFNs())
        jobDef['SkipEvents'] = 0
        jobDef['MaxEvents'] = -1
        for x in job.listFiles():
            jobDef['SENames'].extend(list(x['locations']))
        jobDefs.append(jobDef)

    return jobDefs
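The block-location cache in the loop above is the part worth isolating: each block's locations are fetched once and reused for every file in that block. The same pattern as a standalone sketch (the reader argument stands in for any object exposing listFileBlockLocation):

blockLocations = {}

def locationsFor(blockName, reader):
    # one location lookup per block, however many files it contains
    if blockName not in blockLocations:
        blockLocations[blockName] = reader.listFileBlockLocation(blockName)
    return blockLocations[blockName]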
Example 3: oneHundredFiles
def oneHundredFiles(self, splittingAlgo="EventBased", jobType="Processing"):
    """
    _oneHundredFiles_

    Generate a WMBS data stack representing 100 files for job splitter
    testing
    """
    fileset1 = Fileset(name="EventBasedFiles1")
    for i in range(0, 100):
        f = File("/store/MultipleFileSplit%s.root" % i,  # lfn
                 1000,    # size
                 100,     # events
                 10 + i,  # run
                 12312)   # lumi
        f["locations"].add("BULLSHIT")
        fileset1.addFile(f)
    work = Workflow()
    subscription1 = Subscription(fileset=fileset1, workflow=work, split_algo=splittingAlgo, type=jobType)
    splitter = SplitterFactory()
    jobfactory = splitter(subscription1)
    jobs = jobfactory(events_per_job=100)
    # for jobGroup in jobs:
    #     yield jobGroup
    self.manager.addSeeder("RandomSeeder", **self.seedlistForRandom)
    self.manager.addSeeder("RunAndLumiSeeder")
    return jobs
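The returned job groups can be consumed directly, as the commented-out loop hints. A sketch of iterating them from the same test class (the job key names follow WMCore's Job data structure, so treat them as assumptions to verify):

jobs = self.oneHundredFiles(splittingAlgo="EventBased")
for jobGroup in jobs:
    for job in jobGroup.jobs:
        print(job["name"], len(job["input_files"]))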
Example 4: testG_LumiMask
def testG_LumiMask(self):
    """
    _testG_LumiMask_

    Test that we can use a lumi-mask to filter good runs/lumis.
    """
    splitter = SplitterFactory()

    # Create 3 files with 100 events per lumi:
    # - file1 with 1 run  of 8 lumis
    # - file2 with 2 runs of 2 lumis each
    # - file3 with 1 run  of 5 lumis
    fileA = File(lfn = "/this/is/file1", size = 1000, events = 800)
    fileB = File(lfn = "/this/is/file2", size = 1000, events = 400)
    fileC = File(lfn = "/this/is/file3", size = 1000, events = 500)

    lumiListA = []
    for lumi in range(8):
        lumiListA.append(10 + lumi)
    fileA.addRun(Run(1, *lumiListA))
    fileA.setLocation("somese.cern.ch")

    lumiListB1 = []
    lumiListB2 = []
    for lumi in range(2):
        lumiListB1.append(20 + lumi)
        lumiListB2.append(30 + lumi)
    fileB.addRun(Run(2, *lumiListB1))
    fileB.addRun(Run(3, *lumiListB2))
    fileB.setLocation("somese.cern.ch")

    lumiListC = []
    for lumi in range(5):
        lumiListC.append(40 + lumi)
    fileC.addRun(Run(4, *lumiListC))
    fileC.setLocation("somese.cern.ch")

    testFileset = Fileset(name = 'Fileset')
    testFileset.addFile(fileA)
    testFileset.addFile(fileB)
    testFileset.addFile(fileC)

    testSubscription = Subscription(fileset = testFileset,
                                    workflow = self.testWorkflow,
                                    split_algo = "EventAwareLumiBased",
                                    type = "Processing")
    jobFactory = splitter(package = "WMCore.DataStructs",
                          subscription = testSubscription)

    # Use a lumi-mask = {1: [[10, 14]], 2: [[20, 21]], 4: [[40, 41]]}
    jobGroups = jobFactory(halt_job_on_file_boundaries = False,
                           splitOnRun = False,
                           events_per_job = 850,
                           runs = ['1', '2', '4'],
                           lumis = ['10,14', '20,21', '40,41'],
                           performance = self.performanceParams)

    self.assertEqual(len(jobGroups), 1, "There should be only one job group")
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 2, "Two jobs must be in the jobgroup")
    self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {1: [[10, 14]], 2: [[20, 21]], 4: [[40, 40]]})
    self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {4: [[41, 41]]})
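For reference, the runs and lumis arguments above are parallel lists: each run number pairs with a 'first,last' lumi range. A pure-Python sketch of how they encode the mask described in the comment:

runs = ['1', '2', '4']
lumis = ['10,14', '20,21', '40,41']
mask = {}
for run, lumiRange in zip(runs, lumis):
    first, last = lumiRange.split(',')
    mask[int(run)] = [[int(first), int(last)]]
# mask == {1: [[10, 14]], 2: [[20, 21]], 4: [[40, 41]]}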
Example 5: execute
def execute(self, *args, **kwargs):
    totalevents = kwargs['task']['tm_totalunits']
    firstEvent = 1
    lastEvent = totalevents
    firstLumi = 1
    lastLumi = 10

    # Set a default of 100 events per lumi. This is set as a task
    # property, as the splitting considers it independently of the file
    # information provided by the fake dataset.
    if not kwargs['task']['tm_events_per_lumi']:
        kwargs['task']['tm_events_per_lumi'] = 100

    # MC comes with only one MCFakeFile
    singleMCFileset = Fileset(name = "MCFakeFileSet")
    newFile = File("MCFakeFile", size = 1000, events = totalevents)
    sbj = SiteDBJSON({"key": self.config.TaskWorker.cmskey,
                      "cert": self.config.TaskWorker.cmscert})
    newFile.setLocation(sbj.getAllCMSNames())
    newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
    newFile["block"] = 'MCFakeBlock'
    newFile["first_event"] = firstEvent
    newFile["last_event"] = lastEvent
    singleMCFileset.addFile(newFile)

    return Result(task=kwargs['task'], result=singleMCFileset)
Example 6: setUp
def setUp(self):
    """
    _setUp_

    Create three subscriptions: one that contains a single file, one that
    contains multiple files and one whose single file has no events.
    """
    self.multipleFileFileset = Fileset(name="TestFileset1")
    for i in range(10):
        newFile = File(makeUUID(), size=1000, events=100)
        newFile.setLocation("se01")
        self.multipleFileFileset.addFile(newFile)

    self.singleFileFileset = Fileset(name="TestFileset2")
    newFile = File("/some/file/name", size=1000, events=100)
    newFile.setLocation("se02")
    self.singleFileFileset.addFile(newFile)

    self.emptyFileFileset = Fileset(name="TestFileset3")
    newFile = File("/some/file/name", size=1000, events=0)
    newFile.setLocation("se03")
    self.emptyFileFileset.addFile(newFile)

    testWorkflow = Workflow()
    self.multipleFileSubscription = Subscription(
        fileset=self.multipleFileFileset, workflow=testWorkflow, split_algo="EventBased", type="Processing"
    )
    self.singleFileSubscription = Subscription(
        fileset=self.singleFileFileset, workflow=testWorkflow, split_algo="EventBased", type="Processing"
    )
    self.emptyFileSubscription = Subscription(
        fileset=self.emptyFileFileset, workflow=testWorkflow, split_algo="EventBased", type="Processing"
    )
    return
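A test built on this setUp would typically drive EventBased splitting through the splitter factory, as the other examples do. A minimal sketch (the events_per_job value is illustrative):

splitter = SplitterFactory()
jobFactory = splitter(package="WMCore.DataStructs",
                      subscription=self.multipleFileSubscription)
jobGroups = jobFactory(events_per_job=50)  # 100-event files -> 2 jobs per file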
Example 7: testHardLimitSplittingOnly
def testHardLimitSplittingOnly(self):
    """
    _testHardLimitSplittingOnly_

    Checks that we can split a set of files where every file has a single
    lumi too big to fit in a runnable job
    """
    splitter = SplitterFactory()

    # Create 3 single-big-lumi files
    testFileset = Fileset(name="FilesetA")
    testFileA = self.createFile("/this/is/file1", 1000, 0, 1, "somese.cern.ch")
    testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "somese.cern.ch")
    testFileC = self.createFile("/this/is/file3", 1000, 2, 1, "somese.cern.ch")
    testFileset.addFile(testFileA)
    testFileset.addFile(testFileB)
    testFileset.addFile(testFileC)

    testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiByWork", type="Processing")
    jobFactory = splitter(package="WMCore.DataStructs", subscription=testSubscription)

    # Fail single lumis with more than 800 events and put 550 events per job
    jobGroups = jobFactory(halt_job_on_file_boundaries=True, splitOnRun=True, events_per_job=550,
                           max_events_per_lumi=800, performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 1)
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 3)
    for job in jobs:
        self.assertTrue(job['failedOnCreation'])
        self.assertIn("Too many (estimated) events (1000.0) in", job['failedReason'])
    return
Example 8: testMetaData
def testMetaData(self):
    """
    _testMetaData_

    Make sure that the workflow name, task, owner and white and black lists
    make it into each job object.
    """
    testWorkflow = Workflow(spec = "spec.pkl", owner = "Steve",
                            name = "TestWorkflow", task = "TestTask")

    testFileset = Fileset(name = "TestFileset")
    testFile = File(lfn = "someLFN")
    testFileset.addFile(testFile)
    testFileset.commit()

    testSubscription = Subscription(fileset = testFileset,
                                    workflow = testWorkflow,
                                    split_algo = "FileBased")

    myJobFactory = JobFactory(subscription = testSubscription)
    testJobGroups = myJobFactory(siteWhitelist = ["site1"], siteBlacklist = ["site2"])
    self.assertTrue(len(testJobGroups) > 0)

    for testJobGroup in testJobGroups:
        self.assertTrue(len(testJobGroup.jobs) > 0)
        for job in testJobGroup.jobs:
            self.assertEqual(job["task"], "TestTask", "Error: Task is wrong.")
            self.assertEqual(job["workflow"], "TestWorkflow", "Error: Workflow is wrong.")
            self.assertEqual(job["owner"], "Steve", "Error: Owner is wrong.")
    return
Example 9: setUp
def setUp(self):
    """
    _setUp_

    Create two subscriptions: One that contains a single file and one that
    contains multiple files.
    """
    self.multipleFileFileset = Fileset(name = "TestFileset1")
    for i in range(10):
        newFile = File(makeUUID(), size = 1000, events = 100)
        newFile.setLocation('blenheim')
        newFile.setLocation('malpaquet')
        self.multipleFileFileset.addFile(newFile)

    self.singleFileFileset = Fileset(name = "TestFileset2")
    newFile = File("/some/file/name", size = 1000, events = 100)
    newFile.setLocation('blenheim')
    self.singleFileFileset.addFile(newFile)

    testWorkflow = Workflow()
    self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset,
                                                 workflow = testWorkflow,
                                                 split_algo = "FileBased",
                                                 type = "Processing")
    self.singleFileSubscription = Subscription(fileset = self.singleFileFileset,
                                               workflow = testWorkflow,
                                               split_algo = "FileBased",
                                               type = "Processing")
    #self.multipleFileSubscription.create()
    #self.singleFileSubscription.create()
    return
Example 10: algorithm
def algorithm(self, *args, **kwargs):
    """
    _algorithm_

    Implement merge algorithm for the subscription provided
    """
    fileset = list(self.subscription.availableFiles())

    mergeSize = int(kwargs['merge_size'])
    overflow = bool(kwargs.get('all_files', False))
    fileset.sort()

    accumSize = 0
    jobFiles = Fileset()
    locationDict = self.sortByLocation()
    for location in locationDict:
        baseName = makeUUID()
        self.newGroup()
        for f in locationDict[location]:
            accumSize += f['size']
            jobFiles.addFile(f)
            if accumSize >= mergeSize:
                self.newJob(name = '%s-%s' % (baseName, len(self.currentGroup.jobs) + 1),
                            files = jobFiles)
                self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)
                accumSize = 0
                jobFiles = Fileset()

        if len(jobFiles) > 0:
            if overflow:
                self.newJob(name = '%s-%s' % (baseName, len(self.currentGroup.jobs) + 1),
                            files = jobFiles)
                self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)
Example 11: execute
def execute(self, *args, **kwargs):  #pylint: disable=unused-argument
    # since https://github.com/dmwm/CRABServer/issues/5633 totalunits can be a float
    # but that would confuse WMCore, therefore cast to int
    totalevents = int(kwargs['task']['tm_totalunits'])
    firstEvent = 1
    lastEvent = totalevents
    firstLumi = 1
    lastLumi = 10

    # Set a default of 100 events per lumi. This is set as a task
    # property, as the splitting considers it independently of the file
    # information provided by the fake dataset.
    if not kwargs['task']['tm_events_per_lumi']:
        kwargs['task']['tm_events_per_lumi'] = 100

    # MC comes with only one MCFakeFile
    singleMCFileset = Fileset(name = "MCFakeFileSet")
    newFile = File("MCFakeFile", size = 1000, events = totalevents)
    newFile.setLocation(self.getListOfSites())
    newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
    newFile["block"] = 'MCFakeBlock'
    newFile["first_event"] = firstEvent
    newFile["last_event"] = lastEvent
    singleMCFileset.addFile(newFile)

    return Result(task=kwargs['task'], result=singleMCFileset)
Example 12: testCall
def testCall(self):
    fileset = Fileset(name="FakeFeederTest")
    # Exercise the feeder repeatedly against the same fileset
    for i in range(1, 21):
        self.feeder([fileset])
        # grab whatever files the feeder delivered this pass
        currentFiles = fileset.getFiles(type = "set")
        if len(currentFiles) > 0:
            poppedFile = currentFiles.pop()
        fileset.commit()
Example 13: testF_HardLimitSplittingOnly
def testF_HardLimitSplittingOnly(self):
    """
    _testF_HardLimitSplittingOnly_

    Checks that we can split a set of files where every file has a single
    lumi too big to fit in a runnable job
    """
    splitter = SplitterFactory()

    # Create 3 single-big-lumi files
    testFileset = Fileset(name="FilesetA")
    testFileA = self.createFile("/this/is/file1", 1000, 0, 1, "somese.cern.ch")
    testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "somese.cern.ch")
    testFileC = self.createFile("/this/is/file3", 1000, 2, 1, "somese.cern.ch")
    testFileset.addFile(testFileA)
    testFileset.addFile(testFileB)
    testFileset.addFile(testFileC)

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiBased",
                                    type="Processing")
    jobFactory = splitter(package="WMCore.DataStructs",
                          subscription=testSubscription)

    # Settings are to split on file boundaries, to fail single lumis that
    # exceed the job time limit and to put 550 events per job
    jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                           splitOnRun=True,
                           events_per_job=550,
                           job_time_limit=9600,
                           performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 1, "There should be only one job group")
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 3, "Three jobs must be in the jobgroup")
    for i in range(0, 3):
        self.assertTrue(jobs[i]['failedOnCreation'], "It should have been marked as failed")

        runNums = list(jobs[i]['mask']['runAndLumis'].keys())
        self.assertEqual(len(runNums), 1)

        lumiNums = list(jobs[i]['mask']['runAndLumis'].values())[0]
        self.assertEqual(len(lumiNums), 1)

        finalLumi = []
        for pair in lumiNums:
            finalLumi.extend(range(pair[0], pair[1] + 1))
        self.assertEqual(len(finalLumi), 1)

        self.assertEqual(jobs[i]['failedReason'],
                         "File /this/is/file%d has a single lumi %s, in run %s with too many events 1000 and it woud take 12000 sec to run" % (
                             i + 1, finalLumi[0], runNums[0]))
    return
Example 14: setUp
def setUp(self):
    """
    _setUp_

    Create four subscriptions: one with a single file, one with multiple
    files, one with multiple files spread over several lumis and one with
    multiple files in a single lumi.
    """
    self.multipleFileFileset = Fileset(name = "TestFileset1")
    for i in range(10):
        newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
        newFile.addRun(Run(i, *[45 + i]))
        self.multipleFileFileset.addFile(newFile)

    self.singleFileFileset = Fileset(name = "TestFileset2")
    newFile = File("/some/file/name", size = 1000, events = 100, locations = set(["somese.cern.ch"]))
    newFile.addRun(Run(1, *[45]))
    self.singleFileFileset.addFile(newFile)

    self.multipleFileLumiset = Fileset(name = "TestFileset3")
    for i in range(10):
        newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
        newFile.addRun(Run(1, *[45 + i // 3]))
        self.multipleFileLumiset.addFile(newFile)

    self.singleLumiFileset = Fileset(name = "TestFileset4")
    for i in range(10):
        newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
        newFile.addRun(Run(1, *[45]))
        self.singleLumiFileset.addFile(newFile)

    testWorkflow = Workflow()
    self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset,
                                                 workflow = testWorkflow,
                                                 split_algo = "EndOfRun",
                                                 type = "Processing")
    self.singleFileSubscription = Subscription(fileset = self.singleFileFileset,
                                               workflow = testWorkflow,
                                               split_algo = "EndOfRun",
                                               type = "Processing")
    self.multipleLumiSubscription = Subscription(fileset = self.multipleFileLumiset,
                                                 workflow = testWorkflow,
                                                 split_algo = "EndOfRun",
                                                 type = "Processing")
    self.singleLumiSubscription = Subscription(fileset = self.singleLumiFileset,
                                               workflow = testWorkflow,
                                               split_algo = "EndOfRun",
                                               type = "Processing")
    return
Example 15: generateFakeMCFile
def generateFakeMCFile(self, numEvents=100, firstEvent=1, lastEvent=100, firstLumi=1, lastLumi=10):
    # MC comes with only one MCFakeFile
    singleMCFileset = Fileset(name="MCTestFileset")
    newFile = File("MCFakeFileTest", size=1000, events=numEvents)
    newFile.setLocation("se01")
    newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
    newFile["first_event"] = firstEvent
    newFile["last_event"] = lastEvent
    testWorkflow = Workflow()
    singleMCFileset.addFile(newFile)
    singleMCFileSubscription = Subscription(
        fileset=singleMCFileset, workflow=testWorkflow, split_algo="EventBased", type="Production"
    )
    return singleMCFileSubscription
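A hypothetical use of the returned subscription, splitting the fake MC file into production jobs (argument values are illustrative):

sub = self.generateFakeMCFile(numEvents=1000, lastEvent=1000)
splitter = SplitterFactory()
jobFactory = splitter(package="WMCore.DataStructs", subscription=sub)
jobGroups = jobFactory(events_per_job=250)  # -> 4 jobs of 250 events each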