

Python Fileset.Fileset Class Code Examples

This article collects typical usage examples of the Python class WMCore.DataStructs.Fileset.Fileset. If you have been wondering what the Fileset class does, how to use it, or what it looks like in practice, the curated class examples below should help.


The following presents 15 code examples of the Fileset class, sorted by popularity by default.
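As a quick orientation before the examples, here is a minimal sketch of the core Fileset operations that recur throughout this page: construction by name, addFile(), commit(), and getFiles(). The fileset name, LFN, and counts below are invented for illustration; every call shown also appears in the examples that follow.

    from WMCore.DataStructs.Fileset import Fileset
    from WMCore.DataStructs.File import File

    # Create a named fileset and add a file to it
    fileset = Fileset(name="DemoFileset")          # hypothetical name
    demoFile = File("/store/demo/file0.root",      # hypothetical LFN
                    size=1000, events=100)
    demoFile.setLocation("somese.cern.ch")
    fileset.addFile(demoFile)

    # Move newly added files into the fileset's committed set
    fileset.commit()

    # Retrieve the files, e.g. as a set
    print(len(fileset.getFiles(type="set")))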

Example 1: createSubscription

    def createSubscription(self, nFiles, lumisPerFile, twoSites = False, nEventsPerFile = 100):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name = baseName)
        for i in range(nFiles):
            newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, 'blenheim')
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = self.createFile('%s_%i_2' % (baseName, i), nEventsPerFile,
                                          i, lumisPerFile, 'malpaquet')
                testFileset.addFile(newFile)


        testSubscription = Subscription(fileset = testFileset,
                                         workflow = self.testWorkflow,
                                         split_algo = "EventAwareLumiBased",
                                         type = "Processing")

        return testSubscription
Developer ID: ticoann, Project: WMCore, Lines: 27, Source: EventAwareLumiBased_t.py

Example 2: processDataset

    def processDataset(self):
        """
        _processDataset_

        Import the Dataset contents and create a set of jobs from it

        """

        #  //
        # // Now create the job definitions
        #//
        logging.debug("SplitSize = %s" % self.splitSize)
        logging.debug("AllowedSites = %s" % self.allowedSites)
        thefiles = Fileset(name='FilesToSplit')
        reader = DBSReader(self.dbsUrl)
        fileList = reader.dbs.listFiles(analysisDataset = self.inputDataset(),
                                        retriveList = [ 'retrive_block',
                                                        'retrive_run'])

        blocks = {}

        for f in fileList:
            block = f['Block']['Name']
            if block not in blocks:
                blocks[block] = reader.listFileBlockLocation(block)
            f['Block']['StorageElementList'].extend(blocks[block])
            wmbsFile = File(f['LogicalFileName'])
            for x in blocks[block]:
                wmbsFile['locations'].add(x)
            wmbsFile['block'] = block
            thefiles.addFile(
                wmbsFile
                )


        work = Workflow()
        subs = Subscription(
            fileset = thefiles,
            workflow = work,
            split_algo = 'FileBased',
            type = "Processing")
        splitter = SplitterFactory()
        jobfactory = splitter(subs)

        jobs = jobfactory(files_per_job = self.splitSize)



        jobDefs = []
        for job in jobs.jobs:
            #job.mask.setMaxAndSkipEvents(-1, 0)
            jobDef = JobDefinition()
            jobDef['LFNS'].extend(job.listLFNs())
            jobDef['SkipEvents'] = 0
            jobDef['MaxEvents'] = -1
            for x in job.listFiles():
                jobDef['SENames'].extend(list(x['locations']))
            jobDefs.append(jobDef)


        return jobDefs
Developer ID: PerilousApricot, Project: CRAB2, Lines: 60, Source: ADSJobFactory.py
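In short, the flow above is: query DBS for the files of the analysis dataset, attach the block locations to each WMBS File, collect the files into a Fileset, wrap it in a FileBased Subscription, run the splitter, and finally convert each job into a JobDefinition carrying its LFNs and SE names.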

Example 3: oneHundredFiles

    def oneHundredFiles(self, splittingAlgo="EventBased", jobType="Processing"):
        """
        _oneHundredFiles_
        
        Generate a WMBS data stack representing 100 files for job splitter
        testing
        
        """
        fileset1 = Fileset(name="EventBasedFiles1")
        for i in range(0, 100):
            f = File("/store/MultipleFileSplit%s.root" % i,  # lfn
                     1000,                                   # size
                     100,                                    # events
                     10 + i,                                 # run
                     12312)                                  # lumi
            f["locations"].add("BULLSHIT")

            fileset1.addFile(f)

        work = Workflow()
        subscription1 = Subscription(fileset=fileset1, workflow=work, split_algo=splittingAlgo, type=jobType)
        splitter = SplitterFactory()
        jobfactory = splitter(subscription1)
        jobs = jobfactory(events_per_job=100)
        # for jobGroup in jobs:
        #    yield jobGroup

        self.manager.addSeeder("RandomSeeder", **self.seedlistForRandom)
        self.manager.addSeeder("RunAndLumiSeeder")

        return jobs
Developer ID: (not listed), Project: (not listed), Lines: 29, Source: (not listed)

Example 4: testG_LumiMask

    def testG_LumiMask(self):
        """
        _testG_LumiMask_

        Test that we can use a lumi-mask to filter good runs/lumis.
        """
        splitter = SplitterFactory()

        # Create 3 files with 100 events per lumi:
        # - file1 with 1 run  of 8 lumis
        # - file2 with 2 runs of 2 lumis each
        # - file3 with 1 run  of 5 lumis
        fileA = File(lfn = "/this/is/file1", size = 1000, events = 800)
        fileB = File(lfn = "/this/is/file2", size = 1000, events = 400)
        fileC = File(lfn = "/this/is/file3", size = 1000, events = 500)

        lumiListA = []
        for lumi in range(8):
            lumiListA.append(10 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.setLocation("somese.cern.ch")
        lumiListB1 = []
        lumiListB2 = []
        for lumi in range(2):
            lumiListB1.append(20 + lumi)
            lumiListB2.append(30 + lumi)
        fileB.addRun(Run(2, *lumiListB1))
        fileB.addRun(Run(3, *lumiListB2))
        fileB.setLocation("somese.cern.ch")
        lumiListC = []
        for lumi in range(5):
            lumiListC.append(40 + lumi)
        fileC.addRun(Run(4, *lumiListC))
        fileC.setLocation("somese.cern.ch")

        testFileset = Fileset(name = 'Fileset')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.addFile(fileC)

        testSubscription = Subscription(fileset = testFileset,
                                        workflow = self.testWorkflow,
                                        split_algo = "EventAwareLumiBased",
                                        type = "Processing")
        jobFactory = splitter(package = "WMCore.DataStructs",
                              subscription = testSubscription)

        # Use a lumi-mask = {1: [[10,14]], 2: [[20,21]], 4: [[40,41]]}
        jobGroups = jobFactory(halt_job_on_file_boundaries = False,
                               splitOnRun = False,
                               events_per_job = 850,
                               runs = ['1', '2', '4'],
                               lumis = ['10,14', '20,21', '40,41'],
                               performance = self.performanceParams)

        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 2, "Two jobs must be in the jobgroup")
        self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {1: [[10, 14]], 2: [[20, 21]], 4: [[40, 40]]})
        self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {4: [[41, 41]]})
Developer ID: lucacopa, Project: WMCore, Lines: 60, Source: EventAwareLumiBased_t.py
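The expected masks follow directly from the arithmetic: the lumi-mask selects nine lumis (10-14, 20-21 and 40-41) at 100 events each, and with events_per_job = 850 the first job closes after eight lumis (800 events, since a ninth would exceed the limit), leaving only lumi 41 of run 4 for the second job.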

Example 5: execute

    def execute(self, *args, **kwargs):

        totalevents = kwargs['task']['tm_totalunits']
        firstEvent = 1
        lastEvent = totalevents
        firstLumi = 1
        lastLumi = 10

        # Set a default of 100 events per lumi.  This is set as a task
        # property, as the splitting considers it independently of the file
        # information provided by the fake dataset.
        if not kwargs['task']['tm_events_per_lumi']:
            kwargs['task']['tm_events_per_lumi'] = 100

        #MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name = "MCFakeFileSet")
        newFile = File("MCFakeFile", size = 1000, events = totalevents)
        sbj = SiteDBJSON({"key":self.config.TaskWorker.cmskey,
                          "cert":self.config.TaskWorker.cmscert})
        newFile.setLocation(sbj.getAllCMSNames())
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["block"] = 'MCFakeBlock'
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        singleMCFileset.addFile(newFile)

        return Result(task=kwargs['task'], result=singleMCFileset)
Developer ID: HassenRiahi, Project: CRABServer, Lines: 27, Source: MakeFakeFileSet.py

Example 6: setUp

    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation("se01")
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name="TestFileset2")
        newFile = File("/some/file/name", size=1000, events=100)
        newFile.setLocation("se02")
        self.singleFileFileset.addFile(newFile)

        self.emptyFileFileset = Fileset(name="TestFileset3")
        newFile = File("/some/file/name", size=1000, events=0)
        newFile.setLocation("se03")
        self.emptyFileFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset, workflow=testWorkflow, split_algo="EventBased", type="Processing"
        )
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset, workflow=testWorkflow, split_algo="EventBased", type="Processing"
        )
        self.emptyFileSubscription = Subscription(
            fileset=self.emptyFileFileset, workflow=testWorkflow, split_algo="EventBased", type="Processing"
        )

        return
Developer ID: stuartw, Project: WMCore, Lines: 35, Source: EventBased_t.py
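A hedged sketch of how a test built on this setUp would drive one of the subscriptions, following the SplitterFactory pattern used in Examples 3 and 4 (the import path and the events_per_job value are assumptions, not part of the quoted test):

    from WMCore.JobSplitting.SplitterFactory import SplitterFactory

    # Inside a test method of the class above:
    splitter = SplitterFactory()
    jobFactory = splitter(package = "WMCore.DataStructs",
                          subscription = self.multipleFileSubscription)
    # 10 files x 100 events each at 50 events per job -> two jobs per file
    jobGroups = jobFactory(events_per_job = 50)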

Example 7: testHardLimitSplittingOnly

    def testHardLimitSplittingOnly(self):
        """
        _testHardLimitSplittingOnly_

        Checks that we can split a set of files where every file has a single
        lumi too big to fit in a runnable job
        """
        splitter = SplitterFactory()

        # Create 3 single-big-lumi files
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 1, "somese.cern.ch")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "somese.cern.ch")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 1, "somese.cern.ch")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork", type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs", subscription=testSubscription)

        # Fail single lumis with more than 800 events and put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True, splitOnRun=True, events_per_job=550,
                               max_events_per_lumi=800, performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3)
        for job in jobs:
            self.assertTrue(job['failedOnCreation'])
            self.assertIn("Too many (estimated) events (1000.0) in", job['failedReason'])

        return
Developer ID: alexanderrichards, Project: WMCore, Lines: 34, Source: EventAwareLumiByWork_t.py

Example 8: testMetaData

    def testMetaData(self):
        """
        _testMetaData_

        Make sure that the workflow name, task, owner and white and black lists
        make it into each job object.
        """
        testWorkflow = Workflow(spec = "spec.pkl", owner = "Steve",
                                name = "TestWorkflow", task = "TestTask")

        testFileset = Fileset(name = "TestFileset")
        testFile = File(lfn = "someLFN")
        testFileset.addFile(testFile)
        testFileset.commit()

        testSubscription = Subscription(fileset = testFileset,
                                        workflow = testWorkflow,
                                        split_algo = "FileBased")

        myJobFactory = JobFactory(subscription = testSubscription)
        testJobGroups = myJobFactory(siteWhitelist = ["site1"], siteBlacklist = ["site2"])
        self.assertTrue(len(testJobGroups) > 0)

        for testJobGroup in testJobGroups:
            self.assertTrue(len(testJobGroup.jobs) > 0)
            for job in testJobGroup.jobs:
                self.assertEqual(job["task"], "TestTask", "Error: Task is wrong.")
                self.assertEqual(job["workflow"], "TestWorkflow", "Error: Workflow is wrong.")
                self.assertEqual(job["owner"], "Steve", "Error: Owner is wrong.")
        return
Developer ID: BrunoCoimbra, Project: WMCore, Lines: 30, Source: JobFactory_t.py

Example 9: setUp

    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name = "TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.setLocation('blenheim')
            newFile.setLocation('malpaquet')
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name = "TestFileset2")
        newFile = File("/some/file/name", size = 1000, events = 100)
        newFile.setLocation('blenheim')
        self.singleFileFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "FileBased",
                                                     type = "Processing")
        self.singleFileSubscription = Subscription(fileset = self.singleFileFileset,
                                                   workflow = testWorkflow,
                                                   split_algo = "FileBased",
                                                   type = "Processing")

        #self.multipleFileSubscription.create()
        #self.singleFileSubscription.create()

        return
Developer ID: ticoann, Project: WMCore, Lines: 33, Source: FileBased_t.py

Example 10: algorithm

    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Implement merge algorithm for the subscription provided

        """
        fileset = list(self.subscription.availableFiles())

        mergeSize = int(kwargs['merge_size'])
        overflow  = bool(kwargs.get('all_files', False))
        fileset.sort()

        accumSize = 0
        jobFiles = Fileset()
        locationDict = self.sortByLocation()
        for location in locationDict:
            baseName = makeUUID()
            self.newGroup()
            for f in locationDict[location]:
                accumSize += f['size']
                jobFiles.addFile(f)
                if accumSize >= mergeSize:
                    self.newJob(name = '%s-%s' % (baseName, len(self.currentGroup.jobs) + 1),
                                      files = jobFiles)
                    self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)
                    accumSize = 0
                    jobFiles = Fileset()

            if len(jobFiles) > 0:
                if overflow:
                    self.newJob(name = '%s-%s' % (baseName, len(self.currentGroup.jobs) + 1),
                                      files = jobFiles)
                    self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)
Developer ID: AndresTanasijczuk, Project: WMCore, Lines: 34, Source: MergeBySize.py
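Like the splitters in the other examples, this merge algorithm would be driven through a SplitterFactory rather than called directly. A minimal hedged sketch, assuming a subscription whose split_algo is "MergeBySize" and with invented merge_size/all_files values (both keyword arguments are read by the algorithm body above):

    splitter = SplitterFactory()
    jobFactory = splitter(package = "WMCore.DataStructs",
                          subscription = mergeSubscription)  # split_algo = "MergeBySize" (assumed)
    # Close each job once roughly 2 GB of input has accumulated;
    # all_files sweeps leftover files into a final job per location
    jobGroups = jobFactory(merge_size = 2 * 1024 ** 3, all_files = True)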

Example 11: execute

    def execute(self, *args, **kwargs): #pylint: disable=unused-argument

        # since https://github.com/dmwm/CRABServer/issues/5633 totalunits can be a float
        # but that would confuse WMCore, therefore cast to int
        totalevents = int(kwargs['task']['tm_totalunits'])
        firstEvent = 1
        lastEvent = totalevents
        firstLumi = 1
        lastLumi = 10

        # Set a default of 100 events per lumi.  This is set as a task
        # property, as the splitting considers it independently of the file
        # information provided by the fake dataset.
        if not kwargs['task']['tm_events_per_lumi']:
            kwargs['task']['tm_events_per_lumi'] = 100

        #MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name = "MCFakeFileSet")
        newFile = File("MCFakeFile", size = 1000, events = totalevents)
        newFile.setLocation(self.getListOfSites())
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["block"] = 'MCFakeBlock'
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        singleMCFileset.addFile(newFile)

        return Result(task=kwargs['task'], result=singleMCFileset)
Developer ID: belforte, Project: CRABServer, Lines: 27, Source: MakeFakeFileSet.py

Example 12: testCall

    def testCall(self):
        fileset = Fileset(name="FakeFeederTest")
        for i in range(1, 21):
            self.feeder([fileset])
            files = fileset.getFiles(type = "set")
            if len(files) > 0:
                poppedFile = files.pop()
            fileset.commit()
Developer ID: stuartw, Project: WMCore, Lines: 8, Source: FakeFeeder_t.py

Example 13: testF_HardLimitSplittingOnly

    def testF_HardLimitSplittingOnly(self):
        """
        _testF_HardLimitSplittingOnly_

        Checks that we can split a set of files where every file has a single
        lumi too big to fit in a runnable job
        """
        splitter = SplitterFactory()

        # Create 3 single-big-lumi files
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 1, "somese.cern.ch")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "somese.cern.ch")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 1, "somese.cern.ch")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        # Settings are to split on job boundaries, to fail single lumis with more than 800 events
        # and to put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               job_time_limit=9600,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3, "Three jobs must be in the jobgroup")
        for i in range(0, 3):
            self.assertTrue(jobs[i]['failedOnCreation'], "It should have been marked as failed")

            runNums = list(jobs[i]['mask']['runAndLumis'].keys())
            self.assertEqual(len(runNums), 1)

            lumiNums = list(jobs[i]['mask']['runAndLumis'].values())[0]
            self.assertEqual(len(lumiNums), 1)

            finalLumi = []
            for pair in lumiNums:
                finalLumi.extend(range(pair[0], pair[1] + 1))
            self.assertEqual(len(finalLumi), 1)

            self.assertEqual(jobs[i]['failedReason'],
                             "File /this/is/file%d has a single lumi %s, in run %s with too many events 1000 and it woud take 12000 sec to run" % (
                             i + 1, finalLumi[0], runNums[0]))

        return
Developer ID: (not listed), Project: (not listed), Lines: 54, Source: (not listed)
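For context on the expected failure: with the test's performance parameters each single-lumi file is estimated at 12000 seconds of running, above the job_time_limit of 9600 seconds passed to the factory, so all three jobs are flagged as failed at creation rather than split further (a single lumi cannot be subdivided).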

Example 14: setUp

    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name = "TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
            newFile.addRun(Run(i, *[45+i]))
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name = "TestFileset2")
        newFile = File("/some/file/name", size = 1000, events = 100, locations = set(["somese.cern.ch"]))
        newFile.addRun(Run(1, *[45]))
        self.singleFileFileset.addFile(newFile)

        self.multipleFileLumiset = Fileset(name = "TestFileset3")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
            newFile.addRun(Run(1, *[45 + i // 3]))
            self.multipleFileLumiset.addFile(newFile)

        self.singleLumiFileset = Fileset(name = "TestFileset4")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
            newFile.addRun(Run(1, *[45]))
            self.singleLumiFileset.addFile(newFile)
            

        testWorkflow = Workflow()
        self.multipleFileSubscription  = Subscription(fileset = self.multipleFileFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "EndOfRun",
                                                      type = "Processing")
        self.singleFileSubscription    = Subscription(fileset = self.singleFileFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "EndOfRun",
                                                      type = "Processing")
        self.multipleLumiSubscription  = Subscription(fileset = self.multipleFileLumiset,
                                                      workflow = testWorkflow,
                                                      split_algo = "EndOfRun",
                                                      type = "Processing")
        self.singleLumiSubscription    = Subscription(fileset = self.singleLumiFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "EndOfRun",
                                                      type = "Processing")


        return
Developer ID: zhiwenuil, Project: WMCore, Lines: 51, Source: EndOfRun_t.py

Example 15: generateFakeMCFile

    def generateFakeMCFile(self, numEvents=100, firstEvent=1, lastEvent=100, firstLumi=1, lastLumi=10):
        # MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name="MCTestFileset")
        newFile = File("MCFakeFileTest", size=1000, events=numEvents)
        newFile.setLocation("se01")
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        testWorkflow = Workflow()
        singleMCFileset.addFile(newFile)
        singleMCFileSubscription = Subscription(
            fileset=singleMCFileset, workflow=testWorkflow, split_algo="EventBased", type="Production"
        )
        return singleMCFileSubscription
Developer ID: stuartw, Project: WMCore, Lines: 14, Source: EventBased_t.py
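A hedged usage sketch for this helper, reusing the positional SplitterFactory call from Example 3 (the event counts here are arbitrary):

    subscription = self.generateFakeMCFile(numEvents = 1000, lastEvent = 1000)
    splitter = SplitterFactory()
    jobFactory = splitter(subscription)
    # 1000 fake events at 100 events per job -> roughly 10 production jobs
    jobGroups = jobFactory(events_per_job = 100)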


Note: The WMCore.DataStructs.Fileset.Fileset class examples in this article were compiled by 纯净天空 from GitHub, MSDocs and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright in the source code remains with the original authors. Please consult each project's license before redistributing or using the code, and do not reproduce this article without permission.