本文整理汇总了Python中grid_control.datasets.DataProvider.create方法的典型用法代码示例。如果您正苦于以下问题：Python DataProvider.create方法的具体用法？Python DataProvider.create怎么用？Python DataProvider.create使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类grid_control.datasets.DataProvider的用法示例。
在下文中一共展示了DataProvider.create方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: setupJobParameters
# 需要导入模块: from grid_control.datasets import DataProvider [as 别名]
# 注意: create 是 DataProvider 类上的方法, 不能用 from ... import 单独导入; 请通过 DataProvider.create(...) 调用
def setupJobParameters(self, config, pm):
    """Set up the dataset provider, splitter and 'data' parameter source.

    Reads the 'dataset' option from a config view that adds the 'dataset'
    section and is tagged with this object. If the option is empty, the
    method returns early with ``self.dataSplitter`` and ``self.dataRefresh``
    left as None. Otherwise it creates the data provider and splitter,
    registers a DataParameterSource under the key 'data', configures its
    resync interval and installs a SIGUSR2 handler that forces a resync.

    Args:
        config: configuration object to read the dataset options from.
        pm: not used by this method.

    Raises:
        UserError: if ``self.dataSplitter.getMaxJobs()`` returns 0.
    """
    config = config.addSections(['dataset']).addTags([self])
    self.dataSplitter = None
    self.dataRefresh = None
    self.dataset = config.get('dataset', '').strip()
    if self.dataset == '':
        return
    # NOTE(review): the pattern literal below looks mangled by an e-mail
    # obfuscation filter ('[email protected]'); it is kept verbatim here -
    # TODO restore the real placeholder pattern from the upstream source.
    config.set('se output pattern', '@[email protected][email protected][email protected][email protected]@', override = False)
    config.set('default lookup', 'DATASETNICK', override = False)

    defaultProvider = config.get('dataset provider', 'ListProvider')
    dataProvider = DataProvider.create(config, self.dataset, defaultProvider)
    splitterName = config.get('dataset splitter', 'FileBoundarySplitter')
    # The provider gets the chance to check the requested splitter class
    splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
    self.dataSplitter = splitterClass(config)
    self.checkSE = config.getBool('dataset storage check', True, onChange = None)

    # Create and register dataset parameter plugin
    paramSource = DataParameterSource(config.getWorkPath(), 'data',
        dataProvider, self.dataSplitter, self.initDataProcessor())
    DataParameterSource.datasetsAvailable['data'] = paramSource

    # Select dataset refresh rate
    self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
    if self.dataRefresh > 0:
        # max() with the provider's queryLimit() can raise the interval above
        # the configured value, so report the interval that is really used.
        refreshInterval = max(self.dataRefresh, dataProvider.queryLimit())
        paramSource.resyncSetup(interval = refreshInterval)
        utils.vprint('Dataset source will be queried every %s' % utils.strTime(refreshInterval), -1)
    else:
        paramSource.resyncSetup(interval = 0)

    def externalRefresh(sig, frame):
        # Signal handler: force a resync of the dataset parameter source
        paramSource.resyncSetup(force = True)
    signal.signal(signal.SIGUSR2, externalRefresh)

    if self.dataSplitter.getMaxJobs() == 0:
        raise UserError('There are no events to process')
示例2: getEntries
# 需要导入模块: from grid_control.datasets import DataProvider [as 别名]
# 注意: create 是 DataProvider 类上的方法, 不能用 from ... import 单独导入; 请通过 DataProvider.create(...) 调用
def getEntries(self, path, metadata, events, seList, objStore):
    """Yield the given entry once, with 'PARENT_PATH' added to its metadata.

    The lookup source is ``self.source``; when that is the empty string and
    a 'datacache.dat' file exists below objStore's 'GC_WORKDIR' entry, that
    file is used instead. On first use of a source, its blocks are loaded
    through a 'ListProvider' and every translated file URL is mapped to the
    dataset name of its block in ``self.lfnMap[source]``.

    Args:
        path, events, seList: passed through unchanged in the yielded tuple.
        metadata: dict-like; values under the keys listed in
            ``self.parentKeys`` are iterated as parent paths, and
            'PARENT_PATH' is written to it.
        objStore: dict-like; read for 'GC_WORKDIR' and passed through.

    Yields:
        The tuple (path, metadata, events, seList, objStore).
    """
    cacheFile = os.path.join(objStore.get('GC_WORKDIR', ''), 'datacache.dat')
    useCacheFile = (self.source == '') and os.path.exists(cacheFile)
    source = utils.QM(useCacheFile, cacheFile, self.source)

    if source and (source not in self.lfnMap):
        provider = DataProvider.create(createConfigFactory().getConfig(), source, 'ListProvider')
        # Gather all (dataset name, file list) pairs before touching lfnMap
        blockInfos = [(block[DataProvider.Dataset], block[DataProvider.FileList])
            for block in provider.getBlocks()]
        for (dsName, fileList) in blockInfos:
            lfnToDataset = self.lfnMap.setdefault(source, {})
            lfnToDataset.update(dict([(self.lfnTrans(fileInfo[DataProvider.URL]), dsName)
                for fileInfo in fileList]))

    parents = set()
    for key in self.parentKeys:
        if key not in metadata:
            continue
        # Unknown parent paths resolve to None and are dropped below
        parents.update([self.lfnMap.get(source, {}).get(self.lfnTrans(parentPath))
            for parentPath in metadata[key]])
    metadata['PARENT_PATH'] = filter(None, parents)
    yield (path, metadata, events, seList, objStore)
示例3: setupJobParameters
# 需要导入模块: from grid_control.datasets import DataProvider [as 别名]
# 注意: create 是 DataProvider 类上的方法, 不能用 from ... import 单独导入; 请通过 DataProvider.create(...) 调用
def setupJobParameters(self, config, pm):
    """Set up the dataset provider, splitter and 'data' parameter source.

    Reads the 'dataset' option from a TaggedConfigView that adds the
    'dataset' section and is tagged with this object. If the option is
    empty, the method returns early with ``self.dataSplitter`` and
    ``self.dataRefresh`` left as None. Otherwise it creates the data
    provider, the splitter and the split processor, registers a
    DataParameterSource under the key 'data', configures its resync
    interval (forcing a resync if requested) and installs a SIGUSR2
    handler that forces a resync.

    Args:
        config: configuration object to read the dataset options from.
        pm: not used by this method.

    Raises:
        UserError: if the 'dataset' option changes from '' to a non-empty
            value (raised by the onChange callback), or if
            ``self.dataSplitter.getMaxJobs()`` returns 0.
    """
    config = config.changeView(viewClass = TaggedConfigView, addSections = ['dataset'], addTags = [self])
    self.dataSplitter = None
    self.dataRefresh = None
    self._forceRefresh = config.getState('resync', detail = 'dataset', default = False)

    def userRefresh(config, old_obj, cur_obj, cur_entry, obj2str):
        # onChange callback for the 'dataset' option: reject attaching a
        # dataset where there was none, otherwise request a forced resync.
        if (old_obj == '') and (cur_obj != ''):
            raise UserError('It is currently not possible to attach a dataset to a non-dataset task!')
        self._forceRefresh = True
        return cur_obj

    self.dataset = config.get('dataset', '', onChange = userRefresh).strip()
    if self.dataset == '':
        return
    # NOTE(review): the pattern literal below looks mangled by an e-mail
    # obfuscation filter ('[email protected]'); it is kept verbatim here -
    # TODO restore the real placeholder pattern from the upstream source.
    config.set('se output pattern', '@[email protected][email protected][email protected][email protected]@')
    config.set('default lookup', 'DATASETNICK')

    defaultProvider = config.get('dataset provider', 'ListProvider')
    dataProvider = DataProvider.create(config, self.dataset, defaultProvider)
    splitterName = config.get('dataset splitter', 'FileBoundarySplitter')
    # The provider gets the chance to check the requested splitter class
    splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
    self.dataSplitter = splitterClass(config)

    # Create and register dataset parameter source
    paramSplitProcessor = config.getCompositePlugin('dataset processor',
        'BasicDataSplitProcessor SECheckSplitProcessor', 'MultiDataSplitProcessor',
        cls = DataSplitProcessor).getInstance(config)
    paramSource = DataParameterSource(config.getWorkPath(), 'data',
        dataProvider, self.dataSplitter, paramSplitProcessor)
    DataParameterSource.datasetsAvailable['data'] = paramSource

    # Select dataset refresh rate
    self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
    if self.dataRefresh > 0:
        # max() with the provider's queryLimit() can raise the interval above
        # the configured value, so report the interval that is really used.
        refreshInterval = max(self.dataRefresh, dataProvider.queryLimit())
        paramSource.resyncSetup(interval = refreshInterval)
        utils.vprint('Dataset source will be queried every %s' % utils.strTime(refreshInterval), -1)
    else:
        paramSource.resyncSetup(interval = 0)
    if self._forceRefresh:
        paramSource.resyncSetup(force = True)

    def externalRefresh(sig, frame):
        # Signal handler: force a resync of the dataset parameter source
        paramSource.resyncSetup(force = True)
    signal.signal(signal.SIGUSR2, externalRefresh)

    if self.dataSplitter.getMaxJobs() == 0:
        raise UserError('There are no events to process')
示例4: __init__
# 需要导入模块: from grid_control.datasets import DataProvider [as 别名]
# 注意: create 是 DataProvider 类上的方法, 不能用 from ... import 单独导入; 请通过 DataProvider.create(...) 调用
def __init__(self, config):
    """Create the data provider for the configured source dataset path.

    Args:
        config: configuration object; the 'source dataset path' option is
            read from it (no default is given) and the object itself is
            handed on to DataProvider.create.
    """
    sourcePath = config.get('source dataset path')
    # 'ListProvider' is passed as the last argument of DataProvider.create
    self.source = DataProvider.create(
        config, None, sourcePath, 'ListProvider')
示例5: main
# 需要导入模块: from grid_control.datasets import DataProvider [as 别名]
# 注意: create 是 DataProvider 类上的方法, 不能用 from ... import 单独导入; 请通过 DataProvider.create(...) 调用
def main():
dataset = args[0].strip()
cfgSettings = {'dbs blacklist T1': 'False', 'remove empty blocks': 'False',
'remove empty files': 'False', 'location format': opts.locationfmt,
'nickname check collision': 'False'}
if opts.metadata or opts.blockmetadata:
cfgSettings['lumi filter'] = '-'
cfgSettings['keep lumi metadata'] = 'True'
section = 'dataset'
fillerList = [DefaultFilesConfigFiller()]
if opts.settings:
fillerList.append(FileConfigFiller([opts.settings]))
tmpCfg = Config(fillerList, opts.settings)
section = tmpCfg.get('global', ['task', 'module'])
dummyConfig = Config(fillerList + [DictConfigFiller({section: cfgSettings})], opts.settings)
dummyConfig.opts = opts
dummyConfig = dummyConfig.addSections(['dataset'])
if os.path.exists(dataset):
provider = DataProvider.loadState(dataset, dummyConfig)
else:
provider = DataProvider.create(dummyConfig, dataset, opts.provider)
blocks = provider.getBlocks()
if len(blocks) == 0:
raise DatasetError('No blocks!')
datasets = set(map(lambda x: x[DataProvider.Dataset], blocks))
if len(datasets) > 1 or opts.info:
headerbase = [(DataProvider.Dataset, 'Dataset')]
else:
print 'Dataset: %s' % blocks[0][DataProvider.Dataset]
headerbase = []
if opts.configentry:
print
print 'dataset ='
infos = {}
order = []
maxnick = 5
for block in blocks:
dsName = block[DataProvider.Dataset]
if not infos.get(dsName, None):
order.append(dsName)
infos[dsName] = dict([(DataProvider.Dataset, dsName)])
if DataProvider.Nickname not in block and opts.confignick:
try:
if '/' in dsName:
block[DataProvider.Nickname] = dsName.lstrip('/').split('/')[1]
else:
block[DataProvider.Nickname] = dsName
except:
pass
if DataProvider.Nickname not in block and opts.confignick:
block[DataProvider.Nickname] = np.getName(None, dsName, block)
if DataProvider.Nickname in block:
nick = block[DataProvider.Nickname]
infos[dsName][DataProvider.Nickname] = nick
maxnick = max(maxnick, len(nick))
if len(block[DataProvider.FileList]):
infos[dsName][DataProvider.URL] = block[DataProvider.FileList][0][DataProvider.URL]
for dsID, dsName in enumerate(order):
info = infos[dsName]
short = DataProvider.providers.get(provider.__class__.__name__, provider.__class__.__name__)
print '', info.get(DataProvider.Nickname, 'nick%d' % dsID).rjust(maxnick), ':', short, ':',
print '%s%s' % (provider._datasetExpr, QM(short == 'list', ' %% %s' % info[DataProvider.Dataset], ''))
if opts.listdatasets:
# Add some enums for consistent access to info dicts
DataProvider.NFiles = -1
DataProvider.NBlocks = -2
print
infos = {}
order = []
infosum = {DataProvider.Dataset : 'Sum'}
for block in blocks:
dsName = block.get(DataProvider.Dataset, '')
if not infos.get(dsName, None):
order.append(dsName)
infos[dsName] = {DataProvider.Dataset: block[DataProvider.Dataset]}
def updateInfos(target):
target[DataProvider.NBlocks] = target.get(DataProvider.NBlocks, 0) + 1
target[DataProvider.NFiles] = target.get(DataProvider.NFiles, 0) + len(block[DataProvider.FileList])
target[DataProvider.NEntries] = target.get(DataProvider.NEntries, 0) + block[DataProvider.NEntries]
updateInfos(infos[dsName])
updateInfos(infosum)
head = [(DataProvider.Dataset, 'Dataset'), (DataProvider.NEntries, '#Events'),
(DataProvider.NBlocks, '#Blocks'), (DataProvider.NFiles, '#Files')]
utils.printTabular(head, map(lambda x: infos[x], order) + ["=", infosum])
if opts.listblocks:
print
utils.printTabular(headerbase + [(DataProvider.BlockName, 'Block'), (DataProvider.NEntries, 'Events')], blocks)
if opts.listfiles:
print
for block in blocks:
#.........这里部分代码省略.........
示例6: main
# 需要导入模块: from grid_control.datasets import DataProvider [as 别名]
# 注意: create 是 DataProvider 类上的方法, 不能用 from ... import 单独导入; 请通过 DataProvider.create(...) 调用
def main():
dataset = args[0].strip()
cfgSettings = {'dbs blacklist T1 *': 'False', 'remove empty blocks *': 'False',
'remove empty files *': 'False', 'location format *': opts.locationfmt,
'nickname check collision *': 'False'}
if opts.metadata or opts.blockmetadata:
cfgSettings['lumi filter *'] = '-'
cfgSettings['keep lumi metadata *'] = 'True'
config = getConfig(configFile = opts.settings, configDict = {'dataset': cfgSettings})
if os.path.exists(dataset):
provider = DataProvider.getInstance('ListProvider', config, dataset, None)
else:
provider = DataProvider.create(config, dataset, opts.provider)
blocks = provider.getBlocks()
if len(blocks) == 0:
raise DatasetError('No blocks!')
datasets = set(map(lambda x: x[DataProvider.Dataset], blocks))
if len(datasets) > 1 or opts.info:
headerbase = [(DataProvider.Dataset, 'Dataset')]
else:
print('Dataset: %s' % blocks[0][DataProvider.Dataset])
headerbase = []
if opts.configentry:
print('')
print('dataset =')
infos = {}
order = []
maxnick = 5
for block in blocks:
dsName = block[DataProvider.Dataset]
if not infos.get(dsName, None):
order.append(dsName)
infos[dsName] = dict([(DataProvider.Dataset, dsName)])
if DataProvider.Nickname not in block and opts.confignick:
try:
if '/' in dsName:
block[DataProvider.Nickname] = dsName.lstrip('/').split('/')[1]
else:
block[DataProvider.Nickname] = dsName
except Exception:
pass
if DataProvider.Nickname not in block and opts.confignick:
block[DataProvider.Nickname] = np.getName(None, dsName, block)
if DataProvider.Nickname in block:
nick = block[DataProvider.Nickname]
infos[dsName][DataProvider.Nickname] = nick
maxnick = max(maxnick, len(nick))
if len(block[DataProvider.FileList]):
infos[dsName][DataProvider.URL] = block[DataProvider.FileList][0][DataProvider.URL]
for dsID, dsName in enumerate(order):
info = infos[dsName]
short = DataProvider.providers.get(provider.__class__.__name__, provider.__class__.__name__)
nickname = info.get(DataProvider.Nickname, 'nick%d' % dsID).rjust(maxnick)
filterExpr = utils.QM(short == 'list', ' %% %s' % info[DataProvider.Dataset], '')
print('\t%s : %s : %s%s' % (nickname, short, provider._datasetExpr, filterExpr))
if opts.listdatasets:
# Add some enums for consistent access to info dicts
DataProvider.NFiles = -1
DataProvider.NBlocks = -2
print('')
infos = {}
order = []
infosum = {DataProvider.Dataset : 'Sum'}
for block in blocks:
dsName = block.get(DataProvider.Dataset, '')
if not infos.get(dsName, None):
order.append(dsName)
infos[dsName] = {DataProvider.Dataset: block[DataProvider.Dataset]}
def updateInfos(target):
target[DataProvider.NBlocks] = target.get(DataProvider.NBlocks, 0) + 1
target[DataProvider.NFiles] = target.get(DataProvider.NFiles, 0) + len(block[DataProvider.FileList])
target[DataProvider.NEntries] = target.get(DataProvider.NEntries, 0) + block[DataProvider.NEntries]
updateInfos(infos[dsName])
updateInfos(infosum)
head = [(DataProvider.Dataset, 'Dataset'), (DataProvider.NEntries, '#Events'),
(DataProvider.NBlocks, '#Blocks'), (DataProvider.NFiles, '#Files')]
utils.printTabular(head, map(lambda x: infos[x], order) + ['=', infosum])
if opts.listblocks:
print('')
utils.printTabular(headerbase + [(DataProvider.BlockName, 'Block'), (DataProvider.NEntries, 'Events')], blocks)
if opts.listfiles:
print('')
for block in blocks:
if len(datasets) > 1:
print('Dataset: %s' % block[DataProvider.Dataset])
print('Blockname: %s' % block[DataProvider.BlockName])
utils.printTabular([(DataProvider.URL, 'Filename'), (DataProvider.NEntries, 'Events')], block[DataProvider.FileList])
print('')
def printMetadata(src, maxlen):
for (mk, mv) in src:
#.........这里部分代码省略.........