This article collects typical usage examples of the Python class grid_control.datasets.DataProvider. If you are wondering what DataProvider is good for, or how it is used in practice, the curated class examples below may help.
The following presents 15 code examples of the DataProvider class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.

Example 1: dataset_show_diff

def dataset_show_diff(options):
    if len(options.args) != 2:
        options.parser.exit_with_usage(options.parser.usage('data'))
    provider_a = DataProvider.load_from_file(options.args[0])
    provider_b = DataProvider.load_from_file(options.args[1])
    block_resync_tuple = DataProvider.resync_blocks(
        provider_a.get_block_list_cached(show_stats=False),
        provider_b.get_block_list_cached(show_stats=False))
    (block_list_added, block_list_missing, block_list_matching) = block_resync_tuple

    def _dataset_iter_matching_blocks():
        for (block_old, block_new, _, _) in block_list_matching:
            def _format_change(old, new):
                if old != new:
                    return '%s -> %s' % (old, new)
                return old
            block_old[DataProvider.NFiles] = _format_change(
                len(block_old.get(DataProvider.FileList, [])),
                len(block_new.get(DataProvider.FileList, [])))
            block_old[DataProvider.NEntries] = _format_change(
                block_old[DataProvider.NEntries], block_new[DataProvider.NEntries])
            yield block_old

    header_list = [(DataProvider.Dataset, 'Dataset'), (DataProvider.BlockName, 'Block'),
        (DataProvider.NFiles, '#Files'), (DataProvider.NEntries, '#Entries')]
    if block_list_added:
        ConsoleTable.create(header_list, dataset_iter_blocks(block_list_added), title='Added blocks')
    if block_list_missing:
        ConsoleTable.create(header_list, dataset_iter_blocks(block_list_missing), title='Removed blocks')
    if block_list_matching:
        ConsoleTable.create(header_list, _dataset_iter_matching_blocks(), title='Matching blocks')
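
Note: the diffing workflow above can be reduced to a minimal standalone sketch that uses only the DataProvider calls shown in this example. The file names 'before.dat' and 'after.dat' are placeholders for dumps previously written with DataProvider.save_to_file; everything else is taken directly from the example.

    from grid_control.datasets import DataProvider  # module path as stated at the top of this page

    provider_a = DataProvider.load_from_file('before.dat')  # placeholder dump file
    provider_b = DataProvider.load_from_file('after.dat')   # placeholder dump file
    added, missing, matching = DataProvider.resync_blocks(
        provider_a.get_block_list_cached(show_stats=False),
        provider_b.get_block_list_cached(show_stats=False))
    print('added=%d removed=%d matching=%d' % (len(added), len(missing), len(matching)))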

Example 2: __init__

def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
    dataset_config = config.change_view(default_on_change=TriggerResync(['datasets', 'parameters']))
    self._lumi_filter = dataset_config.get_lookup(['lumi filter', '%s lumi filter' % datasource_name],
        default={}, parser=parse_lumi_filter, strfun=str_lumi)
    if not self._lumi_filter.empty():
        config.set('%s processor' % datasource_name, 'LumiDataProcessor', '+=')
    DataProvider.__init__(self, config, datasource_name, dataset_expr, dataset_nick, dataset_proc)
    # The LumiDataProcessor instantiated in DataProcessor.__init__ will set the lumi metadata as well
    self._lumi_query = dataset_config.get_bool(
        ['lumi metadata', '%s lumi metadata' % datasource_name], default=not self._lumi_filter.empty())
    config.set('phedex sites matcher mode', 'ShellStyleMatcher', '?=')
    # PhEDEx blacklist: T1_*_Disk nodes allow user jobs - other T1 sites don't!
    self._phedex_filter = dataset_config.get_filter('phedex sites', '-* T1_*_Disk T2_* T3_*',
        default_matcher='BlackWhiteMatcher', default_filter='StrictListFilter')
    self._only_complete = dataset_config.get_bool('only complete sites', True)
    self._only_valid = dataset_config.get_bool('only valid', True)
    self._location_format = dataset_config.get_enum('location format',
        CMSLocationFormat, CMSLocationFormat.hostname)
    self._pjrc = JSONRestClient(url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas')
    self._sitedb = SiteDB()
    dataset_expr_parts = split_opt(dataset_expr, '@#')
    (self._dataset_path, self._dataset_instance, self._dataset_block_selector) = dataset_expr_parts
    instance_default = dataset_config.get('dbs instance', '')
    self._dataset_instance = self._dataset_instance or instance_default
    if not self._dataset_instance:
        self._dataset_instance = 'prod/global'
    elif '/' not in self._dataset_instance:
        self._dataset_instance = 'prod/%s' % self._dataset_instance
    self._dataset_block_selector = self._dataset_block_selector or 'all'
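
The DBS instance handling at the end of this constructor can be read in isolation. Below is a pure-Python re-expression of that normalization logic for illustration only; the helper name is made up and is not part of grid_control.

    def normalize_dbs_instance(instance, default=''):
        # mirrors the fallback chain above: explicit value -> 'dbs instance' default -> 'prod/global'
        instance = instance or default
        if not instance:
            return 'prod/global'
        if '/' not in instance:
            return 'prod/%s' % instance
        return instance

    assert normalize_dbs_instance('') == 'prod/global'
    assert normalize_dbs_instance('phys03') == 'prod/phys03'
    assert normalize_dbs_instance('int/global') == 'int/global'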

Example 3: __init__

def __init__(self, dataDir, srcName, dataProvider, dataSplitter, dataProc, repository, keepOld=True):
    LimitedResyncParameterSource.__init__(self)
    (self._dn, self._name, self._data_provider, self._data_splitter, self._part_proc, self._keepOld) = \
        (dataDir, srcName, dataProvider, dataSplitter, dataProc, keepOld)
    repository['dataset:%s' % srcName] = self
    self.resyncSetup(interval=-1)
    if not dataProvider:  # debug mode - used by scripts - disables resync
        self._maxN = self._data_splitter.getMaxJobs()
        return
    # look for aborted resyncs - and try to restore old state if possible
    if self._existsDataPath('cache.dat.resync') and self._existsDataPath('map.tar.resync'):
        utils.renameFile(self._getDataPath('cache.dat.resync'), self._getDataPath('cache.dat'))
        utils.renameFile(self._getDataPath('map.tar.resync'), self._getDataPath('map.tar'))
    elif self._existsDataPath('cache.dat.resync') or self._existsDataPath('map.tar.resync'):
        raise DatasetError('Found broken resync state')
    if self._existsDataPath('cache.dat') and self._existsDataPath('map.tar'):
        self._data_splitter.importPartitions(self._getDataPath('map.tar'))
    else:
        DataProvider.saveToFile(self._getDataPath('cache.dat'),
            self._data_provider.getBlocks(show_stats=False))
        self._data_splitter.splitDataset(self._getDataPath('map.tar'),
            self._data_provider.getBlocks(show_stats=False))
    self._maxN = self._data_splitter.getMaxJobs()

Example 4: __init__

def __init__(self, config, datasetExpr, datasetNick=None):
    self._changeTrigger = triggerResync(['datasets', 'parameters'])
    self._lumi_filter = config.getLookup('lumi filter', {}, parser=parseLumiFilter,
        strfun=strLumi, onChange=self._changeTrigger)
    if not self._lumi_filter.empty():
        config.set('dataset processor', 'LumiDataProcessor', '+=')
    DataProvider.__init__(self, config, datasetExpr, datasetNick)
    # The LumiDataProcessor instantiated in DataProcessor.__init__ will set the lumi metadata as well
    self._lumi_query = config.getBool('lumi metadata', not self._lumi_filter.empty(),
        onChange=self._changeTrigger)
    config.set('phedex sites matcher mode', 'shell', '?=')
    # PhEDEx blacklist: T1_*_Disk nodes allow user jobs - other T1 sites don't!
    self._phedexFilter = config.getFilter('phedex sites', '-* T1_*_Disk T2_* T3_*',
        defaultMatcher='blackwhite', defaultFilter='strict', onChange=self._changeTrigger)
    self._onlyComplete = config.getBool('only complete sites', True, onChange=self._changeTrigger)
    self._locationFormat = config.getEnum('location format', CMSLocationFormat,
        CMSLocationFormat.hostname, onChange=self._changeTrigger)
    self._pjrc = JSONRestClient(url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas')
    self._sitedb = SiteDB()
    (self._datasetPath, self._datasetInstance, self._datasetBlock) = optSplit(datasetExpr, '@#')
    instance_default = config.get('dbs instance', '', onChange=self._changeTrigger)
    self._datasetInstance = self._datasetInstance or instance_default
    if not self._datasetInstance:
        self._datasetInstance = 'prod/global'
    elif '/' not in self._datasetInstance:
        self._datasetInstance = 'prod/%s' % self._datasetInstance
    self._datasetBlock = self._datasetBlock or 'all'
    self.onlyValid = config.getBool('only valid', True, onChange=self._changeTrigger)

Example 5: discover_blocks

def discover_blocks(options):
    # Get work directory, create dbs dump directory
    if os.path.isdir(options.args[0]):
        work_dn = os.path.abspath(os.path.normpath(options.args[0]))
    else:
        work_dn = gc_create_config(config_file=options.args[0]).get_work_path()
    if not options.opts.tempdir:
        options.opts.tempdir = os.path.join(work_dn, 'dbs')
    if not os.path.exists(options.opts.tempdir):
        os.mkdir(options.opts.tempdir)
    # get provider with dataset information
    config = gc_create_config(config_dict={'dataset': options.config_dict}, load_old_config=False)
    if options.opts.input_file:
        provider = DataProvider.create_instance('ListProvider',
            config, 'dataset', options.opts.input_file)
    else:
        provider = DataProvider.create_instance('DBSInfoProvider',
            config, 'dataset', options.args[0])
    blocks = provider.get_block_list_cached(show_stats=False)
    DataProvider.save_to_file(os.path.join(options.opts.tempdir, 'dbs.dat'), blocks)
    if options.opts.discovery:
        sys.exit(os.EX_OK)
    return blocks
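
Once discover_blocks() has written its dump, the saved blocks can be inspected with the same DataProvider keys used throughout these examples (Dataset, BlockName, NEntries, FileList, URL). A minimal sketch, assuming the 'dbs.dat' file written above sits in the current directory:

    from grid_control.datasets import DataProvider

    blocks = DataProvider.load_from_file('dbs.dat').get_block_list_cached(show_stats=False)
    for block in blocks:
        print('%s#%s: %d entries' % (block[DataProvider.Dataset],
            block[DataProvider.BlockName], block[DataProvider.NEntries]))
        for fi in block[DataProvider.FileList]:
            print('  %s' % fi[DataProvider.URL])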

Example 6: save_dataset

def save_dataset(opts, provider):
    print('')
    blocks = provider.getBlocks()
    if opts.ordered:
        sort_inplace(blocks, key=itemgetter(DataProvider.Dataset, DataProvider.BlockName))
        for b in blocks:
            sort_inplace(b[DataProvider.FileList], key=itemgetter(DataProvider.URL))
    DataProvider.saveToFile(opts.save, blocks)
    print('Dataset information saved to ./%s' % opts.save)
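
Here sort_inplace and itemgetter simply order blocks by (Dataset, BlockName) and files by URL before writing. The same ordering can be sketched with the standard library alone; this assumes a `provider` object as passed to save_dataset() above, and 'dataset.dump' is a placeholder file name.

    from operator import itemgetter
    from grid_control.datasets import DataProvider

    blocks = provider.getBlocks()  # provider: assumed to come from the surrounding script, as above
    blocks.sort(key=itemgetter(DataProvider.Dataset, DataProvider.BlockName))
    for block in blocks:
        block[DataProvider.FileList].sort(key=itemgetter(DataProvider.URL))
    DataProvider.saveToFile('dataset.dump', blocks)  # placeholder output path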

Example 7: _check_lumi_filter

def _check_lumi_filter(self, block, idx_runs, idx_lumi):
    lumi_filter = self._lumi_filter.lookup(block[DataProvider.Nickname], is_selector=False)
    if not lumi_filter:
        return
    if (self._lumi_strict == LumiMode.strict) and ((idx_runs is None) or (idx_lumi is None)):
        raise DatasetError('Strict lumi filter active but ' +
            'dataset %s does not provide lumi information!' % DataProvider.get_block_id(block))
    elif (self._lumi_strict == LumiMode.weak) and (idx_runs is None):
        raise DatasetError('Weak lumi filter active but ' +
            'dataset %s does not provide run information!' % DataProvider.get_block_id(block))

Example 8: __init__

def __init__(self, dataDir, srcName, dataProvider, dataSplitter, dataProc, keepOld=True):
    ParameterSource.__init__(self)
    (self._dataDir, self._srcName, self._dataProvider, self._dataSplitter, self._part_proc) = \
        (dataDir, srcName, dataProvider, dataSplitter, dataProc)
    if not dataProvider:
        pass  # debug mode - used by scripts - disables resync
    elif os.path.exists(self.getDataPath('cache.dat')) and os.path.exists(self.getDataPath('map.tar')):
        # check both files separately (the original passed the 'and' expression into os.path.exists)
        self._dataSplitter.importPartitions(self.getDataPath('map.tar'))
    else:
        DataProvider.saveToFile(self.getDataPath('cache.dat'), self._dataProvider.getBlocks(silent=False))
        self._dataSplitter.splitDataset(self.getDataPath('map.tar'), self._dataProvider.getBlocks())
    self._maxN = self._dataSplitter.getMaxJobs()
    self._keepOld = keepOld

Example 9: create_dbs3_proto_blocks

def create_dbs3_proto_blocks(opts, dataset_blocks):
    for dataset in dataset_blocks:
        missing_info_blocks = []
        dataset_types = set()
        for block in dataset_blocks[dataset]:
            block_dump = {'dataset_conf_list': [], 'files': [], 'file_conf_list': [], 'file_parent_list': []}
            (block_size, block_dataset_types) = create_dbs3_json_files(opts, block, block_dump)
            if len(block_dataset_types) > 1:
                raise Exception('Data and MC files are mixed in block %s' % DataProvider.bName(block))
            elif len(block_dataset_types) == 1:
                yield (block, block_dump, block_size, block_dataset_types.pop())
            else:
                missing_info_blocks.append((block, block_dump, block_size))
            # collect dataset types in this dataset for blocks with missing type information
            dataset_types.update(block_dataset_types)
        if missing_info_blocks:
            if len(dataset_types) > 1:
                raise Exception('Data and MC files are mixed in dataset %s! ' % dataset +
                    'Unable to determine dataset type for blocks without type info')
            elif len(dataset_types) == 0:
                if not opts.datatype:
                    raise Exception('Please supply dataset type via --datatype!')
                dataset_type = opts.datatype
            else:
                dataset_type = dataset_types.pop()
            for (block, block_dump, block_size) in missing_info_blocks:
                yield (block, block_dump, block_size, dataset_type)
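
The type-resolution branch for blocks without type information can be read on its own. Below is a hedged re-expression as a standalone helper; the function name is made up for illustration and is not part of the grid_control API.

    def settle_dataset_type(dataset_types, cli_datatype=None):
        # dataset_types: types collected from blocks that did carry type information
        if len(dataset_types) > 1:
            raise Exception('Data and MC files are mixed in this dataset')
        if not dataset_types:
            if not cli_datatype:
                raise Exception('Please supply dataset type via --datatype!')
            return cli_datatype
        return set(dataset_types).pop()

    assert settle_dataset_type(set(), 'mc') == 'mc'
    assert settle_dataset_type({'data'}) == 'data'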

Example 10: _resync_psrc

def _resync_psrc(self):
    activity = Activity('Performing resync of datasource %r' % self.get_datasource_name())
    # Get old and new dataset information
    provider_old = DataProvider.load_from_file(self._get_data_path('cache.dat'))
    block_list_old = provider_old.get_block_list_cached(show_stats=False)
    self._provider.clear_cache()
    block_list_new = self._provider.get_block_list_cached(show_stats=False)
    self._provider.save_to_file(self._get_data_path('cache-new.dat'), block_list_new)
    # Use old splitting information to synchronize with new dataset infos
    partition_len_old = self.get_parameter_len()
    partition_changes = self._resync_partitions(
        self._get_data_path('map-new.tar'), block_list_old, block_list_new)
    activity.finish()
    if partition_changes is not None:
        # Move current splitting to backup and use the new splitting from now on
        def _rename_with_backup(new, cur, old):
            if self._keep_old:
                os.rename(self._get_data_path(cur), self._get_data_path(old))
            os.rename(self._get_data_path(new), self._get_data_path(cur))
        _rename_with_backup('map-new.tar', 'map.tar', 'map-old-%d.tar' % time.time())
        _rename_with_backup('cache-new.dat', 'cache.dat', 'cache-old-%d.dat' % time.time())
        self._set_reader(DataSplitter.load_partitions(self._get_data_path('map.tar')))
        self._log.debug('Dataset resync finished: %d -> %d partitions', partition_len_old, self._len)
        (pnum_list_redo, pnum_list_disable) = partition_changes
        return (set(pnum_list_redo), set(pnum_list_disable), partition_len_old != self._len)
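
The backup step used here is plain os.rename bookkeeping: keep the current file under a timestamped name, then promote the freshly written one. For reference, the same idea detached from the parameter source, with paths passed in directly and `keep_old` mirroring self._keep_old (illustration only, not grid_control code):

    import os

    def rename_with_backup(new_path, cur_path, old_path, keep_old=True):
        if keep_old:
            os.rename(cur_path, old_path)  # preserve the current state under a timestamped name
        os.rename(new_path, cur_path)      # promote the freshly written file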

Example 11: setupJobParameters

def setupJobParameters(self, config, pm):
    config = config.addSections(['dataset']).addTags([self])
    self.dataSplitter = None
    self.dataRefresh = None
    self.dataset = config.get('dataset', '').strip()
    if self.dataset == '':
        return
    config.set('se output pattern', '@[email protected][email protected][email protected][email protected]@', override=False)
    config.set('default lookup', 'DATASETNICK', override=False)
    defaultProvider = config.get('dataset provider', 'ListProvider')
    dataProvider = DataProvider.create(config, self.dataset, defaultProvider)
    splitterName = config.get('dataset splitter', 'FileBoundarySplitter')
    splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
    self.dataSplitter = splitterClass(config)
    self.checkSE = config.getBool('dataset storage check', True, onChange=None)
    # Create and register dataset parameter plugin
    paramSource = DataParameterSource(config.getWorkPath(), 'data',
        dataProvider, self.dataSplitter, self.initDataProcessor())
    DataParameterSource.datasetsAvailable['data'] = paramSource
    # Select dataset refresh rate
    self.dataRefresh = config.getTime('dataset refresh', -1, onChange=None)
    if self.dataRefresh > 0:
        paramSource.resyncSetup(interval=max(self.dataRefresh, dataProvider.queryLimit()))
        utils.vprint('Dataset source will be queried every %s' % utils.strTime(self.dataRefresh), -1)
    else:
        paramSource.resyncSetup(interval=0)

    def externalRefresh(sig, frame):
        paramSource.resyncSetup(force=True)
    signal.signal(signal.SIGUSR2, externalRefresh)
    if self.dataSplitter.getMaxJobs() == 0:
        raise UserError('There are no events to process')

Example 12: _resync

def _resync(self):
    if self._data_provider:
        activity = Activity('Performing resync of datasource %r' % self._name)
        # Get old and new dataset information
        ds_old = DataProvider.loadFromFile(self._getDataPath('cache.dat')).getBlocks(show_stats=False)
        self._data_provider.clearCache()
        ds_new = self._data_provider.getBlocks(show_stats=False)
        self._data_provider.saveToFile(self._getDataPath('cache-new.dat'), ds_new)
        # Use old splitting information to synchronize with new dataset infos
        old_maxN = self._data_splitter.getMaxJobs()
        jobChanges = self._data_splitter.resyncMapping(self._getDataPath('map-new.tar'), ds_old, ds_new)
        activity.finish()
        if jobChanges is not None:
            # Move current splitting to backup and use the new splitting from now on
            def backupRename(old, cur, new):
                if self._keepOld:
                    os.rename(self._getDataPath(cur), self._getDataPath(old))
                os.rename(self._getDataPath(new), self._getDataPath(cur))
            backupRename('map-old-%d.tar' % time.time(), 'map.tar', 'map-new.tar')
            backupRename('cache-old-%d.dat' % time.time(), 'cache.dat', 'cache-new.dat')
            self._data_splitter.importPartitions(self._getDataPath('map.tar'))
            self._maxN = self._data_splitter.getMaxJobs()
            self._log.debug('Dataset resync finished: %d -> %d partitions', old_maxN, self._maxN)
            return (set(jobChanges[0]), set(jobChanges[1]), old_maxN != self._maxN)

Example 13: resync

def resync(self):
    (result_redo, result_disable, result_sizeChange) = ParameterSource.resync(self)
    if self.resyncEnabled() and self._dataProvider:
        # Get old and new dataset information
        old = DataProvider.loadFromFile(self.getDataPath('cache.dat')).getBlocks()
        self._dataProvider.clearCache()
        new = self._dataProvider.getBlocks()
        self._dataProvider.saveToFile(self.getDataPath('cache-new.dat'), new)
        # Use old splitting information to synchronize with new dataset infos
        jobChanges = self._dataSplitter.resyncMapping(self.getDataPath('map-new.tar'), old, new)
        if jobChanges:
            # Move current splitting to backup and use the new splitting from now on
            def backupRename(old, cur, new):
                if self._keepOld:
                    os.rename(self.getDataPath(cur), self.getDataPath(old))
                os.rename(self.getDataPath(new), self.getDataPath(cur))
            backupRename('map-old-%d.tar' % time.time(), 'map.tar', 'map-new.tar')
            backupRename('cache-old-%d.dat' % time.time(), 'cache.dat', 'cache-new.dat')
            old_maxN = self._dataSplitter.getMaxJobs()
            self._dataSplitter.importPartitions(self.getDataPath('map.tar'))
            self._maxN = self._dataSplitter.getMaxJobs()
            result_redo.update(jobChanges[0])
            result_disable.update(jobChanges[1])
            result_sizeChange = result_sizeChange or (old_maxN != self._maxN)
        self.resyncFinished()
    return (result_redo, result_disable, result_sizeChange)

Example 14: _read_plfnp_map

def _read_plfnp_map(self, config, parent_dataset_expr):
    if parent_dataset_expr and (parent_dataset_expr not in self._plfnp2pdn_cache):
        # read parent source and fill lfnMap with parent_lfn_parts -> parent dataset name mapping
        map_plfnp2pdn = self._plfnp2pdn_cache.setdefault(parent_dataset_expr, {})
        for block in DataProvider.iter_blocks_from_expr(self._empty_config, parent_dataset_expr):
            for fi in block[DataProvider.FileList]:
                map_plfnp2pdn[self._get_lfnp(fi[DataProvider.URL])] = block[DataProvider.Dataset]
    return self._plfnp2pdn_cache.get(parent_dataset_expr, {})  # return cached mapping

Example 15: __init__

def __init__(self, config, name):
    head = [(0, "Nickname")]
    # Mapping between nickname and config files:
    cfgList = config.get("nickname config", "")
    self.nmCfg = config.getDict("nickname config", {},
        parser=lambda x: map(str.strip, x.split(",")), str=lambda x: str.join(",", x))[0]
    if cfgList:
        if "config file" in config.getOptions():
            raise ConfigError("Please use 'nickname config' instead of 'config file'")
        allConfigFiles = utils.flatten(self.nmCfg.values())
        config.set("config file", str.join("\n", allConfigFiles))
        head.append((1, "Config file"))
    # Mapping between nickname and constants:
    self.nmCName = map(str.strip, config.get("nickname constants", "").split())
    self.nmConst = {}
    for var in self.nmCName:
        tmp = config.getDict(var, {})[0]
        for (nick, value) in tmp.items():
            if value:
                self.nmConst.setdefault(nick, {})[var] = value
            else:
                self.nmConst.setdefault(nick, {})[var] = ""
        head.append((var, var))
    # Mapping between nickname and lumi filter:
    if "lumi filter" in config.getOptions():
        raise ConfigError("Please use 'nickname lumi filter' instead of 'lumi filter'")
    lumiParse = lambda x: formatLumi(parseLumiFilter(x))
    self.nmLumi = config.getDict("nickname lumi filter", {}, parser=lumiParse)[0]
    if self.nmLumi:
        for dataset in config.get("dataset", "").splitlines():
            (datasetNick, datasetProvider, datasetExpr) = DataProvider.parseDatasetExpr(config, dataset, None)
            config.set("dataset %s" % datasetNick, "lumi filter",
                str.join(",", utils.flatten(fromNM(self.nmLumi, datasetNick, []))))
        config.set("lumi filter", str.join(",", self.nmLumi.get(None, [])))
        head.append((2, "Lumi filter"))
    utils.vprint("Mapping between nickname and other settings:\n", -1)

    def report():
        for nick in sorted(set(self.nmCfg.keys() + self.nmConst.keys() + self.nmLumi.keys())):
            tmp = {
                0: nick,
                1: str.join(", ", map(os.path.basename, self.nmCfg.get(nick, ""))),
                2: self.displayLumi(self.nmLumi.get(nick, "")),
            }
            yield utils.mergeDicts([tmp, self.nmConst.get(nick, {})])
    utils.printTabular(head, report(), "cl")
    utils.vprint(level=-1)
    CMSSW.__init__(self, config, name)
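
For reference, the parser handed to config.getDict for 'nickname config' just splits a comma-separated value and strips whitespace. A minimal pure-Python illustration (the file names in the check are made up):

    parse_nickname_config = lambda value: [entry.strip() for entry in value.split(',')]

    assert parse_nickname_config('analysis_cfg.py, systematics_cfg.py') == [
        'analysis_cfg.py', 'systematics_cfg.py']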