

Python datasets.DataProvider Class Code Examples

This article collects typical usage examples of the Python class grid_control.datasets.DataProvider. If you are wondering what the DataProvider class does, how to use it, or what working examples look like, the curated class examples below may help.


A total of 15 code examples of the DataProvider class are shown below, sorted by popularity by default.

Example 1: dataset_show_diff

def dataset_show_diff(options):
	if len(options.args) != 2:
		options.parser.exit_with_usage(options.parser.usage('data'))

	provider_a = DataProvider.load_from_file(options.args[0])
	provider_b = DataProvider.load_from_file(options.args[1])
	block_resync_tuple = DataProvider.resync_blocks(provider_a.get_block_list_cached(show_stats=False),
		provider_b.get_block_list_cached(show_stats=False))
	(block_list_added, block_list_missing, block_list_matching) = block_resync_tuple

	def _dataset_iter_matching_blocks():
		for (block_old, block_new, _, _) in block_list_matching:
			def _format_change(old, new):
				if old != new:
					return '%s -> %s' % (old, new)
				return old
			block_old[DataProvider.NFiles] = _format_change(len(block_old.get(DataProvider.FileList, [])),
				len(block_new.get(DataProvider.FileList, [])))
			block_old[DataProvider.NEntries] = _format_change(block_old[DataProvider.NEntries],
				block_new[DataProvider.NEntries])
			yield block_old

	header_list = [(DataProvider.Dataset, 'Dataset'), (DataProvider.BlockName, 'Block'),
		(DataProvider.NFiles, '#Files'), (DataProvider.NEntries, '#Entries')]
	if block_list_added:
		ConsoleTable.create(header_list, dataset_iter_blocks(block_list_added), title='Added blocks')
	if block_list_missing:
		ConsoleTable.create(header_list, dataset_iter_blocks(block_list_missing), title='Removed blocks')
	if block_list_matching:
		ConsoleTable.create(header_list, _dataset_iter_matching_blocks(), title='Matching blocks')
Developer: grid-control, Project: grid-control, Lines: 30, Source: gc_multi_tool.py
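
For orientation, here is a minimal sketch that drives the same diff directly through the DataProvider API used above; the dump file names old.dat and new.dat are placeholders, everything else mirrors the calls in the example:

from grid_control.datasets import DataProvider

provider_old = DataProvider.load_from_file('old.dat')  # placeholder dump file
provider_new = DataProvider.load_from_file('new.dat')  # placeholder dump file
(block_list_added, block_list_missing, block_list_matching) = DataProvider.resync_blocks(
	provider_old.get_block_list_cached(show_stats=False),
	provider_new.get_block_list_cached(show_stats=False))
print('added=%d removed=%d matching=%d' % (
	len(block_list_added), len(block_list_missing), len(block_list_matching)))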

Example 2: __init__

	def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
		dataset_config = config.change_view(default_on_change=TriggerResync(['datasets', 'parameters']))
		self._lumi_filter = dataset_config.get_lookup(['lumi filter', '%s lumi filter' % datasource_name],
			default={}, parser=parse_lumi_filter, strfun=str_lumi)
		if not self._lumi_filter.empty():
			config.set('%s processor' % datasource_name, 'LumiDataProcessor', '+=')
		DataProvider.__init__(self, config, datasource_name, dataset_expr, dataset_nick, dataset_proc)
		# LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
		self._lumi_query = dataset_config.get_bool(
			['lumi metadata', '%s lumi metadata' % datasource_name], default=not self._lumi_filter.empty())
		config.set('phedex sites matcher mode', 'ShellStyleMatcher', '?=')
		# PhEDEx blacklist: T1_*_Disk nodes allow user jobs - other T1 sites don't!
		self._phedex_filter = dataset_config.get_filter('phedex sites', '-* T1_*_Disk T2_* T3_*',
			default_matcher='BlackWhiteMatcher', default_filter='StrictListFilter')
		self._only_complete = dataset_config.get_bool('only complete sites', True)
		self._only_valid = dataset_config.get_bool('only valid', True)
		self._location_format = dataset_config.get_enum('location format',
			CMSLocationFormat, CMSLocationFormat.hostname)
		self._pjrc = JSONRestClient(url='https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas')
		self._sitedb = SiteDB()

		dataset_expr_parts = split_opt(dataset_expr, '@#')
		(self._dataset_path, self._dataset_instance, self._dataset_block_selector) = dataset_expr_parts
		instance_default = dataset_config.get('dbs instance', '')
		self._dataset_instance = self._dataset_instance or instance_default
		if not self._dataset_instance:
			self._dataset_instance = 'prod/global'
		elif '/' not in self._dataset_instance:
			self._dataset_instance = 'prod/%s' % self._dataset_instance
		self._dataset_block_selector = self._dataset_block_selector or 'all'
Developer: grid-control, Project: grid-control, Lines: 30, Source: provider_cms.py
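
The dataset expression handled in this constructor appears to follow the pattern <dataset path>[@<dbs instance>][#<block selector>]. The plain-Python sketch below only reproduces the defaulting logic shown above on a hypothetical expression; it is not a faithful reimplementation of split_opt:

def interpret_dataset_expr(dataset_expr, instance_default=''):
	# rough stand-in for split_opt(dataset_expr, '@#') plus the defaulting above
	path, _, rest = dataset_expr.partition('@')
	instance, _, block_selector = rest.partition('#')
	if '#' in path:  # expression written without an '@' part
		path, _, block_selector = path.partition('#')
	instance = instance or instance_default or 'prod/global'
	if '/' not in instance:
		instance = 'prod/%s' % instance
	return (path, instance, block_selector or 'all')

print(interpret_dataset_expr('/SingleMuon/Run2016B-v1/AOD@phys03'))
# ('/SingleMuon/Run2016B-v1/AOD', 'prod/phys03', 'all')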

Example 3: __init__

	def __init__(self, dataDir, srcName, dataProvider, dataSplitter, dataProc, repository, keepOld = True):
		LimitedResyncParameterSource.__init__(self)
		(self._dn, self._name, self._data_provider, self._data_splitter, self._part_proc, self._keepOld) = \
			(dataDir, srcName, dataProvider, dataSplitter, dataProc, keepOld)
		repository['dataset:%s' % srcName] = self
		self.resyncSetup(interval = -1)

		if not dataProvider: # debug mode - used by scripts - disables resync
			self._maxN = self._data_splitter.getMaxJobs()
			return

		# look for aborted resyncs - and try to restore old state if possible
		if self._existsDataPath('cache.dat.resync') and self._existsDataPath('map.tar.resync'):
			utils.renameFile(self._getDataPath('cache.dat.resync'), self._getDataPath('cache.dat'))
			utils.renameFile(self._getDataPath('map.tar.resync'), self._getDataPath('map.tar'))
		elif self._existsDataPath('cache.dat.resync') or self._existsDataPath('map.tar.resync'):
			raise DatasetError('Found broken resync state')

		if self._existsDataPath('cache.dat') and self._existsDataPath('map.tar'):
			self._data_splitter.importPartitions(self._getDataPath('map.tar'))
		else:
			DataProvider.saveToFile(self._getDataPath('cache.dat'), self._data_provider.getBlocks(show_stats = False))
			self._data_splitter.splitDataset(self._getDataPath('map.tar'), self._data_provider.getBlocks(show_stats = False))

		self._maxN = self._data_splitter.getMaxJobs()
Developer: Fra-nk, Project: grid-control, Lines: 25, Source: psource_data.py

Example 4: __init__

	def __init__(self, config, datasetExpr, datasetNick = None):
		self._changeTrigger = triggerResync(['datasets', 'parameters'])
		self._lumi_filter = config.getLookup('lumi filter', {}, parser = parseLumiFilter, strfun = strLumi, onChange = self._changeTrigger)
		if not self._lumi_filter.empty():
			config.set('dataset processor', 'LumiDataProcessor', '+=')
		DataProvider.__init__(self, config, datasetExpr, datasetNick)
		# LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
		self._lumi_query = config.getBool('lumi metadata', not self._lumi_filter.empty(), onChange = self._changeTrigger)
		config.set('phedex sites matcher mode', 'shell', '?=')
		# PhEDEx blacklist: T1_*_Disk nodes allow user jobs - other T1 sites don't!
		self._phedexFilter = config.getFilter('phedex sites', '-* T1_*_Disk T2_* T3_*',
			defaultMatcher = 'blackwhite', defaultFilter = 'strict', onChange = self._changeTrigger)
		self._onlyComplete = config.getBool('only complete sites', True, onChange = self._changeTrigger)
		self._locationFormat = config.getEnum('location format', CMSLocationFormat, CMSLocationFormat.hostname, onChange = self._changeTrigger)
		self._pjrc = JSONRestClient(url = 'https://cmsweb.cern.ch/phedex/datasvc/json/prod/blockreplicas')
		self._sitedb = SiteDB()

		(self._datasetPath, self._datasetInstance, self._datasetBlock) = optSplit(datasetExpr, '@#')
		instance_default = config.get('dbs instance', '', onChange = self._changeTrigger)
		self._datasetInstance = self._datasetInstance or instance_default
		if not self._datasetInstance:
			self._datasetInstance = 'prod/global'
		elif '/' not in self._datasetInstance:
			self._datasetInstance = 'prod/%s' % self._datasetInstance
		self._datasetBlock = self._datasetBlock or 'all'
		self.onlyValid = config.getBool('only valid', True, onChange = self._changeTrigger)
Developer: Fra-nk, Project: grid-control, Lines: 26, Source: provider_cms.py

Example 5: discover_blocks

def discover_blocks(options):
	# Get work directory, create dbs dump directory
	if os.path.isdir(options.args[0]):
		work_dn = os.path.abspath(os.path.normpath(options.args[0]))
	else:
		work_dn = gc_create_config(config_file=options.args[0]).get_work_path()
	if not options.opts.tempdir:
		options.opts.tempdir = os.path.join(work_dn, 'dbs')
	if not os.path.exists(options.opts.tempdir):
		os.mkdir(options.opts.tempdir)

	# get provider with dataset information
	config = gc_create_config(config_dict={'dataset': options.config_dict}, load_old_config=False)
	if options.opts.input_file:
		provider = DataProvider.create_instance('ListProvider',
			config, 'dataset', options.opts.input_file)
	else:
		provider = DataProvider.create_instance('DBSInfoProvider',
			config, 'dataset', options.args[0])

	blocks = provider.get_block_list_cached(show_stats=False)
	DataProvider.save_to_file(os.path.join(options.opts.tempdir, 'dbs.dat'), blocks)
	if options.opts.discovery:
		sys.exit(os.EX_OK)
	return blocks
Developer: grid-control, Project: grid-control, Lines: 25, Source: dataset_dbs3_add.py
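
The dbs.dat dump written here can afterwards be inspected with the same load/list calls seen in example 1. A minimal sketch, assuming the current directory is the temp directory chosen above; the block keys used for printing all appear in the other examples:

from grid_control.datasets import DataProvider

provider = DataProvider.load_from_file('dbs.dat')
for block in provider.get_block_list_cached(show_stats=False):
	print('%s#%s: %d files, %s entries' % (
		block[DataProvider.Dataset], block[DataProvider.BlockName],
		len(block.get(DataProvider.FileList, [])), block[DataProvider.NEntries]))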

Example 6: save_dataset

def save_dataset(opts, provider):
	print('')
	blocks = provider.getBlocks()
	if opts.ordered:
		sort_inplace(blocks, key = itemgetter(DataProvider.Dataset, DataProvider.BlockName))
		for b in blocks:
			sort_inplace(b[DataProvider.FileList], key = itemgetter(DataProvider.URL))
	DataProvider.saveToFile(opts.save, blocks)
	print('Dataset information saved to ./%s' % opts.save)
Developer: artus-analysis, Project: grid-control, Lines: 9, Source: datasetInfo.py
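
A sketch of invoking save_dataset: the opts object only needs the two attributes read above (both values are placeholders), and provider can be any DataProvider instance exposing the old-style getBlocks API:

class _Opts(object):
	ordered = True        # sort blocks and files before saving
	save = 'dataset.dat'  # placeholder output file name

save_dataset(_Opts(), provider)  # 'provider' must be created beforehand, e.g. via DataProvider.create as in example 11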

Example 7: _check_lumi_filter

	def _check_lumi_filter(self, block, idx_runs, idx_lumi):
		lumi_filter = self._lumi_filter.lookup(block[DataProvider.Nickname], is_selector=False)
		if not lumi_filter:
			return
		if (self._lumi_strict == LumiMode.strict) and ((idx_runs is None) or (idx_lumi is None)):
			raise DatasetError('Strict lumi filter active but ' +
				'dataset %s does not provide lumi information!' % DataProvider.get_block_id(block))
		elif (self._lumi_strict == LumiMode.weak) and (idx_runs is None):
			raise DatasetError('Weak lumi filter active but ' +
				'dataset %s does not provide run information!' % DataProvider.get_block_id(block))
Developer: grid-control, Project: grid-control, Lines: 10, Source: lumi_proc.py

Example 8: __init__

	def __init__(self, dataDir, srcName, dataProvider, dataSplitter, dataProc, keepOld = True):
		ParameterSource.__init__(self)
		(self._dataDir, self._srcName, self._dataProvider, self._dataSplitter, self._part_proc) = \
			(dataDir, srcName, dataProvider, dataSplitter, dataProc)

		if not dataProvider:
			pass # debug mode - used by scripts - disables resync
		elif os.path.exists(self.getDataPath('cache.dat')) and os.path.exists(self.getDataPath('map.tar')):
			self._dataSplitter.importPartitions(self.getDataPath('map.tar'))
		else:
			DataProvider.saveToFile(self.getDataPath('cache.dat'), self._dataProvider.getBlocks(silent = False))
			self._dataSplitter.splitDataset(self.getDataPath('map.tar'), self._dataProvider.getBlocks())

		self._maxN = self._dataSplitter.getMaxJobs()
		self._keepOld = keepOld
Developer: thomas-mueller, Project: grid-control, Lines: 15, Source: psource_data.py

Example 9: create_dbs3_proto_blocks

def create_dbs3_proto_blocks(opts, dataset_blocks):
	for dataset in dataset_blocks:
		missing_info_blocks = []
		dataset_types = set()
		for block in dataset_blocks[dataset]:
			block_dump = {'dataset_conf_list': [], 'files': [], 'file_conf_list': [], 'file_parent_list': []}
			(block_size, block_dataset_types) = create_dbs3_json_files(opts, block, block_dump)
			if len(block_dataset_types) > 1:
				raise Exception('Data and MC files are mixed in block %s' % DataProvider.bName(block))
			elif len(block_dataset_types) == 1:
				yield (block, block_dump, block_size, block_dataset_types.pop())
			else:
				missing_info_blocks.append((block, block_dump, block_size))
			dataset_types.update(block_dataset_types) # collect dataset types in this dataset for blocks with missing type information

		if missing_info_blocks:
			if len(dataset_types) > 1:
				raise Exception('Data and MC files are mixed in dataset %s! Unable to determine dataset type for blocks without type info' % dataset)
			elif len(dataset_types) == 0:
				if not opts.datatype:
					raise Exception('Please supply dataset type via --datatype!')
				dataset_type = opts.datatype
			else:
				dataset_type = dataset_types.pop()
			for (block, block_dump, block_size) in missing_info_blocks:
				yield (block, block_dump, block_size, dataset_type)
Developer: Fra-nk, Project: grid-control, Lines: 26, Source: datasetDBS3Add.py
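
A sketch of how this generator might be consumed; opts and dataset_blocks are assumed to be prepared by the surrounding script and are not defined here:

for (block, block_dump, block_size, dataset_type) in create_dbs3_proto_blocks(opts, dataset_blocks):
	# block_dump holds the DBS3 JSON fragments filled in by create_dbs3_json_files
	print('%s: dataset type %s, %d files, size %s' % (
		DataProvider.bName(block), dataset_type, len(block_dump['files']), block_size))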

Example 10: _resync_psrc

	def _resync_psrc(self):
		activity = Activity('Performing resync of datasource %r' % self.get_datasource_name())
		# Get old and new dataset information
		provider_old = DataProvider.load_from_file(self._get_data_path('cache.dat'))
		block_list_old = provider_old.get_block_list_cached(show_stats=False)
		self._provider.clear_cache()
		block_list_new = self._provider.get_block_list_cached(show_stats=False)
		self._provider.save_to_file(self._get_data_path('cache-new.dat'), block_list_new)

		# Use old splitting information to synchronize with new dataset infos
		partition_len_old = self.get_parameter_len()
		partition_changes = self._resync_partitions(
			self._get_data_path('map-new.tar'), block_list_old, block_list_new)
		activity.finish()
		if partition_changes is not None:
			# Move current splitting to backup and use the new splitting from now on
			def _rename_with_backup(new, cur, old):
				if self._keep_old:
					os.rename(self._get_data_path(cur), self._get_data_path(old))
				os.rename(self._get_data_path(new), self._get_data_path(cur))
			_rename_with_backup('map-new.tar', 'map.tar', 'map-old-%d.tar' % time.time())
			_rename_with_backup('cache-new.dat', 'cache.dat', 'cache-old-%d.dat' % time.time())
			self._set_reader(DataSplitter.load_partitions(self._get_data_path('map.tar')))
			self._log.debug('Dataset resync finished: %d -> %d partitions', partition_len_old, self._len)
			(pnum_list_redo, pnum_list_disable) = partition_changes
			return (set(pnum_list_redo), set(pnum_list_disable), partition_len_old != self._len)
Developer: grid-control, Project: grid-control, Lines: 26, Source: psource_data.py

Example 11: setupJobParameters

	def setupJobParameters(self, config, pm):
		config = config.addSections(['dataset']).addTags([self])
		self.dataSplitter = None
		self.dataRefresh = None
		self.dataset = config.get('dataset', '').strip()
		if self.dataset == '':
			return
		config.set('se output pattern', '@[email protected][email protected][email protected][email protected]@', override = False)
		config.set('default lookup', 'DATASETNICK', override = False)

		defaultProvider = config.get('dataset provider', 'ListProvider')
		dataProvider = DataProvider.create(config, self.dataset, defaultProvider)
		splitterName = config.get('dataset splitter', 'FileBoundarySplitter')
		splitterClass = dataProvider.checkSplitter(DataSplitter.getClass(splitterName))
		self.dataSplitter = splitterClass(config)
		self.checkSE = config.getBool('dataset storage check', True, onChange = None)

		# Create and register dataset parameter plugin
		paramSource = DataParameterSource(config.getWorkPath(), 'data',
			dataProvider, self.dataSplitter, self.initDataProcessor())
		DataParameterSource.datasetsAvailable['data'] = paramSource

		# Select dataset refresh rate
		self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
		if self.dataRefresh > 0:
			paramSource.resyncSetup(interval = max(self.dataRefresh, dataProvider.queryLimit()))
			utils.vprint('Dataset source will be queried every %s' % utils.strTime(self.dataRefresh), -1)
		else:
			paramSource.resyncSetup(interval = 0)
		def externalRefresh(sig, frame):
			paramSource.resyncSetup(force = True)
		signal.signal(signal.SIGUSR2, externalRefresh)

		if self.dataSplitter.getMaxJobs() == 0:
			raise UserError('There are no events to process')
Developer: mortenpi, Project: grid-control, Lines: 35, Source: task_data.py
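
For illustration, the dataset-related options read in setupJobParameters could be supplied as follows; only the option names are taken from the snippet above, every value is a placeholder:

dataset_section_options = {
	'dataset': '/SingleMuon/Run2016B-v1/AOD',    # hypothetical dataset expression
	'dataset provider': 'ListProvider',          # matches the default used above
	'dataset splitter': 'FileBoundarySplitter',  # matches the default used above
	'dataset storage check': 'True',
	'dataset refresh': '-1',                     # non-positive values disable periodic resync, as above
}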

Example 12: _resync

	def _resync(self):
		if self._data_provider:
			activity = Activity('Performing resync of datasource %r' % self._name)
			# Get old and new dataset information
			ds_old = DataProvider.loadFromFile(self._getDataPath('cache.dat')).getBlocks(show_stats = False)
			self._data_provider.clearCache()
			ds_new = self._data_provider.getBlocks(show_stats = False)
			self._data_provider.saveToFile(self._getDataPath('cache-new.dat'), ds_new)

			# Use old splitting information to synchronize with new dataset infos
			old_maxN = self._data_splitter.getMaxJobs()
			jobChanges = self._data_splitter.resyncMapping(self._getDataPath('map-new.tar'), ds_old, ds_new)
			activity.finish()
			if jobChanges is not None:
				# Move current splitting to backup and use the new splitting from now on
				def backupRename(old, cur, new):
					if self._keepOld:
						os.rename(self._getDataPath(cur), self._getDataPath(old))
					os.rename(self._getDataPath(new), self._getDataPath(cur))
				backupRename(  'map-old-%d.tar' % time.time(),   'map.tar',   'map-new.tar')
				backupRename('cache-old-%d.dat' % time.time(), 'cache.dat', 'cache-new.dat')
				self._data_splitter.importPartitions(self._getDataPath('map.tar'))
				self._maxN = self._data_splitter.getMaxJobs()
				self._log.debug('Dataset resync finished: %d -> %d partitions', old_maxN, self._maxN)
				return (set(jobChanges[0]), set(jobChanges[1]), old_maxN != self._maxN)
Developer: Fra-nk, Project: grid-control, Lines: 25, Source: psource_data.py

Example 13: resync

	def resync(self):
		(result_redo, result_disable, result_sizeChange) = ParameterSource.resync(self)
		if self.resyncEnabled() and self._dataProvider:
			# Get old and new dataset information
			old = DataProvider.loadFromFile(self.getDataPath('cache.dat')).getBlocks()
			self._dataProvider.clearCache()
			new = self._dataProvider.getBlocks()
			self._dataProvider.saveToFile(self.getDataPath('cache-new.dat'), new)

			# Use old splitting information to synchronize with new dataset infos
			jobChanges = self._dataSplitter.resyncMapping(self.getDataPath('map-new.tar'), old, new)
			if jobChanges:
				# Move current splitting to backup and use the new splitting from now on
				def backupRename(old, cur, new):
					if self._keepOld:
						os.rename(self.getDataPath(cur), self.getDataPath(old))
					os.rename(self.getDataPath(new), self.getDataPath(cur))
				backupRename(  'map-old-%d.tar' % time.time(),   'map.tar',   'map-new.tar')
				backupRename('cache-old-%d.dat' % time.time(), 'cache.dat', 'cache-new.dat')
				old_maxN = self._dataSplitter.getMaxJobs()
				self._dataSplitter.importPartitions(self.getDataPath('map.tar'))
				self._maxN = self._dataSplitter.getMaxJobs()
				result_redo.update(jobChanges[0])
				result_disable.update(jobChanges[1])
				result_sizeChange = result_sizeChange or (old_maxN != self._maxN)
			self.resyncFinished()
		return (result_redo, result_disable, result_sizeChange)
Developer: thomas-mueller, Project: grid-control, Lines: 27, Source: psource_data.py

Example 14: _read_plfnp_map

	def _read_plfnp_map(self, config, parent_dataset_expr):
		if parent_dataset_expr and (parent_dataset_expr not in self._plfnp2pdn_cache):
			# read parent source and fill lfnMap with parent_lfn_parts -> parent dataset name mapping
			map_plfnp2pdn = self._plfnp2pdn_cache.setdefault(parent_dataset_expr, {})
			for block in DataProvider.iter_blocks_from_expr(self._empty_config, parent_dataset_expr):
				for fi in block[DataProvider.FileList]:
					map_plfnp2pdn[self._get_lfnp(fi[DataProvider.URL])] = block[DataProvider.Dataset]
		return self._plfnp2pdn_cache.get(parent_dataset_expr, {})  # return cached mapping
Developer: grid-control, Project: grid-control, Lines: 8, Source: scanner_basic.py

Example 15: __init__

    def __init__(self, config, name):
        head = [(0, "Nickname")]

        # Mapping between nickname and config files:
        cfgList = config.get("nickname config", "")
        self.nmCfg = config.getDict(
            "nickname config", {}, parser=lambda x: map(str.strip, x.split(",")), str=lambda x: str.join(",", x)
        )[0]
        if cfgList:
            if "config file" in config.getOptions():
                raise ConfigError("Please use 'nickname config' instead of 'config file'")
            allConfigFiles = utils.flatten(self.nmCfg.values())
            config.set("config file", str.join("\n", allConfigFiles))
            head.append((1, "Config file"))

            # Mapping between nickname and constants:
        self.nmCName = map(str.strip, config.get("nickname constants", "").split())
        self.nmConst = {}
        for var in self.nmCName:
            tmp = config.getDict(var, {})[0]
            for (nick, value) in tmp.items():
                if value:
                    self.nmConst.setdefault(nick, {})[var] = value
                else:
                    self.nmConst.setdefault(nick, {})[var] = ""
            head.append((var, var))

            # Mapping between nickname and lumi filter:
        if "lumi filter" in config.getOptions():
            raise ConfigError("Please use 'nickname lumi filter' instead of 'lumi filter'")
        lumiParse = lambda x: formatLumi(parseLumiFilter(x))
        self.nmLumi = config.getDict("nickname lumi filter", {}, parser=lumiParse)[0]
        if self.nmLumi:
            for dataset in config.get("dataset", "").splitlines():
                (datasetNick, datasetProvider, datasetExpr) = DataProvider.parseDatasetExpr(config, dataset, None)
                config.set(
                    "dataset %s" % datasetNick,
                    "lumi filter",
                    str.join(",", utils.flatten(fromNM(self.nmLumi, datasetNick, []))),
                )
            config.set("lumi filter", str.join(",", self.nmLumi.get(None, [])))
            head.append((2, "Lumi filter"))

        utils.vprint("Mapping between nickname and other settings:\n", -1)

        def report():
            for nick in sorted(set(self.nmCfg.keys() + self.nmConst.keys() + self.nmLumi.keys())):
                tmp = {
                    0: nick,
                    1: str.join(", ", map(os.path.basename, self.nmCfg.get(nick, ""))),
                    2: self.displayLumi(self.nmLumi.get(nick, "")),
                }
                yield utils.mergeDicts([tmp, self.nmConst.get(nick, {})])

        utils.printTabular(head, report(), "cl")
        utils.vprint(level=-1)
        CMSSW.__init__(self, config, name)
Developer: gitter-badger, Project: grid-control, Lines: 57, Source: cmssw_advanced.py


Note: The grid_control.datasets.DataProvider class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. For distribution and use, please refer to the license of the corresponding project; do not reproduce without permission.