This article collects typical usage examples of the get_safe function from the Python module pyon.util.containers. If you are unsure what exactly get_safe does or how to use it, the hand-picked code examples below may help.
Shown below are 15 code examples of the get_safe function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
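Before diving into the examples, here is a minimal sketch of how get_safe is typically called. The config dict, key names, and defaults below are invented for illustration; the examples that follow confirm the two behaviors shown, namely that get_safe resolves dotted key paths into nested dicts and returns an optional default (or None) when a key is missing.

from pyon.util.containers import get_safe

# Illustrative config only - the keys mirror the kind of entries used in the examples below
config = {'ds_params': {'base_url': 'http://example.com/data'}, 'max_records': 10}

base_url = get_safe(config, 'ds_params.base_url')        # dotted path into a nested dict
max_rec = get_safe(config, 'max_records', 1)             # key present: returns 10
poll_rate = get_safe(config, 'ds_params.poll_rate', 60)  # key missing: returns the default, 60
stream_id = get_safe(config, 'stream_id')                # key missing, no default: returns None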
Example 1: _acquire_data
def _acquire_data(cls, config, unlock_new_data_callback):
    """
    Ensures required keys (such as stream_id) are available from config, configures the publisher and then calls:
     BaseDataHandler._new_data_constraints (only if config does not contain 'constraints')
     BaseDataHandler._publish_data passing BaseDataHandler._get_data as a parameter
    @param config Dict containing configuration parameters, may include constraints, formatters, etc
    @param unlock_new_data_callback BaseDataHandler callback function to allow conditional unlocking of the BaseDataHandler._semaphore
    """
    stream_id = get_safe(config, 'stream_id')
    if not stream_id:
        raise ConfigurationError('Configuration does not contain required \'stream_id\' key')

    #TODO: Configure the publisher
    publisher = None

    constraints = get_safe(config, 'constraints')
    if not constraints:
        gevent.getcurrent().link(unlock_new_data_callback)
        constraints = cls._new_data_constraints(config)
        config['constraints'] = constraints

    cls._publish_data(publisher, config, cls._get_data(config))

    # Publish a 'TestFinished' event
    if get_safe(config, 'TESTING'):
        log.debug('Publish TestingFinished event')
        pub = EventPublisher('DeviceCommonLifecycleEvent')
        pub.publish_event(origin='BaseDataHandler._acquire_data', description='TestingFinished')
Example 2: _get_data
def _get_data(cls, config):
    parser = get_safe(config, 'parser', None)
    ext_dset_res = get_safe(config, 'external_dataset_res', None)
    if ext_dset_res and parser:
        #CBM: Not in use yet...
        # t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
        # x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
        # y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
        # z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
        # var_lst = ext_dset_res.dataset_description.parameters['variables']

        max_rec = get_safe(config, 'max_records', 1)
        dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
        tx_yml = get_safe(config, 'taxonomy')
        ttool = TaxyTool.load(tx_yml)  #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool

        cnt = cls._calc_iter_cnt(len(parser.sensor_map), max_rec)
        for x in xrange(cnt):
            rdt = RecordDictionaryTool(taxonomy=ttool)

            for name in parser.sensor_map:
                d = parser.data_map[name][x*max_rec:(x+1)*max_rec]
                rdt[name] = d

            g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
            yield g
    else:
        log.warn('No parser object found in config')
Example 3: _make_management_call
def _make_management_call(self, url, method="get", data=None):
    """
    Makes a call to the Rabbit HTTP management API using the passed in HTTP method.
    """
    log.debug("Calling rabbit API management (%s): %s", method, url)

    meth = getattr(requests, method)

    try:
        mgmt_cfg_key = CFG.get_safe("container.messaging.management.server", "rabbit_manage")
        mgmt_cfg = CFG.get_safe("server." + mgmt_cfg_key)
        username = get_safe(mgmt_cfg, "username") or "guest"
        password = get_safe(mgmt_cfg, "password") or "guest"

        with gevent.timeout.Timeout(10):
            r = meth(url, auth=(username, password), data=data)
        r.raise_for_status()

        if not r.content == "":
            content = json.loads(r.content)
        else:
            content = None

    except gevent.timeout.Timeout as ex:
        raise Timeout(str(ex))
    except requests.exceptions.Timeout as ex:
        raise Timeout(str(ex))
    except (requests.exceptions.ConnectionError, socket.error) as ex:
        raise ServiceUnavailable(str(ex))
    except requests.exceptions.RequestException as ex:
        # the generic base exception all requests' exceptions inherit from, raise our
        # general server error too.
        raise ServerError(str(ex))

    return content
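Example 3 mixes two spellings that are easy to confuse: CFG.get_safe(...) is a method on pyon's global configuration object, while get_safe(mgmt_cfg, ...) is the module-level helper applied to an ordinary mapping. A minimal sketch of the distinction, reusing the keys from the example (the "guest" fallbacks are the example's own defaults):

from pyon.core.bootstrap import CFG            # pyon's global configuration object
from pyon.util.containers import get_safe      # standalone helper for any dict-like value

# Method form: the config object resolves the dotted path itself
mgmt_cfg_key = CFG.get_safe("container.messaging.management.server", "rabbit_manage")
mgmt_cfg = CFG.get_safe("server." + mgmt_cfg_key)

# Function form: pass the mapping explicitly as the first argument
username = get_safe(mgmt_cfg, "username") or "guest"
password = get_safe(mgmt_cfg, "password") or "guest"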
Example 4: set_configuration
def set_configuration(self, config):
    log.warn("DRIVER: set_configuration")
    """
    expect configuration to have:
    - parser module/class
    - directory, wildcard to find data files
    - optional timestamp of last granule
    - optional poll rate
    - publish info
    """
    log.error("Log level: %s", log.getEffectiveLevel())
    log.debug('using configuration: %s', config)
    self.config = config
    self.max_records = get_safe(config, 'max_records', 100)

    self.stream_config = self.CFG.get('stream_config', {})
    if len(self.stream_config) == 1:
        stream_cfg = self.stream_config.values()[0]
    elif len(self.stream_config) > 1:
        stream_cfg = self.stream_config.values()[0]

    stream_id = stream_cfg['stream_id']
    stream_route = IonObject(OT.StreamRoute, routing_key=stream_cfg['routing_key'], exchange_point=stream_cfg['exchange_point'])
    param_dict = stream_cfg['stream_def_dict']['parameter_dictionary']
    self.publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route)
    self.parameter_dictionary = ParameterDictionary.load(param_dict)
    self.time_field = self.parameter_dictionary.get_temporal_context()
    self.latest_granule_time = get_safe(config, 'last_time', 0)
Example 5: _constraints_for_new_request
def _constraints_for_new_request(cls, config):
    old_list = get_safe(config, 'new_data_check') or []
    # CBM: Fix this when the DotList crap is sorted out
    old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # Determine which files are new
    new_list = [x for x in curr_list if x not in old_list]

    if len(new_list) == 0:
        raise NoNewDataWarning()

    # The curr_list is the new new_data_check - used for the next "new data" evaluation
    config['set_new_data_check'] = curr_list

    # The new_list is the set of new files - these will be processed
    ret['new_files'] = new_list
    ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
    ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
    ret['bounding_box'] = {}
    ret['vars'] = []

    return ret
Example 6: _acquire_data
def _acquire_data(cls, config, publisher, unlock_new_data_callback):
    """
    Ensures required keys (such as stream_id) are available from config, configures the publisher and then calls:
     BaseDataHandler._new_data_constraints (only if config does not contain 'constraints')
     BaseDataHandler._publish_data passing BaseDataHandler._get_data as a parameter
    @param config Dict containing configuration parameters, may include constraints, formatters, etc
    @param unlock_new_data_callback BaseDataHandler callback function to allow conditional unlocking of the BaseDataHandler._semaphore
    """
    log.debug('start _acquire_data: config={0}'.format(config))

    cls._init_acquisition_cycle(config)

    constraints = get_safe(config, 'constraints')
    if not constraints:
        gevent.getcurrent().link(unlock_new_data_callback)
        constraints = cls._new_data_constraints(config)
        if constraints is None:
            raise InstrumentParameterException("Data constraints returned from _new_data_constraints cannot be None")
        config['constraints'] = constraints

    cls._publish_data(publisher, cls._get_data(config))

    # Publish a 'TestFinished' event
    if get_safe(config, 'TESTING'):
        log.debug('Publish TestingFinished event')
        pub = EventPublisher('DeviceCommonLifecycleEvent')
        pub.publish_event(origin='BaseDataHandler._acquire_data', description='TestingFinished')
Example 7: _get_data
def _get_data(cls, config):
    new_flst = get_safe(config, 'constraints.new_files', [])
    hdr_cnt = get_safe(config, 'header_count', SlocumParser.DEFAULT_HEADER_SIZE)
    for f in new_flst:
        try:
            parser = SlocumParser(f[0], hdr_cnt)
            #CBM: Not in use yet...
            # ext_dset_res = get_safe(config, 'external_dataset_res', None)
            # t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
            # x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
            # y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
            # z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
            # var_lst = ext_dset_res.dataset_description.parameters['variables']

            max_rec = get_safe(config, 'max_records', 1)
            dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
            #tx_yml = get_safe(config, 'taxonomy')
            #ttool = TaxyTool.load(tx_yml) #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool
            pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

            cnt = calculate_iteration_count(len(parser.sensor_map), max_rec)
            for x in xrange(cnt):
                #rdt = RecordDictionaryTool(taxonomy=ttool)
                rdt = RecordDictionaryTool(param_dictionary=pdict)

                for name in parser.sensor_map:
                    d = parser.data_map[name][x*max_rec:(x+1)*max_rec]
                    rdt[name] = d

                #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
                g = build_granule(data_producer_id=dprod_id, record_dictionary=rdt, param_dictionary=pdict)
                yield g
        except SlocumParseException as spe:
            # TODO: Decide what to do here, raise an exception or carry on
            log.error('Error parsing data file: \'{0}\''.format(f))
Example 8: _constraints_for_new_request
def _constraints_for_new_request(cls, config):
    old_list = get_safe(config, 'new_data_check') or []
    # CBM: Fix this when the DotList crap is sorted out
    old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

    ret = {}
    base_url = get_safe(config, 'ds_params.base_url')
    list_pattern = get_safe(config, 'ds_params.list_pattern')
    date_pattern = get_safe(config, 'ds_params.date_pattern')
    date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

    curr_list = list_file_info(base_url, list_pattern)

    # Compare the files read last time (old_list) with the current directory contents (curr_list).
    # If the file names are the same (curr_file[0] and old_file[0]), compare the size of the
    # current file (curr_file[2]) with the file position recorded at the last read (old_file[3]).
    # If there is more data now than was read last time, add the file to the list.
    new_list = []
    for curr_file in curr_list:
        found = False
        for old_file in old_list:
            if curr_file[0] == old_file[0]:
                # Same filename: the file is still in the directory and was previously read
                found = True
                if curr_file[2] > old_file[3]:
                    # The current size exceeds the position read last time, so there is new data
                    new_list.append((curr_file[0], curr_file[1], curr_file[2], old_file[-1]))
        if not found:
            new_list.append(curr_file)

    config['set_new_data_check'] = curr_list

    ret['new_files'] = new_list
    ret['bounding_box'] = {}
    ret['vars'] = []

    return ret
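To make the nested comparison in Example 8 concrete, here is a hypothetical illustration. The tuple layout (name, modification time, size, read position) is only inferred from the indices used here and in Example 15; the source does not document it, and all values are invented:

# Hypothetical directory state (tuple layout inferred, values invented)
old_list = [('data_0001.dat', 1325001600, 5000, 5000),   # read to the end last time
            ('data_0002.dat', 1325088000, 6000, 6000)]   # read to byte 6000 last time
curr_list = [('data_0001.dat', 1325001600, 5000, 0),     # unchanged: 5000 is not > 5000
             ('data_0002.dat', 1325174400, 9500, 0),     # same name, now 9500 bytes: it grew
             ('data_0003.dat', 1325260800, 1200, 0)]     # never seen before

# The loop above would produce:
#   ('data_0002.dat', 1325174400, 9500, 6000)  - grown file, old read position carried over
#   ('data_0003.dat', 1325260800, 1200, 0)     - brand-new file, appended as-is
# data_0001.dat is found but has not grown, so it is not re-read.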
Example 9: execute
def execute(self, granule):
    """Processes incoming data!!!!
    """
    rdt = RecordDictionaryTool.load_from_granule(granule)
    #todo: use only flat dicts for now, may change later...
    # rdt0 = rdt['coordinates']
    # rdt1 = rdt['data']

    pressure = get_safe(rdt, 'pres')  # psd.get_values('conductivity')
    longitude = get_safe(rdt, 'lon')  # psd.get_values('longitude')
    latitude = get_safe(rdt, 'lat')  # psd.get_values('latitude')
    time = get_safe(rdt, 'time')  # psd.get_values('time')
    height = get_safe(rdt, 'height')  # psd.get_values('time')

    log.warn('Got pressure: %s' % str(pressure))

    # L1
    # 1) The algorithm input is the L0 pressure data product (p_hex) and, in the case of the SBE 37IM, the pressure range (P_rng) from metadata.
    # 2) Convert the hexadecimal string to a decimal string
    # 3) For the SBE 37IM only, convert the pressure range (P_rng) from psia to dbar SBE 37IM
    #    Convert P_rng (input from metadata) from psia to dbar
    # 4) Perform scaling operation
    #    SBE 37IM
    #    L1 pressure data product (in dbar):

    # Use the constructor to put data into a granule
    psc = PointSupplementConstructor(point_definition=self.outgoing_stream_def, stream_id=self.streams['output'])
    ### Assumes the config argument for output streams is known and there is only one 'output'.
    ### The stream id is part of the metadata which must go in each stream granule - this is awkward to do at the
    ### application level like this!

    scaled_pressure = pressure

    for i in xrange(len(pressure)):
        #todo: get pressure range from metadata (if present) and include in calc
        scaled_pressure[i] = (pressure[i])

    root_rdt = RecordDictionaryTool(taxonomy=self.tx)

    #todo: use only flat dicts for now, may change later...
    # data_rdt = RecordDictionaryTool(taxonomy=self.tx)
    # coord_rdt = RecordDictionaryTool(taxonomy=self.tx)

    root_rdt['pres'] = scaled_pressure
    root_rdt['time'] = time
    root_rdt['lat'] = latitude
    root_rdt['lon'] = longitude
    root_rdt['height'] = height

    # root_rdt['coordinates'] = coord_rdt
    # root_rdt['data'] = data_rdt

    return build_granule(data_producer_id='ctd_L1_pressure', taxonomy=self.tx, record_dictionary=root_rdt)

    # Unreachable: left over from the earlier PointSupplementConstructor-based implementation above
    return psc.close_stream_granule()
Example 10: get_datastore
def get_datastore(self, ds_name, profile=DataStore.DS_PROFILE.BASIC, config=None):
    """
    Factory method to get a datastore instance from given name, profile and config.
    This is the central point to cache these instances, to decide persistent or mock
    and to force clean the store on first use.
    @param ds_name Logical name of datastore (will be scoped with sysname)
    @param profile One of known constants determining the use of the store
    @param config Override config to use
    """
    assert ds_name, "Must provide ds_name"
    if ds_name in self._datastores:
        log.debug("get_datastore(): Found instance of store '%s'" % ds_name)
        return self._datastores[ds_name]

    scoped_name = ("%s_%s" % (get_sys_name(), ds_name)).lower()

    # Imports here to prevent cyclic module dependency
    from pyon.core.bootstrap import CFG
    config = config or CFG

    persistent = not bool(get_safe(config, "system.mockdb"))
    force_clean = bool(get_safe(config, "system.force_clean"))

    log.info(
        "get_datastore(): Create instance of store '%s' {persistent=%s, force_clean=%s, scoped_name=%s}"
        % (ds_name, persistent, force_clean, scoped_name)
    )

    # Persistent (CouchDB) or MockDB?
    if persistent:
        # Use inline import to prevent circular import dependency
        from pyon.datastore.couchdb.couchdb_datastore import CouchDB_DataStore
        new_ds = CouchDB_DataStore(datastore_name=scoped_name, profile=profile)
    else:
        # Use inline import to prevent circular import dependency
        from pyon.datastore.mockdb.mockdb_datastore import MockDB_DataStore
        new_ds = MockDB_DataStore(datastore_name=scoped_name)  # , profile=profile)

    # Clean the store instance
    if force_clean:
        try:
            new_ds.delete_datastore(scoped_name)
        except NotFound as nf:
            pass

    # Create store if not existing
    if not new_ds.datastore_exists(scoped_name):
        new_ds.create_datastore(scoped_name)

    # Set a few standard datastore instance fields
    new_ds.local_name = ds_name
    new_ds.ds_profile = profile

    self._datastores[ds_name] = new_ds

    return new_ds
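The two get_safe calls in Example 10 read boolean flags from the container configuration. A small illustrative override follows; in a running container these flags would normally come from pyon's CFG (e.g. res/config/pyon.yml), and the plain dict here is only for demonstration:

from pyon.util.containers import get_safe

config = {'system': {'mockdb': True, 'force_clean': True}}   # illustrative override only

persistent = not bool(get_safe(config, "system.mockdb"))     # False -> MockDB_DataStore is used
force_clean = bool(get_safe(config, "system.force_clean"))   # True  -> the store is wiped on first use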
Example 11: execute
def execute(input=None, context=None, config=None, params=None, state=None, fileName=None):
    stream_definition_id = params
    mpl_allowed_numerical_types = ['int32', 'int64', 'uint32', 'uint64', 'float32', 'float64']

    if stream_definition_id is None:
        log.error("Matplotlib transform: Need an output stream definition to process graphs")
        return None

    # parse the incoming data
    rdt = RecordDictionaryTool.load_from_granule(input)

    # build a list of fields/variables that need to be plotted. Use the list provided by the UI
    # since the retrieved granule might have extra fields.
    fields = rdt.fields
    resolution = "640x480"
    if config:
        if 'parameters' in config:
            fields = config['parameters']
        if 'resolution' in config:
            resolution = config['resolution']

    vardict = {}
    vardict['time'] = get_safe(rdt, 'time')
    if vardict['time'] is None:
        print "Matplotlib transform: Did not receive a time field to work with"
        log.error("Matplotlib transform: Did not receive a time field to work with")
        return None

    for field in fields:
        if field == 'time':
            continue
        # only consider fields which are supposed to be numbers.
        if (rdt[field] is not None) and (rdt[field].dtype not in mpl_allowed_numerical_types):
            continue
        vardict[field] = get_safe(rdt, field)

    arrLen = len(vardict['time'])
    # init the graph_data structure for storing values
    graph_data = {}
    for varname in vardict.keys():
        graph_data[varname] = []

    # If code reached here, the graph data storage has been initialized. Just add values
    # to the list
    for varname in vardict.keys():  # psd.list_field_names():
        if vardict[varname] is None:
            # create an array of zeros to compensate for missing values
            graph_data[varname].extend([0.0]*arrLen)
        else:
            graph_data[varname].extend(vardict[varname])

    out_granule = VizTransformMatplotlibGraphsAlgorithm.render_graphs(graph_data, stream_definition_id, fileName, resolution=resolution)

    return out_granule
Example 12: execute_acquire_data
def execute_acquire_data(self, *args):
    """
    Creates a copy of self._dh_config, creates a publisher, and spawns a greenlet to perform a data acquisition cycle
    If the args[0] is a dict, any entries keyed with one of the 'PATCHABLE_CONFIG_KEYS' are used to patch the config
    Greenlet binds to BaseDataHandler._acquire_data and passes the publisher and config
    Disallows multiple "new data" (unconstrained) requests using BaseDataHandler._semaphore lock
    Called from:
     InstrumentAgent._handler_observatory_execute_resource
      |--> ExternalDataAgent._handler_streaming_execute_resource
    @parameter args First argument can be a config dictionary
    """
    log.debug('Executing acquire_data: args = {0}'.format(args))

    # Make a copy of the config to ensure no cross-pollution
    config = self._dh_config.copy()

    # Patch the config if mods are passed in
    try:
        config_mods = args[0]
        if not isinstance(config_mods, dict):
            raise IndexError()

        log.debug('Configuration modifications provided: {0}'.format(config_mods))
        for k in self._params['PATCHABLE_CONFIG_KEYS']:
            p = get_safe(config_mods, k)
            if p is not None:
                config[k] = p

    except IndexError:
        log.info('No configuration modifications were provided')

    # Verify that there is a stream_id member in the config
    stream_id = get_safe(config, 'stream_id')
    if not stream_id:
        raise ConfigurationError('Configuration does not contain required \'stream_id\' member')

    isNew = get_safe(config, 'constraints') is None

    if isNew and not self._semaphore.acquire(blocking=False):
        log.warn('Already acquiring new data - action not duplicated')
        return

    ndc = None
    if isNew:
        # Get the NewDataCheck attachment and add its content to the config
        ext_ds_id = get_safe(config, 'external_dataset_res_id')
        if ext_ds_id:
            ndc = self._find_new_data_check_attachment(ext_ds_id)

    config['new_data_check'] = ndc

    # Create a publisher to pass into the greenlet
    publisher = self._stream_registrar.create_publisher(stream_id=stream_id)

    # Spawn a greenlet to do the data acquisition and publishing
    g = spawn(self._acquire_data, config, publisher, self._unlock_new_data_callback, self._update_new_data_check_attachment)
    log.debug('** Spawned {0}'.format(g))
    self._glet_queue.append(g)
Example 13: _get_data
def _get_data(cls, config):
    """
    Retrieves config['constraints']['count'] number of random samples of length config['constraints']['array_len']
    @param config Dict of configuration parameters - must contain ['constraints']['count'] and ['constraints']['array_len']
    """
    ext_dset_res = get_safe(config, 'external_dataset_res', None)

    # Get the Dataset object from the config (should have been instantiated in _init_acquisition_cycle)
    ds = get_safe(config, 'dataset_object')

    if ext_dset_res and ds:
        t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
        x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
        y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
        z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
        var_lst = ext_dset_res.dataset_description.parameters['variables']

        t_slice = get_safe(config, 'constraints.temporal_slice', (slice(0, 1)))
        #TODO: Using 'eval' here is BAD - need to find a less sketchy way to pass constraints
        if isinstance(t_slice, str):
            t_slice = eval(t_slice)

        lon = ds.variables[x_vname][:]
        lat = ds.variables[y_vname][:]
        z = ds.variables[z_vname][:]

        t_arr = ds.variables[t_vname][t_slice]
        data_arrays = {}
        for varn in var_lst:
            data_arrays[varn] = ds.variables[varn][t_slice]

        max_rec = get_safe(config, 'max_records', 1)
        #dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
        stream_def = get_safe(config, 'stream_def')

        cnt = calculate_iteration_count(t_arr.size, max_rec)
        for x in xrange(cnt):
            ta = t_arr[x * max_rec:(x + 1) * max_rec]

            # Make a 'master' RecDict
            rdt = RecordDictionaryTool(stream_definition_id=stream_def)

            # Assign coordinate values to the RecDict
            rdt[x_vname] = lon
            rdt[y_vname] = lat
            rdt[z_vname] = z

            # Assign data values to the RecDict
            rdt[t_vname] = ta
            for key, arr in data_arrays.iteritems():
                d = arr[x * max_rec:(x + 1) * max_rec]
                rdt[key] = d

            g = rdt.to_granule()
            yield g

        ds.close()
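The TODO in Example 13 flags the eval call as unsafe. One possible alternative, not part of the original code, is to pass the slice bounds through the config as plain numbers and rebuild the slice object on the receiving side. The sketch below assumes 'constraints.temporal_slice' holds a (start, stop) pair or its string form rather than a slice object:

import ast

t_bounds = get_safe(config, 'constraints.temporal_slice', (0, 1))
if isinstance(t_bounds, str):
    # Parse a literal like "(0, 10)" without executing arbitrary code
    t_bounds = ast.literal_eval(t_bounds)
t_slice = slice(*t_bounds)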
Example 14: get_visualization_image
def get_visualization_image(self, data_product_id='', visualization_parameters=None, callback=''):
    # Error check
    if not data_product_id:
        raise BadRequest("The data_product_id parameter is missing")
    if visualization_parameters == {}:
        visualization_parameters = None

    # Extract the retrieval related parameters. Definitely init all parameters first
    query = None
    if visualization_parameters:
        query = {'parameters': []}
        # Error check and damage control. Definitely need time
        if 'parameters' in visualization_parameters:
            if 'time' not in visualization_parameters['parameters']:
                visualization_parameters['parameters'].append('time')
            query['parameters'] = visualization_parameters['parameters']
        if 'stride_time' in visualization_parameters:
            query['stride_time'] = visualization_parameters['stride_time']
        if 'start_time' in visualization_parameters:
            query['start_time'] = visualization_parameters['start_time']
        if 'end_time' in visualization_parameters:
            query['end_time'] = visualization_parameters['end_time']

    # get the dataset_id associated with the data_product. Need it to do the data retrieval
    ds_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.DataSet, True)
    if ds_ids is None or not ds_ids:
        return None

    # Ideally just need the latest granule to figure out the list of images
    #replay_granule = self.clients.data_retriever.retrieve(ds_ids[0],{'start_time':0,'end_time':2})
    retrieved_granule = self.clients.data_retriever.retrieve(ds_ids[0], query=query)
    if retrieved_granule is None:
        return None

    # send the granule through the transform to get the matplotlib graphs
    mpl_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name('graph_image_param_dict', id_only=True)
    mpl_stream_def = self.clients.pubsub_management.create_stream_definition('mpl', parameter_dictionary_id=mpl_pdict_id)
    mpl_data_granule = VizTransformMatplotlibGraphsAlgorithm.execute(retrieved_granule, config=visualization_parameters, params=mpl_stream_def)

    if mpl_data_granule is None:
        return None

    mpl_rdt = RecordDictionaryTool.load_from_granule(mpl_data_granule)

    ret_dict = dict()
    ret_dict['content_type'] = (get_safe(mpl_rdt, "content_type"))[0]
    ret_dict['image_name'] = (get_safe(mpl_rdt, "image_name"))[0]
    # reason for encoding as base64 string is otherwise message pack complains about the bit stream
    ret_dict['image_obj'] = base64.encodestring((get_safe(mpl_rdt, "image_obj"))[0])

    if callback == '':
        return ret_dict
    else:
        return callback + "(" + simplejson.dumps(ret_dict) + ")"
Example 15: _get_data
def _get_data(cls, config):
    """
    Iterable function that acquires data from a source iteratively based on constraints provided by config
    Passed into BaseDataHandler._publish_data and iterated to publish samples.
    @param config dict containing configuration parameters, may include constraints, formatters, etc
    @retval an iterable that returns well-formed Granule objects on each iteration
    """
    new_flst = get_safe(config, "constraints.new_files", [])
    parser_mod = get_safe(config, "parser_mod", "")
    parser_cls = get_safe(config, "parser_cls", "")

    module = __import__(parser_mod, fromlist=[parser_cls])
    classobj = getattr(module, parser_cls)

    for f in new_flst:
        try:
            size = os.stat(f[0]).st_size
            try:
                # find the new data check index in config
                index = -1
                for ndc in config["set_new_data_check"]:
                    if ndc[0] == f[0]:
                        index = config["set_new_data_check"].index(ndc)
                        break
            except:
                log.error("File name not found in attachment")

            parser = classobj(f[0], f[3])

            max_rec = get_safe(config, "max_records", 1)
            stream_def = get_safe(config, "stream_def")
            while True:
                particles = parser.get_records(max_count=max_rec)
                if not particles:
                    break

                rdt = RecordDictionaryTool(stream_definition_id=stream_def)
                populate_rdt(rdt, particles)
                g = rdt.to_granule()

                # TODO: record files already read for future additions...
                # update new data check with the latest file position
                if "set_new_data_check" in config and index > -1:
                    # WRONG: should only record this after file finished parsing,
                    # but may not have another yield at that point to trigger update
                    config["set_new_data_check"][index] = (f[0], f[1], f[2], size)

                yield g

            # parser.close()

        except Exception as ex:
            # TODO: Decide what to do here, raise an exception or carry on
            log.error("Error parsing data file '{0}': {1}".format(f, ex))