This page collects typical usage examples of the Python method interface.services.dm.iingestion_management_service.IngestionManagementServiceClient.persist_data_stream. If you have been wondering what IngestionManagementServiceClient.persist_data_stream does, how to call it, or where to find working examples, the curated code samples below may help. You can also explore the containing class, interface.services.dm.iingestion_management_service.IngestionManagementServiceClient, for further usage examples.
Six code examples of IngestionManagementServiceClient.persist_data_stream are presented below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
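Before diving into the examples, here is a minimal sketch of the typical call pattern. It is an illustration only, not code from the examples below: it assumes a running container in which the bootstrap service has already created an IngestionConfiguration (as Example 5 notes), and stream_id/dataset_id are placeholders for existing resources.

# Minimal sketch (assumptions: running container, bootstrapped ingestion
# configuration; stream_id and dataset_id are placeholder variables)
from pyon.public import RT
from interface.services.coi.iresource_registry_service import ResourceRegistryServiceClient
from interface.services.dm.iingestion_management_service import IngestionManagementServiceClient

resource_registry = ResourceRegistryServiceClient()
ingestion_management = IngestionManagementServiceClient()

# Look up the ingestion configuration created at bootstrap (same lookup as Example 5)
ingest_configs, _ = resource_registry.find_resources(restype=RT.IngestionConfiguration, id_only=True)
ingest_config_id = ingest_configs[0]

# Persist into an explicit dataset (the pattern used in Examples 1-4 and 6) ...
ingestion_management.persist_data_stream(stream_id=stream_id,
                                         ingestion_configuration_id=ingest_config_id,
                                         dataset_id=dataset_id)

# ... or omit dataset_id and capture the returned dataset id (the pattern in Example 5)
dataset_id = ingestion_management.persist_data_stream(stream_id=stream_id,
                                                      ingestion_configuration_id=ingest_config_id)

# Stop persisting the stream when done
ingestion_management.unpersist_data_stream(stream_id=stream_id,
                                           ingestion_configuration_id=ingest_config_id)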
Example 1: TestDMEnd2End
# Required import: from interface.services.dm.iingestion_management_service import IngestionManagementServiceClient [as alias]
# Or: from interface.services.dm.iingestion_management_service.IngestionManagementServiceClient import persist_data_stream [as alias]
# ......... (part of the code is omitted here) .........
    dataset_id = self.create_dataset(pdict_id)
    # self.get_datastore(dataset_id)
    self.i += 1
    return stream_id, route, stream_def_id, dataset_id

def publish_hifi(self, stream_id, stream_route, offset=0):
    '''
    Publish deterministic data
    '''
    pub = StandaloneStreamPublisher(stream_id, stream_route)
    stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
    stream_def_id = stream_def._id
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10) + (offset * 10)
    rdt['temp'] = np.arange(10) + (offset * 10)
    pub.publish(rdt.to_granule())

def publish_fake_data(self, stream_id, route):
    '''
    Make four granules
    '''
    for i in xrange(4):
        self.publish_hifi(stream_id, route, i)

def start_ingestion(self, stream_id, dataset_id):
    '''
    Starts ingestion/persistence for a given dataset
    '''
    ingest_config_id = self.get_ingestion_config()
    self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)

def stop_ingestion(self, stream_id):
    ingest_config_id = self.get_ingestion_config()
    self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id)

def validate_granule_subscription(self, msg, route, stream_id):
    '''
    Validation for granule format
    '''
    if msg == {}:
        return
    rdt = RecordDictionaryTool.load_from_granule(msg)
    log.info('%s', rdt.pretty_print())
    self.assertIsInstance(msg, Granule, 'Message is improperly formatted. (%s)' % type(msg))
    self.event.set()

def wait_until_we_have_enough_granules(self, dataset_id='', data_size=40):
    '''
    Loops until there is a sufficient amount of data in the dataset
    '''
    done = False
    with gevent.Timeout(40):
        while not done:
            extents = self.dataset_management.dataset_extents(dataset_id, 'time')[0]
            granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
            rdt = RecordDictionaryTool.load_from_granule(granule)
            if rdt['time'] and rdt['time'][0] != rdt._pdict.get_context('time').fill_value and extents >= data_size:
                done = True
            else:
                gevent.sleep(0.2)
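Taken together, the helpers in Example 1 compose into a simple end-to-end flow. The sketch below is illustrative only: make_stream_and_dataset is a hypothetical name standing in for the truncated setup method whose tail appears at the top of the example.

# Hypothetical composition of Example 1's helpers (sketch only)
stream_id, route, stream_def_id, dataset_id = self.make_stream_and_dataset()  # hypothetical setup
self.start_ingestion(stream_id, dataset_id)        # calls persist_data_stream internally
self.publish_fake_data(stream_id, route)           # publishes four deterministic granules
self.wait_until_we_have_enough_granules(dataset_id, data_size=40)
self.stop_ingestion(stream_id)                     # calls unpersist_data_stream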
Example 2: TestDMEnd2End
# Required import: from interface.services.dm.iingestion_management_service import IngestionManagementServiceClient [as alias]
# Or: from interface.services.dm.iingestion_management_service.IngestionManagementServiceClient import persist_data_stream [as alias]
# ......... (part of the code is omitted here) .........
    dataset_id = self.create_dataset(pdict_id)
    self.get_datastore(dataset_id)
    self.i += 1
    return stream_id, route, stream_def_id, dataset_id

def publish_hifi(self, stream_id, stream_route, offset=0):
    '''
    Publish deterministic data
    '''
    pub = StandaloneStreamPublisher(stream_id, stream_route)
    stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
    stream_def_id = stream_def._id
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10) + (offset * 10)
    rdt['temp'] = np.arange(10) + (offset * 10)
    pub.publish(rdt.to_granule())

def publish_fake_data(self, stream_id, route):
    '''
    Make four granules
    '''
    for i in xrange(4):
        self.publish_hifi(stream_id, route, i)

def start_ingestion(self, stream_id, dataset_id):
    '''
    Starts ingestion/persistence for a given dataset
    '''
    ingest_config_id = self.get_ingestion_config()
    self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)

def stop_ingestion(self, stream_id):
    ingest_config_id = self.get_ingestion_config()
    self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id)

def stop_all_ingestion(self):
    try:
        [self.stop_ingestion(sid) for sid in self.streams]
    except:
        pass

def validate_granule_subscription(self, msg, route, stream_id):
    '''
    Validation for granule format
    '''
    if msg == {}:
        return
    rdt = RecordDictionaryTool.load_from_granule(msg)
    log.info('%s', rdt.pretty_print())
    self.assertIsInstance(msg, Granule, 'Message is improperly formatted. (%s)' % type(msg))
    self.event.set()

def wait_until_we_have_enough_granules(self, dataset_id='', data_size=40):
    '''
    Loops until there is a sufficient amount of data in the dataset
    '''
    done = False
    with gevent.Timeout(40):
        while not done:
            extents = self.dataset_management.dataset_extents(dataset_id, 'time')[0]
            granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
            rdt = RecordDictionaryTool.load_from_granule(granule)
Example 3: TestDMEnd2End
# Required import: from interface.services.dm.iingestion_management_service import IngestionManagementServiceClient [as alias]
# Or: from interface.services.dm.iingestion_management_service.IngestionManagementServiceClient import persist_data_stream [as alias]
# ......... (part of the code is omitted here) .........
    dataset_id = self.create_dataset(pdict_id)
    self.get_datastore(dataset_id)
    self.i += 1
    return stream_id, route, stream_def_id, dataset_id

def publish_hifi(self, stream_id, stream_route, offset=0):
    """
    Publish deterministic data
    """
    pub = StandaloneStreamPublisher(stream_id, stream_route)
    stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
    stream_def_id = stream_def._id
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt["time"] = np.arange(10) + (offset * 10)
    rdt["temp"] = np.arange(10) + (offset * 10)
    pub.publish(rdt.to_granule())

def publish_fake_data(self, stream_id, route):
    """
    Make four granules
    """
    for i in xrange(4):
        self.publish_hifi(stream_id, route, i)

def start_ingestion(self, stream_id, dataset_id):
    """
    Starts ingestion/persistence for a given dataset
    """
    ingest_config_id = self.get_ingestion_config()
    self.ingestion_management.persist_data_stream(
        stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id
    )

def stop_ingestion(self, stream_id):
    ingest_config_id = self.get_ingestion_config()
    self.ingestion_management.unpersist_data_stream(
        stream_id=stream_id, ingestion_configuration_id=ingest_config_id
    )

def stop_all_ingestion(self):
    try:
        [self.stop_ingestion(sid) for sid in self.streams]
    except:
        pass

def validate_granule_subscription(self, msg, route, stream_id):
    """
    Validation for granule format
    """
    if msg == {}:
        return
    rdt = RecordDictionaryTool.load_from_granule(msg)
    log.info("%s", rdt.pretty_print())
    self.assertIsInstance(msg, Granule, "Message is improperly formatted. (%s)" % type(msg))
    self.event.set()

def wait_until_we_have_enough_granules(self, dataset_id="", data_size=40):
    """
    Loops until there is a sufficient amount of data in the dataset
    """
    done = False
    with gevent.Timeout(40):
Example 4: TestDMEnd2End
# Required import: from interface.services.dm.iingestion_management_service import IngestionManagementServiceClient [as alias]
# Or: from interface.services.dm.iingestion_management_service.IngestionManagementServiceClient import persist_data_stream [as alias]
# ......... (part of the code is omitted here) .........
    self.event.clear()

    # Get a precompiled parameter dictionary with basic ctd fields
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

    # Add a field that supports binary data input.
    bin_context = ParameterContext('binary', param_type=ArrayType())
    context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
    # Add another field that supports dictionary elements.
    rec_context = ParameterContext('records', param_type=RecordType())
    context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

    pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
    stream_definition = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)
    stream_id, route = self.pubsub_management.create_stream('producer', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)

    #--------------------------------------------------------------------------------
    # Start persisting the data on the stream
    # - Get the ingestion configuration from the resource registry
    # - Create the dataset
    # - Call persist_data_stream to set up the subscription for the ingestion workers
    #   on the stream you specify, which causes the data to be persisted
    #--------------------------------------------------------------------------------

    ingest_config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset(pdict_id)
    self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)

    #--------------------------------------------------------------------------------
    # Now the granules are being ingested and persisted
    #--------------------------------------------------------------------------------

    self.launch_producer(stream_id)
    self.wait_until_we_have_enough_granules(dataset_id, 4)

    #--------------------------------------------------------------------------------
    # Now get the data in one chunk using an RPC call to start_retrieve
    #--------------------------------------------------------------------------------

    replay_data = self.data_retriever.retrieve(dataset_id)
    self.assertIsInstance(replay_data, Granule)
    rdt = RecordDictionaryTool.load_from_granule(replay_data)
    self.assertTrue((rdt['time'][:10] == np.arange(10)).all(), '%s' % rdt['time'][:])
    self.assertTrue((rdt['binary'][:10] == np.array(['hi'] * 10, dtype='object')).all())

    #--------------------------------------------------------------------------------
    # Now try the streamed approach
    #--------------------------------------------------------------------------------

    replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)
    self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id)
    log.info('Process ID: %s', process_id)

    replay_client = ReplayClient(process_id)

    #--------------------------------------------------------------------------------
    # Create the listening endpoint for the retriever to talk to
    #--------------------------------------------------------------------------------
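Example 4 thus exercises both retrieval paths against the persisted dataset: a one-shot RPC retrieve that returns a single Granule, and a streamed replay that republishes the data onto a dedicated stream for a listening endpoint to consume.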
Example 5: TestInstrumentDataIngestion
# Required import: from interface.services.dm.iingestion_management_service import IngestionManagementServiceClient [as alias]
# Or: from interface.services.dm.iingestion_management_service.IngestionManagementServiceClient import persist_data_stream [as alias]
# ......... (part of the code is omitted here) .........
    parsed_sample = sample['parsed']
    self.assertParsedSampleDict(parsed_sample)
    self.assertRawSampleDict(raw_sample)

def get_ingestion_config(self):
    #
    # From test_dm_end_2_end.py as of 7/23/12
    #
    #--------------------------------------------------------------------------------
    # Grab the ingestion configuration from the resource registry
    #--------------------------------------------------------------------------------
    # The ingestion configuration should have been created by the bootstrap service,
    # which is configured through r2deploy.yml
    ingest_configs, _ = self.resource_registry.find_resources(
        restype=RT.IngestionConfiguration, id_only=True)
    return ingest_configs[0]

def prepare_ingestion(self):
    #
    # Takes pieces from test_dm_end_2_end.py as of 7/23/12
    #
    # Get the ingestion configuration from the resource registry
    self.ingest_config_id = ingest_config_id = self.get_ingestion_config()

    # to keep the (stream_id, dataset_id) associated with each stream_name
    self.dataset_ids = {}

    for stream_name, stream_config in self._stream_config.iteritems():
        stream_id = stream_config['id']
        dataset_id = self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingest_config_id)
        log.info("persisting stream_name=%s (stream_id=%s): dataset_id=%s" % (
            stream_name, stream_id, dataset_id))
        self.assertTrue(self.ingestion_management.is_persisted(stream_id))
        self.dataset_ids[stream_name] = (stream_id, dataset_id)

def verify_granules_persisted(self):
    #
    # takes elements from ingestion_management_test.py as of 7/23/12
    #
    ingest_config_id = self.ingest_config_id
    for stream_name, (stream_id, dataset_id) in self.dataset_ids.iteritems():
        assoc = self.resource_registry.find_associations(
            subject=ingest_config_id, predicate=PRED.hasSubscription)
        sub = self.resource_registry.read(assoc[0].o)
        self.assertTrue(sub.is_active)

        dataset = self.resource_registry.read(dataset_id)
        self.assertIsInstance(dataset, DataSet)

        log.info("Data persisted for stream_name=%s (stream_id=%s, "
                 "dataset_id=%s) dataset=%s" % (stream_name, stream_id, dataset_id, dataset))

def test_poll_and_verify_granules_persisted(self):
Example 6: TestDMEnd2End
# Required import: from interface.services.dm.iingestion_management_service import IngestionManagementServiceClient [as alias]
# Or: from interface.services.dm.iingestion_management_service.IngestionManagementServiceClient import persist_data_stream [as alias]
# ......... (part of the code is omitted here) .........
    datastore = self.get_datastore(dataset_id)
    dataset = self.dataset_management.read_dataset(dataset_id)

    now = time.time()
    timeout = now + 10
    done = False
    while not done:
        if now >= timeout:
            raise Timeout("Granules are not populating in time.")
        if len(datastore.query_view(dataset.view_name)) >= granules:
            done = True
        now = time.time()

def create_dataset(self):
    craft = CoverageCraft
    sdom, tdom = craft.create_domains()
    sdom = sdom.dump()
    tdom = tdom.dump()
    pdict = craft.create_parameters()
    pdict = pdict.dump()

    dataset_id = self.dataset_management.create_dataset(
        "test_dataset", parameter_dict=pdict, spatial_domain=sdom, temporal_domain=tdom
    )
    return dataset_id

def test_coverage_ingest(self):
    stream_id = self.pubsub_management.create_stream()
    dataset_id = self.create_dataset()
    # I freaking hate this bug
    self.get_datastore(dataset_id)
    ingestion_config_id = self.get_ingestion_config()
    self.ingestion_management.persist_data_stream(
        stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id
    )

    black_box = CoverageCraft()
    black_box.rdt["time"] = np.arange(20)
    black_box.rdt["temp"] = np.random.random(20) * 10
    black_box.sync_with_granule()
    granule = black_box.to_granule()

    publisher = SimpleStreamPublisher.new_publisher(self.container, self.exchange_point_name, stream_id)
    publisher.publish(granule)

    self.wait_until_we_have_enough_granules(dataset_id, 1)

    coverage = DatasetManagementService._get_coverage(dataset_id)
    black_box = CoverageCraft(coverage)
    black_box.sync_rdt_with_coverage()
    comp = black_box.rdt["time"] == np.arange(20)
    self.assertTrue(comp.all())

    black_box = CoverageCraft()
    black_box.rdt["time"] = np.arange(20) + 20
    black_box.rdt["temp"] = np.random.random(20) * 10
    black_box.sync_with_granule()
    granule = black_box.to_granule()

    publisher.publish(granule)

    self.wait_until_we_have_enough_granules(dataset_id, 2)

    coverage = DatasetManagementService._get_coverage(dataset_id)
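As a closing note, Example 5 shows the simplest way to confirm that persist_data_stream took effect (sketch; ingestion_management is the client instance as in the examples above):

# True for a stream currently being persisted (asserted this way in Example 5)
assert ingestion_management.is_persisted(stream_id)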