This article collects and summarizes typical usage examples of the Python method interface.services.dm.idata_retriever_service.DataRetrieverServiceClient.define_replay. If you have been wondering what exactly DataRetrieverServiceClient.define_replay does, how to call it, or what real-world uses look like, the curated examples below should help. You can also explore other usages of the class it belongs to, interface.services.dm.idata_retriever_service.DataRetrieverServiceClient.
The following presents 12 code examples of DataRetrieverServiceClient.define_replay, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
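Before the full examples, here is a minimal sketch of the typical call pattern. It assumes a started pyon container with the DM services deployed (for example via res/deploy/r2dm.yml, as in the test fixtures below); the stream and datastore names and the two-argument create_dataset call are illustrative placeholders borrowed from Example 1, the import paths simply follow the interface.services.dm naming convention used throughout the examples, and the meaning of the second value returned by define_replay (stream id vs. process id) varies between releases, as the examples show.
from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient
from interface.services.dm.idataset_management_service import DatasetManagementServiceClient

# Service clients; inside a test these are usually built after self._start_container().
dataset_management = DatasetManagementServiceClient()
data_retriever = DataRetrieverServiceClient()

# Create (or look up) the dataset whose persisted granules should be replayed.
# 'fakestream' and 'test_datasets' are placeholder names, as in Example 1.
dataset_id = dataset_management.create_dataset('fakestream', 'test_datasets')

# define_replay creates the replay resource and launches a replay process for the dataset.
replay_id, stream_id = data_retriever.define_replay(dataset_id=dataset_id)

# Kick off the replay. Older examples call start_replay(replay_id); newer ones call
# start_replay_agent(replay_id) and drive the process through a ReplayClient (Examples 3, 4, 6, 10).
data_retriever.start_replay(replay_id)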
Example 1: DataRetrieverIntTestAlpha
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import define_replay [as alias]
class DataRetrieverIntTestAlpha(IonIntegrationTestCase):
    def setUp(self):
        super(DataRetrieverIntTestAlpha, self).setUp()
        self._start_container()

        config = DotDict()
        config.bootstrap.processes.ingestion.module = 'ion.processes.data.ingestion.ingestion_worker_a'
        config.bootstrap.processes.replay.module = 'ion.processes.data.replay.replay_process_a'
        self.container.start_rel_from_url('res/deploy/r2dm.yml', config)

        self.datastore_name = 'test_datasets'
        self.datastore = self.container.datastore_manager.get_datastore(self.datastore_name, profile=DataStore.DS_PROFILE.SCIDATA)

        self.data_retriever = DataRetrieverServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        xs_dot_xp = CFG.core_xps.science_data
        try:
            self.XS, xp_base = xs_dot_xp.split('.')
            self.XP = '.'.join([get_sys_name(), xp_base])
        except ValueError:
            raise StandardError('Invalid CFG for core_xps.science_data: "%s"; must have "xs.xp" structure' % xs_dot_xp)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_define_replay(self):
        # Create a dataset to work with
        dataset_id = self.dataset_management.create_dataset('fakestream', self.datastore_name)
        replay_id, stream_id = self.data_retriever.define_replay(dataset_id=dataset_id)

        # Verify that the replay instance was created
        replay = self.resource_registry.read(replay_id)
        pid = replay.process_id
        process = self.container.proc_manager.procs[pid]
        self.assertIsInstance(process, ReplayProcess, 'Incorrect process launched')
Example 2: test_usgs_integration
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import define_replay [as alias]
# ......... some code omitted here .........
#---------------------------
# Set up the producers (CTD Simulators)
#---------------------------
# Launch five simulated CTD producers
for iteration in xrange(2):
# Make a stream to output on
stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)
#---------------------------
# Set up the datasets
#---------------------------
dataset_id = dataset_management_service.create_dataset(
stream_id=stream_id,
datastore_name=datastore_name,
view_name='datasets/stream_join_granule'
)
# Keep track of the datasets
datasets.append(dataset_id)
stream_policy_id = ingestion_management_service.create_dataset_configuration(
dataset_id = dataset_id,
archive_data = True,
archive_metadata = True,
ingestion_configuration_id = ingestion_configuration_id
)
producer_definition = ProcessDefinition()
producer_definition.executable = {
'module':'ion.agents.eoi.handler.usgs_stream_publisher',
'class':'UsgsPublisher'
}
configuration = {
'process':{
'stream_id':stream_id,
}
}
procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)
log.debug('LUKE_DEBUG: procdef_id: %s', procdef_id)
pid = process_dispatcher.schedule_process(process_definition_id=procdef_id, configuration=configuration)
# Keep track, we'll kill 'em later.
process_list.append(pid)
# Get about 4 seconds of data
time.sleep(4)
#---------------------------
# Stop producing data
#---------------------------
for process in process_list:
process_dispatcher.cancel_process(process)
#----------------------------------------------
# The replay and the transform, a love story.
#----------------------------------------------
# Happy Valentines to the clever coder who catches the above!
transform_definition = ProcessDefinition()
transform_definition.executable = {
'module':'ion.processes.data.transforms.transform_example',
'class':'TransformCapture'
}
transform_definition_id = process_dispatcher.create_process_definition(process_definition=transform_definition)
dataset_id = datasets.pop() # Just need one for now
replay_id, stream_id = data_retriever_service.define_replay(dataset_id=dataset_id)
#--------------------------------------------
# I'm Selling magazine subscriptions here!
#--------------------------------------------
subscription = pubsub_management_service.create_subscription(query=StreamQuery(stream_ids=[stream_id]),
exchange_name='transform_capture_point')
#--------------------------------------------
# Start the transform (capture)
#--------------------------------------------
transform_id = transform_management_service.create_transform(
name='capture_transform',
in_subscription_id=subscription,
process_definition_id=transform_definition_id
)
transform_management_service.activate_transform(transform_id=transform_id)
#--------------------------------------------
# BEGIN REPLAY!
#--------------------------------------------
data_retriever_service.start_replay(replay_id=replay_id)
#--------------------------------------------
# Lets get some boundaries
#--------------------------------------------
bounds = dataset_management_service.get_dataset_bounds(dataset_id=dataset_id)
Example 3: TestDMEnd2End
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import define_replay [as alias]
# ......... some code omitted here .........
# - Get the ingestion configuration from the resource registry
# - Create the dataset
# - call persist_data_stream to setup the subscription for the ingestion workers
# on the stream that you specify which causes the data to be persisted
#--------------------------------------------------------------------------------
ingest_config_id = self.get_ingestion_config()
dataset_id = self.create_dataset(pdict_id)
self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)
self.addCleanup(self.stop_ingestion, stream_id)
#--------------------------------------------------------------------------------
# Now the granules are ingesting and persisted
#--------------------------------------------------------------------------------
self.launch_producer(stream_id)
self.wait_until_we_have_enough_granules(dataset_id,40)
#--------------------------------------------------------------------------------
# Now get the data in one chunk using an RPC call to retrieve
#--------------------------------------------------------------------------------
replay_data = self.data_retriever.retrieve(dataset_id)
self.assertIsInstance(replay_data, Granule)
rdt = RecordDictionaryTool.load_from_granule(replay_data)
self.assertTrue((rdt['time'][:10] == np.arange(10)).all(),'%s' % rdt['time'][:])
self.assertTrue((rdt['binary'][:10] == np.array(['hi']*10, dtype='object')).all())
#--------------------------------------------------------------------------------
# Now to try the streamed approach
#--------------------------------------------------------------------------------
replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)
self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id)
log.info('Process ID: %s', process_id)
replay_client = ReplayClient(process_id)
#--------------------------------------------------------------------------------
# Create the listening endpoint for the retriever to talk to
#--------------------------------------------------------------------------------
sub_id = self.pubsub_management.create_subscription(self.exchange_space_name,stream_ids=[replay_stream_id])
self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
self.pubsub_management.activate_subscription(sub_id)
self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)
subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
subscriber.start()
self.addCleanup(subscriber.stop)
self.data_retriever.start_replay_agent(self.replay_id)
self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched')
replay_client.start_replay()
self.assertTrue(self.event.wait(10))
self.data_retriever.cancel_replay_agent(self.replay_id)
#--------------------------------------------------------------------------------
# Test the slicing capabilities
#--------------------------------------------------------------------------------
granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa':slice(0,5)})
rdt = RecordDictionaryTool.load_from_granule(granule)
Example 4: TestDMEnd2End
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import define_replay [as alias]
# ......... some code omitted here .........
# Start persisting the data on the stream
# - Get the ingestion configuration from the resource registry
# - Create the dataset
# - call persist_data_stream to setup the subscription for the ingestion workers
# on the stream that you specify which causes the data to be persisted
#--------------------------------------------------------------------------------
ingest_config_id = self.get_ingestion_config()
dataset_id = self.create_dataset(pdict_id)
self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)
#--------------------------------------------------------------------------------
# Now the granules are ingesting and persisted
#--------------------------------------------------------------------------------
self.launch_producer(stream_id)
self.wait_until_we_have_enough_granules(dataset_id,40)
#--------------------------------------------------------------------------------
# Now get the data in one chunk using an RPC call to retrieve
#--------------------------------------------------------------------------------
replay_data = self.data_retriever.retrieve(dataset_id)
self.assertIsInstance(replay_data, Granule)
rdt = RecordDictionaryTool.load_from_granule(replay_data)
self.assertTrue((rdt['time'][:10] == np.arange(10)).all(),'%s' % rdt['time'][:])
self.assertTrue((rdt['binary'][:10] == np.array(['hi']*10, dtype='object')).all())
#--------------------------------------------------------------------------------
# Now to try the streamed approach
#--------------------------------------------------------------------------------
replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)
self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id)
log.info('Process ID: %s', process_id)
replay_client = ReplayClient(process_id)
#--------------------------------------------------------------------------------
# Create the listening endpoint for the retriever to talk to
#--------------------------------------------------------------------------------
xp = self.container.ex_manager.create_xp(self.exchange_point_name)
subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
self.queue_buffer.append(self.exchange_space_name)
subscriber.start()
subscriber.xn.bind(replay_route.routing_key, xp)
self.data_retriever.start_replay_agent(self.replay_id)
self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched')
replay_client.start_replay()
self.assertTrue(self.event.wait(10))
subscriber.stop()
self.data_retriever.cancel_replay_agent(self.replay_id)
#--------------------------------------------------------------------------------
# Test the slicing capabilities
#--------------------------------------------------------------------------------
granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa':slice(0,5)})
rdt = RecordDictionaryTool.load_from_granule(granule)
b = rdt['time'] == np.arange(5)
Example 5: test_replay_integration
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import define_replay [as alias]
def test_replay_integration(self):
    '''
    test_replay_integration
    '''
    import numpy as np
    # Keep this import; it's used in the vector comparison below even though PyCharm says it's unused.

    cc = self.container
    XP = self.XP
    assertions = self.assertTrue

    ### Everything below here can be run as a script:
    log.debug('Got it')

    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)

    datastore_name = 'dm_test_replay_integration'

    producer = Publisher(name=(XP, 'stream producer'))

    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id=XP,
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        hdf_storage=HdfStorage(),
        number_of_workers=1
    )
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id
    )

    definition = SBE37_CDM_stream_definition()
    data_stream_id = definition.data_stream_id
    encoding_id = definition.identifiables[data_stream_id].encoding_id
    element_count_id = definition.identifiables[data_stream_id].element_count_id

    stream_def_id = pubsub_management_service.create_stream_definition(
        container=definition
    )
    stream_id = pubsub_management_service.create_stream(
        stream_definition_id=stream_def_id
    )
    dataset_id = dataset_management_service.create_dataset(
        stream_id=stream_id,
        datastore_name=datastore_name,
        view_name='datasets/dataset_by_id'
    )
    ingestion_management_service.create_dataset_configuration(
        dataset_id=dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id
    )
    definition.stream_resource_id = stream_id

    packet = _create_packet(definition)
    input_file = FileSystem.mktemp()
    input_file.write(packet.identifiables[data_stream_id].values)
    input_file_path = input_file.name
    input_file.close()

    fields = [
        'conductivity',
        'height',
        'latitude',
        'longitude',
        'pressure',
        'temperature',
        'time'
    ]

    input_vectors = acquire_data([input_file_path], fields, 2).next()

    producer.publish(msg=packet, to_name=(XP, '%s.data' % stream_id))

    replay_id, replay_stream_id = data_retriever_service.define_replay(dataset_id)
    ar = gevent.event.AsyncResult()

    def sub_listen(msg, headers):
        assertions(isinstance(msg, StreamGranuleContainer), 'replayed message is not a granule.')
        hdf_string = msg.identifiables[data_stream_id].values
        sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
        assertions(sha1 == msg.identifiables[encoding_id].sha1, 'Checksum failed.')
        assertions(msg.identifiables[element_count_id].value == 1, 'record replay count is incorrect %d.' % msg.identifiables[element_count_id].value)
        output_file = FileSystem.mktemp()
        output_file.write(msg.identifiables[data_stream_id].values)
        output_file_path = output_file.name
        output_file.close()
        output_vectors = acquire_data([output_file_path], fields, 2).next()
        for field in fields:
            comparison = (input_vectors[field]['values'] == output_vectors[field]['values'])
            assertions(comparison.all(), 'vector mismatch: %s vs %s' %
                       (input_vectors[field]['values'], output_vectors[field]['values']))
        FileSystem.unlink(output_file_path)
        ar.set(True)
# ......... some code omitted here .........
Example 6: TestDMEnd2End
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import define_replay [as alias]
# ......... some code omitted here .........
# - call persist_data_stream to setup the subscription for the ingestion workers
# on the stream that you specify which causes the data to be persisted
# --------------------------------------------------------------------------------
ingest_config_id = self.get_ingestion_config()
dataset_id = self.create_dataset(pdict_id)
self.ingestion_management.persist_data_stream(
stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id
)
# --------------------------------------------------------------------------------
# Now the granules are ingesting and persisted
# --------------------------------------------------------------------------------
self.launch_producer(stream_id)
self.wait_until_we_have_enough_granules(dataset_id, 40)
# --------------------------------------------------------------------------------
# Now get the data in one chunk using an RPC call to retrieve
# --------------------------------------------------------------------------------
replay_data = self.data_retriever.retrieve(dataset_id)
self.assertIsInstance(replay_data, Granule)
rdt = RecordDictionaryTool.load_from_granule(replay_data)
self.assertTrue((rdt["time"][:10] == np.arange(10)).all(), "%s" % rdt["time"][:])
self.assertTrue((rdt["binary"][:10] == np.array(["hi"] * 10, dtype="object")).all())
# --------------------------------------------------------------------------------
# Now to try the streamed approach
# --------------------------------------------------------------------------------
replay_stream_id, replay_route = self.pubsub_management.create_stream(
"replay_out", exchange_point=self.exchange_point_name, stream_definition_id=stream_definition
)
self.replay_id, process_id = self.data_retriever.define_replay(
dataset_id=dataset_id, stream_id=replay_stream_id
)
log.info("Process ID: %s", process_id)
replay_client = ReplayClient(process_id)
# --------------------------------------------------------------------------------
# Create the listening endpoint for the retriever to talk to
# --------------------------------------------------------------------------------
xp = self.container.ex_manager.create_xp(self.exchange_point_name)
subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
self.queue_buffer.append(self.exchange_space_name)
subscriber.start()
subscriber.xn.bind(replay_route.routing_key, xp)
self.data_retriever.start_replay_agent(self.replay_id)
self.assertTrue(replay_client.await_agent_ready(5), "The process never launched")
replay_client.start_replay()
self.assertTrue(self.event.wait(10))
subscriber.stop()
self.data_retriever.cancel_replay_agent(self.replay_id)
# --------------------------------------------------------------------------------
# Test the slicing capabilities
# --------------------------------------------------------------------------------
granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={"tdoa": slice(0, 5)})
rdt = RecordDictionaryTool.load_from_granule(granule)
b = rdt["time"] == np.arange(5)
Example 7: test_blog_ingestion_replay
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import define_replay [as alias]
# ......... some code omitted here .........
break
###=======================================================
### This section is not scriptable
###=======================================================
if len(post_ids) < 3:
self.fail('Not enough comments returned by the blog scrapers in 30 seconds')
if len(captured_input.blogs) < 1:
self.fail('No data returned in ten seconds by the blog scrapers!')
###=======================================================
### End non-scriptable
###=======================================================
#------------------------------------------------------------------------------------------------------
# Create subscriber to listen to the replays
#------------------------------------------------------------------------------------------------------
captured_replays = {}
for idx, post_id in enumerate(post_ids):
# Create the stateful listener to hold the captured data for comparison with replay
dataset_id = dsm_cli.create_dataset(
stream_id=post_id,
datastore_name='dm_datastore',
view_name='posts/posts_join_comments')
replay_id, stream_id =dr_cli.define_replay(dataset_id)
query = StreamQuery(stream_ids=[stream_id])
captured_replay = BlogListener()
#------------------------------------------------------------------------------------------------------
# Create subscriber to listen to the messages published to the ingestion
#------------------------------------------------------------------------------------------------------
# Make a subscription to the input stream to ingestion
subscription_name = 'replay_capture_queue_%d' % idx
subscription_id = pubsub_cli.create_subscription(query = query, exchange_name=subscription_name ,name = subscription_name)
# It is not required or even generally a good idea to use the subscription resource name as the queue name, but it makes things simple here
# Normally the container creates and starts subscribers for you when a transform process is spawned
subscriber = subscriber_registrar.create_subscriber(exchange_name=subscription_name, callback=captured_replay.blog_store)
subscriber.start()
captured_replay.subscriber = subscriber
pubsub_cli.activate_subscription(subscription_id)
#------------------------------------------------------------------------------------------------------
# Start the replay and listen to the results!
#------------------------------------------------------------------------------------------------------
dr_cli.start_replay(replay_id)
captured_replays[post_id] = captured_replay
Example 8: DataRetrieverServiceIntTest
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import define_replay [as alias]
class DataRetrieverServiceIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(DataRetrieverServiceIntTest, self).setUp()
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2dm.yml')

        self.couch = self.container.datastore_manager.get_datastore('test_data_retriever', profile=DataStore.DS_PROFILE.EXAMPLES)
        self.datastore_name = 'test_data_retriever'

        self.dr_cli = DataRetrieverServiceClient(node=self.container.node)
        self.dsm_cli = DatasetManagementServiceClient(node=self.container.node)
        self.rr_cli = ResourceRegistryServiceClient(node=self.container.node)
        self.ps_cli = PubsubManagementServiceClient(node=self.container.node)

    def tearDown(self):
        super(DataRetrieverServiceIntTest, self).tearDown()

    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_define_replay(self):
        dataset_id = self.dsm_cli.create_dataset(
            stream_id='12345',
            datastore_name=self.datastore_name,
            view_name='posts/posts_join_comments',
            name='test define replay'
        )
        replay_id, stream_id = self.dr_cli.define_replay(dataset_id=dataset_id)
        replay = self.rr_cli.read(replay_id)

        # Assert that the process was created
        self.assertTrue(self.container.proc_manager.procs[replay.process_id])

        self.dr_cli.cancel_replay(replay_id)

    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_cancel_replay(self):
        dataset_id = self.dsm_cli.create_dataset(
            stream_id='12345',
            datastore_name=self.datastore_name,
            view_name='posts/posts_join_comments',
            name='test define replay'
        )
        replay_id, stream_id = self.dr_cli.define_replay(dataset_id=dataset_id)
        replay = self.rr_cli.read(replay_id)

        # Assert that the process was created
        self.assertTrue(self.container.proc_manager.procs[replay.process_id])

        self.dr_cli.cancel_replay(replay_id)

        # assert that the process is no more
        self.assertFalse(replay.process_id in self.container.proc_manager.procs)

        # assert that the resource no longer exists
        with self.assertRaises(NotFound):
            self.rr_cli.read(replay_id)

    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_start_replay(self):
        post = BlogPost(title='test blog post', post_id='12345', author=BlogAuthor(name='Jon Doe'), content='this is a blog post',
                        updated=time.strftime("%Y-%m-%dT%H:%M%S-05"))

        dataset_id = self.dsm_cli.create_dataset(
            stream_id='12345',
            datastore_name=self.datastore_name,
            view_name='posts/posts_join_comments',
            name='blog posts test'
        )
        self.couch.create(post)

        replay_id, stream_id = self.dr_cli.define_replay(dataset_id)
        replay = self.rr_cli.read(replay_id)

        # assert that the process was created
        self.assertTrue(self.container.proc_manager.procs[replay.process_id])

        # pattern from Tim G
        ar = gevent.event.AsyncResult()
        def consume(message, headers):
            ar.set(message)

        stream_subscriber = StreamSubscriberRegistrar(process=self.container, node=self.container.node)
        subscriber = stream_subscriber.create_subscriber(exchange_name='test_queue', callback=consume)
        subscriber.start()

        query = StreamQuery(stream_ids=[stream_id])
        subscription_id = self.ps_cli.create_subscription(query=query, exchange_name='test_queue')
        self.ps_cli.activate_subscription(subscription_id)

        self.dr_cli.start_replay(replay_id)
        self.assertEqual(ar.get(timeout=10).post_id, post.post_id)
# ......... some code omitted here .........
Example 9: DMCollaborationIntTest
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import define_replay [as alias]
class DMCollaborationIntTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        config = DotDict()
        config.bootstrap.processes.ingestion.module = 'ion.processes.data.ingestion.ingestion_worker_a'
        config.bootstrap.processes.replay.module = 'ion.processes.data.replay.replay_process_a'
        self.container.start_rel_from_url('res/deploy/r2dm.yml', config)

        self.datastore_name = 'test_datasets'
        self.pubsub_management = PubsubManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

    def subscriber_action(self, msg, header):
        if not hasattr(self, 'received'):
            self.received = 0
        if not hasattr(self, 'async_done'):
            self.async_done = AsyncResult()
        self.received += 1
        if self.received >= 2:
            self.async_done.set(True)

    def test_ingest_to_replay(self):
        self.async_done = AsyncResult()
        sysname = get_sys_name()
        datastore = self.container.datastore_manager.get_datastore(self.datastore_name, 'SCIDATA')

        producer_definition = ProcessDefinition(name='Example Data Producer')
        producer_definition.executable = {
            'module': 'ion.processes.data.example_data_producer',
            'class': 'ExampleDataProducer'
        }

        process_definition_id = self.process_dispatcher.create_process_definition(process_definition=producer_definition)

        ingestion_configuration_id = self.ingestion_management.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=self.datastore_name, datastore_profile='SCIDATA'),
            number_of_workers=1
        )
        self.ingestion_management.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        stream_id = self.pubsub_management.create_stream(name='data stream')

        dataset_id = self.dataset_management.create_dataset(
            stream_id=stream_id,
            datastore_name=self.datastore_name,
        )

        self.ingestion_management.create_dataset_configuration(
            dataset_id=dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id
        )

        configuration = {
            'process': {
                'stream_id': stream_id
            }
        }

        self.process_dispatcher.schedule_process(process_definition_id, configuration=configuration)

        replay_id, stream_id = self.data_retriever.define_replay(dataset_id=dataset_id)

        subscriber = Subscriber(name=('%s.science_data' % sysname, 'test_queue'), callback=self.subscriber_action, binding='%s.data' % stream_id)
        gevent.spawn(subscriber.listen)

        done = False
        while not done:
            results = datastore.query_view('manifest/by_dataset')
            if len(results) >= 2:
                done = True

        self.data_retriever.start_replay(replay_id)
        self.async_done.get(timeout=10)
Example 10: TestDMEnd2End
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import define_replay [as alias]
# ......... some code omitted here .........
context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)
# Add a field that supports binary data input.
bin_context = ParameterContext('binary', param_type=ArrayType())
context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
# Add another field that supports dictionary elements.
rec_context = ParameterContext('records', param_type=RecordType())
context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))
pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)
replay_stream, replay_route = self.pubsub_management.create_stream('replay', 'xp1', stream_definition_id=stream_def_id)
dataset_id = self.create_dataset(pdict_id)
scov = DatasetManagementService._get_coverage(dataset_id)
bb = CoverageCraft(scov)
bb.rdt['time'] = np.arange(100)
bb.rdt['temp'] = np.random.random(100) + 30
bb.sync_with_granule()
DatasetManagementService._persist_coverage(dataset_id, bb.coverage) # This invalidates it for multi-host configurations
# Set up the subscriber to verify the data
subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
xp = self.container.ex_manager.create_xp('xp1')
self.queue_buffer.append(self.exchange_space_name)
subscriber.start()
subscriber.xn.bind(replay_route.routing_key, xp)
# Set up the replay agent and the client wrapper
# 1) Define the Replay (dataset and stream to publish on)
self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream)
# 2) Make a client to interact with the process (optionally provide it a process to bind with)
replay_client = ReplayClient(process_id)
# 3) Start the agent (launch the process)
self.data_retriever.start_replay_agent(self.replay_id)
# 4) Start replaying...
replay_client.start_replay()
# Wait till we get some granules
self.assertTrue(self.event.wait(5))
# We got granules, pause the replay, clear the queue and allow the process to finish consuming
replay_client.pause_replay()
gevent.sleep(1)
subscriber.xn.purge()
self.event.clear()
# Make sure there's no remaining messages being consumed
self.assertFalse(self.event.wait(1))
# Resume the replay and wait until we start getting granules again
replay_client.resume_replay()
self.assertTrue(self.event.wait(5))
# Stop the replay, clear the queues
replay_client.stop_replay()
gevent.sleep(1)
subscriber.xn.purge()
self.event.clear()
# Make sure that it did indeed stop
self.assertFalse(self.event.wait(1))
Example 11: test_replay_integration
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import define_replay [as alias]
# ......... some code omitted here .........
#------------------------------------------------------------------------------------------------------
# Set up the test hooks for the gevent event AsyncResult object
#------------------------------------------------------------------------------------------------------
def ingestion_worker_received(message, headers):
ar.set(message)
proc_1.ingest_process_test_hook = ingestion_worker_received
#------------------------------------------------------------------------------------------------------
# Set up the producers (CTD Simulators)
#------------------------------------------------------------------------------------------------------
ctd_stream_def = ctd_stream_definition()
stream_def_id = pubsub_management_service.create_stream_definition(container=ctd_stream_def, name='Junk definition')
stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)
#------------------------------------------------------------------------------------------------------
# Set up the dataset config
#------------------------------------------------------------------------------------------------------
dataset_id = dataset_management_service.create_dataset(
stream_id=stream_id,
datastore_name=datastore_name,
view_name='datasets/stream_join_granule'
)
dataset_config_id = ingestion_management_service.create_dataset_configuration(
dataset_id = dataset_id,
archive_data = True,
archive_metadata = True,
ingestion_configuration_id = ingestion_configuration_id
)
#------------------------------------------------------------------------------------------------------
# Launch a ctd_publisher
#------------------------------------------------------------------------------------------------------
publisher = publisher_registrar.create_publisher(stream_id=stream_id)
#------------------------------------------------------------------------
# Create a packet and publish it
#------------------------------------------------------------------------
ctd_packet = _create_packet(stream_id)
published_hdfstring = ctd_packet.identifiables['ctd_data'].values
publisher.publish(ctd_packet)
#------------------------------------------------------------------------------------------------------
# Catch what the ingestion worker gets! Assert it is the same packet that was published!
#------------------------------------------------------------------------------------------------------
packet = ar.get(timeout=2)
#------------------------------------------------------------------------------------------------------
# Create subscriber to listen to the replays
#------------------------------------------------------------------------------------------------------
replay_id, replay_stream_id = data_retriever_service.define_replay(dataset_id)
query = StreamQuery(stream_ids=[replay_stream_id])
subscription_id = pubsub_management_service.create_subscription(query = query, exchange_name='replay_capture_point' ,name = 'replay_capture_point')
# It is not required or even generally a good idea to use the subscription resource name as the queue name, but it makes things simple here
# Normally the container creates and starts subscribers for you when a transform process is spawned
subscriber = subscriber_registrar.create_subscriber(exchange_name='replay_capture_point', callback=_subscriber_call_back)
subscriber.start()
pubsub_management_service.activate_subscription(subscription_id)
#------------------------------------------------------------------------------------------------------
# Start the replay
#------------------------------------------------------------------------------------------------------
data_retriever_service.start_replay(replay_id)
#------------------------------------------------------------------------------------------------------
# Get the hdf string from the captured stream in the replay
#------------------------------------------------------------------------------------------------------
retrieved_hdf_string = ar2.get(timeout=2)
### Non scriptable portion of the test
#------------------------------------------------------------------------------------------------------
# Assert that it matches the message we sent
#------------------------------------------------------------------------------------------------------
self.assertEquals(packet.identifiables['stream_encoding'].sha1, ctd_packet.identifiables['stream_encoding'].sha1)
self.assertEquals(retrieved_hdf_string, published_hdfstring)
Example 12: TestDMEnd2End
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import define_replay [as alias]
# ......... some code omitted here .........
self.launch_producer(stream_id)
# --------------------------------------------------------------------------------
# Start persisting the data on the stream
# - Get the ingestion configuration from the resource registry
# - Create the dataset
# - call persist_data_stream to setup the subscription for the ingestion workers
# on the stream that you specify which causes the data to be persisted
# --------------------------------------------------------------------------------
ingest_config_id = self.get_ingestion_config()
dataset_id = self.create_dataset()
self.ingestion_management.persist_data_stream(
stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id
)
# --------------------------------------------------------------------------------
# Now the granules are ingesting and persisted
# --------------------------------------------------------------------------------
self.wait_until_we_have_enough_granules(dataset_id, 4)
# --------------------------------------------------------------------------------
# Now get the data in one chunk using an RPC call to retrieve
# --------------------------------------------------------------------------------
replay_data = self.data_retriever.retrieve(dataset_id)
self.assertIsInstance(replay_data, Granule)
# --------------------------------------------------------------------------------
# Now to try the streamed approach
# --------------------------------------------------------------------------------
replay_id, stream_id = self.data_retriever.define_replay(dataset_id)
# --------------------------------------------------------------------------------
# Create the listening endpoint for the retriever to talk to
# --------------------------------------------------------------------------------
xp = self.container.ex_manager.create_xp(self.exchange_point_name)
xn = self.container.ex_manager.create_xn_queue(self.exchange_space_name)
xn.bind("%s.data" % stream_id, xp)
subscriber = SimpleStreamSubscriber.new_subscriber(
self.container, self.exchange_space_name, self.validate_granule_subscription
)
subscriber.start()
self.data_retriever.start_replay(replay_id)
fail = False
try:
self.event.wait(10)
except gevent.Timeout:
fail = True
subscriber.stop()
self.assertTrue(not fail, "Failed to validate the data.")
def test_replay_by_time(self):
log.info("starting test...")
# --------------------------------------------------------------------------------
# Create the necessary configurations for the test
# --------------------------------------------------------------------------------
stream_id = self.pubsub_management.create_stream()
config_id = self.get_ingestion_config()