This article collects typical usage examples of the Python method interface.services.dm.idata_retriever_service.DataRetrieverServiceClient.start_replay. If you have been wondering what DataRetrieverServiceClient.start_replay does, how to call it, or how it is used in practice, the hand-picked code samples below should help. You can also explore further usage examples of the containing class, interface.services.dm.idata_retriever_service.DataRetrieverServiceClient.
The following shows 7 code examples of DataRetrieverServiceClient.start_replay, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
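Before diving into the full examples, the short sketch below distills the call sequence they all share: define a replay for a persisted dataset, attach a subscriber to the replay stream, then call start_replay. It is an orientation sketch only, not one of the collected examples; the capability container, dataset_id, pubsub_management_service, subscriber_registrar, and StreamQuery names are assumed to be set up or imported as in the examples that follow, and the queue name is a placeholder.
# Minimal usage sketch (not one of the collected examples). Assumptions: a running
# capability container with DM services deployed (e.g. res/deploy/r2dm.yml) exposed
# as `cc`, an existing `dataset_id` for a persisted dataset, and
# `pubsub_management_service`, `subscriber_registrar` and `StreamQuery` set up or
# imported as in the examples below; 'replay_example_queue' is a placeholder name.
import gevent.event

from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient

data_retriever = DataRetrieverServiceClient(node=cc.node)

# 1. Define a replay for the dataset: returns the replay resource id and the
#    stream id on which the replayed granules will be published.
replay_id, replay_stream_id = data_retriever.define_replay(dataset_id)

# 2. Listen on the replay stream and hand each replayed message to a callback.
ar = gevent.event.AsyncResult()

def on_replayed_message(message, headers):
    ar.set(message)

query = StreamQuery(stream_ids=[replay_stream_id])
subscription_id = pubsub_management_service.create_subscription(query=query,
                                                                exchange_name='replay_example_queue')
subscriber = subscriber_registrar.create_subscriber(exchange_name='replay_example_queue',
                                                    callback=on_replayed_message)
subscriber.start()
pubsub_management_service.activate_subscription(subscription_id)

# 3. Start the replay and wait for the first replayed message to arrive.
data_retriever.start_replay(replay_id)
first_message = ar.get(timeout=10)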
Example 1: test_replay_integration
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import start_replay [as alias]
#......... part of the code omitted here .........
### Everything below here can be run as a script:
log.debug('Got it')
pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
dataset_management_service = DatasetManagementServiceClient(node=cc.node)
data_retriever_service = DataRetrieverServiceClient(node=cc.node)
datastore_name = 'dm_test_replay_integration'
producer = Publisher(name=(XP,'stream producer'))
ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
    exchange_point_id=XP,
    couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
    hdf_storage=HdfStorage(),
    number_of_workers=1
)
ingestion_management_service.activate_ingestion_configuration(
    ingestion_configuration_id=ingestion_configuration_id
)
definition = SBE37_CDM_stream_definition()
data_stream_id = definition.data_stream_id
encoding_id = definition.identifiables[data_stream_id].encoding_id
element_count_id = definition.identifiables[data_stream_id].element_count_id
stream_def_id = pubsub_management_service.create_stream_definition(
    container=definition
)
stream_id = pubsub_management_service.create_stream(
    stream_definition_id=stream_def_id
)
dataset_id = dataset_management_service.create_dataset(
    stream_id=stream_id,
    datastore_name=datastore_name,
    view_name='datasets/dataset_by_id'
)
ingestion_management_service.create_dataset_configuration(
    dataset_id=dataset_id,
    archive_data=True,
    archive_metadata=True,
    ingestion_configuration_id=ingestion_configuration_id
)
definition.stream_resource_id = stream_id
packet = _create_packet(definition)
input_file = FileSystem.mktemp()
input_file.write(packet.identifiables[data_stream_id].values)
input_file_path = input_file.name
input_file.close()
fields = [
    'conductivity',
    'height',
    'latitude',
    'longitude',
    'pressure',
    'temperature',
    'time'
]
input_vectors = acquire_data([input_file_path], fields, 2).next()
producer.publish(msg=packet, to_name=(XP,'%s.data' % stream_id))
replay_id, replay_stream_id = data_retriever_service.define_replay(dataset_id)
ar = gevent.event.AsyncResult()
def sub_listen(msg, headers):
    assertions(isinstance(msg, StreamGranuleContainer), 'replayed message is not a granule.')
    hdf_string = msg.identifiables[data_stream_id].values
    sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
    assertions(sha1 == msg.identifiables[encoding_id].sha1, 'Checksum failed.')
    assertions(msg.identifiables[element_count_id].value == 1,
               'record replay count is incorrect %d.' % msg.identifiables[element_count_id].value)
    output_file = FileSystem.mktemp()
    output_file.write(msg.identifiables[data_stream_id].values)
    output_file_path = output_file.name
    output_file.close()
    output_vectors = acquire_data([output_file_path], fields, 2).next()
    for field in fields:
        comparison = (input_vectors[field]['values'] == output_vectors[field]['values'])
        assertions(comparison.all(), 'vector mismatch: %s vs %s' %
                   (input_vectors[field]['values'], output_vectors[field]['values']))
    FileSystem.unlink(output_file_path)
    ar.set(True)
subscriber = Subscriber(name=(XP,'replay listener'),callback=sub_listen)
g = gevent.Greenlet(subscriber.listen, binding='%s.data' % replay_stream_id)
g.start()
data_retriever_service.start_replay(replay_id)
ar.get(timeout=10)
FileSystem.unlink(input_file_path)
Example 2: test_usgs_integration
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import start_replay [as alias]
#......... part of the code omitted here .........
#---------------------------
# Set up the producers (CTD Simulators)
#---------------------------
# Launch two simulated CTD producers
for iteration in xrange(2):
    # Make a stream to output on
    stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)
    #---------------------------
    # Set up the datasets
    #---------------------------
    dataset_id = dataset_management_service.create_dataset(
        stream_id=stream_id,
        datastore_name=datastore_name,
        view_name='datasets/stream_join_granule'
    )
    # Keep track of the datasets
    datasets.append(dataset_id)
    stream_policy_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id
    )
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': 'ion.agents.eoi.handler.usgs_stream_publisher',
        'class': 'UsgsPublisher'
    }
    configuration = {
        'process': {
            'stream_id': stream_id,
        }
    }
    procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)
    log.debug('LUKE_DEBUG: procdef_id: %s', procdef_id)
    pid = process_dispatcher.schedule_process(process_definition_id=procdef_id, configuration=configuration)
    # Keep track, we'll kill 'em later.
    process_list.append(pid)
# Get about 4 seconds of data
time.sleep(4)
#---------------------------
# Stop producing data
#---------------------------
for process in process_list:
    process_dispatcher.cancel_process(process)
#----------------------------------------------
# The replay and the transform, a love story.
#----------------------------------------------
# Happy Valentines to the clever coder who catches the above!
transform_definition = ProcessDefinition()
transform_definition.executable = {
    'module': 'ion.processes.data.transforms.transform_example',
    'class': 'TransformCapture'
}
transform_definition_id = process_dispatcher.create_process_definition(process_definition=transform_definition)
dataset_id = datasets.pop() # Just need one for now
replay_id, stream_id = data_retriever_service.define_replay(dataset_id=dataset_id)
#--------------------------------------------
# I'm Selling magazine subscriptions here!
#--------------------------------------------
subscription = pubsub_management_service.create_subscription(query=StreamQuery(stream_ids=[stream_id]),
                                                             exchange_name='transform_capture_point')
#--------------------------------------------
# Start the transform (capture)
#--------------------------------------------
transform_id = transform_management_service.create_transform(
    name='capture_transform',
    in_subscription_id=subscription,
    process_definition_id=transform_definition_id
)
transform_management_service.activate_transform(transform_id=transform_id)
#--------------------------------------------
# BEGIN REPLAY!
#--------------------------------------------
data_retriever_service.start_replay(replay_id=replay_id)
#--------------------------------------------
# Lets get some boundaries
#--------------------------------------------
bounds = dataset_management_service.get_dataset_bounds(dataset_id=dataset_id)
Example 3: test_blog_ingestion_replay
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import start_replay [as alias]
#......... part of the code omitted here .........
break
###=======================================================
### This section is not scriptable
###=======================================================
if len(post_ids) < 3:
    self.fail('Not enough comments returned by the blog scrappers in 30 seconds')
if len(captured_input.blogs) < 1:
    self.fail('No data returned in ten seconds by the blog scrappers!')
###=======================================================
### End non-scriptable
###=======================================================
#------------------------------------------------------------------------------------------------------
# Create subscriber to listen to the replays
#------------------------------------------------------------------------------------------------------
captured_replays = {}
for idx, post_id in enumerate(post_ids):
    # Create the stateful listener to hold the captured data for comparison with replay
    dataset_id = dsm_cli.create_dataset(
        stream_id=post_id,
        datastore_name='dm_datastore',
        view_name='posts/posts_join_comments')
    replay_id, stream_id = dr_cli.define_replay(dataset_id)
    query = StreamQuery(stream_ids=[stream_id])
    captured_replay = BlogListener()
    #------------------------------------------------------------------------------------------------------
    # Create subscriber to listen to the messages published to the ingestion
    #------------------------------------------------------------------------------------------------------
    # Make a subscription to the input stream to ingestion
    subscription_name = 'replay_capture_queue_%d' % idx
    subscription_id = pubsub_cli.create_subscription(query=query, exchange_name=subscription_name, name=subscription_name)
    # It is not required or even generally a good idea to use the subscription resource name as the queue name, but it makes things simple here
    # Normally the container creates and starts subscribers for you when a transform process is spawned
    subscriber = subscriber_registrar.create_subscriber(exchange_name=subscription_name, callback=captured_replay.blog_store)
    subscriber.start()
    captured_replay.subscriber = subscriber
    pubsub_cli.activate_subscription(subscription_id)
    #------------------------------------------------------------------------------------------------------
    # Start the replay and listen to the results!
    #------------------------------------------------------------------------------------------------------
    dr_cli.start_replay(replay_id)
    captured_replays[post_id] = captured_replay
###=======================================================
### The rest is not scriptable
###=======================================================
# wait five seconds for some data to come in...
log.warn('Sleeping for 5 seconds to wait for some output')
time.sleep(5)
matched_comments = {}
for post_id, captured_replay in captured_replays.iteritems():
    # There should be only one blog in here!
    self.assertEqual(len(captured_replay.blogs), 1)
    replayed_blog = captured_replay.blogs[post_id]
    input_blog = captured_input.blogs[post_id]
    self.assertEqual(replayed_blog['post'].content, input_blog['post'].content)
    # can't deterministically assert that the number of comments is the same...
    matched_comments[post_id] = 0
    for updated, comment in replayed_blog.get('comments', {}).iteritems():
        self.assertIn(updated, input_blog['comments'])
        matched_comments[post_id] += 1
# Assert that we got some comments back!
self.assertTrue(sum(matched_comments.values()) > 0)
log.info('Matched comments on the following blogs: %s' % matched_comments)
Example 4: DataRetrieverServiceIntTest
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import start_replay [as alias]
class DataRetrieverServiceIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(DataRetrieverServiceIntTest, self).setUp()
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2dm.yml')
        self.couch = self.container.datastore_manager.get_datastore('test_data_retriever', profile=DataStore.DS_PROFILE.EXAMPLES)
        self.datastore_name = 'test_data_retriever'
        self.dr_cli = DataRetrieverServiceClient(node=self.container.node)
        self.dsm_cli = DatasetManagementServiceClient(node=self.container.node)
        self.rr_cli = ResourceRegistryServiceClient(node=self.container.node)
        self.ps_cli = PubsubManagementServiceClient(node=self.container.node)

    def tearDown(self):
        super(DataRetrieverServiceIntTest, self).tearDown()

    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_define_replay(self):
        dataset_id = self.dsm_cli.create_dataset(
            stream_id='12345',
            datastore_name=self.datastore_name,
            view_name='posts/posts_join_comments',
            name='test define replay'
        )
        replay_id, stream_id = self.dr_cli.define_replay(dataset_id=dataset_id)
        replay = self.rr_cli.read(replay_id)
        # Assert that the process was created
        self.assertTrue(self.container.proc_manager.procs[replay.process_id])
        self.dr_cli.cancel_replay(replay_id)

    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_cancel_replay(self):
        dataset_id = self.dsm_cli.create_dataset(
            stream_id='12345',
            datastore_name=self.datastore_name,
            view_name='posts/posts_join_comments',
            name='test define replay'
        )
        replay_id, stream_id = self.dr_cli.define_replay(dataset_id=dataset_id)
        replay = self.rr_cli.read(replay_id)
        # Assert that the process was created
        self.assertTrue(self.container.proc_manager.procs[replay.process_id])
        self.dr_cli.cancel_replay(replay_id)
        # Assert that the process is no more
        self.assertFalse(replay.process_id in self.container.proc_manager.procs)
        # Assert that the resource no longer exists
        with self.assertRaises(NotFound):
            self.rr_cli.read(replay_id)

    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_start_replay(self):
        post = BlogPost(title='test blog post', post_id='12345', author=BlogAuthor(name='Jon Doe'), content='this is a blog post',
                        updated=time.strftime("%Y-%m-%dT%H:%M%S-05"))
        dataset_id = self.dsm_cli.create_dataset(
            stream_id='12345',
            datastore_name=self.datastore_name,
            view_name='posts/posts_join_comments',
            name='blog posts test'
        )
        self.couch.create(post)
        replay_id, stream_id = self.dr_cli.define_replay(dataset_id)
        replay = self.rr_cli.read(replay_id)
        # Assert that the process was created
        self.assertTrue(self.container.proc_manager.procs[replay.process_id])
        # pattern from Tim G
        ar = gevent.event.AsyncResult()
        def consume(message, headers):
            ar.set(message)
        stream_subscriber = StreamSubscriberRegistrar(process=self.container, node=self.container.node)
        subscriber = stream_subscriber.create_subscriber(exchange_name='test_queue', callback=consume)
        subscriber.start()
        query = StreamQuery(stream_ids=[stream_id])
        subscription_id = self.ps_cli.create_subscription(query=query, exchange_name='test_queue')
        self.ps_cli.activate_subscription(subscription_id)
        self.dr_cli.start_replay(replay_id)
        self.assertEqual(ar.get(timeout=10).post_id, post.post_id)
#......... part of the code omitted here .........
Example 5: DMCollaborationIntTest
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import start_replay [as alias]
class DMCollaborationIntTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        config = DotDict()
        config.bootstrap.processes.ingestion.module = 'ion.processes.data.ingestion.ingestion_worker_a'
        config.bootstrap.processes.replay.module = 'ion.processes.data.replay.replay_process_a'
        self.container.start_rel_from_url('res/deploy/r2dm.yml', config)
        self.datastore_name = 'test_datasets'
        self.pubsub_management = PubsubManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

    def subscriber_action(self, msg, header):
        if not hasattr(self, 'received'):
            self.received = 0
        if not hasattr(self, 'async_done'):
            self.async_done = AsyncResult()
        self.received += 1
        if self.received >= 2:
            self.async_done.set(True)

    def test_ingest_to_replay(self):
        self.async_done = AsyncResult()
        sysname = get_sys_name()
        datastore = self.container.datastore_manager.get_datastore(self.datastore_name, 'SCIDATA')
        producer_definition = ProcessDefinition(name='Example Data Producer')
        producer_definition.executable = {
            'module': 'ion.processes.data.example_data_producer',
            'class': 'ExampleDataProducer'
        }
        process_definition_id = self.process_dispatcher.create_process_definition(process_definition=producer_definition)
        ingestion_configuration_id = self.ingestion_management.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=self.datastore_name, datastore_profile='SCIDATA'),
            number_of_workers=1
        )
        self.ingestion_management.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)
        stream_id = self.pubsub_management.create_stream(name='data stream')
        dataset_id = self.dataset_management.create_dataset(
            stream_id=stream_id,
            datastore_name=self.datastore_name,
        )
        self.ingestion_management.create_dataset_configuration(
            dataset_id=dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id
        )
        configuration = {
            'process': {
                'stream_id': stream_id
            }
        }
        self.process_dispatcher.schedule_process(process_definition_id, configuration=configuration)
        replay_id, stream_id = self.data_retriever.define_replay(dataset_id=dataset_id)
        subscriber = Subscriber(name=('%s.science_data' % sysname, 'test_queue'), callback=self.subscriber_action, binding='%s.data' % stream_id)
        gevent.spawn(subscriber.listen)
        done = False
        while not done:
            results = datastore.query_view('manifest/by_dataset')
            if len(results) >= 2:
                done = True
        self.data_retriever.start_replay(replay_id)
        self.async_done.get(timeout=10)
Example 6: test_replay_integration
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import start_replay [as alias]
#......... part of the code omitted here .........
#------------------------------------------------------------------------------------------------------
# Set up the test hooks for the gevent event AsyncResult object
#------------------------------------------------------------------------------------------------------
def ingestion_worker_received(message, headers):
    ar.set(message)

proc_1.ingest_process_test_hook = ingestion_worker_received
#------------------------------------------------------------------------------------------------------
# Set up the producers (CTD Simulators)
#------------------------------------------------------------------------------------------------------
ctd_stream_def = ctd_stream_definition()
stream_def_id = pubsub_management_service.create_stream_definition(container=ctd_stream_def, name='Junk definition')
stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)
#------------------------------------------------------------------------------------------------------
# Set up the dataset config
#------------------------------------------------------------------------------------------------------
dataset_id = dataset_management_service.create_dataset(
    stream_id=stream_id,
    datastore_name=datastore_name,
    view_name='datasets/stream_join_granule'
)
dataset_config_id = ingestion_management_service.create_dataset_configuration(
    dataset_id=dataset_id,
    archive_data=True,
    archive_metadata=True,
    ingestion_configuration_id=ingestion_configuration_id
)
#------------------------------------------------------------------------------------------------------
# Launch a ctd_publisher
#------------------------------------------------------------------------------------------------------
publisher = publisher_registrar.create_publisher(stream_id=stream_id)
#------------------------------------------------------------------------
# Create a packet and publish it
#------------------------------------------------------------------------
ctd_packet = _create_packet(stream_id)
published_hdfstring = ctd_packet.identifiables['ctd_data'].values
publisher.publish(ctd_packet)
#------------------------------------------------------------------------------------------------------
# Catch what the ingestion worker gets! Assert it is the same packet that was published!
#------------------------------------------------------------------------------------------------------
packet = ar.get(timeout=2)
#------------------------------------------------------------------------------------------------------
# Create subscriber to listen to the replays
#------------------------------------------------------------------------------------------------------
replay_id, replay_stream_id = data_retriever_service.define_replay(dataset_id)
query = StreamQuery(stream_ids=[replay_stream_id])
subscription_id = pubsub_management_service.create_subscription(query=query, exchange_name='replay_capture_point', name='replay_capture_point')
# It is not required or even generally a good idea to use the subscription resource name as the queue name, but it makes things simple here
# Normally the container creates and starts subscribers for you when a transform process is spawned
subscriber = subscriber_registrar.create_subscriber(exchange_name='replay_capture_point', callback=_subscriber_call_back)
subscriber.start()
pubsub_management_service.activate_subscription(subscription_id)
#------------------------------------------------------------------------------------------------------
# Start the replay
#------------------------------------------------------------------------------------------------------
data_retriever_service.start_replay(replay_id)
#------------------------------------------------------------------------------------------------------
# Get the hdf string from the captured stream in the replay
#------------------------------------------------------------------------------------------------------
retrieved_hdf_string = ar2.get(timeout=2)
### Non scriptable portion of the test
#------------------------------------------------------------------------------------------------------
# Assert that it matches the message we sent
#------------------------------------------------------------------------------------------------------
self.assertEquals(packet.identifiables['stream_encoding'].sha1, ctd_packet.identifiables['stream_encoding'].sha1)
self.assertEquals(retrieved_hdf_string, published_hdfstring)
Example 7: TestDMEnd2End
# Required import: from interface.services.dm.idata_retriever_service import DataRetrieverServiceClient [as alias]
# Or: from interface.services.dm.idata_retriever_service.DataRetrieverServiceClient import start_replay [as alias]
#......... part of the code omitted here .........
stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id
)
# --------------------------------------------------------------------------------
# Now the granules are ingesting and persisted
# --------------------------------------------------------------------------------
self.wait_until_we_have_enough_granules(dataset_id, 4)
# --------------------------------------------------------------------------------
# Now get the data in one chunk using an RPC call to retrieve
# --------------------------------------------------------------------------------
replay_data = self.data_retriever.retrieve(dataset_id)
self.assertIsInstance(replay_data, Granule)
# --------------------------------------------------------------------------------
# Now to try the streamed approach
# --------------------------------------------------------------------------------
replay_id, stream_id = self.data_retriever.define_replay(dataset_id)
# --------------------------------------------------------------------------------
# Create the listening endpoint for the retriever to talk to
# --------------------------------------------------------------------------------
xp = self.container.ex_manager.create_xp(self.exchange_point_name)
xn = self.container.ex_manager.create_xn_queue(self.exchange_space_name)
xn.bind("%s.data" % stream_id, xp)
subscriber = SimpleStreamSubscriber.new_subscriber(
    self.container, self.exchange_space_name, self.validate_granule_subscription
)
subscriber.start()
self.data_retriever.start_replay(replay_id)
fail = False
try:
    self.event.wait(10)
except gevent.Timeout:
    fail = True
subscriber.stop()
self.assertTrue(not fail, "Failed to validate the data.")
def test_replay_by_time(self):
    log.info("starting test...")
    # --------------------------------------------------------------------------------
    # Create the necessary configurations for the test
    # --------------------------------------------------------------------------------
    stream_id = self.pubsub_management.create_stream()
    config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset()
    self.ingestion_management.persist_data_stream(
        stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
    )
    # --------------------------------------------------------------------------------
    # Create the datastore first,
    # --------------------------------------------------------------------------------
    # There is sometimes a race condition between the services and the process for
    # the creation of the datastore and its instance; this ensures the datastore
    # exists before the process is even subscribing to data.
    self.get_datastore(dataset_id)
    self.publish_fake_data(stream_id)