This article collects typical usage examples of the Python method interface.services.dm.iingestion_management_service.IngestionManagementServiceClient.create_dataset_configuration. If you are wondering how IngestionManagementServiceClient.create_dataset_configuration is used in practice, the curated examples below should help. You can also explore the containing class, interface.services.dm.iingestion_management_service.IngestionManagementServiceClient, for further context.
Six code examples of IngestionManagementServiceClient.create_dataset_configuration are shown below, ordered by popularity.
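Before the full examples, here is a minimal sketch of the call sequence that every example below follows: create and activate an ingestion configuration, create a dataset bound to a stream, then register the dataset with create_dataset_configuration. The import path for CouchStorage and the pre-existing stream_id are assumptions for illustration; everything else mirrors the examples.

# Minimal sketch -- assumes a running ION container `cc` and an existing stream_id
from interface.services.dm.iingestion_management_service import IngestionManagementServiceClient
from interface.services.dm.idataset_management_service import DatasetManagementServiceClient
from interface.objects import CouchStorage  # import path assumed

ingestion_management = IngestionManagementServiceClient(node=cc.node)
dataset_management = DatasetManagementServiceClient(node=cc.node)

# 1. Create and activate an ingestion configuration (workers ingest into a couch datastore)
ingestion_configuration_id = ingestion_management.create_ingestion_configuration(
    exchange_point_id='science_data',
    couch_storage=CouchStorage(datastore_name='my_datastore', datastore_profile='SCIDATA'),
    number_of_workers=1
)
ingestion_management.activate_ingestion_configuration(
    ingestion_configuration_id=ingestion_configuration_id)

# 2. Create a dataset over the stream, then configure how it is ingested
dataset_id = dataset_management.create_dataset(
    stream_id=stream_id,
    datastore_name='my_datastore',
    view_name='datasets/stream_join_granule'
)
dataset_config_id = ingestion_management.create_dataset_configuration(
    dataset_id=dataset_id,
    archive_data=True,        # archive the science data itself
    archive_metadata=True,    # archive the accompanying metadata
    ingestion_configuration_id=ingestion_configuration_id
)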
Example 1: test_usgs_integration
# Required import: from interface.services.dm.iingestion_management_service import IngestionManagementServiceClient [as alias]
# Or: from interface.services.dm.iingestion_management_service.IngestionManagementServiceClient import create_dataset_configuration [as alias]
def test_usgs_integration(self):
    '''
    test_usgs_integration
    Test full DM Services Integration using usgs
    '''
    cc = self.container
    assertions = self.assertTrue

    #-----------------------------
    # Copy below here
    #-----------------------------
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)
    transform_management_service = TransformManagementServiceClient(node=cc.node)
    process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

    process_list = []
    datasets = []

    datastore_name = 'test_usgs_integration'

    #---------------------------
    # Set up ingestion
    #---------------------------
    # Configure ingestion using eight workers, ingesting to the test_usgs_integration datastore with the SCIDATA profile
    log.debug('Calling create_ingestion_configuration')
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        number_of_workers=8
    )
    #
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    usgs_stream_def = USGS_stream_definition()
    stream_def_id = pubsub_management_service.create_stream_definition(container=usgs_stream_def, name='Junk definition')

    #---------------------------
    # Set up the producers (USGS publishers)
    #---------------------------
    # Launch two simulated USGS producers
    for iteration in xrange(2):
        # Make a stream to output on
        stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)

        #---------------------------
        # Set up the datasets
        #---------------------------
        dataset_id = dataset_management_service.create_dataset(
            stream_id=stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule'
        )
        # Keep track of the datasets
        datasets.append(dataset_id)

        stream_policy_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id
        )

        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.agents.eoi.handler.usgs_stream_publisher',
            'class': 'UsgsPublisher'
        }
        configuration = {
            'process': {
                'stream_id': stream_id,
            }
        }
        procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)
        log.debug('LUKE_DEBUG: procdef_id: %s', procdef_id)
        pid = process_dispatcher.schedule_process(process_definition_id=procdef_id, configuration=configuration)
        # Keep track, we'll kill 'em later.
        process_list.append(pid)

    # Get about 4 seconds of data
    time.sleep(4)

    #---------------------------
    # Stop producing data
    #---------------------------
    for process in process_list:
        process_dispatcher.cancel_process(process)

    #----------------------------------------------
    # The replay and the transform, a love story.
    #----------------------------------------------
# ... (the rest of this example is omitted) ...
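Example 1 is truncated just before the replay step. Based on the replay pattern shown in Examples 2 and 4 (define_replay, a Subscriber bound to the replay stream, then start_replay), a sketch of the continuation for the first dataset could look like this; the queue name and the 10-second timeout are illustrative assumptions, not the original code.

# Hedged sketch of a replay step, following Examples 2 and 4
replay_id, replay_stream_id = data_retriever_service.define_replay(datasets[0])

ar = gevent.event.AsyncResult()
def replay_listener(msg, headers):
    ar.set(msg)  # hand the first replayed granule back to the test

subscriber = Subscriber(name=('%s.science_data' % get_sys_name(), 'replay_queue'),  # queue name assumed
                        callback=replay_listener,
                        binding='%s.data' % replay_stream_id)
gevent.spawn(subscriber.listen)

data_retriever_service.start_replay(replay_id)
replayed_granule = ar.get(timeout=10)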
Example 2: test_replay_integration
# Required import: from interface.services.dm.iingestion_management_service import IngestionManagementServiceClient [as alias]
# Or: from interface.services.dm.iingestion_management_service.IngestionManagementServiceClient import create_dataset_configuration [as alias]
def test_replay_integration(self):
    '''
    test_replay_integration
    '''
    import numpy as np
    # Keep this import; it is used in the vector comparison below even though PyCharm flags it as unused.
    cc = self.container
    XP = self.XP
    assertions = self.assertTrue

    ### Everything below here can be run as a script:
    log.debug('Got it')

    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)

    datastore_name = 'dm_test_replay_integration'

    producer = Publisher(name=(XP, 'stream producer'))

    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id=XP,
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        hdf_storage=HdfStorage(),
        number_of_workers=1
    )
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id
    )

    definition = SBE37_CDM_stream_definition()
    data_stream_id = definition.data_stream_id
    encoding_id = definition.identifiables[data_stream_id].encoding_id
    element_count_id = definition.identifiables[data_stream_id].element_count_id

    stream_def_id = pubsub_management_service.create_stream_definition(
        container=definition
    )
    stream_id = pubsub_management_service.create_stream(
        stream_definition_id=stream_def_id
    )
    dataset_id = dataset_management_service.create_dataset(
        stream_id=stream_id,
        datastore_name=datastore_name,
        view_name='datasets/dataset_by_id'
    )
    ingestion_management_service.create_dataset_configuration(
        dataset_id=dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id
    )
    definition.stream_resource_id = stream_id

    packet = _create_packet(definition)
    input_file = FileSystem.mktemp()
    input_file.write(packet.identifiables[data_stream_id].values)
    input_file_path = input_file.name
    input_file.close()

    fields = [
        'conductivity',
        'height',
        'latitude',
        'longitude',
        'pressure',
        'temperature',
        'time'
    ]

    input_vectors = acquire_data([input_file_path], fields, 2).next()

    producer.publish(msg=packet, to_name=(XP, '%s.data' % stream_id))

    replay_id, replay_stream_id = data_retriever_service.define_replay(dataset_id)
    ar = gevent.event.AsyncResult()

    def sub_listen(msg, headers):
        assertions(isinstance(msg, StreamGranuleContainer), 'replayed message is not a granule.')
        hdf_string = msg.identifiables[data_stream_id].values
        sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
        assertions(sha1 == msg.identifiables[encoding_id].sha1, 'Checksum failed.')
        assertions(msg.identifiables[element_count_id].value == 1,
                   'record replay count is incorrect %d.' % msg.identifiables[element_count_id].value)
        output_file = FileSystem.mktemp()
        output_file.write(msg.identifiables[data_stream_id].values)
        output_file_path = output_file.name
        output_file.close()
        output_vectors = acquire_data([output_file_path], fields, 2).next()
        for field in fields:
            comparison = (input_vectors[field]['values'] == output_vectors[field]['values'])
            assertions(comparison.all(), 'vector mismatch: %s vs %s' %
                       (input_vectors[field]['values'], output_vectors[field]['values']))
        FileSystem.unlink(output_file_path)
        ar.set(True)
# ... (the rest of this example is omitted) ...
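The wiring of sub_listen is part of the omitted tail. Judging from the Subscriber usage in Example 4 and the XP exchange point used above, the remainder plausibly attaches the callback to the replay stream and blocks on the AsyncResult; the queue name here is an assumption.

# Hedged sketch: attach sub_listen to the replay stream and wait for it to fire
subscriber = Subscriber(name=(XP, 'replay_test_queue'),  # queue name assumed
                        callback=sub_listen,
                        binding='%s.data' % replay_stream_id)
gevent.spawn(subscriber.listen)

data_retriever_service.start_replay(replay_id)
assertions(ar.get(timeout=10), 'replay did not complete in time')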
Example 3: test_dm_integration
# Required import: from interface.services.dm.iingestion_management_service import IngestionManagementServiceClient [as alias]
# Or: from interface.services.dm.iingestion_management_service.IngestionManagementServiceClient import create_dataset_configuration [as alias]
def test_dm_integration(self):
    '''
    test_dm_integration
    Test full DM Services Integration
    '''
    cc = self.container
    assertions = self.assertTrue

    #-----------------------------
    # Copy below here to run as a script (don't forget the imports of course!)
    #-----------------------------

    # Create some service clients...
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)
    transform_management_service = TransformManagementServiceClient(node=cc.node)
    process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

    # declare some handy variables
    datastore_name = 'test_dm_integration'

    ###
    ### In the beginning there were two stream definitions...
    ###
    # create a stream definition for the data from the ctd simulator
    ctd_stream_def = SBE37_CDM_stream_definition()
    ctd_stream_def_id = pubsub_management_service.create_stream_definition(container=ctd_stream_def, name='Simulated CTD data')

    # create a stream definition for the data from the salinity transform
    sal_stream_def_id = pubsub_management_service.create_stream_definition(container=SalinityTransform.outgoing_stream_def, name='Scalar Salinity data stream')

    ###
    ### And two process definitions...
    ###
    # one for the ctd simulator...
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': 'ion.processes.data.ctd_stream_publisher',
        'class': 'SimpleCtdPublisher'
    }
    ctd_sim_procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)

    # one for the salinity transform
    producer_definition = ProcessDefinition()
    producer_definition.executable = {
        'module': 'ion.processes.data.transforms.ctd.ctd_L2_salinity',
        'class': 'SalinityTransform'
    }
    salinity_transform_procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)

    #---------------------------
    # Set up ingestion - this is an operator concern - not done by SA in a deployed system
    #---------------------------
    # Configure ingestion using one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile
    log.debug('Calling create_ingestion_configuration')
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        number_of_workers=1
    )
    #
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    #---------------------------
    # Set up the producer (CTD Simulator)
    #---------------------------
    # Create the stream
    ctd_stream_id = pubsub_management_service.create_stream(stream_definition_id=ctd_stream_def_id)

    # Set up the datasets
    ctd_dataset_id = dataset_management_service.create_dataset(
        stream_id=ctd_stream_id,
        datastore_name=datastore_name,
        view_name='datasets/stream_join_granule'
    )

    # Configure ingestion of this dataset
    ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=ctd_dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id,  # you need to know the ingestion configuration id!
# ... (the rest of this example is omitted) ...
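The producer launch is cut off by the truncation; following the pattern in Examples 1 and 6, starting the CTD simulator on its stream would look roughly like this sketch:

# Sketch: start the CTD simulator on the stream (pattern from Examples 1 and 6)
configuration = {
    'process': {
        'stream_id': ctd_stream_id,
    }
}
ctd_sim_pid = process_dispatcher.schedule_process(
    process_definition_id=ctd_sim_procdef_id,
    configuration=configuration
)
# ...and later stop it with process_dispatcher.cancel_process(ctd_sim_pid)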
Example 4: DMCollaborationIntTest
# Required import: from interface.services.dm.iingestion_management_service import IngestionManagementServiceClient [as alias]
# Or: from interface.services.dm.iingestion_management_service.IngestionManagementServiceClient import create_dataset_configuration [as alias]
class DMCollaborationIntTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        config = DotDict()
        config.bootstrap.processes.ingestion.module = 'ion.processes.data.ingestion.ingestion_worker_a'
        config.bootstrap.processes.replay.module = 'ion.processes.data.replay.replay_process_a'
        self.container.start_rel_from_url('res/deploy/r2dm.yml', config)

        self.datastore_name = 'test_datasets'
        self.pubsub_management = PubsubManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

    def subscriber_action(self, msg, header):
        if not hasattr(self, 'received'):
            self.received = 0
        if not hasattr(self, 'async_done'):
            self.async_done = AsyncResult()
        self.received += 1
        if self.received >= 2:
            self.async_done.set(True)

    def test_ingest_to_replay(self):
        self.async_done = AsyncResult()
        sysname = get_sys_name()
        datastore = self.container.datastore_manager.get_datastore(self.datastore_name, 'SCIDATA')

        producer_definition = ProcessDefinition(name='Example Data Producer')
        producer_definition.executable = {
            'module': 'ion.processes.data.example_data_producer',
            'class': 'ExampleDataProducer'
        }
        process_definition_id = self.process_dispatcher.create_process_definition(process_definition=producer_definition)

        ingestion_configuration_id = self.ingestion_management.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=self.datastore_name, datastore_profile='SCIDATA'),
            number_of_workers=1
        )
        self.ingestion_management.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        stream_id = self.pubsub_management.create_stream(name='data stream')
        dataset_id = self.dataset_management.create_dataset(
            stream_id=stream_id,
            datastore_name=self.datastore_name,
        )
        self.ingestion_management.create_dataset_configuration(
            dataset_id=dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=ingestion_configuration_id
        )
        configuration = {
            'process': {
                'stream_id': stream_id
            }
        }
        self.process_dispatcher.schedule_process(process_definition_id, configuration=configuration)

        replay_id, stream_id = self.data_retriever.define_replay(dataset_id=dataset_id)
        subscriber = Subscriber(name=('%s.science_data' % sysname, 'test_queue'),
                                callback=self.subscriber_action,
                                binding='%s.data' % stream_id)
        gevent.spawn(subscriber.listen)

        # Wait until at least two granules have been ingested before starting the replay
        done = False
        while not done:
            results = datastore.query_view('manifest/by_dataset')
            if len(results) >= 2:
                done = True

        self.data_retriever.start_replay(replay_id)
        self.async_done.get(timeout=10)
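Neither this class nor the other examples tear down their ingestion configuration. Assuming IngestionManagementServiceClient exposes deactivate and delete operations symmetric to the create/activate calls used above (an assumption; only create/activate appear in this article), and assuming the test stashes the configuration id on self, a cleanup sketch might look like:

    def tearDown(self):
        # Assumed symmetric API: deactivate/delete are not shown anywhere in this article
        ingestion_configuration_id = getattr(self, 'ingestion_configuration_id', None)
        if ingestion_configuration_id:
            self.ingestion_management.deactivate_ingestion_configuration(
                ingestion_configuration_id=ingestion_configuration_id)
            self.ingestion_management.delete_ingestion_configuration(
                ingestion_configuration_id=ingestion_configuration_id)
        self._stop_container()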
Example 5: test_replay_integration
# Required import: from interface.services.dm.iingestion_management_service import IngestionManagementServiceClient [as alias]
# Or: from interface.services.dm.iingestion_management_service.IngestionManagementServiceClient import create_dataset_configuration [as alias]
def test_replay_integration(self):
    '''
    Test full DM Services Integration
    '''
    cc = self.container

    ### Everything below here can be run as a script:
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    data_retriever_service = DataRetrieverServiceClient(node=cc.node)
    resource_registry_service = ResourceRegistryServiceClient(node=cc.node)

    #------------------------------------------------------------------------------------------------------
    # Datastore name
    #------------------------------------------------------------------------------------------------------
    datastore_name = 'test_replay_integration'

    #------------------------------------------------------------------------------------------------------
    # Spawn process
    #------------------------------------------------------------------------------------------------------
    pid = cc.spawn_process(name='dummy_process_for_test',
                           module='pyon.ion.process',
                           cls='SimpleProcess',
                           config={})
    dummy_process = cc.proc_manager.procs[pid]

    #------------------------------------------------------------------------------------------------------
    # Set up subscriber
    #------------------------------------------------------------------------------------------------------
    # Normally the user does not see or create the publisher; this is part of the container's business.
    # For the test we need to set it up explicitly.
    publisher_registrar = StreamPublisherRegistrar(process=dummy_process, node=cc.node)
    subscriber_registrar = StreamSubscriberRegistrar(process=cc, node=cc.node)

    #------------------------------------------------------------------------------------------------------
    # Set up ingestion
    #------------------------------------------------------------------------------------------------------
    # Configure ingestion using one worker, ingesting to the test_replay_integration datastore with the SCIDATA profile
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'),
        hdf_storage=HdfStorage(),
        number_of_workers=1,
    )
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id)

    #------------------------------------------------------------------------------------------------------
    # Grab the transforms acting as ingestion workers
    #------------------------------------------------------------------------------------------------------
    transforms = [resource_registry_service.read(assoc.o)
                  for assoc in resource_registry_service.find_associations(ingestion_configuration_id, PRED.hasTransform)]
    proc_1 = cc.proc_manager.procs[transforms[0].process_id]
    log.info("PROCESS 1: %s" % str(proc_1))

    #------------------------------------------------------------------------------------------------------
    # Set up the test hooks for the gevent event AsyncResult object
    #------------------------------------------------------------------------------------------------------
    def ingestion_worker_received(message, headers):
        ar.set(message)
    proc_1.ingest_process_test_hook = ingestion_worker_received

    #------------------------------------------------------------------------------------------------------
    # Set up the producers (CTD Simulators)
    #------------------------------------------------------------------------------------------------------
    ctd_stream_def = ctd_stream_definition()
    stream_def_id = pubsub_management_service.create_stream_definition(container=ctd_stream_def, name='Junk definition')
    stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)

    #------------------------------------------------------------------------------------------------------
    # Set up the dataset config
    #------------------------------------------------------------------------------------------------------
    dataset_id = dataset_management_service.create_dataset(
        stream_id=stream_id,
        datastore_name=datastore_name,
        view_name='datasets/stream_join_granule'
    )
    dataset_config_id = ingestion_management_service.create_dataset_configuration(
# ... (the rest of this example is omitted) ...
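Example 5 breaks off in the middle of the create_dataset_configuration call. Based on the test hook wired above and the publish/wait pattern of Example 2, the omitted tail plausibly completes the call, publishes a message, and blocks on the AsyncResult. This is a sketch only: create_publisher on the registrar, the payload, and the 10-second timeout are assumptions, not the original code.

# Hedged sketch of the omitted tail (continues the function body above)
dataset_config_id = ingestion_management_service.create_dataset_configuration(
    dataset_id=dataset_id,
    archive_data=True,
    archive_metadata=True,
    ingestion_configuration_id=ingestion_configuration_id
)

ar = gevent.event.AsyncResult()  # the ingestion_worker_received hook above calls ar.set(message)

publisher = publisher_registrar.create_publisher(stream_id=stream_id)  # assumed registrar API
publisher.publish('some fake data')                                    # payload is illustrative only
ingested_message = ar.get(timeout=10)  # block until the ingestion worker sees the message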
Example 6: test_raw_stream_integration
# Required import: from interface.services.dm.iingestion_management_service import IngestionManagementServiceClient [as alias]
# Or: from interface.services.dm.iingestion_management_service.IngestionManagementServiceClient import create_dataset_configuration [as alias]
def test_raw_stream_integration(self):
    cc = self.container
    assertions = self.assertTrue

    # -----------------------------
    # Copy below here to run as a script (don't forget the imports of course!)
    # -----------------------------

    # Create some service clients...
    pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
    ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
    dataset_management_service = DatasetManagementServiceClient(node=cc.node)
    process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

    # declare some handy variables
    datastore_name = "test_dm_integration"
    datastore = cc.datastore_manager.get_datastore(datastore_name, profile=DataStore.DS_PROFILE.SCIDATA)

    ###
    ### A process definition for the example data producer...
    ###
    producer_definition = ProcessDefinition(name="Example Data Producer")
    producer_definition.executable = {
        "module": "ion.processes.data.example_data_producer",
        "class": "ExampleDataProducer",
    }
    producer_procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)

    # ---------------------------
    # Set up ingestion - this is an operator concern - not done by SA in a deployed system
    # ---------------------------
    # Configure ingestion using one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile
    log.debug("Calling create_ingestion_configuration")
    ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
        exchange_point_id="science_data",
        couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile="SCIDATA"),
        number_of_workers=1,
    )
    #
    ingestion_management_service.activate_ingestion_configuration(
        ingestion_configuration_id=ingestion_configuration_id
    )

    # ---------------------------
    # Set up the producer (Example Data Producer)
    # ---------------------------
    # Create the stream
    stream_id = pubsub_management_service.create_stream(name="A data stream")

    # Set up the datasets
    dataset_id = dataset_management_service.create_dataset(
        stream_id=stream_id, datastore_name=datastore_name, view_name="Undefined!"
    )

    # Configure ingestion of this dataset
    dataset_ingest_config_id = ingestion_management_service.create_dataset_configuration(
        dataset_id=dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id,  # you need to know the ingestion configuration id!
    )
    # Hold onto dataset_ingest_config_id if you want to stop/start ingestion of that dataset by the ingestion service

    # Start the producer to generate some data
    configuration = {"process": {"stream_id": stream_id}}
    producer_pid = process_dispatcher.schedule_process(
        process_definition_id=producer_procdef_id, configuration=configuration
    )

    # Verify that an ingestion worker is actually running in the container
    found = False
    processes = cc.proc_manager.procs.values()
    for proc in processes:
        if isinstance(proc, IngestionWorker):
            found = True
            break
    self.assertTrue(found, "%s" % cc.proc_manager.procs)

    # Wait until at least five granules show up in the dataset manifest
    done = False
    while not done:
        results = datastore.query_view("manifest/by_dataset")
        if len(results) >= 5:
            done = True
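The closing poll loop here (and the similar loop in Example 4) spins forever if ingestion stalls. A hedged variant with a deadline and a cooperative sleep keeps the test from hanging; the 30-second budget is an arbitrary assumption, and time/gevent are assumed imported as in the other examples.

# Sketch: the same manifest poll, but bounded and yielding to other greenlets
deadline = time.time() + 30  # assumed time budget for this test
done = False
while not done:
    if time.time() > deadline:
        raise AssertionError('timed out waiting for 5 granules to be ingested')
    results = datastore.query_view("manifest/by_dataset")
    if len(results) >= 5:
        done = True
    else:
        gevent.sleep(0.2)  # yield so the ingestion workers can run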