本文整理汇总了Python中tensorflow.python.distribute.cluster_resolver.TPUClusterResolver类的典型用法代码示例。如果您正苦于以下问题：Python TPUClusterResolver类的具体用法？Python TPUClusterResolver怎么用？Python TPUClusterResolver使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了TPUClusterResolver类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testSimpleSuccessfulRetrieval
def testSimpleSuccessfulRetrieval(self):
    """Resolve one healthy TPU with explicit project, zone and coordinator.

    The mocked service reports a single node described by flat
    ``ipAddress``/``port`` fields. The cluster spec must contain the
    configured coordinator plus that node as the only worker, and
    ``master()`` must point at the worker over gRPC.
    """
    node_path = 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1'
    node_info = {
        'ipAddress': '10.1.2.3',
        'port': '8470',
        'health': 'HEALTHY',
    }
    mocked_service = self.mock_service_client(tpu_map={node_path: node_info})

    resolver = TPUClusterResolver(
        project='test-project',
        zone='us-central1-c',
        tpu=['test-tpu-1'],
        coordinator_name='coordinator',
        coordinator_address='10.128.1.5:10203',
        credentials=None,
        service=mocked_service)

    cluster_spec = resolver.cluster_spec()
    # The proto text is kept flush-left so the literal matches the original.
    expected_proto = """
job { name: 'coordinator' tasks { key: 0 value: '10.128.1.5:10203' } }
job { name: 'worker' tasks { key: 0 value: '10.1.2.3:8470' } }
"""
    self._verifyClusterSpecEquality(cluster_spec, expected_proto)
    self.assertEqual(resolver.master(), 'grpc://10.1.2.3:8470')
示例2: testGkeEnvironmentForPod
def testGkeEnvironmentForPod(self):
    """Resolve a TPU pod from the GKE endpoints environment variable.

    Sets ``KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS`` to four comma-separated gRPC
    endpoints and checks that the resolver reports running in GKE, exposes
    the raw endpoint string, uses the first endpoint as master, and lists
    all four endpoints as worker tasks.

    The variable is restored in a ``finally`` clause so a failing assertion
    cannot leak it into other tests, and a value that was already set before
    the test is put back instead of being deleted.
    """
    env_key = 'KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS'
    endpoints = ('grpc://10.120.27.5:8470,'
                 'grpc://10.120.27.6:8470,'
                 'grpc://10.120.27.7:8470,'
                 'grpc://10.120.27.8:8470')

    previous_value = os.environ.get(env_key)
    os.environ[env_key] = endpoints
    try:
        self.assertIn(env_key, os.environ)
        self.assertTrue(TPUClusterResolver._inGke())
        self.assertEqual(
            compat.as_bytes(endpoints),
            compat.as_bytes(TPUClusterResolver._gkeEndpoints()))

        resolver = TPUClusterResolver()
        self.assertEqual(
            compat.as_bytes('grpc://10.120.27.5:8470'),
            compat.as_bytes(resolver.master()))

        actual_cluster_spec = resolver.cluster_spec()
        # The proto text is kept flush-left so the literal is unchanged.
        expected_proto = """
job {
name: 'worker'
tasks { key: 0 value: '10.120.27.5:8470' }
tasks { key: 1 value: '10.120.27.6:8470' }
tasks { key: 2 value: '10.120.27.7:8470' }
tasks { key: 3 value: '10.120.27.8:8470' }
}
"""
        self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
    finally:
        # Always undo the environment change, even when an assertion fails.
        if previous_value is None:
            os.environ.pop(env_key, None)
        else:
            os.environ[env_key] = previous_value
示例3: testNewNetworkEndpointFormat
def testNewNetworkEndpointFormat(self):
    """Resolve a TPU whose address is reported via ``networkEndpoints``.

    The mocked node carries a one-element ``networkEndpoints`` list with an
    integer port. The resolver must turn it into a single worker task and
    report it as the master.
    """
    node_path = 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1'
    endpoint = {
        'ipAddress': '10.2.3.4',
        'port': 8470,
    }
    node_info = {
        'health': 'HEALTHY',
        'networkEndpoints': [endpoint],
    }
    mocked_service = self.mock_service_client(tpu_map={node_path: node_info})

    resolver = TPUClusterResolver(
        project='test-project',
        zone='us-central1-c',
        tpu='test-tpu-1',
        coordinator_name='coordinator',
        coordinator_address='10.128.1.5:10203',
        credentials=None,
        service=mocked_service)

    cluster_spec = resolver.cluster_spec()
    # The proto text is kept flush-left so the literal matches the original.
    expected_proto = """
job { name: 'coordinator' tasks { key: 0 value: '10.128.1.5:10203' } }
job { name: 'worker' tasks { key: 0 value: '10.2.3.4:8470' } }
"""
    self._verifyClusterSpecEquality(cluster_spec, expected_proto)
    self.assertEqual('grpc://10.2.3.4:8470', resolver.master())
示例4: testRetrieveProjectAndZoneFromMetadata
def testRetrieveProjectAndZoneFromMetadata(self):
    """Resolve a TPU when ``project`` and ``zone`` are passed as ``None``.

    The mocked service only knows the node under the ``test-project`` /
    ``us-central1-c`` path, so the resolver has to arrive at those values by
    itself. NOTE(review): the test name suggests they come from instance
    metadata; that mechanism is not visible in this snippet.

    No coordinator address is given; the expected coordinator task uses
    ``10.128.1.2`` with the resolver's own ``_coordinator_port``.
    """
    node_path = 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1'
    node_info = {
        'ipAddress': '10.1.2.3',
        'port': '8470',
        'health': 'HEALTHY',
    }
    mocked_service = self.mock_service_client(tpu_map={node_path: node_info})

    resolver = TPUClusterResolver(
        project=None,
        zone=None,
        tpu=['test-tpu-1'],
        credentials=None,
        service=mocked_service,
        coordinator_name='coordinator')

    cluster_spec = resolver.cluster_spec()
    # The proto text is kept flush-left so the literal matches the original.
    proto_template = """
job {
name: 'coordinator'
tasks { key: 0 value: '10.128.1.2:%s' }
}
job {
name: 'worker'
tasks { key: 0 value: '10.1.2.3:8470' }
}
"""
    expected_proto = proto_template % resolver._coordinator_port
    self._verifyClusterSpecEquality(cluster_spec, str(expected_proto))
    self.assertEqual(resolver.master(), 'grpc://10.1.2.3:8470')
示例5: testNumAcceleratorsRetryFailure
def testNumAcceleratorsRetryFailure(self, mock_list_devices,
                                    mock_eager_list_devices):
    """``num_accelerators()`` raises RuntimeError when listing devices times out.

    Both injected mocks are configured to raise ``DeadlineExceededError`` on
    every call.
    """
    resolver = TPUClusterResolver(tpu='')
    # Each mock gets its own exception instance, in the original order.
    for device_lister in (mock_list_devices, mock_eager_list_devices):
        device_lister.side_effect = errors.DeadlineExceededError(
            None, None, 'timeout')

    with self.assertRaises(RuntimeError):
        resolver.num_accelerators()
示例6: verifyShouldResolve
def verifyShouldResolve(self, tpu, should_resolve):
    """Assert that a resolver built for ``tpu`` reports ``should_resolve``.

    Args:
      tpu: Value passed as the ``tpu`` argument of ``TPUClusterResolver``.
      should_resolve: Expected result of ``resolver._shouldResolve()``.
    """
    empty_service = self.mock_service_client(tpu_map={})
    resolver = TPUClusterResolver(
        project='test-project',
        zone='us-central1-c',
        tpu=tpu,
        coordinator_name=None,
        credentials=None,
        service=empty_service)

    failure_message = "TPU: '%s'" % tpu
    self.assertEqual(should_resolve, resolver._shouldResolve(),
                     failure_message)
示例7: testVerifySameCoreCount
def testVerifySameCoreCount(self):
    """Check ``_verify_and_return_same_core_count`` on equal and unequal maps.

    Maps whose entries all have the same number of cores yield that number;
    a map with differing core counts must raise ``RuntimeError``.
    """
    single_task = {0: [0, 1, 2, 3, 4, 5, 6, 7]}
    core_count = TPUClusterResolver._verify_and_return_same_core_count(
        single_task)
    self.assertEqual(core_count, 8)

    two_equal_tasks = {0: [0, 1], 1: [2, 3]}
    core_count = TPUClusterResolver._verify_and_return_same_core_count(
        two_equal_tasks)
    self.assertEqual(core_count, 2)

    mismatched_tasks = {0: [0], 1: [1, 2]}
    with self.assertRaises(RuntimeError):
        TPUClusterResolver._verify_and_return_same_core_count(
            mismatched_tasks)
示例8: testPodResolution
def testPodResolution(self):
    """Resolve a TPU node that exposes four network endpoints.

    Every endpoint must become one worker task, in order, and the first
    endpoint must be returned by ``master()``. The expected coordinator task
    uses ``10.128.1.2`` with the resolver's own ``_coordinator_port``.
    """
    node_path = 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1'
    worker_ips = ('10.2.3.4', '10.2.3.5', '10.2.3.6', '10.2.3.7')
    tpu_map = {
        node_path: {
            'health': 'HEALTHY',
            'networkEndpoints': [
                {'ipAddress': ip, 'port': 8470} for ip in worker_ips
            ],
        }
    }

    resolver = TPUClusterResolver(
        tpu='test-tpu-1',
        credentials=None,
        service=self.mock_service_client(tpu_map=tpu_map),
        coordinator_name='coordinator')

    cluster_spec = resolver.cluster_spec()
    # The proto text is kept flush-left so the literal matches the original.
    proto_template = """
job {
name: 'coordinator',
tasks { key: 0 value: '10.128.1.2:%s'}
}
job {
name: 'worker'
tasks { key: 0 value: '10.2.3.4:8470' }
tasks { key: 1 value: '10.2.3.5:8470' }
tasks { key: 2 value: '10.2.3.6:8470' }
tasks { key: 3 value: '10.2.3.7:8470' }
}
"""
    expected_proto = proto_template % resolver._coordinator_port
    self._verifyClusterSpecEquality(cluster_spec, str(expected_proto))
    self.assertEqual(resolver.master(), 'grpc://10.2.3.4:8470')
示例9: initialize_tpu_system
def initialize_tpu_system(cluster_resolver=None):
    """Initialize the TPU system and return its topology.

    Args:
      cluster_resolver: A tf.contrib.cluster_resolver.TPUClusterResolver,
        which provides information about the TPU cluster. When None, a
        resolver is built with ``TPUClusterResolver("")``.

    Returns:
      The tf.contrib.tpu.Topology object for the topology of the TPU cluster.
    """
    if cluster_resolver is None:
        cluster_resolver = TPUClusterResolver("")
    master = cluster_resolver.master()

    logging.info("Initializing the TPU system.")

    if not context.executing_eagerly():
        # Graph mode: run the init op in a dedicated graph and session that
        # targets the resolved master.
        config = config_pb2.ConfigProto(allow_soft_placement=True)
        with ops.Graph().as_default(), session_lib.Session(
                config=config, target=master) as sess:
            serialized_topology = sess.run(tpu.initialize_system())
    else:
        # Eager mode needs an indirection: tpu.initialize_system only creates
        # a dummy op whose purpose is to trigger DistributedTPURewritePass,
        # and that pass is what adds the real initialization ops. Running the
        # dummy op eagerly would therefore do nothing useful. Instead it is
        # wrapped in a defun so the rewrite passes can run on it, and the
        # function is executed through TPUPartitionedCallOp, which is the
        # easiest way to trigger a rewrite.
        @function.defun
        def _tpu_init_fn():
            return tpu.initialize_system()

        # _tpu_init_fn cannot be called directly (it holds only the dummy
        # op), but it must be defined so that it is added to the eager
        # context and receives its assigned name.
        # pylint: disable=protected-access
        concrete_fn = _tpu_init_fn._get_concrete_function_internal()
        init_fn_name = compat.as_str(concrete_fn._inference_function.name)
        # pylint: enable=protected-access
        call_outputs = tpu_functional_ops.TPUPartitionedCall(
            args=[], device_ordinal=0, Tout=[dtypes.string], f=init_fn_name)
        serialized_topology = call_outputs[0].numpy()

    logging.info("Finished initializing TPU system.")
    return topology.Topology(serialized=serialized_topology)
示例10: testNotReadyCloudTpu
def testNotReadyCloudTpu(self):
    """``cluster_spec()`` raises RuntimeError for a node in state ``CREATING``."""
    node_path = 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1'
    node_info = {
        'ipAddress': '10.1.2.3',
        'port': '8470',
        'state': 'CREATING',
    }
    mocked_service = self.mock_service_client(tpu_map={node_path: node_info})

    resolver = TPUClusterResolver(
        project=None,
        zone=None,
        tpu='test-tpu-1',
        coordinator_name=None,
        credentials=None,
        service=mocked_service)

    with self.assertRaises(RuntimeError):
        resolver.cluster_spec()
示例11: testNumAcceleratorsSuccess
def testNumAcceleratorsSuccess(self, mock_list_devices):
    """``num_accelerators()`` returns 2 for four tasks with two TPUs each.

    The injected mock returns eight TPU device entries spread over tasks
    0-3, two entries per task.
    """
    tpu_device_names = [
        '/job:tpu_worker/task:0/device:TPU:0',
        '/job:tpu_worker/task:1/device:TPU:1',
        '/job:tpu_worker/task:2/device:TPU:0',
        '/job:tpu_worker/task:3/device:TPU:1',
        '/job:tpu_worker/task:0/device:TPU:4',
        '/job:tpu_worker/task:1/device:TPU:5',
        '/job:tpu_worker/task:2/device:TPU:4',
        '/job:tpu_worker/task:3/device:TPU:5',
    ]
    mock_list_devices.return_value = [
        session._DeviceAttributes(device_name, 'TPU', 1024, 0)
        for device_name in tpu_device_names
    ]

    resolver = TPUClusterResolver(tpu='')
    self.assertEqual(resolver.num_accelerators(), 2)
示例12: initialize_tpu_system
def initialize_tpu_system(cluster_resolver=None):
    """Initialize the TPU devices in a separate session and graph.

    Args:
      cluster_resolver: A tf.contrib.cluster_resolver.TPUClusterResolver,
        which provides information about the TPU cluster. When None, a
        resolver is built with ``TPUClusterResolver("")``.

    Returns:
      The tf.contrib.tpu.Topology object for the topology of the TPU cluster.
    """
    if cluster_resolver is None:
        cluster_resolver = TPUClusterResolver("")
    master = cluster_resolver.master()

    logging.info("Initializing the TPU system.")

    # The init op runs in its own graph, in a session targeting the master.
    config = config_pb2.ConfigProto(allow_soft_placement=True)
    with ops.Graph().as_default(), session_lib.Session(
            config=config, target=master) as sess:
        serialized_topology = sess.run(tpu.initialize_system())

    logging.info("Finished initializing TPU system.")
    return topology.Topology(serialized=serialized_topology)
示例13: testGetDeviceDictAndCoresWithCPUsAndGPUs
def testGetDeviceDictAndCoresWithCPUsAndGPUs(self):
    """No cores and an empty device dict are reported without TPU devices.

    The device list contains only CPU and GPU device names, each entry
    created with device type ``'XLA'``.
    """
    non_tpu_device_names = [
        '/job:tpu_worker/task:0/device:CPU:0',
        '/job:tpu_worker/task:1/device:CPU:0',
        '/job:tpu_worker/task:2/device:CPU:0',
        '/job:tpu_worker/task:3/device:CPU:0',
        '/job:tpu_worker/task:0/device:GPU:1',
        '/job:tpu_worker/task:1/device:GPU:1',
        '/job:tpu_worker/task:2/device:GPU:1',
        '/job:tpu_worker/task:3/device:GPU:1',
    ]
    devices = [
        session._DeviceAttributes(device_name, 'XLA', 1024, 0)
        for device_name in non_tpu_device_names
    ]

    # This snippet expects a 2-tuple of (device dict, core count).
    result = TPUClusterResolver._get_device_dict_and_cores(devices)
    device_dict, num_cores = result

    self.assertEqual(num_cores, 0)
    self.assertEqual(device_dict, {})
示例14: testOverrideTaskTypeAndIndexAndGetMaster
def testOverrideTaskTypeAndIndexAndGetMaster(self):
    """``master()`` follows task overrides set as attributes or arguments.

    With four worker endpoints, the default master is the first endpoint.
    After setting ``task_type``/``task_id`` on the resolver it is the fourth.
    Explicit ``task_type``, ``task_id`` and ``rpc_layer`` arguments select
    the third endpoint with the ``test://`` scheme.
    """
    node_path = 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1'
    worker_ips = ('10.2.3.4', '10.2.3.5', '10.2.3.6', '10.2.3.7')
    tpu_map = {
        node_path: {
            'health': 'HEALTHY',
            'networkEndpoints': [
                {'ipAddress': ip, 'port': 8470} for ip in worker_ips
            ],
        }
    }

    resolver = TPUClusterResolver(
        project='test-project',
        zone='us-central1-c',
        tpu='test-tpu-1',
        coordinator_name=None,
        credentials=None,
        service=self.mock_service_client(tpu_map=tpu_map))

    # Default: no task override yet.
    self.assertEqual(resolver.master(), 'grpc://10.2.3.4:8470')

    # Override through the resolver's attributes.
    resolver.task_type = 'worker'
    resolver.task_id = 3
    self.assertEqual(resolver.master(), 'grpc://10.2.3.7:8470')

    # Override through call arguments, including the RPC layer.
    overridden_master = resolver.master(
        task_type='worker', task_id=2, rpc_layer='test')
    self.assertEqual(overridden_master, 'test://10.2.3.6:8470')
示例15: testGetDeviceDictAndCoresWithTPUs
def testGetDeviceDictAndCoresWithTPUs(self):
    """Eight TPU devices over four tasks give 8 cores and a per-task map.

    The result of ``_get_device_dict_and_cores`` is read through its
    ``total_cores`` and ``device_map`` attributes; the expected map is keyed
    by task id strings and holds core id strings.
    """
    tpu_device_names = [
        '/job:tpu_worker/task:0/device:TPU:0',
        '/job:tpu_worker/task:1/device:TPU:1',
        '/job:tpu_worker/task:2/device:TPU:0',
        '/job:tpu_worker/task:3/device:TPU:1',
        '/job:tpu_worker/task:0/device:TPU:4',
        '/job:tpu_worker/task:1/device:TPU:5',
        '/job:tpu_worker/task:2/device:TPU:4',
        '/job:tpu_worker/task:3/device:TPU:5',
    ]
    devices = [
        session._DeviceAttributes(device_name, 'TPU', 1024, 0)
        for device_name in tpu_device_names
    ]

    device_details = TPUClusterResolver._get_device_dict_and_cores(devices)

    expected_device_map = {
        '0': ['0', '4'],
        '1': ['1', '5'],
        '2': ['0', '4'],
        '3': ['1', '5'],
    }
    self.assertEqual(device_details.total_cores, 8)
    self.assertEqual(device_details.device_map, expected_device_map)