This article collects typical usage examples of the Python function tensorflow.python.data.experimental.ops.interleave_ops.parallel_interleave. If you have been wondering what exactly parallel_interleave does, how to call it, or what working code using it looks like, the curated samples below may help.
The following presents 15 code examples of parallel_interleave, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
Example 1: dataset_fn
def dataset_fn(self, input_values, cycle_length, block_length, sloppy,
               buffer_output_elements, prefetch_input_elements):

  def map_py_fn(x):
    self.write_coordination_events[x].wait()
    self.write_coordination_events[x].clear()
    self.read_coordination_events[x].release()
    if self.error:
      err = self.error
      self.error = None
      raise err  # pylint: disable=raising-bad-type
    return x * x

  def map_fn(x):
    return script_ops.py_func(map_py_fn, [x], x.dtype)

  def interleave_fn(x):
    dataset = dataset_ops.Dataset.from_tensors(x)
    dataset = dataset.repeat(x)
    return dataset.map(map_fn)

  return dataset_ops.Dataset.from_tensor_slices(input_values).repeat(
      self.repeat_count).apply(
          interleave_ops.parallel_interleave(
              interleave_fn, cycle_length, block_length, sloppy,
              buffer_output_elements, prefetch_input_elements))
Example 2: _make_parallel_scan_dataset
def _make_parallel_scan_dataset(self, ds, num_parallel_scans,
                                normalized_probability, normalized_columns):
  """Builds a parallel dataset from a given range.

  Args:
    ds: A `_BigtableSampleKeyPairsDataset` returning ranges of keys to use.
    num_parallel_scans: The number of concurrent parallel scans to use.
    normalized_probability: A number between 0 and 1 for the keep probability.
    normalized_columns: The column families and column qualifiers to retrieve.

  Returns:
    A `tf.data.Dataset` representing the result of the parallel scan.
  """
  if num_parallel_scans is None:
    num_parallel_scans = 50

  ds = ds.shuffle(buffer_size=10000)  # TODO(saeta): Make configurable.

  def _interleave_fn(start, end):
    return _BigtableScanDataset(
        self,
        prefix="",
        start=start,
        end=end,
        normalized=normalized_columns,
        probability=normalized_probability)

  # Note prefetch_input_elements must be set in order to avoid rpc timeouts.
  ds = ds.apply(
      interleave_ops.parallel_interleave(
          _interleave_fn,
          cycle_length=num_parallel_scans,
          sloppy=True,
          prefetch_input_elements=1))
  return ds
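The pattern above generalizes beyond Bigtable: any dataset of input descriptors can be fanned out into concurrent sub-datasets. A minimal sketch using the public alias `tf.data.experimental.parallel_interleave` (the file pattern and tuning values here are illustrative assumptions, not from the source):

```python
import tensorflow as tf

# Hypothetical shard files; substitute your own input pattern.
filenames = tf.data.Dataset.list_files("/path/to/shards/*.tfrecord")
filenames = filenames.shuffle(buffer_size=10000)

dataset = filenames.apply(
    tf.data.experimental.parallel_interleave(
        tf.data.TFRecordDataset,
        cycle_length=50,             # analogous to num_parallel_scans above
        sloppy=True,                 # trade output order for throughput
        prefetch_input_elements=1))  # start opening the next reader early,
                                     # as the comment above recommends
```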
Example 3: testWorkersGreaterThanNumFiles
def testWorkersGreaterThanNumFiles(self):
  dataset = dataset_ops.Dataset.list_files(self.test_filenames)
  dataset = dataset.apply(
      interleave_ops.parallel_interleave(core_readers.TFRecordDataset, 10))
  dataset = dataset.batch(5)
  dataset = distribute._AutoShardDataset(dataset, 500, 499)
  self.assertDatasetProduces(dataset, [])
Example 4: testShutdownRace
def testShutdownRace(self):
  dataset = dataset_ops.Dataset.range(20)
  map_fn = lambda x: dataset_ops.Dataset.range(20 * x, 20 * (x + 1))
  dataset = dataset.apply(
      interleave_ops.parallel_interleave(
          map_fn,
          cycle_length=3,
          sloppy=False,
          buffer_output_elements=1,
          prefetch_input_elements=0))
  dataset = dataset.batch(32)
  iterator = dataset.make_initializable_iterator()
  next_element = iterator.get_next()

  results = []
  with self.cached_session() as sess:
    for _ in range(2):
      elements = []
      self.evaluate(iterator.initializer)
      try:
        while True:
          elements.extend(sess.run(next_element))
      except errors.OutOfRangeError:
        pass
      results.append(elements)

  self.assertAllEqual(results[0], results[1])
Example 5: testZipReaderPipeline
def testZipReaderPipeline(self):
  dataset1 = dataset_ops.Dataset.list_files(
      self.test_filenames, shuffle=False)
  dataset1 = dataset1.apply(
      interleave_ops.parallel_interleave(core_readers.TFRecordDataset, 10))
  dataset2 = dataset_ops.Dataset.list_files(
      self.test_filenames, shuffle=False)
  dataset2 = dataset2.apply(
      interleave_ops.parallel_interleave(core_readers.TFRecordDataset, 10))
  dataset = dataset_ops.Dataset.zip((dataset1, dataset2))
  dataset = distribute._AutoShardDataset(dataset, 5, 3)

  expected = [
      (b"Record %d of file %d" % (r, f), b"Record %d of file %d" % (r, f))  # pylint:disable=g-complex-comprehension
      for r in range(0, 10)
      for f in (3, 8)
  ]
  self.assertDatasetProduces(dataset, expected)
Example 6: parallel_interleave
def parallel_interleave(map_func,
                        cycle_length,
                        block_length=1,
                        sloppy=False,
                        buffer_output_elements=None,
                        prefetch_input_elements=None):
  """A parallel version of the `Dataset.interleave()` transformation.

  `parallel_interleave()` maps `map_func` across its input to produce nested
  datasets, and outputs their elements interleaved. Unlike
  `tf.data.Dataset.interleave`, it gets elements from `cycle_length` nested
  datasets in parallel, which increases the throughput, especially in the
  presence of stragglers. Furthermore, the `sloppy` argument can be used to
  improve performance, by relaxing the requirement that the outputs are
  produced in a deterministic order, and allowing the implementation to skip
  over nested datasets whose elements are not readily available when
  requested.

  Example usage:

  ```python
  # Preprocess 4 files concurrently.
  filenames = tf.data.Dataset.list_files("/path/to/data/train*.tfrecords")
  dataset = filenames.apply(
      tf.data.experimental.parallel_interleave(
          lambda filename: tf.data.TFRecordDataset(filename),
          cycle_length=4))
  ```

  WARNING: If `sloppy` is `True`, the order of produced elements is not
  deterministic.

  Args:
    map_func: A function mapping a nested structure of tensors to a `Dataset`.
    cycle_length: The number of input `Dataset`s to interleave from in
      parallel.
    block_length: The number of consecutive elements to pull from an input
      `Dataset` before advancing to the next input `Dataset`.
    sloppy: If false, elements are produced in deterministic order. Otherwise,
      the implementation is allowed, for the sake of expediency, to produce
      elements in a non-deterministic order.
    buffer_output_elements: The number of elements each iterator being
      interleaved should buffer (similar to the `.prefetch()` transformation
      for each interleaved iterator).
    prefetch_input_elements: The number of input elements to transform to
      iterators before they are needed for interleaving.

  Returns:
    A `Dataset` transformation function, which can be passed to
    `tf.data.Dataset.apply`.
  """
  return interleave_ops.parallel_interleave(
      map_func, cycle_length, block_length, sloppy, buffer_output_elements,
      prefetch_input_elements)
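To make the `cycle_length`/`block_length` semantics concrete, here is a small self-contained sketch using the public alias `tf.data.experimental.parallel_interleave` (the input values are chosen purely for illustration). With `sloppy=False` the output order is deterministic:

```python
import tensorflow as tf

ds = tf.data.Dataset.from_tensor_slices([1, 2, 3])
ds = ds.apply(
    tf.data.experimental.parallel_interleave(
        # Each input x becomes the nested dataset [x, x, x].
        lambda x: tf.data.Dataset.from_tensors(x).repeat(3),
        cycle_length=2,    # interleave from two nested datasets at a time
        block_length=2))   # take two consecutive elements per dataset

# Deterministic output: 1, 1, 2, 2, 1, 2, 3, 3, 3
# (a block of two from each of the first two datasets, their leftovers,
# then the third dataset fills the freed slot).
```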
Example 7: testConcatenateReaderPipeline
def testConcatenateReaderPipeline(self, shuffle):
  dataset1 = dataset_ops.Dataset.list_files(
      self.test_filenames, shuffle=shuffle)
  dataset1 = dataset1.apply(
      interleave_ops.parallel_interleave(core_readers.TFRecordDataset, 10))
  dataset1 = dataset1.batch(5)
  dataset2 = dataset_ops.Dataset.list_files(
      self.test_filenames, shuffle=shuffle)
  dataset2 = dataset2.apply(
      interleave_ops.parallel_interleave(core_readers.TFRecordDataset, 10))
  dataset2 = dataset2.batch(5)
  dataset = dataset1.concatenate(dataset2)
  dataset = distribute._AutoShardDataset(dataset, 5, 3)

  expected = [
      b"Record %d of file %d" % (r, f)  # pylint:disable=g-complex-comprehension
      for r in range(0, 10)
      for f in (3, 8)
  ]
  expected += expected
  self.assertDatasetProducesWithShuffle(dataset, expected, 5, 8, shuffle)
Example 8: sloppy_interleave
def sloppy_interleave(map_func, cycle_length, block_length=1):
  """A non-deterministic version of the `Dataset.interleave()` transformation.

  `sloppy_interleave()` maps `map_func` across `dataset`, and
  non-deterministically interleaves the results.

  The resulting dataset is almost identical to `interleave`. The key
  difference is that if retrieving a value from a given output iterator would
  cause `get_next` to block, that iterator will be skipped, and consumed
  when next available. If consuming from all iterators would cause the
  `get_next` call to block, the `get_next` call blocks until the first value
  is available.

  If the underlying datasets produce elements as fast as they are consumed,
  the `sloppy_interleave` transformation behaves identically to `interleave`.
  However, if an underlying dataset would block the consumer,
  `sloppy_interleave` can violate the round-robin order (that `interleave`
  strictly obeys), producing an element from a different underlying
  dataset instead.

  Example usage:

  ```python
  # Preprocess 4 files concurrently.
  filenames = tf.data.Dataset.list_files("/path/to/data/train*.tfrecords")
  dataset = filenames.apply(
      tf.contrib.data.sloppy_interleave(
          lambda filename: tf.data.TFRecordDataset(filename),
          cycle_length=4))
  ```

  WARNING: The order of elements in the resulting dataset is not
  deterministic. Use `Dataset.interleave()` if you want the elements to have
  a deterministic order.

  Args:
    map_func: A function mapping a nested structure of tensors (having shapes
      and types defined by `self.output_shapes` and `self.output_types`) to a
      `Dataset`.
    cycle_length: The number of input `Dataset`s to interleave from in
      parallel.
    block_length: The number of consecutive elements to pull from an input
      `Dataset` before advancing to the next input `Dataset`. Note:
      `sloppy_interleave` will skip the remainder of elements in the
      `block_length` in order to avoid blocking.

  Returns:
    A `Dataset` transformation function, which can be passed to
    `tf.data.Dataset.apply`.
  """
  return interleave_ops.parallel_interleave(
      map_func, cycle_length, block_length, sloppy=True)
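Both `sloppy_interleave` and `parallel_interleave(..., sloppy=True)` were later superseded by the core API. A sketch of the rough modern equivalent (assuming TF 2.2 or newer, where `Dataset.interleave` accepts a `deterministic` argument):

```python
import tensorflow as tf

filenames = tf.data.Dataset.list_files("/path/to/data/train*.tfrecords")
dataset = filenames.interleave(
    tf.data.TFRecordDataset,
    cycle_length=4,
    # Parallelism now lives on the core interleave transformation.
    num_parallel_calls=tf.data.experimental.AUTOTUNE,
    # deterministic=False is the modern counterpart of sloppy=True.
    deterministic=False)
```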
Example 9: testPipelineWithMap
def testPipelineWithMap(self, shuffle):
  dataset = dataset_ops.Dataset.list_files(self.test_filenames, shuffle=False)
  dataset = dataset.apply(
      interleave_ops.parallel_interleave(core_readers.TFRecordDataset, 10))
  dataset = dataset.map(lambda x: string_ops.substr_v2(x, 2, 1000))
  dataset = dataset.batch(5)
  dataset = distribute._AutoShardDataset(dataset, 5, 3)

  expected = [
      b"cord %d of file %d" % (r, f)  # pylint:disable=g-complex-comprehension
      for r in range(0, 10)
      for f in (3, 8)
  ]
  self.assertDatasetProducesWithShuffle(dataset, expected, 5, 4, shuffle)
Example 10: testErrorsInInputFn
def testErrorsInInputFn(self):

  def map_py_fn(x):
    if x == 5:
      raise ValueError()
    return x

  def map_fn(x):
    return script_ops.py_func(map_py_fn, [x], x.dtype)

  def interleave_fn(x):
    dataset = dataset_ops.Dataset.from_tensors(x)
    dataset = dataset.repeat(x)
    return dataset

  self.dataset = (
      dataset_ops.Dataset.from_tensor_slices(self.input_values).map(map_fn)
      .repeat(self.repeat_count).apply(
          interleave_ops.parallel_interleave(interleave_fn, self.cycle_length,
                                             self.block_length, self.sloppy,
                                             self.buffer_output_elements,
                                             self.prefetch_input_elements)))
  self.iterator = self.dataset.make_initializable_iterator()
  self.init_op = self.iterator.initializer
  self.next_element = self.iterator.get_next()

  with self.cached_session() as sess:
    sess.run(
        self.init_op,
        feed_dict={
            self.input_values: [4, 5, 6],
            self.cycle_length: 2,
            self.block_length: 1,
            self.sloppy: False,
            self.buffer_output_elements: 1,
            self.prefetch_input_elements: 0,
        })
    for i, expected_element in enumerate(
        self._interleave([[4] * 4, [5], [6] * 6] * self.repeat_count, 2, 1)):
      if expected_element == 5:
        with self.assertRaises(errors.InvalidArgumentError):
          sess.run(self.next_element)
      else:
        actual_element = sess.run(self.next_element)
        self.assertEqual(expected_element, actual_element,
                         "At index %s: %s expected, got: %s" %
                         (i, expected_element, actual_element))
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(self.next_element)
Example 11: setUp
def setUp(self):
  self.input_values = array_ops.placeholder(dtypes.int64, shape=[None])
  self.cycle_length = array_ops.placeholder(dtypes.int64, shape=[])
  self.block_length = array_ops.placeholder(dtypes.int64, shape=[])
  self.sloppy = array_ops.placeholder(dtypes.bool, shape=[])
  self.buffer_output_elements = array_ops.placeholder(dtypes.int64, shape=[])
  self.prefetch_input_elements = array_ops.placeholder(dtypes.int64, shape=[])

  self.error = None
  self.repeat_count = 2

  # Set up threading events used to sequence when items are produced that
  # are subsequently interleaved. These events allow us to deterministically
  # simulate slowdowns and force sloppiness.
  self.read_coordination_events = {}
  self.write_coordination_events = {}
  # input values [4, 5, 6] are the common case for the tests; set defaults
  for i in range(4, 7):
    self.read_coordination_events[i] = threading.Semaphore(0)
    self.write_coordination_events[i] = threading.Event()

  def map_py_fn(x):
    self.write_coordination_events[x].wait()
    self.write_coordination_events[x].clear()
    self.read_coordination_events[x].release()
    if self.error:
      err = self.error
      self.error = None
      raise err  # pylint: disable=raising-bad-type
    return x * x

  def map_fn(x):
    return script_ops.py_func(map_py_fn, [x], x.dtype)

  def interleave_fn(x):
    dataset = dataset_ops.Dataset.from_tensors(x)
    dataset = dataset.repeat(x)
    return dataset.map(map_fn)

  self.dataset = (
      dataset_ops.Dataset.from_tensor_slices(self.input_values)
      .repeat(self.repeat_count).apply(
          interleave_ops.parallel_interleave(interleave_fn, self.cycle_length,
                                             self.block_length, self.sloppy,
                                             self.buffer_output_elements,
                                             self.prefetch_input_elements)))
  self.iterator = self.dataset.make_initializable_iterator()
  self.init_op = self.iterator.initializer
  self.next_element = self.iterator.get_next()
Example 12: testValidPipelineWithRangeDataset
def testValidPipelineWithRangeDataset(self, shuffle):
  dataset = dataset_ops.Dataset.range(self._num_files)
  dataset = dataset.map(lambda n: string_ops.string_join(  # pylint:disable=g-long-lambda
      [self.get_temp_dir(),
       string_ops.string_format("/tf_record.{}.txt", [n])]))
  dataset = dataset.apply(
      interleave_ops.parallel_interleave(core_readers.TFRecordDataset, 10))
  dataset = dataset.map(lambda x: string_ops.substr_v2(x, 2, 1000))
  dataset = dataset.batch(5)
  dataset = distribute._AutoShardDataset(dataset, 5, 3)

  expected = [
      b"cord %d of file %d" % (r, f)  # pylint:disable=g-complex-comprehension
      for r in range(0, 10)
      for f in (3, 8)
  ]
  self.assertDatasetProducesWithShuffle(dataset, expected, 5, 4, shuffle)
Example 13: testSparse
def testSparse(self):

  def _map_fn(i):
    return sparse_tensor.SparseTensor(
        indices=[[0, 0], [1, 1]], values=(i * [1, -1]), dense_shape=[2, 2])

  def _interleave_fn(x):
    return dataset_ops.Dataset.from_tensor_slices(
        sparse_ops.sparse_to_dense(x.indices, x.dense_shape, x.values))

  dataset = dataset_ops.Dataset.range(10).map(_map_fn).apply(
      interleave_ops.parallel_interleave(_interleave_fn, cycle_length=1))
  get_next = self.getNext(dataset)

  for i in range(10):
    for j in range(2):
      expected = [i, 0] if j % 2 == 0 else [0, -i]
      self.assertAllEqual(expected, self.evaluate(get_next()))
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())
Example 14: _testTooManyReaders
def _testTooManyReaders(self, sloppy=False):

  def interleave_fn(x):
    dataset = dataset_ops.Dataset.from_tensors(x)
    dataset = dataset.repeat(math_ops.cast(x, dtype=dtypes.int64))
    return dataset

  dataset = dataset_ops.Dataset.from_tensor_slices([4, 5, 6])
  dataset = dataset.repeat(self.repeat_count)
  dataset = dataset.apply(
      interleave_ops.parallel_interleave(
          interleave_fn, cycle_length=16, block_length=2, sloppy=sloppy))
  get_next = self.getNext(dataset)
  output_values = []
  for _ in range(30):
    output_values.append(self.evaluate(get_next()))

  expected_values = self._interleave(
      [[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 1, 2)
  self.assertItemsEqual(output_values, expected_values)
Example 15: testShutdownRace
def testShutdownRace(self):
  dataset = dataset_ops.Dataset.range(20)
  map_fn = lambda x: dataset_ops.Dataset.range(20 * x, 20 * (x + 1))
  dataset = dataset.apply(
      interleave_ops.parallel_interleave(
          map_fn,
          cycle_length=3,
          sloppy=False,
          buffer_output_elements=1,
          prefetch_input_elements=0))
  dataset = dataset.batch(32)

  results = []
  for _ in range(2):
    elements = []
    next_element = self.getNext(dataset)
    try:
      while True:
        elements.extend(self.evaluate(next_element()))
    except errors.OutOfRangeError:
      pass
    results.append(elements)

  self.assertAllEqual(results[0], results[1])