This article collects typical usage examples of the Python function tensorflow.contrib.data.python.ops.batching.batch_and_drop_remainder. If you are wondering what batch_and_drop_remainder does, how to call it, or what real uses of it look like, the curated examples below should help.
Nine code examples of the batch_and_drop_remainder function are shown below, ordered by popularity.
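Before the examples, a minimal sketch of what the transformation does, assuming a TF 1.x build where tensorflow.contrib is available (imports follow the internal-module style used by the snippets below):

from tensorflow.contrib.data.python.ops import batching
from tensorflow.python.data.ops import dataset_ops

# Batch 10 elements into groups of 3; the leftover element (9) is dropped,
# so every emitted batch has a statically known size of 3.
dataset = dataset_ops.Dataset.range(10).apply(
    batching.batch_and_drop_remainder(3))
# Yields: [0 1 2], [3 4 5], [6 7 8]

In later TensorFlow releases this transformation was superseded by the drop_remainder argument of tf.data.Dataset.batch, so new code should prefer dataset.batch(3, drop_remainder=True).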
Example 1: testBatchAndDropRemainder
def testBatchAndDropRemainder(self):
  components = (np.arange(7),
                np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
                np.array(37.0) * np.arange(7))

  batch_size = array_ops.placeholder(dtypes.int64, shape=[])

  iterator = (
      dataset_ops.Dataset.from_tensor_slices(components).apply(
          batching.batch_and_drop_remainder(batch_size))
      .make_initializable_iterator())
  next_element = iterator.get_next()

  with self.cached_session() as sess:
    for test_batch_size in [1, 3, 7, 10]:
      sess.run(iterator.initializer, feed_dict={batch_size: test_batch_size})
      num_batches = 7 // test_batch_size
      for i in range(num_batches):
        result = sess.run(next_element)
        for component, result_component in zip(components, result):
          for j in range(test_batch_size):
            self.assertAllEqual(component[(i * test_batch_size + j)],
                                result_component[j])
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(next_element)
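Because the remainder is dropped, the test expects exactly 7 // test_batch_size full batches per run: seven batches of 1, two batches of 3 (element 6 is discarded), one batch of 7, and zero batches of 10, in which case the iterator raises OutOfRangeError immediately.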
Example 2: testBatchAndDropRemainderSparseError
def testBatchAndDropRemainderSparseError(self):

  def _map_fn(i):
    return sparse_tensor.SparseTensor(
        indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i

  with self.assertRaises(TypeError):
    _ = dataset_ops.Dataset.range(10).map(_map_fn).apply(
        batching.batch_and_drop_remainder(10))
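Note where the assertion sits: the TypeError is raised when the transformation is applied, i.e. at graph-construction time, not when the iterator runs. In this contrib version, a dataset whose elements contain a SparseTensor built inside map is rejected outright; contrast with Example 5 below, where a dataset of SparseTensorValue elements batches successfully.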
Example 3: testBatchAndDropRemainderShapeInference
def testBatchAndDropRemainderShapeInference(self):
  components = (array_ops.placeholder(dtypes.int32),
                (array_ops.placeholder(dtypes.int32, shape=[None]),
                 array_ops.placeholder(dtypes.int32, shape=[20, 30])))

  # Test with a statically known batch size.
  dataset = (dataset_ops.Dataset.from_tensor_slices(components).apply(
      batching.batch_and_drop_remainder(128)))
  self.assertIs(None, dataset.output_shapes[0].ndims)
  self.assertEqual([128], dataset.output_shapes[1][0].as_list())
  self.assertEqual([128, 30], dataset.output_shapes[1][1].as_list())

  # Test with a dynamic batch size: the static shape will be unknown, because
  # `batch_size` is a placeholder.
  batch_size = array_ops.placeholder(dtypes.int64)
  dataset = (dataset_ops.Dataset.from_tensor_slices(components).apply(
      batching.batch_and_drop_remainder(batch_size)))
  self.assertIs(None, dataset.output_shapes[0].ndims)
  self.assertEqual([None], dataset.output_shapes[1][0].as_list())
  self.assertEqual([None, 30], dataset.output_shapes[1][1].as_list())
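The payoff of dropping the remainder is visible here: since every batch is guaranteed to hold exactly batch_size elements, a statically known batch size propagates into the static output shape, while plain batch() leaves the leading dimension unknown. A small contrast, under the same TF 1.x assumptions as the sketch above:

ds = dataset_ops.Dataset.range(100)
print(ds.batch(4).output_shapes)  # (?,): the final batch may be smaller
print(ds.apply(batching.batch_and_drop_remainder(4)).output_shapes)  # (4,)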
Example 4: __init__
def __init__(self, dataset, devices, prefetch_on_device=None):
  self._devices = devices

  # Default to using prefetching in graph mode, unless specified.
  # TODO(priyag): Enable prefetching in eager mode.
  self._prefetch_on_device = prefetch_on_device
  if self._prefetch_on_device is None:
    self._prefetch_on_device = not context.executing_eagerly()
  assert not (self._prefetch_on_device and context.executing_eagerly()), (
      "Prefetching is only supported in graph mode currently")

  if self._prefetch_on_device:
    self._dataset = dataset
  else:
    # TODO(priyag): If dropping remainder is not appropriate, find another
    # approach to distributing the dataset when not possible to divide evenly.
    # Possibly not an issue when we start using PartitionedDataset.
    self._dataset = dataset.apply(
        batching.batch_and_drop_remainder(len(devices)))
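This constructor (a per-device dataset wrapper in the style of tf.contrib.distribute) batches by the number of devices so that each full batch can be split one element per device; dropping the remainder avoids ending on a partial batch that could not be divided evenly, at the cost of discarding up to len(devices) - 1 trailing elements.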
Example 5: testBatchAndDropRemainderSparse
def testBatchAndDropRemainderSparse(self):

  def _sparse(i):
    return sparse_tensor.SparseTensorValue(
        indices=[[0]], values=(i * [1]), dense_shape=[1])

  iterator = dataset_ops.Dataset.range(12).map(_sparse).apply(
      batching.batch_and_drop_remainder(5)).make_initializable_iterator()
  init_op = iterator.initializer
  get_next = iterator.get_next()

  with self.cached_session() as sess:
    sess.run(init_op)
    for i in range(2):
      actual = sess.run(get_next)
      expected = sparse_tensor.SparseTensorValue(
          indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
          values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
          dense_shape=[5, 1])
      self.assertTrue(sparse_tensor.is_sparse(actual))
      self.assertSparseValuesEqual(actual, expected)
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(get_next)
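With 12 input elements and a batch size of 5, exactly two full batches (elements 0-4 and 5-9) are produced; elements 10 and 11 are dropped, and the third get_next raises OutOfRangeError.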
Example 6: make_batched_features_dataset
#......... (part of the code omitted here) .........
  ```
  features: {
    "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
    "gender": FixedLenFeature([], dtype=tf.string),
    "kws": VarLenFeature(dtype=tf.string),
  }
  ```

  And the expected output is:

  ```python
  {
    "age": [[0], [-1]],
    "gender": [["f"], ["f"]],
    "kws": SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=["code", "art", "sports"],
        dense_shape=[2, 2]),
  }
  ```

  Args:
    file_pattern: List of files or patterns of file paths containing
      `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int representing the number of records to combine
      in a single batch.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values. See `tf.parse_example`.
    reader: A function or class that can be called with a `filenames` tensor
      and (optional) `reader_args` and returns a `Dataset` of `Example`
      tensors. Defaults to `tf.data.TFRecordDataset`.
    reader_args: Additional arguments to pass to the reader class.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If None, cycles through the dataset forever. Defaults to `None`.
    shuffle: A boolean, indicates whether the input should be shuffled.
      Defaults to `True`.
    shuffle_buffer_size: Buffer size of the ShuffleDataset. A large capacity
      ensures better shuffling but increases memory usage and startup time.
    shuffle_seed: Randomization seed to use for shuffling.
    prefetch_buffer_size: Number of feature batches to prefetch in order to
      improve performance. Recommended value is the number of batches consumed
      per training step (default is 1).
    reader_num_threads: Number of threads used to read `Example` records. If
      >1, the results will be interleaved.
    parser_num_threads: Number of threads to use for parsing `Example` tensors
      into a dictionary of `Feature` tensors.
    sloppy_ordering: If `True`, reading performance will be improved at
      the cost of non-deterministic ordering. If `False`, the order of elements
      produced is deterministic prior to shuffling (elements are still
      randomized if `shuffle=True`. Note that if the seed is set, then order
      of elements after shuffling is deterministic). Defaults to `False`.
    drop_final_batch: If `True`, and the batch size does not evenly divide the
      input dataset size, the final smaller batch will be dropped. Defaults to
      `False`.

  Returns:
    A dataset of `dict` elements. Each `dict` maps feature keys to
    `Tensor` or `SparseTensor` objects.
  """
  # Create dataset of all matching filenames.
  filenames = _get_file_names(file_pattern, False)
  dataset = dataset_ops.Dataset.from_tensor_slices(filenames)
  if shuffle:
    dataset = dataset.shuffle(len(filenames), shuffle_seed)

  # Read `Example` records from files as tensor objects.
  if reader_args is None:
    reader_args = []

  # Read files sequentially (if reader_num_threads=1) or in parallel.
  dataset = dataset.apply(
      interleave_ops.parallel_interleave(
          lambda filename: reader(filename, *reader_args),
          cycle_length=reader_num_threads,
          sloppy=sloppy_ordering))

  # Extract values if the `Example` tensors are stored as key-value tuples.
  if dataset.output_types == (dtypes.string, dtypes.string):
    dataset = dataset.map(lambda _, v: v)

  # Apply dataset repeat and shuffle transformations.
  dataset = _maybe_shuffle_and_repeat(
      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)

  if drop_final_batch:
    dataset = dataset.apply(batching.batch_and_drop_remainder(batch_size))
  else:
    dataset = dataset.batch(batch_size)

  # Parse `Example` tensors to a dictionary of `Feature` tensors.
  dataset = dataset.map(
      lambda x: parsing_ops.parse_example(x, features),
      num_parallel_calls=parser_num_threads)

  # TODO(rachelim): Add an optional label_name argument for extracting the
  # label from the features dictionary, to comply with the type expected by
  # the input_fn to a `tf.Estimator.train` or `tf.Estimator.evaluate` function.
  dataset = dataset.prefetch(prefetch_buffer_size)
  return dataset
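A hypothetical call, assuming a TF 1.x release where this helper is exported as tf.contrib.data.make_batched_features_dataset; the file pattern is illustrative only:

import tensorflow as tf

dataset = tf.contrib.data.make_batched_features_dataset(
    file_pattern="/path/to/examples-*.tfrecord",  # hypothetical path
    batch_size=32,
    features={
        "age": tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
        "gender": tf.FixedLenFeature([], dtype=tf.string),
        "kws": tf.VarLenFeature(dtype=tf.string),
    },
    drop_final_batch=True)

With drop_final_batch=True the function takes the batch_and_drop_remainder branch shown above, so every emitted dict has a leading dimension of exactly 32.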
Example 7: make_tf_record_dataset
def make_tf_record_dataset(
    file_pattern,
    batch_size,
    parser_fn=None,
    num_epochs=None,
    shuffle=True,
    shuffle_buffer_size=None,
    shuffle_seed=None,
    prefetch_buffer_size=None,
    num_parallel_reads=None,
    num_parallel_parser_calls=None,
    drop_final_batch=False):
  """Reads and optionally parses TFRecord files into a dataset.

  Provides common functionality such as batching, optional parsing, shuffling,
  and performant defaults.

  Args:
    file_pattern: List of files or patterns of TFRecord file paths.
      See `tf.gfile.Glob` for pattern rules.
    batch_size: An int representing the number of records to combine
      in a single batch.
    parser_fn: (Optional.) A function accepting string input to parse
      and process the record contents. This function must map records
      to components of a fixed shape, so they may be batched. By
      default, uses the record contents unmodified.
    num_epochs: (Optional.) An int specifying the number of times this
      dataset is repeated. If None (the default), cycles through the
      dataset forever.
    shuffle: (Optional.) A bool that indicates whether the input
      should be shuffled. Defaults to `True`.
    shuffle_buffer_size: (Optional.) Buffer size to use for
      shuffling. A large buffer size ensures better shuffling, but
      increases memory usage and startup time.
    shuffle_seed: (Optional.) Randomization seed to use for shuffling.
    prefetch_buffer_size: (Optional.) An int specifying the number of
      feature batches to prefetch for performance improvement.
      Defaults to auto-tune. Set to 0 to disable prefetching.
    num_parallel_reads: (Optional.) Number of threads used to read
      records from files. By default or if set to a value >1, the
      results will be interleaved.
    num_parallel_parser_calls: (Optional.) Number of records to parse
      in parallel. Defaults to an automatic selection.
    drop_final_batch: (Optional.) Whether the last batch should be
      dropped in case its size is smaller than `batch_size`; the
      default behavior is not to drop the smaller batch.

  Returns:
    A dataset, where each element matches the output of `parser_fn`
    except it will have an additional leading `batch_size` dimension,
    or a `batch_size`-length 1-D tensor of strings if `parser_fn` is
    unspecified.
  """
  files = dataset_ops.Dataset.list_files(
      file_pattern, shuffle=shuffle, seed=shuffle_seed)

  if num_parallel_reads is None:
    # Note: We considered auto-tuning this value, but there is a concern
    # that this affects the mixing of records from different files, which
    # could affect training convergence/accuracy, so we are defaulting to
    # a constant for now.
    num_parallel_reads = 24
  dataset = core_readers.TFRecordDataset(
      files, num_parallel_reads=num_parallel_reads)

  if shuffle_buffer_size is None:
    # TODO(josh11b): Auto-tune this value when not specified.
    shuffle_buffer_size = 10000
  dataset = _maybe_shuffle_and_repeat(
      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)

  if parser_fn is None:
    if drop_final_batch:
      dataset = dataset.apply(batching.batch_and_drop_remainder(batch_size))
    else:
      dataset = dataset.batch(batch_size)
  else:
    # TODO(josh11b): if num_parallel_parser_calls is None, use some function
    # of num cores instead of map_and_batch's default behavior of one batch.
    dataset = dataset.apply(batching.map_and_batch(
        parser_fn, batch_size, num_parallel_calls=num_parallel_parser_calls,
        drop_remainder=drop_final_batch))

  if prefetch_buffer_size is None:
    prefetch_buffer_size = -1  # tf.config.data.AUTOTUNE
  if prefetch_buffer_size == 0:
    return dataset
  else:
    return dataset.prefetch(buffer_size=prefetch_buffer_size)
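A hypothetical call, assuming a TF 1.x release where this helper is exported as tf.contrib.data.make_tf_record_dataset; the path is illustrative only:

import tensorflow as tf

dataset = tf.contrib.data.make_tf_record_dataset(
    file_pattern="/path/to/data-*.tfrecord",  # hypothetical path
    batch_size=64,
    drop_final_batch=True)  # with no parser_fn, this takes the
                            # batch_and_drop_remainder branch above

Note that when a parser_fn is supplied, the same drop behavior is achieved through map_and_batch(..., drop_remainder=drop_final_batch) instead.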
Example 8: StreamingFilesDataset
#......... (part of the code omitted here) .........
      number to increase throughput. Set to a very small number to reduce
      memory consumption. Set to False to skip batching.
    sloppy: (Optional.) If `True`, read input data as fast as possible, without
      maintaining a deterministic order. Defaults to `False`.

  Returns:
    A `tf.data.Dataset` with an infinite stream of elements generated by a
    parallel interleaving of the set of files matched (or generated) by `files`
    whose type is the output of the dataset specified by `filetype`.

  Raises:
    ValueError: if any argument is not of the expected type.
  """
  if filetype is None:
    filetype = 'tfrecord'

  if isinstance(filetype, str):
    if filetype not in _FILETYPE_MAP:
      raise ValueError('Unexpected filetype: %s' % filetype)
    reader_fn = _FILETYPE_MAP[filetype]
  elif callable(filetype):
    reader_fn = filetype
  else:
    raise ValueError('filetype should be a string or a callable')

  file_reader_job = file_reader_job or 'coordinator'
  worker_job = worker_job or 'tpu_worker'

  if filename_shuffle_buffer_size is None:
    filename_shuffle_buffer_size = 4096
  num_parallel_reads = num_parallel_reads or 8
  if batch_transfer_size is None:
    batch_transfer_size = 1024
  if sloppy is None:
    sloppy = False

  with ops.device('/job:%s' % file_reader_job):
    if isinstance(files, str):
      source_dataset = dataset_ops.Dataset.list_files(files)
    elif isinstance(files, dataset_ops.Dataset):
      source_dataset = files
    else:
      raise ValueError('files was not a string or a dataset: %s' % files)

    if filename_shuffle_buffer_size:
      source_dataset = source_dataset.shuffle(
          buffer_size=filename_shuffle_buffer_size)

    # NOTE: We perform the `repeat` on the source dataset, because the output
    # dataset does not currently have enough information to recreate an
    # iterator over the source dataset when it reaches the end.
    source_dataset = source_dataset.repeat(num_epochs)

    source_dataset = source_dataset.apply(
        interleave_ops.parallel_interleave(
            reader_fn, cycle_length=num_parallel_reads, sloppy=sloppy))

    if batch_transfer_size:
      # Note: we can safely call batch_and_drop_remainder because we have an
      # infinite stream of TFRecords.
      source_dataset = source_dataset.apply(
          batching.batch_and_drop_remainder(batch_transfer_size))

    source_dataset = source_dataset.prefetch(1)

    source_iterator = source_dataset.make_one_shot_iterator()
    source_handle = source_iterator.string_handle()

  @function.Defun(dtypes.string)
  def LoadingFunc(h):
    remote_iterator = iterator_ops.Iterator.from_string_handle(
        h, source_dataset.output_types, source_dataset.output_shapes)
    return remote_iterator.get_next()

  def MapFn(unused_input):
    return functional_ops.remote_call(
        args=[source_handle],
        Tout=[dtypes.string],
        f=LoadingFunc,
        target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job)

  with ops.device('/job:%s' % worker_job):
    # TODO(saeta,mrry): Switch to using _GeneratorDataset.
    # identity = lambda x: x
    # dummy = constant_op.constant(0)
    # output_dataset = dataset_ops._GeneratorDataset(dummy, identity, MapFn,
    #                                                identity)
    output_dataset = dataset_ops.Dataset.range(2).repeat().map(MapFn)
    output_dataset = output_dataset.prefetch(1)

    if batch_transfer_size:
      # Undo the batching used during the transfer.
      output_dataset = output_dataset.apply(batching.unbatch()).prefetch(1)

  return output_dataset
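Dropping the remainder is safe here, as the in-code note says, because the source dataset repeats indefinitely (with the default num_epochs=None), so the stream of records is infinite and nothing is permanently lost; the batching exists only to amortize the cost of the remote transfer and is undone on the worker with batching.unbatch().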
Example 9: _dataset_fn
def _dataset_fn():
  dataset = dataset_ops.Dataset.range(1000).map(math_ops.to_float)
  # Want to produce a fixed, known shape, so drop remainder when batching.
  dataset = dataset.apply(batching.batch_and_drop_remainder(4))
  return dataset
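For reference, a sketch of the modern equivalent: batch_and_drop_remainder was later folded into the core API as the drop_remainder argument of tf.data.Dataset.batch, so in TF 2.x this input function would read:

import tensorflow as tf

def dataset_fn():
  dataset = tf.data.Dataset.range(1000).map(
      lambda x: tf.cast(x, tf.float32))  # equivalent of math_ops.to_float
  return dataset.batch(4, drop_remainder=True)  # fixed, known shape [4]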