

Python batching.batch_and_drop_remainder Function Code Examples

This article collects typical usage examples of the Python function tensorflow.contrib.data.python.ops.batching.batch_and_drop_remainder. If you are wondering exactly what batch_and_drop_remainder does, how to use it, or are looking for worked examples, the curated code samples below may help.


The following presents 9 code examples of the batch_and_drop_remainder function, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
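Before diving into the examples, a minimal sketch of the transformation's contract may help: `batch_and_drop_remainder(batch_size)` returns a function intended to be passed to `Dataset.apply`; it produces batches of exactly `batch_size` elements and silently discards any smaller final batch. The sketch below assumes the TF 1.x contrib API used throughout these examples (later TF 1.x releases offer the same behavior via `tf.data.Dataset.batch(batch_size, drop_remainder=True)`):

import tensorflow as tf
from tensorflow.contrib.data.python.ops import batching

# 10 elements batched by 3 yields 3 full batches; element 9 is dropped.
dataset = tf.data.Dataset.range(10).apply(
    batching.batch_and_drop_remainder(3))

iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
  print(sess.run(next_element))  # [0 1 2]
  print(sess.run(next_element))  # [3 4 5]
  print(sess.run(next_element))  # [6 7 8]
  # A fourth run would raise tf.errors.OutOfRangeError, since element 9
  # cannot fill a batch of 3.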

Example 1: testBatchAndDropRemainder

  def testBatchAndDropRemainder(self):
    components = (np.arange(7),
                  np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
                  np.array(37.0) * np.arange(7))

    batch_size = array_ops.placeholder(dtypes.int64, shape=[])

    iterator = (
        dataset_ops.Dataset.from_tensor_slices(components).apply(
            batching.batch_and_drop_remainder(batch_size))
        .make_initializable_iterator())

    next_element = iterator.get_next()

    with self.cached_session() as sess:
      for test_batch_size in [1, 3, 7, 10]:
        sess.run(iterator.initializer, feed_dict={batch_size: test_batch_size})
        num_batches = 7 // test_batch_size
        for i in range(num_batches):
          result = sess.run(next_element)
          for component, result_component in zip(components, result):
            for j in range(test_batch_size):
              self.assertAllEqual(component[(i * test_batch_size + j)],
                                  result_component[j])
        with self.assertRaises(errors.OutOfRangeError):
          sess.run(next_element)
Contributor: Jordan1237, Project: tensorflow, Lines: 26, Source: batch_dataset_op_test.py

Example 2: testBatchAndDropRemainderSparseError

  def testBatchAndDropRemainderSparseError(self):

    def _map_fn(i):
      return sparse_tensor.SparseTensor(
          indices=[[0, 0]], values=(i * [1]), dense_shape=[1, 1]), i

    with self.assertRaises(TypeError):
      _ = dataset_ops.Dataset.range(10).map(_map_fn).apply(
          batching.batch_and_drop_remainder(10))
Contributor: SylChan, Project: tensorflow, Lines: 9, Source: batch_dataset_op_test.py
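Note: this snippet appears to come from an older TensorFlow revision in which `tf.data` batching did not support `SparseTensor` elements, so applying `batch_and_drop_remainder` raised a `TypeError` at dataset-construction time. Example 5 below, taken from a later revision, shows the same transformation succeeding on sparse elements.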

Example 3: testBatchAndDropRemainderShapeInference

  def testBatchAndDropRemainderShapeInference(self):
    components = (array_ops.placeholder(dtypes.int32), (array_ops.placeholder(
        dtypes.int32, shape=[None]), array_ops.placeholder(
            dtypes.int32, shape=[20, 30])))

    # Test with a statically known batch size.
    dataset = (dataset_ops.Dataset.from_tensor_slices(components).apply(
        batching.batch_and_drop_remainder(128)))

    self.assertIs(None, dataset.output_shapes[0].ndims)
    self.assertEqual([128], dataset.output_shapes[1][0].as_list())
    self.assertEqual([128, 30], dataset.output_shapes[1][1].as_list())

    # Test with a dynamic batch size: the static shape will be unknown, because
    # `batch_size` is a placeholder.
    batch_size = array_ops.placeholder(dtypes.int64)
    dataset = (dataset_ops.Dataset.from_tensor_slices(components).apply(
        batching.batch_and_drop_remainder(batch_size)))

    self.assertIs(None, dataset.output_shapes[0].ndims)
    self.assertEqual([None], dataset.output_shapes[1][0].as_list())
    self.assertEqual([None, 30], dataset.output_shapes[1][1].as_list())
Contributor: Crazyonxh, Project: tensorflow, Lines: 22, Source: batch_dataset_op_test.py

Example 4: __init__

  def __init__(self, dataset, devices, prefetch_on_device=None):
    self._devices = devices

    # Default to using prefetching in graph mode, unless specified.
    # TODO(priyag): Enable prefetching in eager mode.
    self._prefetch_on_device = prefetch_on_device
    if self._prefetch_on_device is None:
      self._prefetch_on_device = not context.executing_eagerly()
    assert not (self._prefetch_on_device and context.executing_eagerly()), (
        "Prefetching is only supported in graph mode currently")

    if self._prefetch_on_device:
      self._dataset = dataset
    else:
      # TODO(priyag): If dropping remainder is not appropriate, find another
      # approach to distributing the dataset when not possible to divide evenly.
      # Possibly not an issue when we start using PartitionedDataset.
      self._dataset = dataset.apply(
          batching.batch_and_drop_remainder(len(devices)))
Contributor: bikong2, Project: tensorflow, Lines: 19, Source: values.py
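Design note: here the batch size is `len(devices)`, so each batch carries exactly one element per device. Using `batch_and_drop_remainder` rather than plain `batch` guarantees that no partial batch ever reaches the distribution logic, at the cost of discarding up to `len(devices) - 1` trailing elements, as the TODO in the snippet acknowledges.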

Example 5: testBatchAndDropRemainderSparse

  def testBatchAndDropRemainderSparse(self):

    def _sparse(i):
      return sparse_tensor.SparseTensorValue(
          indices=[[0]], values=(i * [1]), dense_shape=[1])

    iterator = dataset_ops.Dataset.range(12).map(_sparse).apply(
        batching.batch_and_drop_remainder(5)).make_initializable_iterator()
    init_op = iterator.initializer
    get_next = iterator.get_next()

    with self.cached_session() as sess:
      sess.run(init_op)
      for i in range(2):
        actual = sess.run(get_next)
        expected = sparse_tensor.SparseTensorValue(
            indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
            values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
            dense_shape=[5, 1])
        self.assertTrue(sparse_tensor.is_sparse(actual))
        self.assertSparseValuesEqual(actual, expected)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)
Contributor: Jordan1237, Project: tensorflow, Lines: 23, Source: batch_dataset_op_test.py

Example 6: make_batched_features_dataset


#......... some of the code is omitted here .........

  ```
  features: {
    "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
    "gender": FixedLenFeature([], dtype=tf.string),
    "kws": VarLenFeature(dtype=tf.string),
  }
  ```

  And the expected output is:

  ```python
  {
    "age": [[0], [-1]],
    "gender": [["f"], ["f"]],
    "kws": SparseTensor(
      indices=[[0, 0], [0, 1], [1, 0]],
      values=["code", "art", "sports"],
      dense_shape=[2, 2]),
  }
  ```

  Args:
    file_pattern: List of files or patterns of file paths containing
      `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int representing the number of records to combine
      in a single batch.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values. See `tf.parse_example`.
    reader: A function or class that can be
      called with a `filenames` tensor and (optional) `reader_args` and returns
      a `Dataset` of `Example` tensors. Defaults to `tf.data.TFRecordDataset`.
    reader_args: Additional arguments to pass to the reader class.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If None, cycles through the dataset forever. Defaults to `None`.
    shuffle: A boolean, indicates whether the input should be shuffled. Defaults
      to `True`.
    shuffle_buffer_size: Buffer size of the ShuffleDataset. A large capacity
      ensures better shuffling but would increase memory usage and startup time.
    shuffle_seed: Randomization seed to use for shuffling.
    prefetch_buffer_size: Number of feature batches to prefetch in order to
      improve performance. Recommended value is the number of batches consumed
      per training step (default is 1).
    reader_num_threads: Number of threads used to read `Example` records. If >1,
      the results will be interleaved.
    parser_num_threads: Number of threads to use for parsing `Example` tensors
      into a dictionary of `Feature` tensors.
    sloppy_ordering: If `True`, reading performance will be improved at
      the cost of non-deterministic ordering. If `False`, the order of elements
      produced is deterministic prior to shuffling (elements are still
      randomized if `shuffle=True`; if the seed is set, the order of elements
      after shuffling is deterministic). Defaults to `False`.
    drop_final_batch: If `True`, and the batch size does not evenly divide the
      input dataset size, the final smaller batch will be dropped. Defaults to
      `False`.

  Returns:
    A dataset of `dict` elements. Each `dict` maps feature keys to
    `Tensor` or `SparseTensor` objects.
  """
  # Create dataset of all matching filenames
  filenames = _get_file_names(file_pattern, False)
  dataset = dataset_ops.Dataset.from_tensor_slices(filenames)
  if shuffle:
    dataset = dataset.shuffle(len(filenames), shuffle_seed)

  # Read `Example` records from files as tensor objects.
  if reader_args is None:
    reader_args = []

  # Read files sequentially (if reader_num_threads=1) or in parallel
  dataset = dataset.apply(
      interleave_ops.parallel_interleave(
          lambda filename: reader(filename, *reader_args),
          cycle_length=reader_num_threads,
          sloppy=sloppy_ordering))

  # Extract values if the `Example` tensors are stored as key-value tuples.
  if dataset.output_types == (dtypes.string, dtypes.string):
    dataset = dataset.map(lambda _, v: v)

  # Apply dataset repeat and shuffle transformations.
  dataset = _maybe_shuffle_and_repeat(
      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)

  if drop_final_batch:
    dataset = dataset.apply(batching.batch_and_drop_remainder(batch_size))
  else:
    dataset = dataset.batch(batch_size)

  # Parse `Example` tensors to a dictionary of `Feature` tensors.
  dataset = dataset.map(
      lambda x: parsing_ops.parse_example(x, features),
      num_parallel_calls=parser_num_threads)

  # TODO(rachelim): Add an optional label_name argument for extracting the label
  # from the features dictionary, to comply with the type expected by the
  # input_fn to a `tf.Estimator.train` or `tf.Estimator.evaluate` function.
  dataset = dataset.prefetch(prefetch_buffer_size)
  return dataset
Contributor: jfreedman0, Project: tensorflow, Lines: 101, Source: readers.py
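As a supplement, here is a hedged usage sketch of `make_batched_features_dataset`, reusing the feature spec from the docstring above; the file pattern is an illustrative assumption, not taken from the source:

import tensorflow as tf
from tensorflow.contrib.data import make_batched_features_dataset

features = {
    "age": tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
    "gender": tf.FixedLenFeature([], dtype=tf.string),
    "kws": tf.VarLenFeature(dtype=tf.string),
}

dataset = make_batched_features_dataset(
    file_pattern="/tmp/examples-*.tfrecord",  # hypothetical path
    batch_size=2,
    features=features,
    num_epochs=1,
    shuffle=False,
    drop_final_batch=True)  # routes through batch_and_drop_remainder

# Each element is a dict mapping feature keys to Tensor/SparseTensor values.
feature_batch = dataset.make_one_shot_iterator().get_next()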

Example 7: make_tf_record_dataset

def make_tf_record_dataset(
    file_pattern,
    batch_size,
    parser_fn=None,
    num_epochs=None,
    shuffle=True,
    shuffle_buffer_size=None,
    shuffle_seed=None,
    prefetch_buffer_size=None,
    num_parallel_reads=None,
    num_parallel_parser_calls=None,
    drop_final_batch=False):
  """Reads and optionally parses TFRecord files into a dataset.

  Provides common functionality such as batching, optional parsing, shuffling,
  and performant defaults.

  Args:
    file_pattern: List of files or patterns of TFRecord file paths.
      See @{tf.gfile.Glob} for pattern rules.
    batch_size: An int representing the number of records to combine
      in a single batch.
    parser_fn: (Optional.) A function accepting string input to parse
      and process the record contents. This function must map records
      to components of a fixed shape, so they may be batched. By
      default, uses the record contents unmodified.
    num_epochs: (Optional.) An int specifying the number of times this
      dataset is repeated.  If None (the default), cycles through the
      dataset forever.
    shuffle: (Optional.) A bool that indicates whether the input
      should be shuffled. Defaults to `True`.
    shuffle_buffer_size: (Optional.) Buffer size to use for
      shuffling. A large buffer size ensures better shuffling, but
      increases memory usage and startup time.
    shuffle_seed: (Optional.) Randomization seed to use for shuffling.
    prefetch_buffer_size: (Optional.) An int specifying the number of
      feature batches to prefetch for performance improvement.
      Defaults to auto-tune. Set to 0 to disable prefetching.
    num_parallel_reads: (Optional.) Number of threads used to read
      records from files. By default or if set to a value >1, the
      results will be interleaved.
    num_parallel_parser_calls: (Optional.) Number of records to parse in
      parallel. Defaults to an automatic selection.
    drop_final_batch: (Optional.) Whether the last batch should be
      dropped in case its size is smaller than `batch_size`; the
      default behavior is not to drop the smaller batch.

  Returns:
    A dataset, where each element matches the output of `parser_fn`
    except it will have an additional leading `batch_size` dimension,
    or a `batch_size`-length 1-D tensor of strings if `parser_fn` is
    unspecified.
  """
  files = dataset_ops.Dataset.list_files(
      file_pattern, shuffle=shuffle, seed=shuffle_seed)

  if num_parallel_reads is None:
    # Note: We considered auto-tuning this value, but there is a concern
    # that this affects the mixing of records from different files, which
    # could affect training convergence/accuracy, so we are defaulting to
    # a constant for now.
    num_parallel_reads = 24
  dataset = core_readers.TFRecordDataset(
      files, num_parallel_reads=num_parallel_reads)

  if shuffle_buffer_size is None:
    # TODO(josh11b): Auto-tune this value when not specified
    shuffle_buffer_size = 10000
  dataset = _maybe_shuffle_and_repeat(
      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)

  if parser_fn is None:
    if drop_final_batch:
      dataset = dataset.apply(batching.batch_and_drop_remainder(batch_size))
    else:
      dataset = dataset.batch(batch_size)
  else:
    # TODO(josh11b): if num_parallel_parser_calls is None, use some function
    # of num cores instead of map_and_batch's default behavior of one batch.
    dataset = dataset.apply(batching.map_and_batch(
        parser_fn, batch_size, num_parallel_calls=num_parallel_parser_calls,
        drop_remainder=drop_final_batch))

  if prefetch_buffer_size is None:
    prefetch_buffer_size = -1  # tf.config.data.AUTOTUNE
  if prefetch_buffer_size == 0:
    return dataset
  else:
    return dataset.prefetch(buffer_size=prefetch_buffer_size)
Contributor: jfreedman0, Project: tensorflow, Lines: 89, Source: readers.py
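Again a hedged usage sketch; the file pattern is an assumption. With `parser_fn` left unset, each element is a 1-D string tensor of raw serialized records:

import tensorflow as tf
from tensorflow.contrib.data import make_tf_record_dataset

dataset = make_tf_record_dataset(
    file_pattern="/tmp/data-*.tfrecord",  # hypothetical path
    batch_size=32,
    num_epochs=1,
    shuffle=False,
    drop_final_batch=True)  # every batch has exactly 32 records

# Since parser_fn is None, each element is a [32] tensor of raw records.
batch = dataset.make_one_shot_iterator().get_next()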

Example 8: StreamingFilesDataset


#......... some of the code is omitted here .........
      number to increase throughput. Set to a very small number to reduce memory
      consumption. Set to False to skip batching.
    sloppy: (Optional.) If `True`, read input data as fast as possible, without
      maintaining a deterministic order. Defaults to `False`.
  Returns:
    A `tf.data.Dataset` with an infinite stream of elements generated by a
    parallel interleaving of the set of files matched (or generated) by
    `files`, with element type given by the dataset that `filetype` specifies.

  Raises:
    ValueError: if any argument is not of the expected type.
  """
  if filetype is None:
    filetype = 'tfrecord'

  if isinstance(filetype, str):
    if filetype not in _FILETYPE_MAP:
      raise ValueError('Unexpected filetype: %s' % filetype)
    reader_fn = _FILETYPE_MAP[filetype]
  elif callable(filetype):
    reader_fn = filetype
  else:
    raise ValueError('filetype should be a string or a callable')

  file_reader_job = file_reader_job or 'coordinator'

  worker_job = worker_job or 'tpu_worker'

  if filename_shuffle_buffer_size is None:
    filename_shuffle_buffer_size = 4096

  num_parallel_reads = num_parallel_reads or 8

  if batch_transfer_size is None:
    batch_transfer_size = 1024

  if sloppy is None:
    sloppy = False

  with ops.device('/job:%s' % file_reader_job):
    if isinstance(files, str):
      source_dataset = dataset_ops.Dataset.list_files(files)
    elif isinstance(files, dataset_ops.Dataset):
      source_dataset = files
    else:
      raise ValueError('files was not a string or a dataset: %s' % files)

    if filename_shuffle_buffer_size:
      source_dataset = source_dataset.shuffle(
          buffer_size=filename_shuffle_buffer_size)

    # NOTE: We perform the `repeat` on the source dataset, because the output
    # dataset does not currently have enough information to recreate an iterator
    # over the source dataset when it reaches the end.
    source_dataset = source_dataset.repeat(num_epochs)

    source_dataset = source_dataset.apply(
        interleave_ops.parallel_interleave(
            reader_fn, cycle_length=num_parallel_reads, sloppy=sloppy))

    if batch_transfer_size:
      # Note: we can safely call batch_and_drop_remainder because we have an
      # infinite stream of TFRecords.
      source_dataset = source_dataset.apply(
          batching.batch_and_drop_remainder(batch_transfer_size))

    source_dataset = source_dataset.prefetch(1)

    source_iterator = source_dataset.make_one_shot_iterator()
    source_handle = source_iterator.string_handle()

  @function.Defun(dtypes.string)
  def LoadingFunc(h):
    remote_iterator = iterator_ops.Iterator.from_string_handle(
        h, source_dataset.output_types, source_dataset.output_shapes)
    return remote_iterator.get_next()

  def MapFn(unused_input):
    return functional_ops.remote_call(
        args=[source_handle],
        Tout=[dtypes.string],
        f=LoadingFunc,
        target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job)

  with ops.device('/job:%s' % worker_job):
    # TODO(saeta,mrry): Switch to using _GeneratorDataset.

    # identity = lambda x: x
    # dummy = constant_op.constant(0)
    # output_dataset = dataset_ops._GeneratorDataset(dummy, identity, MapFn,
    #                                                identity)

    output_dataset = dataset_ops.Dataset.range(2).repeat().map(MapFn)
    output_dataset = output_dataset.prefetch(1)

    if batch_transfer_size:
      # Undo the batching used during the transfer.
      output_dataset = output_dataset.apply(batching.unbatch()).prefetch(1)

  return output_dataset
Contributor: QiangCai, Project: tensorflow, Lines: 101, Source: datasets.py
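One design point worth noting: batching by `batch_transfer_size` before the `remote_call` transfer amortizes per-element RPC overhead, and, as the in-code comment observes, dropping the remainder is safe here because the repeated source produces an infinite stream. `batching.unbatch()` then restores per-record granularity on the worker.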

Example 9: _dataset_fn

  def _dataset_fn():
    dataset = dataset_ops.Dataset.range(1000).map(math_ops.to_float)
    # Want to produce a fixed, known shape, so drop remainder when batching.
    dataset = dataset.apply(batching.batch_and_drop_remainder(4))
    return dataset
Contributor: BhaskarNallani, Project: tensorflow, Lines: 5, Source: metrics_v1_test.py


Note: The tensorflow.contrib.data.python.ops.batching.batch_and_drop_remainder examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are selected from open-source projects contributed by their authors; copyright remains with the original authors, and any use or redistribution should follow the corresponding project's License. Please do not repost without permission.