This article collects typical usage examples of the tensorflow_datasets.as_numpy method in Python. If you have been wondering what exactly tensorflow_datasets.as_numpy does, how to call it, or where to find examples of it, the curated code samples below may help. You can also explore further usage of the enclosing module, tensorflow_datasets.
Shown below are 15 code examples of tensorflow_datasets.as_numpy, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
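Before diving in, here is a minimal sketch of the two common tfds.as_numpy patterns. It assumes tensorflow_datasets is installed and the mnist dataset can be downloaded; it is an illustration, not taken from any of the examples below.

import tensorflow_datasets as tfds

# Pattern 1: iterate over a tf.data.Dataset as dicts of NumPy arrays.
ds = tfds.load('mnist', split='train')
for example in tfds.as_numpy(ds.take(2)):
  print(example['image'].shape, example['label'])

# Pattern 2: with batch_size=-1, the whole split becomes a dict of arrays.
full = tfds.as_numpy(tfds.load('mnist', split='train', batch_size=-1))
print(full['image'].shape)  # (60000, 28, 28, 1)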
Example 1: test_generic_text_dataset_preprocess_fn

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def test_generic_text_dataset_preprocess_fn(self):
  dataset = _load_dataset('squad')

  example, = tfds.as_numpy(dataset.take(1))

  self.assertNotIn('inputs', example)
  self.assertNotIn('targets', example)

  proc_dataset = tf_inputs.generic_text_dataset_preprocess_fn(
      dataset, spm_path=_spm_path(),
      text_preprocess_fns=[lambda ds, training: t5_processors.squad(ds)],
      copy_plaintext=True,
      debug_print_examples=True,
      debug_print_examples_rate=1.0)

  proc_example, = tfds.as_numpy(proc_dataset.take(1))

  self.assertIn('inputs', proc_example)
  self.assertIn('targets', proc_example)

  self.assertEqual(proc_example['inputs'].dtype, np.int64)
  self.assertEqual(proc_example['targets'].dtype, np.int64)

  # TODO(afrozm): Why does this test take so much time?
Example 2: _log_padding_fractions

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def _log_padding_fractions(dataset, sequence_length, num_examples=100):
  """Empirically compute the fraction of padding - log the results.

  Args:
    dataset: a tf.data.Dataset
    sequence_length: dict from string to int (packed lengths)
    num_examples: an integer
  """
  logging.info("computing padding fractions")
  keys = sequence_length.keys()
  padding_frac = {k: 0 for k in keys}
  for ex in tfds.as_numpy(dataset.take(num_examples)):
    for k in keys:
      padding_frac[k] += 1 - (sequence_length[k] / len(ex[k]))
  for k in keys:
    logging.info("%s padding fraction = %g", k, padding_frac[k])
Example 3: _get_comparable_examples_from_ds

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def _get_comparable_examples_from_ds(ds):
  """Puts dataset into format that allows examples to be compared in Py2/3."""
  examples = []

  def _clean_value(v):
    if isinstance(v, bytes):
      return tf.compat.as_text(v)
    if isinstance(v, np.ndarray):
      if isinstance(v[0], bytes):
        return tuple(tf.compat.as_text(s) for s in v)
      return tuple(v)
    return v

  for ex in tfds.as_numpy(ds):
    examples.append(
        tuple((k, _clean_value(v)) for k, v in sorted(ex.items())))
  return examples
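A minimal sketch of the intended use: comparing two datasets element-wise regardless of order. The toy datasets below are assumptions for illustration; the helper itself also needs tf, np and tfds in scope.

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

ds_a = tf.data.Dataset.from_tensor_slices({'text': [b'foo', b'bar']})
ds_b = tf.data.Dataset.from_tensor_slices({'text': [b'bar', b'foo']})
# Same contents in a different order compare equal after sorting.
assert (sorted(_get_comparable_examples_from_ds(ds_a)) ==
        sorted(_get_comparable_examples_from_ds(ds_b)))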
Example 4: _emit_tokenized_examples

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def _emit_tokenized_examples(self, shard_instruction):
  """Emits examples keyed by shard path and index for a single shard."""
  _import_modules(self._modules_to_import)
  logging.info("Processing shard: %s", shard_instruction)
  self._increment_counter("input-shards")

  ds = self._task.tfds_dataset.load_shard(shard_instruction)

  if self._max_input_examples:
    num_shard_examples = int(
        self._max_input_examples / len(self.files))
    ds = ds.repeat().take(num_shard_examples)

  ds = self._task.preprocess_text(ds)
  ds = t5.data.encode_string_features(
      ds, self._task.output_features, keys=self._task.output_features,
      copy_plaintext=True)

  for ex in tfds.as_numpy(ds):
    self._increment_counter("examples")
    yield ex
Example 5: test_batch_fun

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def test_batch_fun(self):
  dataset = test_dataset_ints([32])
  dataset = dataset.repeat(10)
  batches = inputs.batch_fun(
      dataset, True, ([None], [None]), [], 1, batch_size=10)
  count = 0
  for example in tfds.as_numpy(batches):
    count += 1
    self.assertEqual(example[0].shape[0], 10)  # Batch size = 10.
  self.assertEqual(count, 1)  # Just one batch here.
Example 6: test_batch_fun_n_devices

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def test_batch_fun_n_devices(self):
  dataset = test_dataset_ints([32])
  dataset = dataset.repeat(9)
  batches = inputs.batch_fun(
      dataset, True, ([None], [None]), [], 9, batch_size=10)
  count = 0
  for example in tfds.as_numpy(batches):
    count += 1
    # Batch size adjusted to be divisible by n_devices.
    self.assertEqual(example[0].shape[0], 9)
  self.assertEqual(count, 1)  # Just one batch here.
Example 7: test_dataset_items

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def test_dataset_items(self):
  builder = shapenet.Shapenet(data_dir=self.tmp_dir)
  self._download_and_prepare_as_dataset(builder)
  for split_name in self.SPLITS:
    items = tfds.as_numpy(builder.as_dataset(split=split_name))
    for item in items:
      expected = self.EXPECTED_ITEMS[split_name][item['model_id']]
      self.assertEqual(item['label'],
                       self._encode_synset(builder, expected['synset']))
      self.assertLen(item['trimesh']['vertices'], expected['num_vertices'])
      self.assertLen(item['trimesh']['faces'], expected['num_faces'])
Example 8: test_c4_bare_preprocess_fn

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def test_c4_bare_preprocess_fn(self):
  dataset = _c4_dataset()

  example = list(tfds.as_numpy(dataset.take(1)))[0]

  # Targets are NOT in the example.
  self.assertNotIn('targets', example)
  self.assertIn('text', example)
  text = example['text']

  # This should convert the dataset to tokenized inputs/targets.
  dataset = tf_inputs.c4_bare_preprocess_fn(dataset, spm_path=_spm_path())

  example = list(tfds.as_numpy(dataset.take(1)))[0]

  # The original text is now stored in 'targets_plaintext'.
  self.assertIn('targets_plaintext', example)
  self.assertEqual(example['targets_plaintext'], text)

  # Targets are now tokenized.
  self.assertIn('targets', example)
  self.assertIsInstance(example['targets'], np.ndarray)
  self.assertEqual(example['targets'].dtype, np.int64)
  self.assertGreater(len(example['targets']), 0)
  self.assertEqual(example['targets'][-1], 1)  # EOS is appended at the end.

  # Inputs exist but are empty, because the t5 'unsupervised' preprocessor
  # wasn't configured with anything via gin.
  self.assertIn('inputs', example)
  self.assertEqual(len(example['inputs']), 0)
Example 9: test_c4_bare_preprocess_fn_denoising_objective

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def test_c4_bare_preprocess_fn_denoising_objective(self):
  _t5_gin_config()

  dataset = _c4_dataset()
  dataset = tf_inputs.c4_bare_preprocess_fn(dataset, spm_path=_spm_path())

  example = list(tfds.as_numpy(dataset.take(1)))[0]

  # Assertions now.
  self.assertIn('targets', example)
  targets = example['targets']
  self.assertIsInstance(targets, np.ndarray)
  self.assertEqual(targets.dtype, np.int64)
  self.assertGreater(len(targets), 0)

  self.assertIn('inputs', example)
  _inputs = example['inputs']  # pylint: disable=invalid-name
  self.assertIsInstance(_inputs, np.ndarray)
  self.assertEqual(_inputs.dtype, np.int64)
  self.assertGreater(len(_inputs), 0)

  # WHP inputs will have the bulk of the text.
  self.assertGreater(len(_inputs), len(targets))

  # WHP there will be two sentinel tokens in the inputs and targets.
  # (31999 and 31998 are the first two sentinel ids in T5's 32k vocabulary.)
  inputs_counter = collections.Counter(_inputs.tolist())
  targets_counter = collections.Counter(targets.tolist())
  self.assertEqual(1, inputs_counter[31999])
  self.assertEqual(1, inputs_counter[31998])
  self.assertEqual(1, targets_counter[31999])
  self.assertEqual(1, targets_counter[31998])
Example 10: _train_and_eval_dataset_v1

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def _train_and_eval_dataset_v1(problem_name, data_dir,
                               train_shuffle_files, eval_shuffle_files):
  """Return train and evaluation datasets, feature info and supervised keys."""
  with tf.device('cpu:0'):
    problem = t2t_problems().problem(problem_name)
    hparams = None
    if problem_name == 'video_bair_robot_pushing':
      hparams = problem.get_hparams()
      bair_robot_pushing_hparams(hparams)
    train_dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, data_dir,
                                    shuffle_files=train_shuffle_files,
                                    hparams=hparams)
    train_dataset = train_dataset.map(_select_features)
    eval_dataset = problem.dataset(tf.estimator.ModeKeys.EVAL, data_dir,
                                   shuffle_files=eval_shuffle_files,
                                   hparams=hparams)
    eval_dataset = eval_dataset.map(_select_features)
    # TODO(lukaszkaiser): remove this need for one example, just input_key.
    examples = list(tfds.as_numpy(train_dataset.take(1)))
  # We use 'inputs' as input except for purely auto-regressive tasks like
  # language models where 'targets' are used as input_key.
  input_key = 'inputs' if 'inputs' in examples[0] else 'targets'
  supervised_keys = ([input_key], ['targets'])
  return train_dataset, eval_dataset, supervised_keys
Example 11: load_tfds

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def load_tfds(
    name: str = "mnist"
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Returns a data set from `tfds`.

    Parameters
    ----------
    name : str
        The name of the TensorFlow data set to load.

    Returns
    -------
    train_features : np.ndarray
        The train features.
    test_features : np.ndarray
        The test features.
    train_labels : np.ndarray
        The train labels.
    test_labels : np.ndarray
        The test labels.
    """
    train_dataset = tfds.load(name=name, split=tfds.Split.TRAIN, batch_size=-1)
    train_dataset = tfds.as_numpy(train_dataset)
    train_features = train_dataset["image"]
    train_labels = train_dataset["label"]
    train_features = train_features.astype("float32")
    train_features = train_features / 255.0

    test_dataset = tfds.load(name=name, split=tfds.Split.TEST, batch_size=-1)
    test_dataset = tfds.as_numpy(test_dataset)
    test_features = test_dataset["image"]
    test_labels = test_dataset["label"]
    test_features = test_features.astype("float32")
    test_features = test_features / 255.0

    return train_features, test_features, train_labels, test_labels
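Assuming the requested dataset is available (tfds downloads it on first use), the loader above could be driven like this sketch:

train_x, test_x, train_y, test_y = load_tfds("mnist")
print(train_x.shape, train_y.shape)  # e.g. (60000, 28, 28, 1) (60000,)
print(train_x.min(), train_x.max())  # features are scaled into [0, 1]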
Example 12: count_examples

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def count_examples(examples_path, tfds_name, data_converter,
                   file_reader=tf.python_io.tf_record_iterator):
  """Counts the number of examples produced by the converter from files."""

  def _file_generator():
    filenames = tf.gfile.Glob(examples_path)
    for f in filenames:
      tf.logging.info('Counting examples in %s.', f)
      reader = file_reader(f)
      for item_str in reader:
        yield data_converter.str_to_item_fn(item_str)

  def _tfds_generator():
    ds = tfds.as_numpy(
        tfds.load(tfds_name, split=tfds.Split.VALIDATION, try_gcs=True))
    # TODO(adarob): Generalize to other data types if needed.
    for ex in ds:
      yield note_seq.midi_to_note_sequence(ex['midi'])

  num_examples = 0
  generator = _tfds_generator if tfds_name else _file_generator
  for item in generator():
    tensors = data_converter.to_tensors(item)
    num_examples += len(tensors.inputs)
  tf.logging.info('Total examples: %d', num_examples)
  return num_examples
Example 13: mnist_images

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def mnist_images():
  # https://github.com/google/jax/blob/master/docs/gpu_memory_allocation.rst
  import tensorflow as tf
  tf.config.experimental.set_visible_devices([], "GPU")

  import tensorflow_datasets as tfds
  prep = lambda d: np.reshape(np.float32(next(tfds.as_numpy(d))['image']) / 256,
                              (-1, 784))

  dataset = tfds.load("mnist:1.0.0")
  return (prep(dataset['train'].shuffle(50000).batch(50000)),
          prep(dataset['test'].batch(10000)))
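A short driver for the function above; note that the snippet assumes np (NumPy) is already imported at module level:

import numpy as np  # assumed by mnist_images above

train_images, test_images = mnist_images()
print(train_images.shape, test_images.shape)  # (50000, 784) (10000, 784)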
Example 14: dataset

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def dataset(batch_size):
  import tensorflow_datasets as tfds
  import tensorflow as tf
  tf.random.set_random_seed(0)
  cifar = tfds.load('cifar10')

  def get_train_batches():
    return tfds.as_numpy(
        cifar['train'].map(lambda el: tf.cast(el['image'], image_dtype))
        .shuffle(1000).batch(batch_size).prefetch(1))

  test_batches = tfds.as_numpy(
      cifar['test'].map(lambda el: tf.cast(el['image'], image_dtype))
      .repeat().shuffle(1000).batch(batch_size).prefetch(1))
  return get_train_batches, test_batches
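Note that image_dtype is a free variable in this snippet, resolved in its original module; a hypothetical driver would need it bound first. The binding below is an assumption, not the original definition:

import numpy as np
import tensorflow as tf

image_dtype = np.float32  # assumption: the original module defines this elsewhere
get_train_batches, test_batches = dataset(batch_size=128)
first_batch = next(iter(get_train_batches()))
print(first_batch.shape)  # (128, 32, 32, 3) for CIFAR-10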
Example 15: mnist

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def mnist():
  # https://github.com/google/jax/blob/master/docs/gpu_memory_allocation.rst
  import tensorflow as tf
  tf.config.experimental.set_visible_devices([], "GPU")

  import tensorflow_datasets as tfds
  dataset = tfds.load("mnist:1.0.0")
  images = lambda d: np.reshape(np.float32(d['image']) / 256, (-1, 784))
  labels = lambda d: _one_hot(d['label'], 10)

  train = next(tfds.as_numpy(dataset['train'].shuffle(50000).batch(50000)))
  test = next(tfds.as_numpy(dataset['test'].batch(10000)))
  return images(train), labels(train), images(test), labels(test)
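_one_hot comes from the snippet's surrounding module; a minimal stand-in (an assumption, not the original definition) lets the example run end to end:

import numpy as np

def _one_hot(x, k, dtype=np.float32):
  # Encode integer labels x as one-hot rows over k classes.
  return np.array(x[:, None] == np.arange(k), dtype)

train_images, train_labels, test_images, test_labels = mnist()
print(train_images.shape, train_labels.shape)  # (50000, 784) (50000, 10)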