This article collects typical usage examples of the tensorflow_datasets.as_numpy method in Python. If you have been wondering what exactly tensorflow_datasets.as_numpy does, how to call it, or where to find examples of it, the curated code samples below may help. You can also explore further usage of the enclosing module, tensorflow_datasets.
Shown below are 15 code examples of tensorflow_datasets.as_numpy, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
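Before diving in, here is a minimal sketch of the two common tfds.as_numpy patterns. It assumes tensorflow_datasets is installed and the mnist dataset can be downloaded; it is an illustration, not taken from any of the examples below.

import tensorflow_datasets as tfds

# Pattern 1: iterate over a tf.data.Dataset as dicts of NumPy arrays.
ds = tfds.load('mnist', split='train')
for example in tfds.as_numpy(ds.take(2)):
  print(example['image'].shape, example['label'])

# Pattern 2: with batch_size=-1, the whole split becomes a dict of arrays.
full = tfds.as_numpy(tfds.load('mnist', split='train', batch_size=-1))
print(full['image'].shape)  # (60000, 28, 28, 1)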
Example 1: test_generic_text_dataset_preprocess_fn

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def test_generic_text_dataset_preprocess_fn(self):
  dataset = _load_dataset('squad')

  example, = tfds.as_numpy(dataset.take(1))

  self.assertNotIn('inputs', example)
  self.assertNotIn('targets', example)

  proc_dataset = tf_inputs.generic_text_dataset_preprocess_fn(
      dataset, spm_path=_spm_path(),
      text_preprocess_fns=[lambda ds, training: t5_processors.squad(ds)],
      copy_plaintext=True,
      debug_print_examples=True,
      debug_print_examples_rate=1.0)

  proc_example, = tfds.as_numpy(proc_dataset.take(1))

  self.assertIn('inputs', proc_example)
  self.assertIn('targets', proc_example)

  self.assertEqual(proc_example['inputs'].dtype, np.int64)
  self.assertEqual(proc_example['targets'].dtype, np.int64)

  # TODO(afrozm): Why does this test take so much time?
Example 2: _log_padding_fractions

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def _log_padding_fractions(dataset, sequence_length, num_examples=100):
  """Empirically compute the fraction of padding - log the results.

  Args:
    dataset: a tf.data.Dataset
    sequence_length: dict from string to int (packed lengths)
    num_examples: an integer
  """
  logging.info("computing padding fractions")
  keys = sequence_length.keys()
  padding_frac = {k: 0 for k in keys}
  for ex in tfds.as_numpy(dataset.take(num_examples)):
    for k in keys:
      padding_frac[k] += 1 - (sequence_length[k] / len(ex[k]))
  for k in keys:
    logging.info("%s padding fraction = %g", k, padding_frac[k])
Example 3: _get_comparable_examples_from_ds

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def _get_comparable_examples_from_ds(ds):
  """Puts dataset into format that allows examples to be compared in Py2/3."""
  examples = []

  def _clean_value(v):
    if isinstance(v, bytes):
      return tf.compat.as_text(v)
    if isinstance(v, np.ndarray):
      if isinstance(v[0], bytes):
        return tuple(tf.compat.as_text(s) for s in v)
      return tuple(v)
    return v

  for ex in tfds.as_numpy(ds):
    examples.append(
        tuple((k, _clean_value(v)) for k, v in sorted(ex.items())))
  return examples
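A minimal sketch of the intended use: comparing two datasets element-wise regardless of order. The toy datasets below are assumptions for illustration; the helper itself also needs tf, np and tfds in scope.

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

ds_a = tf.data.Dataset.from_tensor_slices({'text': [b'foo', b'bar']})
ds_b = tf.data.Dataset.from_tensor_slices({'text': [b'bar', b'foo']})
# Same contents in a different order compare equal after sorting.
assert (sorted(_get_comparable_examples_from_ds(ds_a)) ==
        sorted(_get_comparable_examples_from_ds(ds_b)))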
Example 4: _emit_tokenized_examples

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def _emit_tokenized_examples(self, shard_instruction):
  """Emits examples keyed by shard path and index for a single shard."""
  _import_modules(self._modules_to_import)
  logging.info("Processing shard: %s", shard_instruction)
  self._increment_counter("input-shards")

  ds = self._task.tfds_dataset.load_shard(shard_instruction)

  if self._max_input_examples:
    num_shard_examples = int(
        self._max_input_examples / len(self.files))
    ds = ds.repeat().take(num_shard_examples)

  ds = self._task.preprocess_text(ds)
  ds = t5.data.encode_string_features(
      ds, self._task.output_features, keys=self._task.output_features,
      copy_plaintext=True)

  for ex in tfds.as_numpy(ds):
    self._increment_counter("examples")
    yield ex
Example 5: test_batch_fun

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def test_batch_fun(self):
  dataset = test_dataset_ints([32])
  dataset = dataset.repeat(10)
  batches = inputs.batch_fun(
      dataset, True, ([None], [None]), [], 1, batch_size=10)
  count = 0
  for example in tfds.as_numpy(batches):
    count += 1
    self.assertEqual(example[0].shape[0], 10)  # Batch size = 10.
  self.assertEqual(count, 1)  # Just one batch here.
Example 6: test_batch_fun_n_devices

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def test_batch_fun_n_devices(self):
  dataset = test_dataset_ints([32])
  dataset = dataset.repeat(9)
  batches = inputs.batch_fun(
      dataset, True, ([None], [None]), [], 9, batch_size=10)
  count = 0
  for example in tfds.as_numpy(batches):
    count += 1
    # Batch size adjusted to be divisible by n_devices.
    self.assertEqual(example[0].shape[0], 9)
  self.assertEqual(count, 1)  # Just one batch here.
Example 7: test_dataset_items

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def test_dataset_items(self):
  builder = shapenet.Shapenet(data_dir=self.tmp_dir)
  self._download_and_prepare_as_dataset(builder)
  for split_name in self.SPLITS:
    items = tfds.as_numpy(builder.as_dataset(split=split_name))
    for item in items:
      expected = self.EXPECTED_ITEMS[split_name][item['model_id']]
      self.assertEqual(item['label'],
                       self._encode_synset(builder, expected['synset']))
      self.assertLen(item['trimesh']['vertices'], expected['num_vertices'])
      self.assertLen(item['trimesh']['faces'], expected['num_faces'])
Example 8: test_c4_bare_preprocess_fn

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def test_c4_bare_preprocess_fn(self):
  dataset = _c4_dataset()

  example = list(tfds.as_numpy(dataset.take(1)))[0]

  # Targets are NOT in the example.
  self.assertNotIn('targets', example)
  self.assertIn('text', example)
  text = example['text']

  # This should convert the dataset to tokenized inputs/targets.
  dataset = tf_inputs.c4_bare_preprocess_fn(dataset, spm_path=_spm_path())

  example = list(tfds.as_numpy(dataset.take(1)))[0]

  # The original text is now stored in 'targets_plaintext'.
  self.assertIn('targets_plaintext', example)
  self.assertEqual(example['targets_plaintext'], text)

  # Targets are now tokenized.
  self.assertIn('targets', example)
  self.assertIsInstance(example['targets'], np.ndarray)
  self.assertEqual(example['targets'].dtype, np.int64)
  self.assertGreater(len(example['targets']), 0)
  self.assertEqual(example['targets'][-1], 1)  # EOS is appended at the end.

  # Inputs exist but are empty, because the t5 'unsupervised' preprocessor
  # wasn't configured with anything via gin.
  self.assertIn('inputs', example)
  self.assertEqual(len(example['inputs']), 0)
Example 9: test_c4_bare_preprocess_fn_denoising_objective

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def test_c4_bare_preprocess_fn_denoising_objective(self):
  _t5_gin_config()

  dataset = _c4_dataset()
  dataset = tf_inputs.c4_bare_preprocess_fn(dataset, spm_path=_spm_path())

  example = list(tfds.as_numpy(dataset.take(1)))[0]

  # Assertions now.
  self.assertIn('targets', example)
  targets = example['targets']
  self.assertIsInstance(targets, np.ndarray)
  self.assertEqual(targets.dtype, np.int64)
  self.assertGreater(len(targets), 0)

  self.assertIn('inputs', example)
  _inputs = example['inputs']  # pylint: disable=invalid-name
  self.assertIsInstance(_inputs, np.ndarray)
  self.assertEqual(_inputs.dtype, np.int64)
  self.assertGreater(len(_inputs), 0)

  # WHP inputs will have the bulk of the text.
  self.assertGreater(len(_inputs), len(targets))

  # WHP there will be two sentinel tokens in the inputs and targets.
  # (31999 and 31998 are the first two sentinel ids in T5's 32k vocabulary.)
  inputs_counter = collections.Counter(_inputs.tolist())
  targets_counter = collections.Counter(targets.tolist())
  self.assertEqual(1, inputs_counter[31999])
  self.assertEqual(1, inputs_counter[31998])
  self.assertEqual(1, targets_counter[31999])
  self.assertEqual(1, targets_counter[31998])
Example 10: _train_and_eval_dataset_v1

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def _train_and_eval_dataset_v1(problem_name, data_dir,
                               train_shuffle_files, eval_shuffle_files):
  """Return train and evaluation datasets, feature info and supervised keys."""
  with tf.device('cpu:0'):
    problem = t2t_problems().problem(problem_name)
    hparams = None
    if problem_name == 'video_bair_robot_pushing':
      hparams = problem.get_hparams()
      bair_robot_pushing_hparams(hparams)
    train_dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, data_dir,
                                    shuffle_files=train_shuffle_files,
                                    hparams=hparams)
    train_dataset = train_dataset.map(_select_features)
    eval_dataset = problem.dataset(tf.estimator.ModeKeys.EVAL, data_dir,
                                   shuffle_files=eval_shuffle_files,
                                   hparams=hparams)
    eval_dataset = eval_dataset.map(_select_features)
    # TODO(lukaszkaiser): remove this need for one example, just input_key.
    examples = list(tfds.as_numpy(train_dataset.take(1)))
  # We use 'inputs' as input except for purely auto-regressive tasks like
  # language models where 'targets' are used as input_key.
  input_key = 'inputs' if 'inputs' in examples[0] else 'targets'
  supervised_keys = ([input_key], ['targets'])
  return train_dataset, eval_dataset, supervised_keys
Example 11: load_tfds

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def load_tfds(
    name: str = "mnist"
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Returns a data set from `tfds`.

    Parameters
    ----------
    name : str
        The name of the TensorFlow data set to load.

    Returns
    -------
    train_features : np.ndarray
        The train features.
    test_features : np.ndarray
        The test features.
    train_labels : np.ndarray
        The train labels.
    test_labels : np.ndarray
        The test labels.
    """
    train_dataset = tfds.load(name=name, split=tfds.Split.TRAIN, batch_size=-1)
    train_dataset = tfds.as_numpy(train_dataset)
    train_features = train_dataset["image"]
    train_labels = train_dataset["label"]
    train_features = train_features.astype("float32")
    train_features = train_features / 255.0

    test_dataset = tfds.load(name=name, split=tfds.Split.TEST, batch_size=-1)
    test_dataset = tfds.as_numpy(test_dataset)
    test_features = test_dataset["image"]
    test_labels = test_dataset["label"]
    test_features = test_features.astype("float32")
    test_features = test_features / 255.0

    return train_features, test_features, train_labels, test_labels
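Assuming the requested dataset is available (tfds downloads it on first use), the loader above could be driven like this sketch:

train_x, test_x, train_y, test_y = load_tfds("mnist")
print(train_x.shape, train_y.shape)  # e.g. (60000, 28, 28, 1) (60000,)
print(train_x.min(), train_x.max())  # features are scaled into [0, 1]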
Example 12: count_examples

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def count_examples(examples_path, tfds_name, data_converter,
                   file_reader=tf.python_io.tf_record_iterator):
  """Counts the number of examples produced by the converter from files."""

  def _file_generator():
    filenames = tf.gfile.Glob(examples_path)
    for f in filenames:
      tf.logging.info('Counting examples in %s.', f)
      reader = file_reader(f)
      for item_str in reader:
        yield data_converter.str_to_item_fn(item_str)

  def _tfds_generator():
    ds = tfds.as_numpy(
        tfds.load(tfds_name, split=tfds.Split.VALIDATION, try_gcs=True))
    # TODO(adarob): Generalize to other data types if needed.
    for ex in ds:
      yield note_seq.midi_to_note_sequence(ex['midi'])

  num_examples = 0
  generator = _tfds_generator if tfds_name else _file_generator
  for item in generator():
    tensors = data_converter.to_tensors(item)
    num_examples += len(tensors.inputs)
  tf.logging.info('Total examples: %d', num_examples)
  return num_examples
Example 13: mnist_images

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def mnist_images():
  # https://github.com/google/jax/blob/master/docs/gpu_memory_allocation.rst
  import tensorflow as tf
  tf.config.experimental.set_visible_devices([], "GPU")

  import tensorflow_datasets as tfds
  prep = lambda d: np.reshape(np.float32(next(tfds.as_numpy(d))['image']) / 256,
                              (-1, 784))

  dataset = tfds.load("mnist:1.0.0")
  return (prep(dataset['train'].shuffle(50000).batch(50000)),
          prep(dataset['test'].batch(10000)))
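A short driver for the function above; note that the snippet assumes np (NumPy) is already imported at module level:

import numpy as np  # assumed by mnist_images above

train_images, test_images = mnist_images()
print(train_images.shape, test_images.shape)  # (50000, 784) (10000, 784)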
Example 14: dataset

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def dataset(batch_size):
  import tensorflow_datasets as tfds
  import tensorflow as tf
  tf.random.set_random_seed(0)
  cifar = tfds.load('cifar10')

  def get_train_batches():
    return tfds.as_numpy(
        cifar['train'].map(lambda el: tf.cast(el['image'], image_dtype))
        .shuffle(1000).batch(batch_size).prefetch(1))

  test_batches = tfds.as_numpy(
      cifar['test'].map(lambda el: tf.cast(el['image'], image_dtype))
      .repeat().shuffle(1000).batch(batch_size).prefetch(1))
  return get_train_batches, test_batches
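Note that image_dtype is a free variable in this snippet, resolved in its original module; a hypothetical driver would need it bound first. The binding below is an assumption, not the original definition:

import numpy as np
import tensorflow as tf

image_dtype = np.float32  # assumption: the original module defines this elsewhere
get_train_batches, test_batches = dataset(batch_size=128)
first_batch = next(iter(get_train_batches()))
print(first_batch.shape)  # (128, 32, 32, 3) for CIFAR-10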
Example 15: mnist

# Required import: import tensorflow_datasets [as alias]
# Or: from tensorflow_datasets import as_numpy [as alias]
def mnist():
  # https://github.com/google/jax/blob/master/docs/gpu_memory_allocation.rst
  import tensorflow as tf
  tf.config.experimental.set_visible_devices([], "GPU")

  import tensorflow_datasets as tfds
  dataset = tfds.load("mnist:1.0.0")
  images = lambda d: np.reshape(np.float32(d['image']) / 256, (-1, 784))
  labels = lambda d: _one_hot(d['label'], 10)

  train = next(tfds.as_numpy(dataset['train'].shuffle(50000).batch(50000)))
  test = next(tfds.as_numpy(dataset['test'].batch(10000)))
  return images(train), labels(train), images(test), labels(test)
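_one_hot comes from the snippet's surrounding module; a minimal stand-in (an assumption, not the original definition) lets the example run end to end:

import numpy as np

def _one_hot(x, k, dtype=np.float32):
  # Encode integer labels x as one-hot rows over k classes.
  return np.array(x[:, None] == np.arange(k), dtype)

train_images, train_labels, test_images, test_labels = mnist()
print(train_images.shape, train_labels.shape)  # (50000, 784) (50000, 10)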