本文整理汇总了 Python 中 tensorflow_datasets.load 方法的典型用法代码示例。如果您正苦于以下问题:Python tensorflow_datasets.load 方法的具体用法?Python tensorflow_datasets.load 怎么用?Python tensorflow_datasets.load 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 tensorflow_datasets 的用法示例。
在下文中一共展示了 tensorflow_datasets.load 方法的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。注意:其中部分示例(示例4、5、6、10)实际调用的是 pickle.load、json.load 或 np.load,而非 tensorflow_datasets.load。
示例1: load
# 需要导入模块: import tensorflow_datasets [as 别名]
# 或者: from tensorflow_datasets import load [as 别名]
def load(data_set_name, **kwargs):
    """
    Load a TFDS data set and build its training/testing pipelines.

    :param data_set_name: data set name--call tfds.list_builders() for options
    :param kwargs: extra keyword arguments forwarded unchanged to configure_data_set
    :return:
        train_ds: result of configure_data_set(..., is_training=True), or None
            when the data set info has no 'train' split
        test_ds: result of configure_data_set(..., is_training=False), or None
            when the data set info has no 'test' split
        info: data set info object returned by tfds.load
    """
    # NOTE(review): the data is loaded once with split=tfds.Split.ALL and that
    # same object is handed to both configure_data_set calls below; only the
    # is_training flag differs. Confirm configure_data_set separates the
    # splits itself.
    ds, info = tfds.load(name=data_set_name, split=tfds.Split.ALL, with_info=True)

    train_ds = (
        configure_data_set(ds=ds, info=info, is_training=True, **kwargs)
        if 'train' in info.splits
        else None
    )
    test_ds = (
        configure_data_set(ds=ds, info=info, is_training=False, **kwargs)
        if 'test' in info.splits
        else None
    )
    return train_ds, test_ds, info
示例2: build_dataset
# 需要导入模块: import tensorflow_datasets [as 别名]
# 或者: from tensorflow_datasets import load [as 别名]
def build_dataset(
    shape: Tuple[int, int],
    name: str="mnist",
    train_batch_size: int=32,
    valid_batch_size: int=32
):
    """Build batched train/test pipelines plus metadata for a TFDS dataset.

    Args:
        shape: forwarded to _parse_function for every example.
        name: TFDS dataset name.
        train_batch_size: batch size of the "train" pipeline.
        valid_batch_size: batch size of the "test" pipeline.

    Returns:
        A dict with keys "num_train", "num_test", "num_classes", "channels",
        "train" and "test" (the last two are batched datasets).
    """
    # Example counts come from the builder metadata, before anything is loaded.
    split_meta = tfds.builder(name).info.splits
    num_train = split_meta['train'].num_examples
    num_test = split_meta['test'].num_examples

    [ds_train, ds_test], info = tfds.load(name=name, split=["train", "test"], with_info=True)
    num_classes = info.features["label"].num_classes
    # Last dimension of the static image shape.
    channels = ds_train.output_shapes["image"][-1].value

    def _prepare(data):
        return _parse_function(data, shape, num_classes, channels)

    # Both pipelines are shuffled and repeated indefinitely before parsing.
    train_batches = ds_train.shuffle(1024).repeat().map(_prepare).batch(train_batch_size)
    test_batches = ds_test.shuffle(1024).repeat().map(_prepare).batch(valid_batch_size)

    return {
        "num_train": num_train,
        "num_test": num_test,
        "num_classes": num_classes,
        "channels": channels,
        "train": train_batches,
        "test": test_batches,
    }
示例3: _build
# 需要导入模块: import tensorflow_datasets [as 别名]
# 或者: from tensorflow_datasets import load [as 别名]
def _build(self):
    """Create one minibatch from the configured TFDS split and wrap it in Dataset.

    Returns:
        A Dataset whose `tokens` is the sparse 'sentence' tensor, `num_tokens`
        is self.get_row_lengths(tokens) and `sentiment` is the 'sentiment'
        tensor.
    """
    batch_size = self._batch_size

    pipeline = tfds.load(name=self._dataset_name, split=self._mode)
    pipeline = pipeline.map(parse).repeat()
    if self._shuffle:
        # Shuffle buffer holds 100 batches worth of examples.
        pipeline = pipeline.shuffle(batch_size * 100)

    iterator = pipeline.batch(batch_size).make_one_shot_iterator()
    minibatch = iterator.get_next()

    # Pin the batch dimension statically on the label tensor.
    minibatch['sentiment'].set_shape([batch_size])

    # Rebuild the sparse sentence tensor so its dense shape is
    # [batch_size x seq_length] with a static batch dimension.
    raw_sentence = minibatch['sentence']
    minibatch['sentence'] = tf.SparseTensor(
        indices=raw_sentence.indices,
        values=raw_sentence.values,
        dense_shape=[batch_size, raw_sentence.dense_shape[1]])

    tokens = minibatch['sentence']
    return Dataset(
        tokens=tokens,
        num_tokens=self.get_row_lengths(tokens),
        sentiment=minibatch['sentiment'],
    )
示例4: load_data_planetoid
# 需要导入模块: import tensorflow_datasets [as 别名]
# 或者: from tensorflow_datasets import load [as 别名]
def load_data_planetoid(name, path, splits_path=None, row_normalize=False,
                        data_container_class=PlanetoidDataset):
    """Load Planetoid data and wrap it in a data container.

    Args:
        name: dataset name; passed to load_from_planetoid_files and to the
            container builder.
        path: location handed to load_from_planetoid_files (only used when
            `splits_path` is None).
        splits_path: optional path to a pickle file holding a 9-tuple
            (adj, features, _, _, _, train_mask, val_mask, test_mask, labels).
            When given, the data is read from it instead of the Planetoid
            files.
        row_normalize: forwarded to build_from_adjacency_matrix.
        data_container_class: class providing build_from_adjacency_matrix.

    Returns:
        Whatever data_container_class.build_from_adjacency_matrix returns.
    """
    if splits_path is None:
        # Load from file in Planetoid format.
        loaded = load_from_planetoid_files(name, path)
    else:
        # Otherwise load from a path where we saved a pickle with random splits.
        logging.info('Loading from splits path: %s', splits_path)
        # SECURITY: unpickling can execute arbitrary code; only point
        # splits_path at files from a trusted source.
        # The context manager closes the file handle promptly (the previous
        # bare open() left it to the garbage collector).
        with open(splits_path, 'rb') as splits_file:
            loaded = pickle.load(splits_file)

    (adj, features, _, _, _, train_mask, val_mask, test_mask,
     labels) = loaded

    return data_container_class.build_from_adjacency_matrix(
        name,
        adj,
        features,
        train_mask,
        val_mask,
        test_mask,
        labels,
        row_normalize=row_normalize)
示例5: get_train_examples
# 需要导入模块: import tensorflow_datasets [as 别名]
# 或者: from tensorflow_datasets import load [as 别名]
def get_train_examples(self, data_dir, filename=None):
    """
    Read the training JSON file and turn its "data" entry into examples.

    Args:
        data_dir: Directory containing the data file; None is treated as "".
        filename: Optional file name overriding `self.train_file`.

    Returns:
        The result of `self._create_examples(input_data, "train")`.

    Raises:
        ValueError: if `self.train_file` is None.
    """
    base_dir = "" if data_dir is None else data_dir

    if self.train_file is None:
        raise ValueError("SquadProcessor should be instantiated via SquadV1Processor or SquadV2Processor")

    chosen_name = self.train_file if filename is None else filename
    train_path = os.path.join(base_dir, chosen_name)

    with open(train_path, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)["data"]
    return self._create_examples(input_data, "train")
示例6: get_dev_examples
# 需要导入模块: import tensorflow_datasets [as 别名]
# 或者: from tensorflow_datasets import load [as 别名]
def get_dev_examples(self, data_dir, filename=None):
    """
    Read the evaluation JSON file and turn its "data" entry into examples.

    Args:
        data_dir: Directory containing the data file; None is treated as "".
        filename: Optional file name overriding `self.dev_file`.

    Returns:
        The result of `self._create_examples(input_data, "dev")`.

    Raises:
        ValueError: if `self.dev_file` is None.
    """
    base_dir = "" if data_dir is None else data_dir

    if self.dev_file is None:
        raise ValueError("SquadProcessor should be instantiated via SquadV1Processor or SquadV2Processor")

    chosen_name = self.dev_file if filename is None else filename
    dev_path = os.path.join(base_dir, chosen_name)

    with open(dev_path, "r", encoding="utf-8") as reader:
        input_data = json.load(reader)["data"]
    return self._create_examples(input_data, "dev")
示例7: prepare_glue_examples
# 需要导入模块: import tensorflow_datasets [as 别名]
# 或者: from tensorflow_datasets import load [as 别名]
def prepare_glue_examples(tokenizer, task_name='mrpc', split_name='train'):
    """Load one GLUE split from TFDS and convert it to a TensorDataset.

    Args:
        tokenizer: tokenizer handed to glue_convert_examples_to_features.
        task_name: GLUE task; selects the processor, the TFDS config
            ("glue/<task_name>") and the task used for feature conversion.
        split_name: key of the split to take from the loaded TFDS dict.

    Returns:
        A TensorDataset of (input_ids, attention_mask, token_type_ids, labels),
        each a torch.long tensor.
    """
    processor = glue_processors[task_name]()

    def _iter_examples(tf_dataset):
        # Lazily convert each TFDS tensor dict into a processor example.
        # NOTE(review): the original carried a disabled
        # `processor.tfds_map(ex)` call here; confirm whether label
        # remapping is required.
        for tensor_dict in tf_dataset:
            yield processor.get_example_from_tensor_dict(tensor_dict)

    tf_data = tensorflow_datasets.load(f"glue/{task_name}")[split_name]
    examples = _iter_examples(tf_data)

    # Use the requested task rather than a hard-coded 'mrpc', so the
    # conversion agrees with the processor and the data loaded above.
    features = glue_convert_examples_to_features(examples,
                                                 tokenizer,
                                                 max_length=128,
                                                 task=task_name)

    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
    all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long)
    all_labels = torch.tensor([f.label for f in features], dtype=torch.long)

    return TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, all_labels)
示例8: _load_dataset
# 需要导入模块: import tensorflow_datasets [as 别名]
# 或者: from tensorflow_datasets import load [as 别名]
def _load_dataset(self, split):
    """Load one split of the underlying dataset.

    Args:
        split: Name of the split to load.

    Returns:
        The fake dataset from `self._make_fake_dataset(split)` when
        FLAGS.data_fake_dataset is set; otherwise the TFDS split after
        `self._replace_labels`, mapped through `self._parse_fn` and
        prefetched.
    """
    if FLAGS.data_fake_dataset:
        return self._make_fake_dataset(split)

    tfds_name = self._tfds_name
    # File shuffling is disabled explicitly when reading the split.
    read_options = {"shuffle_files": False}
    raw = tfds.load(
        tfds_name,
        split=split,
        data_dir=FLAGS.tfds_data_dir,
        as_dataset_kwargs=read_options)

    relabeled = self._replace_labels(split, raw)
    parsed = relabeled.map(self._parse_fn)
    return parsed.prefetch(tf.contrib.data.AUTOTUNE)
示例9: ld_mnist
# 需要导入模块: import tensorflow_datasets [as 别名]
# 或者: from tensorflow_datasets import load [as 别名]
def ld_mnist():
    """Load MNIST train and test data as batched (image, label) datasets.

    Returns:
        An EasyDict with `train` (shuffled, batches of 128) and `test`
        (batches of 128); images are cast to float32 and divided by 255.
    """

    def _scale_image(image, label):
        # Cast to float32, then divide by 255.
        scaled = tf.cast(image, tf.float32) / 255
        return scaled, label

    splits, _ = tfds.load('mnist',
                          data_dir='gs://tfds-data/datasets',
                          with_info=True,
                          as_supervised=True)
    raw_train = splits['train']
    raw_test = splits['test']

    train_batches = raw_train.map(_scale_image).shuffle(10000).batch(128)
    test_batches = raw_test.map(_scale_image).batch(128)
    return EasyDict(train=train_batches, test=test_batches)
示例10: get_test_iterator
# 需要导入模块: import tensorflow_datasets [as 别名]
# 或者: from tensorflow_datasets import load [as 别名]
def get_test_iterator(self, tmp_dir):
    """Download the test array if needed and return an initializable iterator.

    Args:
        tmp_dir: directory passed to generator_utils.maybe_download.

    Returns:
        An initializable iterator over slices of the loaded array, after its
        first two axes are swapped and a trailing axis is appended.
    """
    local_name = os.path.basename(DATA_URL)
    path = generator_utils.maybe_download(tmp_dir, local_name, DATA_URL)

    with tf.io.gfile.GFile(path, "rb") as fp:
        frames = np.load(fp)

    # Swap axes 0 and 1, then add a trailing axis of size 1.
    frames = np.expand_dims(np.transpose(frames, (1, 0, 2, 3)), axis=-1)

    test_ds = tf.data.Dataset.from_tensor_slices(frames)
    return test_ds.make_initializable_iterator()
示例11: get_train_iterator
# 需要导入模块: import tensorflow_datasets [as 别名]
# 或者: from tensorflow_datasets import load [as 别名]
def get_train_iterator(self):
    """Return an initializable iterator over max-combined pairs of MNIST items.

    The "mnist:3.*.*" train split is repeated, mapped through `self.map_fn`,
    grouped into batches of two and reduced with an element-wise maximum over
    the batch axis.
    """
    digits = tfds.load("mnist:3.*.*", split=tfds.Split.TRAIN,
                       as_supervised=True).repeat()
    pairs = digits.map(self.map_fn).batch(2)

    def _overlay(pair):
        # Element-wise maximum across the two batched items.
        return tf.reduce_max(pair, axis=0)

    return pairs.map(_overlay).make_initializable_iterator()
示例12: train_and_eval_dataset
# 需要导入模块: import tensorflow_datasets [as 别名]
# 或者: from tensorflow_datasets import load [as 别名]
def train_and_eval_dataset(dataset_name, data_dir):
    """Return train and evaluation datasets, feature info and supervised keys.

    Args:
        dataset_name: a string, the name of the dataset; if it starts with
            "v1_" then we'll search T2T Problem registry for it, otherwise we
            assume it is a dataset from TFDS and load it from there.
        data_dir: directory where the data is located.

    Returns:
        a 4-tuple consisting of:
         * the train tf.data.Dataset
         * the eval tf.data.Dataset
         * information about features: a python dictionary with feature names
           as keys and an object as value that provides .shape and
           .num_classes.
         * supervised_keys: information what's the input and what's the
           target, ie., a pair of lists with input and target feature names
           (None when the dataset declares no supervised keys).

    Raises:
        ValueError: if the dataset has no train split, or has neither a
            validation nor a test split.
    """
    if dataset_name.startswith("v1_"):
        return _train_and_eval_dataset_v1(dataset_name[3:], data_dir)

    dataset_builder = tfds.builder(dataset_name, data_dir=data_dir)
    info = dataset_builder.info
    splits = info.splits

    if tfds.Split.TRAIN not in splits:
        raise ValueError("To train we require a train split in the dataset.")

    # Prefer the validation split for evaluation; fall back to test.
    if tfds.Split.VALIDATION in splits:
        eval_split = tfds.Split.VALIDATION
    elif tfds.Split.TEST in splits:
        eval_split = tfds.Split.TEST
    else:
        raise ValueError("We require a validation or test split in the dataset.")

    # Pass data_dir so the data is read from the same location the builder
    # (and therefore the split information checked above) was created with.
    train, valid = tfds.load(
        name=dataset_name,
        split=[tfds.Split.TRAIN, eval_split],
        data_dir=data_dir)

    keys = None
    if info.supervised_keys:
        input_key, target_key = info.supervised_keys[0], info.supervised_keys[1]
        keys = ([input_key], [target_key])
    return train, valid, info.features, keys
示例13: get_train_iterator
# 需要导入模块: import tensorflow_datasets [as 别名]
# 或者: from tensorflow_datasets import load [as 别名]
def get_train_iterator(self):
    """Return an initializable iterator over max-combined pairs of MNIST items.

    The "mnist" train split is repeated, mapped through `self.map_fn`, grouped
    into batches of two and reduced with an element-wise maximum over the
    batch axis.
    """
    source = tfds.load("mnist", split=tfds.Split.TRAIN, as_supervised=True)
    endless = source.repeat()
    paired = endless.map(self.map_fn).batch(2)

    def _merge_pair(pair):
        # Element-wise maximum across the two batched items.
        return tf.reduce_max(pair, axis=0)

    merged = paired.map(_merge_pair)
    return merged.make_initializable_iterator()
示例14: load
# 需要导入模块: import tensorflow_datasets [as 别名]
# 或者: from tensorflow_datasets import load [as 别名]
def load(*args, **kwargs):
    """Load the 'shapenet' dataset via tfds.load.

    All positional and keyword arguments are forwarded unchanged after the
    dataset name; the return value is whatever tfds.load returns.
    """
    dataset_name = 'shapenet'
    return tfds.load(dataset_name, *args, **kwargs)
示例15: load
# 需要导入模块: import tensorflow_datasets [as 别名]
# 或者: from tensorflow_datasets import load [as 别名]
def load(*args, **kwargs):
    """Load the 'model_net40' dataset via tfds.load.

    All positional and keyword arguments are forwarded unchanged after the
    dataset name; the return value is whatever tfds.load returns.
    """
    dataset_name = 'model_net40'
    return tfds.load(dataset_name, *args, **kwargs)