This page collects typical usage examples of the tensorflow.dataset method in Python. If you are wondering what tensorflow.dataset does in practice and how to use it, the curated code examples below may help. You can also explore further usage examples from the tensorflow module to which this method belongs.

A total of 15 code examples of tensorflow.dataset are shown below, sorted by popularity by default.

Example 1: _test

# Required import: import tensorflow [as alias]
# Or: from tensorflow import dataset [as alias]
def _test(self, ts, dataset=True):
    """Test an epoch of data using either the input loader or `tf.dataset`

    In non-`tf.dataset` mode, we cycle the loader data feed, pull a batch, and pass it in via the feed dict.
    When we use `tf.dataset`s under the hood, this function simply uses the loader to know how many steps
    to run.

    :param ts: A data feed
    :param kwargs: See below

    :Keyword Arguments:
        * *dataset* (`bool`) Set to `True` if using `tf.dataset`s, defaults to `True`
        * *reporting_fns* (`list`) A list of reporting hooks to use
        * *verbose* (`dict`) A dictionary containing a `console` boolean and a `file` name if on

    :return: Metrics
    """
    return self.evaluator.test(ts, dataset=dataset)

Example 2: _test

# Required import: import tensorflow [as alias]
# Or: from tensorflow import dataset [as alias]
def _test(self, ts, steps=0, **kwargs):
    """Test an epoch of data using either the input loader or `tf.dataset`

    In non-`tf.dataset` mode, we cycle the loader data feed, pull a batch, and pass it in via the feed dict.
    When we use `tf.dataset`s under the hood, this function simply uses the loader to know how many steps
    to run.

    :param ts: A data feed
    :param kwargs: See below

    :Keyword Arguments:
        * *dataset* (`bool`) Set to `True` if using `tf.dataset`s, defaults to `True`
        * *reporting_fns* (`list`) A list of reporting hooks to use
        * *verbose* (`dict`) A dictionary containing a `console` boolean and a `file` name if on

    :return: Metrics
    """
    return self.evaluator.test(ts, steps, **kwargs)

Example 3: _evaluate

# Required import: import tensorflow [as alias]
# Or: from tensorflow import dataset [as alias]
def _evaluate(self, es, reporting_fns, **kwargs):
    """Run the model with beam search and report Bleu.

    :param es: A `tf.dataset` of input
    :param reporting_fns: Input hooks
    """
    preds = []
    golds = []
    start = time.time()
    for features, tgt in es:
        features['dst'] = tgt[:, :-1]
        tgt_lens = features.pop('tgt_len')
        top_preds = self.model.predict(features, make_input=False, **kwargs)
        preds.extend(convert_seq2seq_preds(top_preds[:, 0, :], self.tgt_rlut))
        golds.extend(convert_seq2seq_golds(tgt, tgt_lens, self.tgt_rlut))
    metrics = {'bleu': bleu(preds, golds, self.bleu_n_grams)[0]}
    self.report(
        0, metrics, start, 'Test', 'EPOCH', reporting_fns
    )
    return metrics

Example 4: setup_dataset

# Required import: import tensorflow [as alias]
# Or: from tensorflow import dataset [as alias]
def setup_dataset(
        self,
        placeholders: Tuple[tf.placeholder, tf.placeholder],
        batch_size: int = None,
):
    self.batch_size = self.args.batch_size if batch_size is None else batch_size

    dataset = tf.data.Dataset.from_tensor_slices(placeholders)
    dataset = dataset.map(self._parse_function, num_parallel_calls=self.args.num_threads).prefetch(
        self.args.prefetch_factor * self.batch_size)

    if self.is_training:
        dataset = dataset.repeat()
        if self.shuffle:
            dataset = dataset.shuffle(buffer_size=self.args.buffer_size)

    self.dataset = dataset.batch(self.batch_size)
    self.iterator = self.dataset.make_initializable_iterator()
    self.next_elem = self.iterator.get_next()
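
A minimal, self-contained sketch of the placeholder-backed, initializable-iterator pattern used above (assuming TensorFlow 1.x graph mode; the placeholder shapes, dtypes, and dummy data are illustrative only, not from the source):

import numpy as np
import tensorflow as tf  # assumes TensorFlow 1.x graph mode

# Hypothetical placeholders standing in for the tuple passed to setup_dataset
features_ph = tf.placeholder(tf.float32, shape=[None, 4])
labels_ph = tf.placeholder(tf.int64, shape=[None])

dataset = tf.data.Dataset.from_tensor_slices((features_ph, labels_ph))
dataset = dataset.repeat().shuffle(buffer_size=100).batch(32)
iterator = dataset.make_initializable_iterator()
next_elem = iterator.get_next()

with tf.Session() as sess:
    # Initializable iterators are handed their backing data at initialization time
    sess.run(iterator.initializer,
             feed_dict={features_ph: np.random.rand(256, 4),
                        labels_ph: np.random.randint(0, 10, size=256)})
    batch_features, batch_labels = sess.run(next_elem)
    print(batch_features.shape, batch_labels.shape)  # (32, 4) (32,)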

Example 5: _make_input_fn

# Required import: import tensorflow [as alias]
# Or: from tensorflow import dataset [as alias]
def _make_input_fn(self,
                   input_pattern,
                   batch_size,
                   list_size,
                   randomize_input=True,
                   num_epochs=None):
    """Returns the input function for the ranking model.

    Args:
      input_pattern: (str) File pattern for the input data.
      batch_size: (int) The number of input examples to process per batch.
      list_size: (int) The list size for an ELWC example.
      randomize_input: (bool) If true, randomize input example order. It should
        almost always be true except for unittest/debug purposes.
      num_epochs: (int) The number of times the input dataset must be repeated.
        None to repeat the data indefinitely.

    Returns:
      An `input_fn` for `tf.estimator.Estimator`.
    """

    def _input_fn():
        """`input_fn` for the `Estimator`."""
        return self._make_dataset(
            batch_size=batch_size,
            list_size=list_size,
            input_pattern=input_pattern,
            randomize_input=randomize_input,
            num_epochs=num_epochs)

    return _input_fn
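
The closure pattern above (a factory that captures its arguments and returns a zero-argument `input_fn`) is the standard way to feed a `tf.estimator.Estimator`. A self-contained sketch of the same pattern with in-memory toy data (the field name `f1` and the shapes are made up; a real ranking pipeline reads ELWC TFRecords instead):

import numpy as np
import tensorflow as tf

def make_input_fn(features, labels, batch_size=32, num_epochs=None):
    # Same spirit as _make_input_fn: capture the arguments in a closure, return input_fn
    def _input_fn():
        dataset = tf.data.Dataset.from_tensor_slices((features, labels))
        return dataset.shuffle(1000).repeat(num_epochs).batch(batch_size)
    return _input_fn

train_input_fn = make_input_fn(
    features={'f1': np.random.rand(256, 4).astype(np.float32)},
    labels=np.random.randint(0, 2, size=256),
    num_epochs=1)

# An Estimator calls the input_fn itself, e.g. estimator.train(input_fn=train_input_fn)
dataset = train_input_fn()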

Example 6: to_tensors

# Required import: import tensorflow [as alias]
# Or: from tensorflow import dataset [as alias]
def to_tensors(ts, lengths_key):
    """Convert a data feed into a tuple of `features` (`dict`) and `y` values

    This method is required to produce `tf.dataset`s from the input data feed.
    Any fields ending with `_lengths` are ignored, unless they match the
    `lengths_key` name (as are `ids`)

    :param ts: The data feed to convert
    :param lengths_key: This is a field passed from the model params specifying the source of truth for the temporal lengths
    :return: A `tuple` of `features` and `y` (labels)
    """
    keys = ts[0].keys()
    # This is kind of a hack
    keys = [k for k in keys if '_lengths' not in k and k != 'ids'] + [lengths_key]
    features = dict((k, []) for k in keys)

    for sample in ts:
        for k in features.keys():
            # add each sample
            for s in sample[k]:
                features[k].append(s)

    features['lengths'] = features[lengths_key]
    del features[lengths_key]
    features = dict((k, np.stack(v)) for k, v in features.items())
    y = features.pop('y')
    return features, y
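
A small sketch of how the `(features, y)` tuple produced by `to_tensors` is typically turned into a `tf.dataset`; the field names and shapes below are made up for illustration:

import numpy as np
import tensorflow as tf

# Hypothetical output of to_tensors: a dict of stacked numpy arrays plus labels
features = {
    'word': np.random.randint(0, 100, size=(8, 20)),  # 8 samples of length 20
    'lengths': np.full(8, 20, dtype=np.int32),
}
y = np.random.randint(0, 5, size=(8,))

# from_tensor_slices accepts a (dict, array) pair directly, one sample per element
dataset = tf.data.Dataset.from_tensor_slices((features, y)).batch(4)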

Example 7: process_batch

# Required import: import tensorflow [as alias]
# Or: from tensorflow import dataset [as alias]
def process_batch(self, batch_dict, handle, txts, dataset=True):
    if dataset:
        guess = self.sess.run(self.model.best)
    else:
        feed_dict = self.model.make_input(batch_dict)
        guess = self.sess.run(self.model.best, feed_dict=feed_dict)

    sentence_lengths = batch_dict[self.model.lengths_key]
    ids = batch_dict['ids']
    truth = batch_dict['y']
    correct_labels = 0
    total_labels = 0

    # For fscore
    gold_chunks = []
    pred_chunks = []

    # For each sentence
    for b in range(len(guess)):
        length = sentence_lengths[b]
        sentence = guess[b][:length]
        # truth[b] is padded, cutting at :length gives us back the true length
        gold = truth[b][:length]
        valid_guess = sentence[gold != Offsets.PAD]
        valid_gold = gold[gold != Offsets.PAD]
        valid_sentence_length = np.sum(gold != Offsets.PAD)
        correct_labels += np.sum(np.equal(valid_guess, valid_gold))
        total_labels += valid_sentence_length

        gold_chunks.append(set(to_spans(valid_gold, self.idx2label, self.span_type, self.verbose)))
        pred_chunks.append(set(to_spans(valid_guess, self.idx2label, self.span_type, self.verbose)))

        # Should we write a file out? If so, we have to have txts
        if handle is not None:
            id = ids[b]
            txt = txts[id]
            write_sentence_conll(handle, valid_guess, valid_gold, txt, self.idx2label)

    return correct_labels, total_labels, gold_chunks, pred_chunks

Example 8: to_tensors

# Required import: import tensorflow [as alias]
# Or: from tensorflow import dataset [as alias]
def to_tensors(ts, src_lengths_key, dst=False):
    """Convert a data feed into a tuple of `features` (`dict`) and `y` values

    This method is required to produce `tf.dataset`s from the input data feed.
    Any fields ending with `_lengths` are ignored, unless they match the
    `src_lengths_key` or `tgt_lengths_key`, in which case they are converted to `src_len` and `tgt_len`

    :param ts: The data feed to convert
    :param src_lengths_key: This is a field passed from the model params specifying the source of truth for the temporal lengths
    :param dst: A `bool` that says if we should prepare a `dst` tensor. This is needed in distributed mode
    :return: A `tuple` of `features` and `y` (labels)
    """
    keys = ts[0].keys()
    # This is kind of a hack
    keys = [k for k in keys if '_lengths' not in k and k != 'ids'] + [src_lengths_key, "tgt_lengths"]
    features = dict((k, []) for k in keys)

    for sample in ts:
        for k in keys:
            for s in sample[k]:
                features[k].append(s)

    features['src_len'] = features[src_lengths_key]
    del features[src_lengths_key]
    features['tgt_len'] = features['tgt_lengths']
    del features['tgt_lengths']
    features = dict((k, np.stack(v).astype(np.int32)) for k, v in features.items())
    if dst:
        features['dst'] = features['tgt'][:, :-1]
    tgt = features.pop('tgt')
    return features, tgt

Example 9: distribute

# Required import: import tensorflow [as alias]
# Or: from tensorflow import dataset [as alias]
def distribute(self, dataset):
    return self.strategy.experimental_distribute_dataset(dataset)
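
A minimal sketch of how `experimental_distribute_dataset` is typically used, assuming TensorFlow 2.x and a `tf.distribute.MirroredStrategy`; the toy dataset is illustrative only:

import tensorflow as tf  # assumes TensorFlow 2.x

strategy = tf.distribute.MirroredStrategy()  # one replica per visible GPU (or the CPU)

dataset = tf.data.Dataset.from_tensor_slices(tf.range(16)).batch(8)
# Wrap the dataset so each global batch is split across the strategy's replicas
dist_dataset = strategy.experimental_distribute_dataset(dataset)

for per_replica_batch in dist_dataset:
    # With one replica this is a plain tensor; with several it is a PerReplica value
    print(per_replica_batch)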

Example 10: test

# Required import: import tensorflow [as alias]
# Or: from tensorflow import dataset [as alias]
def test(self, vs, reporting_fns, phase):
    """Run an epoch of testing over the dataset

    If we are using a `tf.dataset`-based `fit_func`, we will just
    cycle the number of steps and let the `dataset` yield new batches.

    If we are using `feed_dict`s, we convert each batch from the `DataFeed`
    and pass that into TF as the `feed_dict`

    :param vs: A validation set
    :param reporting_fns: Reporting hooks
    :param phase: The phase of evaluation (`Test`, `Valid`)
    :param dataset: (`bool`) Are we using `tf.dataset`s
    :return: Metrics
    """
    total_loss = 0.0
    total_toks = 0
    epochs = 0
    if phase == 'Valid':
        self.valid_epochs += 1
        epochs = self.valid_epochs

    SET_TRAIN_FLAG(False)

    start = time.time()
    h = None
    for features, y in vs:
        if self.model.requires_state:
            loss_value, h = loss_with_state(self.model, h, features, y)
        else:
            loss_value = loss_without_state(self.model, features, y)
        loss_value = loss_value.numpy()
        toks = self._num_toks(y)
        total_loss += loss_value * tf.cast(toks, tf.float32).numpy()
        total_toks += toks.numpy()

    metrics = self.calc_metrics(total_loss, total_toks)
    self.report(
        epochs, metrics, start,
        phase, 'EPOCH', reporting_fns
    )
    return metrics
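
The `calc_metrics` helper is not shown in this snippet; for a token-level language-model evaluator like the one above it commonly reduces the accumulated loss to an average per-token loss and a perplexity. A hedged sketch of that assumption:

import math

def calc_metrics(total_loss, total_toks):
    # Assumed definition, not taken from the source: average token loss and perplexity
    avg_loss = total_loss / float(total_toks)
    return {'avg_loss': avg_loss, 'perplexity': math.exp(avg_loss)}

print(calc_metrics(693.1, 1000))  # {'avg_loss': 0.6931, 'perplexity': ~2.0}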

Example 11: _get_dataset_files

# Required import: import tensorflow [as alias]
# Or: from tensorflow import dataset [as alias]
def _get_dataset_files(dataset_info, mode, root):
    """Generates lists of files for a given dataset version."""
    basepath = dataset_info.basepath
    base = os.path.join(root, basepath, mode)
    if mode == 'train':
        num_files = dataset_info.train_size
    else:
        num_files = dataset_info.test_size

    length = len(str(num_files))
    template = '{:0%d}-of-{:0%d}.tfrecord' % (length, length)
    record_paths = [  # indexing runs from 1 to n
        os.path.join(base, template.format(i, num_files))
        for i in range(1, num_files + 1)]
    return record_paths
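
The returned shard paths are typically handed straight to a TFRecord reader; a hedged sketch (the directory layout and the parse function are hypothetical):

import tensorflow as tf

# record_paths would come from _get_dataset_files(dataset_info, 'train', root)
record_paths = ['/data/gqn/rooms_ring_camera/train/001-of-100.tfrecord']  # hypothetical path

dataset = tf.data.TFRecordDataset(record_paths)
# Each element is a serialized tf.train.Example; a dataset-specific parser decodes it:
# dataset = dataset.map(parse_example_fn)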

Example 12: resize_and_padding_before_augmentation

# Required import: import tensorflow [as alias]
# Or: from tensorflow import dataset [as alias]
def resize_and_padding_before_augmentation(self, image, size):
    # If width > height, resize height to the model's input height while preserving aspect ratio
    # If height > width, resize width to the model's input width while preserving aspect ratio
    if self.args.debug_augmentation:
        assert size[0] == size[1], "resize_and_padding_before_augmentation only supports square target image"
        image = tf.expand_dims(image, 0)

        image_dims = tf.shape(image)
        height = image_dims[1]
        width = image_dims[2]
        min_size = min(*size)

        width_aspect = tf.maximum(min_size, tf.cast(width * min_size / height, dtype=tf.int32))
        height_aspect = tf.maximum(min_size, tf.cast(height * min_size / width, dtype=tf.int32))

        image = tf.image.resize_bilinear(image, (height_aspect, width_aspect))
        image = image[:, :self.padded_max_size, :self.padded_max_size, :]

        # Pad the image with zeros until it has dimensions (target_height, target_width)
        image = tf.image.pad_to_bounding_box(
            image,
            offset_height=tf.maximum(self.padded_max_size - height_aspect, 0),
            offset_width=tf.maximum(self.padded_max_size - width_aspect, 0),
            target_height=self.padded_max_size,
            target_width=self.padded_max_size,
        )

        image = tf.squeeze(image, 0)
        return image
    else:
        # Have to return some dummy tensor which has .get_shape() to tf.dataset
        return tf.constant(0, shape=self.padded_original_image_dummy_shape, dtype=tf.uint8, name="dummy")
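
For reference, `tf.image.pad_to_bounding_box` pads with zeros: `offset_height`/`offset_width` rows and columns are added above and to the left, and the remainder below and to the right until the target size is reached. A tiny self-contained check:

import tensorflow as tf

image = tf.ones([1, 2, 3, 1])  # a batch of one 2x3 single-channel image
padded = tf.image.pad_to_bounding_box(
    image, offset_height=1, offset_width=0, target_height=4, target_width=4)
print(padded.shape)  # (1, 4, 4, 1): one zero row above, one below, one zero column on the right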

Example 13: add_arguments

# Required import: import tensorflow [as alias]
# Or: from tensorflow import dataset [as alias]
def add_arguments(parser):
    g_common = parser.add_argument_group("(DataWrapperBase) Common arguments for all data wrappers.")
    g_common.add_argument("--dataset_path", required=True, type=str, help="The name of the dataset to load.")
    g_common.add_argument("--dataset_split_name", required=True, type=str, nargs="*",
                          help="The name of the train/test split. Supports multiple splits.")
    g_common.add_argument("--batch_size", default=32, type=utils.positive_int,
                          help="The number of examples in a batch.")
    g_common.add_argument("--no-shuffle", dest="shuffle", action="store_false")
    g_common.add_argument("--shuffle", dest="shuffle", action="store_true")
    g_common.set_defaults(shuffle=True)
    g_common.add_argument("--width", required=True, type=int)
    g_common.add_argument("--height", required=True, type=int)
    g_common.add_argument("--no-debug_augmentation", dest="debug_augmentation", action="store_false")
    g_common.add_argument("--debug_augmentation", dest="debug_augmentation", action="store_true")
    g_common.set_defaults(debug_augmentation=False)
    g_common.add_argument("--max_padded_size", default=224, type=int,
                          help=("We will resize & pad the original image "
                                "until it has dimensions (padded_size, padded_size). "
                                "Recommended to set this value to width (or height) * 1.8 ~ 2."))
    g_common.add_argument("--augmentation_method", type=str, required=True,
                          choices=_available_augmentation_methods)
    g_common.add_argument("--num_threads", default=8, type=int)
    g_common.add_argument("--buffer_size", default=1000, type=int)
    g_common.add_argument("--prefetch_factor", default=100, type=int)
    g_common.add_argument("--rotation_range", default=0, type=int,
                          help="Receives the maximum rotation angle in degrees: "
                               "the image is rotated by an angle "
                               "chosen uniformly from [-rotation_range, rotation_range], "
                               "and then cropped appropriately to remove dark areas.\n"
                               "So be aware that the rotation performs a certain kind of zooming.")
    g_common.add_argument("--no-has_sub_dataset", dest="has_sub_dataset", action="store_false")
    g_common.add_argument("--has_sub_dataset", dest="has_sub_dataset", action="store_true")
    g_common.set_defaults(has_sub_dataset=False)
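
The paired `--shuffle` / `--no-shuffle` flags above use a common argparse idiom: both options write to the same `dest`, and `set_defaults` supplies the value when neither flag is given. A self-contained sketch of just that idiom:

import argparse

parser = argparse.ArgumentParser()
g = parser.add_argument_group("common")
g.add_argument("--no-shuffle", dest="shuffle", action="store_false")
g.add_argument("--shuffle", dest="shuffle", action="store_true")
g.set_defaults(shuffle=True)

print(parser.parse_args([]).shuffle)                # True (default)
print(parser.parse_args(["--no-shuffle"]).shuffle)  # False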

Example 14: __init__

# Required import: import tensorflow [as alias]
# Or: from tensorflow import dataset [as alias]
def __init__(self,
             context_feature_columns,
             example_feature_columns,
             hparams,
             estimator,
             label_feature_name="relevance",
             label_feature_type=tf.int64,
             dataset_reader=tf.data.TFRecordDataset,
             best_exporter_metric=None,
             best_exporter_metric_higher_better=True,
             size_feature_name=None):
    """Constructor.

    Args:
      context_feature_columns: (dict) Context (aka, query) feature columns.
      example_feature_columns: (dict) Example (aka, document) feature columns.
      hparams: (dict) A dict containing model hyperparameters.
      estimator: (`Estimator`) An `Estimator` instance for model train and eval.
      label_feature_name: (str) The name of the label feature.
      label_feature_type: (`tf.dtype`) The value type of the label feature.
      dataset_reader: (`tf.Dataset`) The dataset format for the input files.
      best_exporter_metric: (str) Metric key for exporting the best model. If
        None, exports the model with the minimal loss value.
      best_exporter_metric_higher_better: (bool) If a higher metric is better.
        This is only used if `best_exporter_metric` is not None.
      size_feature_name: (str) If set, populates the feature dictionary with
        this name and the corresponding value is a `tf.int32` Tensor of shape
        [batch_size] indicating the actual sizes of the example lists before
        padding and truncation. If None, which is the default, this feature is
        not generated.
    """
    self._validate_parameters(estimator, hparams)

    self._context_feature_columns = context_feature_columns
    self._example_feature_columns = example_feature_columns
    self._hparams = hparams
    self._estimator = estimator
    self._label_feature_name = label_feature_name
    self._label_feature_type = label_feature_type
    self._dataset_reader = dataset_reader
    self._best_exporter_metric = best_exporter_metric
    self._best_exporter_metric_higher_better = (
        best_exporter_metric_higher_better)
    self._size_feature_name = size_feature_name

Example 15: _make_dataset

# Required import: import tensorflow [as alias]
# Or: from tensorflow import dataset [as alias]
def _make_dataset(self,
                  batch_size,
                  list_size,
                  input_pattern,
                  randomize_input=True,
                  num_epochs=None):
    """Builds a dataset for the TF-Ranking model.

    Args:
      batch_size: (int) The number of input examples to process per batch. Use
        params['batch_size'] for TPUEstimator, and `batch_size` for Estimator.
      list_size: (int) The list size for an ELWC example.
      input_pattern: (str) File pattern for the input data.
      randomize_input: (bool) If true, randomize input example order. It should
        almost always be true except for unittest/debug purposes.
      num_epochs: (int) The number of times the input dataset must be repeated.
        None to repeat the data indefinitely.

    Returns:
      A tuple of (feature tensors, label tensor).
    """
    context_feature_spec = tf.feature_column.make_parse_example_spec(
        self._context_feature_columns.values())

    label_column = tf.feature_column.numeric_column(
        self._label_feature_name,
        dtype=self._label_feature_type,
        default_value=_PADDING_LABEL)
    example_feature_spec = tf.feature_column.make_parse_example_spec(
        list(self._example_feature_columns.values()) + [label_column])

    dataset = tfr_data.build_ranking_dataset(
        file_pattern=input_pattern,
        data_format=tfr_data.ELWC,
        batch_size=batch_size,
        list_size=list_size,
        context_feature_spec=context_feature_spec,
        example_feature_spec=example_feature_spec,
        reader=self._dataset_reader,
        reader_args=None,
        num_epochs=num_epochs,
        shuffle=randomize_input,
        shuffle_buffer_size=1000,
        shuffle_seed=None,
        prefetch_buffer_size=10000,
        reader_num_threads=64,
        sloppy_ordering=True,
        drop_final_batch=False,
        num_parser_threads=None,
        size_feature_name=self._size_feature_name)

    return dataset.map(self._features_and_labels)
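
For context, `tf.feature_column.make_parse_example_spec` used above converts a list of feature columns into a parsing spec for `tf.io.parse_example`; a tiny self-contained sketch with made-up column names:

import tensorflow as tf

context_cols = [tf.feature_column.numeric_column('query_length', dtype=tf.int64)]
example_cols = [
    tf.feature_column.numeric_column('document_score', dtype=tf.float32),
    tf.feature_column.numeric_column('relevance', dtype=tf.int64, default_value=-1),
]

context_spec = tf.feature_column.make_parse_example_spec(context_cols)
example_spec = tf.feature_column.make_parse_example_spec(example_cols)
# Each spec maps a feature name to the FixedLenFeature used when parsing tf.train.Examples
print(context_spec, example_spec)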