This article collects typical usage examples of the `tensorflow.data` method in Python. If you are wondering what `tensorflow.data` does, how to use it, or what real usage looks like, the curated code samples below may help. You can also explore further usage examples of the containing module, `tensorflow`.
The following presents 15 code examples of the `tensorflow.data` method, sorted by popularity by default.
Example 1: _init_dataset
# Required module import: import tensorflow [as alias]
# Or: from tensorflow import data [as alias]
def _init_dataset(self, **config):
    """Prepare the dataset for reading.

    This method should configure the dataset for later fetching through
    `_get_data`, such as downloading the data if it is not stored locally, or
    reading the list of data files from disk. Ideally, especially in the case
    of large images, this method should NOT read the whole dataset into memory,
    but rather prepare for faster subsequent fetching.

    Arguments:
        config: A configuration dictionary, given during object instantiation.

    Returns:
        An object subsequently passed to `_get_data`, e.g. a list of file paths
        and set splits.
    """
    raise NotImplementedError
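To make the contract concrete, here is a minimal sketch of an implementation; the `image_dir` config key, the PNG directory layout, and the 90/10 split are illustrative assumptions, not part of the original code.

from pathlib import Path

class FileListDataset:
    def _init_dataset(self, **config):
        # Only list the files here; actual reading is deferred to `_get_data`.
        image_dir = Path(config['image_dir'])  # assumed config key
        paths = sorted(str(p) for p in image_dir.glob('*.png'))
        n_train = int(0.9 * len(paths))  # illustrative 90/10 split
        return {'training': paths[:n_train], 'validation': paths[n_train:]}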
Example 2: _get_data
# Required module import: import tensorflow [as alias]
# Or: from tensorflow import data [as alias]
def _get_data(self, dataset, split_name, **config):
    """Reads the dataset splits using the TensorFlow `tf.data` API.

    This method should create a `tf.data.Dataset` object for the given data
    split, with named components defined through a dictionary mapping strings
    to tensors. It typically performs operations such as reading data from a
    file or from a Python generator, shuffling the elements, or applying data
    augmentation to the training split. It should, however, NOT batch the
    dataset (that is left to the model).

    Arguments:
        dataset: An object returned by the `_init_dataset` method.
        split_name: A string, the name of the requested split, either
            `"training"`, `"validation"` or `"test"`.
        config: A configuration dictionary, given during object instantiation.

    Returns:
        An object of type `tf.data.Dataset` corresponding to the requested split.
    """
    raise NotImplementedError
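Continuing the hypothetical `FileListDataset` sketch above, a matching `_get_data` might look like the following; the PNG decoding and the `shuffle_buffer` config key are assumptions for illustration.

import tensorflow as tf

    def _get_data(self, dataset, split_name, **config):
        def _parse(path):
            # Read and decode one image file into a named-component element.
            image = tf.image.decode_png(tf.io.read_file(path), channels=3)
            return {'image': tf.cast(image, tf.float32), 'name': path}

        data = tf.data.Dataset.from_tensor_slices(dataset[split_name]).map(_parse)
        if split_name == 'training':
            data = data.shuffle(config.get('shuffle_buffer', 1000))  # assumed key
        return data  # no batching here; that is left to the model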
Example 3: make_input_id_masker
# Required module import: import tensorflow [as alias]
# Or: from tensorflow import data [as alias]
def make_input_id_masker(tokenizer, seed):
    # One of BERT's unsupervised objectives is to mask some fraction of the
    # input tokens and predict the masked words.
    def masker(data):
        token_ids = data['token_ids']
        maybe_masked_input_ids, masked_lm_positions, masked_lm_ids, masked_lm_weights = \
            create_masked_lm_predictions(
                token_ids,
                # pre-training defaults from the BERT docs
                masked_lm_prob=0.15,
                max_predictions_per_seq=20,
                vocab=tokenizer.vocab,
                seed=seed)
        return {
            **data,
            'maybe_masked_input_ids': maybe_masked_input_ids,
            'masked_lm_positions': masked_lm_positions,
            'masked_lm_ids': masked_lm_ids,
            'masked_lm_weights': masked_lm_weights
        }
    return masker
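`masker` is meant to be applied element-wise with `tf.data.Dataset.map`; a hedged usage sketch (the `tokenizer` object and a dataset with a `'token_ids'` component are assumed):

masker = make_input_id_masker(tokenizer, seed=0)
dataset = dataset.map(masker)  # adds the masked-LM components to each element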
Example 4: make_extra_feature_cleaning
# Required module import: import tensorflow [as alias]
# Or: from tensorflow import data [as alias]
def make_extra_feature_cleaning():
    def extra_feature_cleaning(data):
        data['num_authors'] = tf.minimum(data['num_authors'], 6) - 1
        data['year'] = data['year'] - 2007
        # some extras
        equation_referenced = tf.minimum(data['num_ref_to_equations'], 1)
        theorem_referenced = tf.minimum(data['num_ref_to_theorems'], 1)
        # buzzy title
        any_buzz = data["title_contains_deep"] + data["title_contains_neural"] + \
                   data["title_contains_embedding"] + data["title_contains_gan"]
        buzzy_title = tf.cast(tf.not_equal(any_buzz, 0), tf.int32)
        return {**data,
                'equation_referenced': equation_referenced,
                'theorem_referenced': theorem_referenced,
                'buzzy_title': buzzy_title,
                'index': data['id']}
    return extra_feature_cleaning
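Like the masker above, the returned function is applied per element; a minimal usage sketch (the feature names are those assumed by the snippet's own dataset):

dataset = dataset.map(make_extra_feature_cleaning())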
Example 5: __call__
# Required module import: import tensorflow [as alias]
# Or: from tensorflow import data [as alias]
def __call__(self, batch_size):
    """Reads `batch_size` data.

    Args:
        batch_size: Tensor of type `int32`, batch size of the data to be
            retrieved from the dataset. `batch_size` should be less than or
            equal to `max_batch_size`.

    Returns:
        The read data, an iterable of tensors with batch size equal to
        `batch_size`.
    """
    check_size = tf.assert_less_equal(
        batch_size,
        tf.convert_to_tensor(self._max_batch_size, dtype=tf.int32),
        message='Dataset read failure: batch size greater than max allowed.')
    with tf.control_dependencies([check_size]):
        return _slice_data(self._dataset, batch_size)
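The pattern here is graph-mode validation: `tf.control_dependencies` guarantees the size check executes before the slice. A self-contained sketch of the same pattern, assuming TF 1.x graph mode:

import tensorflow as tf

batch_size = tf.placeholder(tf.int32, shape=[])
max_batch_size = tf.constant(64, dtype=tf.int32)
check = tf.assert_less_equal(batch_size, max_batch_size,
                             message='batch size greater than max allowed')
with tf.control_dependencies([check]):
    # any op created in this scope will run the check first
    safe_batch_size = tf.identity(batch_size)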
Example 6: tensor_transform_fn
# Required module import: import tensorflow [as alias]
# Or: from tensorflow import data [as alias]
def tensor_transform_fn(data, perm):
    """Transpose function.

    This function is used to transpose an image tensor on the host and then
    perform an inverse transpose on the TPU. The transpose on the TPU is
    effectively elided, thus voiding any associated computational cost.

    NOTE: Eventually the compiler will be able to detect when this kind of
    operation may prove beneficial and perform these types of transformations
    implicitly, removing the need for user intervention.

    Args:
        data: Tensor to be transposed.
        perm: New ordering of dimensions.

    Returns:
        Transposed tensor.
    """
    if FLAGS.transpose_enabled:
        return tf.transpose(data, perm)
    return data
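To illustrate the host/TPU pairing, a hedged sketch; the NHWC layout and the specific permutations are assumptions, not from the original code:

# Host side: NHWC -> HWCN before infeed.
images = tensor_transform_fn(images, perm=[1, 2, 3, 0])
# TPU side: the inverse permutation; XLA elides the round trip.
images = tensor_transform_fn(images, perm=[3, 0, 1, 2])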
Example 7: _build_data_pipeline
# Required module import: import tensorflow [as alias]
# Or: from tensorflow import data [as alias]
def _build_data_pipeline(self):
    """Build a reproducible tf.data iterator."""

    def normalize(image, label):
        image = tf.cast(image, tf.float32) / 255.0
        return image, label

    def flatten(image, label):
        image = tf.reshape(image, shape=[self.FLATTENED_DIM])
        return image, label

    dataset = tf.data.TFRecordDataset([self.local_data_file])
    dataset = dataset.map(decode)
    dataset = dataset.map(normalize)
    dataset = dataset.map(flatten)
    dataset = dataset.repeat()
    dataset = dataset.batch(self.BATCH_SIZE)
    iterator = dataset.make_one_shot_iterator()
    return iterator
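`make_one_shot_iterator` is TF 1.x graph-mode API, so the iterator is consumed inside a session; a hedged sketch (the surrounding model object is assumed):

iterator = model._build_data_pipeline()
images, labels = iterator.get_next()
with tf.Session() as sess:
    batch_images, batch_labels = sess.run([images, labels])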
Example 8: _create_encoded_intents
# Required module import: import tensorflow [as alias]
# Or: from tensorflow import data [as alias]
def _create_encoded_intents(self, intent_dict):
    """Create a matrix with intents encoded in rows as bag of words;
    if intent_tokenization_flag is False this is the identity matrix."""
    if self.intent_tokenization_flag:
        intent_token_dict = self._create_intent_token_dict(
            list(intent_dict.keys()), self.intent_split_symbol)
        encoded_all_intents = np.zeros((len(intent_dict),
                                        len(intent_token_dict)))
        for key, idx in intent_dict.items():
            for t in key.split(self.intent_split_symbol):
                encoded_all_intents[idx, intent_token_dict[t]] = 1
        return encoded_all_intents
    else:
        return np.eye(len(intent_dict))

# data helpers:
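A small worked illustration of the bag-of-words encoding; the intent names and the `+` split symbol are made up:

import numpy as np

intent_dict = {'greet+name': 0, 'bye': 1}               # hypothetical intents
intent_token_dict = {'bye': 0, 'greet': 1, 'name': 2}   # token -> column
encoded = np.zeros((2, 3))
for key, idx in intent_dict.items():
    for t in key.split('+'):
        encoded[idx, intent_token_dict[t]] = 1
# encoded == [[0, 1, 1], [1, 0, 0]]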
Example 9: input_fn
# Required module import: import tensorflow [as alias]
# Or: from tensorflow import data [as alias]
def input_fn(self, features, labels, batch_size, shuffle_num, mode):
    """
    Build a tf.data input pipeline.

    :param features: dict, defines the structure of the input x for parsing
    :param labels: np.array, the input labels
    :param batch_size: int, the batch size
    :param shuffle_num: int, the buffer size used to randomly shuffle the data
    :param mode: string, tf.estimator.ModeKeys.TRAIN or tf.estimator.ModeKeys.PREDICT
    :return: a tuple of (data, labels) tensors drawn from the dataset iterator
    """
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    if mode == tf.estimator.ModeKeys.TRAIN:
        dataset = dataset.shuffle(shuffle_num).batch(batch_size).repeat(self.epochs)
    else:
        dataset = dataset.batch(batch_size)
    iterator = dataset.make_one_shot_iterator()
    data, labels = iterator.get_next()
    return data, labels
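A hedged sketch of the call site with `tf.estimator` (the `estimator` object, the `model` instance, and the hyperparameter values are assumed):

estimator.train(input_fn=lambda: model.input_fn(
    features, labels, batch_size=32, shuffle_num=10000,
    mode=tf.estimator.ModeKeys.TRAIN))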
Example 10: get_data_feed
# Required module import: import tensorflow [as alias]
# Or: from tensorflow import data [as alias]
def get_data_feed(val_rec_fname, pixels=None):
    '''
    Returns a 4-element feed: orig_shape, scale, image, annotation.
    TODO: unify parts with prepare_graph()
    '''
    dataset = data.TFRecordDataset([val_rec_fname]).map(utils.tfrecordify.parse_record)  # .batch(1)
    # note - saving the shape before the rescale
    dataset = dataset.map(lambda img, ann: (tf.to_float(tf.shape(img)), img, ann))
    if pixels is not None:
        dataset = dataset.map(lambda orig_shape_f, img, ann:
                              (orig_shape_f, tf.reduce_min(pixels / orig_shape_f)) +
                              utils.augmentation.nonrandom_rescale(img, ann, [pixels, pixels]))
    else:
        dataset = dataset.map(lambda shape, img, ann:
                              (shape, 1, img, tf.cast(ann, tf.int32)))
    iterator = dataset.repeat().make_initializable_iterator()
    return iterator
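Unlike a one-shot iterator, an initializable iterator must be explicitly initialized before use; a hedged sketch (the record filename is made up, TF 1.x session assumed):

iterator = get_data_feed('val.tfrecord', pixels=512)
orig_shape, scale, image, annotation = iterator.get_next()
with tf.Session() as sess:
    sess.run(iterator.initializer)
    feed = sess.run([orig_shape, scale, image, annotation])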
Example 11: plot_latent_space
# Required module import: import tensorflow [as alias]
# Or: from tensorflow import data [as alias]
def plot_latent_space(self, data, labels, save=False):
    """Plot the latent space learnt by the model.

    Args:
        data: (array) array containing the data
        labels: (array) array containing the labels
        save: (bool) whether to save the latent space plot

    Returns:
        fig: (figure) plot of the latent space
    """
    # obtain the latent features
    features = self.latent_features(data)

    # plot only the first 2 dimensions
    fig = plt.figure(figsize=(8, 6))
    plt.scatter(features[:, 0], features[:, 1], c=labels, marker='o',
                edgecolor='none', cmap=plt.cm.get_cmap('jet', 10), s=10)
    plt.colorbar()
    if save:
        fig.savefig('latent_space.png')
    return fig
Example 12: input_fn
# Required module import: import tensorflow [as alias]
# Or: from tensorflow import data [as alias]
def input_fn(words, tags, params=None, shuffle_and_repeat=False):
    params = params if params is not None else {}
    shapes = (([None], ()), [None])
    types = ((tf.string, tf.int32), tf.string)
    defaults = (('<pad>', 0), '0')

    dataset = tf.data.Dataset.from_generator(
        functools.partial(generator_fn, words, tags),
        output_shapes=shapes,
        output_types=types)

    if shuffle_and_repeat:
        dataset = dataset.shuffle(params['buffer']).repeat(params['epochs'])

    dataset = (dataset
               .padded_batch(params.get('batch_size', 20), shapes, defaults)
               .prefetch(1))
    return dataset

## Global Logic of the model_fn
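A hedged sketch of how such an `input_fn` is typically wired into `tf.estimator` (the file names, hyperparameters, and the `estimator` object are assumptions consistent with the snippet):

import functools

params = {'buffer': 15000, 'epochs': 25, 'batch_size': 20}
train_inpf = functools.partial(input_fn, 'train.words.txt', 'train.tags.txt',
                               params, shuffle_and_repeat=True)
estimator.train(input_fn=train_inpf)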
Example 13: get_tf_datasets
# Required module import: import tensorflow [as alias]
# Or: from tensorflow import data [as alias]
def get_tf_datasets(self):
    """Exposes data splits consistent with the TensorFlow `tf.data` API.

    Returns:
        A dictionary mapping split names (`str`, either `"training"`,
        `"validation"`, or `"test"`) to `tf.data.Dataset` objects.
    """
    return self.tf_splits
Example 14: get_training_set
# Required module import: import tensorflow [as alias]
# Or: from tensorflow import data [as alias]
def get_training_set(self):
    """Processed training set.

    Returns:
        A generator of elements from the training set as dictionaries mapping
        component names to the corresponding data (e.g. a Numpy array).
    """
    return self._get_set_generator('training')
Example 15: get_validation_set
# Required module import: import tensorflow [as alias]
# Or: from tensorflow import data [as alias]
def get_validation_set(self):
    """Processed validation set.

    Returns:
        A generator of elements from the validation set as dictionaries mapping
        component names to the corresponding data (e.g. a Numpy array).
    """
    return self._get_set_generator('validation')