This page collects typical usage examples of the Python method tensorflow.records. If you are wondering how exactly tensorflow.records is used, how to call it, or what real examples of it look like, the curated code samples below may help. You can also explore further usage examples of tensorflow, the module this method belongs to.
Below are 10 code examples of tensorflow.records, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
Example 1: main
# Required import: import tensorflow [as alias]
# or: from tensorflow import records [as alias]
def main(argv):  # pylint: disable=unused-argument
    tf.logging.set_verbosity(tf.logging.INFO)

    if FLAGS.project is None:
        raise ValueError('GCS Project must be provided.')

    if FLAGS.gcs_output_path is None:
        raise ValueError('GCS output path must be provided.')
    elif not FLAGS.gcs_output_path.startswith('gs://'):
        raise ValueError('GCS output path must start with gs://')

    if FLAGS.local_scratch_dir is None:
        raise ValueError('Scratch directory path must be provided.')

    # Download the dataset if it is not present locally
    raw_data_dir = FLAGS.raw_data_dir

    # Convert the raw data into tf-records
    training_records, test_records = convert_to_tf_records(raw_data_dir)

    # Upload to GCS
    upload_to_gcs(training_records, test_records)
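The FLAGS object referenced in Example 1 is defined elsewhere in the original script; a minimal sketch of how those flags might be declared with TF1's flags module follows (the flag names come from the snippet above, the help strings and defaults are assumptions):

import tensorflow as tf

# Hypothetical flag definitions matching the names used in main() above.
tf.app.flags.DEFINE_string('project', None, 'GCP project that owns the GCS bucket.')
tf.app.flags.DEFINE_string('gcs_output_path', None, 'GCS path (gs://...) for the output TFRecords.')
tf.app.flags.DEFINE_string('local_scratch_dir', None, 'Local scratch directory for intermediate files.')
tf.app.flags.DEFINE_string('raw_data_dir', None, 'Directory holding the raw dataset, if already downloaded.')

FLAGS = tf.app.flags.FLAGS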
Example 2: main
# Required import: import tensorflow [as alias]
# or: from tensorflow import records [as alias]
def main(argv):  # pylint: disable=unused-argument
    tf.logging.set_verbosity(tf.logging.INFO)

    if FLAGS.project is None:
        raise ValueError('GCS Project must be provided.')

    if FLAGS.gcs_output_path is None:
        raise ValueError('GCS output path must be provided.')
    elif not FLAGS.gcs_output_path.startswith('gs://'):
        raise ValueError('GCS output path must start with gs://')

    if FLAGS.local_scratch_dir is None:
        raise ValueError('Scratch directory path must be provided.')

    # Download the dataset if it is not present locally
    raw_data_dir = FLAGS.raw_data_dir

    # Convert the raw data into tf-records
    training_records, test_records = convert_to_tf_records(raw_data_dir)

    # Upload to GCS
    # upload_to_gcs(training_records, test_records)
Example 3: write_tf_examples
# Required import: import tensorflow [as alias]
# or: from tensorflow import records [as alias]
def write_tf_examples(filename, tf_examples, serialize=True):
    '''
    Args:
        filename: Where to write tf.records
        tf_examples: An iterable of tf.Example
        serialize: whether to serialize the examples.
    '''
    with tf.python_io.TFRecordWriter(
            filename, options=TF_RECORD_CONFIG) as writer:
        for ex in tf_examples:
            if serialize:
                writer.write(ex.SerializeToString())
            else:
                writer.write(ex)
# Read tf.Example from files
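A minimal usage sketch for write_tf_examples, assuming TF_RECORD_CONFIG is a ZLIB TFRecordOptions object (as the ZLIB compression_type in the read examples further down suggests); the feature name and output path are placeholders:

import tensorflow as tf

# Assumed module-level config; the real value lives elsewhere in the source repo.
TF_RECORD_CONFIG = tf.python_io.TFRecordOptions(
    tf.python_io.TFRecordCompressionType.ZLIB)

def make_example(value):
    # Placeholder feature layout for illustration only.
    return tf.train.Example(features=tf.train.Features(feature={
        'value': tf.train.Feature(int64_list=tf.train.Int64List(value=[value])),
    }))

examples = (make_example(v) for v in range(10))
write_tf_examples('/tmp/sample.tfrecord.zz', examples)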
Example 4: check_data
# Required import: import tensorflow [as alias]
# or: from tensorflow import records [as alias]
def check_data(self, tfrecords_filename):
    """Checks a specified tf.records file for correct data format.

    Checks whether the data format in the example files is correct and prints
    the shape of the data stored in the tf.records file.

    Args:
        tfrecords_filename: `str`, the path to the `tf.records` file to check.
    """
    record_iterator = tf.python_io.tf_record_iterator(path=tfrecords_filename)

    for string_record in record_iterator:
        # Parse the next example
        example = tf.train.Example()
        example.ParseFromString(string_record)

        # Get the features you stored (change to match your tfrecord writing code)
        seq = (example.features.feature['seq_raw']
               .bytes_list
               .value[0])
        label = (example.features.feature['label_raw']
                 .bytes_list
                 .value[0])

        # Convert to numpy arrays (change dtype to the datatype you stored)
        seq_array = np.fromstring(seq, dtype=np.float64)
        label_array = np.fromstring(label, dtype=np.float64)

        # Print the shapes; do they match your expectations?
        print(seq_array.shape)
        print(label_array.shape)
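For reference, a sketch of how a record that check_data can parse might be written, assuming float64 arrays serialized as raw bytes under the 'seq_raw' and 'label_raw' keys used above:

import numpy as np
import tensorflow as tf

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

# Dummy data; the shapes are arbitrary since check_data only prints them.
seq = np.random.rand(100, 4).astype(np.float64)
label = np.zeros(10, dtype=np.float64)

example = tf.train.Example(features=tf.train.Features(feature={
    'seq_raw': _bytes_feature(seq.tobytes()),
    'label_raw': _bytes_feature(label.tobytes()),
}))

with tf.python_io.TFRecordWriter('/tmp/check_data_sample.tfrecords') as writer:
    writer.write(example.SerializeToString())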
Example 5: write_tf_examples
# Required import: import tensorflow [as alias]
# or: from tensorflow import records [as alias]
def write_tf_examples(filename, tf_examples, serialize=True):
    """
    Args:
        filename: Where to write tf.records
        tf_examples: An iterable of tf.Example
        serialize: whether to serialize the examples.
    """
    with tf.python_io.TFRecordWriter(
            filename, options=TF_RECORD_CONFIG) as writer:
        for ex in tf_examples:
            if serialize:
                writer.write(ex.SerializeToString())
            else:
                writer.write(ex)
Example 6: _process_audio_files_batch
# Required import: import tensorflow [as alias]
# or: from tensorflow import records [as alias]
def _process_audio_files_batch(chunk_data):
    """Processes and saves a list of audio files as TFRecords.

    Args:
        chunk_data: tuple of (chunk_files, output_file)
            chunk_files: list of strings; each string is a path to an audio file
            output_file: string, unique identifier specifying the data set
    """
    chunk_files, output_file = chunk_data[0], chunk_data[1]

    # Get training files from the directory name
    writer = tf.python_io.TFRecordWriter(output_file)

    chunk_data_cache = list()
    for filename in chunk_files:
        # Load all wave files into memory and create a buffer
        file_data_cache = list()
        for source in CHANNEL_NAMES:
            data, sr = librosa.core.load(filename + source, sr=SAMPLE_RATE, mono=True)
            file_data_cache.append([filename, len(data), data])

            # Option 1: use only tf to read and resample audio
            # audio_binary = tf.read_file(filename + source)
            # wav_decoder = contrib_audio.decode_wav(
            #     audio_binary,
            #     desired_channels=CHANNELS)

            # Option 2: use SoundFile and read binary files
            # SoundFile should be much faster, but it doesn't matter here because
            # everything is stored in tf.records anyway.
            # with sf.SoundFile(filename + source, "r") as f:
            #     print(filename + source, f.samplerate, f.channels, len(f), f.read().tobytes())

        for segment in _get_segments_from_audio_cache(file_data_cache):
            chunk_data_cache.append(segment)

    # Shuffle all segments
    shuffle_idx = make_shuffle_idx(len(chunk_data_cache))
    chunk_data_cache = [chunk_data_cache[i] for i in shuffle_idx]

    for chunk in chunk_data_cache:
        example = _convert_to_example(filename=chunk[0], sample_idx=chunk[1], data_buffer=chunk[2])
        writer.write(example.SerializeToString())

    writer.close()
    tf.logging.info('Finished writing file: %s' % output_file)
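_convert_to_example, _get_segments_from_audio_cache, and make_shuffle_idx are defined elsewhere in the source repository and are not shown on this page. A purely hypothetical sketch of what _convert_to_example could look like for the three fields passed in Example 6 (the feature keys and dtypes are assumptions):

import numpy as np
import tensorflow as tf

def _convert_to_example(filename, sample_idx, data_buffer):
    # Hypothetical layout: raw float32 audio bytes plus minimal metadata.
    data_bytes = np.asarray(data_buffer, dtype=np.float32).tobytes()
    return tf.train.Example(features=tf.train.Features(feature={
        'filename': tf.train.Feature(
            bytes_list=tf.train.BytesList(value=[filename.encode('utf-8')])),
        'sample_idx': tf.train.Feature(
            int64_list=tf.train.Int64List(value=[sample_idx])),
        'data_raw': tf.train.Feature(
            bytes_list=tf.train.BytesList(value=[data_bytes])),
    }))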
Example 7: _process_audio_files_batch
# Required import: import tensorflow [as alias]
# or: from tensorflow import records [as alias]
def _process_audio_files_batch(chunk_data):
    """Processes and saves a list of audio files as TFRecords.

    Args:
        chunk_data: tuple of (chunk_files, output_file)
            chunk_files: list of strings; each string is a path to a wav file
            output_file: string, unique identifier specifying the data set
    """
    chunk_files, output_file = chunk_data[0], chunk_data[1]

    # Get training files from the directory name
    writer = tf.python_io.TFRecordWriter(output_file)

    chunk_data_cache = list()
    for track in chunk_files:
        # Load all wave files into memory and create a buffer
        file_data_cache = list()
        for source in track:
            data, sr = librosa.core.load(source, sr=SAMPLE_RATE, mono=True)
            file_data_cache.append([track, len(data), data])

            # Option 1: use only tf to read and resample audio
            # audio_binary = tf.read_file(filename + source)
            # wav_decoder = contrib_audio.decode_wav(
            #     audio_binary,
            #     desired_channels=CHANNELS)

            # Option 2: use SoundFile and read binary files
            # SoundFile should be much faster, but it doesn't matter here because
            # everything is stored in tf.records anyway.
            # with sf.SoundFile(filename + source, "r") as f:
            #     print(filename + source, f.samplerate, f.channels, len(f), f.read().tobytes())

        for segment in _get_segments_from_audio_cache(file_data_cache):
            chunk_data_cache.append(segment)

    # Shuffle all segments
    shuffle_idx = make_shuffle_idx(len(chunk_data_cache))
    chunk_data_cache = [chunk_data_cache[i] for i in shuffle_idx]

    for chunk in chunk_data_cache:
        labels = get_labels_from_filename(chunk[0])
        example = _convert_to_example(filename=chunk[0], sample_idx=chunk[1],
                                      data_buffer=chunk[2], num_sources=chunk[3],
                                      labels=labels)
        writer.write(example.SerializeToString())

    writer.close()
    tf.logging.info('Finished writing file: %s' % output_file)
Example 8: read_tf_records
# Required import: import tensorflow [as alias]
# or: from tensorflow import records [as alias]
def read_tf_records(batch_size, tf_records, num_repeats=None,
                    shuffle_records=True, shuffle_examples=True,
                    shuffle_buffer_size=None,
                    filter_amount=1.0):
    '''
    Args:
        batch_size: batch size to return
        tf_records: a list of tf_record filenames
        num_repeats: how many times the data should be read (default: infinite)
        shuffle_records: whether to shuffle the order of files read
        shuffle_examples: whether to shuffle the tf.Examples
        shuffle_buffer_size: how big of a buffer to fill before shuffling
        filter_amount: what fraction of records to keep
    Returns:
        a tf dataset of batched tensors
    '''
    if shuffle_buffer_size is None:
        shuffle_buffer_size = SHUFFLE_BUFFER_SIZE
    if shuffle_records:
        random.shuffle(tf_records)
    record_list = tf.data.Dataset.from_tensor_slices(tf_records)

    # compression_type here must agree with write_tf_examples
    # cycle_length = how many tfrecord files are read in parallel
    # block_length = how many tf.Examples are read from each file before
    #     moving to the next file
    # The idea is to shuffle both the order of the files being read,
    # and the examples being read from the files.
    dataset = record_list.interleave(
        lambda x: tf.data.TFRecordDataset(x, compression_type='ZLIB'),
        cycle_length=64, block_length=16)

    # The sampling dataset replaces the filter dataset with the lambda function
    # below. It's a faster implementation of the filter dataset for this
    # specific lambda function.
    # dataset = dataset.sampling(filter_amount)
    dataset = dataset.filter(lambda x: tf.less(
        tf.random_uniform([1]), filter_amount)[0])
    # TODO(amj): apply py_func for transforms here.

    if num_repeats is not None:
        dataset = dataset.repeat(num_repeats)
    else:
        dataset = dataset.repeat()

    if shuffle_examples:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)

    dataset = dataset.batch(batch_size)
    return dataset
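A minimal usage sketch for this version of read_tf_records, assuming the input files were written with ZLIB compression as in write_tf_examples (the file names are placeholders; shuffle_buffer_size is passed explicitly so the module-level SHUFFLE_BUFFER_SIZE constant is not needed):

import tensorflow as tf

dataset = read_tf_records(
    batch_size=256,
    tf_records=['/tmp/train-00000.tfrecord.zz', '/tmp/train-00001.tfrecord.zz'],
    num_repeats=1,
    shuffle_buffer_size=2000,
    filter_amount=0.5)

# No parsing map is applied, so each batch is a vector of serialized tf.Example strings.
batch = dataset.make_one_shot_iterator().get_next()
with tf.Session() as sess:
    serialized_examples = sess.run(batch)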
Example 9: read_tf_records
# Required import: import tensorflow [as alias]
# or: from tensorflow import records [as alias]
def read_tf_records(batch_size, tf_records, num_repeats=None,
                    shuffle_records=True, shuffle_examples=True,
                    shuffle_buffer_size=None,
                    filter_amount=1.0):
    '''
    Args:
        batch_size: batch size to return
        tf_records: a list of tf_record filenames
        num_repeats: how many times the data should be read (default: infinite)
        shuffle_records: whether to shuffle the order of files read
        shuffle_examples: whether to shuffle the tf.Examples
        shuffle_buffer_size: how big of a buffer to fill before shuffling
        filter_amount: what fraction of records to keep
    Returns:
        a tf dataset of batched tensors
    '''
    if shuffle_buffer_size is None:
        shuffle_buffer_size = SHUFFLE_BUFFER_SIZE
    if shuffle_records:
        random.shuffle(tf_records)
    record_list = tf.data.Dataset.from_tensor_slices(tf_records)

    # compression_type here must agree with write_tf_examples
    # cycle_length = how many tfrecord files are read in parallel
    # block_length = how many tf.Examples are read from each file before
    #     moving to the next file
    # The idea is to shuffle both the order of the files being read,
    # and the examples being read from the files.
    dataset = record_list.interleave(
        lambda x: tf.data.TFRecordDataset(x, compression_type='ZLIB'),
        cycle_length=64, block_length=16)

    # The sampling dataset replaces the filter dataset with the lambda function
    # below. It's a faster implementation of the filter dataset for this
    # specific lambda function.
    dataset = dataset.sampling(filter_amount)
    # dataset = dataset.filter(lambda x: tf.less(
    #     tf.random_uniform([1]), filter_amount)[0])
    # TODO(amj): apply py_func for transforms here.

    if num_repeats is not None:
        dataset = dataset.repeat(num_repeats)
    else:
        dataset = dataset.repeat()

    if shuffle_examples:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)

    dataset = dataset.batch(batch_size)
    return dataset
Example 10: read_tf_records
# Required import: import tensorflow [as alias]
# or: from tensorflow import records [as alias]
def read_tf_records(batch_size, tf_records, num_repeats=1,
                    shuffle_records=True, shuffle_examples=True,
                    shuffle_buffer_size=None, interleave=True,
                    filter_amount=1.0):
    """
    Args:
        batch_size: batch size to return
        tf_records: a list of tf_record filenames
        num_repeats: how many times the data should be read (default: one)
        shuffle_records: whether to shuffle the order of files read
        shuffle_examples: whether to shuffle the tf.Examples
        shuffle_buffer_size: how big of a buffer to fill before shuffling
        interleave: whether to interleave examples from multiple tf_records
        filter_amount: what fraction of records to keep
    Returns:
        a tf dataset of batched tensors
    """
    if shuffle_examples and not shuffle_buffer_size:
        raise ValueError("Must set shuffle buffer size if shuffling examples")

    tf_records = list(tf_records)
    if shuffle_records:
        random.shuffle(tf_records)
    record_list = tf.data.Dataset.from_tensor_slices(tf_records)

    # compression_type here must agree with write_tf_examples
    map_func = functools.partial(
        tf.data.TFRecordDataset,
        buffer_size=8 * 1024 * 1024,
        compression_type='ZLIB')

    if interleave:
        # cycle_length = how many tfrecord files are read in parallel
        # The idea is to shuffle both the order of the files being read,
        # and the examples being read from the files.
        dataset = record_list.apply(tf.data.experimental.parallel_interleave(
            map_func, cycle_length=64, sloppy=True))
    else:
        dataset = record_list.flat_map(map_func)

    if filter_amount < 1.0:
        dataset = dataset.filter(
            lambda _: tf.random_uniform([]) < filter_amount)

    dataset = dataset.repeat(num_repeats)
    if shuffle_examples:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)

    dataset = dataset.batch(batch_size)
    return dataset
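A short usage sketch for this variant; unlike Examples 8 and 9, shuffle_buffer_size must be passed explicitly whenever shuffle_examples is True (the glob pattern below is a placeholder):

import tensorflow as tf

dataset = read_tf_records(
    batch_size=256,
    tf_records=tf.gfile.Glob('/tmp/records/*.tfrecord.zz'),
    num_repeats=1,
    shuffle_examples=True,
    shuffle_buffer_size=20000,
    filter_amount=0.3)
# Each element is a batch of serialized tf.Example protos; parsing/decoding
# would be applied afterwards with dataset.map(...).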