本文整理汇总了Python中tensorflow.record方法的典型用法代码示例。如果您正苦于以下问题：Python tensorflow.record方法的具体用法？Python tensorflow.record怎么用？Python tensorflow.record使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类tensorflow的用法示例。
在下文中一共展示了tensorflow.record方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _process_dataset
# 需要导入模块: import tensorflow [as 别名]
# 或者: from tensorflow import record [as 别名]
def _process_dataset(filenames,
                     output_directory,
                     prefix,
                     num_shards):
    """Processes and saves list of audio files as TFRecords.

    The filenames are split into ``num_shards`` contiguous chunks; each chunk
    is handed, together with the output path of its shard, to
    ``_process_audio_files_batch`` running in a worker process.

    Args:
      filenames: list of strings; each string is a path to an audio file
      output_directory: path where output files should be created
      prefix: string; prefix for each file
      num_shards: number of chunks to split the filenames into

    Returns:
      files: list with one entry per shard, in shard order, as returned by
        ``_process_audio_files_batch`` (presumably the tf-record filepaths
        created from processing the dataset -- confirm against that helper).
    """
    _check_or_create_dir(output_directory)
    # float() keeps the ceiling division correct under Python 2 as well.
    chunksize = int(math.ceil(len(filenames) / float(num_shards)))

    def output_file(shard_idx):
        return os.path.join(output_directory,
                            '%s-%.5d-of-%.5d' % (prefix, shard_idx, num_shards))

    # chunk data consists of chunk_filenames and output_file
    chunk_data = [(filenames[shard * chunksize: (shard + 1) * chunksize],
                   output_file(shard)) for shard in range(num_shards)]

    # Leave one core free, but never request zero workers: Pool(0) raises
    # ValueError on single-core hosts.
    pool = Pool(max(1, multiprocessing.cpu_count() - 1))
    try:
        files = pool.map(_process_audio_files_batch, chunk_data)
    finally:
        # map() has either returned every result or raised, so the workers
        # can be torn down; without this the processes are leaked.
        pool.terminate()
        pool.join()
    return files
示例2: _process_dataset
# 需要导入模块: import tensorflow [as 别名]
# 或者: from tensorflow import record [as 别名]
def _process_dataset(filenames,
                     output_directory,
                     prefix,
                     num_shards):
    """Processes and saves list of audio files as TFRecords.

    The filenames are split into ``num_shards`` contiguous chunks; each chunk
    is handed, together with the output path of its shard, to
    ``_process_audio_files_batch`` running in a worker process.

    Args:
      filenames: list of strings; each string is a path to an audio file
      output_directory: path where output files should be created
      prefix: string; prefix for each file
      num_shards: number of chunks to split the filenames into

    Returns:
      files: list with one entry per shard, in shard order, as returned by
        ``_process_audio_files_batch`` (presumably the tf-record filepaths
        created from processing the dataset -- confirm against that helper).
    """
    _check_or_create_dir(output_directory)
    chunksize = int(math.ceil(len(filenames) / float(num_shards)))

    def output_file(shard_idx):
        return os.path.join(output_directory,
                            '%s-%.5d-of-%.5d' % (prefix, shard_idx, num_shards))

    # chunk data consists of chunk_filenames and output_file
    chunk_data = [(filenames[shard * chunksize: (shard + 1) * chunksize],
                   output_file(shard)) for shard in range(num_shards)]

    # Leave one core free, but never request zero workers: Pool(0) raises
    # ValueError on single-core hosts.
    pool = Pool(max(1, multiprocessing.cpu_count() - 1))
    try:
        files = pool.map(_process_audio_files_batch, chunk_data)
    finally:
        # map() has either returned every result or raised, so the workers
        # can be torn down; without this the processes are leaked.
        pool.terminate()
        pool.join()
    return files
示例3: dataset_parser
# 需要导入模块: import tensorflow [as 别名]
# 或者: from tensorflow import record [as 别名]
def dataset_parser(self, value):
    """Decode one serialized audio example into (features, sources).

    The 'audio/encoded' feature is read as a flat float buffer whose first
    MIX_WITH_PADDING values are reshaped into the mixture and whose remaining
    values are reshaped into the separated sources. Both tensors are cast to
    bfloat16.

    Args:
      value: serialized example, as accepted by tf.parse_single_example.

    Returns:
      A (features, sources) tuple. `features` always holds 'mix'; when
      `self.is_training` is false it also holds 'filename' and 'sample_id'.
    """
    feature_spec = {
        'audio/file_basename': tf.FixedLenFeature([], tf.string, ''),
        'audio/encoded': tf.VarLenFeature(tf.float32),
        'audio/sample_rate': tf.FixedLenFeature([], tf.int64, SAMPLE_RATE),
        'audio/sample_idx': tf.FixedLenFeature([], tf.int64, -1),
        'audio/num_samples': tf.FixedLenFeature([], tf.int64, NUM_SAMPLES),
        'audio/channels': tf.FixedLenFeature([], tf.int64, CHANNELS),
        'audio/num_sources': tf.FixedLenFeature([], tf.int64, NUM_SOURCES),
    }
    parsed = tf.parse_single_example(value, feature_spec)

    # Densify the variable-length buffer and pin it to its expected length.
    flat_audio = tf.sparse_tensor_to_dense(parsed['audio/encoded'], default_value=0)
    flat_shape = tf.stack([MIX_WITH_PADDING + NUM_SOURCES*NUM_SAMPLES])
    flat_audio = tf.reshape(flat_audio, flat_shape)

    # Head of the buffer is the mixture, tail is the stacked sources.
    mix = tf.reshape(flat_audio[:MIX_WITH_PADDING],
                     tf.stack([MIX_WITH_PADDING, CHANNELS]))
    sources = tf.reshape(flat_audio[MIX_WITH_PADDING:],
                         tf.stack([NUM_SOURCES, NUM_SAMPLES, CHANNELS]))

    mix = tf.cast(mix, tf.bfloat16)
    sources = tf.cast(sources, tf.bfloat16)

    features = {'mix': mix}
    if not self.is_training:
        # Identification fields are only exposed outside of training.
        features['filename'] = parsed['audio/file_basename']
        features['sample_id'] = parsed['audio/sample_idx']
    return features, sources
示例4: create_record
# 需要导入模块: import tensorflow [as 别名]
# 或者: from tensorflow import record [as 别名]
def create_record(record_path,
                  data,
                  labels,
                  height,
                  width,
                  channels):
    """
    Function to create one tf.record using two numpy arrays.
    The array in data is expected to be flat: each row must hold
    height * width * channels values.

    Each row of `data` is serialized together with the entry of `labels`
    at the same index into one tf.train.Example holding the keys
    'height', 'width', 'channels', 'image_raw' and 'labels_raw'.

    :param record_path: path to save the tf.record
    :type record_path: str
    :param data: dataset
    :type data: np.array
    :param labels: labels
    :type labels: np.array
    :param height: image height
    :type height: int
    :param width: image width
    :type width: int
    :param channels: image channels
    :type channels: int
    """
    assert data.shape[1] == height * width * channels, \
        "each row of data must contain height * width * channels values"
    # The context manager closes the writer even if serialization fails
    # part-way through, so the file handle is never leaked.
    with tf.python_io.TFRecordWriter(record_path) as writer:
        for i, sample in enumerate(data):
            # tobytes() is the supported spelling of the deprecated
            # tostring(); both produce the same raw bytes.
            img_str = sample.tobytes()
            label_str = labels[i].tobytes()
            example = tf.train.Example(features=tf.train.Features(feature={
                'height': _int64_feature(height),
                'width': _int64_feature(width),
                'channels': _int64_feature(channels),
                'image_raw': _bytes_feature(img_str),
                'labels_raw': _bytes_feature(label_str)}))
            writer.write(example.SerializeToString())
示例5: filed_based_convert_examples_to_features
# 需要导入模块: import tensorflow [as 别名]
# 或者: from tensorflow import record [as 别名]
def filed_based_convert_examples_to_features(
        examples, label_list, max_seq_length, tokenizer, output_file, mode=None
):
    """
    Convert the examples to TFRecord format and write them to `output_file`
    so they can be used as model input.

    Every example is turned into a feature object by
    `convert_single_example`; its `input_ids`, `input_mask`, `segment_ids`
    and `label_ids` are stored as int64 lists in one tf.train.Example.

    :param examples: the samples to convert
    :param label_list: list of labels
    :param max_seq_length: preset maximum sequence length
    :param tokenizer: tokenizer object
    :param output_file: output path of the tf.record file
    :param mode: forwarded unchanged to `convert_single_example`
    :return: None
    """
    def create_int_feature(values):
        # Wrap an iterable of ints as an int64-list Feature.
        return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))

    # The context manager guarantees the writer is flushed and closed;
    # without it the tail of the record file may never reach disk.
    with tf.python_io.TFRecordWriter(output_file) as writer:
        # Iterate over the training data.
        for (ex_index, example) in enumerate(examples):
            if ex_index % 5000 == 0:
                tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))
            # Convert each sample into its padded, id-based representation.
            feature = convert_single_example(ex_index, example, label_list, max_seq_length, tokenizer, mode)

            features = collections.OrderedDict()
            features["input_ids"] = create_int_feature(feature.input_ids)
            features["input_mask"] = create_int_feature(feature.input_mask)
            features["segment_ids"] = create_int_feature(feature.segment_ids)
            features["label_ids"] = create_int_feature(feature.label_ids)
            # tf.train.Example / Feature are protocol-buffer messages, which
            # is what makes the serialization below possible.
            tf_example = tf.train.Example(features=tf.train.Features(feature=features))
            writer.write(tf_example.SerializeToString())
示例6: file_based_input_fn_builder
# 需要导入模块: import tensorflow [as 别名]
# 或者: from tensorflow import record [as 别名]
def file_based_input_fn_builder(input_file, seq_length, is_training, drop_remainder):
    """Build an `input_fn` closure that reads examples from a TFRecord file.

    Every record is parsed into four int64 features of length `seq_length`
    ("input_ids", "input_mask", "segment_ids", "label_ids"), each of which is
    cast to int32 before batching.

    Args:
      input_file: TFRecord file(s) handed to tf.data.TFRecordDataset.
      seq_length: length of every fixed-length feature.
      is_training: when true, the dataset is repeated and shuffled.
      drop_remainder: forwarded to map_and_batch.

    Returns:
      A function taking `params` (which must contain "batch_size") and
      returning the batched dataset.
    """
    feature_names = ("input_ids", "input_mask", "segment_ids", "label_ids")
    name_to_features = {
        name: tf.FixedLenFeature([seq_length], tf.int64)
        for name in feature_names
    }

    def _decode_record(record):
        # Parse a single serialized example, then narrow int64 tensors to int32.
        parsed = tf.parse_single_example(record, name_to_features)
        for name in list(parsed.keys()):
            tensor = parsed[name]
            parsed[name] = tf.to_int32(tensor) if tensor.dtype == tf.int64 else tensor
        return parsed

    def input_fn(params):
        batch_size = params["batch_size"]
        dataset = tf.data.TFRecordDataset(input_file)
        if is_training:
            dataset = dataset.repeat().shuffle(buffer_size=100)
        parse_and_batch = tf.contrib.data.map_and_batch(
            _decode_record,
            batch_size=batch_size,
            drop_remainder=drop_remainder,
        )
        return dataset.apply(parse_and_batch)

    return input_fn
示例7: validate_spectra_array_contents
# 需要导入模块: import tensorflow [as 别名]
# 或者: from tensorflow import record [as 别名]
def validate_spectra_array_contents(record_path_name, hparams,
                                    spectra_array_path_name):
    """Verifies that a stored spectra np.array agrees with a TFRecord.

    The record is read in EVAL mode as a single batch, evaluated once in a
    session, and then compared row by row against the array loaded from
    `spectra_array_path_name`.

    Args:
      record_path_name: pathname to tf.Record file matching np.array
      hparams: See get_dataset_from_record
      spectra_array_path_name : pathname to spectra np.array.

    Raises:
      ValueError: if values in np.array stored at spectra_array_path_name
          does not match the spectra values in the TFRecord stored in the
          record_path_name.
    """
    eval_mode = tf.estimator.ModeKeys.EVAL
    dataset = get_dataset_from_record(
        [record_path_name],
        hparams,
        mode=eval_mode,
        all_data_in_one_batch=True)

    spectrum_key = fmap_constants.DENSE_MASS_SPEC
    index_key = fmap_constants.INDEX_TO_GROUND_TRUTH_ARRAY
    features, labels = make_features_and_labels(
        dataset, [spectrum_key], [index_key], mode=eval_mode)

    with tf.Session() as sess:
        feature_values, label_values = sess.run([features, labels])

    spectra_array = load_training_spectra_array(spectra_array_path_name)

    for row in range(np.shape(spectra_array)[0]):
        # Index into the ground-truth array that was stored with this example.
        # NOTE(review): the same index is also used to pick the row from the
        # dataset features (rather than `row`) -- confirm this is intended.
        array_idx = label_values[index_key][row]
        dataset_spectrum = feature_values[spectrum_key][array_idx, :].flatten()
        array_spectrum = spectra_array[array_idx, :].flatten()

        if all(dataset_spectrum == array_spectrum):
            continue
        raise ValueError('np.array of spectra stored at {} does not match spectra'
                         ' values in tf.Record {}'.format(spectra_array_path_name,
                                                          record_path_name))
示例8: dataset_parser
# 需要导入模块: import tensorflow [as 别名]
# 或者: from tensorflow import record [as 别名]
def dataset_parser(self, value):
    """Decode one serialized audio example into (features, sources).

    The 'audio/encoded' feature is read as a flat float buffer whose first
    MIX_WITH_PADDING values are reshaped into the mixture and whose remaining
    values are reshaped into the separated sources. 'audio/labels' is
    densified and reshaped to NUM_SOURCES entries. When `self.use_bfloat16`
    is set, mix, labels and sources are cast to bfloat16.

    Args:
      value: serialized example, as accepted by tf.parse_single_example.

    Returns:
      A (features, sources) tuple. `features` holds 'mix' and 'labels'; for
      any mode other than 'train' or 'eval' it also holds 'filename' and
      'sample_id'.
    """
    feature_spec = {
        'audio/file_basename': tf.FixedLenFeature([], tf.int64, -1),
        'audio/encoded': tf.VarLenFeature(tf.float32),
        'audio/sample_rate': tf.FixedLenFeature([], tf.int64, SAMPLE_RATE),
        'audio/sample_idx': tf.FixedLenFeature([], tf.int64, -1),
        'audio/num_samples': tf.FixedLenFeature([], tf.int64, NUM_SAMPLES),
        'audio/channels': tf.FixedLenFeature([], tf.int64, CHANNELS),
        'audio/labels': tf.VarLenFeature(tf.int64),
        'audio/num_sources': tf.FixedLenFeature([], tf.int64, NUM_SOURCES),
        'audio/source_names': tf.FixedLenFeature([], tf.string, ''),
    }
    parsed = tf.parse_single_example(value, feature_spec)

    # Densify the variable-length buffer and pin it to its expected length.
    flat_audio = tf.sparse_tensor_to_dense(parsed['audio/encoded'], default_value=0)
    flat_shape = tf.stack([MIX_WITH_PADDING + NUM_SOURCES*NUM_SAMPLES])
    flat_audio = tf.reshape(flat_audio, flat_shape)

    # Head of the buffer is the mixture, tail is the stacked sources.
    mix = tf.reshape(flat_audio[:MIX_WITH_PADDING],
                     tf.stack([MIX_WITH_PADDING, CHANNELS]))
    sources = tf.reshape(flat_audio[MIX_WITH_PADDING:],
                         tf.stack([NUM_SOURCES, NUM_SAMPLES, CHANNELS]))

    labels = tf.sparse_tensor_to_dense(parsed['audio/labels'])
    labels = tf.reshape(labels, tf.stack([NUM_SOURCES]))

    if self.use_bfloat16:
        mix = tf.cast(mix, tf.bfloat16)
        labels = tf.cast(labels, tf.bfloat16)
        sources = tf.cast(sources, tf.bfloat16)

    if self.mode in ('train', 'eval'):
        # Training and evaluation consume the same feature set.
        features = {'mix': mix,
                    'labels': labels}
    else:
        # Any other mode additionally exposes the identification fields.
        features = {'mix': mix, 'filename': parsed['audio/file_basename'],
                    'sample_id': parsed['audio/sample_idx'], 'labels': labels}
    return features, sources
示例9: write_dicts_to_example
# 需要导入模块: import tensorflow [as 别名]
# 或者: from tensorflow import record [as 别名]
def write_dicts_to_example(mol_list,
                           record_path_name,
                           max_atoms,
                           max_mass_spec_peak_loc,
                           true_library_array_path_name=None):
    """Serializes every molecule in mol_list into a ZLIB-compressed tf.record.

    Each molecule is turned into a dict by make_mol_dict and then into a
    tf.example by dict_to_tfexample.

    Args:
      mol_list : list of rdkit.Mol objects
      record_path_name : file name for storing tf record
      max_atoms : max. number of atoms to consider in a molecule.
      max_mass_spec_peak_loc : largest mass/charge ratio to allow in a spectra
      true_library_array_path_name: path for storing np.array of true spectra

    Returns:
      Nothing. Side effects:
      - Writes one serialized example per entry of mol_list to
        record_path_name.
      - If true_library_array_path_name is set, tags every example with its
        position in mol_list and saves an np.array of shape
        (len(mol_list), max_mass_spec_peak_loc) holding the dense spectra
        to that path.
    """
    save_spectra = bool(true_library_array_path_name)
    # The matrix is only allocated when the caller asked for it to be saved.
    spectra_matrix = (
        np.zeros((len(mol_list), max_mass_spec_peak_loc)) if save_spectra else None)

    def build_mol_dict(position, molecule):
        # Build the feature dict; when saving spectra, also record the row
        # index in the dict and copy the dense spectrum into the matrix.
        mol_dict = make_mol_dict(molecule, max_atoms, max_mass_spec_peak_loc)
        if save_spectra:
            mol_dict[fmap_constants.INDEX_TO_GROUND_TRUTH_ARRAY] = position
            spectra_matrix[position, :] = mol_dict[fmap_constants.DENSE_MASS_SPEC]
        return mol_dict

    compression = tf.python_io.TFRecordOptions(
        tf.python_io.TFRecordCompressionType.ZLIB)
    with tf.python_io.TFRecordWriter(record_path_name, compression) as writer:
        for position, molecule in enumerate(mol_list):
            serialized = dict_to_tfexample(
                build_mol_dict(position, molecule)).SerializeToString()
            writer.write(serialized)

    if save_spectra:
        with tf.gfile.Open(true_library_array_path_name, 'w') as f:
            np.save(f, spectra_matrix)