This article collects typical usage examples of the tensorflow.Record method in Python. If you have been wondering what tensorflow.Record is for and how to use it, the hand-picked code examples below may help. You can also explore further usage examples from the containing module, tensorflow.
The following presents 12 code examples of tensorflow.Record, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
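All of the examples below revolve around the TFRecord file format, i.e. files of serialized tf.train.Example protocol buffers. As a point of reference before the examples, here is a minimal, self-contained sketch of writing and then reading such a file with the TF 1.x APIs used throughout this page; the file name and the feature key 'value' are illustrative only and not taken from any of the projects below.

import tensorflow as tf

def write_toy_records(path):
    # Serialize a few tf.train.Example protos into a TFRecord file.
    with tf.python_io.TFRecordWriter(path) as writer:
        for value in range(3):
            example = tf.train.Example(features=tf.train.Features(feature={
                'value': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[value])),
            }))
            writer.write(example.SerializeToString())

def read_toy_records(path):
    # Parse the records back with tf.data and print each parsed example.
    dataset = tf.data.TFRecordDataset([path])
    dataset = dataset.map(
        lambda record: tf.parse_single_example(
            record, {'value': tf.FixedLenFeature([], tf.int64)}))
    next_element = dataset.make_one_shot_iterator().get_next()
    with tf.Session() as sess:
        while True:
            try:
                print(sess.run(next_element))
            except tf.errors.OutOfRangeError:
                break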
Example 1: parse_args
# Required module: import tensorflow [as alias]
# Or: from tensorflow import Record [as alias]
def parse_args():
    msg = "convert inputs to tf.Record format"
    usage = "input_converter.py [<args>] [-h | --help]"
    parser = argparse.ArgumentParser(description=msg, usage=usage)

    parser.add_argument("--input", required=True, type=str, nargs=2,
                        help="Path of input file")
    parser.add_argument("--output_name", required=True, type=str,
                        help="Output name")
    parser.add_argument("--output_dir", required=True, type=str,
                        help="Output directory")
    parser.add_argument("--vocab", nargs=2, required=True, type=str,
                        help="Path of vocabulary")
    parser.add_argument("--num_shards", default=100, type=int,
                        help="Number of output shards")
    parser.add_argument("--shuffle", action="store_true",
                        help="Shuffle inputs")
    parser.add_argument("--unk", default="<unk>", type=str,
                        help="Unknown word symbol")
    parser.add_argument("--eos", default="<eos>", type=str,
                        help="End of sentence symbol")

    return parser.parse_args()
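The function above only collects command-line options; the conversion step that consumes them is not shown on this page. As a rough, hypothetical sketch of how such options typically drive sharded TFRecord writing (the argument `sentences`, the feature key "source", and the shard file-name pattern are assumptions, not part of the original converter):

import os
import tensorflow as tf

def convert_to_tfrecord(args, sentences):
    # Write one tf.train.Example per encoded sentence, round-robin over shards.
    writers = [
        tf.python_io.TFRecordWriter(
            os.path.join(args.output_dir,
                         "%s-%05d-of-%05d" % (args.output_name, i,
                                              args.num_shards)))
        for i in range(args.num_shards)
    ]
    for idx, ids in enumerate(sentences):  # `ids`: list of vocabulary indices
        example = tf.train.Example(features=tf.train.Features(feature={
            "source": tf.train.Feature(
                int64_list=tf.train.Int64List(value=ids)),
        }))
        writers[idx % args.num_shards].write(example.SerializeToString())
    for writer in writers:
        writer.close()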
Example 2: shuffle_tf_examples
# Required module: import tensorflow [as alias]
# Or: from tensorflow import Record [as alias]
def shuffle_tf_examples(gather_size, records_to_shuffle):
    '''Read through tf.Record and yield shuffled, but unparsed tf.Examples

    Args:
        gather_size: The number of tf.Examples to be gathered together
        records_to_shuffle: A list of filenames
    Returns:
        An iterator yielding lists of bytes, which are serialized tf.Examples.
    '''
    dataset = read_tf_records(gather_size, records_to_shuffle, num_repeats=1)
    batch = dataset.make_one_shot_iterator().get_next()
    sess = tf.Session()
    while True:
        try:
            result = sess.run(batch)
            yield list(result)
        except tf.errors.OutOfRangeError:
            break
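The helper read_tf_records belongs to the same project and is not reproduced here. A rough stand-in with the same call signature might look like the sketch below (the shuffle-buffer size is an assumption); it returns a tf.data pipeline whose batches are arrays of serialized tf.train.Example bytes, which is exactly what shuffle_tf_examples yields as lists.

import tensorflow as tf

def read_tf_records(gather_size, tf_records, num_repeats=1):
    # Read the record files, shuffle serialized examples, and batch them.
    dataset = tf.data.TFRecordDataset(tf_records)
    dataset = dataset.repeat(num_repeats)
    dataset = dataset.shuffle(buffer_size=10 * gather_size)  # assumed buffer
    return dataset.batch(gather_size)

# Usage: iterate over shuffled batches of serialized examples.
# for serialized_batch in shuffle_tf_examples(1000, ["data-00000.tfrecord"]):
#     process(serialized_batch)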
Example 3: parse_args
# Required module: import tensorflow [as alias]
# Or: from tensorflow import Record [as alias]
def parse_args(args=None):
    parser = argparse.ArgumentParser(
        description="Training neural machine translation models",
        usage="trainer.py [<args>] [-h | --help]"
    )

    # input files
    parser.add_argument("--input", type=str, nargs=2,
                        help="Path of source and target corpus")
    parser.add_argument("--record", type=str,
                        help="Path to tf.Record data")
    parser.add_argument("--output", type=str, default="train",
                        help="Path to saved models")
    parser.add_argument("--vocabulary", type=str, nargs=2,
                        help="Path of source and target vocabulary")
    parser.add_argument("--validation", type=str,
                        help="Path of validation file")
    parser.add_argument("--references", type=str, nargs="+",
                        help="Path of reference files")
    parser.add_argument("--checkpoint", type=str,
                        help="Path to pre-trained checkpoint")
    parser.add_argument("--half", action="store_true",
                        help="Enable FP16 training")
    parser.add_argument("--distribute", action="store_true",
                        help="Enable distributed training")

    # model and configuration
    parser.add_argument("--model", type=str, required=True,
                        help="Name of the model")
    parser.add_argument("--parameters", type=str, default="",
                        help="Additional hyper parameters")

    return parser.parse_args(args)
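Because this variant accepts an explicit argument list instead of always reading sys.argv, it is easy to exercise programmatically. A brief usage sketch (the model name and file paths are placeholders):

args = parse_args([
    "--model", "transformer",
    "--record", "corpus.train.tfrecord",
    "--vocabulary", "vocab.src.txt", "vocab.tgt.txt",
    "--output", "train",
])
print(args.record)  # corpus.train.tfrecord
print(args.half)    # False unless --half is passed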
Example 4: parse_args
# Required module: import tensorflow [as alias]
# Or: from tensorflow import Record [as alias]
def parse_args(args=None):
    parser = argparse.ArgumentParser(
        description="Training neural machine translation models",
        usage="trainer.py [<args>] [-h | --help]"
    )

    # input files
    parser.add_argument("--input", type=str, nargs=2,
                        help="Path of source and target corpus")
    parser.add_argument("--record", type=str,
                        help="Path to tf.Record data")
    parser.add_argument("--output", type=str, default="train",
                        help="Path to saved models")
    parser.add_argument("--vocabulary", type=str, nargs=2,
                        help="Path of source and target vocabulary")
    parser.add_argument("--validation", type=str,
                        help="Path of validation file")
    parser.add_argument("--references", type=str, nargs="+",
                        help="Path of reference files")

    # model and configuration
    parser.add_argument("--model", type=str, required=True,
                        help="Name of the model")
    parser.add_argument("--parameters", type=str, default="",
                        help="Additional hyper parameters")

    return parser.parse_args(args)
Example 5: parse_args
# Required module: import tensorflow [as alias]
# Or: from tensorflow import Record [as alias]
def parse_args(args=None):
    parser = argparse.ArgumentParser(
        description="Training neural machine translation models",
        usage="trainer.py [<args>] [-h | --help]"
    )

    # input files
    parser.add_argument("--input", type=str, nargs=2,
                        help="Path of source and target corpus")
    parser.add_argument("--context", type=str,
                        help="Path of context corpus")
    parser.add_argument("--record", type=str,
                        help="Path to tf.Record data")
    parser.add_argument("--output", type=str, default="train",
                        help="Path to saved models")
    parser.add_argument("--vocabulary", type=str, nargs=2,
                        help="Path of source and target vocabulary")
    parser.add_argument("--validation", type=str,
                        help="Path of validation file")
    parser.add_argument("--references", type=str, nargs="+",
                        help="Path of reference files")

    # model and configuration
    parser.add_argument("--model", type=str, required=True,
                        help="Name of the model")
    parser.add_argument("--parameters", type=str, default="",
                        help="Additional hyper parameters")

    return parser.parse_args(args)
Example 6: validate_spectra_array_contents
# Required module: import tensorflow [as alias]
# Or: from tensorflow import Record [as alias]
def validate_spectra_array_contents(record_path_name, hparams,
                                    spectra_array_path_name):
  """Checks that the np.array containing spectra matches contents of record.

  Args:
    record_path_name: pathname to tf.Record file matching np.array
    hparams: See get_dataset_from_record
    spectra_array_path_name: pathname to spectra np.array.

  Raises:
    ValueError: if the values in the np.array stored at
      spectra_array_path_name do not match the spectra values in the TFRecord
      stored at record_path_name.
  """
  dataset = get_dataset_from_record(
      [record_path_name],
      hparams,
      mode=tf.estimator.ModeKeys.EVAL,
      all_data_in_one_batch=True)

  feature_names = [fmap_constants.DENSE_MASS_SPEC]
  label_names = [fmap_constants.INDEX_TO_GROUND_TRUTH_ARRAY]

  features, labels = make_features_and_labels(
      dataset, feature_names, label_names, mode=tf.estimator.ModeKeys.EVAL)

  with tf.Session() as sess:
    feature_values, label_values = sess.run([features, labels])

  spectra_array = load_training_spectra_array(spectra_array_path_name)

  for i in range(np.shape(spectra_array)[0]):
    test_idx = label_values[fmap_constants.INDEX_TO_GROUND_TRUTH_ARRAY][i]
    spectra_from_dataset = feature_values[fmap_constants.DENSE_MASS_SPEC][
        test_idx, :]
    spectra_from_array = spectra_array[test_idx, :]
    if not all(spectra_from_dataset.flatten() == spectra_from_array.flatten()):
      raise ValueError('np.array of spectra stored at {} does not match '
                       'spectra values in tf.Record {}'.format(
                           spectra_array_path_name, record_path_name))
  return
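The helpers get_dataset_from_record, make_features_and_labels, and load_training_spectra_array come from the same project and are not reproduced on this page. For orientation, and consistent with Example 12 below (which saves the spectra matrix via np.save through tf.gfile), the loader is likely little more than an np.load behind a gfile handle; a hypothetical stand-in:

import numpy as np
import tensorflow as tf

def load_training_spectra_array(spectra_array_path_name):
    # Hypothetical stand-in: read back a previously np.save'd spectra matrix.
    with tf.gfile.Open(spectra_array_path_name, 'rb') as f:
        return np.load(f)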
Example 7: create_tf_record_for_visualwakewords_dataset
# Required module: import tensorflow [as alias]
# Or: from tensorflow import Record [as alias]
def create_tf_record_for_visualwakewords_dataset(annotations_file, image_dir,
                                                 output_path, num_shards):
  """Loads Visual WakeWords annotations/images and converts to tf.Record format.

  Args:
    annotations_file: JSON file containing bounding box annotations.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    num_shards: number of output file shards.
  """
  with contextlib2.ExitStack() as tf_record_close_stack, \
      tf.gfile.GFile(annotations_file, 'r') as fid:
    output_tfrecords = dataset_utils.open_sharded_output_tfrecords(
        tf_record_close_stack, output_path, num_shards)
    groundtruth_data = json.load(fid)
    images = groundtruth_data['images']
    annotations_index = groundtruth_data['annotations']
    # Convert 'unicode' keys to 'int' keys after parsing the json file.
    annotations_index = {int(k): v for k, v in annotations_index.items()}

    for idx, image in enumerate(images):
      if idx % 100 == 0:
        tf.logging.info('On image %d of %d', idx, len(images))
      annotations = annotations_index[image['id']]
      tf_example = _create_tf_example(image, annotations, image_dir)
      shard_idx = idx % num_shards
      output_tfrecords[shard_idx].write(tf_example.SerializeToString())
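The per-image helper _create_tf_example is part of the Visual WakeWords conversion script and is not shown here. As a purely hypothetical illustration of the shape of such a helper (the feature keys and the label derivation are assumptions, not the actual implementation), it typically packs the encoded image bytes and a label into a tf.train.Example:

import os
import tensorflow as tf

def _create_tf_example_sketch(image, annotations, image_dir):
    # Hypothetical: wrap the encoded JPEG plus a binary label in an Example.
    filename = image['file_name']
    with tf.gfile.GFile(os.path.join(image_dir, filename), 'rb') as fid:
        encoded_jpg = fid.read()
    label = int(annotations.get('label', 0))  # assumed annotation layout
    return tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': tf.train.Feature(
            bytes_list=tf.train.BytesList(value=[encoded_jpg])),
        'image/filename': tf.train.Feature(
            bytes_list=tf.train.BytesList(value=[filename.encode('utf8')])),
        'image/class/label': tf.train.Feature(
            int64_list=tf.train.Int64List(value=[label])),
    }))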
Example 8: _create_tf_record_from_coco_annotations
# Required module: import tensorflow [as alias]
# Or: from tensorflow import Record [as alias]
def _create_tf_record_from_coco_annotations(
    annotations_file, image_dir, output_path, include_masks, num_shards):
  """Loads COCO annotation json files and converts to tf.Record format.

  Args:
    annotations_file: JSON file containing bounding box annotations.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
    num_shards: number of output file shards.
  """
  with contextlib2.ExitStack() as tf_record_close_stack, \
      tf.gfile.GFile(annotations_file, 'r') as fid:
    output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
        tf_record_close_stack, output_path, num_shards)
    groundtruth_data = json.load(fid)
    images = groundtruth_data['images']
    category_index = label_map_util.create_category_index(
        groundtruth_data['categories'])

    annotations_index = {}
    if 'annotations' in groundtruth_data:
      tf.logging.info(
          'Found groundtruth annotations. Building annotations index.')
      for annotation in groundtruth_data['annotations']:
        image_id = annotation['image_id']
        if image_id not in annotations_index:
          annotations_index[image_id] = []
        annotations_index[image_id].append(annotation)
    missing_annotation_count = 0
    for image in images:
      image_id = image['id']
      if image_id not in annotations_index:
        missing_annotation_count += 1
        annotations_index[image_id] = []
    tf.logging.info('%d images are missing annotations.',
                    missing_annotation_count)

    total_num_annotations_skipped = 0
    for idx, image in enumerate(images):
      if idx % 100 == 0:
        tf.logging.info('On image %d of %d', idx, len(images))
      annotations_list = annotations_index[image['id']]
      _, tf_example, num_annotations_skipped = create_tf_example(
          image, annotations_list, image_dir, category_index, include_masks)
      total_num_annotations_skipped += num_annotations_skipped
      shard_idx = idx % num_shards
      output_tfrecords[shard_idx].write(tf_example.SerializeToString())
    tf.logging.info('Finished writing, skipped %d annotations.',
                    total_num_annotations_skipped)
Example 9: _make_training_input_fn
# Required module: import tensorflow [as alias]
# Or: from tensorflow import Record [as alias]
def _make_training_input_fn(tft_working_dir,
                            filebase,
                            num_epochs=None,
                            shuffle=True,
                            batch_size=200,
                            buffer_size=None,
                            prefetch_buffer_size=1):
  """Creates an input function reading from transformed data.

  Args:
    tft_working_dir: Directory to read transformed data and metadata from and
      to write exported model to.
    filebase: Base filename (relative to `tft_working_dir`) of examples.
    num_epochs: int, how many times to read through the data. If None, will
      loop through the data indefinitely.
    shuffle: bool, whether or not to randomize the order of data. Controls
      randomization of both file order and line order within files.
    batch_size: Batch size.
    buffer_size: Buffer size for the shuffle.
    prefetch_buffer_size: Number of examples to prefetch.

  Returns:
    The input function for training or eval.
  """
  if buffer_size is None:
    buffer_size = 2 * batch_size + 1

  # Examples have already been transformed, so we only need the transformed
  # feature spec to parse each serialized tf.Example in the tf.Record files.
  transformed_metadata = metadata_io.read_metadata(
      os.path.join(tft_working_dir, transform_fn_io.TRANSFORMED_METADATA_DIR))
  transformed_feature_spec = transformed_metadata.schema.as_feature_spec()

  def parser(record):
    """Helper function to parse a tf.Example."""
    parsed = tf.parse_single_example(record, transformed_feature_spec)
    label = parsed.pop(LABEL_KEY)
    return parsed, label

  def input_fn():
    """Input function for training and eval."""
    files = tf.data.Dataset.list_files(
        os.path.join(tft_working_dir, filebase + '*'))
    dataset = files.interleave(
        tf.data.TFRecordDataset, cycle_length=4, block_length=16)
    dataset = dataset.map(parser)

    if shuffle:
      dataset = dataset.shuffle(buffer_size)

    dataset = dataset.repeat(num_epochs)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(prefetch_buffer_size)
    iterator = dataset.make_one_shot_iterator()
    transformed_features, transformed_labels = iterator.get_next()
    return transformed_features, transformed_labels

  return input_fn
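A typical consumer of this factory is a tf.estimator model. A brief, hypothetical usage sketch (the working directory, file base, and estimator are placeholders):

train_input_fn = _make_training_input_fn(
    tft_working_dir='/tmp/tft_out',      # placeholder path
    filebase='train_transformed',        # placeholder file base
    num_epochs=1,
    shuffle=True,
    batch_size=128)

# Any tf.estimator.Estimator can then consume it, for example:
# estimator.train(input_fn=train_input_fn)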
Example 10: _create_tf_record_from_coco_annotations
# Required module: import tensorflow [as alias]
# Or: from tensorflow import Record [as alias]
def _create_tf_record_from_coco_annotations(
    annotations_file, image_dir, output_path, include_masks):
  """Loads COCO annotation json files and converts to tf.Record format.

  Args:
    annotations_file: JSON file containing bounding box annotations.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
  """
  with tf.gfile.GFile(annotations_file, 'r') as fid:
    groundtruth_data = json.load(fid)
    images = groundtruth_data['images']
    category_index = label_map_util.create_category_index(
        groundtruth_data['categories'])

    annotations_index = {}
    if 'annotations' in groundtruth_data:
      tf.logging.info(
          'Found groundtruth annotations. Building annotations index.')
      for annotation in groundtruth_data['annotations']:
        image_id = annotation['image_id']
        if image_id not in annotations_index:
          annotations_index[image_id] = []
        annotations_index[image_id].append(annotation)
    missing_annotation_count = 0
    for image in images:
      image_id = image['id']
      if image_id not in annotations_index:
        missing_annotation_count += 1
        annotations_index[image_id] = []
    tf.logging.info('%d images are missing annotations.',
                    missing_annotation_count)

    tf.logging.info('writing to output path: %s', output_path)
    writer = tf.python_io.TFRecordWriter(output_path)
    total_num_annotations_skipped = 0
    for idx, image in enumerate(images):
      if idx % 100 == 0:
        tf.logging.info('On image %d of %d', idx, len(images))
      annotations_list = annotations_index[image['id']]
      _, tf_example, num_annotations_skipped = create_tf_example(
          image, annotations_list, image_dir, category_index, include_masks)
      total_num_annotations_skipped += num_annotations_skipped
      writer.write(tf_example.SerializeToString())
    writer.close()
    tf.logging.info('Finished writing, skipped %d annotations.',
                    total_num_annotations_skipped)
Example 11: _create_tf_record_from_coco_annotations
# Required module: import tensorflow [as alias]
# Or: from tensorflow import Record [as alias]
def _create_tf_record_from_coco_annotations(
    object_annotations_file,
    caption_annotations_file,
    image_dir, output_path, include_masks, num_shards):
  """Loads COCO annotation json files and converts to tf.Record format.

  Args:
    object_annotations_file: JSON file containing bounding box annotations.
    caption_annotations_file: JSON file containing caption annotations.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
    num_shards: Number of output files to create.
  """
  tf.logging.info('writing to output path: %s', output_path)
  writers = [
      tf.python_io.TFRecordWriter(output_path + '-%05d-of-%05d.tfrecord' %
                                  (i, num_shards)) for i in range(num_shards)
  ]
  images, img_to_obj_annotation, category_index = (
      _load_object_annotations(object_annotations_file))
  img_to_caption_annotation = (
      _load_caption_annotations(caption_annotations_file))

  pool = multiprocessing.Pool()
  total_num_annotations_skipped = 0
  for idx, (_, tf_example, num_annotations_skipped) in enumerate(
      pool.imap(_pool_create_tf_example,
                [(image,
                  img_to_obj_annotation[image['id']],
                  img_to_caption_annotation[image['id']],
                  image_dir,
                  category_index,
                  include_masks)
                 for image in images])):
    if idx % 100 == 0:
      tf.logging.info('On image %d of %d', idx, len(images))

    total_num_annotations_skipped += num_annotations_skipped
    writers[idx % num_shards].write(tf_example.SerializeToString())

  pool.close()
  pool.join()

  for writer in writers:
    writer.close()

  tf.logging.info('Finished writing, skipped %d annotations.',
                  total_num_annotations_skipped)
Example 12: write_dicts_to_example
# Required module: import tensorflow [as alias]
# Or: from tensorflow import Record [as alias]
def write_dicts_to_example(mol_list,
                           record_path_name,
                           max_atoms,
                           max_mass_spec_peak_loc,
                           true_library_array_path_name=None):
  """Helper function for writing a tf.Record file from all examples.

  Uses dict_to_tfexample to write the actual tf.Example.

  Args:
    mol_list: list of rdkit.Mol objects
    record_path_name: file name for storing tf record
    max_atoms: max. number of atoms to consider in a molecule.
    max_mass_spec_peak_loc: largest mass/charge ratio to allow in a spectrum
    true_library_array_path_name: path for storing np.array of true spectra

  Returns:
    - Writes tf.Record of an example for each eligible molecule
      (i.e. # atoms < max_atoms)
    - Writes np.array (len(mol_list), max_mass_spec_peak_loc) to
      true_library_array_path_name if it is defined.
  """
  options = tf.python_io.TFRecordOptions(
      tf.python_io.TFRecordCompressionType.ZLIB)

  # Wrapper function to add the index value to the dictionary
  if true_library_array_path_name:
    spectra_matrix = np.zeros((len(mol_list), max_mass_spec_peak_loc))

    def make_mol_dict_with_saved_array(idx, mol):
      mol_dict = make_mol_dict(mol, max_atoms, max_mass_spec_peak_loc)
      mol_dict[fmap_constants.INDEX_TO_GROUND_TRUTH_ARRAY] = idx
      spectra_matrix[idx, :] = mol_dict[fmap_constants.DENSE_MASS_SPEC]
      return mol_dict

    make_mol_dict_fn = make_mol_dict_with_saved_array

  else:

    def make_mol_dict_without_saved_array(idx, mol):
      del idx
      return make_mol_dict(mol, max_atoms, max_mass_spec_peak_loc)

    make_mol_dict_fn = make_mol_dict_without_saved_array

  with tf.python_io.TFRecordWriter(record_path_name, options) as writer:
    for idx, mol in enumerate(mol_list):
      mol_dict = make_mol_dict_fn(idx, mol)
      example = dict_to_tfexample(mol_dict)
      writer.write(example.SerializeToString())

  if true_library_array_path_name:
    with tf.gfile.Open(true_library_array_path_name, 'w') as f:
      np.save(f, spectra_matrix)
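Because the records above are written with ZLIB compression, the same TFRecordOptions must be supplied when reading them back. A minimal sketch of re-reading such a file (the counting loop is only an illustration, not part of the original project):

import tensorflow as tf

options = tf.python_io.TFRecordOptions(
    tf.python_io.TFRecordCompressionType.ZLIB)

def count_records(record_path_name):
    # Iterate over the ZLIB-compressed TFRecord file and count the examples.
    count = 0
    for serialized in tf.python_io.tf_record_iterator(record_path_name,
                                                      options=options):
        _ = tf.train.Example.FromString(serialized)  # verify it parses
        count += 1
    return count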