This page collects typical usage examples of the Python method tensor2tensor.data_generators.generator_utils.generate_files. If you have been asking yourself what generator_utils.generate_files does, how to call it, or where to find working usage, the curated code examples below may help. You can also explore the module tensor2tensor.data_generators.generator_utils, in which this method is defined, for further usage examples.
Below are 13 code examples of generator_utils.generate_files, sorted by popularity by default.
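Before the examples, a minimal sketch of the basic call pattern may be useful. It assumes the signature generate_files(generator, output_filenames, max_cases=None, cycle_every_n=1) implied by the examples below; the problem name, output directory, and toy generator are all hypothetical:

import tensorflow as tf
from tensor2tensor.data_generators import generator_utils

def toy_generator():
  # Each yielded dict becomes one serialized tf.train.Example; values must
  # be flat lists of ints, floats, or strings.
  for i in range(100):
    yield {"inputs": [i], "targets": [i + 1]}

data_dir = "/tmp/t2t_data"  # hypothetical output directory
tf.gfile.MakeDirs(data_dir)
# Filenames for one unshuffled training shard, named e.g.
# /tmp/t2t_data/toy_problem-unshuffled-train-00000-of-00001.
filenames = generator_utils.train_data_filenames(
    "toy_problem" + generator_utils.UNSHUFFLED_SUFFIX, data_dir, 1)
generator_utils.generate_files(toy_generator(), filenames)
# Shuffle the written records on disk, as most of the examples below do.
generator_utils.shuffle_dataset(filenames)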
Example 1: generate_data_for_problem

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or alternatively: from tensor2tensor.data_generators.generator_utils import generate_files [as alias]
def generate_data_for_problem(problem):
  """Generate data for a problem in _SUPPORTED_PROBLEM_GENERATORS."""
  training_gen, dev_gen = _SUPPORTED_PROBLEM_GENERATORS[problem]
  num_shards = FLAGS.num_shards or 10
  tf.logging.info("Generating training data for %s.", problem)
  train_output_files = generator_utils.train_data_filenames(
      problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, num_shards)
  generator_utils.generate_files(training_gen(), train_output_files,
                                 FLAGS.max_cases)
  tf.logging.info("Generating development data for %s.", problem)
  dev_output_files = generator_utils.dev_data_filenames(
      problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, 1)
  generator_utils.generate_files(dev_gen(), dev_output_files)
  all_output_files = train_output_files + dev_output_files
  generator_utils.shuffle_dataset(all_output_files)
Example 2: generate_data

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or alternatively: from tensor2tensor.data_generators.generator_utils import generate_files [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  train_paths = self.training_filepaths(
      data_dir, self.num_shards, shuffled=False)
  dev_paths = self.dev_filepaths(
      data_dir, self.num_dev_shards, shuffled=False)
  test_paths = self.test_filepaths(
      data_dir, self.num_test_shards, shuffled=True)
  generator_utils.generate_files(
      self.generator(data_dir, tmp_dir, self.TEST_DATASETS), test_paths)
  if self.use_train_shards_for_dev:
    all_paths = train_paths + dev_paths
    generator_utils.generate_files(
        self.generator(data_dir, tmp_dir, self.TRAIN_DATASETS), all_paths)
    generator_utils.shuffle_dataset(all_paths)
  else:
    generator_utils.generate_dataset_and_shuffle(
        self.generator(data_dir, tmp_dir, self.TRAIN_DATASETS), train_paths,
        self.generator(data_dir, tmp_dir, self.DEV_DATASETS), dev_paths)
Example 3: generate_data

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or alternatively: from tensor2tensor.data_generators.generator_utils import generate_files [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  filepath_fns = {
      problem.DatasetSplit.TRAIN: self.training_filepaths,
      problem.DatasetSplit.EVAL: self.dev_filepaths,
      problem.DatasetSplit.TEST: self.test_filepaths,
  }
  split_paths = [(split["split"], filepath_fns[split["split"]](
      data_dir, split["shards"], shuffled=False))
                 for split in self.dataset_splits]
  all_paths = []
  for _, paths in split_paths:
    all_paths.extend(paths)
  if self.is_generate_per_split:
    for split, paths in split_paths:
      generator_utils.generate_files(
          self.generate_samples(data_dir, tmp_dir, split), paths)
  else:
    generator_utils.generate_files(
        self.generate_samples(data_dir, tmp_dir, problem.DatasetSplit.TRAIN),
        all_paths)
  generator_utils.shuffle_dataset(all_paths)
Example 4: generate_data

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or alternatively: from tensor2tensor.data_generators.generator_utils import generate_files [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  """Generates training/dev data.

  Args:
    data_dir: a string
    tmp_dir: a string
    task_id: an optional integer
  Returns:
    shard or shards for which data was generated.
  """
  tf.logging.info("generate_data task_id=%s" % task_id)
  encoder = self.get_or_create_vocab(data_dir, tmp_dir)
  assert task_id >= 0 and task_id < self.num_generate_tasks
  if task_id < self.num_train_shards:
    out_file = self.training_filepaths(
        data_dir, self.num_train_shards, shuffled=False)[task_id]
  else:
    out_file = self.dev_filepaths(
        data_dir, self.num_dev_shards,
        shuffled=False)[task_id - self.num_train_shards]
  generator_utils.generate_files(
      self.example_generator(encoder, tmp_dir, task_id), [out_file])
  generator_utils.shuffle_dataset([out_file])
Example 5: testGenerateFiles

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or alternatively: from tensor2tensor.data_generators.generator_utils import generate_files [as alias]
def testGenerateFiles(self):
  tmp_dir = self.get_temp_dir()
  (_, tmp_file_path) = tempfile.mkstemp(dir=tmp_dir)
  tmp_file_name = os.path.basename(tmp_file_path)

  # Generate a trivial file and assert the file exists.
  def test_generator():
    yield {"inputs": [1], "target": [1]}

  filenames = generator_utils.train_data_filenames(tmp_file_name, tmp_dir, 1)
  generator_utils.generate_files(test_generator(), filenames)
  self.assertTrue(tf.gfile.Exists(tmp_file_path + "-train-00000-of-00001"))

  # Clean up.
  os.remove(tmp_file_path + "-train-00000-of-00001")
  os.remove(tmp_file_path)
Example 6: generate_data

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or alternatively: from tensor2tensor.data_generators.generator_utils import generate_files [as alias]
def generate_data(self, data_dir, tmp_dir=None, task_id=-1):
  """Saves the current epoch rollouts to disk, split into train/dev sets."""
  if not self._rollouts_by_epoch_and_split[self.current_epoch]:
    # Data not loaded from disk.
    self._split_current_epoch()
  rollouts_by_split = self._rollouts_by_epoch_and_split[self.current_epoch]
  splits_and_paths = self.splits_and_paths(data_dir)
  for (split, paths) in splits_and_paths:
    rollouts = rollouts_by_split[split]
    num_frames = self._calc_num_frames(rollouts)
    shard_size = num_frames // len(paths)
    frame_gen = self._generate_frames(rollouts)
    for (path_index, path) in enumerate(paths):
      limit = shard_size
      # Put the remainder in the last shard to preserve the ordering.
      if path_index == len(paths) - 1:
        limit = None
      generator_utils.generate_files(
          itertools.islice(frame_gen, limit), [path],
          cycle_every_n=float("inf")
      )
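Example 6 depends on generate_files writing everything it receives into the single path it is given: cycle_every_n=float("inf") means the writer never rotates to the next output file, so sharding is done by hand with itertools.islice over one shared generator. A stripped-down sketch of that pattern, with hypothetical paths and a toy frame generator:

import itertools
from tensor2tensor.data_generators import generator_utils

def frames():
  for i in range(10):
    yield {"frame_number": [i]}

# Hypothetical shard paths; the last shard absorbs any remainder.
paths = ["/tmp/rollouts-train-00000-of-00002",
         "/tmp/rollouts-train-00001-of-00002"]
gen = frames()
shard_size = 10 // len(paths)
for i, path in enumerate(paths):
  limit = None if i == len(paths) - 1 else shard_size
  generator_utils.generate_files(
      itertools.islice(gen, limit), [path], cycle_every_n=float("inf"))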
Example 7: generate_data

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or alternatively: from tensor2tensor.data_generators.generator_utils import generate_files [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  filepath_fns = {
      problem.DatasetSplit.TRAIN: self.training_filepaths,
      problem.DatasetSplit.EVAL: self.dev_filepaths,
      problem.DatasetSplit.TEST: self.test_filepaths,
  }
  split_paths = [(split["split"], filepath_fns[split["split"]](
      data_dir, split["shards"], shuffled=self.already_shuffled))
                 for split in self.dataset_splits]
  all_paths = []
  for _, paths in split_paths:
    all_paths.extend(paths)
  if self.is_generate_per_split:
    for split, paths in split_paths:
      generator_utils.generate_files(
          self.generate_encoded_samples(data_dir, tmp_dir, split), paths)
  else:
    generator_utils.generate_files(
        self.generate_encoded_samples(
            data_dir, tmp_dir, problem.DatasetSplit.TRAIN), all_paths)
  generator_utils.shuffle_dataset(all_paths, extra_fn=self._pack_fn())
Example 8: generate_data

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or alternatively: from tensor2tensor.data_generators.generator_utils import generate_files [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  # task_id should be in [0, self.num_output_shards).
  assert (0 <= task_id) and (task_id < self.num_output_shards)
  # A task_id is supposed to write only one *output* shard, but it can
  # operate over multiple *input* shards.
  input_files = self._task_id_to_input_files(task_id)
  output_file = self._task_id_to_output_file(data_dir, task_id)
  # Which output split is this task writing to?
  split, _, _ = self._task_id_to_output_split(task_id)
  # Actually generate examples.
  generator_utils.generate_files(
      self._maybe_pack_examples(
          self.generate_encoded_samples(
              data_dir, tmp_dir, split, input_files)),
      [output_file])
  # Shuffle the output.
  generator_utils.shuffle_dataset([output_file])
Example 9: generate_data

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or alternatively: from tensor2tensor.data_generators.generator_utils import generate_files [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  filepath_fns = {
      problem.DatasetSplit.TRAIN: self.training_filepaths,
      problem.DatasetSplit.EVAL: self.dev_filepaths,
      problem.DatasetSplit.TEST: self.test_filepaths,
  }
  split_paths = [(split['split'], filepath_fns[split['split']](
      data_dir, split['shards'], shuffled=False))
                 for split in self.dataset_splits]
  all_paths = []
  for _, paths in split_paths:
    all_paths.extend(paths)
  if self.is_generate_per_split:
    for split, paths in split_paths:
      generator_utils.generate_files(
          self.generate_encoded_samples(data_dir, tmp_dir, split), paths)
  else:
    generator_utils.generate_files(
        self.generate_encoded_samples(
            data_dir, tmp_dir, problem.DatasetSplit.TRAIN), all_paths)
  generator_utils.shuffle_dataset(all_paths)
Example 10: generate_data

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or alternatively: from tensor2tensor.data_generators.generator_utils import generate_files [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  train_paths = self.training_filepaths(data_dir, 1, shuffled=True)
  dev_paths = self.dev_filepaths(data_dir, 1, shuffled=True)
  generator_utils.generate_files(
      self.generator(data_dir, tmp_dir, True), train_paths)
  generator_utils.generate_files(
      self.generator(data_dir, tmp_dir, False), dev_paths)
Example 11: generate_data

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or alternatively: from tensor2tensor.data_generators.generator_utils import generate_files [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  del tmp_dir, task_id
  identity_problem = algorithmic.AlgorithmicIdentityBinary40()
  generator_utils.generate_files(
      identity_problem.generator(self.num_symbols, 40, 100000),
      self.training_filepaths(data_dir, 1, shuffled=True), 100)
  generator_utils.generate_files(
      identity_problem.generator(self.num_symbols, 400, 10000),
      self.dev_filepaths(data_dir, 1, shuffled=True), 100)
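The bare 100 passed as the third positional argument in Example 11 is the max_cases parameter, which caps how many examples generate_files draws from the generator. A minimal sketch, assuming that parameter name and a hypothetical shard path:

from tensor2tensor.data_generators import generator_utils

def endless_examples():
  # An unbounded generator; without a cap the write would never terminate.
  i = 0
  while True:
    yield {"inputs": [i], "targets": [i]}
    i += 1

# Only the first 100 examples are written to this hypothetical shard.
paths = ["/tmp/capped-train-00000-of-00001"]
generator_utils.generate_files(endless_examples(), paths, max_cases=100)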
Example 12: generate_data

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or alternatively: from tensor2tensor.data_generators.generator_utils import generate_files [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  """The function generating the data."""
  filepath_fns = {
      problem.DatasetSplit.TRAIN: self.training_filepaths,
      problem.DatasetSplit.EVAL: self.dev_filepaths,
      problem.DatasetSplit.TEST: self.test_filepaths,
  }
  # We set shuffled=True as we don't want to shuffle on disk later.
  split_paths = [(split["split"], filepath_fns[split["split"]](
      data_dir, split["shards"], shuffled=True))
                 for split in self.dataset_splits]
  all_paths = []
  for _, paths in split_paths:
    all_paths.extend(paths)
  if self.is_generate_per_split:
    for split, paths in split_paths:
      generator_utils.generate_files(
          self.generate_encoded_samples_debug(
              data_dir, tmp_dir, split), paths,
          cycle_every_n=self.total_number_of_frames // len(paths))
  else:
    generator_utils.generate_files(
        self.generate_encoded_samples_debug(
            data_dir, tmp_dir, problem.DatasetSplit.TRAIN),
        all_paths,
        cycle_every_n=self.total_number_of_frames // len(all_paths))

# TODO(lukaszkaiser): remove this version after everything is ported.
Example 13: generate_data_for_problem

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or alternatively: from tensor2tensor.data_generators.generator_utils import generate_files [as alias]
def generate_data_for_problem(problem):
  """Generate data for a problem in _SUPPORTED_PROBLEM_GENERATORS."""
  training_gen, dev_gen, test_gen = _SUPPORTED_PROBLEM_GENERATORS[problem]
  num_train_shards = FLAGS.num_shards or 10
  tf.logging.info("Generating training data for %s.", problem)
  train_output_files = generator_utils.train_data_filenames(
      problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir,
      num_train_shards)
  generator_utils.generate_files(training_gen(), train_output_files,
                                 FLAGS.max_cases)
  num_dev_shards = int(num_train_shards * 0.1)
  tf.logging.info("Generating development data for %s.", problem)
  dev_output_files = generator_utils.dev_data_filenames(
      problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir,
      num_dev_shards)
  generator_utils.generate_files(dev_gen(), dev_output_files)
  num_test_shards = int(num_train_shards * 0.1)
  test_output_files = []
  test_gen_data = test_gen()
  if test_gen_data is not None:
    tf.logging.info("Generating test data for %s.", problem)
    test_output_files = generator_utils.test_data_filenames(
        problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir,
        num_test_shards)
    generator_utils.generate_files(test_gen_data, test_output_files)
  all_output_files = train_output_files + dev_output_files + test_output_files
  generator_utils.shuffle_dataset(all_output_files)