This article collects typical usage examples of the Python method tensor2tensor.data_generators.generator_utils.shuffle_dataset. If you have been wondering what generator_utils.shuffle_dataset does, or how to use it, the curated code examples below should help. You can also explore the other utilities defined in the module that contains this method, tensor2tensor.data_generators.generator_utils.
Below are 10 code examples of generator_utils.shuffle_dataset, ordered by popularity.
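Before the examples, here is a minimal, self-contained sketch of the underlying pattern, under stated assumptions: the problem name "my_problem", the output directory, and the toy generator are placeholders invented for illustration. Data is first written to shards whose filenames carry generator_utils.UNSHUFFLED_SUFFIX; shuffle_dataset then rewrites each shard's records in shuffled order.

import os

from tensor2tensor.data_generators import generator_utils

data_dir = "/tmp/t2t_data"  # placeholder output directory
os.makedirs(data_dir, exist_ok=True)

def toy_generator():
  # Hypothetical generator: yields feature dictionaries in the format
  # generator_utils.generate_files expects (values are lists of ints here).
  for i in range(100):
    yield {"inputs": [i], "targets": [i + 1]}

# Write one unshuffled training shard; the filenames carry the
# UNSHUFFLED_SUFFIX marker.
filenames = generator_utils.train_data_filenames(
    "my_problem" + generator_utils.UNSHUFFLED_SUFFIX, data_dir, 1)
generator_utils.generate_files(toy_generator(), filenames)

# Shuffle the records of each shard and write the shuffled copies out.
generator_utils.shuffle_dataset(filenames)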
Example 1: generate_data_for_problem
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as alias]
def generate_data_for_problem(problem):
  """Generate data for a problem in _SUPPORTED_PROBLEM_GENERATORS."""
  training_gen, dev_gen = _SUPPORTED_PROBLEM_GENERATORS[problem]
  num_shards = FLAGS.num_shards or 10
  tf.logging.info("Generating training data for %s.", problem)
  train_output_files = generator_utils.train_data_filenames(
      problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, num_shards)
  generator_utils.generate_files(training_gen(), train_output_files,
                                 FLAGS.max_cases)
  tf.logging.info("Generating development data for %s.", problem)
  dev_output_files = generator_utils.dev_data_filenames(
      problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, 1)
  generator_utils.generate_files(dev_gen(), dev_output_files)
  all_output_files = train_output_files + dev_output_files
  generator_utils.shuffle_dataset(all_output_files)
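Note the pattern shared by all of the examples below: every output path is created unshuffled (via UNSHUFFLED_SUFFIX or shuffled=False), and a single shuffle_dataset call at the end covers the training and development shards at once; in the T2T implementation the shuffled records are written to the matching filenames with the unshuffled marker stripped.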
Example 2: generate_data
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  train_paths = self.training_filepaths(
      data_dir, self.num_shards, shuffled=False)
  dev_paths = self.dev_filepaths(
      data_dir, self.num_dev_shards, shuffled=False)
  test_paths = self.test_filepaths(
      data_dir, self.num_test_shards, shuffled=True)
  generator_utils.generate_files(
      self.generator(data_dir, tmp_dir, self.TEST_DATASETS), test_paths)
  if self.use_train_shards_for_dev:
    all_paths = train_paths + dev_paths
    generator_utils.generate_files(
        self.generator(data_dir, tmp_dir, self.TRAIN_DATASETS), all_paths)
    generator_utils.shuffle_dataset(all_paths)
  else:
    generator_utils.generate_dataset_and_shuffle(
        self.generator(data_dir, tmp_dir, self.TRAIN_DATASETS), train_paths,
        self.generator(data_dir, tmp_dir, self.DEV_DATASETS), dev_paths)
Example 3: generate_data
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  filepath_fns = {
      problem.DatasetSplit.TRAIN: self.training_filepaths,
      problem.DatasetSplit.EVAL: self.dev_filepaths,
      problem.DatasetSplit.TEST: self.test_filepaths,
  }
  split_paths = [(split["split"], filepath_fns[split["split"]](
      data_dir, split["shards"], shuffled=False))
                 for split in self.dataset_splits]
  all_paths = []
  for _, paths in split_paths:
    all_paths.extend(paths)
  if self.is_generate_per_split:
    for split, paths in split_paths:
      generator_utils.generate_files(
          self.generate_samples(data_dir, tmp_dir, split), paths)
  else:
    generator_utils.generate_files(
        self.generate_samples(data_dir, tmp_dir, problem.DatasetSplit.TRAIN),
        all_paths)
  generator_utils.shuffle_dataset(all_paths)
Example 4: generate_data
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  filepath_fns = {
      problem.DatasetSplit.TRAIN: self.training_filepaths,
      problem.DatasetSplit.EVAL: self.dev_filepaths,
      problem.DatasetSplit.TEST: self.test_filepaths,
  }
  split_paths = [(split["split"], filepath_fns[split["split"]](
      data_dir, split["shards"], shuffled=self.already_shuffled))
                 for split in self.dataset_splits]
  all_paths = []
  for _, paths in split_paths:
    all_paths.extend(paths)
  if self.is_generate_per_split:
    for split, paths in split_paths:
      generator_utils.generate_files(
          self.generate_encoded_samples(data_dir, tmp_dir, split), paths)
  else:
    generator_utils.generate_files(
        self.generate_encoded_samples(
            data_dir, tmp_dir, problem.DatasetSplit.TRAIN), all_paths)
  generator_utils.shuffle_dataset(all_paths, extra_fn=self._pack_fn())
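Example 4 is the first to pass extra_fn to shuffle_dataset; judging from the T2T source, this hook takes the shuffled list of serialized records for one file and returns the list that is actually written out (here self._pack_fn() uses it to pack short examples together). Below is a minimal sketch of a custom hook under that assumption; keep_first_half is a hypothetical name, not part of the library.

def keep_first_half(records):
  # Hypothetical extra_fn: receives one file's shuffled, serialized
  # records and returns the (possibly transformed) list to write out.
  return records[:len(records) // 2]

generator_utils.shuffle_dataset(all_paths, extra_fn=keep_first_half)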
Example 5: generate_data
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  """Generates training/dev data.

  Args:
    data_dir: a string
    tmp_dir: a string
    task_id: an optional integer

  Returns:
    shard or shards for which data was generated.
  """
  tf.logging.info("generate_data task_id=%s" % task_id)
  encoder = self.get_or_create_vocab(data_dir, tmp_dir)
  assert task_id >= 0 and task_id < self.num_generate_tasks
  if task_id < self.num_train_shards:
    out_file = self.training_filepaths(
        data_dir, self.num_train_shards, shuffled=False)[task_id]
  else:
    out_file = self.dev_filepaths(
        data_dir, self.num_dev_shards,
        shuffled=False)[task_id - self.num_train_shards]
  generator_utils.generate_files(
      self.example_generator(encoder, tmp_dir, task_id), [out_file])
  generator_utils.shuffle_dataset([out_file])
Example 6: generate_data
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  filepath_fns = {
      problem.DatasetSplit.TRAIN: self.training_filepaths,
      problem.DatasetSplit.EVAL: self.dev_filepaths,
      problem.DatasetSplit.TEST: self.test_filepaths,
  }
  split_paths = [(split['split'], filepath_fns[split['split']](
      data_dir, split['shards'], shuffled=False))
                 for split in self.dataset_splits]
  all_paths = []
  for _, paths in split_paths:
    all_paths.extend(paths)
  if self.is_generate_per_split:
    for split, paths in split_paths:
      generator_utils.generate_files(
          self.generate_encoded_samples(data_dir, tmp_dir, split), paths)
  else:
    generator_utils.generate_files(
        self.generate_encoded_samples(
            data_dir, tmp_dir, problem.DatasetSplit.TRAIN), all_paths)
  generator_utils.shuffle_dataset(all_paths)
Example 7: generate_data
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  filepath_fns = {
      problem.DatasetSplit.TRAIN: self.training_filepaths,
      problem.DatasetSplit.EVAL: self.dev_filepaths,
      problem.DatasetSplit.TEST: self.test_filepaths,
  }
  split_paths = [(split["split"], filepath_fns[split["split"]](
      data_dir, split["shards"], shuffled=False))
                 for split in self.dataset_splits]
  all_paths = []
  for _, paths in split_paths:
    all_paths.extend(paths)
  if self.is_generate_per_split:
    for split, paths in split_paths:
      generator_utils.generate_files(
          self._maybe_pack_examples(
              self.generate_encoded_samples(data_dir, tmp_dir, split)), paths)
  else:
    generator_utils.generate_files(
        self._maybe_pack_examples(
            self.generate_encoded_samples(
                data_dir, tmp_dir, problem.DatasetSplit.TRAIN)), all_paths)
  generator_utils.shuffle_dataset(all_paths)
Example 8: generate_data_for_problem
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as alias]
def generate_data_for_problem(problem):
  """Generate data for a problem in _SUPPORTED_PROBLEM_GENERATORS."""
  training_gen, dev_gen, test_gen = _SUPPORTED_PROBLEM_GENERATORS[problem]
  num_train_shards = FLAGS.num_shards or 10
  tf.logging.info("Generating training data for %s.", problem)
  train_output_files = generator_utils.train_data_filenames(
      problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir,
      num_train_shards)
  generator_utils.generate_files(training_gen(), train_output_files,
                                 FLAGS.max_cases)
  num_dev_shards = int(num_train_shards * 0.1)
  tf.logging.info("Generating development data for %s.", problem)
  dev_output_files = generator_utils.dev_data_filenames(
      problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir,
      num_dev_shards)
  generator_utils.generate_files(dev_gen(), dev_output_files)
  num_test_shards = int(num_train_shards * 0.1)
  test_output_files = []
  test_gen_data = test_gen()
  if test_gen_data is not None:
    tf.logging.info("Generating test data for %s.", problem)
    test_output_files = generator_utils.test_data_filenames(
        problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir,
        num_test_shards)
    generator_utils.generate_files(test_gen_data, test_output_files)
  all_output_files = train_output_files + dev_output_files + test_output_files
  generator_utils.shuffle_dataset(all_output_files)
Example 9: generate_data
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  self.data_dir = data_dir
  # Map each dataset split to the mode string used by preprocess_data.
  self.mode = {problem.DatasetSplit.TRAIN: 'train',
               problem.DatasetSplit.EVAL: 'dev',
               problem.DatasetSplit.TEST: 'test'}
  filepath_fns = {problem.DatasetSplit.TRAIN: self.training_filepaths,
                  problem.DatasetSplit.EVAL: self.dev_filepaths,
                  problem.DatasetSplit.TEST: self.test_filepaths}
  split_paths = [(split['split'], filepath_fns[split['split']](
      data_dir, split['shards'], shuffled=self.already_shuffled))
                 for split in self.dataset_splits]
  all_paths = []
  for _, paths in split_paths:
    all_paths.extend(paths)
  if self.is_generate_per_split:
    for split, paths in split_paths:
      # Create the source and target txt files from the raw data.
      self.preprocess_data(self.mode[split])
      generator_utils.generate_files(
          self.generate_encoded_samples(data_dir, tmp_dir, split), paths)
  else:
    self.preprocess_data(self.mode[problem.DatasetSplit.TRAIN])
    generator_utils.generate_files(
        self.generate_encoded_samples(
            data_dir, tmp_dir, problem.DatasetSplit.TRAIN), all_paths)
  generator_utils.shuffle_dataset(all_paths, extra_fn=self._pack_fn())
Example 10: generate_data
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  # The magic numbers below (162770 / 19867 / 19962) match the standard
  # CelebA train/validation/test split sizes.
  train_gen = self.generator(tmp_dir, 162770)
  train_paths = self.training_filepaths(
      data_dir, self.train_shards, shuffled=False)
  generator_utils.generate_files(train_gen, train_paths)
  dev_gen = self.generator(tmp_dir, 19867, 162770)
  dev_paths = self.dev_filepaths(data_dir, self.dev_shards, shuffled=False)
  generator_utils.generate_files(dev_gen, dev_paths)
  test_gen = self.generator(tmp_dir, 19962, 162770 + 19867)
  test_paths = self.test_filepaths(data_dir, self.test_shards, shuffled=False)
  generator_utils.generate_files(test_gen, test_paths)
  generator_utils.shuffle_dataset(train_paths + dev_paths + test_paths)