當前位置: 首頁>>代碼示例>>Python>>正文


Python generator_utils.shuffle_dataset方法代碼示例

本文整理匯總了Python中tensor2tensor.data_generators.generator_utils.shuffle_dataset方法的典型用法代碼示例。如果您正苦於以下問題:Python generator_utils.shuffle_dataset方法的具體用法?Python generator_utils.shuffle_dataset怎麼用?Python generator_utils.shuffle_dataset使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊tensor2tensor.data_generators.generator_utils的用法示例。


在下文中一共展示了generator_utils.shuffle_dataset方法的10個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: generate_data_for_problem

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as 別名]
def generate_data_for_problem(problem):
  """Generate and shuffle train/dev files for one supported problem.

  Args:
    problem: key into _SUPPORTED_PROBLEM_GENERATORS. The entry must unpack
      into a (training generator factory, dev generator factory) pair. The
      key, with generator_utils.UNSHUFFLED_SUFFIX appended, is also passed as
      the first argument of the filename helpers.
  """
  make_train_examples, make_dev_examples = (
      _SUPPORTED_PROBLEM_GENERATORS[problem])

  # Fall back to 10 training shards when the flag is unset or zero.
  shard_count = FLAGS.num_shards or 10

  tf.logging.info("Generating training data for %s.", problem)
  unshuffled_name = problem + generator_utils.UNSHUFFLED_SUFFIX
  train_files = generator_utils.train_data_filenames(
      unshuffled_name, FLAGS.data_dir, shard_count)
  # FLAGS.max_cases is forwarded for the training files only.
  generator_utils.generate_files(
      make_train_examples(), train_files, FLAGS.max_cases)

  tf.logging.info("Generating development data for %s.", problem)
  # Development file names are always requested with a count of 1.
  dev_files = generator_utils.dev_data_filenames(
      unshuffled_name, FLAGS.data_dir, 1)
  generator_utils.generate_files(make_dev_examples(), dev_files)

  generator_utils.shuffle_dataset(train_files + dev_files)
開發者ID:akzaidi,項目名稱:fine-lm,代碼行數:18,代碼來源:t2t_datagen.py

示例2: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as 別名]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    """Generate the test, train and dev files for this problem.

    Test file names are requested with shuffled=True and are written first;
    they are never passed to a shuffle call here. Train and dev file names are
    requested with shuffled=False.

    Args:
      data_dir: passed to the filepath helpers and to `self.generator`.
      tmp_dir: passed through to `self.generator`.
      task_id: not used by this implementation.
    """
    unshuffled_train = self.training_filepaths(
        data_dir, self.num_shards, shuffled=False)
    unshuffled_dev = self.dev_filepaths(
        data_dir, self.num_dev_shards, shuffled=False)
    final_test = self.test_filepaths(
        data_dir, self.num_test_shards, shuffled=True)

    test_examples = self.generator(data_dir, tmp_dir, self.TEST_DATASETS)
    generator_utils.generate_files(test_examples, final_test)

    if not self.use_train_shards_for_dev:
      # Separate sources: TRAIN_DATASETS feeds the train files and
      # DEV_DATASETS feeds the dev files.
      train_examples = self.generator(data_dir, tmp_dir, self.TRAIN_DATASETS)
      dev_examples = self.generator(data_dir, tmp_dir, self.DEV_DATASETS)
      generator_utils.generate_dataset_and_shuffle(
          train_examples, unshuffled_train, dev_examples, unshuffled_dev)
    else:
      # One TRAIN_DATASETS stream is written to the train and dev files
      # together, and the combined list is then shuffled.
      combined = unshuffled_train + unshuffled_dev
      train_examples = self.generator(data_dir, tmp_dir, self.TRAIN_DATASETS)
      generator_utils.generate_files(train_examples, combined)
      generator_utils.shuffle_dataset(combined)
開發者ID:akzaidi,項目名稱:fine-lm,代碼行數:22,代碼來源:common_voice.py

示例3: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as 別名]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    """Write unshuffled files for every dataset split, then shuffle them.

    Args:
      data_dir: passed to the per-split filepath functions and to
        `generate_samples`.
      tmp_dir: passed through to `generate_samples`.
      task_id: not used by this implementation.
    """
    path_builders = {
        problem.DatasetSplit.TRAIN: self.training_filepaths,
        problem.DatasetSplit.EVAL: self.dev_filepaths,
        problem.DatasetSplit.TEST: self.test_filepaths,
    }

    # One (split, unshuffled file paths) pair per entry of dataset_splits.
    paths_by_split = []
    for spec in self.dataset_splits:
      split_key = spec["split"]
      build_paths = path_builders[split_key]
      split_files = build_paths(data_dir, spec["shards"], shuffled=False)
      paths_by_split.append((split_key, split_files))

    every_path = [path for _, paths in paths_by_split for path in paths]

    if not self.is_generate_per_split:
      # The TRAIN-split sample stream is written to the combined path list.
      generator_utils.generate_files(
          self.generate_samples(data_dir, tmp_dir, problem.DatasetSplit.TRAIN),
          every_path)
    else:
      for split_key, paths in paths_by_split:
        samples = self.generate_samples(data_dir, tmp_dir, split_key)
        generator_utils.generate_files(samples, paths)

    generator_utils.shuffle_dataset(every_path)
開發者ID:akzaidi,項目名稱:fine-lm,代碼行數:27,代碼來源:timeseries.py

示例4: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as 別名]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    """Generate encoded samples for all dataset splits and shuffle the files.

    File names for each split are requested with
    shuffled=self.already_shuffled. After generation, all files are handed to
    generator_utils.shuffle_dataset together with the callable returned by
    `self._pack_fn()` as `extra_fn`.

    Args:
      data_dir: passed to the filepath functions and the sample generator.
      tmp_dir: passed through to `generate_encoded_samples`.
      task_id: not used by this implementation.
    """
    builders = {
        problem.DatasetSplit.TRAIN: self.training_filepaths,
        problem.DatasetSplit.EVAL: self.dev_filepaths,
        problem.DatasetSplit.TEST: self.test_filepaths,
    }

    per_split = []
    flat_paths = []
    for entry in self.dataset_splits:
      which = entry["split"]
      files = builders[which](
          data_dir, entry["shards"], shuffled=self.already_shuffled)
      per_split.append((which, files))
    for _, files in per_split:
      flat_paths += list(files)

    if self.is_generate_per_split:
      # Each split has its own sample stream and its own files.
      for which, files in per_split:
        encoded = self.generate_encoded_samples(data_dir, tmp_dir, which)
        generator_utils.generate_files(encoded, files)
    else:
      # Only the TRAIN stream is generated; it is written to every file.
      encoded = self.generate_encoded_samples(
          data_dir, tmp_dir, problem.DatasetSplit.TRAIN)
      generator_utils.generate_files(encoded, flat_paths)

    generator_utils.shuffle_dataset(flat_paths, extra_fn=self._pack_fn())
開發者ID:tensorflow,項目名稱:tensor2tensor,代碼行數:27,代碼來源:text_problems.py

示例5: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as 別名]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    """Generates and shuffles the single training/dev shard for `task_id`.

    Task ids below `self.num_train_shards` select the training shard with that
    index; the remaining ids select dev shard `task_id - num_train_shards`.

    Args:
      data_dir: a string
      tmp_dir: a string
      task_id: an integer in [0, self.num_generate_tasks). The default of -1
        is rejected, so callers must always pass an explicit task id.

    Returns:
      None. The selected shard file is written and then shuffled.

    Raises:
      AssertionError: if `task_id` is outside [0, self.num_generate_tasks).
    """
    # Pass the argument separately so the logger does the formatting.
    tf.logging.info("generate_data task_id=%s", task_id)
    encoder = self.get_or_create_vocab(data_dir, tmp_dir)
    # Checked explicitly rather than with `assert`: under `python -O` an
    # assert is stripped, and the default task_id=-1 would then index [-1]
    # and silently write the last training shard. AssertionError is kept so
    # existing callers see the same exception type.
    if not 0 <= task_id < self.num_generate_tasks:
      raise AssertionError(
          "task_id must be in [0, %d), got %r"
          % (self.num_generate_tasks, task_id))
    if task_id < self.num_train_shards:
      out_file = self.training_filepaths(
          data_dir, self.num_train_shards, shuffled=False)[task_id]
    else:
      # Dev shards are numbered after the training shards.
      out_file = self.dev_filepaths(
          data_dir, self.num_dev_shards,
          shuffled=False)[task_id - self.num_train_shards]
    generator_utils.generate_files(
        self.example_generator(encoder, tmp_dir, task_id), [out_file])
    generator_utils.shuffle_dataset([out_file])
開發者ID:yyht,項目名稱:BERT,代碼行數:25,代碼來源:text_problems.py

示例6: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as 別名]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    """Write encoded samples to unshuffled split files and shuffle them.

    Args:
      data_dir: passed to the filepath functions and the sample generator.
      tmp_dir: passed through to `generate_encoded_samples`.
      task_id: not used by this implementation.
    """
    fn_for_split = {
        problem.DatasetSplit.TRAIN: self.training_filepaths,
        problem.DatasetSplit.EVAL: self.dev_filepaths,
        problem.DatasetSplit.TEST: self.test_filepaths,
    }

    def _unshuffled_paths(split_spec):
      """Return (split, unshuffled file paths) for one dataset_splits entry."""
      name = split_spec['split']
      make_paths = fn_for_split[name]
      return name, make_paths(data_dir, split_spec['shards'], shuffled=False)

    split_files = [_unshuffled_paths(spec) for spec in self.dataset_splits]

    combined_files = []
    for _, file_list in split_files:
      combined_files.extend(file_list)

    if self.is_generate_per_split:
      for name, file_list in split_files:
        generator_utils.generate_files(
            self.generate_encoded_samples(data_dir, tmp_dir, name), file_list)
    else:
      # Without per-split generation only the TRAIN stream is produced; it is
      # written to the combined file list.
      train_stream = self.generate_encoded_samples(
          data_dir, tmp_dir, problem.DatasetSplit.TRAIN)
      generator_utils.generate_files(train_stream, combined_files)

    generator_utils.shuffle_dataset(combined_files)
開發者ID:magenta,項目名稱:magenta,代碼行數:26,代碼來源:glyphazzn.py

示例7: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as 別名]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    """Generate split files from encoded samples, then shuffle all of them.

    Every encoded sample stream is wrapped with `self._maybe_pack_examples`
    before being written to the unshuffled files.

    Args:
      data_dir: passed to the filepath functions and the sample generator.
      tmp_dir: passed through to `generate_encoded_samples`.
      task_id: not used by this implementation.
    """
    filepath_lookup = {
        problem.DatasetSplit.TRAIN: self.training_filepaths,
        problem.DatasetSplit.EVAL: self.dev_filepaths,
        problem.DatasetSplit.TEST: self.test_filepaths,
    }

    grouped = []
    for split_info in self.dataset_splits:
      split_id = split_info["split"]
      shard_paths = filepath_lookup[split_id](
          data_dir, split_info["shards"], shuffled=False)
      grouped.append((split_id, shard_paths))

    output_paths = [p for _, shard_paths in grouped for p in shard_paths]

    if self.is_generate_per_split:
      # (split to generate, files to write) pairs, one per split.
      jobs = grouped
    else:
      # A single job: the TRAIN stream written to every file.
      jobs = [(problem.DatasetSplit.TRAIN, output_paths)]

    for split_id, targets in jobs:
      raw_examples = self.generate_encoded_samples(data_dir, tmp_dir, split_id)
      generator_utils.generate_files(
          self._maybe_pack_examples(raw_examples), targets)

    generator_utils.shuffle_dataset(output_paths)
開發者ID:akzaidi,項目名稱:fine-lm,代碼行數:29,代碼來源:text_problems.py

示例8: generate_data_for_problem

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as 別名]
def generate_data_for_problem(problem):
  """Generate and shuffle train/dev/test files for one supported problem.

  Args:
    problem: key into _SUPPORTED_PROBLEM_GENERATORS. The entry must unpack
      into (training, dev, test) generator factories. Test files are only
      written when the test factory returns something other than None.
  """
  make_train, make_dev, make_test = _SUPPORTED_PROBLEM_GENERATORS[problem]

  # Fall back to 10 training shards when the flag is unset or zero.
  train_shard_count = FLAGS.num_shards or 10

  tf.logging.info("Generating training data for %s.", problem)
  train_files = generator_utils.train_data_filenames(
      problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir,
      train_shard_count)
  generator_utils.generate_files(make_train(), train_files, FLAGS.max_cases)

  # Dev and test each get a tenth of the training shard count, truncated.
  # NOTE(review): this is 0 when fewer than 10 training shards are requested;
  # confirm how the filename and generation helpers treat a zero count.
  tenth_of_train = int(train_shard_count * 0.1)

  tf.logging.info("Generating development data for %s.", problem)
  dev_files = generator_utils.dev_data_filenames(
      problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir,
      tenth_of_train)
  generator_utils.generate_files(make_dev(), dev_files)

  test_files = []
  test_examples = make_test()
  if test_examples is not None:
    tf.logging.info("Generating test data for %s.", problem)
    test_files = generator_utils.test_data_filenames(
        problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir,
        tenth_of_train)
    generator_utils.generate_files(test_examples, test_files)

  generator_utils.shuffle_dataset(train_files + dev_files + test_files)
開發者ID:tensorflow,項目名稱:tensor2tensor,代碼行數:30,代碼來源:t2t_datagen.py

示例9: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as 別名]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    """Preprocess raw data, write encoded samples per split, then shuffle.

    Stores `data_dir` and a split-to-mode-name mapping on the instance, calls
    `self.preprocess_data` with the mode name before each generation step, and
    finally shuffles every file with `self._pack_fn()` as `extra_fn`.

    Args:
      data_dir: stored on `self.data_dir`; also passed to the filepath
        functions and the sample generator.
      tmp_dir: passed through to `generate_encoded_samples`.
      task_id: not used by this implementation.
    """
    self.data_dir = data_dir
    # Mode name handed to preprocess_data for each dataset split.
    self.mode = {
        problem.DatasetSplit.TRAIN: 'train',
        problem.DatasetSplit.EVAL: 'dev',
        problem.DatasetSplit.TEST: 'test',
    }
    make_filepaths = {
        problem.DatasetSplit.TRAIN: self.training_filepaths,
        problem.DatasetSplit.EVAL: self.dev_filepaths,
        problem.DatasetSplit.TEST: self.test_filepaths,
    }

    split_outputs = []
    for cfg in self.dataset_splits:
      split_name = cfg['split']
      outputs = make_filepaths[split_name](
          data_dir, cfg['shards'], shuffled=self.already_shuffled)
      split_outputs.append((split_name, outputs))

    all_outputs = [f for _, outputs in split_outputs for f in outputs]

    if not self.is_generate_per_split:
      # Only the TRAIN split is preprocessed and generated; its samples are
      # written to the combined file list.
      self.preprocess_data(self.mode[problem.DatasetSplit.TRAIN])
      train_samples = self.generate_encoded_samples(
          data_dir, tmp_dir, problem.DatasetSplit.TRAIN)
      generator_utils.generate_files(train_samples, all_outputs)
    else:
      for split_name, outputs in split_outputs:
        # Preprocessing runs before the samples for this split are built
        # (per the original comment it creates the source and target txt
        # files from the raw data).
        self.preprocess_data(self.mode[split_name])
        split_samples = self.generate_encoded_samples(
            data_dir, tmp_dir, split_name)
        generator_utils.generate_files(split_samples, outputs)

    generator_utils.shuffle_dataset(all_outputs, extra_fn=self._pack_fn())
開發者ID:tensorflow,項目名稱:tensor2tensor,代碼行數:32,代碼來源:dialog_abstract.py

示例10: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import shuffle_dataset [as 別名]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    """Write train, dev and test files in turn, then shuffle all of them.

    `self.generator` is called with fixed numeric arguments for each split.
    NOTE(review): these look like (example count, start offset), with each
    split starting where the previous one ended; confirm against
    `self.generator`.

    Args:
      data_dir: passed to the filepath functions.
      tmp_dir: passed to `self.generator`.
      task_id: not used by this implementation.
    """
    first_arg_train = 162770
    first_arg_dev = 19867
    first_arg_test = 19962

    train_examples = self.generator(tmp_dir, first_arg_train)
    train_files = self.training_filepaths(
        data_dir, self.train_shards, shuffled=False)
    generator_utils.generate_files(train_examples, train_files)

    dev_examples = self.generator(tmp_dir, first_arg_dev, first_arg_train)
    dev_files = self.dev_filepaths(data_dir, self.dev_shards, shuffled=False)
    generator_utils.generate_files(dev_examples, dev_files)

    test_examples = self.generator(
        tmp_dir, first_arg_test, first_arg_train + first_arg_dev)
    test_files = self.test_filepaths(
        data_dir, self.test_shards, shuffled=False)
    generator_utils.generate_files(test_examples, test_files)

    every_file = train_files + dev_files + test_files
    generator_utils.shuffle_dataset(every_file)
開發者ID:tensorflow,項目名稱:tensor2tensor,代碼行數:17,代碼來源:celeba.py


注:本文中的tensor2tensor.data_generators.generator_utils.shuffle_dataset方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。