當前位置: 首頁>>代碼示例>>Python>>正文


Python generator_utils.generate_files方法代碼示例

本文整理匯總了Python中tensor2tensor.data_generators.generator_utils.generate_files方法的典型用法代碼示例。如果您正苦於以下問題:Python generator_utils.generate_files方法的具體用法?Python generator_utils.generate_files怎麼用?Python generator_utils.generate_files使用的例子?那麼, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在tensor2tensor.data_generators.generator_utils的用法示例。


在下文中一共展示了generator_utils.generate_files方法的13個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: generate_data_for_problem

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import generate_files [as 別名]
def generate_data_for_problem(problem):
  """Write unshuffled train/dev files for `problem`, then shuffle them.

  The (training, dev) generator factories are looked up in
  _SUPPORTED_PROBLEM_GENERATORS. Training filenames are built for
  FLAGS.num_shards shards (10 when the flag is falsy) and the dev filenames
  for exactly one shard. FLAGS.max_cases is forwarded as the third positional
  argument of generate_files for the training data only.

  Args:
    problem: key into _SUPPORTED_PROBLEM_GENERATORS; also used as the filename
      prefix (concatenated with generator_utils.UNSHUFFLED_SUFFIX).
  """
  make_train_gen, make_dev_gen = _SUPPORTED_PROBLEM_GENERATORS[problem]
  shard_count = FLAGS.num_shards or 10

  # Training split.
  tf.logging.info("Generating training data for %s.", problem)
  unshuffled_name = problem + generator_utils.UNSHUFFLED_SUFFIX
  train_files = generator_utils.train_data_filenames(
      unshuffled_name, FLAGS.data_dir, shard_count)
  generator_utils.generate_files(
      make_train_gen(), train_files, FLAGS.max_cases)

  # Development split: always a single shard.
  tf.logging.info("Generating development data for %s.", problem)
  dev_files = generator_utils.dev_data_filenames(
      unshuffled_name, FLAGS.data_dir, 1)
  generator_utils.generate_files(make_dev_gen(), dev_files)

  # Shuffle everything that was just written.
  generator_utils.shuffle_dataset(train_files + dev_files)
開發者ID:akzaidi,項目名稱:fine-lm,代碼行數:18,代碼來源:t2t_datagen.py

示例2: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import generate_files [as 別名]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    """Generate the test, train and dev files for this problem.

    Test files are requested with `shuffled=True` and are not passed to any
    shuffle step here. Train and dev files are requested with
    `shuffled=False` and are shuffled afterwards: either together, with a
    single generator over TRAIN_DATASETS filling both (when
    `self.use_train_shards_for_dev` is truthy), or through
    `generator_utils.generate_dataset_and_shuffle` with one generator each.

    Args:
      data_dir: forwarded to the filepath helpers and to `self.generator`.
      tmp_dir: forwarded to `self.generator`.
      task_id: not used by this implementation.
    """
    unshuffled_train = self.training_filepaths(
        data_dir, self.num_shards, shuffled=False)
    unshuffled_dev = self.dev_filepaths(
        data_dir, self.num_dev_shards, shuffled=False)
    final_test = self.test_filepaths(
        data_dir, self.num_test_shards, shuffled=True)

    # The test split is always produced first.
    test_examples = self.generator(data_dir, tmp_dir, self.TEST_DATASETS)
    generator_utils.generate_files(test_examples, final_test)

    if not self.use_train_shards_for_dev:
      # Separate generators for train and dev.
      generator_utils.generate_dataset_and_shuffle(
          self.generator(data_dir, tmp_dir, self.TRAIN_DATASETS),
          unshuffled_train,
          self.generator(data_dir, tmp_dir, self.DEV_DATASETS),
          unshuffled_dev)
      return

    # One pass over the training datasets fills both train and dev files.
    combined = unshuffled_train + unshuffled_dev
    generator_utils.generate_files(
        self.generator(data_dir, tmp_dir, self.TRAIN_DATASETS), combined)
    generator_utils.shuffle_dataset(combined)
開發者ID:akzaidi,項目名稱:fine-lm,代碼行數:22,代碼來源:common_voice.py

示例3: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import generate_files [as 別名]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    """Generate unshuffled files for every configured split, then shuffle.

    For each entry of `self.dataset_splits` (a mapping with "split" and
    "shards" keys) the matching filepath helper is asked for unshuffled
    filenames. When `self.is_generate_per_split` is truthy each split gets its
    own `generate_samples` call; otherwise a single TRAIN-split generator
    fills all files.

    Args:
      data_dir: forwarded to the filepath helpers and `generate_samples`.
      tmp_dir: forwarded to `generate_samples`.
      task_id: not used by this implementation.
    """
    path_fn_for_split = {
        problem.DatasetSplit.TRAIN: self.training_filepaths,
        problem.DatasetSplit.EVAL: self.dev_filepaths,
        problem.DatasetSplit.TEST: self.test_filepaths,
    }

    paths_by_split = []
    for split_spec in self.dataset_splits:
      split_name = split_spec["split"]
      make_paths = path_fn_for_split[split_name]
      paths_by_split.append(
          (split_name,
           make_paths(data_dir, split_spec["shards"], shuffled=False)))

    # Flat list of every output file, in split order.
    every_path = [
        path for _, shard_paths in paths_by_split for path in shard_paths]

    if self.is_generate_per_split:
      for split_name, shard_paths in paths_by_split:
        samples = self.generate_samples(data_dir, tmp_dir, split_name)
        generator_utils.generate_files(samples, shard_paths)
    else:
      samples = self.generate_samples(
          data_dir, tmp_dir, problem.DatasetSplit.TRAIN)
      generator_utils.generate_files(samples, every_path)

    generator_utils.shuffle_dataset(every_path)
開發者ID:akzaidi,項目名稱:fine-lm,代碼行數:27,代碼來源:timeseries.py

示例4: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import generate_files [as 別名]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    """Generates and shuffles the single shard assigned to `task_id`.

    Task ids below `self.num_train_shards` select a training file; the
    remaining ids (up to `self.num_generate_tasks`) select a dev file, indexed
    by `task_id - self.num_train_shards`.

    Args:
      data_dir: a string
      tmp_dir: a string
      task_id: an integer in [0, self.num_generate_tasks). The default of -1
        fails the range check, so callers must pass an explicit task id.

    Returns:
      None. The selected file is written and shuffled as a side effect.

    Raises:
      AssertionError: if `task_id` is outside [0, self.num_generate_tasks).
    """
    # Hand task_id to the logger as an argument instead of pre-formatting the
    # message, so formatting is deferred to the logging call.
    tf.logging.info("generate_data task_id=%s", task_id)
    encoder = self.get_or_create_vocab(data_dir, tmp_dir)
    assert 0 <= task_id < self.num_generate_tasks, (
        "task_id %r is out of range" % (task_id,))
    if task_id < self.num_train_shards:
      out_file = self.training_filepaths(
          data_dir, self.num_train_shards, shuffled=False)[task_id]
    else:
      # Dev files are numbered after all the training files.
      out_file = self.dev_filepaths(
          data_dir, self.num_dev_shards,
          shuffled=False)[task_id - self.num_train_shards]
    generator_utils.generate_files(
        self.example_generator(encoder, tmp_dir, task_id), [out_file])
    generator_utils.shuffle_dataset([out_file])
開發者ID:akzaidi,項目名稱:fine-lm,代碼行數:25,代碼來源:text_problems.py

示例5: testGenerateFiles

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import generate_files [as 別名]
def testGenerateFiles(self):
    """Checks that generate_files creates the single requested train file."""
    tmp_dir = self.get_temp_dir()
    # mkstemp returns an already-open OS-level file descriptor together with
    # the path. Only the path is needed, so close the descriptor immediately
    # instead of leaking it for the rest of the test process.
    fd, tmp_file_path = tempfile.mkstemp(dir=tmp_dir)
    os.close(fd)
    tmp_file_name = os.path.basename(tmp_file_path)
    shard_path = tmp_file_path + "-train-00000-of-00001"

    # Generate a trivial file and assert the file exists.
    def test_generator():
      yield {"inputs": [1], "target": [1]}

    filenames = generator_utils.train_data_filenames(tmp_file_name, tmp_dir, 1)
    generator_utils.generate_files(test_generator(), filenames)
    self.assertTrue(tf.gfile.Exists(shard_path))

    # Clean up.
    os.remove(shard_path)
    os.remove(tmp_file_path)
開發者ID:tensorflow,項目名稱:tensor2tensor,代碼行數:18,代碼來源:generator_utils_test.py

示例6: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import generate_files [as 別名]
def generate_data(self, data_dir, tmp_dir=None, task_id=-1):
    """Write the current epoch's rollouts to disk, one split at a time.

    If nothing is stored for `self.current_epoch` yet, the epoch is split
    first via `self._split_current_epoch()`. For every (split, paths) pair
    from `self.splits_and_paths(data_dir)`, frames are drawn from a single
    shared generator and written file by file: every file but the last
    receives `num_frames // len(paths)` frames, and the last one receives
    whatever is left.

    Args:
      data_dir: forwarded to `self.splits_and_paths`.
      tmp_dir: not used by this implementation.
      task_id: not used by this implementation.
    """
    if not self._rollouts_by_epoch_and_split[self.current_epoch]:
      # Data not loaded from disk.
      self._split_current_epoch()

    current_rollouts = self._rollouts_by_epoch_and_split[self.current_epoch]

    for split, shard_paths in self.splits_and_paths(data_dir):
      split_rollouts = current_rollouts[split]
      total_frames = self._calc_num_frames(split_rollouts)
      frames_per_shard = total_frames // len(shard_paths)

      # One generator is shared by all files of the split, so consecutive
      # islice calls continue where the previous file stopped.
      frames = self._generate_frames(split_rollouts)
      final_index = len(shard_paths) - 1
      for index, shard_path in enumerate(shard_paths):
        # islice(..., None) drains the generator: the remainder goes into the
        # last file, which preserves the ordering.
        take = None if index == final_index else frames_per_shard
        generator_utils.generate_files(
            itertools.islice(frames, take), [shard_path],
            cycle_every_n=float("inf"))
開發者ID:tensorflow,項目名稱:tensor2tensor,代碼行數:26,代碼來源:gym_env.py

示例7: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import generate_files [as 別名]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    """Generate encoded samples for every configured split, then shuffle.

    Filenames are requested with `shuffled=self.already_shuffled`. When
    `self.is_generate_per_split` is truthy each split is generated by its own
    `generate_encoded_samples` call; otherwise one TRAIN-split generator fills
    all files. All files are finally passed to `shuffle_dataset` with
    `extra_fn=self._pack_fn()`.

    Args:
      data_dir: forwarded to the filepath helpers and the sample generator.
      tmp_dir: forwarded to the sample generator.
      task_id: not used by this implementation.
    """
    path_builders = {
        problem.DatasetSplit.TRAIN: self.training_filepaths,
        problem.DatasetSplit.EVAL: self.dev_filepaths,
        problem.DatasetSplit.TEST: self.test_filepaths,
    }

    per_split = []
    for spec in self.dataset_splits:
      name = spec["split"]
      shard_paths = path_builders[name](
          data_dir, spec["shards"], shuffled=self.already_shuffled)
      per_split.append((name, shard_paths))

    every_path = []
    for _, shard_paths in per_split:
      every_path += shard_paths

    if not self.is_generate_per_split:
      # A single generator over the TRAIN split feeds every output file.
      encoded = self.generate_encoded_samples(
          data_dir, tmp_dir, problem.DatasetSplit.TRAIN)
      generator_utils.generate_files(encoded, every_path)
    else:
      for name, shard_paths in per_split:
        encoded = self.generate_encoded_samples(data_dir, tmp_dir, name)
        generator_utils.generate_files(encoded, shard_paths)

    generator_utils.shuffle_dataset(every_path, extra_fn=self._pack_fn())
開發者ID:yyht,項目名稱:BERT,代碼行數:27,代碼來源:text_problems.py

示例8: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import generate_files [as 別名]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    """Generate and shuffle the one output file owned by `task_id`.

    Args:
      data_dir: forwarded to the output-file lookup and the sample generator.
      tmp_dir: forwarded to the sample generator.
      task_id: integer in [0, self.num_output_shards); the default of -1 fails
        the range assertion.

    Raises:
      AssertionError: if `task_id` is out of range.
    """
    assert 0 <= task_id < self.num_output_shards

    # One task writes exactly one output file, but may read several *input*
    # files.
    source_files = self._task_id_to_input_files(task_id)
    destination = self._task_id_to_output_file(data_dir, task_id)

    # Only the first element (the split being written) is needed here.
    target_split, _, _ = self._task_id_to_output_split(task_id)

    encoded = self.generate_encoded_samples(
        data_dir, tmp_dir, target_split, source_files)
    packed = self._maybe_pack_examples(encoded)
    generator_utils.generate_files(packed, [destination])

    # Shuffle the freshly written file.
    generator_utils.shuffle_dataset([destination])
開發者ID:mlperf,項目名稱:training_results_v0.5,代碼行數:23,代碼來源:text_problems.py

示例9: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import generate_files [as 別名]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    """Generate unshuffled encoded samples for every split, then shuffle.

    Each entry of `self.dataset_splits` supplies a 'split' and a 'shards'
    value used to build unshuffled filenames. Generation is per split when
    `self.is_generate_per_split` is truthy; otherwise a single TRAIN-split
    generator fills all files.

    Args:
      data_dir: forwarded to the filepath helpers and the sample generator.
      tmp_dir: forwarded to the sample generator.
      task_id: not used by this implementation.
    """
    filepath_fns = {
        problem.DatasetSplit.TRAIN: self.training_filepaths,
        problem.DatasetSplit.EVAL: self.dev_filepaths,
        problem.DatasetSplit.TEST: self.test_filepaths,
    }

    def paths_for(spec):
      # Unshuffled filenames for one dataset_splits entry.
      return filepath_fns[spec['split']](
          data_dir, spec['shards'], shuffled=False)

    split_paths = [(spec['split'], paths_for(spec))
                   for spec in self.dataset_splits]
    all_paths = [path for _, paths in split_paths for path in paths]

    if self.is_generate_per_split:
      for split, paths in split_paths:
        samples = self.generate_encoded_samples(data_dir, tmp_dir, split)
        generator_utils.generate_files(samples, paths)
    else:
      samples = self.generate_encoded_samples(
          data_dir, tmp_dir, problem.DatasetSplit.TRAIN)
      generator_utils.generate_files(samples, all_paths)

    generator_utils.shuffle_dataset(all_paths)
開發者ID:magenta,項目名稱:magenta,代碼行數:26,代碼來源:glyphazzn.py

示例10: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import generate_files [as 別名]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    """Write one train file and one dev file, both under shuffled names.

    `self.generator` is called with True for the train file and False for the
    dev file (presumably a "training" flag; confirm against the generator).

    Args:
      data_dir: forwarded to the filepath helpers and `self.generator`.
      tmp_dir: forwarded to `self.generator`.
      task_id: not used by this implementation.
    """
    # Both filename lists are resolved before any data is generated.
    targets = (
        (True, self.training_filepaths(data_dir, 1, shuffled=True)),
        (False, self.dev_filepaths(data_dir, 1, shuffled=True)),
    )
    for generator_flag, shard_paths in targets:
      generator_utils.generate_files(
          self.generator(data_dir, tmp_dir, generator_flag), shard_paths)
開發者ID:akzaidi,項目名稱:fine-lm,代碼行數:9,代碼來源:data_reader_test.py

示例11: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import generate_files [as 別名]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    """Write train and dev files from an AlgorithmicIdentityBinary40 problem.

    Both files are written under shuffled filenames, and 100 is passed as the
    third positional argument of generate_files in both calls.

    Args:
      data_dir: forwarded to the filepath helpers.
      tmp_dir: unused (deleted on entry).
      task_id: unused (deleted on entry).
    """
    del tmp_dir, task_id  # Unused.
    source_problem = algorithmic.AlgorithmicIdentityBinary40()

    train_cases = source_problem.generator(self.num_symbols, 40, 100000)
    train_files = self.training_filepaths(data_dir, 1, shuffled=True)
    generator_utils.generate_files(train_cases, train_files, 100)

    dev_cases = source_problem.generator(self.num_symbols, 400, 10000)
    dev_files = self.dev_filepaths(data_dir, 1, shuffled=True)
    generator_utils.generate_files(dev_cases, dev_files, 100)
開發者ID:akzaidi,項目名稱:fine-lm,代碼行數:11,代碼來源:trainer_lib_test.py

示例12: generate_data

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import generate_files [as 別名]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    """Generates the data files for every configured split.

    Files are written directly under their `shuffled=True` names and no
    shuffle step is run here. Generation is per split when
    `self.is_generate_per_split` is truthy; otherwise a single TRAIN-split
    generator fills all files. `cycle_every_n` is derived from
    `self.total_number_of_frames` divided by the number of output files.

    Args:
      data_dir: forwarded to the filepath helpers and the sample generator.
      tmp_dir: forwarded to the sample generator.
      task_id: not used by this implementation.
    """
    filepath_fns = {
        problem.DatasetSplit.TRAIN: self.training_filepaths,
        problem.DatasetSplit.EVAL: self.dev_filepaths,
        problem.DatasetSplit.TEST: self.test_filepaths,
    }

    def frames_per_file(paths):
      # Integer division yields 0 when there are fewer frames than files.
      # generate_files uses cycle_every_n as a modulus (per the library
      # source; confirm), so clamp to 1 to avoid a ZeroDivisionError.
      return max(1, self.total_number_of_frames // len(paths))

    # We set shuffled=True as we don't want to shuffle on disk later.
    split_paths = [(split["split"], filepath_fns[split["split"]](
        data_dir, split["shards"], shuffled=True))
                   for split in self.dataset_splits]
    all_paths = []
    for _, paths in split_paths:
      all_paths.extend(paths)

    if self.is_generate_per_split:
      for split, paths in split_paths:
        generator_utils.generate_files(
            self.generate_encoded_samples_debug(
                data_dir, tmp_dir, split), paths,
            cycle_every_n=frames_per_file(paths))
    else:
      generator_utils.generate_files(
          self.generate_encoded_samples_debug(
              data_dir, tmp_dir, problem.DatasetSplit.TRAIN),
          all_paths,
          cycle_every_n=frames_per_file(all_paths))


# TODO(lukaszkaiser): remove this version after everything is ported. 
開發者ID:akzaidi,項目名稱:fine-lm,代碼行數:33,代碼來源:video_utils.py

示例13: generate_data_for_problem

# 需要導入模塊: from tensor2tensor.data_generators import generator_utils [as 別名]
# 或者: from tensor2tensor.data_generators.generator_utils import generate_files [as 別名]
def generate_data_for_problem(problem):
  """Generate data for a problem in _SUPPORTED_PROBLEM_GENERATORS.

  Writes unshuffled train, dev and (when the test generator factory returns
  something other than None) test files, then shuffles all of them. The
  training data uses FLAGS.num_shards files (10 when the flag is falsy); dev
  and test each use a tenth of that, but never fewer than one file.

  Args:
    problem: key into _SUPPORTED_PROBLEM_GENERATORS; also used as the filename
      prefix (concatenated with generator_utils.UNSHUFFLED_SUFFIX).
  """
  training_gen, dev_gen, test_gen = _SUPPORTED_PROBLEM_GENERATORS[problem]

  num_train_shards = FLAGS.num_shards or 10
  tf.logging.info("Generating training data for %s.", problem)
  train_output_files = generator_utils.train_data_filenames(
      problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir,
      num_train_shards)
  generator_utils.generate_files(training_gen(), train_output_files,
                                 FLAGS.max_cases)
  # int(n * 0.1) is 0 for fewer than 10 training shards, which would leave
  # the dev/test generators with no output file at all; keep at least one.
  num_dev_shards = max(1, int(num_train_shards * 0.1))
  tf.logging.info("Generating development data for %s.", problem)
  dev_output_files = generator_utils.dev_data_filenames(
      problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir,
      num_dev_shards)
  generator_utils.generate_files(dev_gen(), dev_output_files)
  num_test_shards = max(1, int(num_train_shards * 0.1))
  test_output_files = []
  test_gen_data = test_gen()
  # The test split is optional: a factory returning None means "no test data".
  if test_gen_data is not None:
    tf.logging.info("Generating test data for %s.", problem)
    test_output_files = generator_utils.test_data_filenames(
        problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir,
        num_test_shards)
    generator_utils.generate_files(test_gen_data, test_output_files)
  all_output_files = train_output_files + dev_output_files + test_output_files
  generator_utils.shuffle_dataset(all_output_files)
開發者ID:tensorflow,項目名稱:tensor2tensor,代碼行數:30,代碼來源:t2t_datagen.py


注:本文中的tensor2tensor.data_generators.generator_utils.generate_files方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。