Python generator_utils.maybe_download Method Code Examples

This article collects typical usage examples of the Python method tensor2tensor.data_generators.generator_utils.maybe_download. If you are unsure what generator_utils.maybe_download does, how to call it, or want to see it in context, the curated examples below should help. You can also explore further usage of the containing module, tensor2tensor.data_generators.generator_utils.


The following shows 15 code examples of the generator_utils.maybe_download method, sorted by popularity by default.
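Before the examples, here is a minimal usage sketch distilled from them. The signature generator_utils.maybe_download(directory, filename, uri) is consistent across the examples below: it downloads uri to directory/filename if the file is not already present, and returns the local path either way. The URL and scratch directory in this sketch are placeholders, not real endpoints.

import os
import tarfile

from tensor2tensor.data_generators import generator_utils

# Placeholder URL and scratch directory for illustration only.
DATA_URL = "http://example.com/corpus.tar.gz"
tmp_dir = "/tmp/t2t_datagen"

# Downloads DATA_URL into tmp_dir unless the file already exists;
# either way, the return value is the local path to the file.
archive_path = generator_utils.maybe_download(
    tmp_dir, os.path.basename(DATA_URL), DATA_URL)

# Extract the archive, mirroring the download-then-extract pattern
# used throughout the examples below.
with tarfile.open(archive_path, "r:gz") as corpus_tar:
  corpus_tar.extractall(tmp_dir)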

Example 1: generate_samples

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Alternatively: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def generate_samples(self, data_dir, tmp_dir, dataset_split):
    dataset = self.dataset_url(dataset_split)

    tag = "train" if dataset_split == problem.DatasetSplit.TRAIN else "dev"

    url = dataset[0][0]
    compressed_filename = os.path.basename(url)
    compressed_filepath = os.path.join(tmp_dir, compressed_filename)
    generator_utils.maybe_download(tmp_dir, compressed_filename, url)

    mode = "r:gz" if compressed_filepath.endswith("gz") else "r"
    with tarfile.open(compressed_filepath, mode) as corpus_tar:
      corpus_tar.extractall(tmp_dir)

    if self.vocab_type == text_problems.VocabType.SUBWORD:
      generator_utils.get_or_generate_vocab(
          data_dir, tmp_dir, self.vocab_filename, self.approx_vocab_size,
          self.vocab_data_files())

    source_file = os.path.join(tmp_dir, tag + ".modern")
    target_file = os.path.join(tmp_dir, tag + ".original")
    return text_problems.text2text_txt_iterator(source_file,
                                                target_file) 
Developer: akzaidi, Project: fine-lm, Lines of code: 25, Source: style_transfer.py

Example 2: write_raw_text_to_files

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Alternatively: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def write_raw_text_to_files(all_files, urls_path, tmp_dir, is_training):
  """Write text to files."""

  def write_to_file(all_files, urls_path, tmp_dir, filename):
    with io.open(os.path.join(tmp_dir, filename + ".source"), "w") as fstory:
      with io.open(os.path.join(tmp_dir, filename + ".target"),
                   "w") as fsummary:
        for example in example_generator(all_files, urls_path, sum_token=True):
          story, summary = _story_summary_split(example)
          fstory.write(story + "\n")
          fsummary.write(summary + "\n")

  filename = "cnndm.train" if is_training else "cnndm.dev"
  tf.logging.info("Writing %s" % filename)
  write_to_file(all_files, urls_path, tmp_dir, filename)

  if not is_training:
    test_urls_path = generator_utils.maybe_download(tmp_dir, "all_test.txt",
                                                    _TEST_URLS)
    filename = "cnndm.test"
    tf.logging.info("Writing %s" % filename)
    write_to_file(all_files, test_urls_path, tmp_dir, filename) 
Developer: akzaidi, Project: fine-lm, Lines of code: 24, Source: cnn_dailymail.py

Example 3: generate_data

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Alternatively: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
    list_url = ("https://raw.githubusercontent.com/tensorflow/models/master/"
                "street/python/fsns_urls.txt")
    fsns_urls = generator_utils.maybe_download(tmp_dir, "fsns_urls.txt",
                                               list_url)
    with open(fsns_urls, "r") as url_file:
      fsns_files = [line.strip() for line in url_file
                    if line.startswith("http://")]
    for url in fsns_files:
      if "/train/train" in url:
        generator_utils.maybe_download(
            data_dir, "image_fsns-train" + url[-len("-00100-of-00512"):], url)
      elif "/validation/validation" in url:
        generator_utils.maybe_download(
            data_dir, "image_fsns-dev" + url[-len("-00100-of-00512"):], url)
      elif "charset" in url:
        generator_utils.maybe_download(data_dir, "charset_size134.txt", url) 
Developer: akzaidi, Project: fine-lm, Lines of code: 19, Source: fsns.py

Example 4: generate_samples

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Alternatively: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def generate_samples(self, data_dir, tmp_dir, dataset_split):
    path = generator_utils.maybe_download(
        tmp_dir, os.path.basename(DATA_URL), DATA_URL)

    tar = tarfile.open(path)
    tar.extractall(tmp_dir)
    tar.close()

    if dataset_split == problem.DatasetSplit.TRAIN:
      base_dir = os.path.join(tmp_dir, "softmotion30_44k/train/*")
    else:
      base_dir = os.path.join(tmp_dir, "softmotion30_44k/test/*")

    filenames = tf.gfile.Glob(base_dir)
    for frame_number, frame, state, action in self.parse_frames(filenames):
      yield {
          "frame_number": [frame_number],
          "frame": frame,
          "state": state,
          "action": action,
      } 
Developer: akzaidi, Project: fine-lm, Lines of code: 23, Source: bair_robot_pushing.py

Example 5: _original_vocab

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Alternatively: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def _original_vocab(tmp_dir):
  """Returns a set containing the original vocabulary.

  This is important for comparing with published results.

  Args:
    tmp_dir: directory containing dataset.

  Returns:
    a set of strings
  """
  vocab_url = ("http://download.tensorflow.org/models/LM_LSTM_CNN/"
               "vocab-2016-09-10.txt")
  vocab_filename = os.path.basename(vocab_url + ".en")
  vocab_filepath = os.path.join(tmp_dir, vocab_filename)
  if not os.path.exists(vocab_filepath):
    generator_utils.maybe_download(tmp_dir, vocab_filename, vocab_url)
  return set([
      text_encoder.native_to_unicode(l.strip())
      for l in tf.gfile.Open(vocab_filepath)
  ]) 
Developer: akzaidi, Project: fine-lm, Lines of code: 23, Source: lm1b.py

Example 6: generate_samples

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Alternatively: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def generate_samples(self, data_dir, tmp_dir, dataset_split):
    """Generate examples."""
    # Download and extract
    compressed_filename = os.path.basename(self.URL)
    download_path = generator_utils.maybe_download(tmp_dir, compressed_filename,
                                                   self.URL)
    imdb_dir = os.path.join(tmp_dir, "aclImdb")
    if not tf.gfile.Exists(imdb_dir):
      with tarfile.open(download_path, "r:gz") as tar:
        tar.extractall(tmp_dir)

    # Generate examples
    train = dataset_split == problem.DatasetSplit.TRAIN
    dataset = "train" if train else "test"
    for doc, label in self.doc_generator(imdb_dir, dataset, include_label=True):
      yield {
          "inputs": doc,
          "label": int(label),
      } 
Developer: akzaidi, Project: fine-lm, Lines of code: 21, Source: imdb.py

Example 7: generate_samples

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Alternatively: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def generate_samples(self, data_dir, tmp_dir, dataset_split):
    dataset = self.dataset_url(dataset_split)

    url = dataset[0][0]
    compressed_filename = os.path.basename(url)
    compressed_filepath = os.path.join(tmp_dir, compressed_filename)
    generator_utils.maybe_download(tmp_dir, compressed_filename, url)

    mode = "r:gz" if compressed_filepath.endswith("gz") else "r"
    with tarfile.open(compressed_filepath, mode) as corpus_tar:
      corpus_tar.extractall(tmp_dir)

    if self.vocab_type == text_problems.VocabType.SUBWORD:
      generator_utils.get_or_generate_vocab(
          data_dir, tmp_dir, self.vocab_filename, self.approx_vocab_size,
          self.vocab_data_files())

    source_file, target_file = self.source_target_paths(dataset_split, tmp_dir)
    return text_problems.text2text_txt_iterator(source_file,
                                                target_file) 
Developer: tensorflow, Project: tensor2tensor, Lines of code: 22, Source: style_transfer.py

Example 8: _maybe_download_corpus

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Alternatively: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def _maybe_download_corpus(tmp_dir):
  """Download and unpack the corpus.

  Args:
    tmp_dir: directory containing dataset.

  Returns:
    path to entire corpus as a text file.
  """
  corpus_url = "http://mattmahoney.net/dc/enwik8.zip"
  corpus_filename = os.path.basename(corpus_url)
  compressed_filepath = generator_utils.maybe_download(
      tmp_dir, corpus_filename, corpus_url)

  zip_ref = zipfile.ZipFile(compressed_filepath, "r")
  zip_ref.extractall(tmp_dir)
  zip_ref.close()

  return os.path.join(tmp_dir, "enwik8") 
Developer: tensorflow, Project: tensor2tensor, Lines of code: 21, Source: enwik8.py

Example 9: generate_samples

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Alternatively: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def generate_samples(self, data_dir, tmp_dir, dataset_split):
    """Generate examples."""
    # Download and extract
    compressed_filename = os.path.basename(self.URL)
    download_path = generator_utils.maybe_download(tmp_dir, compressed_filename,
                                                   self.URL)
    yelp_dir = os.path.join(tmp_dir, "yelp_review_full_csv")
    if not tf.gfile.Exists(yelp_dir):
      with tarfile.open(download_path, "r:gz") as tar:
        tar.extractall(tmp_dir)

    # Generate examples
    train = dataset_split == problem.DatasetSplit.TRAIN
    dataset = "train" if train else "test"
    for doc, label in self.doc_generator(yelp_dir, dataset, include_label=True):
      yield {
          "inputs": doc,
          "label": int(label),
      } 
Developer: tensorflow, Project: tensor2tensor, Lines of code: 21, Source: yelp_full.py

Example 10: _maybe_download_corpora

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Alternatively: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def _maybe_download_corpora(self, tmp_dir):
    mrpc_dir = os.path.join(tmp_dir, self.DATA_DIR)
    tf.gfile.MakeDirs(mrpc_dir)
    mrpc_train_finalpath = os.path.join(mrpc_dir, "msr_paraphrase_train.txt")
    mrpc_test_finalpath = os.path.join(mrpc_dir, "msr_paraphrase_test.txt")
    mrpc_dev_ids_finalpath = os.path.join(mrpc_dir, "dev_ids.tsv")

    def download_file(tdir, filepath, url):
      if not tf.gfile.Exists(filepath):
        generator_utils.maybe_download(tdir, filepath, url)

    download_file(mrpc_dir, mrpc_train_finalpath, self.MRPC_TRAIN)
    download_file(mrpc_dir, mrpc_test_finalpath, self.MRPC_TEST)
    download_file(mrpc_dir, mrpc_dev_ids_finalpath, self.DEV_IDS)

    return mrpc_dir 
Developer: tensorflow, Project: tensor2tensor, Lines of code: 18, Source: mrpc.py

Example 11: generate_samples

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Alternatively: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def generate_samples(self, data_dir, tmp_dir, dataset_split):
    """Generate examples."""
    # Download and extract
    compressed_filename = os.path.basename(self.URL)
    download_path = generator_utils.maybe_download(tmp_dir, compressed_filename,
                                                   self.URL)
    yelp_dir = os.path.join(tmp_dir, "yelp_review_polarity_csv")
    if not tf.gfile.Exists(yelp_dir):
      with tarfile.open(download_path, "r:gz") as tar:
        tar.extractall(tmp_dir)

    # Generate examples
    train = dataset_split == problem.DatasetSplit.TRAIN
    dataset = "train" if train else "test"
    for doc, label in self.doc_generator(yelp_dir, dataset, include_label=True):
      yield {
          "inputs": doc,
          "label": int(label),
      } 
Developer: tensorflow, Project: tensor2tensor, Lines of code: 21, Source: yelp_polarity.py

Example 12: _maybe_download_corpora

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Alternatively: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def _maybe_download_corpora(tmp_dir):
  """Download corpora for multinli.

  Args:
    tmp_dir: a string
  Returns:
    a string
  """
  mnli_filename = "MNLI.zip"
  mnli_finalpath = os.path.join(tmp_dir, "MNLI")
  if not tf.gfile.Exists(mnli_finalpath):
    zip_filepath = generator_utils.maybe_download(
        tmp_dir, mnli_filename, _MNLI_URL)
    zip_ref = zipfile.ZipFile(zip_filepath, "r")
    zip_ref.extractall(tmp_dir)
    zip_ref.close()

  return mnli_finalpath 
Developer: tensorflow, Project: tensor2tensor, Lines of code: 20, Source: multinli.py

Example 13: generate_samples

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Alternatively: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def generate_samples(self, data_dir, tmp_dir, dataset_split):
    if dataset_split == problem.DatasetSplit.TRAIN:
      urls = self.get_urls(DATA_TRAIN[0], DATA_TRAIN[1])
    else:
      urls = self.get_urls(DATA_TEST_SEEN[0], DATA_TEST_SEEN[1])
      urls += self.get_urls(DATA_TEST_NOVEL[0], DATA_TEST_NOVEL[1])

    for url in urls:
      path = generator_utils.maybe_download(tmp_dir, os.path.basename(url), url)
      for frame_number, frame, state, action in self.parse_frames(path):
        yield {
            "frame_number": [frame_number],
            "frame": frame,
            "state": state,
            "action": action,
        } 
Developer: tensorflow, Project: tensor2tensor, Lines of code: 18, Source: google_robot_pushing.py

Example 14: maybe_download_dataset

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Alternatively: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def maybe_download_dataset(self, tmp_dir, dataset_split):
    """Downloads the appropriate dataset file and returns its path."""
    # Get the dataset url for the split requested.
    url = self.DATA_URLS.get(dataset_split, None)

    # Sanity check.
    if url is None:
      tf.logging.fatal("Unknown dataset_split passed: {}".format(dataset_split))

    # Download the data, if it doesn't already exist.
    return generator_utils.maybe_download(tmp_dir,
                                          self._extract_filename_from_url(url),
                                          url) 
Developer: akzaidi, Project: fine-lm, Lines of code: 15, Source: program_search.py

Example 15: _maybe_download_corpora

# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Alternatively: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def _maybe_download_corpora(self, tmp_dir):
    wnli_filename = "WNLI.zip"
    wnli_finalpath = os.path.join(tmp_dir, "WNLI")
    if not tf.gfile.Exists(wnli_finalpath):
      zip_filepath = generator_utils.maybe_download(
          tmp_dir, wnli_filename, self._WNLI_URL)
      zip_ref = zipfile.ZipFile(zip_filepath, "r")
      zip_ref.extractall(tmp_dir)
      zip_ref.close()

    return wnli_finalpath 
Developer: akzaidi, Project: fine-lm, Lines of code: 13, Source: wnli.py

