This article collects typical usage examples of the generator_utils.maybe_download method from the Python module tensor2tensor.data_generators. If you are wondering what generator_utils.maybe_download does, how to call it, or where to find usage examples, the curated code samples below should help. You can also explore the containing module, tensor2tensor.data_generators.generator_utils, for further usage examples.
The following shows 15 code examples of generator_utils.maybe_download, sorted by popularity by default.
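Before the examples, a minimal usage sketch may help. It assumes the standard tensor2tensor API, in which maybe_download(directory, filename, uri) fetches uri into directory/filename only if that file does not already exist, and returns the local path either way; the directory and URL below are placeholders:

import os
from tensor2tensor.data_generators import generator_utils

tmp_dir = "/tmp/t2t_datagen"              # placeholder scratch directory
url = "http://example.com/corpus.tar.gz"  # placeholder URL

# Downloads to tmp_dir/corpus.tar.gz unless it is already there,
# then returns the local path in both cases.
local_path = generator_utils.maybe_download(
    tmp_dir, os.path.basename(url), url)
print(local_path)  # /tmp/t2t_datagen/corpus.tar.gz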
Example 1: generate_samples
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def generate_samples(self, data_dir, tmp_dir, dataset_split):
  dataset = self.dataset_url(dataset_split)

  tag = "train" if dataset_split == problem.DatasetSplit.TRAIN else "dev"

  url = dataset[0][0]
  compressed_filename = os.path.basename(url)
  compressed_filepath = os.path.join(tmp_dir, compressed_filename)
  generator_utils.maybe_download(tmp_dir, compressed_filename, url)

  mode = "r:gz" if compressed_filepath.endswith("gz") else "r"
  with tarfile.open(compressed_filepath, mode) as corpus_tar:
    corpus_tar.extractall(tmp_dir)

  if self.vocab_type == text_problems.VocabType.SUBWORD:
    generator_utils.get_or_generate_vocab(
        data_dir, tmp_dir, self.vocab_filename, self.approx_vocab_size,
        self.vocab_data_files())

  source_file = os.path.join(tmp_dir, tag + ".modern")
  target_file = os.path.join(tmp_dir, tag + ".original")
  return text_problems.text2text_txt_iterator(source_file, target_file)
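A small design note on Example 1: maybe_download returns the local path, so the manual os.path.join could be dropped by capturing the return value. A minimal variant, assuming the same tensor2tensor API (the lines below stand in for the join plus the bare download call above):

# Equivalent to the os.path.join + maybe_download pair in Example 1.
compressed_filepath = generator_utils.maybe_download(
    tmp_dir, compressed_filename, url)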
Example 2: write_raw_text_to_files
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def write_raw_text_to_files(all_files, urls_path, tmp_dir, is_training):
  """Write text to files."""

  def write_to_file(all_files, urls_path, tmp_dir, filename):
    with io.open(os.path.join(tmp_dir, filename + ".source"), "w") as fstory:
      with io.open(os.path.join(tmp_dir, filename + ".target"),
                   "w") as fsummary:
        for example in example_generator(all_files, urls_path, sum_token=True):
          story, summary = _story_summary_split(example)
          fstory.write(story + "\n")
          fsummary.write(summary + "\n")

  filename = "cnndm.train" if is_training else "cnndm.dev"
  tf.logging.info("Writing %s" % filename)
  write_to_file(all_files, urls_path, tmp_dir, filename)

  if not is_training:
    test_urls_path = generator_utils.maybe_download(tmp_dir, "all_test.txt",
                                                    _TEST_URLS)
    filename = "cnndm.test"
    tf.logging.info("Writing %s" % filename)
    write_to_file(all_files, test_urls_path, tmp_dir, filename)
Example 3: generate_data
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def generate_data(self, data_dir, tmp_dir, task_id=-1):
  list_url = ("https://raw.githubusercontent.com/tensorflow/models/master/"
              "street/python/fsns_urls.txt")
  fsns_urls = generator_utils.maybe_download(tmp_dir, "fsns_urls.txt",
                                             list_url)
  fsns_files = [
      f.strip() for f in open(fsns_urls, "r") if f.startswith("http://")
  ]
  for url in fsns_files:
    if "/train/train" in url:
      generator_utils.maybe_download(
          data_dir, "image_fsns-train" + url[-len("-00100-of-00512"):], url)
    elif "/validation/validation" in url:
      generator_utils.maybe_download(
          data_dir, "image_fsns-dev" + url[-len("-00100-of-00512"):], url)
    elif "charset" in url:
      generator_utils.maybe_download(data_dir, "charset_size134.txt", url)
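The slice url[-len("-00100-of-00512"):] keeps the 15-character shard suffix of each FSNS file so the local copies stay uniquely named. A quick illustration with a made-up URL of the same shape:

# Made-up URL, shaped like an entry in fsns_urls.txt.
url = "http://download.tensorflow.org/data/fsns-train-00511-of-00512"
suffix = url[-len("-00100-of-00512"):]        # "-00511-of-00512"
local_name = "image_fsns-train" + suffix      # "image_fsns-train-00511-of-00512"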
Example 4: generate_samples
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def generate_samples(self, data_dir, tmp_dir, dataset_split):
  path = generator_utils.maybe_download(
      tmp_dir, os.path.basename(DATA_URL), DATA_URL)

  tar = tarfile.open(path)
  tar.extractall(tmp_dir)
  tar.close()

  if dataset_split == problem.DatasetSplit.TRAIN:
    base_dir = os.path.join(tmp_dir, "softmotion30_44k/train/*")
  else:
    base_dir = os.path.join(tmp_dir, "softmotion30_44k/test/*")

  filenames = tf.gfile.Glob(base_dir)
  for frame_number, frame, state, action in self.parse_frames(filenames):
    yield {
        "frame_number": [frame_number],
        "frame": frame,
        "state": state,
        "action": action,
    }
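The explicit open/extractall/close triple in Example 4 also works as a context manager, which closes the archive even if extraction raises. A minimal equivalent of those three lines:

# Same effect as tarfile.open(path) / extractall / close above.
with tarfile.open(path) as tar:
  tar.extractall(tmp_dir)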
Example 5: _original_vocab
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def _original_vocab(tmp_dir):
  """Returns a set containing the original vocabulary.

  This is important for comparing with published results.

  Args:
    tmp_dir: directory containing dataset.

  Returns:
    a set of strings
  """
  vocab_url = ("http://download.tensorflow.org/models/LM_LSTM_CNN/"
               "vocab-2016-09-10.txt")
  vocab_filename = os.path.basename(vocab_url + ".en")
  vocab_filepath = os.path.join(tmp_dir, vocab_filename)
  if not os.path.exists(vocab_filepath):
    generator_utils.maybe_download(tmp_dir, vocab_filename, vocab_url)
  return set([
      text_encoder.native_to_unicode(l.strip())
      for l in tf.gfile.Open(vocab_filepath)
  ])
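One detail worth noting in Example 5: ".en" is appended to the URL before taking the basename, so the local vocab file is named vocab-2016-09-10.txt.en even though the remote file carries no .en suffix. A quick check:

import os

vocab_url = ("http://download.tensorflow.org/models/LM_LSTM_CNN/"
             "vocab-2016-09-10.txt")
print(os.path.basename(vocab_url + ".en"))  # vocab-2016-09-10.txt.en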
Example 6: generate_samples
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def generate_samples(self, data_dir, tmp_dir, dataset_split):
  """Generate examples."""
  # Download and extract
  compressed_filename = os.path.basename(self.URL)
  download_path = generator_utils.maybe_download(tmp_dir, compressed_filename,
                                                 self.URL)
  imdb_dir = os.path.join(tmp_dir, "aclImdb")
  if not tf.gfile.Exists(imdb_dir):
    with tarfile.open(download_path, "r:gz") as tar:
      tar.extractall(tmp_dir)

  # Generate examples
  train = dataset_split == problem.DatasetSplit.TRAIN
  dataset = "train" if train else "test"
  for doc, label in self.doc_generator(imdb_dir, dataset, include_label=True):
    yield {
        "inputs": doc,
        "label": int(label),
    }
Example 7: generate_samples
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def generate_samples(self, data_dir, tmp_dir, dataset_split):
  dataset = self.dataset_url(dataset_split)

  url = dataset[0][0]
  compressed_filename = os.path.basename(url)
  compressed_filepath = os.path.join(tmp_dir, compressed_filename)
  generator_utils.maybe_download(tmp_dir, compressed_filename, url)

  mode = "r:gz" if compressed_filepath.endswith("gz") else "r"
  with tarfile.open(compressed_filepath, mode) as corpus_tar:
    corpus_tar.extractall(tmp_dir)

  if self.vocab_type == text_problems.VocabType.SUBWORD:
    generator_utils.get_or_generate_vocab(
        data_dir, tmp_dir, self.vocab_filename, self.approx_vocab_size,
        self.vocab_data_files())

  source_file, target_file = self.source_target_paths(dataset_split, tmp_dir)
  return text_problems.text2text_txt_iterator(source_file, target_file)
Example 8: _maybe_download_corpus
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def _maybe_download_corpus(tmp_dir):
  """Download and unpack the corpus.

  Args:
    tmp_dir: directory containing dataset.

  Returns:
    path to entire corpus as a text file.
  """
  corpus_url = "http://mattmahoney.net/dc/enwik8.zip"
  corpus_filename = os.path.basename(corpus_url)
  compressed_filepath = generator_utils.maybe_download(
      tmp_dir, corpus_filename, corpus_url)

  zip_ref = zipfile.ZipFile(compressed_filepath, "r")
  zip_ref.extractall(tmp_dir)
  zip_ref.close()

  return os.path.join(tmp_dir, "enwik8")
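As with the tar case earlier, the ZipFile open/extractall/close sequence in Example 8 can be written with a context manager so the archive is closed even on error; a minimal equivalent:

# Same effect as the explicit ZipFile / extractall / close above.
with zipfile.ZipFile(compressed_filepath, "r") as zip_ref:
  zip_ref.extractall(tmp_dir)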
Example 9: generate_samples
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def generate_samples(self, data_dir, tmp_dir, dataset_split):
  """Generate examples."""
  # Download and extract
  compressed_filename = os.path.basename(self.URL)
  download_path = generator_utils.maybe_download(tmp_dir, compressed_filename,
                                                 self.URL)
  yelp_dir = os.path.join(tmp_dir, "yelp_review_full_csv")
  if not tf.gfile.Exists(yelp_dir):
    with tarfile.open(download_path, "r:gz") as tar:
      tar.extractall(tmp_dir)

  # Generate examples
  train = dataset_split == problem.DatasetSplit.TRAIN
  dataset = "train" if train else "test"
  for doc, label in self.doc_generator(yelp_dir, dataset, include_label=True):
    yield {
        "inputs": doc,
        "label": int(label),
    }
Example 10: _maybe_download_corpora
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def _maybe_download_corpora(self, tmp_dir):
  mrpc_dir = os.path.join(tmp_dir, self.DATA_DIR)
  tf.gfile.MakeDirs(mrpc_dir)
  mrpc_train_finalpath = os.path.join(mrpc_dir, "msr_paraphrase_train.txt")
  mrpc_test_finalpath = os.path.join(mrpc_dir, "msr_paraphrase_test.txt")
  mrpc_dev_ids_finalpath = os.path.join(mrpc_dir, "dev_ids.tsv")

  def download_file(tdir, filepath, url):
    if not tf.gfile.Exists(filepath):
      generator_utils.maybe_download(tdir, filepath, url)

  download_file(mrpc_dir, mrpc_train_finalpath, self.MRPC_TRAIN)
  download_file(mrpc_dir, mrpc_test_finalpath, self.MRPC_TEST)
  download_file(mrpc_dir, mrpc_dev_ids_finalpath, self.DEV_IDS)

  return mrpc_dir
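A design note on the guard above, hedged against the tensor2tensor source: maybe_download itself skips the download when the target file already exists, so the tf.gfile.Exists check in download_file is an extra short-circuit rather than a correctness requirement. Inside the same method, the three calls could plausibly be written directly as:

# Sketch only: maybe_download already no-ops on files that exist.
for final_path, url in [(mrpc_train_finalpath, self.MRPC_TRAIN),
                        (mrpc_test_finalpath, self.MRPC_TEST),
                        (mrpc_dev_ids_finalpath, self.DEV_IDS)]:
  generator_utils.maybe_download(mrpc_dir, final_path, url)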
Example 11: generate_samples
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def generate_samples(self, data_dir, tmp_dir, dataset_split):
  """Generate examples."""
  # Download and extract
  compressed_filename = os.path.basename(self.URL)
  download_path = generator_utils.maybe_download(tmp_dir, compressed_filename,
                                                 self.URL)
  yelp_dir = os.path.join(tmp_dir, "yelp_review_polarity_csv")
  if not tf.gfile.Exists(yelp_dir):
    with tarfile.open(download_path, "r:gz") as tar:
      tar.extractall(tmp_dir)

  # Generate examples
  train = dataset_split == problem.DatasetSplit.TRAIN
  dataset = "train" if train else "test"
  for doc, label in self.doc_generator(yelp_dir, dataset, include_label=True):
    yield {
        "inputs": doc,
        "label": int(label),
    }
Example 12: _maybe_download_corpora
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def _maybe_download_corpora(tmp_dir):
  """Download corpora for multinli.

  Args:
    tmp_dir: a string

  Returns:
    a string
  """
  mnli_filename = "MNLI.zip"
  mnli_finalpath = os.path.join(tmp_dir, "MNLI")
  if not tf.gfile.Exists(mnli_finalpath):
    zip_filepath = generator_utils.maybe_download(
        tmp_dir, mnli_filename, _MNLI_URL)
    zip_ref = zipfile.ZipFile(zip_filepath, "r")
    zip_ref.extractall(tmp_dir)
    zip_ref.close()

  return mnli_finalpath
Example 13: generate_samples
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def generate_samples(self, data_dir, tmp_dir, dataset_split):
  if dataset_split == problem.DatasetSplit.TRAIN:
    urls = self.get_urls(DATA_TRAIN[0], DATA_TRAIN[1])
  else:
    urls = self.get_urls(DATA_TEST_SEEN[0], DATA_TEST_SEEN[1])
    urls += self.get_urls(DATA_TEST_NOVEL[0], DATA_TEST_NOVEL[1])

  for url in urls:
    path = generator_utils.maybe_download(tmp_dir, os.path.basename(url), url)
    for frame_number, frame, state, action in self.parse_frames(path):
      yield {
          "frame_number": [frame_number],
          "frame": frame,
          "state": state,
          "action": action,
      }
Example 14: maybe_download_dataset
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def maybe_download_dataset(self, tmp_dir, dataset_split):
  """Downloads the appropriate dataset file and returns its path."""
  # Get the dataset url for the split requested.
  url = self.DATA_URLS.get(dataset_split, None)

  # Sanity check.
  if url is None:
    tf.logging.fatal("Unknown dataset_split passed: {}".format(dataset_split))

  # Download the data, if it doesn't already exist.
  return generator_utils.maybe_download(tmp_dir,
                                        self._extract_filename_from_url(url),
                                        url)
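The method above assumes DATA_URLS maps each dataset split to a download URL. A hypothetical shape for that mapping (the keys are real problem.DatasetSplit values; the URLs are placeholders, not from the original problem definition):

# Hypothetical mapping; the real URLs live in the problem class.
DATA_URLS = {
    problem.DatasetSplit.TRAIN: "http://example.com/train_data.txt",
    problem.DatasetSplit.EVAL: "http://example.com/dev_data.txt",
}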
Example 15: _maybe_download_corpora
# Required import: from tensor2tensor.data_generators import generator_utils [as alias]
# Or: from tensor2tensor.data_generators.generator_utils import maybe_download [as alias]
def _maybe_download_corpora(self, tmp_dir):
  wnli_filename = "WNLI.zip"
  wnli_finalpath = os.path.join(tmp_dir, "WNLI")
  if not tf.gfile.Exists(wnli_finalpath):
    zip_filepath = generator_utils.maybe_download(
        tmp_dir, wnli_filename, self._WNLI_URL)
    zip_ref = zipfile.ZipFile(zip_filepath, "r")
    zip_ref.extractall(tmp_dir)
    zip_ref.close()

  return wnli_finalpath