This page collects typical usage examples of the Python attribute tensor2tensor.data_generators.text_encoder.EOS. If you have been wondering what text_encoder.EOS does, how to use it, or where to find examples of it, the curated code samples below may help. You can also explore further usage examples from the module that defines this attribute, tensor2tensor.data_generators.text_encoder.
Below are 6 code examples of text_encoder.EOS, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
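For orientation, here is a minimal sketch of what the attribute actually holds. It assumes tensor2tensor is installed; the concrete values in the comments reflect the library's defaults.

from tensor2tensor.data_generators import text_encoder

# EOS is the end-of-sentence marker *string*; EOS_ID is its reserved integer ID.
print(text_encoder.EOS)              # "<EOS>"
print(text_encoder.EOS_ID)           # 1 (PAD_ID is normally 0)
print(text_encoder.RESERVED_TOKENS)  # ["<pad>", "<EOS>"]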
Example 1: generate_samples
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import EOS [as alias]
def generate_samples(self, data_dir, tmp_dir, dataset_split):
  """Yields target-only samples from the train or validation corpus file."""
  files = _maybe_download_corpus(tmp_dir, self.vocab_type)
  train_file, valid_file = None, None
  for filename in files:
    if "train" in filename:
      train_file = os.path.join(tmp_dir, filename)
    elif "valid" in filename:
      valid_file = os.path.join(tmp_dir, filename)
  assert train_file, "Training file not found"
  assert valid_file, "Validation file not found"
  _get_token_encoder(data_dir, self.vocab_filename, train_file)
  train = dataset_split == problem.DatasetSplit.TRAIN
  filepath = train_file if train else valid_file

  def _generate_samples():
    with tf.gfile.GFile(filepath, "r") as f:
      for line in f:
        # Replace the newline with the EOS marker and collapse whitespace.
        line = " ".join(line.replace("\n", " %s " % EOS).split())
        yield {"targets": line}

  return _generate_samples()
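A quick self-contained illustration of the EOS substitution in the inner loop; the sample line is made up, and "<EOS>" is tensor2tensor's default marker value:

EOS = "<EOS>"  # tensor2tensor's default end-of-sentence marker string

line = "the quick brown fox\n"
processed = " ".join(line.replace("\n", " %s " % EOS).split())
print(processed)  # "the quick brown fox <EOS>"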
Example 2: build_vocab_list
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import EOS [as alias]
def build_vocab_list(data_path):
  """Reads a file to build a vocabulary with letters and phonemes.

  Args:
    data_path: data file to read list of words from.

  Returns:
    vocab_list: vocabulary list with both graphemes and phonemes.
  """
  vocab = {}
  with tf.gfile.GFile(data_path, "r") as data_file:
    for line in data_file:
      items = line.strip().split()
      vocab.update({char: 1 for char in list(items[0])})
      vocab.update({phoneme: 1 for phoneme in items[1:]})
  vocab_list = [PAD, EOS]
  for key in sorted(vocab.keys()):
    vocab_list.append(key)
  return vocab_list
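A sketch of what this produces on a CMUdict-style pronunciation file, where each line is a word followed by its phonemes. The two dictionary entries are made up, and PAD/EOS stand in for tensor2tensor's reserved tokens:

PAD, EOS = "<pad>", "<EOS>"  # tensor2tensor's default reserved tokens

lines = ["CAT K AE T", "DOG D AO G"]
vocab = {}
for line in lines:
  items = line.strip().split()
  vocab.update({char: 1 for char in items[0]})         # graphemes: C, A, T, ...
  vocab.update({phoneme: 1 for phoneme in items[1:]})  # phonemes: K, AE, ...
vocab_list = [PAD, EOS] + sorted(vocab)
print(vocab_list)
# ['<pad>', '<EOS>', 'A', 'AE', 'AO', 'C', 'D', 'G', 'K', 'O', 'T']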
Example 3: test_reserved_tokens_in_corpus
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import EOS [as alias]
def test_reserved_tokens_in_corpus(self):
  """Test that we handle reserved tokens appearing in the corpus."""
  corpus = "A B {} D E F {} G {}".format(text_encoder.EOS,
                                         text_encoder.EOS,
                                         text_encoder.PAD)
  encoder = text_encoder.TokenTextEncoder(None, vocab_list=corpus.split())
  all_tokens = encoder._id_to_token.values()
  # If reserved tokens are removed correctly, then the set of tokens will
  # be unique.
  self.assertEqual(len(all_tokens), len(set(all_tokens)))
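The behavior under test, run standalone (assumes tensor2tensor is installed): TokenTextEncoder prepends the reserved tokens itself, so copies already present in vocab_list must be dropped to keep the token-to-ID mapping unique.

from tensor2tensor.data_generators import text_encoder

# A vocab list that accidentally contains the reserved EOS token.
vocab = ["A", "B", text_encoder.EOS, "C"]
encoder = text_encoder.TokenTextEncoder(None, vocab_list=vocab)
tokens = list(encoder._id_to_token.values())
assert len(tokens) == len(set(tokens))  # no duplicate <EOS> entry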
Example 4: _read_words
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import EOS [as alias]
def _read_words(filename):
  """Reads words from a file."""
  with tf.gfile.GFile(filename, "r") as f:
    if sys.version_info[0] >= 3:
      return f.read().replace("\n", " %s " % EOS).split()
    else:
      return f.read().decode("utf-8").replace("\n", " %s " % EOS).split()
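The same transformation without tf.gfile, to show the resulting token stream; the file contents and the "<EOS>" value are assumptions:

EOS = "<EOS>"  # tensor2tensor's default end-of-sentence marker

text = "first sentence\nsecond sentence\n"
words = text.replace("\n", " %s " % EOS).split()
print(words)
# ['first', 'sentence', '<EOS>', 'second', 'sentence', '<EOS>']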
Example 5: evaluate
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import EOS [as alias]
def evaluate(self):
  """Run evaluation mode."""
  words, pronunciations = [], []
  for case in self.problem.generator(self.file_path,
                                     self.problem.source_vocab,
                                     self.problem.target_vocab):
    # Strip the EOS marker from the decoded grapheme/phoneme strings.
    word = self.problem.source_vocab.decode(case["inputs"]).replace(
        EOS, "").strip()
    pronunciation = self.problem.target_vocab.decode(case["targets"]).replace(
        EOS, "").strip()
    words.append(word)
    pronunciations.append(pronunciation)

  self.g2p_gt_map = create_g2p_gt_map(words, pronunciations)

  if os.path.exists(self.frozen_graph_filename):
    with tf.Session(graph=self.graph) as sess:
      inp = tf.placeholder(tf.string, name="inp_decode")[0]
      decode_op = tf.py_func(self.calc_errors, [inp],
                             [tf.int64, tf.int64])
      [correct, errors] = self.__run_op(sess, decode_op, self.file_path)
  else:
    correct, errors = self.calc_errors(self.g2p_gt_map, self.file_path)

  print("Words: %d" % (correct + errors))
  print("Errors: %d" % errors)
  print("WER: %.3f" % (float(errors) / (correct + errors)))
  print("Accuracy: %.3f" % float(1. - (float(errors) / (correct + errors))))
  return self.g2p_gt_map
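The error metrics at the end reduce to simple ratios over the decoded words. A standalone sketch with made-up counts:

# Hypothetical counts: 950 words decoded correctly, 50 with errors.
correct, errors = 950, 50
total = correct + errors
wer = float(errors) / total  # word error rate: 50 / 1000 = 0.050
accuracy = 1. - wer          # 0.950
print("WER: %.3f, Accuracy: %.3f" % (wer, accuracy))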
Example 6: __init__
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import EOS [as alias]
def __init__(self,
             vocab_filename=None,
             vocab_list=None,
             separator="",
             num_reserved_ids=text_encoder.NUM_RESERVED_TOKENS):
  """Initialize from a file or list, one token per line.

  Handling of reserved tokens works as follows:
  - When initializing from a list, we add reserved tokens to the vocab.
  - When initializing from a file, we do not add reserved tokens to the vocab.
  - When saving vocab files, we save reserved tokens to the file.

  Args:
    vocab_filename: If not None, the full filename to read vocab from. If this
      is not None, then vocab_list should be None.
    vocab_list: If not None, a list of elements of the vocabulary. If this is
      not None, then vocab_filename should be None.
    separator: separator between symbols in original file.
    num_reserved_ids: Number of IDs to save for reserved tokens like <EOS>.
  """
  super(GraphemePhonemeEncoder, self).__init__(
      num_reserved_ids=num_reserved_ids)
  if vocab_filename and os.path.exists(vocab_filename):
    self._init_vocab_from_file(vocab_filename)
  else:
    assert vocab_list is not None
    self._init_vocab_from_list(vocab_list)
  self._separator = separator
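A hypothetical instantiation of this encoder from an in-memory vocabulary list. GraphemePhonemeEncoder comes from the g2p-seq2seq project; the vocabulary entries below are made up, mixing graphemes and phonemes as produced by build_vocab_list in Example 2:

# Assumes the GraphemePhonemeEncoder class above is importable (e.g. from
# g2p_seq2seq). The list already starts with the reserved PAD/EOS tokens.
vocab = ["<pad>", "<EOS>", "A", "AE", "C", "K", "T"]
encoder = GraphemePhonemeEncoder(vocab_list=vocab, separator=" ")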