This page collects typical usage examples of the Python attribute tensor2tensor.data_generators.text_encoder.EOS. If you have been wondering what text_encoder.EOS does, how to use it, or where to find examples of it, the curated code samples below may help. You can also explore further usage examples from the module that defines this attribute, tensor2tensor.data_generators.text_encoder.
Below are 6 code examples of text_encoder.EOS, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
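For orientation, here is a minimal sketch of what the attribute actually holds. It assumes tensor2tensor is installed; the concrete values in the comments reflect the library's defaults.

from tensor2tensor.data_generators import text_encoder

# EOS is the end-of-sentence marker *string*; EOS_ID is its reserved integer ID.
print(text_encoder.EOS)              # "<EOS>"
print(text_encoder.EOS_ID)           # 1 (PAD_ID is normally 0)
print(text_encoder.RESERVED_TOKENS)  # ["<pad>", "<EOS>"]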
Example 1: generate_samples
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import EOS [as alias]
def generate_samples(self, data_dir, tmp_dir, dataset_split):
  """Yields target-only samples from the train or validation corpus file."""
  files = _maybe_download_corpus(tmp_dir, self.vocab_type)
  train_file, valid_file = None, None
  for filename in files:
    if "train" in filename:
      train_file = os.path.join(tmp_dir, filename)
    elif "valid" in filename:
      valid_file = os.path.join(tmp_dir, filename)
  assert train_file, "Training file not found"
  assert valid_file, "Validation file not found"
  _get_token_encoder(data_dir, self.vocab_filename, train_file)
  train = dataset_split == problem.DatasetSplit.TRAIN
  filepath = train_file if train else valid_file

  def _generate_samples():
    with tf.gfile.GFile(filepath, "r") as f:
      for line in f:
        # Replace the newline with the EOS marker and collapse whitespace.
        line = " ".join(line.replace("\n", " %s " % EOS).split())
        yield {"targets": line}

  return _generate_samples()
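A quick self-contained illustration of the EOS substitution in the inner loop; the sample line is made up, and "<EOS>" is tensor2tensor's default marker value:

EOS = "<EOS>"  # tensor2tensor's default end-of-sentence marker string

line = "the quick brown fox\n"
processed = " ".join(line.replace("\n", " %s " % EOS).split())
print(processed)  # "the quick brown fox <EOS>"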
Example 2: build_vocab_list
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import EOS [as alias]
def build_vocab_list(data_path):
  """Reads a file to build a vocabulary with letters and phonemes.

  Args:
    data_path: data file to read list of words from.

  Returns:
    vocab_list: vocabulary list with both graphemes and phonemes.
  """
  vocab = {}
  with tf.gfile.GFile(data_path, "r") as data_file:
    for line in data_file:
      items = line.strip().split()
      vocab.update({char: 1 for char in list(items[0])})
      vocab.update({phoneme: 1 for phoneme in items[1:]})
  vocab_list = [PAD, EOS]
  for key in sorted(vocab.keys()):
    vocab_list.append(key)
  return vocab_list
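A sketch of what this produces on a CMUdict-style pronunciation file, where each line is a word followed by its phonemes. The two dictionary entries are made up, and PAD/EOS stand in for tensor2tensor's reserved tokens:

PAD, EOS = "<pad>", "<EOS>"  # tensor2tensor's default reserved tokens

lines = ["CAT K AE T", "DOG D AO G"]
vocab = {}
for line in lines:
  items = line.strip().split()
  vocab.update({char: 1 for char in items[0]})         # graphemes: C, A, T, ...
  vocab.update({phoneme: 1 for phoneme in items[1:]})  # phonemes: K, AE, ...
vocab_list = [PAD, EOS] + sorted(vocab)
print(vocab_list)
# ['<pad>', '<EOS>', 'A', 'AE', 'AO', 'C', 'D', 'G', 'K', 'O', 'T']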
Example 3: test_reserved_tokens_in_corpus
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import EOS [as alias]
def test_reserved_tokens_in_corpus(self):
  """Test that we handle reserved tokens appearing in the corpus."""
  corpus = "A B {} D E F {} G {}".format(text_encoder.EOS,
                                         text_encoder.EOS,
                                         text_encoder.PAD)
  encoder = text_encoder.TokenTextEncoder(None, vocab_list=corpus.split())
  all_tokens = encoder._id_to_token.values()
  # If reserved tokens are removed correctly, then the set of tokens will
  # be unique.
  self.assertEqual(len(all_tokens), len(set(all_tokens)))
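The behavior under test, run standalone (assumes tensor2tensor is installed): TokenTextEncoder prepends the reserved tokens itself, so copies already present in vocab_list must be dropped to keep the token-to-ID mapping unique.

from tensor2tensor.data_generators import text_encoder

# A vocab list that accidentally contains the reserved EOS token.
vocab = ["A", "B", text_encoder.EOS, "C"]
encoder = text_encoder.TokenTextEncoder(None, vocab_list=vocab)
tokens = list(encoder._id_to_token.values())
assert len(tokens) == len(set(tokens))  # no duplicate <EOS> entry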
Example 4: _read_words
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import EOS [as alias]
def _read_words(filename):
  """Reads words from a file."""
  with tf.gfile.GFile(filename, "r") as f:
    if sys.version_info[0] >= 3:
      return f.read().replace("\n", " %s " % EOS).split()
    else:
      return f.read().decode("utf-8").replace("\n", " %s " % EOS).split()
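The same transformation without tf.gfile, to show the resulting token stream; the file contents and the "<EOS>" value are assumptions:

EOS = "<EOS>"  # tensor2tensor's default end-of-sentence marker

text = "first sentence\nsecond sentence\n"
words = text.replace("\n", " %s " % EOS).split()
print(words)
# ['first', 'sentence', '<EOS>', 'second', 'sentence', '<EOS>']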
Example 5: evaluate
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import EOS [as alias]
def evaluate(self):
  """Run evaluation mode."""
  words, pronunciations = [], []
  for case in self.problem.generator(self.file_path,
                                     self.problem.source_vocab,
                                     self.problem.target_vocab):
    # Strip the EOS marker from the decoded grapheme/phoneme strings.
    word = self.problem.source_vocab.decode(case["inputs"]).replace(
        EOS, "").strip()
    pronunciation = self.problem.target_vocab.decode(case["targets"]).replace(
        EOS, "").strip()
    words.append(word)
    pronunciations.append(pronunciation)

  self.g2p_gt_map = create_g2p_gt_map(words, pronunciations)

  if os.path.exists(self.frozen_graph_filename):
    with tf.Session(graph=self.graph) as sess:
      inp = tf.placeholder(tf.string, name="inp_decode")[0]
      decode_op = tf.py_func(self.calc_errors, [inp],
                             [tf.int64, tf.int64])
      [correct, errors] = self.__run_op(sess, decode_op, self.file_path)
  else:
    correct, errors = self.calc_errors(self.g2p_gt_map, self.file_path)

  print("Words: %d" % (correct + errors))
  print("Errors: %d" % errors)
  print("WER: %.3f" % (float(errors) / (correct + errors)))
  print("Accuracy: %.3f" % float(1. - (float(errors) / (correct + errors))))
  return self.g2p_gt_map
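The error metrics at the end reduce to simple ratios over the decoded words. A standalone sketch with made-up counts:

# Hypothetical counts: 950 words decoded correctly, 50 with errors.
correct, errors = 950, 50
total = correct + errors
wer = float(errors) / total  # word error rate: 50 / 1000 = 0.050
accuracy = 1. - wer          # 0.950
print("WER: %.3f, Accuracy: %.3f" % (wer, accuracy))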
Example 6: __init__
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import EOS [as alias]
def __init__(self,
             vocab_filename=None,
             vocab_list=None,
             separator="",
             num_reserved_ids=text_encoder.NUM_RESERVED_TOKENS):
  """Initialize from a file or list, one token per line.

  Handling of reserved tokens works as follows:
  - When initializing from a list, we add reserved tokens to the vocab.
  - When initializing from a file, we do not add reserved tokens to the vocab.
  - When saving vocab files, we save reserved tokens to the file.

  Args:
    vocab_filename: If not None, the full filename to read vocab from. If this
      is not None, then vocab_list should be None.
    vocab_list: If not None, a list of elements of the vocabulary. If this is
      not None, then vocab_filename should be None.
    separator: separator between symbols in original file.
    num_reserved_ids: Number of IDs to save for reserved tokens like <EOS>.
  """
  super(GraphemePhonemeEncoder, self).__init__(
      num_reserved_ids=num_reserved_ids)
  if vocab_filename and os.path.exists(vocab_filename):
    self._init_vocab_from_file(vocab_filename)
  else:
    assert vocab_list is not None
    self._init_vocab_from_list(vocab_list)
  self._separator = separator
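A hypothetical instantiation of this encoder from an in-memory vocabulary list. GraphemePhonemeEncoder comes from the g2p-seq2seq project; the vocabulary entries below are made up, mixing graphemes and phonemes as produced by build_vocab_list in Example 2:

# Assumes the GraphemePhonemeEncoder class above is importable (e.g. from
# g2p_seq2seq). The list already starts with the reserved PAD/EOS tokens.
vocab = ["<pad>", "<EOS>", "A", "AE", "C", "K", "T"]
encoder = GraphemePhonemeEncoder(vocab_list=vocab, separator=" ")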