

Python text_encoder.EOS_ID Attribute Code Examples

This article collects typical usage examples of the tensor2tensor.data_generators.text_encoder.EOS_ID attribute in Python. If you are wondering what text_encoder.EOS_ID is, how to use it, or where to find real-world examples, the curated code samples below should help. You can also explore further usage examples of the containing module, tensor2tensor.data_generators.text_encoder.


The following presents 14 code examples of the text_encoder.EOS_ID attribute, sorted by popularity by default.
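All of the examples below share one pattern: encode text into a plain Python list of integer IDs, then append text_encoder.EOS_ID to mark the end of the sequence. In tensor2tensor's text_encoder module, ID 0 is reserved for padding (PAD_ID) and EOS_ID is 1. The following minimal sketch illustrates the pattern using the module's built-in ByteTextEncoder; it is illustrative only and not taken from any of the projects quoted below.

from tensor2tensor.data_generators import text_encoder

assert text_encoder.PAD_ID == 0  # ID 0 is reserved for padding
assert text_encoder.EOS_ID == 1  # ID 1 is reserved for end-of-sequence

encoder = text_encoder.ByteTextEncoder()  # simplest built-in encoder
ids = encoder.encode("hi")                # byte values shifted past the reserved IDs
ids.append(text_encoder.EOS_ID)           # mark the end of the sequence
print(ids)                                # [106, 107, 1]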

Example 1: generate_encoded_samples

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Alternatively: from tensor2tensor.data_generators.text_encoder import EOS_ID [as alias]
def generate_encoded_samples(self, data_dir, tmp_dir, dataset_split):
    """A generator that generates samples that are encoded.

    Args:
      data_dir: data directory
      tmp_dir: temp directory
      dataset_split: dataset split

    Yields:
      A dict.

    """
    generator = self.generate_samples(data_dir, tmp_dir, dataset_split)
    encoder = self.get_or_create_vocab(data_dir, tmp_dir)
    label_encoder = self.get_labels_encoder(data_dir)
    for sample in generator:
      inputs = encoder.encode(sample['inputs'])
      inputs.append(text_encoder.EOS_ID)
      context = encoder.encode(sample['context'])
      context.append(text_encoder.EOS_ID)
      targets = label_encoder.encode(sample['targets'])
      yield {'inputs': inputs, 'context': context, 'targets': targets}
Author: akzaidi, Project: fine-lm, Lines: 25, Source: babi_qa.py

Example 2: generate_data

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Alternatively: from tensor2tensor.data_generators.text_encoder import EOS_ID [as alias]
def generate_data(self, data_dir, _, task_id=-1):

    def generator_eos(nbr_symbols, max_length, nbr_cases):
      """Shift by NUM_RESERVED_IDS and append EOS token."""
      for case in self.generator(nbr_symbols, max_length, nbr_cases):
        new_case = {}
        for feature in case:
          new_case[feature] = [
              i + text_encoder.NUM_RESERVED_TOKENS for i in case[feature]
          ] + [text_encoder.EOS_ID]
        yield new_case

    utils.generate_dataset_and_shuffle(
        generator_eos(self.num_symbols, self.train_length, self.train_size),
        self.training_filepaths(data_dir, self.num_shards, shuffled=True),
        generator_eos(self.num_symbols, self.dev_length, self.dev_size),
        self.dev_filepaths(data_dir, 1, shuffled=True),
        shuffle=False) 
Author: akzaidi, Project: fine-lm, Lines: 20, Source: algorithmic.py
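To make the shift concrete: text_encoder.NUM_RESERVED_TOKENS is 2 (ID 0 is padding, ID 1 is EOS), so raw symbols are moved out of the reserved range before EOS is appended. Below is a sketch of one pass through the inner generator, with a made-up case:

from tensor2tensor.data_generators import text_encoder

raw_case = {"inputs": [0, 1, 2], "targets": [2, 1, 0]}  # hypothetical symbols
encoded = {
    feature: [i + text_encoder.NUM_RESERVED_TOKENS for i in ids]
             + [text_encoder.EOS_ID]
    for feature, ids in raw_case.items()
}
print(encoded)  # {'inputs': [2, 3, 4, 1], 'targets': [4, 3, 2, 1]}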

Example 3: generate_encoded_samples

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Alternatively: from tensor2tensor.data_generators.text_encoder import EOS_ID [as alias]
def generate_encoded_samples(self, data_dir, tmp_dir, dataset_split):
    """A generator that generates samples that are encoded.

    Args:
      data_dir: data directory
      tmp_dir: temp directory
      dataset_split: dataset split

    Yields:
      A dict.

    """
    generator = self.generate_samples(data_dir, tmp_dir, dataset_split)
    txt_encoder = self.get_or_create_vocab(data_dir, tmp_dir)
    label_encoder = self.get_labels_encoder(data_dir)
    for sample in generator:
      inputs = txt_encoder.encode(sample["inputs"])
      inputs.append(text_encoder.EOS_ID)
      targets = label_encoder.encode(sample["label"])
      yield {"inputs": inputs, "targets": targets} 
Author: akzaidi, Project: fine-lm, Lines: 22, Source: lambada.py

Example 4: text2text_generate_encoded_oovs

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Alternatively: from tensor2tensor.data_generators.text_encoder import EOS_ID [as alias]
def text2text_generate_encoded_oovs(self,
                                      sample_generator,
                                      vocab,
                                      targets_vocab=None,
                                      has_inputs=True):
    """Encode Text2Text samples from the generator with the vocab."""
    targets_vocab = targets_vocab or vocab
    for sample in sample_generator:
      source_oovs = []  # OOVs seen in the source; stays empty when there are no inputs
      if has_inputs:
        (sample["inputs"], sample["inputs_extend"], source_oovs,
         _) = vocab.encode(sample["inputs"])
        sample["inputs"].append(text_encoder.EOS_ID)
        sample["inputs_extend"].append(text_encoder.EOS_ID)
      # The source OOV tokens must be passed to the target encoder.
      sample["targets"], sample["targets_extend"] = targets_vocab.encode_target(
          sample["targets"], source_oovs)
      sample["targets"].append(text_encoder.EOS_ID)
      sample["targets_extend"].append(text_encoder.EOS_ID)
      yield sample 
Author: tensorflow, Project: tensor2tensor, Lines: 21, Source: pointer_generator_word.py

Example 5: consume

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Alternatively: from tensor2tensor.data_generators.text_encoder import EOS_ID [as alias]
def consume(self, word):
        """Append ``word`` to the current history."""
        if word == utils.EOS_ID:
            return
        pos = (word // EditT2TPredictor.POS_FACTOR) \
              % (EditT2TPredictor.MAX_SEQ_LEN + 1)
        token = word % EditT2TPredictor.POS_FACTOR
        # TODO(fstahlberg): Do not hard code the following section
        op = word // 100000000  
        if op == 1:  # Insertion
            self.trg_sentence = self._ins_op(pos, token)
        elif op == 2:  # Substitution
            self.trg_sentence = self._sub_op(pos, token)
        elif op == 3:  # Deletion
            self.trg_sentence = self._del_op(pos)
        else:
            logging.warning("Invalid edit descriptor %d. Ignoring...", word)
        self._update_cur_score()
        self.cache.add(self.trg_sentence, utils.NEG_INF) 
Author: ucam-smt, Project: sgnmt, Lines: 21, Source: tf_t2t.py
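The predictor packs an edit operation, a position, and a token ID into one integer. The real POS_FACTOR and MAX_SEQ_LEN values live on EditT2TPredictor; with hypothetical values POS_FACTOR = 10**4 and MAX_SEQ_LEN = 999, unpacking works like this:

POS_FACTOR = 10**4   # hypothetical stand-in for EditT2TPredictor.POS_FACTOR
MAX_SEQ_LEN = 999    # hypothetical stand-in for EditT2TPredictor.MAX_SEQ_LEN

word = 2 * 100000000 + 57 * POS_FACTOR + 1234   # a packed edit descriptor
op = word // 100000000                          # 2 -> substitution
pos = (word // POS_FACTOR) % (MAX_SEQ_LEN + 1)  # 57
token = word % POS_FACTOR                       # 1234
print(op, pos, token)                           # 2 57 1234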

Example 6: _decode_batch_input_fn

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Alternatively: from tensor2tensor.data_generators.text_encoder import EOS_ID [as alias]
# This example also uses: import numpy as np; import tensorflow as tf
def _decode_batch_input_fn(num_decode_batches, sorted_inputs, vocabulary,
                           batch_size, max_input_size):
  """Generator to produce batches of inputs."""
  tf.logging.info(" batch %d" % num_decode_batches)
  # First reverse all the input sentences so that if you're going to get OOMs,
  # you'll see it in the first batch
  sorted_inputs.reverse()
  for b in range(num_decode_batches):
    tf.logging.info("Decoding batch %d" % b)
    batch_length = 0
    batch_inputs = []
    for inputs in sorted_inputs[b * batch_size:(b + 1) * batch_size]:
      input_ids = vocabulary.encode(inputs)
      if max_input_size > 0:
        # Subtract 1 for the EOS_ID.
        input_ids = input_ids[:max_input_size - 1]
      input_ids.append(text_encoder.EOS_ID)
      batch_inputs.append(input_ids)
      if len(input_ids) > batch_length:
        batch_length = len(input_ids)
    final_batch_inputs = []
    for input_ids in batch_inputs:
      assert len(input_ids) <= batch_length
      x = input_ids + [0] * (batch_length - len(input_ids))
      final_batch_inputs.append(x)

    yield {
        "inputs": np.array(final_batch_inputs).astype(np.int32),
    } 
Author: akzaidi, Project: fine-lm, Lines: 31, Source: decoding.py

Example 7: _save_until_eos

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Alternatively: from tensor2tensor.data_generators.text_encoder import EOS_ID [as alias]
def _save_until_eos(ids, skip=False):
  """Strips everything after the first <EOS> token, which is normally 1."""
  ids = ids.flatten()
  if skip:
    return ids
  try:
    index = list(ids).index(text_encoder.EOS_ID)
    return ids[0:index]
  except ValueError:
    # No EOS_ID: return the array as-is.
    return ids 
Author: akzaidi, Project: fine-lm, Lines: 13, Source: decoding.py
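A quick illustration with made-up IDs, assuming the function above is in scope (recall that text_encoder.EOS_ID is normally 1):

import numpy as np

decoded = np.array([[17, 42, 1, 0, 0]])    # hypothetical model output
print(_save_until_eos(decoded))            # [17 42]
print(_save_until_eos(decoded, skip=True)) # [17 42  1  0  0]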

Example 8: _truncate_to_lead_section

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Alternatively: from tensor2tensor.data_generators.text_encoder import EOS_ID [as alias]
# This example also uses: import tensorflow as tf
def _truncate_to_lead_section(example):
  wiki = example["targets"]
  lead_boundary = example["section_boundaries"][0]
  # Concat a new EOS to the lead since the original one gets truncated.
  lead = tf.concat((wiki[:lead_boundary], [text_encoder.EOS_ID]), 0)
  return lead 
Author: akzaidi, Project: fine-lm, Lines: 8, Source: wikisum.py

Example 9: _default_hparams

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Alternatively: from tensor2tensor.data_generators.text_encoder import EOS_ID [as alias]
# This example also uses: import tensorflow as tf (SpaceID comes from tensor2tensor.data_generators.problem)
def _default_hparams():
  """A set of basic model hyperparameters."""
  return tf.contrib.training.HParams(
      # Use this parameter to get comparable perplexity numbers with different
      # tokenizations.  This value should be set to the ratio of the number of
      # tokens in the test set according to the tokenization used to the number
      # of tokens in the test set in the "official" tokenization.  For
      # example, if we are using a word-piece based model and we want to
      # compute per-word perplexity, then we set loss_multiplier to the number
      # of wordpieces per word in the test set.
      loss_multiplier=1.0,

      # Use this parameter to allow for larger sequences in the batch. Without
      # the use of this parameter, the size of the inner two dimensions will
      # be used to judge the sequence length.
      batch_size_multiplier=1,

      # During inference for autoregressive problems, if the batch size is 1,
      # decoding stops when the model predicts a text_encoder.EOS_ID token.
      stop_at_eos=False,

      # Modalities used to map from input features to a space compatible with
      # chosen model architecture.  One modality spec (which is a 2-tuple,
      # (modality_full_name, vocab_size)) per feature key. modality_full_name
      # is a string type:name, e.g. class_label:class_label_2d. Leaving off
      # the name uses the default modality for that type (e.g. class_label ==
      # class_label:default).
      input_modality={},

      # Modality used to map from hidden representation to the target space.
      # Specified as a modality spec, a 2-tuple described above.
      target_modality=None,

      # Identifiers used to tell the model which input/target space will be
      # expected. For example, it can tell that we expect French as characters
      # as output, or Spanish as sound. Spaces defined as constants in SpaceID
      # class.
      input_space_id=SpaceID.GENERIC,
      target_space_id=SpaceID.GENERIC) 
Author: akzaidi, Project: fine-lm, Lines: 42, Source: problem.py

Example 10: encode

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Alternatively: from tensor2tensor.data_generators.text_encoder import EOS_ID [as alias]
def encode(self, s):
    """Byte-encodes the string and appends the EOS id."""
    return super(ByteTextEncoderWithEos, self).encode(s) + [text_encoder.EOS_ID]
Author: akzaidi, Project: fine-lm, Lines: 4, Source: speech_recognition.py
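The snippet above shows only the method body. For context, a self-contained sketch of the surrounding class, which is simply a ByteTextEncoder subclass:

from tensor2tensor.data_generators import text_encoder

class ByteTextEncoderWithEos(text_encoder.ByteTextEncoder):
  """Byte-level encoder that appends EOS to every encoded string."""

  def encode(self, s):
    return super(ByteTextEncoderWithEos, self).encode(s) + [text_encoder.EOS_ID]

print(ByteTextEncoderWithEos().encode("hi"))  # [106, 107, 1]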

Example 11: generate_encoded_samples

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Alternatively: from tensor2tensor.data_generators.text_encoder import EOS_ID [as alias]
def generate_encoded_samples(self, data_dir, tmp_dir, dataset_split):
    """Additionally encodes the "context" feature and appends EOS to it."""
    generator = super(
        QuestionAndContext2TextProblem, self).generate_encoded_samples(
            data_dir, tmp_dir, dataset_split)
    vocab = self.feature_encoders(data_dir)["context"]
    for sample in generator:
      context = vocab.encode(sample["context"])
      context.append(text_encoder.EOS_ID)
      sample["context"] = context
      yield sample 
Author: akzaidi, Project: fine-lm, Lines: 12, Source: text_problems.py

Example 12: text2text_generate_encoded

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Alternatively: from tensor2tensor.data_generators.text_encoder import EOS_ID [as alias]
def text2text_generate_encoded(sample_generator,
                               vocab,
                               targets_vocab=None,
                               has_inputs=True):
  """Encode Text2Text samples from the generator with the vocab."""
  targets_vocab = targets_vocab or vocab
  for sample in sample_generator:
    if has_inputs:
      sample["inputs"] = vocab.encode(sample["inputs"])
      sample["inputs"].append(text_encoder.EOS_ID)
    sample["targets"] = targets_vocab.encode(sample["targets"])
    sample["targets"].append(text_encoder.EOS_ID)
    yield sample 
Author: akzaidi, Project: fine-lm, Lines: 15, Source: text_problems.py
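A hypothetical run of the generator above, using a byte-level vocabulary for both inputs and targets:

from tensor2tensor.data_generators import text_encoder

vocab = text_encoder.ByteTextEncoder()
samples = iter([{"inputs": "hi", "targets": "ok"}])  # made-up raw sample
for encoded in text2text_generate_encoded(samples, vocab):
  print(encoded)  # {'inputs': [106, 107, 1], 'targets': [113, 109, 1]}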

Example 13: to_example_dict

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Alternatively: from tensor2tensor.data_generators.text_encoder import EOS_ID [as alias]
# This example also uses: import numpy as np
def to_example_dict(encoder, inputs, mask, outputs):
  """Convert single h5 record to an example dict."""
  # Inputs
  bases = []
  last_idx = -1
  for row in np.argwhere(inputs):
    idx, base_id = row
    idx, base_id = int(idx), int(base_id)
    assert idx > last_idx  # if not, means 2 True values in 1 row
    # Some rows are all False. Those rows are mapped to UNK_ID.
    while idx != last_idx + 1:
      bases.append(encoder.UNK)
      last_idx += 1
    bases.append(encoder.BASES[base_id])
    last_idx = idx
  assert len(inputs) == len(bases)

  input_ids = encoder.encode(bases)
  input_ids.append(text_encoder.EOS_ID)

  # Targets: mask and output
  targets_mask = [float(v) for v in mask]
  # The output is (n, m); store targets_shape so that it can be reshaped
  # properly on the other end.
  targets = [float(v) for v in outputs.flatten()]
  targets_shape = [int(dim) for dim in outputs.shape]
  assert mask.shape[0] == outputs.shape[0]

  example_keys = ["inputs", "targets_mask", "targets", "targets_shape"]
  ex_dict = dict(
      zip(example_keys, [input_ids, targets_mask, targets, targets_shape]))
  return ex_dict 
Author: akzaidi, Project: fine-lm, Lines: 35, Source: gene_expression.py

Example 14: _encode

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Alternatively: from tensor2tensor.data_generators.text_encoder import EOS_ID [as alias]
def _encode(inputs, encoder, add_eos=True):
  """Encodes a string with the given encoder, optionally appending EOS."""
  input_ids = encoder.encode(inputs)
  if add_eos:
    input_ids.append(text_encoder.EOS_ID)
  return input_ids 
Author: akzaidi, Project: fine-lm, Lines: 7, Source: serving_utils.py
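And a hypothetical call, assuming _encode is in scope (any TextEncoder instance works as the encoder argument):

from tensor2tensor.data_generators import text_encoder

byte_encoder = text_encoder.ByteTextEncoder()
print(_encode("hi", byte_encoder))                 # [106, 107, 1]
print(_encode("hi", byte_encoder, add_eos=False))  # [106, 107]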


Note: The tensor2tensor.data_generators.text_encoder.EOS_ID attribute examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are drawn from open-source projects contributed by their respective developers; copyright belongs to the original authors, and redistribution and use must follow each project's license. Do not reproduce without permission.