当前位置: 首页>>代码示例>>Python>>正文


Python text_encoder.ByteTextEncoder方法代码示例

本文整理汇总了Python中tensor2tensor.data_generators.text_encoder.ByteTextEncoder方法的典型用法代码示例。如果您正苦于以下问题:Python text_encoder.ByteTextEncoder方法的具体用法?Python text_encoder.ByteTextEncoder怎么用?Python text_encoder.ByteTextEncoder使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在tensor2tensor.data_generators.text_encoder的用法示例。


在下文中一共展示了text_encoder.ByteTextEncoder方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: vocab_type

# 需要导入模块: from tensor2tensor.data_generators import text_encoder [as 别名]
# 或者: from tensor2tensor.data_generators.text_encoder import ByteTextEncoder [as 别名]
def vocab_type(self):
    """The vocabulary strategy this problem uses.

    Possible `VocabType`s:
      * `SUBWORD` (`SubwordTextEncoder`): an invertible wordpiece vocabulary
        learned from the training data. Requires `self.approx_vocab_size`;
        override `self.max_samples_for_vocab` to cap how many samples the
        vocab generation reads. Recommended, and the default.
      * `CHARACTER` (`ByteTextEncoder`): encodes raw bytes directly.
      * `TOKEN` (`TokenTextEncoder`): vocabulary loaded from a file you must
        supply yourself (see `TokenTextEncoder.store_to_file`) — one is not
        generated for you. Store it in `data_dir/` under the name given by
        `self.vocab_filename`.

    Returns:
      A VocabType constant.
    """
    return VocabType.SUBWORD
开发者ID:akzaidi,项目名称:fine-lm,代码行数:21,代码来源:text_problems.py

示例2: tabbed_parsing_character_generator

# 需要导入模块: from tensor2tensor.data_generators import text_encoder [as 别名]
# 或者: from tensor2tensor.data_generators.text_encoder import ByteTextEncoder [as 别名]
def tabbed_parsing_character_generator(tmp_dir, train):
  """Yield byte-encoded parsing examples from a single tab-separated file.

  Args:
    tmp_dir: directory containing the "parsing_{train,dev}.pairs" file.
    train: if True, read the training split; otherwise the dev split.

  Returns:
    A generator of encoded source/target examples.
  """
  split = "train" if train else "dev"
  pair_filepath = os.path.join(tmp_dir, "parsing_{0}.pairs".format(split))
  # Character-level problem: every byte is its own token.
  character_vocab = text_encoder.ByteTextEncoder()
  pair_iterator = text_problems.text2text_txt_tab_iterator(pair_filepath)
  return text_problems.text2text_generate_encoded(pair_iterator,
                                                  character_vocab)
开发者ID:akzaidi,项目名称:fine-lm,代码行数:9,代码来源:ice_parsing.py

示例3: feature_encoders

# 需要导入模块: from tensor2tensor.data_generators import text_encoder [as 别名]
# 或者: from tensor2tensor.data_generators.text_encoder import ByteTextEncoder [as 别名]
def feature_encoders(self, _):
    """Return the feature encoders; the data_dir argument is ignored.

    Returns:
      Dict mapping "inputs" to a pass-through TextEncoder and "targets"
      to a byte-level encoder.
    """
    encoders = {}
    encoders["inputs"] = text_encoder.TextEncoder()
    encoders["targets"] = text_encoder.ByteTextEncoder()
    return encoders
开发者ID:akzaidi,项目名称:fine-lm,代码行数:7,代码来源:problem_hparams.py

示例4: feature_encoders

# 需要导入模块: from tensor2tensor.data_generators import text_encoder [as 别名]
# 或者: from tensor2tensor.data_generators.text_encoder import ByteTextEncoder [as 别名]
def feature_encoders(self, data_dir):
    """Build feature encoders: image inputs, byte or subword text targets.

    Args:
      data_dir: directory holding the subword vocab file (only consulted
        when the problem is not character level).

    Returns:
      Dict with an ImageEncoder under "inputs" and a text encoder under
      "targets".
    """
    if self.is_character_level:
      target_encoder = text_encoder.ByteTextEncoder()
    else:
      vocab_path = os.path.join(
          data_dir, "vocab.ende.%d" % self.targeted_vocab_size)
      target_encoder = text_encoder.SubwordTextEncoder(vocab_path)
    return {
        "inputs": text_encoder.ImageEncoder(channels=self.num_channels),
        "targets": target_encoder,
    }
开发者ID:akzaidi,项目名称:fine-lm,代码行数:11,代码来源:image_utils.py

示例5: feature_encoders

# 需要导入模块: from tensor2tensor.data_generators import text_encoder [as 别名]
# 或者: from tensor2tensor.data_generators.text_encoder import ByteTextEncoder [as 别名]
def feature_encoders(self, data_dir):
    """Build feature encoders: image inputs, byte or subword text targets.

    Args:
      data_dir: directory holding the vocab file named by
        `self.vocab_problem.vocab_filename` (only consulted when the
        problem is not character level).

    Returns:
      Dict with an ImageEncoder under "inputs" and a text encoder under
      "targets".
    """
    if self.is_character_level:
      target_encoder = text_encoder.ByteTextEncoder()
    else:
      vocab_path = os.path.join(data_dir, self.vocab_problem.vocab_filename)
      target_encoder = text_encoder.SubwordTextEncoder(vocab_path)
    input_encoder = text_encoder.ImageEncoder(channels=self.num_channels)
    return {"inputs": input_encoder, "targets": target_encoder}
开发者ID:tensorflow,项目名称:tensor2tensor,代码行数:11,代码来源:image_utils.py

示例6: generator

# 需要导入模块: from tensor2tensor.data_generators import text_encoder [as 别名]
# 或者: from tensor2tensor.data_generators.text_encoder import ByteTextEncoder [as 别名]
def generator(self, data_dir, tmp_dir, train):
    """Build t2t-datagen examples for the train or dev split.

    Four .txt files have to be present in the data_dir directory:
      trainSource.txt, trainTarget.txt, devSource.txt, devTarget.txt

    Args:
      data_dir: directory with the source/target .txt files above.
      tmp_dir: unused; kept for interface parity with other problems.
      train: whether to generate the training split (else the dev split).

    Returns:
      A character-level example generator, or None when the split's input
      files are missing (a warning listing the required files is printed).
    """
    character_vocab = text_encoder.ByteTextEncoder()
    mode = 'train' if train else 'dev'
    print('t2t_csaky_log: ' + mode + ' data generation activated.')

    source_path = os.path.join(data_dir, mode + 'Source.txt')
    target_path = os.path.join(data_dir, mode + 'Target.txt')

    # Guard clause: bail out early (returning None) if either file is absent.
    if not (os.path.isfile(source_path) and os.path.isfile(target_path)):
      print('t2t_csaky_log: ' + mode +
            ' source or target file not found, please check ' +
            'that the following files exist in your ' + data_dir +
            ' directory and rerun this program:')
      for required in ('  trainSource.txt',
                       '  trainTarget.txt',
                       '  devSource.txt',
                       '  devTarget.txt'):
        print(required)
      return None

    print('t2t_csaky_log: Generating ' + mode + ' files in ' + data_dir)
    return translate.character_generator(source_path,
                                         target_path,
                                         character_vocab,
                                         EOS)
开发者ID:ricsinaruto,项目名称:Seq2seqChatbots,代码行数:37,代码来源:character_chatbot.py

示例7: main

# 需要导入模块: from tensor2tensor.data_generators import text_encoder [as 别名]
# 或者: from tensor2tensor.data_generators.text_encoder import ByteTextEncoder [as 别名]
def main(_):
  """Read a TFRecord file of examples, optionally decode them, print stats."""
  # Choose a decoder based on which (if any) encoder flag was supplied.
  if FLAGS.subword_text_encoder_filename:
    encoder = text_encoder.SubwordTextEncoder(
        FLAGS.subword_text_encoder_filename)
  elif FLAGS.token_text_encoder_filename:
    encoder = text_encoder.TokenTextEncoder(FLAGS.token_text_encoder_filename)
  elif FLAGS.byte_text_encoder:
    encoder = text_encoder.ByteTextEncoder()
  else:
    encoder = None

  total_sequences = 0
  total_input_tokens = 0
  total_target_tokens = 0
  nonpadding_input_tokens = 0
  nonpadding_target_tokens = 0
  max_input_length = 0
  max_target_length = 0

  for record in tf.python_io.tf_record_iterator(FLAGS.input_filename):
    example = tf.train.Example()
    example.ParseFromString(record)
    features = example.features.feature
    inputs = list(map(int, features["inputs"].int64_list.value))
    targets = list(map(int, features["targets"].int64_list.value))

    if FLAGS.print_inputs:
      if encoder:
        print("INPUTS:\n" + encoder.decode(inputs))
      else:
        print(inputs)
    if FLAGS.print_targets:
      if encoder:
        print("TARGETS:\n" + encoder.decode(targets))
      else:
        print(targets)

    total_sequences += 1
    total_input_tokens += len(inputs)
    total_target_tokens += len(targets)
    # Token id 0 is padding; everything else counts as real content.
    nonpadding_input_tokens += len(inputs) - inputs.count(0)
    nonpadding_target_tokens += len(targets) - targets.count(0)
    max_input_length = max(max_input_length, len(inputs))
    max_target_length = max(max_target_length, len(targets))

    if FLAGS.print_all:
      for name, feature in six.iteritems(features):
        print("%s: %s" % (name, feature.int64_list.value))

  print("total_sequences: %d" % total_sequences)
  print("total_input_tokens: %d" % total_input_tokens)
  print("total_target_tokens: %d" % total_target_tokens)
  print("nonpadding_input_tokens: %d" % nonpadding_input_tokens)
  print("nonpadding_target_tokens: %d" % nonpadding_target_tokens)
  print("max_input_length: %d" % max_input_length)
  print("max_target_length: %d" % max_target_length)
开发者ID:akzaidi,项目名称:fine-lm,代码行数:48,代码来源:inspect_tfrecord.py


注:本文中的tensor2tensor.data_generators.text_encoder.ByteTextEncoder方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。