

Python text_encoder.ByteTextEncoder Method Code Examples

This article collects typical usage examples of the Python method tensor2tensor.data_generators.text_encoder.ByteTextEncoder. If you are wondering what text_encoder.ByteTextEncoder does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore other usage examples from its containing module, tensor2tensor.data_generators.text_encoder.


The following presents 7 code examples of the text_encoder.ByteTextEncoder method, ordered by popularity by default.
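
Before the project snippets, here is a minimal round-trip sketch (assuming tensor2tensor is installed) of what ByteTextEncoder does: it encodes a string byte by byte, shifting each byte value past the reserved PAD (0) and EOS (1) ids.

from tensor2tensor.data_generators import text_encoder

encoder = text_encoder.ByteTextEncoder()

# encode() maps each UTF-8 byte to (byte value + 2), skipping the
# reserved ids PAD=0 and EOS=1.
ids = encoder.encode("abc")
print(ids)                  # [99, 100, 101]

# decode() inverts the mapping back to the original string.
print(encoder.decode(ids))  # abc
print(encoder.vocab_size)   # 258 (256 byte values + 2 reserved ids)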

Example 1: vocab_type

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import ByteTextEncoder [as alias]
@property
def vocab_type(self):
    """What kind of vocabulary to use.

    `VocabType`s:
      * `SUBWORD`: `SubwordTextEncoder`, an invertible wordpiece vocabulary.
        Must provide `self.approx_vocab_size`. Generates the vocabulary based on
        the training data. To limit the number of samples the vocab generation
        looks at, override `self.max_samples_for_vocab`. Recommended and
        default.
      * `CHARACTER`: `ByteTextEncoder`, encode raw bytes.
      * `TOKEN`: `TokenTextEncoder`, vocabulary based on a file. Must provide a
        vocabulary file yourself (`TokenTextEncoder.store_to_file`) because one
        will not be generated for you. The vocab file should be stored in
        `data_dir/` with the name specified by `self.vocab_filename`.

    Returns:
      VocabType constant
    """
    return VocabType.SUBWORD 
Developer: akzaidi, Project: fine-lm, Lines of code: 21, Source file: text_problems.py
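
A minimal sketch (the problem class name is hypothetical) of how a Text2TextProblem subclass opts into byte-level encoding: overriding vocab_type to CHARACTER makes the framework build a ByteTextEncoder instead of a SubwordTextEncoder.

from tensor2tensor.data_generators import text_problems

class MyByteLevelProblem(text_problems.Text2TextProblem):  # hypothetical
  """Toy problem whose samples are encoded as raw bytes."""

  @property
  def vocab_type(self):
    return text_problems.VocabType.CHARACTER  # served by ByteTextEncoder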

Example 2: tabbed_parsing_character_generator

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import ByteTextEncoder [as alias]
def tabbed_parsing_character_generator(tmp_dir, train):
  """Generate source and target data from a single file."""
  character_vocab = text_encoder.ByteTextEncoder()
  filename = "parsing_{0}.pairs".format("train" if train else "dev")
  pair_filepath = os.path.join(tmp_dir, filename)
  return text_problems.text2text_generate_encoded(
      text_problems.text2text_txt_tab_iterator(pair_filepath), character_vocab) 
Developer: akzaidi, Project: fine-lm, Lines of code: 9, Source file: ice_parsing.py
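
A usage sketch for the generator above (the pair-file contents are hypothetical): text2text_txt_tab_iterator expects one tab-separated source/target pair per line, and each yielded sample holds byte-level "inputs" and "targets" id lists ending in EOS.

import os
import tempfile

tmp_dir = tempfile.mkdtemp()
# Hypothetical one-line pair file: source text, a tab, then the target parse.
with open(os.path.join(tmp_dir, "parsing_train.pairs"), "w") as f:
  f.write("the dog barks\t(S (NP the dog) (VP barks))\n")

for sample in tabbed_parsing_character_generator(tmp_dir, train=True):
  print(sample["inputs"], sample["targets"])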

Example 3: feature_encoders

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import ByteTextEncoder [as alias]
def feature_encoders(self, _):
    return {
        "inputs": text_encoder.TextEncoder(),
        "targets": text_encoder.ByteTextEncoder(),
    } 
Developer: akzaidi, Project: fine-lm, Lines of code: 7, Source file: problem_hparams.py
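
For contrast, a short sketch of what the two encoders above produce: the base TextEncoder parses its input as space-separated integer ids, while ByteTextEncoder consumes raw bytes; both shift ids past the two reserved tokens.

from tensor2tensor.data_generators import text_encoder

print(text_encoder.TextEncoder().encode("7 8 9"))    # [9, 10, 11]  (ids + 2)
print(text_encoder.ByteTextEncoder().encode("789"))  # [57, 58, 59] (bytes + 2)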

Example 4: feature_encoders

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import ByteTextEncoder [as alias]
def feature_encoders(self, data_dir):
    if self.is_character_level:
      encoder = text_encoder.ByteTextEncoder()
    else:
      vocab_filename = os.path.join(
          data_dir, "vocab.ende.%d" % self.targeted_vocab_size)
      encoder = text_encoder.SubwordTextEncoder(vocab_filename)
    input_encoder = text_encoder.ImageEncoder(channels=self.num_channels)
    return {"inputs": input_encoder, "targets": encoder} 
Developer: akzaidi, Project: fine-lm, Lines of code: 11, Source file: image_utils.py

Example 5: feature_encoders

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import ByteTextEncoder [as alias]
def feature_encoders(self, data_dir):
    if self.is_character_level:
      encoder = text_encoder.ByteTextEncoder()
    else:
      vocab_filename = os.path.join(
          data_dir, self.vocab_problem.vocab_filename)
      encoder = text_encoder.SubwordTextEncoder(vocab_filename)
    input_encoder = text_encoder.ImageEncoder(channels=self.num_channels)
    return {"inputs": input_encoder, "targets": encoder} 
Developer: tensorflow, Project: tensor2tensor, Lines of code: 11, Source file: image_utils.py

Example 6: generator

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import ByteTextEncoder [as alias]
def generator(self, data_dir, tmp_dir, train):
    '''
    Generate the vocab and then build train and validation t2t-datagen files.
    Four .txt files have to be present in the data_dir directory:
      trainSource.txt
      trainTarget.txt
      devSource.txt
      devTarget.txt

    Params:
      :data_dir: Directory holding the four .txt files listed above.
      :tmp_dir: Temporary directory (not used by this method).
      :train: Whether we are in train mode or not.
    '''
    character_vocab = text_encoder.ByteTextEncoder()
    mode = 'train' if train else 'dev'
    print('t2t_csaky_log: ' + mode + ' data generation activated.')

    sourcePath = os.path.join(data_dir, mode + 'Source.txt')
    targetPath = os.path.join(data_dir, mode + 'Target.txt')

    # Try to find the txt files.
    if os.path.isfile(sourcePath) and os.path.isfile(targetPath):
      print('t2t_csaky_log: Generating ' + mode + ' files in ' + data_dir)
      return translate.character_generator(sourcePath,
                                           targetPath,
                                           character_vocab,
                                           EOS)
    else:
      print('t2t_csaky_log: ' + mode +
            ' source or target file not found, please check ' +
            'that the following files exist in your ' + data_dir +
            ' directory and rerun this program:')
      print('  trainSource.txt')
      print('  trainTarget.txt')
      print('  devSource.txt')
      print('  devTarget.txt') 
Developer: ricsinaruto, Project: Seq2seqChatbots, Lines of code: 37, Source file: character_chatbot.py
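
A sketch of the underlying call (paths hypothetical; the EOS constant used above is assumed to be text_encoder.EOS_ID, i.e. 1): translate.character_generator pairs the two files line by line and byte-encodes both sides, appending the EOS id to each sequence.

from tensor2tensor.data_generators import text_encoder, translate

vocab = text_encoder.ByteTextEncoder()
pairs = translate.character_generator(
    "/tmp/chatbot/trainSource.txt",  # hypothetical path
    "/tmp/chatbot/trainTarget.txt",  # hypothetical path
    vocab,
    text_encoder.EOS_ID)
for sample in pairs:
  print(sample["inputs"], sample["targets"])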

Example 7: main

# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import ByteTextEncoder [as alias]
def main(_):
  """Convert a file to examples."""
  if FLAGS.subword_text_encoder_filename:
    encoder = text_encoder.SubwordTextEncoder(
        FLAGS.subword_text_encoder_filename)
  elif FLAGS.token_text_encoder_filename:
    encoder = text_encoder.TokenTextEncoder(FLAGS.token_text_encoder_filename)
  elif FLAGS.byte_text_encoder:
    encoder = text_encoder.ByteTextEncoder()
  else:
    encoder = None
  reader = tf.python_io.tf_record_iterator(FLAGS.input_filename)
  total_sequences = 0
  total_input_tokens = 0
  total_target_tokens = 0
  nonpadding_input_tokens = 0
  nonpadding_target_tokens = 0
  max_input_length = 0
  max_target_length = 0
  for record in reader:
    x = tf.train.Example()
    x.ParseFromString(record)
    inputs = [int(i) for i in x.features.feature["inputs"].int64_list.value]
    targets = [int(i) for i in x.features.feature["targets"].int64_list.value]
    if FLAGS.print_inputs:
      print(("INPUTS:\n" + encoder.decode(inputs)) if encoder else inputs)
    if FLAGS.print_targets:
      print(("TARGETS:\n" + encoder.decode(targets)) if encoder else targets)
    nonpadding_input_tokens += len(inputs) - inputs.count(0)
    nonpadding_target_tokens += len(targets) - targets.count(0)
    total_input_tokens += len(inputs)
    total_target_tokens += len(targets)
    total_sequences += 1
    max_input_length = max(max_input_length, len(inputs))
    max_target_length = max(max_target_length, len(targets))
    if FLAGS.print_all:
      for k, v in six.iteritems(x.features.feature):
        print("%s: %s" % (k, v.int64_list.value))

  print("total_sequences: %d" % total_sequences)
  print("total_input_tokens: %d" % total_input_tokens)
  print("total_target_tokens: %d" % total_target_tokens)
  print("nonpadding_input_tokens: %d" % nonpadding_input_tokens)
  print("nonpadding_target_tokens: %d" % nonpadding_target_tokens)
  print("max_input_length: %d" % max_input_length)
  print("max_target_length: %d" % max_target_length) 
Developer: akzaidi, Project: fine-lm, Lines of code: 48, Source file: inspect_tfrecord.py
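
A hypothetical invocation (the flag names mirror the FLAGS fields referenced above; the script ships with tensor2tensor as inspect_tfrecord.py, and the input filename is a placeholder):

# python inspect_tfrecord.py \
#   --input_filename=/tmp/data/my_problem-train-00000-of-00001 \
#   --byte_text_encoder \
#   --print_inputs --print_targets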


Note: The tensor2tensor.data_generators.text_encoder.ByteTextEncoder method examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are drawn from open-source projects contributed by their respective authors, who retain copyright over the source code; consult each project's license before distributing or using the code. Do not republish without permission.