Python data_utils.PAD_ID屬性代碼示例

本文整理匯總了Python中data_utils.PAD_ID屬性的典型用法代碼示例。如果您正苦於以下問題：Python data_utils.PAD_ID屬性的具體用法？Python data_utils.PAD_ID怎麽用？Python data_utils.PAD_ID使用的例子？那麽, 這裏精選的屬性代碼示例或許可以為您提供幫助。您也可以進一步了解該屬性所在類data_utils的用法示例。

在下文中一共展示了data_utils.PAD_ID屬性的5個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: get_batch

# 需要導入模塊: import data_utils [as 別名]
# 或者: from data_utils import PAD_ID [as 別名]
def get_batch(self, data, bucket_id):
    """Get a random batch of data from the specified bucket, prepare for step.

    To feed data in step(..) it must be a list of batch-major vectors, while
    data here contains single length-major cases. So the main logic of this
    function is to re-index data cases to be in the proper format for feeding.

    Args:
      data: a tuple of size len(self.buckets) in which each element contains
        lists of pairs of input and output data that we use to create a batch.
      bucket_id: integer, which bucket to get the batch for.

    Returns:
      The triple (encoder_inputs, decoder_inputs, target_weights) for
      the constructed batch that has the proper format to call step(...) later.
    """
    encoder_size, decoder_size = self.buckets[bucket_id]
    encoder_inputs, decoder_inputs = [], []

    # Get a random batch of encoder and decoder inputs from data,
    # pad them if needed, reverse encoder inputs and add GO to decoder.
    for _ in xrange(self.batch_size):
      encoder_input, decoder_input = random.choice(data[bucket_id])

      # Encoder inputs are padded and then reversed.
      encoder_pad = [data_utils.PAD_ID] * (encoder_size - len(encoder_input))
      encoder_inputs.append(list(reversed(encoder_input + encoder_pad)))

      # Decoder inputs get an extra "GO" symbol, and are padded then.
      decoder_pad_size = decoder_size - len(decoder_input) - 1
      decoder_inputs.append([data_utils.GO_ID] + decoder_input +
                            [data_utils.PAD_ID] * decoder_pad_size)

    # Now we create batch-major vectors from the data selected above.
    batch_encoder_inputs, batch_decoder_inputs, batch_weights = [], [], []

    # Batch encoder inputs are just re-indexed encoder_inputs.
    for length_idx in xrange(encoder_size):
      batch_encoder_inputs.append(
          np.array([encoder_inputs[batch_idx][length_idx]
                    for batch_idx in xrange(self.batch_size)], dtype=np.int32))

    # Batch decoder inputs are re-indexed decoder_inputs, we create weights.
    for length_idx in xrange(decoder_size):
      batch_decoder_inputs.append(
          np.array([decoder_inputs[batch_idx][length_idx]
                    for batch_idx in xrange(self.batch_size)], dtype=np.int32))

      # Create target_weights to be 0 for targets that are padding.
      batch_weight = np.ones(self.batch_size, dtype=np.float32)
      for batch_idx in xrange(self.batch_size):
        # We set weight to 0 if the corresponding target is a PAD symbol.
        # The corresponding target is decoder_input shifted by 1 forward.
        if length_idx < decoder_size - 1:
          target = decoder_inputs[batch_idx][length_idx + 1]
        if length_idx == decoder_size - 1 or target == data_utils.PAD_ID:
          batch_weight[batch_idx] = 0.0
      batch_weights.append(batch_weight)
    return batch_encoder_inputs, batch_decoder_inputs, batch_weights

開發者ID:ringringyi，項目名稱:DOTA_models，代碼行數:61，代碼來源:seq2seq_model.py

示例2: get_batch

# 需要導入模塊: import data_utils [as 別名]
# 或者: from data_utils import PAD_ID [as 別名]
def get_batch(self, data, bucket_id):

        encoder_size, decoder_size = self.buckets[bucket_id]
        encoder_inputs, decoder_inputs, decoder_emotions = [], [], []

        # Get a random batch of encoder and decoder inputs from data,
        # pad them if needed, reverse encoder inputs and add GO to decoder.
        _emotion = np.random.randint(6)

        for _ in xrange(self.batch_size):
            decoder_emotion = -1
            while decoder_emotion != _emotion:
                encoder_input, decoder_input, _, decoder_emotion = random.choice(data[bucket_id])

            # Encoder inputs are padded and then reversed.
            encoder_pad = [data_utils.PAD_ID] * (encoder_size - len(encoder_input))
            encoder_inputs.append(list(reversed(encoder_input + encoder_pad)))

            # Decoder inputs get an extra "GO" symbol, and are padded then.
            decoder_pad_size = decoder_size - len(decoder_input) - 1
            decoder_inputs.append([data_utils.GO_ID] + decoder_input +
                                                        [data_utils.PAD_ID] * decoder_pad_size)
            
            decoder_emotions.append(decoder_emotion)

        # Now we create batch-major vectors from the data selected above.
        batch_encoder_inputs, batch_decoder_inputs, batch_weights = [], [], []

        batch_decoder_emotions = np.array(decoder_emotions, dtype=np.int32)

        # Batch encoder inputs are just re-indexed encoder_inputs.
        for length_idx in xrange(encoder_size):
            batch_encoder_inputs.append(
                    np.array([encoder_inputs[batch_idx][length_idx]
                                        for batch_idx in xrange(self.batch_size)], dtype=np.int32))

        # Batch decoder inputs are re-indexed decoder_inputs, we create weights.
        for length_idx in xrange(decoder_size):
            batch_decoder_inputs.append(
                    np.array([decoder_inputs[batch_idx][length_idx]
                                        for batch_idx in xrange(self.batch_size)], dtype=np.int32))

            # Create target_weights to be 0 for targets that are padding.
            batch_weight = np.ones(self.batch_size, dtype=np.float32)
            for batch_idx in xrange(self.batch_size):
                # We set weight to 0 if the corresponding target is a PAD symbol.
                # The corresponding target is decoder_input shifted by 1 forward.
                if length_idx < decoder_size - 1:
                    target = decoder_inputs[batch_idx][length_idx + 1]
                if length_idx == decoder_size - 1 or target == data_utils.PAD_ID:
                    batch_weight[batch_idx] = 0.0
            batch_weights.append(batch_weight)
        return batch_encoder_inputs, batch_decoder_inputs, batch_weights, batch_decoder_emotions

開發者ID:thu-coai，項目名稱:ecm，代碼行數:55，代碼來源:seq2seq_model.py

示例3: get_batch_data

# 需要導入模塊: import data_utils [as 別名]
# 或者: from data_utils import PAD_ID [as 別名]
def get_batch_data(self, data, bucket_id):

        encoder_size, decoder_size = self.buckets[bucket_id]
        encoder_inputs, decoder_inputs, decoder_emotions = [], [], []

        # Get a random batch of encoder and decoder inputs from data,
        # pad them if needed, reverse encoder inputs and add GO to decoder.
        for idx in xrange(self.batch_size):
            encoder_input, decoder_input, _, decoder_emotion = data[idx]

            # Encoder inputs are padded and then reversed.
            encoder_pad = [data_utils.PAD_ID] * (encoder_size - len(encoder_input))
            encoder_inputs.append(list(reversed(encoder_input + encoder_pad)))

            # Decoder inputs get an extra "GO" symbol, and are padded then.
            decoder_pad_size = decoder_size - len(decoder_input) - 1
            decoder_inputs.append([data_utils.GO_ID] + decoder_input +
                                                        [data_utils.PAD_ID] * decoder_pad_size)
            
            decoder_emotions.append(decoder_emotion)

        # Now we create batch-major vectors from the data selected above.
        batch_encoder_inputs, batch_decoder_inputs, batch_weights = [], [], []

        batch_decoder_emotions = np.array(decoder_emotions, dtype=np.int32)

        # Batch encoder inputs are just re-indexed encoder_inputs.
        for length_idx in xrange(encoder_size):
            batch_encoder_inputs.append(
                    np.array([encoder_inputs[batch_idx][length_idx]
                                        for batch_idx in xrange(self.batch_size)], dtype=np.int32))

        # Batch decoder inputs are re-indexed decoder_inputs, we create weights.
        for length_idx in xrange(decoder_size):
            batch_decoder_inputs.append(
                    np.array([decoder_inputs[batch_idx][length_idx]
                                        for batch_idx in xrange(self.batch_size)], dtype=np.int32))

            # Create target_weights to be 0 for targets that are padding.
            batch_weight = np.ones(self.batch_size, dtype=np.float32)
            for batch_idx in xrange(self.batch_size):
                # We set weight to 0 if the corresponding target is a PAD symbol.
                # The corresponding target is decoder_input shifted by 1 forward.
                if length_idx < decoder_size - 1:
                    target = decoder_inputs[batch_idx][length_idx + 1]
                if length_idx == decoder_size - 1 or target == data_utils.PAD_ID:
                    batch_weight[batch_idx] = 0.0
            batch_weights.append(batch_weight)
        return batch_encoder_inputs, batch_decoder_inputs, batch_weights, batch_decoder_emotions

開發者ID:thu-coai，項目名稱:ecm，代碼行數:51，代碼來源:seq2seq_model.py

示例4: get_batch

# 需要導入模塊: import data_utils [as 別名]
# 或者: from data_utils import PAD_ID [as 別名]
def get_batch(self, data, bucket_id):
    """Get a random batch of data from the specified bucket, prepare for step.
    To feed data in step(..) it must be a list of batch-major vectors, while
    data here contains single length-major cases. So the main logic of this
    function is to re-index data cases to be in the proper format for feeding.
    Args:
      data: a tuple of size len(self.buckets) in which each element contains
        lists of pairs of input and output data that we use to create a batch.
      bucket_id: integer, which bucket to get the batch for.
    Returns:
      The triple (encoder_inputs, decoder_inputs, target_weights) for
      the constructed batch that has the proper format to call step(...) later.
    """
    encoder_size, decoder_size = self.buckets[bucket_id]
    encoder_inputs, decoder_inputs = [], []

    # Get a random batch of encoder and decoder inputs from data,
    # pad them if needed, reverse encoder inputs and add GO to decoder.
    for _ in xrange(self.batch_size):
      encoder_input, decoder_input = random.choice(data[bucket_id])

      # Encoder inputs are padded and then reversed.
      encoder_pad = [data_utils.PAD_ID] * (encoder_size - len(encoder_input))
      encoder_inputs.append(list(reversed(encoder_input + encoder_pad)))

      # Decoder inputs get an extra "GO" symbol, and are padded then.
      decoder_pad_size = decoder_size - len(decoder_input) - 1
      decoder_inputs.append([data_utils.GO_ID] + decoder_input +
                            [data_utils.PAD_ID] * decoder_pad_size)

    # Now we create batch-major vectors from the data selected above.
    batch_encoder_inputs, batch_decoder_inputs, batch_weights = [], [], []

    # Batch encoder inputs are just re-indexed encoder_inputs.
    for length_idx in xrange(encoder_size):
      batch_encoder_inputs.append(
          np.array([encoder_inputs[batch_idx][length_idx]
                    for batch_idx in xrange(self.batch_size)], dtype=np.int32))

    # Batch decoder inputs are re-indexed decoder_inputs, we create weights.
    for length_idx in xrange(decoder_size):
      batch_decoder_inputs.append(
          np.array([decoder_inputs[batch_idx][length_idx]
                    for batch_idx in xrange(self.batch_size)], dtype=np.int32))

      # Create target_weights to be 0 for targets that are padding.
      batch_weight = np.ones(self.batch_size, dtype=np.float32)
      for batch_idx in xrange(self.batch_size):
        # We set weight to 0 if the corresponding target is a PAD symbol.
        # The corresponding target is decoder_input shifted by 1 forward.
        if length_idx < decoder_size - 1:
          target = decoder_inputs[batch_idx][length_idx + 1]
        if length_idx == decoder_size - 1 or target == data_utils.PAD_ID:
          batch_weight[batch_idx] = 0.0
      batch_weights.append(batch_weight)
    return batch_encoder_inputs, batch_decoder_inputs, batch_weights

開發者ID:Shen-Lab，項目名稱:DeepAffinity，代碼行數:58，代碼來源:seq2seq_model.py

示例5: read_mrs_data

# 需要導入模塊: import data_utils [as 別名]
# 或者: from data_utils import PAD_ID [as 別名]
def read_mrs_data(buckets, source_paths, target_paths, max_size=None,
    any_length=False, offset_target=-1):
  # Read in all files seperately.
  source_inputs = [data_utils.read_ids_file(path, max_size) 
                   for path in source_paths]
  target_inputs = [data_utils.read_ids_file(path, max_size) 
                   for path in target_paths]
  
  data_set = [[] for _ in buckets]
  data_list = []
  # Assume everything is well-aligned.
  for i in xrange(len(source_inputs[0])): # over examples
    # List of sequences of each type.
    source_ids = [source_input[i] for source_input in source_inputs]
    # Assume first target type predicts EOS.
    # Not checking pointer ranges: do that inside tf graph.
    target_ids = [target_inputs[0][i] + [data_utils.EOS_ID]]
    for j, target_input in enumerate(target_inputs[1:]):
      if offset_target > 0 and j + 1 == offset_target:
        target_ids.append([data_utils.PAD_ID] + target_input[i] 
                          + [data_utils.PAD_ID])
      else:
        target_ids.append(target_input[i] + [data_utils.PAD_ID])

    found_bucket = False
    for bucket_id, (source_size, target_size) in enumerate(buckets):
      if len(source_ids[0]) < source_size and len(target_ids[0]) < target_size:
        data_set[bucket_id].append([source_ids, target_ids])
        data_list.append([source_ids, target_ids, bucket_id])
        found_bucket = True
        break
    if any_length and not found_bucket:
      # Crop examples that are larger than the largest bucket.
      source_size, target_size = buckets[-1][0], buckets[-1][1]
      if len(source_ids[0]) >= source_size:
        source_ids = [source_id[:source_size] for source_id in source_ids]
      if len(target_ids[0]) >= target_size:
        target_ids = [target_id[:target_size] for target_id in target_ids]
      bucket_id = len(buckets) - 1
      data_set[bucket_id].append([source_ids, target_ids])
      data_list.append([source_ids, target_ids, bucket_id])
  return data_set, data_list

開發者ID:janmbuys，項目名稱:DeepDeepParser，代碼行數:44，代碼來源:parser.py

注：本文中的data_utils.PAD_ID屬性示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。