Python apache_beam.ptransform_fn方法代碼示例

本文整理匯總了Python中apache_beam.ptransform_fn方法的典型用法代碼示例。如果您正苦於以下問題：Python apache_beam.ptransform_fn方法的具體用法？Python apache_beam.ptransform_fn怎麼用？Python apache_beam.ptransform_fn使用的例子？那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊apache_beam的用法示例。

在下文中一共展示了apache_beam.ptransform_fn方法的11個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: GetInputSourceToExamplePTransform

# 需要導入模塊: import apache_beam [as 別名]
# 或者: from apache_beam import ptransform_fn [as 別名]
def GetInputSourceToExamplePTransform(self) -> beam.PTransform:
    """Supplies the PTransform that turns an input source into records.

    Records are assumed to be tf.train.Example protos by default. Subclasses
    may instead serialize any protocol buffer into bytes for the output
    PCollection, provided the downstream component is able to consume it.

    This function transforms every input split separately. For more complex
    use cases, consider overriding 'GenerateExamplesByBeam' instead.

    This base implementation has no body and therefore returns None.

    An example PTransform looks like this:
      @beam.ptransform_fn
      @beam.typehints.with_input_types(beam.Pipeline)
      @beam.typehints.with_output_types(Union[tf.train.Example,
                                              tf.train.SequenceExample,
                                              bytes])
      def ExamplePTransform(
          pipeline: beam.Pipeline,
          exec_properties: Dict[Text, Any],
          split_pattern: Text) -> beam.pvalue.PCollection
    """

開發者ID:tensorflow，項目名稱:tfx，代碼行數:24，代碼來源:base_example_gen_executor.py

示例2: _RawRecordToRecordBatchInternal

# 需要導入模塊: import apache_beam [as 別名]
# 或者: from apache_beam import ptransform_fn [as 別名]
def _RawRecordToRecordBatchInternal(self,
                                      batch_size: Optional[int] = None
                                     ) -> beam.PTransform:
    """Builds the PTransform that batches raw records and decodes them.

    Args:
      batch_size: forwarded to `batch_util.GetBatchElementsKwargs` to configure
        the `beam.BatchElements` step.

    Returns:
      An instance of the `beam.ptransform_fn`-wrapped transform, declared to
      take `bytes` elements and produce `pa.RecordBatch` elements.
    """

    @beam.ptransform_fn
    @beam.typehints.with_input_types(bytes)
    @beam.typehints.with_output_types(pa.RecordBatch)
    def _PTransformFn(raw_records_pcoll: beam.pvalue.PCollection):
      # Step 1: group the incoming elements into batches.
      batched = raw_records_pcoll | "Batch" >> beam.BatchElements(
          **batch_util.GetBatchElementsKwargs(batch_size))
      # Step 2: run the decoding DoFn over every batch.
      decode_fn = _DecodeBatchExamplesDoFn(self._GetSchemaForDecoding(),
                                           self.raw_record_column_name,
                                           self._can_produce_large_types)
      return batched | "Decode" >> beam.ParDo(decode_fn)

    return _PTransformFn()  # pylint: disable=no-value-for-parameter

開發者ID:tensorflow，項目名稱:tfx-bsl，代碼行數:18，代碼來源:tf_example_record.py

示例3: _RawRecordToRecordBatchInternal

# 需要導入模塊: import apache_beam [as 別名]
# 或者: from apache_beam import ptransform_fn [as 別名]
def _RawRecordToRecordBatchInternal(self,
                                      batch_size: Optional[int] = None
                                     ) -> beam.PTransform:
    """Builds the PTransform that batches raw records and decodes them.

    Args:
      batch_size: forwarded to `batch_util.GetBatchElementsKwargs` to configure
        the `beam.BatchElements` step.

    Returns:
      An instance of the `beam.ptransform_fn`-wrapped transform, declared to
      take `bytes` elements and produce `pa.RecordBatch` elements.
    """

    @beam.ptransform_fn
    @beam.typehints.with_input_types(bytes)
    @beam.typehints.with_output_types(pa.RecordBatch)
    def _PTransformFn(raw_records_pcoll: beam.pvalue.PCollection):
      # Step 1: group the incoming elements into batches.
      batched = raw_records_pcoll | "Batch" >> beam.BatchElements(
          **batch_util.GetBatchElementsKwargs(batch_size))
      # Step 2: run the decoding DoFn (built from self._schema) per batch.
      decode_fn = _DecodeBatchExamplesDoFn(self._schema,
                                           self.raw_record_column_name,
                                           self._can_produce_large_types)
      return batched | "Decode" >> beam.ParDo(decode_fn)

    return _PTransformFn()  # pylint: disable=no-value-for-parameter

開發者ID:tensorflow，項目名稱:tfx-bsl，代碼行數:18，代碼來源:tf_sequence_example_record.py

示例4: RawRecordToRecordBatch

# 需要導入模塊: import apache_beam [as 別名]
# 或者: from apache_beam import ptransform_fn [as 別名]
def RawRecordToRecordBatch(self,
                             batch_size: Optional[int] = None
                            ) -> beam.PTransform:
    """Creates a PTransform turning raw records into Arrow RecordBatches.

    The PCollection fed into the returned transform must originate from
    self.RawRecordBeamSource() (refer to that method's documentation as well).

    Args:
      batch_size: when not None, the produced `pa.RecordBatch`es have this
        size; otherwise Beam tunes the size automatically.

    Returns:
      A PTransform that applies self._RawRecordToRecordBatchInternal and then
      telemetry.ProfileRecordBatches to its input.
    """

    @beam.ptransform_fn
    @beam.typehints.with_input_types(bytes)
    @beam.typehints.with_output_types(pa.RecordBatch)
    def _PTransformFn(pcoll: beam.pvalue.PCollection):
      # Format-specific conversion supplied by the concrete class.
      record_batches = pcoll | (
          "RawRecordToRecordBatch" >>
          self._RawRecordToRecordBatchInternal(batch_size))
      # Profiling step applied to the converted batches.
      profiler = telemetry.ProfileRecordBatches(self._telemetry_descriptors,
                                                self._logical_format,
                                                self._physical_format)
      return record_batches | "CollectRecordBatchTelemetry" >> profiler

    return _PTransformFn()  # pylint: disable=no-value-for-parameter

開發者ID:tensorflow，項目名稱:tfx-bsl，代碼行數:27，代碼來源:record_based_tfxio.py

示例5: _RawRecordBeamSourceInternal

# 需要導入模塊: import apache_beam [as 別名]
# 或者: from apache_beam import ptransform_fn [as 別名]
def _RawRecordBeamSourceInternal(self):
    """A PTransform that encodes instances with an ExampleProtoCoder.

    Every element of the input PCollection is passed through
    `example_proto_coder.ExampleProtoCoder(self._schema).encode`.

    Returns:
      An instance of the `beam.ptransform_fn`-wrapped transform.
    """
    # The output hint was previously `pa.RecordBatch`, which does not match
    # what this transform emits: it only runs the example coder's `encode`.
    # NOTE(review): `encode` is assumed to return serialized bytes, in line
    # with the other raw-record sources that declare `bytes` -- confirm.
    @beam.ptransform_fn
    @beam.typehints.with_output_types(bytes)
    def _ptransform_fn(instances):
      return (instances
              | 'EncodeToTfExamples' >> beam.Map(
                  example_proto_coder.ExampleProtoCoder(self._schema).encode))

    return _ptransform_fn()  # pylint: disable=no-value-for-parameter

  # TODO(b/156761358): deprecated; remove after tfx-bsl 0.23 release.

開發者ID:tensorflow，項目名稱:transform，代碼行數:14，代碼來源:tft_unit.py

示例6: _RawRecordBeamSourceInternal

# 需要導入模塊: import apache_beam [as 別名]
# 或者: from apache_beam import ptransform_fn [as 別名]
def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
    """Returns an identity PTransform over raw records.

    The wrapped function hands back its input PCollection untouched; both the
    input and output element types are declared as `bytes`.
    """

    @beam.ptransform_fn
    @beam.typehints.with_input_types(bytes)
    @beam.typehints.with_output_types(bytes)
    def _PTransformFn(raw_records_pcoll: beam.pvalue.PCollection):
      # Pass-through: nothing is applied to the input.
      return raw_records_pcoll

    return _PTransformFn()  # pylint: disable=no-value-for-parameter

開發者ID:tensorflow，項目名稱:tfx-bsl，代碼行數:10，代碼來源:test_util.py

示例7: _RawRecordToRecordBatchInternal

# 需要導入模塊: import apache_beam [as 別名]
# 或者: from apache_beam import ptransform_fn [as 別名]
def _RawRecordToRecordBatchInternal(self,
                                      batch_size: Optional[int] = None
                                     ) -> beam.PTransform:
    """Builds the PTransform that batches raw records into RecordBatches.

    Args:
      batch_size: forwarded to `batch_util.GetBatchElementsKwargs` to configure
        the `beam.BatchElements` step.

    Returns:
      An instance of the `beam.ptransform_fn`-wrapped transform, declared to
      take `bytes` elements and produce `pa.RecordBatch` elements.
    """

    # The input hint describes the *elements* of the incoming PCollection.
    # This transform is applied to a PCollection of raw records (it feeds
    # them directly into BatchElements), not to a pipeline, so the element
    # type is `bytes` rather than `beam.Pipeline`.
    @beam.typehints.with_input_types(bytes)
    @beam.typehints.with_output_types(pa.RecordBatch)
    def _PTransformFn(raw_record_pcoll: beam.pvalue.PCollection):
      return (raw_record_pcoll
              | "Batch" >> beam.BatchElements(
                  **batch_util.GetBatchElementsKwargs(batch_size))
              | "ToRecordBatch" >>
              beam.Map(_BatchedRecordsToArrow, self.raw_record_column_name,
                       self._can_produce_large_types))

    return beam.ptransform_fn(_PTransformFn)()

開發者ID:tensorflow，項目名稱:tfx-bsl，代碼行數:17，代碼來源:raw_tf_record.py

示例8: _RawRecordBeamSourceInternal

# 需要導入模塊: import apache_beam [as 別名]
# 或者: from apache_beam import ptransform_fn [as 別名]
def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
    """Returns a PTransform reading raw records from TFRecord files.

    The files are selected by `self._file_pattern` and read with a
    `BytesCoder`; the output element type is declared as `bytes`.
    """

    @beam.ptransform_fn
    @beam.typehints.with_input_types(beam.Pipeline)
    @beam.typehints.with_output_types(bytes)
    def _PTransformFn(pipeline: beam.pvalue.PCollection):
      reader = beam.io.ReadFromTFRecord(
          self._file_pattern,
          coder=beam.coders.BytesCoder(),
          # TODO(b/114938612): Eventually remove this override.
          validate=False)
      return pipeline | "ReadFromTFRecord" >> reader

    return _PTransformFn()  # pylint: disable=no-value-for-parameter

開發者ID:tensorflow，項目名稱:tfx-bsl，代碼行數:14，代碼來源:raw_tf_record.py

示例9: BeamSource

# 需要導入模塊: import apache_beam [as 別名]
# 或者: from apache_beam import ptransform_fn [as 別名]
def BeamSource(self, batch_size: Optional[int] = None) -> beam.PTransform:
    """Returns a PTransform chaining the raw-record source and the converter.

    Args:
      batch_size: passed through to self.RawRecordToRecordBatch.

    Returns:
      A PTransform that applies self.RawRecordBeamSource() followed by
      self.RawRecordToRecordBatch(batch_size).
    """

    @beam.ptransform_fn
    @beam.typehints.with_input_types(beam.Pipeline)
    @beam.typehints.with_output_types(pa.RecordBatch)
    def _PTransformFn(pipeline: beam.pvalue.PCollection):
      """Reads raw records and turns them into RecordBatches."""
      raw_records = pipeline | (
          "RawRecordBeamSource" >> self.RawRecordBeamSource())
      return raw_records | (
          "RawRecordToRecordBatch" >> self.RawRecordToRecordBatch(batch_size))

    return _PTransformFn()  # pylint: disable=no-value-for-parameter

開發者ID:tensorflow，項目名稱:tfx-bsl，代碼行數:14，代碼來源:record_based_tfxio.py

示例10: _RawRecordToRecordBatchInternal

# 需要導入模塊: import apache_beam [as 別名]
# 或者: from apache_beam import ptransform_fn [as 別名]
def _RawRecordToRecordBatchInternal(self,
                                      batch_size: Optional[int] = None
                                     ) -> beam.PTransform:
    """Builds the PTransform that decodes raw CSV lines into RecordBatches.

    Args:
      batch_size: handed to `csv_decoder.CSVToRecordBatch` as
        `desired_batch_size`.

    Returns:
      An instance of the `beam.ptransform_fn`-wrapped transform.
    """

    @beam.ptransform_fn
    @beam.typehints.with_input_types(List[bytes])
    @beam.typehints.with_output_types(pa.RecordBatch)
    def _PTransformFn(raw_records_pcoll: beam.pvalue.PCollection):
      """Returns RecordBatch of csv lines."""
      # All parsing options come from this instance's configuration.
      csv_to_record_batch = csv_decoder.CSVToRecordBatch(
          column_names=self._column_names,
          delimiter=self._delimiter,
          skip_blank_lines=self._skip_blank_lines,
          schema=self._schema,
          desired_batch_size=batch_size,
          multivalent_columns=self._multivalent_columns,
          secondary_delimiter=self._secondary_delimiter,
          produce_large_types=self._can_produce_large_types,
          raw_record_column_name=self._raw_record_column_name)
      return raw_records_pcoll | "CSVToRecordBatch" >> csv_to_record_batch

    return _PTransformFn()  # pylint: disable=no-value-for-parameter

開發者ID:tensorflow，項目名稱:tfx-bsl，代碼行數:28，代碼來源:csv_tfxio.py

示例11: GetInputSourceToExamplePTransform

# 需要導入模塊: import apache_beam [as 別名]
# 或者: from apache_beam import ptransform_fn [as 別名]
def GetInputSourceToExamplePTransform(self) -> beam.PTransform:
    """Returns the `ImportRecord` PTransform used for importing records."""

    @beam.ptransform_fn
    @beam.typehints.with_input_types(beam.Pipeline)
    @beam.typehints.with_output_types(Union[tf.train.Example,
                                            tf.train.SequenceExample, bytes])
    def ImportRecord(pipeline: beam.Pipeline, exec_properties: Dict[Text, Any],
                     split_pattern: Text) -> beam.pvalue.PCollection:
      """Imports records in the payload format requested by exec_properties.

      The output holds tf.train.Example, tf.train.SequenceExample, or
      serialized protos, depending on the value stored under
      utils.OUTPUT_DATA_FORMAT_KEY.

      Args:
        pipeline: Beam pipeline.
        exec_properties: A dict of execution properties.
          - input_base: input dir that contains input data.
        split_pattern: Split.pattern in Input config, glob relative file pattern
          that maps to input files with root directory given by input_base.

      Returns:
        PCollection of records (tf.Example, tf.SequenceExample, or bytes).

      Raises:
        ValueError: if the requested payload format is not FORMAT_PROTO,
          FORMAT_TF_EXAMPLE or FORMAT_TF_SEQUENCE_EXAMPLE.
      """
      requested_format = exec_properties.get(utils.OUTPUT_DATA_FORMAT_KEY)

      # The serialized-record import is applied before the format is checked.
      # pylint: disable=no-value-for-parameter
      raw_records = pipeline | _ImportSerializedRecord(exec_properties,
                                                       split_pattern)

      # Serialized protos are passed through untouched.
      if requested_format == example_gen_pb2.PayloadFormat.FORMAT_PROTO:
        return raw_records

      if requested_format == example_gen_pb2.PayloadFormat.FORMAT_TF_EXAMPLE:
        parse_example = beam.Map(tf.train.Example.FromString)
        return raw_records | 'ToTFExample' >> parse_example

      if (requested_format ==
          example_gen_pb2.PayloadFormat.FORMAT_TF_SEQUENCE_EXAMPLE):
        parse_sequence_example = beam.Map(tf.train.SequenceExample.FromString)
        return raw_records | 'ToTFSequenceExample' >> parse_sequence_example

      raise ValueError('output_payload_format must be one of FORMAT_TF_EXAMPLE,'
                       ' FORMAT_TF_SEQUENCE_EXAMPLE or FORMAT_PROTO')

    return ImportRecord

開發者ID:tensorflow，項目名稱:tfx，代碼行數:48，代碼來源:executor.py

注：本文中的apache_beam.ptransform_fn方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。