当前位置: 首页>>代码示例>>Python>>正文


Python pyarrow.list_方法代码示例

本文整理汇总了 Python 中 pyarrow.list_ 方法的典型用法代码示例。如果您正苦于以下问题:Python pyarrow.list_ 方法的具体用法?Python pyarrow.list_ 怎么用?Python pyarrow.list_ 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 pyarrow 的用法示例。


下文共展示了 pyarrow.list_ 方法的 15 个代码示例,这些示例默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: test_basic_stats_generator_empty_batch

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def test_basic_stats_generator_empty_batch(self):
    """Check BasicStatsGenerator output for a zero-row list<binary> column.

    The single input batch has one column 'a' with no rows. The expected
    statistics for that feature are of type STRING with both
    num_non_missing and tot_num_values equal to 0.
    """
    empty_column = pa.array([], type=pa.list_(pa.binary()))
    batches = [pa.RecordBatch.from_arrays([empty_column], ['a'])]

    # Expected per-feature statistics, written as a text-format proto.
    expected_stats = text_format.Parse(
        """
            path {
              step: 'a'
            }
            type: STRING
            string_stats {
              common_stats {
                num_non_missing: 0
                tot_num_values: 0
              }
            }
            """, statistics_pb2.FeatureNameStatistics())
    expected_result = {types.FeaturePath(['a']): expected_stats}

    generator = basic_stats_generator.BasicStatsGenerator()
    self.assertCombinerOutputEqual(batches, generator, expected_result)
开发者ID:tensorflow,项目名称:data-validation,代码行数:23,代码来源:basic_stats_generator_test.py

示例2: bq_to_arrow_data_type

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def bq_to_arrow_data_type(field):
    """Return the Arrow data type corresponding to a given BigQuery column.

    Resolution order:

    1. A field whose mode is "REPEATED" (case-insensitive) is resolved by
       building a new ``schema.SchemaField`` with the same name, type and
       sub-fields (no mode is passed), resolving that element type
       recursively, and wrapping it in ``pyarrow.list_``.
    2. A field whose upper-cased type is in ``schema._STRUCT_TYPES`` is
       delegated to ``bq_to_arrow_struct_data_type``.
    3. Anything else is looked up in ``BQ_TO_ARROW_SCALARS`` and the
       constructor found there is called with no arguments.

    Args:
        field: The BigQuery schema field to convert. This function reads
            its ``mode``, ``name``, ``field_type`` and ``fields``
            attributes.

    Returns:
        The Arrow data type, or None if default Arrow type inspection
        should be used (no scalar mapping exists, or the element type of a
        repeated field could not be resolved).
    """
    mode = field.mode
    if mode is not None and mode.upper() == "REPEATED":
        # Resolve the element type from an otherwise identical field that is
        # constructed without the REPEATED mode, so the recursion terminates.
        element_field = schema.SchemaField(
            field.name, field.field_type, fields=field.fields
        )
        element_type = bq_to_arrow_data_type(element_field)
        return pyarrow.list_(element_type) if element_type else None

    # A missing or empty field_type is treated as "" and falls through to None.
    type_name = field.field_type.upper() if field.field_type else ""
    if type_name in schema._STRUCT_TYPES:
        return bq_to_arrow_struct_data_type(field)

    make_type = BQ_TO_ARROW_SCALARS.get(type_name)
    return None if make_type is None else make_type()
开发者ID:googleapis,项目名称:python-bigquery,代码行数:24,代码来源:_pandas_helpers.py

示例3: test_simple

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def test_simple(self, factory):
    """Check TotalByteSize for an entity holding a list and a struct column.

    Args:
        factory: Callable taking (arrays, column_names); its result is the
            entity passed to table_util.TotalByteSize.
    """
    # List column "a1" -- 46 bytes in total:
    #   3 int64 values
    #   5 int32 offsets
    #   1 null bitmap byte for the outer ListArray
    #   1 null bitmap byte for the inner Int64Array
    expected_list_bytes = 46
    list_type = pa.list_(pa.int64())
    list_array = pa.array([[1, 2], [None], None, None], type=list_type)

    # Struct column "a2" -- 34 bytes in total:
    #   1 null bitmap byte for the outer StructArray
    #   1 null bitmap byte for the inner Int64Array
    #   4 int64 values
    expected_struct_bytes = 34
    struct_type = pa.struct([pa.field("a", pa.int64())])
    struct_array = pa.array([{"a": 1}, {"a": 2}, {"a": None}, None],
                            type=struct_type)

    entity = factory([list_array, struct_array], ["a1", "a2"])

    self.assertEqual(expected_list_bytes + expected_struct_bytes,
                     table_util.TotalByteSize(entity))
开发者ID:tensorflow,项目名称:tfx-bsl,代码行数:20,代码来源:table_util_test.py

示例4: _GetExpectedColumnValues

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def _GetExpectedColumnValues(tfxio):
  """Build the expected Arrow arrays, keyed by column path.

  Args:
    tfxio: Object whose `_can_produce_large_types` attribute selects between
      the large (`pa.large_list` / `pa.large_binary`) and the regular
      (`pa.list_` / `pa.binary`) Arrow types.

  Returns:
    A dict mapping `path.ColumnPath` to the expected `pa.Array`. The two
    columns under `_SEQUENCE_COLUMN_NAME` are doubly nested lists.
  """
  use_large_types = tfxio._can_produce_large_types
  list_factory = pa.large_list if use_large_types else pa.list_
  bytes_type = pa.large_binary() if use_large_types else pa.binary()

  # Element types, from the flat features to the doubly nested ones.
  int_list_type = list_factory(pa.int64())
  float_list_type = list_factory(pa.float32())
  nested_int_type = list_factory(list_factory(pa.int64()))
  nested_bytes_type = list_factory(list_factory(bytes_type))

  int_values = pa.array([[1], [2], [3]], type=int_list_type)
  float_values = pa.array([[1, 2, 3, 4], [2, 3, 4, 5], None],
                          type=float_list_type)
  sequence_int_values = pa.array([[[1, 2], [3]], None, [[4]]],
                                 nested_int_type)
  sequence_string_values = pa.array(
      [None, [[b"foo", b"bar"], []], [[b"baz"]]], nested_bytes_type)

  return {
      path.ColumnPath(["int_feature"]): int_values,
      path.ColumnPath(["float_feature"]): float_values,
      path.ColumnPath([_SEQUENCE_COLUMN_NAME, "int_feature"]):
          sequence_int_values,
      path.ColumnPath([_SEQUENCE_COLUMN_NAME, "string_feature"]):
          sequence_string_values,
  }
开发者ID:tensorflow,项目名称:tfx-bsl,代码行数:23,代码来源:tf_sequence_example_record_test.py

示例5: testRaggedTensorStructTypeInvalidSteps

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def testRaggedTensorStructTypeInvalidSteps(self):
    """A feature path naming a non-existent struct field is rejected.

    The struct column "ragged_feature" has the fields "inner_feature" and
    "x2", but the ragged tensor's feature path asks for "wrong_step".
    Building the TensorAdapter is expected to raise ValueError.
    """
    tensor_representation = text_format.Parse(
        """
        ragged_tensor {
          feature_path {
            step: "ragged_feature"
            step: "wrong_step"
          }
        }
        """, schema_pb2.TensorRepresentation())

    # One-row struct column with an int64-list field and a binary-list field.
    int_lists = pa.array([[1, 2, 3]], pa.list_(pa.int64()))
    bytes_lists = pa.array([["a", "b", "c"]], pa.list_(pa.binary()))
    struct_column = pa.StructArray.from_arrays(
        [int_lists, bytes_lists], ["inner_feature", "x2"])
    record_batch = pa.RecordBatch.from_arrays(
        [struct_column], ["ragged_feature"])

    with self.assertRaisesRegex(ValueError,
                                ".*Unable to handle tensor output.*"):
      config = tensor_adapter.TensorAdapterConfig(
          record_batch.schema, {"output": tensor_representation})
      tensor_adapter.TensorAdapter(config)
开发者ID:tensorflow,项目名称:tfx-bsl,代码行数:23,代码来源:tensor_adapter_test.py

示例6: testRaggedTensorStructTypeTooManySteps

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def testRaggedTensorStructTypeTooManySteps(self):
    """A feature path that continues past a list-typed field is rejected.

    "ragged_feature" -> "inner_feature" reaches a list<int64> field, yet the
    path carries one more step after it. Building the TensorAdapter is
    expected to raise ValueError.
    """
    tensor_representation = text_format.Parse(
        """
        ragged_tensor {
          feature_path {
            step: "ragged_feature"
            step: "inner_feature"
            step: "non_existant_feature"
          }
        }
        """, schema_pb2.TensorRepresentation())

    # One-row struct column with an int64-list field and a binary-list field.
    int_lists = pa.array([[1, 2, 3]], pa.list_(pa.int64()))
    bytes_lists = pa.array([["a", "b", "c"]], pa.list_(pa.binary()))
    struct_column = pa.StructArray.from_arrays(
        [int_lists, bytes_lists], ["inner_feature", "x2"])
    record_batch = pa.RecordBatch.from_arrays(
        [struct_column], ["ragged_feature"])

    with self.assertRaisesRegex(ValueError,
                                ".*Unable to handle tensor output.*"):
      config = tensor_adapter.TensorAdapterConfig(
          record_batch.schema, {"output": tensor_representation})
      tensor_adapter.TensorAdapter(config)
开发者ID:tensorflow,项目名称:tfx-bsl,代码行数:24,代码来源:tensor_adapter_test.py

示例7: testRaggedTensorStructTypeNonLeaf

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def testRaggedTensorStructTypeNonLeaf(self):
    """A feature path that stops at the struct column itself is rejected.

    The path only names "ragged_feature", which is a struct column rather
    than one of its list-typed fields. Building the TensorAdapter is
    expected to raise ValueError.
    """
    tensor_representation = text_format.Parse(
        """
        ragged_tensor {
          feature_path {
            step: "ragged_feature"
          }
        }
        """, schema_pb2.TensorRepresentation())

    # One-row struct column with an int64-list field and a binary-list field.
    int_lists = pa.array([[1, 2, 3]], pa.list_(pa.int64()))
    bytes_lists = pa.array([["a", "b", "c"]], pa.list_(pa.binary()))
    struct_column = pa.StructArray.from_arrays(
        [int_lists, bytes_lists], ["inner_feature", "x2"])
    record_batch = pa.RecordBatch.from_arrays(
        [struct_column], ["ragged_feature"])

    with self.assertRaisesRegex(ValueError,
                                ".*Unable to handle tensor output.*"):
      config = tensor_adapter.TensorAdapterConfig(
          record_batch.schema, {"output": tensor_representation})
      tensor_adapter.TensorAdapter(config)
开发者ID:tensorflow,项目名称:tfx-bsl,代码行数:22,代码来源:tensor_adapter_test.py

示例8: testRaiseOnNoMatchingHandler

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def testRaiseOnNoMatchingHandler(self):
    """A dense tensor over a nested-list column is rejected.

    The schema has a single column "unsupported_column" of type
    list<list<int64>>, and the tensor representation asks for a dense tensor
    over it. Building the TensorAdapter is expected to raise ValueError
    matching "Unable to handle tensor".
    """
    # Use assertRaisesRegex: the assertRaisesRegexp alias is deprecated and
    # was removed from unittest in Python 3.12.
    with self.assertRaisesRegex(ValueError, "Unable to handle tensor"):
      tensor_adapter.TensorAdapter(
          tensor_adapter.TensorAdapterConfig(
              # nested lists are not supported now.
              pa.schema([pa.field("unsupported_column",
                                  pa.list_(pa.list_(pa.int64())))]),
              {
                  "tensor":
                      text_format.Parse(
                          """
                  dense_tensor {
                    column_name: "unsupported_column"
                    shape: {}
                  }
                  """, schema_pb2.TensorRepresentation())
              }))
开发者ID:tensorflow,项目名称:tfx-bsl,代码行数:19,代码来源:tensor_adapter_test.py

示例9: _GetExpectedArrowSchema

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def _GetExpectedArrowSchema(tfxio, raw_record_column_name=None):
  """Build the expected Arrow schema for the three feature columns.

  Args:
    tfxio: Object whose `_can_produce_large_types` attribute selects between
      large (`pa.large_list` / `pa.large_binary`) and regular
      (`pa.list_` / `pa.binary`) Arrow types.
    raw_record_column_name: If not None, an extra column with this name is
      appended, using the same list-of-bytes type as "string_feature".

  Returns:
    A `pa.Schema` with "int_feature", "float_feature" and "string_feature",
    followed by the raw record column when one is requested.
  """
  if tfxio._can_produce_large_types:
    list_factory, binary_type = pa.large_list, pa.large_binary()
  else:
    list_factory, binary_type = pa.list_, pa.binary()
  bytes_list_type = list_factory(binary_type)

  # (column name, Arrow type) pairs in schema order.
  columns = [
      ("int_feature", list_factory(pa.int64())),
      ("float_feature", list_factory(pa.float32())),
      ("string_feature", bytes_list_type),
  ]
  if raw_record_column_name is not None:
    columns.append((raw_record_column_name, bytes_list_type))

  return pa.schema([pa.field(name, arrow_type) for name, arrow_type in columns])
开发者ID:tensorflow,项目名称:tfx-bsl,代码行数:19,代码来源:csv_tfxio_test.py

示例10: GetExpectedColumnValues

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def GetExpectedColumnValues(tfxio):
  """Build the expected Arrow arrays, keyed by column name.

  Args:
    tfxio: Object whose `_can_produce_large_types` attribute selects between
      large (`pa.large_list` / `pa.large_binary`) and regular
      (`pa.list_` / `pa.binary`) Arrow types.

  Returns:
    A dict with the keys "int_feature", "float_feature" and "string_feature",
    each mapped to a three-row `pa.Array`.
  """
  if tfxio._can_produce_large_types:
    list_factory, binary_type = pa.large_list, pa.large_binary()
  else:
    list_factory, binary_type = pa.list_, pa.binary()

  int_values = pa.array([[1], [2], [3]], type=list_factory(pa.int64()))
  float_values = pa.array(
      [[1, 2, 3, 4], [2, 3, 4, 5], [4, 5, 6, 7]],
      type=list_factory(pa.float32()))
  # Only the middle row of the string feature has values.
  string_values = pa.array(
      [None, ["foo", "bar"], None], type=list_factory(binary_type))

  return {
      "int_feature": int_values,
      "float_feature": float_values,
      "string_feature": string_values,
  }
开发者ID:tensorflow,项目名称:tfx-bsl,代码行数:20,代码来源:tf_example_record_test.py

示例11: test_decode

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def test_decode(self, schema_text_proto, examples_text_proto,
                  create_expected):
    """Decode serialized tf.train.Examples and compare to the expectation.

    Args:
      schema_text_proto: Text-format schema_pb2.Schema, or None to decode
        without a schema.
      examples_text_proto: Iterable of text-format tf.train.Example protos.
      create_expected: Callable invoked as
        create_expected(pa.list_, pa.binary()) to produce the expected
        record batch.
    """
    serialized_examples = []
    for pbtxt in examples_text_proto:
      example = text_format.Parse(pbtxt, tf.train.Example())
      serialized_examples.append(example.SerializeToString())

    if schema_text_proto is None:
      serialized_schema = None
    else:
      parsed_schema = text_format.Parse(schema_text_proto, schema_pb2.Schema())
      serialized_schema = parsed_schema.SerializeToString()

    # A falsy serialized schema (None or empty bytes) means the decoder is
    # built without a schema argument.
    decoder_args = (serialized_schema,) if serialized_schema else ()
    coder = example_coder.ExamplesToRecordBatchDecoder(*decoder_args)

    result = coder.DecodeBatch(serialized_examples)
    self.assertIsInstance(result, pa.RecordBatch)

    expected = create_expected(pa.list_, pa.binary())
    mismatch_message = "actual: {}\n expected:{}".format(result, expected)
    self.assertTrue(result.equals(expected), mismatch_message)

    if serialized_schema:
      self.assertTrue(expected.schema.equals(coder.ArrowSchema()))
开发者ID:tensorflow,项目名称:tfx-bsl,代码行数:26,代码来源:example_coder_test.py

示例12: testIsListLike

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def testIsListLike(self):
    """is_list_like is True for list/large_list types and False otherwise."""
    list_like_types = (pa.list_(pa.int64()), pa.large_list(pa.int64()))
    for arrow_type in list_like_types:
      self.assertTrue(arrow_util.is_list_like(arrow_type))

    non_list_types = (pa.binary(), pa.int64(), pa.large_string())
    for arrow_type in non_list_types:
      self.assertFalse(arrow_util.is_list_like(arrow_type))
开发者ID:tensorflow,项目名称:data-validation,代码行数:8,代码来源:arrow_util_test.py

示例13: testIsBinaryLike

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def testIsBinaryLike(self):
    """is_binary_like is True for binary/string types, False for lists."""
    binary_like_types = (
        pa.binary(), pa.large_binary(), pa.string(), pa.large_string())
    for arrow_type in binary_like_types:
      self.assertTrue(arrow_util.is_binary_like(arrow_type))

    # Lists of binary-like values are not themselves binary-like.
    list_types = (pa.list_(pa.binary()), pa.large_list(pa.string()))
    for arrow_type in list_types:
      self.assertFalse(arrow_util.is_binary_like(arrow_type))
开发者ID:tensorflow,项目名称:data-validation,代码行数:8,代码来源:arrow_util_test.py

示例14: test_basic_stats_generator_invalid_value_numpy_dtype

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def test_basic_stats_generator_invalid_value_numpy_dtype(self):
    """Running the combiner over a list<date32> column raises TypeError.

    The error message is expected to match
    'Feature a has unsupported arrow type'.
    """
    date_column = pa.array([[]], type=pa.list_(pa.date32()))
    batches = [pa.RecordBatch.from_arrays([date_column], ['a'])]
    generator = basic_stats_generator.BasicStatsGenerator()
    with self.assertRaisesRegex(  # pylint: disable=g-error-prone-assert-raises
        TypeError, 'Feature a has unsupported arrow type'):
      self.assertCombinerOutputEqual(batches, generator, None)
开发者ID:tensorflow,项目名称:data-validation,代码行数:9,代码来源:basic_stats_generator_test.py

示例15: test_topk_uniques_combiner_zero_row

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def test_topk_uniques_combiner_zero_row(self):
    """A zero-row list<binary> batch yields an empty combiner result."""
    empty_column = pa.array([], type=pa.list_(pa.binary()))
    batches = [pa.RecordBatch.from_arrays([empty_column], ['f1'])]

    generator_cls = (
        top_k_uniques_combiner_stats_generator
        .TopKUniquesCombinerStatsGenerator)
    generator = generator_cls(num_top_values=4, num_rank_histogram_buckets=3)

    # No rows means no features are reported at all.
    expected_result = {}
    self.assertCombinerOutputEqual(batches, generator, expected_result)
开发者ID:tensorflow,项目名称:data-validation,代码行数:13,代码来源:top_k_uniques_combiner_stats_generator_test.py


注:本文中的pyarrow.list_方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。