本文整理汇总了Python中pyarrow.list_方法的典型用法代码示例。如果您正苦于以下问题:Python pyarrow.list_方法的具体用法?Python pyarrow.list_怎么用?Python pyarrow.list_使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 pyarrow 的用法示例。
在下文中一共展示了pyarrow.list_方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_basic_stats_generator_empty_batch
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def test_basic_stats_generator_empty_batch(self):
  """A string column with zero rows still yields zeroed common stats."""
  input_batches = [
      pa.RecordBatch.from_arrays(
          [pa.array([], type=pa.list_(pa.binary()))], ['a'])
  ]
  # Even with no rows, the feature should be reported as STRING with
  # num_non_missing and tot_num_values both zero.
  expected = {
      types.FeaturePath(['a']):
          text_format.Parse(
              """
      path {
        step: 'a'
      }
      type: STRING
      string_stats {
        common_stats {
          num_non_missing: 0
          tot_num_values: 0
        }
      }
      """, statistics_pb2.FeatureNameStatistics())
  }
  generator = basic_stats_generator.BasicStatsGenerator()
  self.assertCombinerOutputEqual(input_batches, generator, expected)
示例2: bq_to_arrow_data_type
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def bq_to_arrow_data_type(field):
    """Return the Arrow data type, corresponding to a given BigQuery column.

    Returns:
        None: if default Arrow type inspection should be used.
    """
    if field.mode is not None and field.mode.upper() == "REPEATED":
        # A REPEATED column is a list of its element type: recurse on a
        # non-repeated copy of this field to resolve the element type.
        element_field = schema.SchemaField(
            field.name, field.field_type, fields=field.fields)
        element_type = bq_to_arrow_data_type(element_field)
        return pyarrow.list_(element_type) if element_type else None
    if field.field_type:
        field_type_upper = field.field_type.upper()
    else:
        field_type_upper = ""
    if field_type_upper in schema._STRUCT_TYPES:
        return bq_to_arrow_struct_data_type(field)
    # Scalar types map through a constructor table; unknown types fall back
    # to default inspection by returning None.
    constructor = BQ_TO_ARROW_SCALARS.get(field_type_upper)
    return constructor() if constructor is not None else None
示例3: test_simple
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def test_simple(self, factory):
  """Verifies TotalByteSize over one list column and one struct column."""
  # ListArray accounting: 3 int64 values + 5 int32 offsets
  # + 1 null bitmap byte (outer ListArray)
  # + 1 null bitmap byte (inner Int64Array) -> 46 bytes total.
  lists = pa.array([[1, 2], [None], None, None],
                   type=pa.list_(pa.int64()))
  # StructArray accounting: 4 int64 values
  # + 1 null bitmap byte (outer StructArray)
  # + 1 null bitmap byte (inner Int64Array) -> 34 bytes total.
  structs = pa.array([{"a": 1}, {"a": 2}, {"a": None}, None],
                     type=pa.struct([pa.field("a", pa.int64())]))
  entity = factory([lists, structs], ["a1", "a2"])
  self.assertEqual(46 + 34, table_util.TotalByteSize(entity))
示例4: _GetExpectedColumnValues
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def _GetExpectedColumnValues(tfxio):
  """Maps each expected column path to its expected Arrow array."""
  # Implementations that can produce large types use large_list/large_binary
  # instead of list_/binary.
  if tfxio._can_produce_large_types:
    list_factory, bytes_type = pa.large_list, pa.large_binary()
  else:
    list_factory, bytes_type = pa.list_, pa.binary()
  return {
      path.ColumnPath(["int_feature"]):
          pa.array([[1], [2], [3]], type=list_factory(pa.int64())),
      path.ColumnPath(["float_feature"]):
          pa.array([[1, 2, 3, 4], [2, 3, 4, 5], None],
                   type=list_factory(pa.float32())),
      path.ColumnPath([_SEQUENCE_COLUMN_NAME, "int_feature"]):
          pa.array([[[1, 2], [3]], None, [[4]]],
                   list_factory(list_factory(pa.int64()))),
      path.ColumnPath([_SEQUENCE_COLUMN_NAME, "string_feature"]):
          pa.array([None, [[b"foo", b"bar"], []], [[b"baz"]]],
                   list_factory(list_factory(bytes_type))),
  }
示例5: testRaggedTensorStructTypeInvalidSteps
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def testRaggedTensorStructTypeInvalidSteps(self):
  """A ragged feature path containing a nonexistent step is rejected."""
  representation = text_format.Parse(
      """
      ragged_tensor {
        feature_path {
          step: "ragged_feature"
          step: "wrong_step"
        }
      }
      """, schema_pb2.TensorRepresentation())
  struct_column = pa.StructArray.from_arrays([
      pa.array([[1, 2, 3]], pa.list_(pa.int64())),
      pa.array([["a", "b", "c"]], pa.list_(pa.binary()))
  ], ["inner_feature", "x2"])
  record_batch = pa.RecordBatch.from_arrays([struct_column],
                                            ["ragged_feature"])
  config = tensor_adapter.TensorAdapterConfig(
      record_batch.schema, {"output": representation})
  with self.assertRaisesRegex(ValueError,
                              ".*Unable to handle tensor output.*"):
    tensor_adapter.TensorAdapter(config)
示例6: testRaggedTensorStructTypeTooManySteps
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def testRaggedTensorStructTypeTooManySteps(self):
  """A ragged feature path descending past a leaf column is rejected."""
  representation = text_format.Parse(
      """
      ragged_tensor {
        feature_path {
          step: "ragged_feature"
          step: "inner_feature"
          step: "non_existant_feature"
        }
      }
      """, schema_pb2.TensorRepresentation())
  struct_column = pa.StructArray.from_arrays([
      pa.array([[1, 2, 3]], pa.list_(pa.int64())),
      pa.array([["a", "b", "c"]], pa.list_(pa.binary()))
  ], ["inner_feature", "x2"])
  record_batch = pa.RecordBatch.from_arrays([struct_column],
                                            ["ragged_feature"])
  config = tensor_adapter.TensorAdapterConfig(
      record_batch.schema, {"output": representation})
  with self.assertRaisesRegex(ValueError,
                              ".*Unable to handle tensor output.*"):
    tensor_adapter.TensorAdapter(config)
示例7: testRaggedTensorStructTypeNonLeaf
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def testRaggedTensorStructTypeNonLeaf(self):
  """A ragged feature path ending at a non-leaf (struct) column is rejected."""
  representation = text_format.Parse(
      """
      ragged_tensor {
        feature_path {
          step: "ragged_feature"
        }
      }
      """, schema_pb2.TensorRepresentation())
  struct_column = pa.StructArray.from_arrays([
      pa.array([[1, 2, 3]], pa.list_(pa.int64())),
      pa.array([["a", "b", "c"]], pa.list_(pa.binary()))
  ], ["inner_feature", "x2"])
  record_batch = pa.RecordBatch.from_arrays([struct_column],
                                            ["ragged_feature"])
  config = tensor_adapter.TensorAdapterConfig(
      record_batch.schema, {"output": representation})
  with self.assertRaisesRegex(ValueError,
                              ".*Unable to handle tensor output.*"):
    tensor_adapter.TensorAdapter(config)
示例8: testRaiseOnNoMatchingHandler
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def testRaiseOnNoMatchingHandler(self):
  """A schema column no handler supports must fail adapter construction.

  Raises (expected): ValueError from TensorAdapter when no type handler
  matches the column's Arrow type.
  """
  # assertRaisesRegexp is deprecated (since Python 3.2); use
  # assertRaisesRegex, consistent with the other tests in this file.
  with self.assertRaisesRegex(ValueError, "Unable to handle tensor"):
    tensor_adapter.TensorAdapter(
        tensor_adapter.TensorAdapterConfig(
            # Nested lists are not supported now.
            pa.schema([
                pa.field("unsupported_column",
                         pa.list_(pa.list_(pa.int64())))
            ]),
            {
                "tensor":
                    text_format.Parse(
                        """
              dense_tensor {
                column_name: "unsupported_column"
                shape: {}
              }
              """, schema_pb2.TensorRepresentation())
            }))
示例9: _GetExpectedArrowSchema
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def _GetExpectedArrowSchema(tfxio, raw_record_column_name=None):
  """Builds the Arrow schema the given tfxio is expected to produce."""
  if tfxio._can_produce_large_types:
    # Large-type-capable implementations emit large_list/large_binary.
    int_type = pa.large_list(pa.int64())
    float_type = pa.large_list(pa.float32())
    bytes_type = pa.large_list(pa.large_binary())
  else:
    int_type = pa.list_(pa.int64())
    float_type = pa.list_(pa.float32())
    bytes_type = pa.list_(pa.binary())
  fields = [
      pa.field("int_feature", int_type),
      pa.field("float_feature", float_type),
      pa.field("string_feature", bytes_type),
  ]
  if raw_record_column_name is not None:
    # The raw record column, when requested, shares the bytes list type.
    fields.append(pa.field(raw_record_column_name, bytes_type))
  return pa.schema(fields)
示例10: GetExpectedColumnValues
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def GetExpectedColumnValues(tfxio):
  """Maps each column name to the Arrow array the tfxio should produce."""
  # Large-type-capable implementations use large_list/large_binary.
  large = tfxio._can_produce_large_types
  int_type = pa.large_list(pa.int64()) if large else pa.list_(pa.int64())
  float_type = (pa.large_list(pa.float32()) if large
                else pa.list_(pa.float32()))
  bytes_type = (pa.large_list(pa.large_binary()) if large
                else pa.list_(pa.binary()))
  return {
      "int_feature":
          pa.array([[1], [2], [3]], type=int_type),
      "float_feature":
          pa.array([[1, 2, 3, 4], [2, 3, 4, 5], [4, 5, 6, 7]],
                   type=float_type),
      "string_feature":
          pa.array([None, ["foo", "bar"], None], type=bytes_type),
  }
示例11: test_decode
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def test_decode(self, schema_text_proto, examples_text_proto,
                create_expected):
  """Decodes serialized tf.Examples and checks the resulting RecordBatch."""
  serialized_examples = [
      text_format.Parse(pbtxt, tf.train.Example()).SerializeToString()
      for pbtxt in examples_text_proto
  ]
  serialized_schema = None
  if schema_text_proto is not None:
    serialized_schema = text_format.Parse(
        schema_text_proto, schema_pb2.Schema()).SerializeToString()
  # The decoder takes the serialized schema positionally, only when present.
  decoder_args = [serialized_schema] if serialized_schema else []
  coder = example_coder.ExamplesToRecordBatchDecoder(*decoder_args)
  result = coder.DecodeBatch(serialized_examples)
  self.assertIsInstance(result, pa.RecordBatch)
  expected = create_expected(pa.list_, pa.binary())
  self.assertTrue(
      result.equals(expected),
      "actual: {}\n expected:{}".format(result, expected))
  if serialized_schema:
    # With a schema, the coder can report its Arrow schema up front.
    self.assertTrue(expected.schema.equals(coder.ArrowSchema()))
示例12: testIsListLike
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def testIsListLike(self):
  """is_list_like holds for (large_)list types and for nothing else."""
  list_like_types = (pa.list_(pa.int64()), pa.large_list(pa.int64()))
  non_list_types = (pa.binary(), pa.int64(), pa.large_string())
  for data_type in list_like_types:
    self.assertTrue(arrow_util.is_list_like(data_type))
  for data_type in non_list_types:
    self.assertFalse(arrow_util.is_list_like(data_type))
示例13: testIsBinaryLike
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def testIsBinaryLike(self):
  """is_binary_like holds for (large_)binary/string, not for list types."""
  binary_like_types = (pa.binary(), pa.large_binary(), pa.string(),
                       pa.large_string())
  non_binary_types = (pa.list_(pa.binary()), pa.large_list(pa.string()))
  for data_type in binary_like_types:
    self.assertTrue(arrow_util.is_binary_like(data_type))
  for data_type in non_binary_types:
    self.assertFalse(arrow_util.is_binary_like(data_type))
示例14: test_basic_stats_generator_invalid_value_numpy_dtype
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def test_basic_stats_generator_invalid_value_numpy_dtype(self):
  """An unsupported Arrow value type (date32) must raise TypeError."""
  unsupported_column = pa.array([[]], type=pa.list_(pa.date32()))
  batches = [pa.RecordBatch.from_arrays([unsupported_column], ['a'])]
  generator = basic_stats_generator.BasicStatsGenerator()
  with self.assertRaisesRegex(  # pylint: disable=g-error-prone-assert-raises
      TypeError, 'Feature a has unsupported arrow type'):
    self.assertCombinerOutputEqual(batches, generator, None)
示例15: test_topk_uniques_combiner_zero_row
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import list_ [as 别名]
def test_topk_uniques_combiner_zero_row(self):
  """With zero rows, the top-k/uniques combiner emits no statistics."""
  empty_column = pa.array([], type=pa.list_(pa.binary()))
  batches = [pa.RecordBatch.from_arrays([empty_column], ['f1'])]
  generator = (
      top_k_uniques_combiner_stats_generator
      .TopKUniquesCombinerStatsGenerator(
          num_top_values=4, num_rank_histogram_buckets=3))
  # No rows -> no per-feature top-k/uniques output at all.
  self.assertCombinerOutputEqual(batches, generator, {})