本文整理汇总了Python中pyarrow.int64方法的典型用法代码示例。如果您正苦于以下问题：Python pyarrow.int64方法的具体用法？Python pyarrow.int64怎么用？Python pyarrow.int64使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pyarrow的用法示例。
在下文中一共展示了pyarrow.int64方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_list_columns_and_indexes_without_named_index
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int64 [as 别名]
def test_list_columns_and_indexes_without_named_index(module_under_test):
    """Listing a frame that has only the default, unnamed index.

    The expected result contains just the three columns, each paired with
    its pandas dtype; no index entry is expected.
    """
    frame = pandas.DataFrame(
        collections.OrderedDict(
            [
                ("a_series", [1, 2, 3, 4]),
                ("b_series", [0.1, 0.2, 0.3, 0.4]),
                ("c_series", ["a", "b", "c", "d"]),
            ]
        )
    )

    listed = module_under_test.list_columns_and_indexes(frame)

    to_dtype = pandas.api.types.pandas_dtype
    assert listed == [
        ("a_series", to_dtype("int64")),
        ("b_series", to_dtype("float64")),
        ("c_series", to_dtype("object")),
    ]
示例2: test_list_columns_and_indexes_with_named_index_same_as_column_name
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int64 [as 别名]
def test_list_columns_and_indexes_with_named_index_same_as_column_name(
    module_under_test,
):
    """Listing a frame whose index shares its name with a column.

    The expected result contains only the three columns: the float index
    named "a_series" must not appear, while the integer column of the same
    name must.
    """
    column_data = collections.OrderedDict(
        [
            ("a_series", [1, 2, 3, 4]),
            ("b_series", [0.1, 0.2, 0.3, 0.4]),
            ("c_series", ["a", "b", "c", "d"]),
        ]
    )
    # The index reuses the name of an integer column but carries floats, so
    # the dtype in the result tells us the column was listed, not the index.
    clashing_index = pandas.Index([0.1, 0.2, 0.3, 0.4], name="a_series")
    frame = pandas.DataFrame(column_data, index=clashing_index)

    listed = module_under_test.list_columns_and_indexes(frame)

    to_dtype = pandas.api.types.pandas_dtype
    assert listed == [
        ("a_series", to_dtype("int64")),
        ("b_series", to_dtype("float64")),
        ("c_series", to_dtype("object")),
    ]
示例3: test_list_columns_and_indexes_with_named_index
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int64 [as 别名]
def test_list_columns_and_indexes_with_named_index(module_under_test):
    """Listing a frame with a uniquely named index.

    The expected result puts the "a_index" entry first, followed by the
    three columns, each paired with its pandas dtype.
    """
    column_data = collections.OrderedDict(
        [
            ("a_series", [1, 2, 3, 4]),
            ("b_series", [0.1, 0.2, 0.3, 0.4]),
            ("c_series", ["a", "b", "c", "d"]),
        ]
    )
    named_index = pandas.Index([4, 5, 6, 7], name="a_index")
    frame = pandas.DataFrame(column_data, index=named_index)

    listed = module_under_test.list_columns_and_indexes(frame)

    to_dtype = pandas.api.types.pandas_dtype
    assert listed == [
        ("a_index", to_dtype("int64")),
        ("a_series", to_dtype("int64")),
        ("b_series", to_dtype("float64")),
        ("c_series", to_dtype("object")),
    ]
示例4: test_to_dataframe
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int64 [as 别名]
def test_to_dataframe(self):
    """Convert a mocked four-row API response into a DataFrame.

    Checks the result type, row count, column names, and the dtype names of
    the "name" and "age" columns.
    """
    from google.cloud.bigquery.schema import SchemaField

    people = [
        ("Phred Phlyntstone", "32"),
        ("Bharney Rhubble", "33"),
        ("Wylma Phlyntstone", "29"),
        ("Bhettye Rhubble", "27"),
    ]
    # Each row is expressed as {"f": [{"v": <cell>}, ...]}, one cell per field.
    rows = [{"f": [{"v": name}, {"v": age}]} for name, age in people]
    schema = [
        SchemaField("name", "STRING", mode="REQUIRED"),
        SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    api_request = mock.Mock(return_value={"rows": rows})
    path = "/foo"

    row_iterator = self._make_one(_mock_client(), api_request, path, schema)
    df = row_iterator.to_dataframe(create_bqstorage_client=False)

    self.assertIsInstance(df, pandas.DataFrame)
    self.assertEqual(len(df), 4)  # verify the number of rows
    self.assertEqual(list(df), ["name", "age"])  # verify the column names
    self.assertEqual(df.name.dtype.name, "object")
    self.assertEqual(df.age.dtype.name, "int64")
示例5: get_pa_translated_schema
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int64 [as 别名]
def get_pa_translated_schema(self):
    """Translate the BigQuery schema into a parquet schema.

    Only the ``STRING`` and ``NUMERIC`` field types have a mapping; a field
    of any other type makes the lookup raise ``KeyError``.

    Returns:
        The translated parquet schema as a ``pyarrow.Schema``.
    """
    type_conversions = {'STRING': pa.string(), 'NUMERIC': pa.int64()}

    # TODO(annarudy@google.com): add support for nested fields
    translated_fields = []
    for bq_field in self.bq_schema:
        arrow_type = type_conversions[bq_field.field_type]
        translated_fields.append(pa.field(bq_field.name, arrow_type))

    return pa.schema(translated_fields)
示例6: test_index_store_roundtrip_explicit_key
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int64 [as 别名]
def test_index_store_roundtrip_explicit_key(store):
    """Store and reload an index twice, starting from an explicit key.

    Every reloaded index must compare equal to the original and to the
    other reloaded copy.
    """
    dataset_uuid = "dataset_uuid"
    original = ExplicitSecondaryIndex(
        column="col",
        index_dct={1: ["part_1", "part_2"], 3: ["part_3"]},
        index_storage_key="dataset_uuid/some_index.parquet",
        dtype=pa.int64(),
    )

    # First round trip: store the original, load it back through its key.
    first_key = original.store(store, dataset_uuid)
    reloaded = ExplicitSecondaryIndex(
        column="col", index_storage_key=first_key
    ).load(store)
    assert original == reloaded

    # Second round trip: store the reloaded index and load it again.
    second_key = reloaded.store(store, dataset_uuid)
    reloaded_again = ExplicitSecondaryIndex(
        column="col", index_storage_key=second_key
    ).load(store)
    assert original == reloaded_again
    assert reloaded == reloaded_again
示例7: test_index_as_flat_series
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int64 [as 别名]
def test_index_as_flat_series():
    """Check ``as_flat_series`` in its default and ``compact`` forms.

    The default form is expected to hold one row per (value, partition)
    pair; the compact form one row per value with a list of partitions.
    """
    secondary_index = ExplicitSecondaryIndex(
        column="col",
        index_dct={1: ["part_1", "part_2"], 2: ["part_1"]},
        dtype=pa.int64(),
    )

    flat = secondary_index.as_flat_series()
    expected_flat = pd.Series(
        ["part_1", "part_2", "part_1"],
        index=pd.Index([1, 1, 2], name="col"),
        name="partition",
    )
    assert_series_equal(flat, expected_flat)

    compact = secondary_index.as_flat_series(compact=True)
    expected_compact = pd.Series(
        [["part_1", "part_2"], ["part_1"]],
        index=pd.Index([1, 2], name="col"),
        name="partition",
    )
    assert_series_equal(compact, expected_compact)
示例8: test_index_as_flat_series_partitions_as_index
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int64 [as 别名]
def test_index_as_flat_series_partitions_as_index():
    """Check ``as_flat_series`` with ``partitions_as_index=True``.

    The expected series are indexed by partition label and hold the column
    values, both in the default form and in the ``compact`` form.
    """
    secondary_index = ExplicitSecondaryIndex(
        column="col",
        index_dct={1: ["part_1", "part_2"], 2: ["part_1"]},
        dtype=pa.int64(),
    )

    flat = secondary_index.as_flat_series(partitions_as_index=True)
    expected_flat = pd.Series(
        [1, 1, 2],
        index=pd.Index(["part_1", "part_2", "part_1"], name="partition"),
        name="col",
    )
    assert_series_equal(flat, expected_flat)

    compact = secondary_index.as_flat_series(
        compact=True, partitions_as_index=True
    )
    expected_compact = pd.Series(
        [[1, 2], [1]],
        index=pd.Index(["part_1", "part_2"], name="partition"),
        name="col",
    )
    assert_series_equal(compact, expected_compact)
示例9: test_index_as_flat_series_highly_degenerated_sym
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int64 [as 别名]
def test_index_as_flat_series_highly_degenerated_sym():
    """Check ``as_flat_series`` when every value maps to every partition.

    With ``dim`` values and ``dim`` partitions, the expected flat series has
    ``dim * dim`` rows: each value repeated ``dim`` times in the index, with
    the full partition list cycling through the data.
    """
    dim = 4
    partition_labels = ["part_{}".format(i) for i in range(dim)]

    secondary_index = ExplicitSecondaryIndex(
        column="col",
        # Give every key its own copy of the label list.
        index_dct={value: list(partition_labels) for value in range(dim)},
        dtype=pa.int64(),
    )
    flat = secondary_index.as_flat_series()

    repeated_values = np.array([[i] * dim for i in range(dim)]).ravel()
    expected = pd.Series(
        partition_labels * dim,
        index=pd.Index(repeated_values, name="col"),
        name="partition",
    )
    assert_series_equal(flat, expected)
示例10: test_eval_operators_type_safety
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int64 [as 别名]
def test_eval_operators_type_safety():
    """Predicates of the wrong type must raise ``TypeError`` (gh66).

    An int64 index is queried with a ``str`` and a ``float`` predicate, both
    expected to be rejected, and then with a matching ``int`` predicate.
    """
    # gh66
    index = IndexBase(column="col", index_dct={1234: ["part"]}, dtype=pa.int64())

    str_predicate_error = (
        r"Unexpected type for predicate: Column 'col' has pandas type 'int64', "
        r"but predicate value '1234' has pandas type 'object' \(Python type '<class 'str'>'\)."
    )
    with pytest.raises(TypeError, match=str_predicate_error):
        index.eval_operator("==", "1234")

    float_predicate_error = (
        r"Unexpected type for predicate: Column 'col' has pandas type 'int64', "
        r"but predicate value 1234.0 has pandas type 'float64' \(Python type '<class 'float'>'\)."
    )
    with pytest.raises(TypeError, match=float_predicate_error):
        index.eval_operator("==", 1234.0)

    # A predicate of the matching type is evaluated normally.
    assert index.eval_operator("==", 1234) == {"part"}
示例11: test_get_flattened_array_parent_indices
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int64 [as 别名]
def test_get_flattened_array_parent_indices(self, list_type_factory,
                                            parent_indices_type):
    """Check parent indices for empty, plain, sliced and long list arrays.

    Args:
      list_type_factory: callable that builds the list type under test from
        a value type.
      parent_indices_type: arrow type the returned indices are expected to
        have.
    """
    # Empty input: no parent indices.
    empty_lists = pa.array([], type=list_type_factory(pa.int32()))
    parents = array_util.GetFlattenedArrayParentIndices(empty_lists)
    self.assertTrue(parents.equals(pa.array([], type=parent_indices_type)))

    # The empty list at position 2 contributes no flattened values.
    float_lists = pa.array([[1.], [2.], [], [3., 4.]],
                           type=list_type_factory(pa.float32()))
    parents = array_util.GetFlattenedArrayParentIndices(float_lists)
    self.assertTrue(
        parents.equals(pa.array([0, 1, 3, 3], type=parent_indices_type)))

    # The expected indices are relative to the start of the slice.
    parents = array_util.GetFlattenedArrayParentIndices(float_lists.slice(1))
    self.assertTrue(
        parents.equals(pa.array([0, 2, 2], type=parent_indices_type)))

    # A single parent holding 1024 values.
    one_long_list = pa.array([list(range(1024))],
                             type=list_type_factory(pa.int64()))
    parents = array_util.GetFlattenedArrayParentIndices(one_long_list)
    self.assertTrue(
        parents.equals(pa.array([0] * 1024, type=parent_indices_type)))
示例12: testCooFromListArray
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int64 [as 别名]
def testCooFromListArray(
    self, list_array, expected_coo, expected_dense_shape, array_types):
    """Check ``CooFromListArray`` on plain and sliced inputs.

    For every type in ``array_types`` the input is built three ways: directly,
    as the first half of a doubled array, and as the second half of a doubled
    array. All three must yield the same int64 COO and dense shape.
    """
    doubled = list_array + list_array
    original_length = len(list_array)

    for array_type in array_types:
        candidates = [
            pa.array(list_array, type=array_type),
            # it should work for sliced arrays.
            pa.array(doubled, type=array_type).slice(0, original_length),
            pa.array(doubled, type=array_type).slice(original_length),
        ]
        for input_array in candidates:
            coo, dense_shape = array_util.CooFromListArray(input_array)
            self.assertTrue(coo.type.equals(pa.int64()))
            self.assertTrue(dense_shape.type.equals(pa.int64()))
            self.assertEqual(expected_coo, coo.to_pylist())
            self.assertEqual(expected_dense_shape, dense_shape.to_pylist())
示例13: test_simple
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int64 [as 别名]
def test_simple(self, factory):
    """``TotalByteSize`` of a two-column entity matches a hand count.

    Args:
      factory: callable taking a list of arrays and a list of column names
        and returning the entity passed to ``table_util.TotalByteSize``.
    """
    # Column "a1" (list<int64>):
    #   3 int64 values                            -> 24 bytes
    #   5 int32 offsets                           -> 20 bytes
    #   1 null bitmap byte for outer ListArray    ->  1 byte
    #   1 null bitmap byte for inner Int64Array   ->  1 byte
    list_column_bytes = 46
    list_column = pa.array([[1, 2], [None], None, None],
                           type=pa.list_(pa.int64()))

    # Column "a2" (struct<a: int64>):
    #   4 int64 values                            -> 32 bytes
    #   1 null bitmap byte for outer StructArray  ->  1 byte
    #   1 null bitmap byte for inner Int64Array   ->  1 byte
    struct_column_bytes = 34
    struct_column = pa.array([{"a": 1}, {"a": 2}, {"a": None}, None],
                             type=pa.struct([pa.field("a", pa.int64())]))

    entity = factory([list_column, struct_column], ["a1", "a2"])
    self.assertEqual(list_column_bytes + struct_column_bytes,
                     table_util.TotalByteSize(entity))
示例14: _GetExpectedColumnValues
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int64 [as 别名]
def _GetExpectedColumnValues(tfxio):
    """Build the expected column arrays keyed by ``path.ColumnPath``.

    Large list/binary arrow types are used when
    ``tfxio._can_produce_large_types`` is truthy, the regular ones otherwise.

    Returns:
      A dict mapping each ``path.ColumnPath`` to its expected ``pa.Array``.
    """
    if tfxio._can_produce_large_types:
        list_factory, bytes_type = pa.large_list, pa.large_binary()
    else:
        list_factory, bytes_type = pa.list_, pa.binary()

    # Context features: one list of values per row.
    int_feature = pa.array([[1], [2], [3]], type=list_factory(pa.int64()))
    float_feature = pa.array([[1, 2, 3, 4], [2, 3, 4, 5], None],
                             type=list_factory(pa.float32()))

    # Sequence features: a list of lists per row, hence the nested list type.
    sequence_int_feature = pa.array([[[1, 2], [3]], None, [[4]]],
                                    list_factory(list_factory(pa.int64())))
    sequence_string_feature = pa.array(
        [None, [[b"foo", b"bar"], []], [[b"baz"]]],
        list_factory(list_factory(bytes_type)))

    return {
        path.ColumnPath(["int_feature"]): int_feature,
        path.ColumnPath(["float_feature"]): float_feature,
        path.ColumnPath([_SEQUENCE_COLUMN_NAME, "int_feature"]):
            sequence_int_feature,
        path.ColumnPath([_SEQUENCE_COLUMN_NAME, "string_feature"]):
            sequence_string_feature,
    }
示例15: testRaggedTensorStructTypeInvalidSteps
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int64 [as 别名]
def testRaggedTensorStructTypeInvalidSteps(self):
    """A ragged feature path with an unknown struct step is rejected.

    The path's second step, "wrong_step", is not one of the struct's fields
    ("inner_feature", "x2"); building the adapter is expected to raise
    ``ValueError``.
    """
    tensor_representation = text_format.Parse(
        """
ragged_tensor {
feature_path {
step: "ragged_feature"
step: "wrong_step"
}
}
""", schema_pb2.TensorRepresentation())

    struct_column = pa.StructArray.from_arrays(
        [
            pa.array([[1, 2, 3]], pa.list_(pa.int64())),
            pa.array([["a", "b", "c"]], pa.list_(pa.binary())),
        ],
        ["inner_feature", "x2"])
    record_batch = pa.RecordBatch.from_arrays([struct_column],
                                              ["ragged_feature"])

    with self.assertRaisesRegex(ValueError,
                                ".*Unable to handle tensor output.*"):
        adapter_config = tensor_adapter.TensorAdapterConfig(
            record_batch.schema, {"output": tensor_representation})
        tensor_adapter.TensorAdapter(adapter_config)