本文整理汇总了Python中pyarrow.uint64方法的典型用法代码示例。如果您正苦于以下问题:Python pyarrow.uint64方法的具体用法?Python pyarrow.uint64怎么用?Python pyarrow.uint64使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pyarrow的用法示例。
在下文中一共展示了pyarrow.uint64方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _get_numeric_byte_size_test_cases
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import uint64 [as 别名]
def _get_numeric_byte_size_test_cases():
    """Build one parameterized test case per fixed-width numeric Arrow type.

    Every case is a dict of keyword arguments holding a 9-element array of
    the given type, the slice window to apply to it, and the expected byte
    sizes of the whole array and of the slice.  Each expected size is the
    result of ``_all_false_null_bitmap_size`` plus the per-value width
    multiplied by the number of values.

    Returns:
        A list with one dict per numeric type, in declaration order.
    """
    num_values = 9
    slice_offset = 2
    slice_length = 3
    # (arrow type, width of a single value in bytes)
    width_by_type = [
        (pa.int8(), 1),
        (pa.uint8(), 1),
        (pa.int16(), 2),
        (pa.uint16(), 2),
        (pa.int32(), 4),
        (pa.uint32(), 4),
        (pa.int64(), 8),
        (pa.uint64(), 8),
        (pa.float32(), 4),
        (pa.float64(), 8),
    ]
    return [
        dict(
            testcase_name=str(arrow_type),
            array=pa.array(range(num_values), type=arrow_type),
            slice_offset=slice_offset,
            slice_length=slice_length,
            # presumably the helper's argument is the bitmap size in bytes
            # (9 values -> 2, 3 values -> 1) -- confirm against the helper.
            expected_size=(
                _all_false_null_bitmap_size(2) + value_width * num_values),
            expected_sliced_size=(
                _all_false_null_bitmap_size(1) + value_width * slice_length),
        )
        for arrow_type, value_width in width_by_type
    ]
示例2: _get_numba_typ_from_pa_typ
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import uint64 [as 别名]
def _get_numba_typ_from_pa_typ(pa_typ):
    """Translate a pyarrow data type into the matching numba type.

    Args:
        pa_typ: pyarrow data type instance, e.g. ``pa.uint64()``.

    Returns:
        The type registered for ``pa_typ`` in the lookup table below.  All
        date and timestamp types map to ``types.NPDatetime('ns')``.

    Raises:
        ValueError: if ``pa_typ`` has no entry in the lookup table.
    """
    import pyarrow as pa

    datetime_ns = types.NPDatetime('ns')
    arrow_to_numba = {
        # boolean
        pa.bool_(): types.bool_,
        # signed int types
        pa.int8(): types.int8,
        pa.int16(): types.int16,
        pa.int32(): types.int32,
        pa.int64(): types.int64,
        # unsigned int types
        pa.uint8(): types.uint8,
        pa.uint16(): types.uint16,
        pa.uint32(): types.uint32,
        pa.uint64(): types.uint64,
        # float types (TODO: float16?)
        pa.float32(): types.float32,
        pa.float64(): types.float64,
        # String
        pa.string(): string_type,
        # date
        pa.date32(): datetime_ns,
        pa.date64(): datetime_ns,
        # time (TODO: time32, time64, ...)
        pa.timestamp('ns'): datetime_ns,
        pa.timestamp('us'): datetime_ns,
        pa.timestamp('ms'): datetime_ns,
        pa.timestamp('s'): datetime_ns,
    }

    if pa_typ in arrow_to_numba:
        return arrow_to_numba[pa_typ]
    raise ValueError("Arrow data type {} not supported yet".format(pa_typ))
示例3: _dtype_to_arrow_type
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import uint64 [as 别名]
def _dtype_to_arrow_type(dtype: np.dtype) -> pyarrow.DataType:
    """Return the pyarrow data type that corresponds to a numpy dtype.

    Parameters
    ----------
    dtype: np.dtype
        numpy dtype to translate.  Integer and float dtypes map to the
        pyarrow type of the same name, ``datetime64[ns]`` maps to a
        timezone-less nanosecond timestamp and ``object`` maps to string.

    Returns
    -------
    arrow_type: pyarrow.DataType
        The matching pyarrow type.

    Raises
    ------
    RuntimeError
        If ``dtype`` is not one of the handled dtypes, including a
        ``datetime64`` whose unit is not ``ns``.
    """
    # Ordered (numpy scalar type, pyarrow factory) pairs.  The comparison
    # stays `dtype == numpy_type` so numpy's own dtype-equality rules apply.
    numeric_factories = (
        (np.int8, pyarrow.int8),
        (np.int16, pyarrow.int16),
        (np.int32, pyarrow.int32),
        (np.int64, pyarrow.int64),
        (np.uint8, pyarrow.uint8),
        (np.uint16, pyarrow.uint16),
        (np.uint32, pyarrow.uint32),
        (np.uint64, pyarrow.uint64),
        (np.float16, pyarrow.float16),
        (np.float32, pyarrow.float32),
        (np.float64, pyarrow.float64),
    )
    for numpy_type, arrow_factory in numeric_factories:
        if dtype == numpy_type:
            return arrow_factory()

    if dtype.kind == "M":
        # [2019-09-17] Pandas only allows "ns" unit -- as in, datetime64[ns]
        # https://github.com/pandas-dev/pandas/issues/7307#issuecomment-224180563
        # NOTE(review): newer pandas releases may allow other units -- confirm.
        if not dtype.str.endswith("[ns]"):
            # Raise explicitly instead of using `assert`: assertions are
            # stripped under `python -O`, which would silently label
            # non-nanosecond data as a nanosecond timestamp.
            raise RuntimeError("Unhandled dtype %r" % dtype)
        return pyarrow.timestamp(unit="ns", tz=None)

    if dtype == np.object_:
        return pyarrow.string()

    raise RuntimeError("Unhandled dtype %r" % dtype)
示例4: test_empty_dataframe_from_schema
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import uint64 [as 别名]
def test_empty_dataframe_from_schema(df_all_types):
    """Check that an empty frame built from a schema matches the expected one.

    The expected frame is the zero-row selection of ``df_all_types`` with the
    ``date`` column replaced by an empty ``datetime64[ns]`` series and every
    column whose name starts with ``float`` / ``int`` / ``uint`` replaced by
    an empty series of dtype ``float`` / ``int`` / ``np.uint64``.
    """
    schema = make_meta(df_all_types, origin="1")
    actual_df = empty_dataframe_from_schema(schema)

    expected_df = df_all_types.loc[[]]
    expected_df["date"] = pd.Series([], dtype="datetime64[ns]")

    # Column-name prefix -> dtype of the empty replacement series.
    prefix_dtypes = (
        ("float", float),
        ("int", int),
        ("uint", np.uint64),
    )
    for column in expected_df.columns:
        # No `break`: every prefix is checked independently.
        for prefix, empty_dtype in prefix_dtypes:
            if column.startswith(prefix):
                expected_df[column] = pd.Series([], dtype=empty_dtype)

    pdt.assert_frame_equal(actual_df, expected_df)
示例5: test_empty_dataframe_from_schema_columns
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import uint64 [as 别名]
def test_empty_dataframe_from_schema_columns(df_all_types):
    """Check that a column subset is honoured when building the empty frame.

    Only the ``uint64`` and ``int64`` columns are requested; the result must
    equal the zero-row selection of those two columns from ``df_all_types``.
    """
    wanted_columns = ("uint64", "int64")
    schema = make_meta(df_all_types, origin="1")

    # Pass a fresh list to each consumer, as separate literals would.
    actual_df = empty_dataframe_from_schema(schema, list(wanted_columns))
    expected_df = df_all_types.loc[[], list(wanted_columns)]

    pdt.assert_frame_equal(actual_df, expected_df)
示例6: normalize_type
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import uint64 [as 别名]
def normalize_type(t_pa, t_pd, t_np, metadata):
    """
    Normalize a type description to its widest equivalent.

    The following rules are applied:

    - all signed integers (``int8``, ``int16``, ``int32``, ``int64``) become ``int64``
    - all unsigned integers (``uint8``, ``uint16``, ``uint32``, ``uint64``) become ``uint64``
    - all floats become ``float64``
    - list value types are normalized recursively (e.g. ``list[int16]`` to ``list[int64]``,
      ``list[list[uint8]]`` to ``list[list[uint64]]``)
    - dictionary types are replaced by their normalized value type (e.g.
      ``dictionary<values=float32, indices=int16, ordered=0>`` to ``float64``)
    - every other type is returned unchanged, together with its metadata

    Parameters
    ----------
    t_pa: pyarrow.Type
        pyarrow type object, e.g. ``pa.list_(pa.int8())``.
    t_pd: string
        pandas type identifier, e.g. ``"list[int8]"``.
    t_np: string
        numpy type identifier, e.g. ``"object"``.
    metadata: Union[None, Dict[String, Any]]
        metadata associated with the type, e.g. information about categoricals.

    Returns
    -------
    type_tuple: Tuple[pyarrow.Type, string, string, Union[None, Dict[String, Any]]]
        tuple of ``t_pa``, ``t_pd``, ``t_np``, ``metadata`` for the normalized type
    """
    if pa.types.is_signed_integer(t_pa):
        return pa.int64(), "int64", "int64", None

    if pa.types.is_unsigned_integer(t_pa):
        return pa.uint64(), "uint64", "uint64", None

    if pa.types.is_floating(t_pa):
        return pa.float64(), "float64", "float64", None

    if pa.types.is_list(t_pa):
        # Strip the surrounding "list[" ... "]" to get the element identifier.
        inner_pd = t_pd[len("list[") : -1]
        inner_pa, inner_pd_normalized, _, _ = normalize_type(
            t_pa.value_type, inner_pd, None, None
        )
        return (
            pa.list_(inner_pa),
            "list[{}]".format(inner_pd_normalized),
            "object",
            None,
        )

    if pa.types.is_dictionary(t_pa):
        # downcast to dictionary content, `t_pd` is useless in that case
        # The attribute holding the value type depends on the pyarrow version.
        content_type = (
            t_pa.value_type if ARROW_LARGER_EQ_0141 else t_pa.dictionary.type
        )
        return normalize_type(content_type, t_np, t_np, None)

    return t_pa, t_pd, t_np, metadata
示例7: test_store_schema_metadata
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import uint64 [as 别名]
def test_store_schema_metadata(store, df_all_types):
    """Check the schema written to ``_common_metadata`` for a stored table.

    The schema derived from ``df_all_types`` is stored, read back from the
    store as a Parquet file and compared (ignoring metadata) with the
    expected field list below.
    """
    store_schema_metadata(
        schema=make_meta(df_all_types, origin="df_all_types"),
        dataset_uuid="some_uuid",
        store=store,
        table="some_table",
    )

    key = "some_uuid/some_table/_common_metadata"
    assert key in store.keys()

    stored_file = pq.ParquetFile(store.open(key))
    actual_schema = stored_file.schema.to_arrow_schema()

    # (field name, expected arrow type); order matters for schema equality.
    expected_columns = [
        ("array_float32", pa.list_(pa.float64())),
        ("array_float64", pa.list_(pa.float64())),
        ("array_int16", pa.list_(pa.int64())),
        ("array_int32", pa.list_(pa.int64())),
        ("array_int64", pa.list_(pa.int64())),
        ("array_int8", pa.list_(pa.int64())),
        ("array_uint16", pa.list_(pa.uint64())),
        ("array_uint32", pa.list_(pa.uint64())),
        ("array_uint64", pa.list_(pa.uint64())),
        ("array_uint8", pa.list_(pa.uint64())),
        ("array_unicode", pa.list_(pa.string())),
        ("bool", pa.bool_()),
        ("byte", pa.binary()),
        ("date", pa.date32()),
        ("datetime64", pa.timestamp("us")),
        ("float32", pa.float64()),
        ("float64", pa.float64()),
        ("int16", pa.int64()),
        ("int32", pa.int64()),
        ("int64", pa.int64()),
        ("int8", pa.int64()),
        ("null", pa.null()),
        ("uint16", pa.uint64()),
        ("uint32", pa.uint64()),
        ("uint64", pa.uint64()),
        ("uint8", pa.uint64()),
        ("unicode", pa.string()),
    ]
    expected_schema = pa.schema(
        [pa.field(name, arrow_type) for name, arrow_type in expected_columns]
    )

    assert actual_schema.remove_metadata() == expected_schema
示例8: test_eq_explicit
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import uint64 [as 别名]
def test_eq_explicit():
    """Check ``==`` / ``!=`` of ``ExplicitSecondaryIndex`` in both directions.

    A plain copy must compare equal to the original.  A copy that differs in
    ``column``, ``dtype`` or ``index_dct`` must compare unequal.  Two copies
    that both have ``dtype`` set to ``None`` must compare equal to each other.
    """

    def assert_eq(a, b):
        assert a == b
        assert b == a
        assert not (a != b)
        assert not (b != a)

    def assert_ne(a, b):
        assert a != b
        assert b != a
        assert not (a == b)
        assert not (b == a)

    original_index = ExplicitSecondaryIndex(
        column="col",
        index_dct={1: ["part_1"]},
        dtype=pa.int64(),
        index_storage_key="dataset_uuid/some_index.parquet",
    )

    def altered_copy(**overrides):
        # Copy the original index, then overwrite the given attributes.
        clone = original_index.copy()
        for attribute, value in overrides.items():
            setattr(clone, attribute, value)
        return clone

    # An untouched copy is equal.
    assert_eq(altered_copy(), original_index)

    # Any single differing attribute makes the copy unequal.
    differing_overrides = [
        {"column": "col2"},
        {"dtype": pa.uint64()},
        {"index_dct": {1: ["part_1"], 2: ["part_2"]}},
        {"index_dct": {1: ["part_1", "part_2"]}},
        {"index_dct": {1: ["part_2"]}},
        {"index_dct": {2: ["part_1"]}},
        {"dtype": None},
    ]
    for overrides in differing_overrides:
        assert_ne(altered_copy(**overrides), original_index)

    # Two copies that both lack a dtype are equal to each other.
    assert_eq(altered_copy(dtype=None), altered_copy(dtype=None))