本文整理汇总了Python中pyarrow.int16方法的典型用法代码示例。如果您正苦于以下问题：Python pyarrow.int16方法的具体用法？Python pyarrow.int16怎么用？Python pyarrow.int16使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pyarrow的用法示例。
在下文中一共展示了pyarrow.int16方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _get_numeric_byte_size_test_cases
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int16 [as 别名]
def _get_numeric_byte_size_test_cases():
    """Build the parameterized cases for byte-size checks on numeric arrays.

    Every case holds an array created from ``range(9)`` in one numeric Arrow
    type, a slice window (offset 2, length 3), and the sizes expected for the
    full array and for the slice. Each expected size is a null-bitmap term
    obtained from ``_all_false_null_bitmap_size`` plus the element width
    multiplied by the number of elements (9 for the array, 3 for the slice).

    Returns:
        A list of dicts with the keys ``testcase_name``, ``array``,
        ``slice_offset``, ``slice_length``, ``expected_size`` and
        ``expected_sliced_size``.
    """
    # (Arrow type, bytes per element) pairs.
    typed_widths = [
        (pa.int8(), 1),
        (pa.uint8(), 1),
        (pa.int16(), 2),
        (pa.uint16(), 2),
        (pa.int32(), 4),
        (pa.uint32(), 4),
        (pa.int64(), 8),
        (pa.uint64(), 8),
        (pa.float32(), 4),
        (pa.float64(), 8),
    ]
    return [
        dict(
            testcase_name=str(arrow_type),
            array=pa.array(range(9), type=arrow_type),
            slice_offset=2,
            slice_length=3,
            expected_size=(_all_false_null_bitmap_size(2) + item_width * 9),
            expected_sliced_size=(
                _all_false_null_bitmap_size(1) + item_width * 3),
        )
        for arrow_type, item_width in typed_widths
    ]
示例2: test_iterate_over_int16_chunk
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int16 [as 别名]
def test_iterate_over_int16_chunk():
    """Run the shared chunk-iteration check over two int16 columns.

    Both columns are described as FIXED with precision 5 and scale 0, and
    their values are drawn uniformly from the full signed 16-bit range.
    The actual checking is delegated to ``iterate_over_test_chunk``.
    """
    # Seed from the current time as a float. random.seed() only accepts
    # None, int, float, str, bytes or bytearray; handing it a datetime
    # object raises TypeError on Python 3.11+.
    random.seed(datetime.datetime.now().timestamp())

    column_meta = [
        {"logicalType": "FIXED", "precision": "5", "scale": "0"},
        {"logicalType": "FIXED", "precision": "5", "scale": "0"}
    ]

    def int16_generator():
        # Inclusive bounds of a signed 16-bit integer.
        return random.randint(-32768, 32767)

    iterate_over_test_chunk([pyarrow.int16(), pyarrow.int16()],
                            column_meta, int16_generator)
示例3: get_pyarrow_types
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int16 [as 别名]
def get_pyarrow_types():
    """Return a new dict mapping type-name strings to the PA_* constants.

    Note that the ``'base64'`` name is mapped to ``PA_BINARY``.
    """
    name_to_constant = (
        ('bool', PA_BOOL),
        ('float32', PA_FLOAT32),
        ('float64', PA_FLOAT64),
        ('int8', PA_INT8),
        ('int16', PA_INT16),
        ('int32', PA_INT32),
        ('int64', PA_INT64),
        ('string', PA_STRING),
        ('timestamp', PA_TIMESTAMP),
        ('base64', PA_BINARY),
    )
    return dict(name_to_constant)
# pylint: disable=too-many-branches,too-many-statements
示例4: setUp
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int16 [as 别名]
def setUp(self):
    """Create the fixture: metadata, sample data, table and expected types.

    Sets ``self.sa_meta``, ``self.data`` (eight lists of four values each),
    ``self.table`` (an eight-column SQLAlchemy table named
    ``unit_test_table``) and ``self.expected_datatypes`` (eight Arrow types).
    """
    self.sa_meta = sa.MetaData()

    # One list per column; the order appears to line up with the table
    # columns declared further down.
    real_values = [17.124, 1.12, 3.14, 13.37]
    bigint_values = [1, 2, 3, 4]
    int_values = [1, 2, 3, 4]
    smallint_values = [1, 2, 3, 4]
    bool_values = [True, None, False, True]
    str_values = ['string 1', 'string 2', None, 'string 3']
    timestamp_values = [
        datetime(2007, 7, 13, 1, 23, 34, 123456),
        None,
        datetime(2006, 1, 13, 12, 34, 56, 432539),
        datetime(2010, 8, 13, 5, 46, 57, 437699),
    ]
    plaintext_values = ["Test Text", "Some#More#Test# Text", "!@#$%%^&*&", None]
    self.data = [
        real_values,
        bigint_values,
        int_values,
        smallint_values,
        bool_values,
        str_values,
        timestamp_values,
        plaintext_values,
    ]

    table_columns = [
        sa.Column('real_col', sa.REAL),
        sa.Column('bigint_col', sa.BIGINT),
        sa.Column('int_col', sa.INTEGER),
        sa.Column('smallint_col', sa.SMALLINT),
        sa.Column('bool_col', sa.BOOLEAN),
        sa.Column('str_col', sa.VARCHAR),
        sa.Column('timestamp_col', sa.TIMESTAMP),
        sa.Column('plaintext_col', sa.TEXT),
    ]
    self.table = sa.Table('unit_test_table', self.sa_meta, *table_columns)

    # Arrow types listed in the same order as the columns above.
    self.expected_datatypes = [
        pa.float32(),
        pa.int64(),
        pa.int32(),
        pa.int16(),
        pa.bool_(),
        pa.string(),
        pa.timestamp('ns'),
        pa.string(),
    ]
示例5: test_arrow_schema_convertion
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int16 [as 别名]
def test_arrow_schema_convertion():
    """Check that Unischema.from_arrow_schema mirrors an Arrow schema.

    For every Arrow field the resulting unischema attribute must carry the
    same name and a ``None`` codec. Only the ``'bool'`` field, which is
    declared with ``False`` as its third argument, must be non-nullable.
    The unischema must also keep the fields in their original order.
    """
    arrow_fields = [
        pa.field('string', pa.string()),
        pa.field('int8', pa.int8()),
        pa.field('int16', pa.int16()),
        pa.field('int32', pa.int32()),
        pa.field('int64', pa.int64()),
        pa.field('float', pa.float32()),
        pa.field('double', pa.float64()),
        pa.field('bool', pa.bool_(), False),
        pa.field('fixed_size_binary', pa.binary(10)),
        pa.field('variable_size_binary', pa.binary()),
        pa.field('decimal', pa.decimal128(3, 4)),
        pa.field('timestamp_s', pa.timestamp('s')),
        pa.field('timestamp_ns', pa.timestamp('ns')),
        pa.field('date_32', pa.date32()),
        pa.field('date_64', pa.date64())
    ]
    arrow_schema = pa.schema(arrow_fields)

    mock_dataset = _mock_parquet_dataset([], arrow_schema)
    unischema = Unischema.from_arrow_schema(mock_dataset)

    for name in arrow_schema.names:
        converted = getattr(unischema, name)
        assert converted.name == name
        assert converted.codec is None
        if name != 'bool':
            assert converted.nullable
        else:
            assert not converted.nullable

    # The unischema must preserve the order of the fields.
    expected_order = [arrow_field.name for arrow_field in arrow_fields]
    assert list(unischema.fields.keys()) == expected_order
示例6: to_arrow_type
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int16 [as 别名]
def _release_version_tuple(version):
    """Return the leading numeric release components of *version* as ints.

    Parsing stops at the first dot-separated component that is not purely
    numeric; the digits leading that component (if any) are still included.
    For example ``"0.10.0"`` gives ``(0, 10, 0)`` and ``"0.9.0.dev5"`` gives
    ``(0, 9, 0)``.
    """
    release = []
    for piece in version.split("."):
        digits = ""
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        if not digits:
            break
        release.append(int(digits))
        if digits != piece:
            # A suffix such as "rc1" ends the numeric release segment.
            break
    return tuple(release)


def to_arrow_type(dt):
    """Convert a Spark data type to the corresponding pyarrow type.

    The match is on the exact type of ``dt`` (subclasses are not matched),
    as in the original implementation.

    Args:
        dt: Spark data type instance to convert.

    Returns:
        The pyarrow data type corresponding to ``dt``.

    Raises:
        TypeError: if ``dt`` has no supported Arrow counterpart, if it is a
            BinaryType and the installed pyarrow is older than 0.10.0, or if
            it is an ArrayType whose elements are TimestampType.
    """
    import pyarrow as pa

    spark_type = type(dt)
    if spark_type == BooleanType:
        arrow_type = pa.bool_()
    elif spark_type == ByteType:
        arrow_type = pa.int8()
    elif spark_type == ShortType:
        arrow_type = pa.int16()
    elif spark_type == IntegerType:
        arrow_type = pa.int32()
    elif spark_type == LongType:
        arrow_type = pa.int64()
    elif spark_type == FloatType:
        arrow_type = pa.float32()
    elif spark_type == DoubleType:
        arrow_type = pa.float64()
    elif spark_type == DecimalType:
        arrow_type = pa.decimal128(dt.precision, dt.scale)
    elif spark_type == StringType:
        arrow_type = pa.string()
    elif spark_type == BinaryType:
        # TODO: remove version check once minimum pyarrow version is 0.10.0
        # The comparison uses plain integer tuples rather than
        # distutils.version.LooseVersion, because distutils was removed from
        # the standard library in Python 3.12 and importing it made every
        # call to this function fail there.
        if _release_version_tuple(pa.__version__) < (0, 10, 0):
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt) +
                            "\nPlease install pyarrow >= 0.10.0 for BinaryType support.")
        arrow_type = pa.binary()
    elif spark_type == DateType:
        arrow_type = pa.date32()
    elif spark_type == TimestampType:
        # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read
        arrow_type = pa.timestamp('us', tz='UTC')
    elif spark_type == ArrayType:
        if type(dt.elementType) == TimestampType:
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
        # Element types are converted recursively.
        arrow_type = pa.list_(to_arrow_type(dt.elementType))
    else:
        raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
    return arrow_type
示例7: test_load_table_creates
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int16 [as 别名]
def test_load_table_creates(self, con):
    """Load a two-row frame with ``create=True`` into a fresh table.

    The table is dropped (if present) before the load and again afterwards.

    Args:
        con: connection object exposing ``execute`` and ``load_table``.
    """
    column_values = {
        "boolean_": [True, False],
        "smallint_cast": np.array([0, 1], dtype=np.int8),
        "smallint_": np.array([0, 1], dtype=np.int16),
        "int_": np.array([0, 1], dtype=np.int32),
        "bigint_": np.array([0, 1], dtype=np.int64),
        "float_": np.array([0, 1], dtype=np.float32),
        "double_": np.array([0, 1], dtype=np.float64),
        "varchar_": ["a", "b"],
        "text_": ['a', 'b'],
        "time_": [datetime.time(0, 11, 59), datetime.time(13)],
        "timestamp_": [pd.Timestamp("2016"), pd.Timestamp("2017")],
        "date_": [
            datetime.date(2016, 1, 1),
            datetime.date(2017, 1, 1),
        ],
    }
    # NOTE(review): 'smallint_cast' is supplied above but is not listed
    # here, so it does not appear to end up in the frame -- confirm that
    # this is intended.
    column_order = [
        'boolean_',
        'smallint_',
        'int_',
        'bigint_',
        'float_',
        'double_',
        'varchar_',
        'text_',
        'time_',
        'timestamp_',
        'date_',
    ]
    data = pd.DataFrame(column_values, columns=column_order)

    drop_statement = "drop table if exists test_load_table_creates;"
    con.execute(drop_statement)
    con.load_table("test_load_table_creates", data, create=True)
    con.execute(drop_statement)
示例8: _get_numba_typ_from_pa_typ
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int16 [as 别名]
def _get_numba_typ_from_pa_typ(pa_typ):
    """Look up the numba type that corresponds to an Arrow data type.

    Args:
        pa_typ: Arrow data type to translate.

    Returns:
        The entry of the lookup table for ``pa_typ``. Every date and
        timestamp type in the table maps to ``types.NPDatetime('ns')``.

    Raises:
        ValueError: if ``pa_typ`` is not present in the lookup table.
    """
    import pyarrow as pa

    datetime_ns = types.NPDatetime('ns')
    arrow_to_numba = {
        # boolean
        pa.bool_(): types.bool_,
        # signed int types
        pa.int8(): types.int8,
        pa.int16(): types.int16,
        pa.int32(): types.int32,
        pa.int64(): types.int64,
        # unsigned int types
        pa.uint8(): types.uint8,
        pa.uint16(): types.uint16,
        pa.uint32(): types.uint32,
        pa.uint64(): types.uint64,
        # float types (TODO: float16?)
        pa.float32(): types.float32,
        pa.float64(): types.float64,
        # String
        pa.string(): string_type,
        # date
        pa.date32(): datetime_ns,
        pa.date64(): datetime_ns,
        # time (TODO: time32, time64, ...)
        pa.timestamp('ns'): datetime_ns,
        pa.timestamp('us'): datetime_ns,
        pa.timestamp('ms'): datetime_ns,
        pa.timestamp('s'): datetime_ns,
    }

    if pa_typ in arrow_to_numba:
        return arrow_to_numba[pa_typ]
    raise ValueError("Arrow data type {} not supported yet".format(pa_typ))
示例9: _dtype_to_arrow_type
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import int16 [as 别名]
def _dtype_to_arrow_type(dtype: np.dtype) -> pyarrow.DataType:
    """Translate a numpy dtype into the matching pyarrow data type.

    Args:
        dtype: numpy dtype to translate.

    Returns:
        The pyarrow integer or float type of the same width for numeric
        dtypes, a timezone-less nanosecond timestamp for ``datetime64[ns]``,
        and ``pyarrow.string()`` for the object dtype.

    Raises:
        RuntimeError: if the dtype is not handled, including datetime64
            dtypes whose unit is not nanoseconds.
    """
    numeric_factories = (
        (np.int8, pyarrow.int8),
        (np.int16, pyarrow.int16),
        (np.int32, pyarrow.int32),
        (np.int64, pyarrow.int64),
        (np.uint8, pyarrow.uint8),
        (np.uint16, pyarrow.uint16),
        (np.uint32, pyarrow.uint32),
        (np.uint64, pyarrow.uint64),
        (np.float16, pyarrow.float16),
        (np.float32, pyarrow.float32),
        (np.float64, pyarrow.float64),
    )
    for numpy_type, arrow_factory in numeric_factories:
        if dtype == numpy_type:
            return arrow_factory()

    if dtype.kind == "M":
        # [2019-09-17] Pandas only allows "ns" unit -- as in, datetime64[ns]
        # https://github.com/pandas-dev/pandas/issues/7307#issuecomment-224180563
        # This is an explicit raise rather than an assert: asserts are
        # stripped under ``python -O``, which would let a non-ns datetime64
        # be labelled as nanoseconds without any error.
        if not dtype.str.endswith("[ns]"):
            raise RuntimeError("Unhandled dtype %r" % dtype)
        return pyarrow.timestamp(unit="ns", tz=None)

    if dtype == np.object_:
        return pyarrow.string()

    raise RuntimeError("Unhandled dtype %r" % dtype)