本文整理汇总了Python中pyarrow.int8方法的典型用法代码示例。如果您正苦于以下问题:Python pyarrow.int8方法的具体用法?Python pyarrow.int8怎么用?Python pyarrow.int8使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pyarrow的用法示例。
在下文中一共展示了pyarrow.int8方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_as_spark_schema
# Requires: import pyarrow (or: from pyarrow import int8)
def test_as_spark_schema():
    """Exercise 'as_spark_schema' conversion on a small Unischema."""
    schema = Unischema('TestSchema', [
        UnischemaField('int_field', np.int8, (), ScalarCodec(IntegerType()), False),
        UnischemaField('string_field', np.string_, (), ScalarCodec(StringType()), False),
        UnischemaField('string_field_implicit', np.string_, ()),
    ])
    spark_schema = schema.as_spark_schema()
    # Field order and names survive the conversion.
    for pos, expected_name in enumerate(['int_field', 'string_field', 'string_field_implicit']):
        assert spark_schema.fields[pos].name == expected_name
    # String fields map to Spark StringType whether the codec is explicit or implicit.
    assert spark_schema.fields[1].dataType == StringType()
    assert spark_schema.fields[2].dataType == StringType()
    # The Unischema itself still exposes fields by name.
    assert schema.fields['int_field'].name == 'int_field'
    assert schema.fields['string_field'].name == 'string_field'
示例2: test_render_has_header_false
# Requires: import pyarrow (or: from pyarrow import int8)
def test_render_has_header_false(self):
    """Render a headerless two-row CSV; columns get generated names."""
    with tempfile_context("http") as http_path:
        # Store a fake fetched HTTP response containing the raw CSV bytes.
        payload = io.BytesIO(b"1,2\n3,4")
        httpfile.write(
            http_path,
            {"url": "https://blah"},
            "200 OK",
            [("content-type", "text/csv")],
            payload,
        )
        params = P(has_header=False)
        with self.render(params, FetchResult(http_path)) as result:
            expected = {
                "Column 1": pyarrow.array([1, 3], pyarrow.int8()),
                "Column 2": pyarrow.array([2, 4], pyarrow.int8()),
            }
            assert_arrow_table_equals(result.table, expected)
            self.assertEqual(result.errors, [])
示例3: test_dataframe_category_column
# Requires: import pyarrow (or: from pyarrow import int8)
def test_dataframe_category_column(self):
    """A pandas categorical column converts to an Arrow dictionary array."""
    source = pd.DataFrame({"A": ["A", "B", None, "A"]}, dtype="category")
    actual = dataframe_to_arrow_table(
        source,
        [Column("A", ColumnType.TEXT())],
        self.path,
    )
    # Category codes become int8 indices; None stays a null index.
    indices = pyarrow.array([0, 1, None, 0], type=pyarrow.int8())
    dictionary = pyarrow.array(["A", "B"], type=pyarrow.string())
    expected = arrow_table(
        {"A": pyarrow.DictionaryArray.from_arrays(indices, dictionary)}
    )
    assert_arrow_table_equals(actual, expected)
示例4: test_dataframe_all_null_category_column
# Requires: import pyarrow (or: from pyarrow import int8)
def test_dataframe_all_null_category_column(self):
    """An all-null categorical column yields null indices and an empty dictionary."""
    source = pd.DataFrame({"A": [None]}, dtype=str).astype("category")
    actual = dataframe_to_arrow_table(
        source,
        [Column("A", ColumnType.TEXT())],
        self.path,
    )
    indices = pyarrow.array([None], type=pyarrow.int8())
    dictionary = pyarrow.array([], type=pyarrow.string())
    expected = arrow_table(
        {"A": pyarrow.DictionaryArray.from_arrays(indices, dictionary)}
    )
    assert_arrow_table_equals(actual, expected)
示例5: test_render_has_header_false
# Requires: import pyarrow (or: from pyarrow import int8)
def test_render_has_header_false(self):
    """Render a headerless CSV via call_render; columns get generated names."""
    with tempfile_context(prefix="http-") as http_path:
        # Store a fake fetched HTTP response containing the raw CSV bytes.
        payload = io.BytesIO(b"1,2\n3,4")
        httpfile.write(
            http_path,
            {"url": "http://example.com/hello"},
            "200 OK",
            [("content-type", "text/csv")],
            payload,
        )
        table, errors = call_render(P(has_header=False), FetchResult(http_path))
        expected = {
            "Column 1": pyarrow.array([1, 3], pyarrow.int8()),
            "Column 2": pyarrow.array([2, 4], pyarrow.int8()),
        }
        assert_arrow_table_equals(table, expected)
        self.assertEqual(errors, [])
示例6: test_index_normalize_during_init_warn_collision
# Requires: import pyarrow (or: from pyarrow import int8)
def test_index_normalize_during_init_warn_collision(collision, caplog):
    """Index keys that normalize to the same value should log a collision warning."""
    index_dct = {1: ["a", "c"], 2.0: ["d"]}
    if collision:
        # The string "1" normalizes to the same key as the int 1 above.
        index_dct["1"] = ["a", "b"]
    caplog.set_level(logging.DEBUG)
    ExplicitSecondaryIndex(column="col", dtype=pa.int8(), index_dct=index_dct)
    # Collect WARN-level messages emitted by the index module only.
    warnings = []
    for logger_name, level, message in caplog.record_tuples:
        if logger_name == "kartothek.core.index" and level == logging.WARN:
            warnings.append(message)
    if collision:
        assert any(
            msg.startswith(
                "Value normalization for index column col resulted in 1 collision(s)."
            )
            for msg in warnings
        )
    else:
        assert not any(
            msg.startswith("Value normalization for index column") for msg in warnings
        )
示例7: _get_numeric_byte_size_test_cases
# Requires: import pyarrow (or: from pyarrow import int8)
def _get_numeric_byte_size_test_cases():
    """Build test-case dicts covering byte sizes of sliced/unsliced numeric arrays."""
    # (arrow type, per-element width in bytes)
    type_widths = [
        (pa.int8(), 1),
        (pa.uint8(), 1),
        (pa.int16(), 2),
        (pa.uint16(), 2),
        (pa.int32(), 4),
        (pa.uint32(), 4),
        (pa.int64(), 8),
        (pa.uint64(), 8),
        (pa.float32(), 4),
        (pa.float64(), 8),
    ]
    cases = []
    for arrow_type, width in type_widths:
        # Full array: 9 elements; slice: 3 elements starting at offset 2.
        full_size = _all_false_null_bitmap_size(2) + width * 9
        sliced_size = _all_false_null_bitmap_size(1) + width * 3
        cases.append(
            dict(
                testcase_name=str(arrow_type),
                array=pa.array(range(9), type=arrow_type),
                slice_offset=2,
                slice_length=3,
                expected_size=full_size,
                expected_sliced_size=sliced_size))
    return cases
示例8: test_iterate_over_int8_chunk
# Requires: import pyarrow (or: from pyarrow import int8)
def test_iterate_over_int8_chunk():
    """Iterate over a chunk of two int8 columns filled with random values."""
    # The original code did random.seed(datetime.datetime.now()). Seeding with
    # a non-(int/float/str/bytes) object was deprecated in Python 3.9 and raises
    # TypeError since 3.11. random.seed() with no argument seeds from OS entropy
    # (falling back to current time), which is exactly what was intended.
    random.seed()
    column_meta = [
        {"logicalType": "FIXED", "precision": "3", "scale": "0"},
        {"logicalType": "FIXED", "precision": "3", "scale": "0"}
    ]

    def int8_generator():
        # Full signed int8 range, inclusive.
        return random.randint(-128, 127)

    iterate_over_test_chunk([pyarrow.int8(), pyarrow.int8()],
                            column_meta, int8_generator)
示例9: get_pyarrow_types
# Requires: import pyarrow (or: from pyarrow import int8)
def get_pyarrow_types():
    """Return the mapping from schema type names to pyarrow type constants."""
    name_type_pairs = [
        ('bool', PA_BOOL),
        ('float32', PA_FLOAT32),
        ('float64', PA_FLOAT64),
        ('int8', PA_INT8),
        ('int16', PA_INT16),
        ('int32', PA_INT32),
        ('int64', PA_INT64),
        ('string', PA_STRING),
        ('timestamp', PA_TIMESTAMP),
        ('base64', PA_BINARY),
    ]
    return dict(name_type_pairs)
# pylint: disable=too-many-branches,too-many-statements
示例10: test_argparse_types
# Requires: import pyarrow (or: from pyarrow import int8)
def test_argparse_types():
    """Repeated --type flags parse into (column, type, optional) tuples in order."""
    captured = {}
    argv = ['foo.csv', '--type', '0=string', '0=int8?']
    csv2parquet.main_with_args(capture_args(captured), argv)
    # The trailing '?' marks the type as nullable/optional.
    expected = [('0', pa.string(), False), ('0', pa.int8(), True)]
    assert captured['raw_types'] == expected
示例11: test_fields
# Requires: import pyarrow (or: from pyarrow import int8)
def test_fields():
    """The 'fields' getter exposes schema fields keyed by name."""
    schema = Unischema('TestSchema', [
        UnischemaField('int_field', np.int8, (), ScalarCodec(IntegerType()), False),
        UnischemaField('string_field', np.string_, (), ScalarCodec(StringType()), False),
    ])
    assert len(schema.fields) == 2
    for field_name in ('int_field', 'string_field'):
        assert schema.fields[field_name].name == field_name
示例12: test_as_spark_schema_unspecified_codec_type_for_non_scalars_raises
# Requires: import pyarrow (or: from pyarrow import int8)
def test_as_spark_schema_unspecified_codec_type_for_non_scalars_raises():
    """Non-scalar fields must declare a codec; conversion otherwise raises."""
    schema = Unischema('TestSchema', [
        UnischemaField('int_vector_unspecified_codec', np.int8, (1,)),
    ])
    with pytest.raises(ValueError, match='has codec set to None'):
        schema.as_spark_schema()
示例13: test_create_schema_view_fails_validate
# Requires: import pyarrow (or: from pyarrow import int8)
def test_create_schema_view_fails_validate():
    """Selecting a field from a different schema raises ValueError."""
    schema = Unischema('TestSchema', [
        UnischemaField('int_field', np.int8, (), ScalarCodec(IntegerType()), False),
        UnischemaField('string_field', np.string_, (), ScalarCodec(StringType()), False),
    ])
    # This field is not part of 'schema', so the view must be rejected.
    foreign_field = UnischemaField('id', np.int64, (), ScalarCodec(LongType()), False)
    with pytest.raises(ValueError, match='does not belong to the schema'):
        schema.create_schema_view([foreign_field])
示例14: test_create_schema_view_using_invalid_type
# Requires: import pyarrow (or: from pyarrow import int8)
def test_create_schema_view_using_invalid_type():
    """A selector that is neither a string nor a field raises ValueError."""
    schema = Unischema('TestSchema', [
        UnischemaField('int_field', np.int8, (), ScalarCodec(IntegerType()), False),
        UnischemaField('string_field', np.string_, (), ScalarCodec(StringType()), False),
    ])
    with pytest.raises(ValueError, match='must be either a string'):
        schema.create_schema_view([42])
示例15: test_create_schema_view_using_regex
# Requires: import pyarrow (or: from pyarrow import int8)
def test_create_schema_view_using_regex():
    """A regex selector picks exactly the matching fields into the view."""
    schema = Unischema('TestSchema', [
        UnischemaField('int_field', np.int8, (), ScalarCodec(IntegerType()), False),
        UnischemaField('string_field', np.string_, (), ScalarCodec(StringType()), False),
    ])
    # Both str and unicode-literal patterns must behave identically.
    for pattern in ['int.*$', u'int.*$']:
        view = schema.create_schema_view([pattern])
        assert set(view.fields.keys()) == {'int_field'}