本文整理汇总了Python中pyarrow.float64函数的典型用法代码示例。如果您正苦于以下问题：pyarrow.float64的具体用法是什么？pyarrow.float64怎么用？有哪些pyarrow.float64的使用例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该函数所在模块pyarrow的用法示例。
在下文中一共展示了pyarrow.float64方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_list_columns_and_indexes_without_named_index
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import float64 [as 别名]
def test_list_columns_and_indexes_without_named_index(module_under_test):
    """Only the columns are listed when the frame has no explicit index."""
    dtype_of = pandas.api.types.pandas_dtype

    frame_columns = collections.OrderedDict()
    frame_columns["a_series"] = [1, 2, 3, 4]
    frame_columns["b_series"] = [0.1, 0.2, 0.3, 0.4]
    frame_columns["c_series"] = ["a", "b", "c", "d"]
    dataframe = pandas.DataFrame(frame_columns)

    listed = module_under_test.list_columns_and_indexes(dataframe)

    assert listed == [
        ("a_series", dtype_of("int64")),
        ("b_series", dtype_of("float64")),
        ("c_series", dtype_of("object")),
    ]
示例2: test_list_columns_and_indexes_with_named_index_same_as_column_name
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import float64 [as 别名]
def test_list_columns_and_indexes_with_named_index_same_as_column_name(
    module_under_test,
):
    """A named index that collides with a column name is not listed."""
    dtype_of = pandas.api.types.pandas_dtype

    frame_columns = collections.OrderedDict()
    frame_columns["a_series"] = [1, 2, 3, 4]
    frame_columns["b_series"] = [0.1, 0.2, 0.3, 0.4]
    frame_columns["c_series"] = ["a", "b", "c", "d"]

    # Use same name as an integer column but a different datatype so that
    # we can verify that the column is listed but the index isn't.
    clashing_index = pandas.Index([0.1, 0.2, 0.3, 0.4], name="a_series")
    dataframe = pandas.DataFrame(frame_columns, index=clashing_index)

    listed = module_under_test.list_columns_and_indexes(dataframe)

    assert listed == [
        ("a_series", dtype_of("int64")),
        ("b_series", dtype_of("float64")),
        ("c_series", dtype_of("object")),
    ]
示例3: test_list_columns_and_indexes_with_named_index
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import float64 [as 别名]
def test_list_columns_and_indexes_with_named_index(module_under_test):
    """A uniquely named index is listed first, followed by the columns."""
    dtype_of = pandas.api.types.pandas_dtype

    frame_columns = collections.OrderedDict()
    frame_columns["a_series"] = [1, 2, 3, 4]
    frame_columns["b_series"] = [0.1, 0.2, 0.3, 0.4]
    frame_columns["c_series"] = ["a", "b", "c", "d"]

    named_index = pandas.Index([4, 5, 6, 7], name="a_index")
    dataframe = pandas.DataFrame(frame_columns, index=named_index)

    listed = module_under_test.list_columns_and_indexes(dataframe)

    assert listed == [
        ("a_index", dtype_of("int64")),
        ("a_series", dtype_of("int64")),
        ("b_series", dtype_of("float64")),
        ("c_series", dtype_of("object")),
    ]
示例4: test_reduce_op_no_identity
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import float64 [as 别名]
def test_reduce_op_no_identity(data, skipna, op, pandas_op):
    """Compare ``op`` on float64 Arrow data with ``pandas_op`` on a Series.

    The comparison is run on a single contiguous array and, when ``data``
    has more than two elements, again on a two-chunk chunked array. When
    every value is null and either ``skipna`` is set or the input is empty,
    the comparison is expected to raise ``ValueError`` instead.
    """

    def as_float_array(values):
        return pa.array(values, type=pa.float64(), from_pandas=True)

    contiguous = as_float_array(data)
    series = pd.Series(data, dtype=float)

    should_raise = contiguous.null_count == len(contiguous) and (
        skipna or len(contiguous) == 0
    )

    def check(arrow_values):
        if should_raise:
            with pytest.raises(ValueError):
                assert_allclose_na(
                    op(arrow_values, skipna), pandas_op(series, skipna=skipna)
                )
        else:
            assert_allclose_na(
                op(arrow_values, skipna), pandas_op(series, skipna=skipna)
            )

    check(contiguous)

    # Split in the middle and check whether this still works
    if len(data) > 2:
        middle = len(data) // 2
        check(
            pa.chunked_array(
                [as_float_array(data[:middle]), as_float_array(data[middle:])]
            )
        )
示例5: format
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import float64 [as 别名]
def format(self, value: Union[int, float]) -> str:
    """Render ``value`` as ``prefix + formatted number + suffix``.

    When ``self._need_int`` is set, the value is truncated to ``int`` before
    formatting. Otherwise a float that holds a whole number is formatted as
    that integer, and any other value is formatted unchanged.

    Non-finite floats (NaN, +/-inf) cannot be converted to ``int``; in the
    float branch they are passed to the format spec as-is instead of raising.
    """
    if self._need_int:
        value = int(value)
    else:
        # Format float64 _integers_ as int. For instance, '3.0' should be
        # formatted as though it were the int, '3'.
        #
        # Python would normally format '3.0' as '3.0' by default; that's
        # not acceptable to us because we can't write a JavaScript
        # formatter that would do the same thing. (Javascript doesn't
        # distinguish between float and int.)
        try:
            int_value = int(value)
        except (ValueError, OverflowError):
            # int(nan) raises ValueError, int(+/-inf) raises OverflowError:
            # such values are never whole numbers, so keep them unchanged.
            pass
        else:
            if int_value == value:
                value = int_value

    # The f-string applies the format spec exactly like the builtin
    # format(value, spec) without referring to the name `format`, which this
    # function itself carries.
    return self._prefix + f"{value:{self._format_spec}}" + self._suffix
示例6: test_eval_operators_type_safety
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import float64 [as 别名]
def test_eval_operators_type_safety():
    """Predicate values of the wrong type raise TypeError; an int matches."""
    # gh66
    index = IndexBase(column="col", index_dct={1234: ["part"]}, dtype=pa.int64())

    rejected_values = [
        (
            "1234",
            r"Unexpected type for predicate: Column 'col' has pandas type 'int64', "
            r"but predicate value '1234' has pandas type 'object' \(Python type '<class 'str'>'\).",
        ),
        (
            1234.0,
            r"Unexpected type for predicate: Column 'col' has pandas type 'int64', "
            r"but predicate value 1234.0 has pandas type 'float64' \(Python type '<class 'float'>'\).",
        ),
    ]
    for predicate_value, expected_message in rejected_values:
        with pytest.raises(TypeError, match=expected_message):
            index.eval_operator("==", predicate_value)

    assert index.eval_operator("==", 1234) == {"part"}
示例7: _get_numeric_byte_size_test_cases
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import float64 [as 别名]
def _get_numeric_byte_size_test_cases():
    """Build one byte-size test case per numeric Arrow type.

    Each case is a dict holding a 9-element array of the type, the slice
    offset and length, and the expected sizes of the full and sliced arrays
    (null bitmap size plus element width times element count).
    """
    widths_by_type = [
        (pa.int8(), 1),
        (pa.uint8(), 1),
        (pa.int16(), 2),
        (pa.uint16(), 2),
        (pa.int32(), 4),
        (pa.uint32(), 4),
        (pa.int64(), 8),
        (pa.uint64(), 8),
        (pa.float32(), 4),
        (pa.float64(), 8),
    ]
    return [
        dict(
            testcase_name=str(array_type),
            array=pa.array(range(9), type=array_type),
            slice_offset=2,
            slice_length=3,
            expected_size=(_all_false_null_bitmap_size(2) + sizeof * 9),
            expected_sliced_size=(_all_false_null_bitmap_size(1) + sizeof * 3),
        )
        for array_type, sizeof in widths_by_type
    ]
示例8: test_iterate_over_float_chunk
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import float64 [as 别名]
def test_iterate_over_float_chunk():
    """Run the shared chunk-iteration check on two float64 columns.

    Values are drawn uniformly from [-100.0, 100.0]; the column metadata
    declares the logical types REAL and FLOAT.
    """
    # random.seed() only accepts None, int, float, str, bytes or bytearray
    # since Python 3.11; a datetime object raises TypeError there. Seed with
    # the current POSIX timestamp instead, which is still time-based.
    random.seed(datetime.datetime.now().timestamp())

    column_meta = [
        {"logicalType": "REAL"},
        {"logicalType": "FLOAT"},
    ]

    def float_generator():
        return random.uniform(-100.0, 100.0)

    iterate_over_test_chunk(
        [pyarrow.float64(), pyarrow.float64()], column_meta, float_generator
    )
示例9: get_pyarrow_types
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import float64 [as 别名]
def get_pyarrow_types():
    """Return a new dict mapping type-name strings to the PA_* constants."""
    types_by_name = {
        'bool': PA_BOOL,
        'float32': PA_FLOAT32,
        'float64': PA_FLOAT64,
        'int8': PA_INT8,
        'int16': PA_INT16,
        'int32': PA_INT32,
        'int64': PA_INT64,
        'string': PA_STRING,
        'timestamp': PA_TIMESTAMP,
        # Note the name/constant mismatch: 'base64' maps to PA_BINARY.
        'base64': PA_BINARY,
    }
    return types_by_name
# pylint: disable=too-many-branches,too-many-statements
示例10: test_dict_to_spark_row_order
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import float64 [as 别名]
def test_dict_to_spark_row_order():
    """Row values follow the schema's field order, not the dict's order."""
    TestSchema = Unischema(
        'TestSchema',
        [
            UnischemaField('float_col', np.float64, ()),
            UnischemaField('int_col', np.int64, ()),
        ],
    )

    # Deliberately populated in the opposite order to the schema definition.
    row_dict = {}
    row_dict[TestSchema.int_col.name] = 3
    row_dict[TestSchema.float_col.name] = 2.0

    spark_row = dict_to_spark_row(TestSchema, row_dict)

    schema_field_names = list(TestSchema.fields)
    for position in (0, 1):
        assert spark_row[position] == row_dict[schema_field_names[position]]
示例11: test_arrow_schema_convertion
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import float64 [as 别名]
def test_arrow_schema_convertion():
    """Check the Unischema built from an Arrow schema field by field.

    Every field must keep its name, have no codec, and be nullable except
    for 'bool', which is declared non-nullable. Field order must be kept.
    """
    nullable_fields_before_bool = [
        pa.field('string', pa.string()),
        pa.field('int8', pa.int8()),
        pa.field('int16', pa.int16()),
        pa.field('int32', pa.int32()),
        pa.field('int64', pa.int64()),
        pa.field('float', pa.float32()),
        pa.field('double', pa.float64()),
    ]
    non_nullable_bool = pa.field('bool', pa.bool_(), False)
    nullable_fields_after_bool = [
        pa.field('fixed_size_binary', pa.binary(10)),
        pa.field('variable_size_binary', pa.binary()),
        pa.field('decimal', pa.decimal128(3, 4)),
        pa.field('timestamp_s', pa.timestamp('s')),
        pa.field('timestamp_ns', pa.timestamp('ns')),
        pa.field('date_32', pa.date32()),
        pa.field('date_64', pa.date64()),
    ]
    fields = (
        nullable_fields_before_bool
        + [non_nullable_bool]
        + nullable_fields_after_bool
    )

    arrow_schema = pa.schema(fields)
    mock_dataset = _mock_parquet_dataset([], arrow_schema)
    unischema = Unischema.from_arrow_schema(mock_dataset)

    for name in arrow_schema.names:
        converted = getattr(unischema, name)
        assert converted.name == name
        assert converted.codec is None
        expect_nullable = name != 'bool'
        assert bool(converted.nullable) == expect_nullable

    # Test schema preserve fields order
    assert list(unischema.fields.keys()) == [f.name for f in fields]
示例12: to_arrow_type
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import float64 [as 别名]
def to_arrow_type(dt):
    """Convert a Spark data type to the corresponding pyarrow type.

    :param dt: a Spark data type instance
    :return: the matching pyarrow data type
    :raises TypeError: if ``dt`` has no supported Arrow equivalent, if it is
        an array of timestamps, or if it is a BinaryType and the installed
        pyarrow is older than 0.10.0
    """
    import re

    import pyarrow as pa

    # Matching is on the exact type (not isinstance), as in the original
    # `type(dt) == X` chain, so subclasses of these types stay unsupported.
    spark_type = type(dt)
    parameterless_types = {
        BooleanType: pa.bool_,
        ByteType: pa.int8,
        ShortType: pa.int16,
        IntegerType: pa.int32,
        LongType: pa.int64,
        FloatType: pa.float32,
        DoubleType: pa.float64,
        StringType: pa.string,
        DateType: pa.date32,
    }
    if spark_type in parameterless_types:
        return parameterless_types[spark_type]()

    if spark_type is DecimalType:
        return pa.decimal128(dt.precision, dt.scale)

    if spark_type is BinaryType:
        # TODO: remove version check once minimum pyarrow version is 0.10.0
        # distutils (and LooseVersion) no longer exists on Python 3.12+, so
        # compare the leading (major, minor) numbers of the version instead.
        major_minor = tuple(
            int(number) for number in re.findall(r"\d+", pa.__version__)[:2]
        )
        if major_minor < (0, 10):
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt) +
                            "\nPlease install pyarrow >= 0.10.0 for BinaryType support.")
        return pa.binary()

    if spark_type is TimestampType:
        # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read
        return pa.timestamp('us', tz='UTC')

    if spark_type is ArrayType:
        if type(dt.elementType) is TimestampType:
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
        return pa.list_(to_arrow_type(dt.elementType))

    raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
示例13: test_bq_to_arrow_data_type_w_struct
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import float64 [as 别名]
def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type):
    """A NULLABLE field with sub-fields converts to the matching Arrow struct.

    The 14 sub-fields cover each BigQuery scalar type name used here, and the
    expected struct lists the Arrow type each one should map to.
    """
    fields = (
        schema.SchemaField("field01", "STRING"),
        schema.SchemaField("field02", "BYTES"),
        schema.SchemaField("field03", "INTEGER"),
        schema.SchemaField("field04", "INT64"),
        schema.SchemaField("field05", "FLOAT"),
        schema.SchemaField("field06", "FLOAT64"),
        schema.SchemaField("field07", "NUMERIC"),
        schema.SchemaField("field08", "BOOLEAN"),
        schema.SchemaField("field09", "BOOL"),
        schema.SchemaField("field10", "TIMESTAMP"),
        schema.SchemaField("field11", "DATE"),
        schema.SchemaField("field12", "TIME"),
        schema.SchemaField("field13", "DATETIME"),
        schema.SchemaField("field14", "GEOGRAPHY"),
    )
    field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields)

    actual = module_under_test.bq_to_arrow_data_type(field)

    expected = pyarrow.struct(
        (
            pyarrow.field("field01", pyarrow.string()),
            pyarrow.field("field02", pyarrow.binary()),
            pyarrow.field("field03", pyarrow.int64()),
            pyarrow.field("field04", pyarrow.int64()),
            pyarrow.field("field05", pyarrow.float64()),
            pyarrow.field("field06", pyarrow.float64()),
            pyarrow.field("field07", module_under_test.pyarrow_numeric()),
            pyarrow.field("field08", pyarrow.bool_()),
            pyarrow.field("field09", pyarrow.bool_()),
            pyarrow.field("field10", module_under_test.pyarrow_timestamp()),
            pyarrow.field("field11", pyarrow.date32()),
            pyarrow.field("field12", module_under_test.pyarrow_time()),
            pyarrow.field("field13", module_under_test.pyarrow_datetime()),
            pyarrow.field("field14", pyarrow.string()),
        )
    )
    assert pyarrow.types.is_struct(actual)
    # `num_children` is deprecated in pyarrow; `num_fields` is its replacement.
    assert actual.num_fields == len(fields)
    assert actual.equals(expected)
示例14: test_bq_to_arrow_data_type_w_array_struct
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import float64 [as 别名]
def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type):
    """A REPEATED field with sub-fields converts to an Arrow list of structs.

    The 14 sub-fields cover each BigQuery scalar type name used here, and the
    expected struct lists the Arrow type each one should map to.
    """
    fields = (
        schema.SchemaField("field01", "STRING"),
        schema.SchemaField("field02", "BYTES"),
        schema.SchemaField("field03", "INTEGER"),
        schema.SchemaField("field04", "INT64"),
        schema.SchemaField("field05", "FLOAT"),
        schema.SchemaField("field06", "FLOAT64"),
        schema.SchemaField("field07", "NUMERIC"),
        schema.SchemaField("field08", "BOOLEAN"),
        schema.SchemaField("field09", "BOOL"),
        schema.SchemaField("field10", "TIMESTAMP"),
        schema.SchemaField("field11", "DATE"),
        schema.SchemaField("field12", "TIME"),
        schema.SchemaField("field13", "DATETIME"),
        schema.SchemaField("field14", "GEOGRAPHY"),
    )
    field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields)

    actual = module_under_test.bq_to_arrow_data_type(field)

    expected_value_type = pyarrow.struct(
        (
            pyarrow.field("field01", pyarrow.string()),
            pyarrow.field("field02", pyarrow.binary()),
            pyarrow.field("field03", pyarrow.int64()),
            pyarrow.field("field04", pyarrow.int64()),
            pyarrow.field("field05", pyarrow.float64()),
            pyarrow.field("field06", pyarrow.float64()),
            pyarrow.field("field07", module_under_test.pyarrow_numeric()),
            pyarrow.field("field08", pyarrow.bool_()),
            pyarrow.field("field09", pyarrow.bool_()),
            pyarrow.field("field10", module_under_test.pyarrow_timestamp()),
            pyarrow.field("field11", pyarrow.date32()),
            pyarrow.field("field12", module_under_test.pyarrow_time()),
            pyarrow.field("field13", module_under_test.pyarrow_datetime()),
            pyarrow.field("field14", pyarrow.string()),
        )
    )
    assert pyarrow.types.is_list(actual)
    assert pyarrow.types.is_struct(actual.value_type)
    # `num_children` is deprecated in pyarrow; `num_fields` is its replacement.
    assert actual.value_type.num_fields == len(fields)
    assert actual.value_type.equals(expected_value_type)
示例15: test_to_dataframe_column_dtypes
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import float64 [as 别名]
def test_to_dataframe_column_dtypes(self):
    """Check the dtype of each column produced by ``to_dataframe``.

    The ``dtypes`` argument overrides only the "km" column (float16); every
    other column is expected to have the dtype asserted below.
    """
    from google.cloud.bigquery.schema import SchemaField

    schema = [
        SchemaField(column_name, bq_type)
        for column_name, bq_type in (
            ("start_timestamp", "TIMESTAMP"),
            ("seconds", "INT64"),
            ("miles", "FLOAT64"),
            ("km", "FLOAT64"),
            ("payment_type", "STRING"),
            ("complete", "BOOL"),
            ("date", "DATE"),
        )
    ]
    row_data = [
        ["1.4338368E9", "420", "1.1", "1.77", "Cash", "true", "1999-12-01"],
        ["1.3878117E9", "2580", "17.7", "28.5", "Cash", "false", "1953-06-14"],
        ["1.3855653E9", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"],
    ]
    # Wrap every cell as {"v": cell} and every row as {"f": [...]}, the
    # shape handed to the mocked API response below.
    rows = [{"f": [{"v": cell} for cell in record]} for record in row_data]
    path = "/foo"
    api_request = mock.Mock(return_value={"rows": rows})
    row_iterator = self._make_one(_mock_client(), api_request, path, schema)

    df = row_iterator.to_dataframe(
        dtypes={"km": "float16"}, create_bqstorage_client=False,
    )

    self.assertIsInstance(df, pandas.DataFrame)
    self.assertEqual(len(df), 3)  # verify the number of rows
    exp_columns = [field.name for field in schema]
    self.assertEqual(list(df), exp_columns)  # verify the column names

    expected_dtype_names = [
        ("start_timestamp", "datetime64[ns, UTC]"),
        ("seconds", "int64"),
        ("miles", "float64"),
        ("km", "float16"),
        ("payment_type", "object"),
        ("complete", "bool"),
        ("date", "object"),
    ]
    for column_name, dtype_name in expected_dtype_names:
        self.assertEqual(df[column_name].dtype.name, dtype_name)