This article collects typical usage examples of pyarrow.DataType in Python. If you are wondering what pyarrow.DataType is for and how to use it, the curated examples below may help. You can also explore further usage examples from the pyarrow module it belongs to.
Below are 15 code examples of pyarrow.DataType, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
Example 1: get_nest_level
# Required module: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def get_nest_level(array_type: pa.DataType) -> int:
  """Returns the nest level of an array type.

  The nest level of primitive types is 0.
  The nest level of null is 1, because a null array is used to represent
  list<unknown_type>.
  The nest level of list<inner_type> is get_nest_level(inner_type) + 1.

  Args:
    array_type: pa.DataType

  Returns:
    The nest level.
  """
  result = 0
  while is_list_like(array_type):
    result += 1
    array_type = array_type.value_type

  # null is like list<unknown_primitive>
  if pa.types.is_null(array_type):
    result += 1
  return result
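A quick usage sketch, assuming is_list_like from the same module (shown in Example 9 below) is in scope:

import pyarrow as pa

print(get_nest_level(pa.int64()))                     # 0
print(get_nest_level(pa.list_(pa.int64())))           # 1
print(get_nest_level(pa.list_(pa.list_(pa.null()))))  # 3 (the inner null adds one level)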
Example 2: _pyarrow_type_to_column_type
# Required module: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def _pyarrow_type_to_column_type(
    dtype: pyarrow.DataType, fallback_column_type: Optional[ColumnType]
) -> ColumnType:
    if pyarrow.types.is_floating(dtype) or pyarrow.types.is_integer(dtype):
        if fallback_column_type is not None and fallback_column_type.name == "number":
            return ColumnTypeNumber(fallback_column_type.format)
        else:
            return ColumnTypeNumber()
    elif pyarrow.types.is_string(dtype) or (
        pyarrow.types.is_dictionary(dtype)
        and pyarrow.types.is_string(dtype.value_type)
    ):
        return ColumnTypeText()
    elif pyarrow.types.is_timestamp(dtype):
        return ColumnTypeDatetime()
    else:
        raise ValueError("Unknown pyarrow type %r" % dtype)
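The dictionary branch matters because pandas categorical columns arrive as dictionary-encoded Arrow arrays. A minimal check of the pyarrow predicates it relies on, with no project imports:

import pyarrow as pa

dict_type = pa.dictionary(pa.int32(), pa.string())
print(pa.types.is_dictionary(dict_type))         # True
print(pa.types.is_string(dict_type.value_type))  # True -> mapped to ColumnTypeText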
Example 3: __init__
# Required module: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def __init__(
    self,
    column: str,
    index_dct: Optional[IndexDictType] = None,
    dtype: pa.DataType = None,
    normalize_dtype: bool = True,
):
    if dtype is None:
        raise ValueError(
            'PartitionIndex dtype of column "{}" cannot be None!'.format(column)
        )
    super(PartitionIndex, self).__init__(
        column=column,
        index_dct=index_dct,
        dtype=dtype,
        normalize_dtype=normalize_dtype,
    )
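A hypothetical instantiation, assuming PartitionIndex and its base class come from the surrounding project (the names are taken from the signature above):

import pyarrow as pa

idx = PartitionIndex(column="date", index_dct={}, dtype=pa.string())
# PartitionIndex(column="date")  # would raise ValueError: dtype cannot be None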
Example 4: _get_type_from_meta
# Required module: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def _get_type_from_meta(
    table_meta: Optional[Dict[str, SchemaWrapper]],
    column: str,
    default: Optional[pa.DataType],
) -> pa.DataType:
    # Use the first schema that provides type information, since the write
    # path should ensure that types are normalized and equal.
    if table_meta is not None:
        for schema in table_meta.values():
            if column not in schema.names:
                continue
            idx = schema.get_field_index(column)
            return schema[idx].type

    if default is not None:
        return default

    raise ValueError(
        'Cannot find type information for partition column "{}"'.format(column)
    )
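A plain pa.Schema happens to duck-type the SchemaWrapper usage above (.names, .get_field_index, indexing), so a minimal sketch, assuming the function is in scope, might look like:

import pyarrow as pa

meta = {"table": pa.schema([("part_col", pa.string()), ("value", pa.int64())])}
print(_get_type_from_meta(meta, "part_col", default=None))       # string
print(_get_type_from_meta(meta, "missing", default=pa.int32()))  # int32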
Example 5: _GetNestDepthAndValueType
# Required module: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def _GetNestDepthAndValueType(
    arrow_schema: pa.Schema,
    column_path: path.ColumnPath) -> Tuple[int, pa.DataType]:
  """Returns the depth of a leaf field and its innermost value type.

  The depth is the number of nested lists wrapping the leaf field.

  Args:
    arrow_schema: The Arrow schema to traverse.
    column_path: A path of field names. The path must describe a leaf struct.

  Returns:
    A tuple of (depth, arrow type).
  """
  arrow_type = arrow_schema.field(column_path.steps()[0]).type
  depth = 0
  for arrow_type in _EnumerateTypesAlongPath(arrow_schema, column_path):
    if _IsListLike(arrow_type):
      depth += 1
  return depth, arrow_type
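The same depth computation can be sketched against plain pyarrow (the original relies on tfx_bsl helpers such as _EnumerateTypesAlongPath), assuming a reasonably recent pyarrow where Schema.field accepts a name:

import pyarrow as pa

schema = pa.schema([("f", pa.list_(pa.list_(pa.int64())))])
t = schema.field("f").type
depth = 0
while pa.types.is_list(t) or pa.types.is_large_list(t):
    depth += 1
    t = t.value_type
print(depth, t)  # 2 int64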
Example 6: _GetAllowedDefaultValue
# Required module: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def _GetAllowedDefaultValue(
    value_type: pa.DataType,
    default_value_proto: schema_pb2.TensorRepresentation.DefaultValue
) -> Union[int, float, bytes]:
  """Returns the default value set in the DefaultValue proto, or raises."""
  kind = default_value_proto.WhichOneof("kind")
  if kind in ("int_value", "uint_value") and pa.types.is_integer(value_type):
    value = getattr(default_value_proto, kind)
    iinfo = np.iinfo(value_type.to_pandas_dtype())
    if iinfo.min <= value <= iinfo.max:
      return value
    else:
      raise ValueError("Integer default value out of range: {} is set for a "
                       "{} column".format(value, value_type))
  elif kind == "float_value" and pa.types.is_floating(value_type):
    return default_value_proto.float_value
  elif kind == "bytes_value" and _IsBinaryLike(value_type):
    return default_value_proto.bytes_value

  raise ValueError(
      "Incompatible default value: {} is set for a {} column".format(
          kind, value_type))
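The integer range check above can be reproduced standalone; pa.DataType.to_pandas_dtype() returns the matching NumPy dtype:

import numpy as np
import pyarrow as pa

iinfo = np.iinfo(pa.int8().to_pandas_dtype())
print(iinfo.min, iinfo.max)            # -128 127
print(iinfo.min <= 1000 <= iinfo.max)  # False -> such a default would be rejected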
Example 7: get_feature_type_from_arrow_type
# Required module: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def get_feature_type_from_arrow_type(
    feature_path: types.FeaturePath,
    arrow_type: pa.DataType) -> Optional[types.FeatureNameStatisticsType]:
  """Gets the feature type from an Arrow type.

  Args:
    feature_path: Path of the feature.
    arrow_type: Arrow DataType.

  Returns:
    A statistics_pb2.FeatureNameStatistics.Type value, or None if arrow_type
    is null (which means it cannot be determined for now).

  Raises:
    TypeError: If the type is not supported.
  """
  if pa.types.is_null(arrow_type):
    return None
  if not arrow_util.is_list_like(arrow_type):
    raise TypeError('Expected feature column to be a '
                    '(Large)List<primitive|struct> or null, but feature {} '
                    'was {}.'.format(feature_path, arrow_type))

  value_type = arrow_util.get_innermost_nested_type(arrow_type)
  if pa.types.is_integer(value_type):
    return statistics_pb2.FeatureNameStatistics.INT
  elif pa.types.is_floating(value_type):
    return statistics_pb2.FeatureNameStatistics.FLOAT
  elif arrow_util.is_binary_like(value_type):
    return statistics_pb2.FeatureNameStatistics.STRING
  elif pa.types.is_struct(value_type):
    return statistics_pb2.FeatureNameStatistics.STRUCT
  elif pa.types.is_null(value_type):
    return None

  raise TypeError('Feature {} has unsupported arrow type: {}'.format(
      feature_path, arrow_type))
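The dispatch above depends on TFDV's arrow_util; the innermost-type resolution it performs can be sketched with plain pyarrow:

import pyarrow as pa

t = pa.list_(pa.struct([("a", pa.int64())]))
inner = t
while pa.types.is_list(inner) or pa.types.is_large_list(inner):
    inner = inner.value_type
print(pa.types.is_struct(inner))  # True -> FeatureNameStatistics.STRUCT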
Example 8: is_binary_like
# Required module: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def is_binary_like(data_type: pa.DataType) -> bool:
  """Returns true if an Arrow type is binary-like.

  Qualified types are {Large,}BinaryArray and {Large,}StringArray.

  Args:
    data_type: a pa.DataType.

  Returns:
    bool.
  """
  return (pa.types.is_binary(data_type) or
          pa.types.is_large_binary(data_type) or
          pa.types.is_unicode(data_type) or
          pa.types.is_large_unicode(data_type))
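For example:

import pyarrow as pa

print(is_binary_like(pa.binary()))        # True
print(is_binary_like(pa.large_string()))  # True
print(is_binary_like(pa.int64()))         # False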
Example 9: is_list_like
# Required module: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def is_list_like(data_type: pa.DataType) -> bool:
  """Returns true if an Arrow type is list-like."""
  return pa.types.is_list(data_type) or pa.types.is_large_list(data_type)
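For example:

import pyarrow as pa

print(is_list_like(pa.list_(pa.int32())))       # True
print(is_list_like(pa.large_list(pa.int32())))  # True
print(is_list_like(pa.binary()))                # False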
Example 10: get_innermost_nested_type
# Required module: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def get_innermost_nested_type(arrow_type: pa.DataType) -> pa.DataType:
  """Returns the innermost type of a nested list type."""
  while is_list_like(arrow_type):
    arrow_type = arrow_type.value_type
  return arrow_type
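For example:

import pyarrow as pa

nested = pa.large_list(pa.list_(pa.string()))
print(get_innermost_nested_type(nested))  # string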
Example 11: _create_batch
# Required module: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def _create_batch(series, timezone):
    """
    Create an Arrow record batch from the given pandas.Series or list of
    Series, with optional type.

    :param series: A single pandas.Series, list of Series, or list of
        (series, arrow_type)
    :param timezone: A timezone to respect when handling timestamp values
    :return: Arrow RecordBatch
    """
    import decimal
    import sys
    from distutils.version import LooseVersion
    import pyarrow as pa
    from pyspark.sql.types import _check_series_convert_timestamps_internal

    # Make input conform to [(series1, type1), (series2, type2), ...]
    if not isinstance(series, (list, tuple)) or \
            (len(series) == 2 and isinstance(series[1], pa.DataType)):
        series = [series]
    series = ((s, None) if not isinstance(s, (list, tuple)) else s for s in series)

    def create_array(s, t):
        mask = s.isnull()
        # Ensure timestamp series are in expected form for Spark internal
        # representation.
        # TODO: maybe don't need None check anymore as of Arrow 0.9.1
        if t is not None and pa.types.is_timestamp(t):
            s = _check_series_convert_timestamps_internal(s.fillna(0), timezone)
            # TODO: need cast after Arrow conversion, ns values cause error
            # with pandas 0.19.2
            return pa.Array.from_pandas(s, mask=mask).cast(t, safe=False)
        elif t is not None and pa.types.is_string(t) and sys.version < '3':
            # TODO: need decode before converting to Arrow in Python 2
            # TODO: don't need as of Arrow 0.9.1
            return pa.Array.from_pandas(s.apply(
                lambda v: v.decode("utf-8") if isinstance(v, str) else v),
                mask=mask, type=t)
        elif t is not None and pa.types.is_decimal(t) and \
                LooseVersion("0.9.0") <= LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
            # TODO: see ARROW-2432. Remove when the minimum PyArrow version
            # becomes 0.10.0.
            return pa.Array.from_pandas(s.apply(
                lambda v: decimal.Decimal('NaN') if v is None else v),
                mask=mask, type=t)
        return pa.Array.from_pandas(s, mask=mask, type=t)

    arrs = [create_array(s, t) for s, t in series]
    return pa.RecordBatch.from_arrays(arrs, ["_%d" % i for i in xrange(len(arrs))])
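Note that this is Python 2-era PySpark code (xrange, sys.version checks). The Spark-specific handling aside, the core Series-to-RecordBatch conversion can be sketched with plain pandas and pyarrow:

import pandas as pd
import pyarrow as pa

s = pd.Series([1.0, None, 3.0])
arr = pa.Array.from_pandas(s, mask=s.isnull(), type=pa.float64())
batch = pa.RecordBatch.from_arrays([arr], ["_0"])
print(batch.num_rows)  # 3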
Example 12: is_arithmetic_type
# Required module: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def is_arithmetic_type(arrow_dtype: pa.DataType) -> bool:
    """Check whether this is a type that supports arithmetic."""
    return (
        pa.types.is_integer(arrow_dtype)
        or pa.types.is_floating(arrow_dtype)
        or pa.types.is_decimal(arrow_dtype)
    )
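For example:

import pyarrow as pa

print(is_arithmetic_type(pa.decimal128(10, 2)))  # True
print(is_arithmetic_type(pa.string()))           # False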
Example 13: _get_example
# Required module: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def _get_example(arrow_dtype: pa.DataType) -> pa.Array:
    if isinstance(arrow_dtype, pa.ListType):
        return pa.array(
            [None, _get_example(arrow_dtype.value_type).to_pylist()],
            type=arrow_dtype,
        )
    return _examples[arrow_dtype]
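_examples is module-level state in the original project; a hypothetical stand-in shows the recursion:

import pyarrow as pa

_examples = {pa.int64(): pa.array([None, 42], type=pa.int64())}
print(_get_example(pa.list_(pa.int64())).to_pylist())  # [None, [None, 42]]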
Example 14: _is_numeric
# Required module: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def _is_numeric(arrow_dtype: pa.DataType) -> bool:
    return (
        pa.types.is_integer(arrow_dtype)
        or pa.types.is_floating(arrow_dtype)
        or pa.types.is_decimal(arrow_dtype)
    )
Example 15: __init__
# Required module: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def __init__(self, arrow_dtype: pa.DataType):
    self.arrow_dtype = arrow_dtype