

Python pyarrow.DataType Code Examples

This article collects typical usage examples of pyarrow.DataType in Python. If you are asking yourself how to use pyarrow.DataType, what it is for, or where to find examples of it in practice, the curated code samples below may help. You can also explore further usage examples from the pyarrow package it belongs to.


The following presents 15 code examples of pyarrow.DataType, ordered by popularity.

Example 1: get_nest_level

# Required import: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def get_nest_level(array_type: pa.DataType) -> int:
  """Returns the nest level of an array type.

  The nest level of primitive types is 0.
  The nest level of null is 1, because a null array represents
    list<unknown_type>.
  The nest level of list<inner_type> is get_nest_level(inner_type) + 1

  Args:
    array_type: pa.DataType

  Returns:
    the nest level.
  """
  result = 0
  while is_list_like(array_type):
    result += 1
    array_type = array_type.value_type

  # null is like list<unknown_primitive>
  if pa.types.is_null(array_type):
    result += 1
  return result 
Author: tensorflow, Project: data-validation, Lines: 25, Source: arrow_util.py
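
As a quick illustration, here is a minimal sketch of how the nest level behaves on a few common Arrow types; it assumes get_nest_level and is_list_like from this module are in scope:

import pyarrow as pa

assert get_nest_level(pa.int64()) == 0                    # primitive type
assert get_nest_level(pa.null()) == 1                     # null behaves like list<unknown_type>
assert get_nest_level(pa.list_(pa.int32())) == 1          # list<int32>
assert get_nest_level(pa.list_(pa.list_(pa.int64()))) == 2  # list<list<int64>>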

Example 2: _pyarrow_type_to_column_type

# Required import: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def _pyarrow_type_to_column_type(
    dtype: pyarrow.DataType, fallback_column_type: Optional[ColumnType]
) -> ColumnType:
    if pyarrow.types.is_floating(dtype) or pyarrow.types.is_integer(dtype):
        if fallback_column_type is not None and fallback_column_type.name == "number":
            return ColumnTypeNumber(fallback_column_type.format)
        else:
            return ColumnTypeNumber()
    elif pyarrow.types.is_string(dtype) or (
        pyarrow.types.is_dictionary(dtype) and pyarrow.types.is_string(dtype.value_type)
    ):
        return ColumnTypeText()
    elif pyarrow.types.is_timestamp(dtype):
        return ColumnTypeDatetime()
    else:
        raise ValueError("Unknown pyarrow type %r" % dtype)
Author: CJWorkbench, Project: cjworkbench, Lines: 18, Source: types.py
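
A hedged usage sketch; ColumnTypeNumber, ColumnTypeText and ColumnTypeDatetime are cjworkbench-specific classes, so treating them as importable here is an assumption:

import pyarrow

# Hypothetical usage, assuming the ColumnType classes above are importable:
_pyarrow_type_to_column_type(pyarrow.float64(), None)        # -> ColumnTypeNumber()
_pyarrow_type_to_column_type(pyarrow.utf8(), None)           # -> ColumnTypeText()
_pyarrow_type_to_column_type(pyarrow.timestamp("ns"), None)  # -> ColumnTypeDatetime()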

Example 3: __init__

# Required import: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def __init__(
        self,
        column: str,
        index_dct: Optional[IndexDictType] = None,
        dtype: pa.DataType = None,
        normalize_dtype: bool = True,
    ):
        if dtype is None:
            raise ValueError(
                'PartitionIndex dtype of column "{}" cannot be None!'.format(column)
            )
        super(PartitionIndex, self).__init__(
            column=column,
            index_dct=index_dct,
            dtype=dtype,
            normalize_dtype=normalize_dtype,
        ) 
Author: JDASoftwareGroup, Project: kartothek, Lines: 19, Source: index.py
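
A brief usage sketch under the assumption that PartitionIndex can be constructed directly with just these arguments (the remaining base-class behaviour is not shown):

import pyarrow as pa

# Hypothetical usage: a dtype must always be supplied for a partition index.
idx = PartitionIndex(column="partition_date", dtype=pa.string())
# PartitionIndex(column="partition_date", dtype=None)  # would raise ValueError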

Example 4: _get_type_from_meta

# Required import: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def _get_type_from_meta(
    table_meta: Optional[Dict[str, SchemaWrapper]],
    column: str,
    default: Optional[pa.DataType],
) -> pa.DataType:
    # use first schema that provides type information, since write path should ensure that types are normalized and
    # equal
    if table_meta is not None:
        for schema in table_meta.values():
            if column not in schema.names:
                continue
            idx = schema.get_field_index(column)
            return schema[idx].type

    if default is not None:
        return default

    raise ValueError(
        'Cannot find type information for partition column "{}"'.format(column)
    ) 
Author: JDASoftwareGroup, Project: kartothek, Lines: 22, Source: dataset.py
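
A minimal sketch of the fallback behaviour when no table metadata is given (SchemaWrapper is kartothek-specific and not constructed here):

import pyarrow as pa

# With no metadata, the declared default is returned ...
assert _get_type_from_meta(None, "partition_col", pa.string()) == pa.string()
# ... and with neither metadata nor a default, a ValueError is raised.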

Example 5: _GetNestDepthAndValueType

# Required import: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def _GetNestDepthAndValueType(
    arrow_schema: pa.Schema,
    column_path: path.ColumnPath) -> Tuple[int, pa.DataType]:
  """Returns the depth of a leaf field, and its innermost value type.

  The depth is the number of nested lists in the leaf field.

  Args:
    arrow_schema: The arrow schema to traverse.
    column_path: A path of field names. The path must describe a leaf struct.

  Returns: A Tuple of depth and arrow type
  """
  arrow_type = arrow_schema.field(column_path.steps()[0]).type
  depth = 0

  for arrow_type in _EnumerateTypesAlongPath(arrow_schema, column_path):
    if _IsListLike(arrow_type):
      depth += 1

  return depth, arrow_type 
Author: tensorflow, Project: tfx-bsl, Lines: 23, Source: tensor_adapter.py
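
The path-based helpers (_EnumerateTypesAlongPath, path.ColumnPath) are tfx-bsl internals, so the following is only a simplified stand-in that shows the same depth/value-type idea for a single top-level column:

import pyarrow as pa

schema = pa.schema([("f", pa.list_(pa.list_(pa.int64())))])
arrow_type = schema.field("f").type
depth = 0
while pa.types.is_list(arrow_type) or pa.types.is_large_list(arrow_type):
    depth += 1
    arrow_type = arrow_type.value_type
# depth == 2, arrow_type == pa.int64()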

Example 6: _GetAllowedDefaultValue

# Required import: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def _GetAllowedDefaultValue(
    value_type: pa.DataType,
    default_value_proto: schema_pb2.TensorRepresentation.DefaultValue
) -> Union[int, float, bytes]:
  """Returns the default value set in DefaultValue proto or raises."""
  kind = default_value_proto.WhichOneof("kind")
  if kind in ("int_value", "uint_value") and pa.types.is_integer(value_type):
    value = getattr(default_value_proto, kind)
    iinfo = np.iinfo(value_type.to_pandas_dtype())
    if value <= iinfo.max and value >= iinfo.min:
      return value
    else:
      raise ValueError("Integer default value out of range: {} is set for a "
                       "{} column".format(value, value_type))
  elif kind == "float_value" and pa.types.is_floating(value_type):
    return default_value_proto.float_value
  elif kind == "bytes_value" and _IsBinaryLike(value_type):
    return default_value_proto.bytes_value

  raise ValueError(
      "Incompatible default value: {} is set for a {} column".format(
          kind, value_type)) 
Author: tensorflow, Project: tfx-bsl, Lines: 24, Source: tensor_adapter.py
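
A hedged usage sketch; it assumes tensorflow_metadata's schema_pb2 is importable and that a DefaultValue message can be constructed directly with a keyword argument:

import pyarrow as pa
from tensorflow_metadata.proto.v0 import schema_pb2  # assumption: available

default = schema_pb2.TensorRepresentation.DefaultValue(int_value=-1)
_GetAllowedDefaultValue(pa.int64(), default)      # -> -1
# _GetAllowedDefaultValue(pa.float32(), default)  # would raise ValueError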

Example 7: get_feature_type_from_arrow_type

# Required import: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def get_feature_type_from_arrow_type(
    feature_path: types.FeaturePath,
    arrow_type: pa.DataType) -> Optional[types.FeatureNameStatisticsType]:
  """Get feature type from Arrow type.

  Args:
    feature_path: path of the feature.
    arrow_type: Arrow DataType.

  Returns:
    A statistics_pb2.FeatureNameStatistics.Type value or None if arrow_type
    is null (which means it cannot be determined for now).

  Raises:
    TypeError: if the type is not supported.
  """
  if pa.types.is_null(arrow_type):
    return None
  if not arrow_util.is_list_like(arrow_type):
    raise TypeError('Expected feature column to be a '
                    '(Large)List<primitive|struct> or null, but feature {} '
                    'was {}.'.format(feature_path, arrow_type))

  value_type = arrow_util.get_innermost_nested_type(arrow_type)
  if pa.types.is_integer(value_type):
    return statistics_pb2.FeatureNameStatistics.INT
  elif pa.types.is_floating(value_type):
    return statistics_pb2.FeatureNameStatistics.FLOAT
  elif arrow_util.is_binary_like(value_type):
    return statistics_pb2.FeatureNameStatistics.STRING
  elif pa.types.is_struct(value_type):
    return statistics_pb2.FeatureNameStatistics.STRUCT
  elif pa.types.is_null(value_type):
    return None

  raise TypeError('Feature {} has unsupported arrow type: {}'.format(
      feature_path, arrow_type)) 
Author: tensorflow, Project: data-validation, Lines: 39, Source: stats_util.py
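
A hedged usage sketch, assuming the tensorflow_data_validation modules referenced above (types, arrow_util, statistics_pb2) are available:

import pyarrow as pa

# Hypothetical usage with an assumed feature path:
path = types.FeaturePath(["age"])
get_feature_type_from_arrow_type(path, pa.list_(pa.int64()))
# -> statistics_pb2.FeatureNameStatistics.INT
get_feature_type_from_arrow_type(path, pa.null())   # -> None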

Example 8: is_binary_like

# Required import: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def is_binary_like(data_type: pa.DataType) -> bool:
  """Returns true if an Arrow type is binary-like.

  Qualified types are {Large,}BinaryArray, {Large,}StringArray.

  Args:
    data_type: a pa.DataType.

  Returns:
    bool.
  """
  return (pa.types.is_binary(data_type) or
          pa.types.is_large_binary(data_type) or
          pa.types.is_unicode(data_type) or
          pa.types.is_large_unicode(data_type)) 
Author: tensorflow, Project: data-validation, Lines: 17, Source: arrow_util.py
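
A minimal check of the predicate (assumes is_binary_like is in scope):

import pyarrow as pa

assert is_binary_like(pa.binary())
assert is_binary_like(pa.large_string())
assert not is_binary_like(pa.int32())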

Example 9: is_list_like

# Required import: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def is_list_like(data_type: pa.DataType) -> bool:
  """Returns true if an Arrow type is list-like."""
  return pa.types.is_list(data_type) or pa.types.is_large_list(data_type) 
Author: tensorflow, Project: data-validation, Lines: 5, Source: arrow_util.py

Example 10: get_innermost_nested_type

# Required import: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def get_innermost_nested_type(arrow_type: pa.DataType) -> pa.DataType:
  """Returns the innermost type of a nested list type."""
  while is_list_like(arrow_type):
    arrow_type = arrow_type.value_type
  return arrow_type 
Author: tensorflow, Project: data-validation, Lines: 7, Source: arrow_util.py
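
A short sketch of the helper on a doubly nested list type (assumes is_list_like from example 9 is in scope):

import pyarrow as pa

nested = pa.list_(pa.large_list(pa.utf8()))
assert get_innermost_nested_type(nested) == pa.utf8()
assert get_innermost_nested_type(pa.int64()) == pa.int64()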

Example 11: _create_batch

# Required import: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def _create_batch(series, timezone):
    """
    Create an Arrow record batch from the given pandas.Series or list of Series, with optional type.

    :param series: A single pandas.Series, list of Series, or list of (series, arrow_type)
    :param timezone: A timezone to respect when handling timestamp values
    :return: Arrow RecordBatch
    """
    import decimal
    import sys
    from distutils.version import LooseVersion
    import pyarrow as pa
    from pyspark.sql.types import _check_series_convert_timestamps_internal
    # Make input conform to [(series1, type1), (series2, type2), ...]
    if not isinstance(series, (list, tuple)) or \
            (len(series) == 2 and isinstance(series[1], pa.DataType)):
        series = [series]
    series = ((s, None) if not isinstance(s, (list, tuple)) else s for s in series)

    def create_array(s, t):
        mask = s.isnull()
        # Ensure timestamp series are in expected form for Spark internal representation
        # TODO: maybe don't need None check anymore as of Arrow 0.9.1
        if t is not None and pa.types.is_timestamp(t):
            s = _check_series_convert_timestamps_internal(s.fillna(0), timezone)
            # TODO: need cast after Arrow conversion, ns values cause error with pandas 0.19.2
            return pa.Array.from_pandas(s, mask=mask).cast(t, safe=False)
        elif t is not None and pa.types.is_string(t) and sys.version < '3':
            # TODO: need decode before converting to Arrow in Python 2
            # TODO: don't need as of Arrow 0.9.1
            return pa.Array.from_pandas(s.apply(
                lambda v: v.decode("utf-8") if isinstance(v, str) else v), mask=mask, type=t)
        elif t is not None and pa.types.is_decimal(t) and \
                LooseVersion("0.9.0") <= LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
            # TODO: see ARROW-2432. Remove when the minimum PyArrow version becomes 0.10.0.
            return pa.Array.from_pandas(s.apply(
                lambda v: decimal.Decimal('NaN') if v is None else v), mask=mask, type=t)
        return pa.Array.from_pandas(s, mask=mask, type=t)

    arrs = [create_array(s, t) for s, t in series]
    return pa.RecordBatch.from_arrays(arrs, ["_%d" % i for i in xrange(len(arrs))]) 
Author: runawayhorse001, Project: LearningApacheSpark, Lines: 42, Source: serializers.py
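
The snippet above carries several Spark- and PyArrow-version-specific workarounds. As a rough stand-alone sketch of the core idea (building an Arrow RecordBatch from a pandas Series with an explicit null mask), without those workarounds:

import pandas as pd
import pyarrow as pa

s = pd.Series([1.0, None, 3.0])
arr = pa.Array.from_pandas(s, mask=s.isnull(), type=pa.float64())
batch = pa.RecordBatch.from_arrays([arr], ["_0"])
# batch.num_rows == 3; the second value is null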

Example 12: is_arithmetic_type

# Required import: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def is_arithmetic_type(arrow_dtype: pa.DataType) -> bool:
    """Check whether this is a type that support arithmetics."""
    return (
        pa.types.is_integer(arrow_dtype)
        or pa.types.is_floating(arrow_dtype)
        or pa.types.is_decimal(arrow_dtype)
    ) 
Author: xhochy, Project: fletcher, Lines: 9, Source: test_pandas_extension.py
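
A minimal check (assumes is_arithmetic_type is in scope):

import pyarrow as pa

assert is_arithmetic_type(pa.decimal128(10, 2))
assert is_arithmetic_type(pa.float32())
assert not is_arithmetic_type(pa.string())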

Example 13: _get_example

# Required import: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def _get_example(arrow_dtype: pa.DataType) -> pa.Array:
    if isinstance(arrow_dtype, pa.ListType):
        return pa.array(
            [None, _get_example(arrow_dtype.value_type).to_pylist()], type=arrow_dtype
        )
    return _examples[arrow_dtype] 
Author: xhochy, Project: fletcher, Lines: 8, Source: base.py

Example 14: _is_numeric

# Required import: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def _is_numeric(arrow_dtype: pa.DataType) -> bool:
    return (
        pa.types.is_integer(arrow_dtype)
        or pa.types.is_floating(arrow_dtype)
        or pa.types.is_decimal(arrow_dtype)
    ) 
Author: xhochy, Project: fletcher, Lines: 8, Source: base.py

Example 15: __init__

# Required import: import pyarrow [as alias]
# Or: from pyarrow import DataType [as alias]
def __init__(self, arrow_dtype: pa.DataType):
        self.arrow_dtype = arrow_dtype 
Author: xhochy, Project: fletcher, Lines: 4, Source: base.py


Note: The pyarrow.DataType examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by their authors; copyright in the source code remains with the original authors, and any use or redistribution should follow the corresponding projects' licenses. Do not reproduce without permission.