This page collects typical usage examples of pyarrow.Array in Python. If you are unsure what pyarrow.Array is for or how to use it, the curated code examples below may help. You can also explore further usage examples from the containing module, pyarrow.
The following shows 15 code examples of pyarrow.Array, sorted by popularity by default.
Example 1: update
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Array [as alias]
def update(self, feature_array: pa.Array, presence_mask: np.ndarray,
           num_values: np.ndarray, num_values_not_none: np.ndarray,
           weights: Optional[np.ndarray]) -> None:
  """Updates the stats with a feature array."""
  self.num_non_missing += len(feature_array) - feature_array.null_count
  self.max_num_values = np.maximum.reduce(
      num_values_not_none, initial=self.max_num_values)
  self.min_num_values = np.minimum.reduce(num_values_not_none,
                                          initial=self.min_num_values)
  self.total_num_values += np.sum(num_values_not_none)

  if weights is not None:
    if weights.size != num_values.size:
      raise ValueError('Weight feature must not be missing.')
    self.weighted_total_num_values += np.sum(num_values * weights)
    self.weighted_num_non_missing += np.sum(weights[presence_mask])
Example 2: __init__
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Array [as alias]
def __init__(self, array, dtype=None, copy: Optional[bool] = None):
    # `copy` is not used at the moment. Its only effect will come once we
    # allow `array` to be a FletcherContinuousArray.
    if is_array_like(array) or isinstance(array, list):
        self.data = pa.array(array, type=dtype)
    elif isinstance(array, pa.Array):
        # TODO: Assert dtype
        self.data = array
    elif isinstance(array, pa.ChunkedArray):
        # TODO: Assert dtype
        if array.num_chunks == 1:
            self.data = array.chunk(0)
        else:
            self.data = pa.concat_arrays(array.iterchunks())
    else:
        raise ValueError(
            "Unsupported type passed for {}: {}".format(
                self.__class__.__name__, type(array)
            )
        )

    self._dtype = FletcherContinuousDtype(self.data.type)
Example 3: np_ufunc_array_array
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Array [as alias]
def np_ufunc_array_array(a: pa.Array, b: pa.Array, op: Callable):
    np_arr_a = _extract_data_buffer_as_np_array(a)
    np_arr_b = _extract_data_buffer_as_np_array(b)
    if a.null_count > 0 and b.null_count > 0:
        # TODO: Combine them before extracting
        mask_a = extract_isnull_bytemap(a)
        mask_b = extract_isnull_bytemap(b)
        mask = mask_a | mask_b
    elif a.null_count > 0:
        mask = extract_isnull_bytemap(a)
    elif b.null_count > 0:
        mask = extract_isnull_bytemap(b)
    else:
        mask = None

    new_arr = op(np_arr_a, np_arr_b)

    # Don't set type as we might have valid casts like int->float in truediv
    return pa.array(new_arr, mask=mask)
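The helper above relies on fletcher-internal functions. As a self-contained illustration of the same idea, applying a NumPy ufunc to the values of two pyarrow arrays while combining their null masks, a minimal sketch using only public pyarrow and NumPy APIs could look like this (the fill value 0 is just a placeholder for slots that end up masked anyway):

import numpy as np
import pyarrow as pa

a = pa.array([1.0, 2.0, None, 4.0])
b = pa.array([10.0, None, 30.0, 40.0])

# Combine the null masks, then apply the ufunc to the raw values.
mask = np.asarray(a.is_null()) | np.asarray(b.is_null())
values = np.add(np.asarray(a.fill_null(0)), np.asarray(b.fill_null(0)))
result = pa.array(values, mask=mask)
print(result)  # [11, null, null, 44]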
Example 4: _text_cat
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Array [as alias]
def _text_cat(a: pa.Array, b: pa.Array) -> pa.Array:
    if len(a) != len(b):
        raise ValueError("Lengths of arrays don't match")

    offsets_a, data_a = _extract_string_buffers(a)
    offsets_b, data_b = _extract_string_buffers(b)
    if len(a) > 0:
        valid = _merge_valid_bitmaps(a, b)
        result_offsets = np.empty(len(a) + 1, dtype=np.int32)
        result_offsets[0] = 0
        total_size = (offsets_a[-1] - offsets_a[0]) + (offsets_b[-1] - offsets_b[0])
        result_data = np.empty(total_size, dtype=np.uint8)
        _merge_string_data(
            len(a),
            valid,
            offsets_a,
            data_a,
            offsets_b,
            data_b,
            result_offsets,
            result_data,
        )
        buffers = [pa.py_buffer(x) for x in [valid, result_offsets, result_data]]
        return pa.Array.from_buffers(pa.string(), len(a), buffers)
    return a
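As a usage note: recent pyarrow versions expose a compute kernel with the same element-wise string concatenation semantics (a null in either input yields null, matching the merged validity bitmap above). A small sketch, assuming a pyarrow version that provides binary_join_element_wise:

import pyarrow as pa
import pyarrow.compute as pc

a = pa.array(["foo", "bar", None])
b = pa.array(["1", None, "3"])
# The last argument is the separator; an empty string gives plain concatenation.
print(pc.binary_join_element_wise(a, b, ""))  # ["foo1", null, null]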
Example 5: or_na
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Array [as alias]
def or_na(arr: pa.Array) -> pa.Array:
    """Apply ``array | NA`` with a boolean pyarrow.Array."""
    output_length = len(arr) // 8
    if len(arr) % 8 != 0:
        output_length += 1

    if arr.null_count == 0:
        return pa.Array.from_buffers(
            pa.bool_(),
            len(arr),
            [arr.buffers()[1], arr.buffers()[1]],
            null_count=-1,
            offset=arr.offset,
        )
    else:
        output = np.zeros(output_length, dtype=np.uint8)
        null_count = _or_na(
            len(arr), arr.offset, arr.buffers()[0], arr.buffers()[1], output
        )
        buf = pa.py_buffer(output)
        return pa.Array.from_buffers(pa.bool_(), len(arr), [buf, buf], null_count)
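The fast path above reuses the data buffer as the validity bitmap, so True stays a valid True and False becomes null, which is exactly Kleene logic for array | NA. For reference, the same semantics are available through a public compute kernel in recent pyarrow; a minimal sketch, assuming or_kleene is present:

import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array([True, False, None])
na = pa.scalar(None, type=pa.bool_())
print(pc.or_kleene(arr, na))  # [true, null, null]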
Example 6: _2
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Array [as alias]
def _2(a: pa.Array, b: Any, ops: Dict[str, Callable]):
    """Apply a NumPy ufunc where at least one of the arguments is an Arrow structure."""
    if isinstance(b, pa.ChunkedArray):
        if len(a) != len(b):
            raise ValueError("Inputs don't have the same length.")
        new_chunks = []
        offsets = _calculate_chunk_offsets(b)
        for chunk, offset in zip(b.iterchunks(), offsets):
            new_chunks.append(
                dispatch_chunked_binary_map(a[offset : offset + len(chunk)], chunk, ops)
            )
        return pa.chunked_array(new_chunks)
    elif isinstance(b, pa.Array):
        if len(a) != len(b):
            raise ValueError("Inputs don't have the same length.")
        return ops.get("array_array", _not_implemented_path)(a, b)
    else:
        if np.isscalar(b):
            return ops.get("array_scalar", _not_implemented_path)(a, b)
        else:
            if len(a) != len(b):
                raise ValueError("Inputs don't have the same length.")
            return ops.get("array_nparray", _not_implemented_path)(a, b)
Example 7: _ListArrayToTensor
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Array [as alias]
def _ListArrayToTensor(
    self, list_array: pa.Array,
    produce_eager_tensors: bool) -> Union[np.ndarray, tf.Tensor]:
  """Converts a ListArray to a dense tensor."""
  values = list_array.flatten()
  batch_size = len(list_array)
  expected_num_elements = batch_size * self._unbatched_flat_len
  if len(values) != expected_num_elements:
    raise ValueError(
        "Unable to convert ListArray {} to {}: size mismatch. expected {} "
        "elements but got {}".format(
            list_array, self.type_spec, expected_num_elements, len(values)))
  actual_shape = list(self._shape)
  actual_shape[0] = batch_size
  if self._convert_to_binary_fn is not None:
    values = self._convert_to_binary_fn(values)
  values_np = np.asarray(values).reshape(actual_shape)
  if produce_eager_tensors:
    return tf.convert_to_tensor(values_np)
  return values_np
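The conversion hinges on pa.ListArray.flatten() returning the concatenated values of all rows, so the size check compares len(values) against batch_size * unbatched_flat_len. A tiny sketch of that relationship with plain pyarrow and NumPy (the shape (2, 3) is just an illustrative fixed-length case):

import numpy as np
import pyarrow as pa

list_array = pa.array([[1, 2, 3], [4, 5, 6]])
values = list_array.flatten()
print(len(list_array), len(values))        # 2 6
print(np.asarray(values).reshape([2, 3]))  # [[1 2 3] [4 5 6]]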
Example 8: from_array
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Array [as alias]
def from_array(cls, arr):
    assert isinstance(arr, pa.Array)
    return cls(pa.chunked_array([arr]))
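A usage sketch, assuming from_array is a classmethod of a fletcher-style wrapper class (the class name FletcherChunkedArray below is only an assumption); the underlying step is simply wrapping a single pa.Array in a one-chunk pa.ChunkedArray:

import pyarrow as pa

arr = pa.array([1, 2, 3])
chunked = pa.chunked_array([arr])        # what cls(...) receives inside from_array
print(chunked.num_chunks, len(chunked))  # 1 3
# Hypothetical call on the wrapper class: FletcherChunkedArray.from_array(arr)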
Example 9: is_binary_like
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Array [as alias]
def is_binary_like(data_type: pa.DataType) -> bool:
  """Returns true if an Arrow type is binary-like.

  Qualified types are {Large,}BinaryArray, {Large,}StringArray.

  Args:
    data_type: a pa.DataType.

  Returns:
    bool.
  """
  return (pa.types.is_binary(data_type) or
          pa.types.is_large_binary(data_type) or
          pa.types.is_unicode(data_type) or
          pa.types.is_large_unicode(data_type))
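A quick usage sketch of the predicate defined above; it only needs pyarrow type objects:

import pyarrow as pa

print(is_binary_like(pa.string()))        # True
print(is_binary_like(pa.large_binary()))  # True
print(is_binary_like(pa.int64()))         # False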
Example 10: flatten_nested
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Array [as alias]
def flatten_nested(
    array: pa.Array, return_parent_indices: bool = False
) -> Tuple[pa.Array, Optional[np.ndarray]]:
  """Flattens all the list arrays nesting an array.

  If `array` is not list-like, it is returned unchanged.

  Args:
    array: The pa.Array to flatten.
    return_parent_indices: If True, also returns the parent indices array.

  Returns:
    A tuple. The first term is the flattened array. The second term is None
    if `return_parent_indices` is False; otherwise it's a parent indices array
    parallel to the flattened array: if parent_indices[i] = j, then
    flattened_array[i] belongs to the j-th element of the input array.
  """
  parent_indices = None

  while is_list_like(array.type):
    if return_parent_indices:
      cur_parent_indices = array_util.GetFlattenedArrayParentIndices(
          array).to_numpy()
      if parent_indices is None:
        parent_indices = cur_parent_indices
      else:
        parent_indices = parent_indices[cur_parent_indices]
    array = array.flatten()

  # The array was not nested in the first place.
  if return_parent_indices and parent_indices is None:
    parent_indices = np.arange(len(array))
  return array, parent_indices
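The helper depends on tfx_bsl's array_util, but the core behavior can be illustrated with public pyarrow APIs. A minimal sketch, assuming a pyarrow version that provides pyarrow.compute.list_parent_indices, which mirrors the parent-indices bookkeeping above for a single nesting level:

import pyarrow as pa
import pyarrow.compute as pc

nested = pa.array([[1, 2], [], [3]])
print(nested.flatten())                # [1, 2, 3]
print(pc.list_parent_indices(nested))  # [0, 0, 2]: flat element i came from row j of the input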
Example 11: add_input
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Array [as alias]
def add_input(self, accumulator: _PartialNLStats,
              feature_path: types.FeaturePath,
              feature_array: pa.Array) -> _PartialNLStats:
  """Returns the result of folding a batch of inputs into the accumulator.

  Args:
    accumulator: The current accumulator.
    feature_path: The path of the feature.
    feature_array: An arrow Array representing a batch of feature values
      which should be added to the accumulator.

  Returns:
    The accumulator after updating the statistics for the batch of inputs.
  """
  if accumulator.invalidate:
    return accumulator
  feature_type = stats_util.get_feature_type_from_arrow_type(
      feature_path, feature_array.type)
  # Ignore null array.
  if feature_type is None:
    return accumulator
  # If we see a different type, invalidate.
  if feature_type != statistics_pb2.FeatureNameStatistics.STRING:
    accumulator.invalidate = True
    return accumulator

  def _is_non_utf8(value):
    return (isinstance(value, bytes) and
            stats_util.maybe_get_utf8(value) is None)

  is_non_utf_vec = np.vectorize(_is_non_utf8, otypes=[bool])
  classify_vec = np.vectorize(self._classifier.classify, otypes=[bool])
  values = np.asarray(arrow_util.flatten_nested(feature_array)[0]
                      .slice(0, _CROP_AT_VALUES))
  if np.any(is_non_utf_vec(values)):
    accumulator.invalidate = True
    return accumulator
  accumulator.considered += values.size
  accumulator.matched += np.sum(classify_vec(values))
  return accumulator
Example 12: add_input
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Array [as alias]
def add_input(self, accumulator: _PartialTimeStats,
              feature_path: types.FeaturePath,
              feature_array: pa.Array) -> _PartialTimeStats:
  """Returns the result of folding a batch of inputs into the current accumulator.

  Args:
    accumulator: The current accumulator.
    feature_path: The path of the feature.
    feature_array: An arrow Array representing a batch of feature values
      which should be added to the accumulator.

  Returns:
    The accumulator after updating the statistics for the batch of inputs.
  """
  if accumulator.invalidated:
    return accumulator
  feature_type = stats_util.get_feature_type_from_arrow_type(
      feature_path, feature_array.type)
  # Ignore null array.
  if feature_type is None:
    return accumulator

  if feature_type == statistics_pb2.FeatureNameStatistics.STRING:

    def _maybe_get_utf8(val):
      return stats_util.maybe_get_utf8(val) if isinstance(val, bytes) else val

    values = np.asarray(arrow_util.flatten_nested(feature_array)[0])
    maybe_utf8 = np.vectorize(_maybe_get_utf8, otypes=[object])(values)
    if not maybe_utf8.all():
      accumulator.invalidated = True
      return accumulator
    accumulator.update(maybe_utf8, feature_type)
  elif feature_type == statistics_pb2.FeatureNameStatistics.INT:
    values = np.asarray(arrow_util.flatten_nested(feature_array)[0])
    accumulator.update(values, feature_type)
  else:
    accumulator.invalidated = True
  return accumulator
Example 13: assert_content_equals_array
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Array [as alias]
def assert_content_equals_array(result, expected):
    """Assert that the result is an Arrow structure and the content matches an array."""
    assert isinstance(result, (pa.Array, pa.ChunkedArray))
    if isinstance(result, pa.ChunkedArray):
        result = pa.concat_arrays(result.iterchunks())
    assert result.equals(expected)
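A usage sketch for the test helper defined above, using only pyarrow objects:

import pyarrow as pa

chunked = pa.chunked_array([[1, 2], [3]])
assert_content_equals_array(chunked, pa.array([1, 2, 3]))        # passes
assert_content_equals_array(pa.array([1, 2]), pa.array([1, 2]))  # passes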
Example 14: _get_example
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Array [as alias]
def _get_example(arrow_dtype: pa.DataType) -> pa.Array:
    if isinstance(arrow_dtype, pa.ListType):
        return pa.array(
            [None, _get_example(arrow_dtype.value_type).to_pylist()], type=arrow_dtype
        )
    return _examples[arrow_dtype]
Example 15: __arrow_array__
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Array [as alias]
def __arrow_array__(self, type=None):
    """Convert myself to a pyarrow Array or ChunkedArray."""
    return self.data
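__arrow_array__ is the protocol hook that pa.array() looks for when converting custom objects. A self-contained sketch with a hypothetical wrapper class (WrappedColumn is not from the original source):

import pyarrow as pa

class WrappedColumn:
    """Hypothetical container exposing its data through __arrow_array__."""

    def __init__(self, data: pa.Array):
        self.data = data

    def __arrow_array__(self, type=None):
        return self.data

wrapped = WrappedColumn(pa.array([1, 2, None]))
print(pa.array(wrapped))  # pa.array() detects the protocol and returns the stored Array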