当前位置: 首页>>代码示例>>Python>>正文


Python pyarrow.ChunkedArray方法代码示例

本文整理汇总了Python中pyarrow.ChunkedArray方法的典型用法代码示例。如果您正苦于以下问题：Python pyarrow.ChunkedArray方法的具体用法？Python pyarrow.ChunkedArray怎么用？Python pyarrow.ChunkedArray使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pyarrow的用法示例。


在下文中一共展示了pyarrow.ChunkedArray方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import ChunkedArray [as 别名]
def __init__(self, array, dtype=None, copy=None):
        """
        Wrap the input in a ``pa.ChunkedArray`` and store it on ``self.data``.

        Parameters
        ----------
        array
            An array-like object or a list (converted through ``pa.array``),
            a ``pa.Array`` (wrapped as a single chunk) or a
            ``pa.ChunkedArray`` (stored unchanged).
        dtype
            Passed as ``type`` to ``pa.array``; only consulted when ``array``
            is array-like or a list.
        copy
            Not used at the moment. Its only effect will be once a
            FletcherChunkedArray is allowed as input.

        Raises
        ------
        ValueError
            If ``array`` is none of the supported kinds.
        """
        if is_array_like(array) or isinstance(array, list):
            converted = pa.array(array, type=dtype)
            chunked = pa.chunked_array([converted])
        elif isinstance(array, pa.Array):
            # ARROW-7008: pyarrow.chunked_array([array]) fails on an array whose
            # buffers are all None, so rebuild an empty array of the same type.
            if len(array) == 0 and not any(
                buf is not None for buf in array.buffers()
            ):
                array = pa.array([], type=array.type)
            # TODO: Assert dtype
            chunked = pa.chunked_array([array])
        elif isinstance(array, pa.ChunkedArray):
            # TODO: Assert dtype
            chunked = array
        else:
            raise ValueError(
                "Unsupported type passed for {}: {}".format(
                    self.__class__.__name__, type(array)
                )
            )

        self.data = chunked
        self._dtype = FletcherChunkedDtype(self.data.type)
        self.offsets = self._calculate_chunk_offsets()
开发者ID:xhochy,项目名称:fletcher,代码行数:24,代码来源:base.py

示例2: _call_x_with

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import ChunkedArray [as 别名]
def _call_x_with(self, impl, needle, na=None):
        """
        Run ``impl`` over every chunk of ``self.data`` and wrap the outcome in a Series.

        Parameters
        ----------
        impl
            Callable invoked as ``impl(str_arr, needle, 2, offset, out)`` once
            per chunk; ``out`` is a shared ``uint8`` buffer covering the whole
            of ``self.data``.
        needle
            Value converted with ``NumbaString.make`` before being handed to
            ``impl``.
        na
            Not used by this implementation.

        Returns
        -------
        pd.Series
            Series built from the boolean view of the buffer; positions whose
            buffer value equals 2 are masked out.
        """
        needle = NumbaString.make(needle)  # type: ignore
        flags = np.zeros(len(self.data), dtype=np.uint8)

        # Treat a plain array as a single piece starting at position 0.
        if isinstance(self.data, pa.ChunkedArray):
            pieces = self.data.chunks
        else:
            pieces = [self.data]

        start = 0
        for piece in pieces:
            piece_strings = NumbaStringArray.make(piece)  # type: ignore
            impl(piece_strings, needle, 2, start, flags)
            start += len(piece)

        wrapper = type(self.obj.values)
        # NOTE(review): presumably ``impl`` writes 2 for missing inputs - confirm.
        arrow_result = pa.array(flags.astype(bool), mask=(flags == 2))
        return pd.Series(wrapper(arrow_result))
开发者ID:xhochy,项目名称:fletcher,代码行数:19,代码来源:string_array.py

示例3: _2

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import ChunkedArray [as 别名]
def _2(a: pa.Array, b: Any, ops: Dict[str, Callable]):
    """
    Dispatch a binary operation whose left operand is a ``pa.Array``.

    The implementation is looked up in ``ops`` according to the kind of ``b``:
    ``"array_array"`` for a ``pa.Array``, ``"array_scalar"`` for a NumPy
    scalar and ``"array_nparray"`` otherwise. A missing key falls back to
    ``_not_implemented_path``. A chunked ``b`` is processed chunk by chunk
    against the matching slice of ``a``.

    Raises
    ------
    ValueError
        If ``b`` is not a scalar and its length differs from that of ``a``.
    """
    if isinstance(b, pa.ChunkedArray):
        if len(a) != len(b):
            raise ValueError("Inputs don't have the same length.")
        starts = _calculate_chunk_offsets(b)
        # Pair every chunk of ``b`` with the equally long window of ``a``.
        return pa.chunked_array(
            [
                dispatch_chunked_binary_map(a[start : start + len(piece)], piece, ops)
                for piece, start in zip(b.iterchunks(), starts)
            ]
        )

    if isinstance(b, pa.Array):
        op_name = "array_array"
    elif np.isscalar(b):
        # Scalars need no length check.
        return ops.get("array_scalar", _not_implemented_path)(a, b)
    else:
        op_name = "array_nparray"

    if len(a) != len(b):
        raise ValueError("Inputs don't have the same length.")
    return ops.get(op_name, _not_implemented_path)(a, b)
开发者ID:xhochy,项目名称:fletcher,代码行数:25,代码来源:chunking.py

示例4: __init__

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import ChunkedArray [as 别名]
def __init__(self, values):
        """
        Store a boolean ``pa.ChunkedArray`` as the backing data.

        Parameters
        ----------
        values
            The ``pa.ChunkedArray`` to wrap; its Arrow type must be
            ``pa.bool_()``.

        Raises
        ------
        ValueError
            If ``values`` is not a ``pa.ChunkedArray``.
        AssertionError
            If the Arrow type of ``values`` is not ``pa.bool_()``.
        """
        if not isinstance(values, pa.ChunkedArray):
            # Say what was received instead of raising a bare ValueError.
            raise ValueError(
                "values must be a pyarrow.ChunkedArray, got {}".format(
                    type(values).__name__
                )
            )

        # Kept as an assertion so the raised exception type is unchanged.
        assert values.type == pa.bool_(), "expected Arrow type bool, got {}".format(
            values.type
        )
        self._data = values
        self._dtype = ArrowBoolDtype()
开发者ID:Frank-qlu,项目名称:recruit,代码行数:9,代码来源:bool.py

示例5: check_valid_in_offsets

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import ChunkedArray [as 别名]
def check_valid_in_offsets(
    arr: pa.ChunkedArray, in_offsets: List[Tuple[int, int, int]]
) -> None:
    """
    Assert basic consistency between ``in_offsets`` and the chunked array ``arr``.

    Checks performed:
      * an array without chunks must come with an empty offset list,
      * the first entry must start with two zeros,
      * the third elements of all entries must sum to ``len(arr)``.

    Raises
    ------
    AssertionError
        If any of the checks fails.
    """
    if not arr.num_chunks:
        assert in_offsets == []
        return

    # We always start at the beginning
    first_entry = in_offsets[0]
    assert first_entry[0] == 0
    assert first_entry[1] == 0

    # Overall, the entries must cover exactly the length of the array
    covered = sum(entry[2] for entry in in_offsets)
    assert covered == len(arr)
开发者ID:xhochy,项目名称:fletcher,代码行数:15,代码来源:test_algorithms.py

示例6: assert_content_equals_array

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import ChunkedArray [as 别名]
def assert_content_equals_array(result, expected):
    """
    Assert that ``result`` is an Arrow array whose content equals ``expected``.

    A ``pa.ChunkedArray`` is concatenated into a single array before the
    comparison, so the chunk layout does not influence the outcome.

    Raises
    ------
    AssertionError
        If ``result`` is neither a ``pa.Array`` nor a ``pa.ChunkedArray``, or
        if its content differs from ``expected``.
    """
    assert isinstance(result, (pa.Array, pa.ChunkedArray))
    flattened = (
        pa.concat_arrays(result.iterchunks())
        if isinstance(result, pa.ChunkedArray)
        else result
    )
    assert flattened.equals(expected)
开发者ID:xhochy,项目名称:fletcher,代码行数:8,代码来源:test_algorithms.py

示例7: __arrow_array__

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import ChunkedArray [as 别名]
def __arrow_array__(self, type=None):
        """
        Hand out the wrapped Arrow data.

        Parameters
        ----------
        type
            Accepted but not used by this implementation.

        Returns
        -------
        The object stored in ``self.data``, returned as-is.
        """
        arrow_data = self.data
        return arrow_data
开发者ID:xhochy,项目名称:fletcher,代码行数:5,代码来源:base.py

示例8: base

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import ChunkedArray [as 别名]
def base(self) -> Union[pa.Array, pa.ChunkedArray]:
        """
        Give access to the object backing this array.

        Returns
        -------
        The object stored in ``self.data``, returned as-is.
        """
        underlying = self.data
        return underlying
开发者ID:xhochy,项目名称:fletcher,代码行数:5,代码来源:base.py

示例9: unique

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import ChunkedArray [as 别名]
def unique(self):
        """
        Compute the ExtensionArray of unique values.

        The ``unique`` method of the underlying data is tried first. When that
        path raises ``NotImplementedError``, the implementation inherited from
        the parent class is used instead.

        Returns
        -------
        uniques : ExtensionArray
        """
        try:
            # Both steps stay inside the ``try`` so a NotImplementedError
            # from either one triggers the fallback.
            distinct = self.data.unique()
            uniques = type(self)(distinct)
        except NotImplementedError:
            uniques = super().unique()
        return uniques
开发者ID:xhochy,项目名称:fletcher,代码行数:17,代码来源:base.py

示例10: pandas_from_arrow

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import ChunkedArray [as 别名]
def pandas_from_arrow(
    arrow_object: Union[pa.RecordBatch, pa.Table, pa.Array, pa.ChunkedArray],
    continuous: bool = False,
):
    """
    Turn an Arrow object into its Pandas counterpart backed by Fletcher arrays.

    Mapping of input to output:
      * RecordBatch or Table -> DataFrame with one Fletcher column per Arrow column
      * Array or ChunkedArray -> Series

    Parameters
    ----------
    arrow_object : RecordBatch, Table, Array or ChunkedArray
        The object to convert.
    continuous : bool
        Wrap the data in FletcherContinuousArray instead of
        FletcherChunkedArray.

    Raises
    ------
    NotImplementedError
        If ``arrow_object`` is of none of the supported types.
    """
    array_type = FletcherContinuousArray if continuous else FletcherChunkedArray

    if isinstance(arrow_object, pa.RecordBatch):
        # Iterating a RecordBatch yields its columns in schema order.
        named_columns = zip(arrow_object.schema.names, arrow_object)
    elif isinstance(arrow_object, pa.Table):
        named_columns = zip(arrow_object.column_names, arrow_object.itercolumns())
    elif isinstance(arrow_object, (pa.ChunkedArray, pa.Array)):
        return pd.Series(array_type(arrow_object))
    else:
        raise NotImplementedError(
            "Objects of type {} are not supported".format(type(arrow_object))
        )

    frame_data: OrderedDict = OrderedDict(
        (name, array_type(column)) for name, column in named_columns
    )
    return pd.DataFrame(frame_data)
开发者ID:xhochy,项目名称:fletcher,代码行数:41,代码来源:base.py

示例11: _series_like

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import ChunkedArray [as 别名]
def _series_like(self, array: Union[pa.Array, pa.ChunkedArray]) -> pd.Series:
        """
        Wrap an Arrow result in a Series modelled on ``self.obj``.

        The values are built with the class of ``self.obj.values``, the dtype
        with the class of ``self.obj.dtype`` (instantiated from ``array.type``),
        and the index of ``self.obj`` is reused.
        """
        values = type(self.obj.values)(array)
        result_dtype = type(self.obj.dtype)(array.type)
        return pd.Series(values, dtype=result_dtype, index=self.obj.index)
开发者ID:xhochy,项目名称:fletcher,代码行数:9,代码来源:string_array.py

示例12: extract_isnull_bytemap

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import ChunkedArray [as 别名]
def extract_isnull_bytemap(array: Union[pa.ChunkedArray, pa.Array]) -> np.ndarray:
    """
    Build a NumPy boolean "is null" bytemap from the validity bitmap(s) of an array.

    Parameters
    ----------
    array
        Plain or chunked Arrow array whose validity bits are expanded into
        one boolean per element.

    Returns
    -------
    isnull_bytemap
        Boolean array of ``len(array)``. It is all ``True`` when every entry
        is null and all ``False`` when there are no nulls or no validity
        bitmap; otherwise it is filled by ``_extract_isnull_bytemap``.
    """
    length = len(array)
    if array.null_count == length:
        return np.ones(length, dtype=bool)

    if not isinstance(array, pa.ChunkedArray):
        # The first buffer is used as the validity bitmap.
        validity = array.buffers()[0]
        # TODO: Can we use np.empty here to improve performance?
        isnull = np.zeros(length, dtype=bool)
        if validity:
            # TODO(ARROW-2664): The memoryview is only needed to support
            #   executing the code in disabled-JIT mode.
            _extract_isnull_bytemap(
                memoryview(validity), length, array.offset, 0, isnull
            )
        return isnull

    isnull = np.zeros(length, dtype=bool)
    if array.null_count == 0:
        return isnull

    position = 0
    for piece in array.chunks:
        # Chunks without nulls keep their all-False default.
        if piece.null_count > 0:
            _extract_isnull_bytemap(
                piece.buffers()[0], len(piece), piece.offset, position, isnull
            )
        position += len(piece)
    return isnull
开发者ID:xhochy,项目名称:fletcher,代码行数:43,代码来源:_algorithms.py

示例13: pd_nanop

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import ChunkedArray [as 别名]
def pd_nanop(nanop: Callable, arr: Union[pa.ChunkedArray, pa.Array], skipna: bool):
    """
    Run a reduction callable on the data of an Arrow array.

    Parameters
    ----------
    nanop
        Callable invoked as ``nanop(values, skipna=..., mask=...)``.
    arr
        Input data; a chunked array is concatenated into one array first.
    skipna
        Forwarded unchanged to ``nanop``.

    Returns
    -------
    Whatever ``nanop`` returns.
    """
    contiguous = (
        pa.concat_arrays(arr.iterchunks()) if isinstance(arr, pa.ChunkedArray) else arr
    )
    values = _extract_data_buffer_as_np_array(contiguous)
    isnull = extract_isnull_bytemap(contiguous)
    return nanop(values, skipna=skipna, mask=isnull)
开发者ID:xhochy,项目名称:fletcher,代码行数:12,代码来源:_algorithms.py

示例14: _text_cat_chunked

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import ChunkedArray [as 别名]
def _text_cat_chunked(a: Any, b: pa.ChunkedArray) -> pa.ChunkedArray:
    """
    Reject the call: this variant always raises.

    Raises
    ------
    NotImplementedError
        Always, regardless of the arguments.
    """
    message = "_text_cat_chunked is only implemented for pa.Array and pa.ChunkedArray"
    raise NotImplementedError(message)
开发者ID:xhochy,项目名称:fletcher,代码行数:6,代码来源:string.py

示例15: _text_cat_chunked_1

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import ChunkedArray [as 别名]
def _text_cat_chunked_1(a: pa.ChunkedArray, b: pa.ChunkedArray) -> pa.ChunkedArray:
    """
    Apply ``_text_cat`` to two chunked arrays piece by piece.

    ``_combined_in_chunk_offsets`` yields, for each input, a list of entries
    that are read here as ``(chunk index, start within chunk, length)``. The
    slices described by paired entries are passed to ``_text_cat`` and the
    results are gathered into a new chunked array.
    """

    def _window(chunked, entry):
        # Slice ``entry[2]`` elements starting at ``entry[1]`` out of chunk ``entry[0]``.
        return chunked.chunk(entry[0])[entry[1] : entry[1] + entry[2]]

    offsets_a, offsets_b = _combined_in_chunk_offsets(a, b)
    pieces: List[pa.Array] = [
        _text_cat(_window(a, entry_a), _window(b, entry_b))
        for entry_a, entry_b in zip(offsets_a, offsets_b)
    ]
    return pa.chunked_array(pieces)
开发者ID:xhochy,项目名称:fletcher,代码行数:11,代码来源:string.py


注:本文中的pyarrow.ChunkedArray方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。