当前位置: 首页>>代码示例>>Python>>正文


Python pyarrow.py_buffer方法代码示例

本文整理汇总了Python中pyarrow.py_buffer方法的典型用法代码示例。如果您正苦于以下问题：Python pyarrow.py_buffer方法的具体用法？Python pyarrow.py_buffer怎么用？Python pyarrow.py_buffer使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pyarrow的用法示例。


在下文中一共展示了pyarrow.py_buffer方法的10个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: loads

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def loads(buf):
    """Rebuild a Python object from a serialized buffer.

    The buffer starts with a header (parsed by ``read_file_header``) that
    carries the compression type and the serial type; the payload follows
    after ``HEADER_LENGTH`` bytes.

    :param buf: bytes-like object holding header plus payload
    :return: the deserialized object

    NOTE: the payload may be handed to ``pickle.loads``, so only buffers
    from trusted sources should be passed in.
    """
    view = memoryview(buf)
    header = read_file_header(view)
    codec = header.compress

    # Uncompressed payloads are sliced directly from ``buf``; anything else
    # goes through the decompressor registered for the codec.
    if codec == CompressType.NONE:
        payload = buf[HEADER_LENGTH:]
    else:
        payload = decompressors[codec](view[HEADER_LENGTH:])

    is_arrow = header.type == SerialType.ARROW
    if not is_arrow:
        return pickle.loads(payload)

    try:
        return pyarrow.deserialize(memoryview(payload), mars_serialize_context())
    except pyarrow.lib.ArrowInvalid:  # pragma: no cover
        # Fallback layout: a 4-byte int32 giving the size of a pickled meta
        # block, the meta block itself, then the component buffers laid out
        # back to back with the sizes listed in meta['buffer_sizes'].
        payload_view = memoryview(payload)
        meta_size = np.frombuffer(payload_view[0:4], dtype='int32').item()
        meta_end = 4 + meta_size
        meta = pickle.loads(payload_view[4:meta_end])  # nosec
        sizes = meta.pop('buffer_sizes')
        # Cumulative sum turns the sizes into start/stop byte positions.
        bounds = np.cumsum([meta_end] + sizes)
        meta['data'] = [
            pyarrow.py_buffer(payload_view[start:stop])
            for start, stop in zip(bounds[:-1], bounds[1:])
        ]
        return pyarrow.deserialize_components(meta, mars_serialize_context())
开发者ID:mars-project,项目名称:mars,代码行数:27,代码来源:dataserializer.py

示例2: _text_cat

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def _text_cat(a: pa.Array, b: pa.Array) -> pa.Array:
    """Concatenate two string arrays element by element.

    :param a: left-hand string array
    :param b: right-hand string array, must have the same length as ``a``
    :return: a new string array built from freshly allocated validity,
        offset and data buffers; ``a`` itself when the inputs are empty
    :raises ValueError: if the two arrays differ in length
    """
    n_rows = len(a)
    if n_rows != len(b):
        raise ValueError("Lengths of arrays don't match")

    offsets_a, data_a = _extract_string_buffers(a)
    offsets_b, data_b = _extract_string_buffers(b)
    if n_rows == 0:
        # Nothing to concatenate; hand back the (empty) left operand.
        return a

    valid = _merge_valid_bitmaps(a, b)

    # The output holds exactly the bytes referenced by both inputs' offsets.
    span_a = offsets_a[-1] - offsets_a[0]
    span_b = offsets_b[-1] - offsets_b[0]
    total_size = span_a + span_b
    result_data = np.empty(total_size, dtype=np.uint8)

    # One more offset than rows; the first offset is always zero.
    result_offsets = np.empty(n_rows + 1, dtype=np.int32)
    result_offsets[0] = 0

    _merge_string_data(
        n_rows,
        valid,
        offsets_a,
        data_a,
        offsets_b,
        data_b,
        result_offsets,
        result_data,
    )
    buffers = [
        pa.py_buffer(valid),
        pa.py_buffer(result_offsets),
        pa.py_buffer(result_data),
    ]
    return pa.Array.from_buffers(pa.string(), n_rows, buffers)
开发者ID:xhochy,项目名称:fletcher,代码行数:27,代码来源:string.py

示例3: or_na

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def or_na(arr: pa.Array) -> pa.Array:
    """Apply ``array | NA`` with a boolean pyarrow.Array.

    :param arr: boolean input array
    :return: boolean array in which the same buffer serves as both the
        validity bitmap and the value bitmap
    """
    if arr.null_count == 0:
        # Without nulls the existing value bitmap is reused for both
        # buffers; the null count is passed as -1 rather than computed here.
        value_bitmap = arr.buffers()[1]
        return pa.Array.from_buffers(
            pa.bool_(),
            len(arr),
            [value_bitmap, value_bitmap],
            null_count=-1,
            offset=arr.offset,
        )

    # One bit per element, rounded up to whole bytes.
    n_bytes = (len(arr) + 7) // 8
    packed = np.zeros(n_bytes, dtype=np.uint8)
    validity, values = arr.buffers()[0], arr.buffers()[1]
    null_count = _or_na(len(arr), arr.offset, validity, values, packed)
    shared = pa.py_buffer(packed)
    return pa.Array.from_buffers(pa.bool_(), len(arr), [shared, shared], null_count)
开发者ID:xhochy,项目名称:fletcher,代码行数:23,代码来源:bool.py

示例4: all_true_like

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def all_true_like(arr: pa.Array) -> pa.Array:
    """Build an all-True boolean array shaped like ``arr``.

    The result has the same length, null count and validity bitmap as the
    input; only the values differ (every bit is set).

    :param arr: array whose length and validity are copied
    :return: boolean array with every value bit set
    """
    # Whole bytes of the offset are dropped by slicing the validity buffer;
    # the remaining sub-byte part is kept as the offset of the new array.
    bit_offset = arr.offset % 8
    n_bits = len(arr) + bit_offset
    n_bytes = (n_bits + 7) // 8
    ones = np.full(n_bytes, fill_value=255, dtype=np.uint8)

    validity = arr.buffers()[0]
    if validity:
        validity = validity.slice(arr.offset // 8)

    return pa.Array.from_buffers(
        pa.bool_(),
        len(arr),
        [validity, pa.py_buffer(ones)],
        null_count=arr.null_count,
        offset=bit_offset,
    )
开发者ID:xhochy,项目名称:fletcher,代码行数:23,代码来源:bool.py

示例5: shift_unaligned_bitmap

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def shift_unaligned_bitmap(
    valid_buffer: pa.Buffer, offset: int, length: int
) -> pa.Buffer:
    """Copy an unaligned bitmap into a new buffer that starts at bit 0.

    :param valid_buffer: source bitmap
    :param offset: offset at which the bitmap starts inside ``valid_buffer``
    :param length: number of bits to carry over
    :return: newly allocated buffer filled in by ``_shift_unaligned_bitmap``
    """
    # Ceiling division: enough bytes to hold ``length`` bits.
    n_bytes = -(-length // 8)
    shifted = np.zeros(n_bytes, dtype=np.uint8)
    _shift_unaligned_bitmap(valid_buffer, offset, length, shifted)
    return pa.py_buffer(shifted)
开发者ID:xhochy,项目名称:fletcher,代码行数:14,代码来源:string.py

示例6: _text_contains_case_sensitive

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def _text_contains_case_sensitive(data: pa.Array, pat: str) -> pa.Array:
    """
    Test, for every element of ``data``, whether it contains ``pat``.

    The match is a plain byte-by-byte comparison, so it does not depend on
    any locale or encoding.

    :param data: string array to search in
    :param pat: pattern to look for; encoded to bytes before matching
    :return: boolean array with one entry per element of ``data``
    """
    # str.encode() defaults to UTF-8.
    needle: bytes = pat.encode()

    # Bit-packed boolean output: one bit per row, rounded up to full bytes.
    n_rows = len(data)
    n_bytes, tail_bits = divmod(n_rows, 8)
    if tail_bits:
        n_bytes += 1
    output = np.empty(n_bytes, dtype=np.uint8)
    if tail_bits:
        # np.empty leaves memory uninitialised; clear the last byte so the
        # unused trailing bits are zero.
        output[-1] = 0

    offsets, data_buffer = _extract_string_buffers(data)

    if data.null_count != 0:
        validity = data.buffers()[0]
        _text_contains_case_sensitive_nulls(
            n_rows,
            _buffer_to_view(validity),
            data.offset,
            offsets,
            data_buffer,
            needle,
            output,
        )
        # Drop the whole bytes covered by the offset, then realign any
        # remaining sub-byte offset so the result bitmap starts at bit 0.
        byte_offset, bit_offset = divmod(data.offset, 8)
        valid_buffer = validity.slice(byte_offset)
        if bit_offset != 0:
            valid_buffer = shift_unaligned_bitmap(valid_buffer, bit_offset, n_rows)
    else:
        valid_buffer = None
        _text_contains_case_sensitive_nonnull(
            n_rows, offsets, data_buffer, needle, output
        )

    return pa.Array.from_buffers(
        pa.bool_(), n_rows, [valid_buffer, pa.py_buffer(output)], data.null_count
    )
开发者ID:xhochy,项目名称:fletcher,代码行数:42,代码来源:string.py

示例7: or_array_nparray

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def or_array_nparray(a: pa.Array, b: np.ndarray) -> pa.Array:
    """Perform ``pa.Array | np.ndarray``.

    :param a: boolean pyarrow array
    :param b: numpy array to OR with ``a``
    :return: boolean pyarrow array backed by newly allocated bitmaps
    """
    n_rows = len(a)
    # One bit per row, rounded up to whole bytes.
    n_bytes = (n_rows + 7) // 8
    result = np.zeros(n_bytes, dtype=np.uint8)

    if a.null_count == 0:
        # No nulls on the left: the result needs no validity bitmap.
        bitmap_or_unaligned_with_numpy_nonnull(
            n_rows, a.buffers()[1], a.offset, b, result
        )
        return pa.Array.from_buffers(
            pa.bool_(), n_rows, [None, pa.py_buffer(result)], 0
        )

    valid_bits = np.zeros(n_bytes, dtype=np.uint8)
    validity, values = a.buffers()[0], a.buffers()[1]
    null_count = bitmap_or_unaligned_with_numpy(
        n_rows, validity, values, a.offset, b, result, valid_bits
    )
    return pa.Array.from_buffers(
        pa.bool_(),
        n_rows,
        [pa.py_buffer(valid_bits), pa.py_buffer(result)],
        null_count,
    )
开发者ID:xhochy,项目名称:fletcher,代码行数:28,代码来源:bool.py

示例8: test_arrow_file_does_not_validate

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def test_arrow_file_does_not_validate(self):
        """A string column holding a non-UTF-8 byte must fail validation."""
        # value_offsets: the single item spans data bytes 0 to 1
        value_offsets = pyarrow.py_buffer(struct.pack("II", 0, 1))
        # data: one byte that is not valid UTF-8 on its own
        raw_bytes = pyarrow.py_buffer(b"\xc9")
        array = pyarrow.StringArray.from_buffers(1, value_offsets, raw_bytes)

        expected_message = "arrow-validate: --check-utf8 failed on column A"
        with arrow_file({"A": array}) as path:
            with self.assertRaisesRegex(InvalidArrowFile, expected_message):
                validate_arrow_file(path)
开发者ID:CJWorkbench,项目名称:cjworkbench,代码行数:15,代码来源:test_validate.py

示例9: testArrowBufferIO

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def testArrowBufferIO(self):
        """Round-trip an ndarray through ArrowBufferIO for each codec.

        Covers a complete read, a read split in two parts, a read in fixed
        size chunks and a chunked write back into a pre-sized sink.
        """
        if not np:
            return
        from numpy.testing import assert_array_equal

        chunk_size = 128
        candidates = [dataserializer.CompressType.LZ4, dataserializer.CompressType.GZIP]
        for compress in candidates:
            if compress not in dataserializer.get_supported_compressions():
                continue

            data = np.random.random((1000, 100))
            serialized = pyarrow.serialize(data).to_buffer()

            def open_reader():
                # Fresh reader over the serialized buffer, compressing on read.
                return ArrowBufferIO(
                    pyarrow.py_buffer(serialized), 'r', compress_out=compress)

            # test complete read
            reader = open_reader()
            assert_array_equal(data, dataserializer.loads(reader.read()))

            # test partial read
            reader = open_reader()
            head = reader.read(chunk_size)
            tail = reader.read()
            assert_array_equal(data, dataserializer.loads(head + tail))

            # test read by chunks
            collected = BytesIO()
            reader = open_reader()
            block = reader.read(chunk_size)
            while block:
                collected.write(block)
                block = reader.read(chunk_size)

            compressed = collected.getvalue()
            assert_array_equal(data, dataserializer.loads(compressed))

            # test write by chunks
            data_sink = bytearray(len(serialized))
            compressed_mv = memoryview(compressed)
            writer = ArrowBufferIO(pyarrow.py_buffer(data_sink), 'w')
            # memoryview slicing clamps at the end, so the last chunk may be short
            for start in range(0, len(compressed), chunk_size):
                writer.write(compressed_mv[start:start + chunk_size])

            assert_array_equal(data, pyarrow.deserialize(data_sink))
开发者ID:mars-project,项目名称:mars,代码行数:50,代码来源:test_dataio.py

示例10: or_array_array

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def or_array_array(a: pa.Array, b: pa.Array) -> pa.Array:
    """Perform ``pyarrow.Array | pyarrow.Array``.

    :param a: left-hand boolean array
    :param b: right-hand boolean array
    :return: boolean array backed by newly allocated bitmaps

    Four cases are distinguished by which operands contain nulls:

    * neither: plain bitwise OR, no validity bitmap on the result;
    * only ``b``: OR of the values, validity is ``a.data | b.valid_bits``;
    * only ``a``: handled by swapping the operands;
    * both: delegated to ``masked_bitmap_or_unaligned``, which also reports
      the null count of the result.
    """
    a_has_nulls = a.null_count != 0
    b_has_nulls = b.null_count != 0

    if a_has_nulls and not b_has_nulls:
        # Swap so that the null-free operand is always on the left.
        return or_array_array(b, a)

    # One bit per element, rounded up to whole bytes.
    output_length = (len(a) + 7) // 8
    result = np.zeros(output_length, dtype=np.uint8)

    if a_has_nulls:
        # Both operands contain nulls.
        valid_bits = np.zeros(output_length, dtype=np.uint8)
        null_count = masked_bitmap_or_unaligned(
            len(a),
            a.buffers()[0],
            a.buffers()[1],
            a.offset,
            b.buffers()[0],
            b.buffers()[1],
            b.offset,
            result,
            valid_bits,
        )
        return pa.Array.from_buffers(
            pa.bool_(),
            len(a),
            [pa.py_buffer(valid_bits), pa.py_buffer(result)],
            null_count,
        )

    # From here on ``a`` has no nulls, so the value OR is the same whether or
    # not ``b`` has nulls.
    bitmap_or_unaligned(
        len(a), a.buffers()[1], a.offset, b.buffers()[1], b.offset, result
    )
    if not b_has_nulls:
        return pa.Array.from_buffers(
            pa.bool_(), len(a), [None, pa.py_buffer(result)], 0
        )

    # b has nulls, mark all occasions of b(None) & a(True) as True
    # -> valid_bits = a.data or b.valid_bits
    valid_bits = np.zeros(output_length, dtype=np.uint8)
    bitmap_or_unaligned(
        len(a), a.buffers()[1], a.offset, b.buffers()[0], b.offset, valid_bits
    )
    return pa.Array.from_buffers(
        pa.bool_(), len(a), [pa.py_buffer(valid_bits), pa.py_buffer(result)]
    )
开发者ID:xhochy,项目名称:fletcher,代码行数:52,代码来源:bool.py


注:本文中的pyarrow.py_buffer方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。