本文整理汇总了Python中pyarrow.py_buffer方法的典型用法代码示例。如果您正苦于以下问题:Python pyarrow.py_buffer方法的具体用法?Python pyarrow.py_buffer怎么用?Python pyarrow.py_buffer使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pyarrow的用法示例。
在下文中一共展示了pyarrow.py_buffer方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: loads
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def loads(buf):
    """Deserialize an object from a buffer that begins with a file header.

    The header (parsed by ``read_file_header``) names the compression and
    the serialization type.  Everything after the first ``HEADER_LENGTH``
    bytes is the payload, which is decompressed when the header asks for it.

    NOTE: both the pickle path and the Arrow fallback path call
    ``pickle.loads`` on bytes taken from ``buf``; only feed this function
    data from a trusted source.

    :param buf: bytes-like object holding the header followed by the payload
    :return: the deserialized object
    """
    view = memoryview(buf)
    header = read_file_header(view)
    codec = header.compress

    if codec == CompressType.NONE:
        payload = buf[HEADER_LENGTH:]
    else:
        payload = decompressors[codec](view[HEADER_LENGTH:])

    # Anything that is not Arrow-serialized is a plain pickle.
    if header.type != SerialType.ARROW:
        return pickle.loads(payload)

    try:
        return pyarrow.deserialize(memoryview(payload), mars_serialize_context())
    except pyarrow.lib.ArrowInvalid:  # pragma: no cover
        # Reconstruct the value from the buffers of its arrow components.
        # Layout: 4-byte int32 meta size | pickled meta | component buffers.
        payload_view = memoryview(payload)
        meta_size = np.frombuffer(payload_view[0:4], dtype='int32').item()
        meta_end = 4 + meta_size
        meta = pickle.loads(payload_view[4:meta_end])  # nosec
        sizes = meta.pop('buffer_sizes')
        # Cumulative sum turns the per-buffer sizes into slice boundaries.
        edges = np.cumsum([meta_end] + sizes)
        meta['data'] = [
            pyarrow.py_buffer(payload_view[start:stop])
            for start, stop in zip(edges[:-1], edges[1:])
        ]
        return pyarrow.deserialize_components(meta, mars_serialize_context())
示例2: _text_cat
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def _text_cat(a: pa.Array, b: pa.Array) -> pa.Array:
    """Combine two equally long string arrays into one string array.

    The per-row merging is delegated to ``_merge_string_data`` (presumably
    an element-wise concatenation -- confirm against that helper).  The
    result is assembled from three buffers: the merged validity bitmap,
    int32 offsets and uint8 character data.

    Raises:
        ValueError: if ``a`` and ``b`` differ in length.

    Returns:
        A new string array, or ``a`` itself when the inputs are empty.
    """
    n_rows = len(a)
    if n_rows != len(b):
        raise ValueError("Lengths of arrays don't match")

    offsets_a, data_a = _extract_string_buffers(a)
    offsets_b, data_b = _extract_string_buffers(b)

    # Nothing to merge for empty input; hand back the first operand.
    if n_rows == 0:
        return a

    valid = _merge_valid_bitmaps(a, b)

    merged_offsets = np.empty(n_rows + 1, dtype=np.int32)
    merged_offsets[0] = 0

    # Bytes actually referenced by each input (offsets need not start at 0).
    bytes_a = offsets_a[-1] - offsets_a[0]
    bytes_b = offsets_b[-1] - offsets_b[0]
    merged_data = np.empty(bytes_a + bytes_b, dtype=np.uint8)

    _merge_string_data(
        n_rows,
        valid,
        offsets_a,
        data_a,
        offsets_b,
        data_b,
        merged_offsets,
        merged_data,
    )

    buffers = [
        pa.py_buffer(valid),
        pa.py_buffer(merged_offsets),
        pa.py_buffer(merged_data),
    ]
    return pa.Array.from_buffers(pa.string(), n_rows, buffers)
示例3: or_na
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def or_na(arr: pa.Array) -> pa.Array:
    """Apply ``array | NA`` with a boolean pyarrow.Array.

    In both branches one and the same bitmap is used as validity *and* as
    data buffer of the result, so every valid slot of the result is True.
    """
    n_rows = len(arr)

    if arr.null_count == 0:
        # No nulls in the input: reuse its data bitmap directly (zero-copy);
        # null_count=-1 lets pyarrow compute the count lazily.
        values = arr.buffers()[1]
        return pa.Array.from_buffers(
            pa.bool_(),
            n_rows,
            [values, values],
            null_count=-1,
            offset=arr.offset,
        )

    # One bit per row, rounded up to whole bytes.
    packed = np.zeros((n_rows + 7) // 8, dtype=np.uint8)
    null_count = _or_na(
        n_rows, arr.offset, arr.buffers()[0], arr.buffers()[1], packed
    )
    shared = pa.py_buffer(packed)
    return pa.Array.from_buffers(pa.bool_(), n_rows, [shared, shared], null_count)
示例4: all_true_like
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def all_true_like(arr: pa.Array) -> pa.Array:
    """Return an all-True boolean array shaped like ``arr``.

    The result has the same length, null count and validity bitmap as the
    input; only the data bits are replaced by ones.
    """
    # Split the array offset into whole bytes (dropped by slicing the
    # validity buffer) and leftover bits (kept as offset of the result).
    skipped_bytes, bit_offset = divmod(arr.offset, 8)

    validity = arr.buffers()[0]
    if validity:
        validity = validity.slice(skipped_bytes)

    # The data bitmap must cover the leftover bit offset plus all rows.
    n_bits = len(arr) + bit_offset
    ones = np.full((n_bits + 7) // 8, fill_value=255, dtype=np.uint8)

    return pa.Array.from_buffers(
        pa.bool_(),
        len(arr),
        [validity, pa.py_buffer(ones)],
        arr.null_count,
        bit_offset,
    )
示例5: shift_unaligned_bitmap
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def shift_unaligned_bitmap(
    valid_buffer: pa.Buffer, offset: int, length: int
) -> pa.Buffer:
    """Copy an unaligned bitmap into a fresh buffer that starts at bit 0.

    A zero-filled byte array large enough for ``length`` bits is allocated
    and filled by ``_shift_unaligned_bitmap``; the result is wrapped as a
    pyarrow buffer.
    """
    # Ceil-divide: one bit per entry, rounded up to whole bytes.
    n_bytes = (length + 7) // 8
    shifted = np.zeros(n_bytes, dtype=np.uint8)
    _shift_unaligned_bitmap(valid_buffer, offset, length, shifted)
    return pa.py_buffer(shifted)
示例6: _text_contains_case_sensitive
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def _text_contains_case_sensitive(data: pa.Array, pat: str) -> pa.Array:
    """
    Check for each element in the data whether it contains the pattern ``pat``.

    This implementation does basic byte-by-byte comparison and is independent
    of any locales or encodings.

    Returns a boolean array of the same length and null count as ``data``.
    """
    n_rows = len(data)

    # The comparison works on the UTF-8 bytes of the pattern.
    needle: bytes = pat.encode()

    # Bit-packed boolean output, one bit per row.  The memory is left
    # uninitialised except for the last byte, whose trailing padding bits
    # must be zero when the row count is not a multiple of 8.
    bits = np.empty((n_rows + 7) // 8, dtype=np.uint8)
    if n_rows % 8 > 0:
        bits[-1] = 0

    offsets, data_buffer = _extract_string_buffers(data)

    validity = None
    if data.null_count == 0:
        _text_contains_case_sensitive_nonnull(
            n_rows, offsets, data_buffer, needle, bits
        )
    else:
        bitmap = data.buffers()[0]
        _text_contains_case_sensitive_nulls(
            n_rows,
            _buffer_to_view(bitmap),
            data.offset,
            offsets,
            data_buffer,
            needle,
            bits,
        )
        # The result carries no offset, so re-base the validity bitmap:
        # whole bytes are dropped by slicing, leftover bits need a shift.
        skipped_bytes, bit_offset = divmod(data.offset, 8)
        validity = bitmap.slice(skipped_bytes)
        if bit_offset != 0:
            validity = shift_unaligned_bitmap(validity, bit_offset, n_rows)

    return pa.Array.from_buffers(
        pa.bool_(), n_rows, [validity, pa.py_buffer(bits)], data.null_count
    )
示例7: or_array_nparray
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def or_array_nparray(a: pa.Array, b: np.ndarray) -> pa.Array:
    """Perform ``pa.Array | np.ndarray``.

    The result is a boolean array of ``len(a)`` rows.  It has no validity
    bitmap when ``a`` has no nulls; otherwise validity bitmap and null count
    are produced by ``bitmap_or_unaligned_with_numpy``.
    """
    n_rows = len(a)
    n_bytes = (n_rows + 7) // 8  # one bit per row, rounded up
    result = np.zeros(n_bytes, dtype=np.uint8)

    if a.null_count == 0:
        bitmap_or_unaligned_with_numpy_nonnull(
            n_rows, a.buffers()[1], a.offset, b, result
        )
        return pa.Array.from_buffers(
            pa.bool_(), n_rows, [None, pa.py_buffer(result)], 0
        )

    valid_bits = np.zeros(n_bytes, dtype=np.uint8)
    null_count = bitmap_or_unaligned_with_numpy(
        n_rows, a.buffers()[0], a.buffers()[1], a.offset, b, result, valid_bits
    )
    return pa.Array.from_buffers(
        pa.bool_(),
        n_rows,
        [pa.py_buffer(valid_bits), pa.py_buffer(result)],
        null_count,
    )
示例8: test_arrow_file_does_not_validate
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def test_arrow_file_does_not_validate(self):
    """A string column holding invalid UTF-8 must fail validation.

    The array is assembled from raw buffers so that the invalid byte is
    not rejected while the array is being built.
    """
    # value_offsets: first item spans buffer offsets 0 to 1
    value_offsets = pyarrow.py_buffer(struct.pack("II", 0, 1))
    # data: a not-UTF8-safe character
    invalid_data = pyarrow.py_buffer(b"\xc9")
    array = pyarrow.StringArray.from_buffers(1, value_offsets, invalid_data)

    with arrow_file({"A": array}) as path, \
            self.assertRaisesRegex(
                InvalidArrowFile,
                "arrow-validate: --check-utf8 failed on column A",
            ):
        validate_arrow_file(path)
示例9: testArrowBufferIO
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def testArrowBufferIO(self):
    """Round-trip a random array through ``ArrowBufferIO`` per compression.

    For every supported compression among LZ4 and GZIP this checks a
    complete read, a read split in two, a read in 128-byte chunks and
    finally a write in 128-byte chunks.
    """
    if not np:
        return
    from numpy.testing import assert_array_equal

    chunk_size = 128
    candidates = [
        dataserializer.CompressType.LZ4,
        dataserializer.CompressType.GZIP,
    ]

    for compress in candidates:
        if compress not in dataserializer.get_supported_compressions():
            continue

        data = np.random.random((1000, 100))
        serialized = pyarrow.serialize(data).to_buffer()

        def open_reader():
            # Fresh reader over the serialized buffer, compressing on read.
            return ArrowBufferIO(
                pyarrow.py_buffer(serialized), 'r', compress_out=compress)

        # test complete read
        reader = open_reader()
        assert_array_equal(data, dataserializer.loads(reader.read()))

        # test partial read
        reader = open_reader()
        head = reader.read(chunk_size)
        tail = reader.read()
        assert_array_equal(data, dataserializer.loads(head + tail))

        # test read by chunks
        collected = BytesIO()
        reader = open_reader()
        while True:
            piece = reader.read(chunk_size)
            if not piece:
                break
            collected.write(piece)
        compressed = collected.getvalue()
        assert_array_equal(data, dataserializer.loads(compressed))

        # test write by chunks
        data_sink = bytearray(len(serialized))
        compressed_view = memoryview(compressed)
        writer = ArrowBufferIO(pyarrow.py_buffer(data_sink), 'w')
        # Slicing clamps at the end, so the last chunk may be shorter.
        for start in range(0, len(compressed), chunk_size):
            writer.write(compressed_view[start:start + chunk_size])
        assert_array_equal(data, pyarrow.deserialize(data_sink))
示例10: or_array_array
# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import py_buffer [as 别名]
def or_array_array(a: pa.Array, b: pa.Array) -> pa.Array:
    """Perform ``pyarrow.Array | pyarrow.Array``.

    Four cases are distinguished by which operands contain nulls:

    * neither has nulls: plain bitwise OR of the data bitmaps, no validity
      bitmap, null count 0;
    * only ``b`` has nulls: data is the OR of the data bitmaps, validity is
      ``a.data | b.validity`` and the null count is left for pyarrow to
      compute;
    * only ``a`` has nulls: the operands are swapped so the previous case
      applies;
    * both have nulls: ``masked_bitmap_or_unaligned`` fills data and
      validity and returns the null count.

    The output is sized from ``len(a)`` only; the lengths of ``a`` and ``b``
    are not compared here.
    """
    # Only `a` has nulls: swap the operands so the null-free one comes first.
    if a.null_count != 0 and b.null_count == 0:
        return or_array_array(b, a)

    n_rows = len(a)
    n_bytes = (n_rows + 7) // 8  # one bit per row, rounded up
    result = np.zeros(n_bytes, dtype=np.uint8)

    if a.null_count == 0:
        bitmap_or_unaligned(
            n_rows, a.buffers()[1], a.offset, b.buffers()[1], b.offset, result
        )
        if b.null_count == 0:
            return pa.Array.from_buffers(
                pa.bool_(), n_rows, [None, pa.py_buffer(result)], 0
            )

        # b has nulls, mark all occasions of b(None) & a(True) as True
        # -> valid_bits = a.data or b.valid_bits
        valid_bits = np.zeros(n_bytes, dtype=np.uint8)
        bitmap_or_unaligned(
            n_rows, a.buffers()[1], a.offset, b.buffers()[0], b.offset, valid_bits
        )
        return pa.Array.from_buffers(
            pa.bool_(), n_rows, [pa.py_buffer(valid_bits), pa.py_buffer(result)]
        )

    # Both operands contain nulls.
    valid_bits = np.zeros(n_bytes, dtype=np.uint8)
    null_count = masked_bitmap_or_unaligned(
        n_rows,
        a.buffers()[0],
        a.buffers()[1],
        a.offset,
        b.buffers()[0],
        b.buffers()[1],
        b.offset,
        result,
        valid_bits,
    )
    return pa.Array.from_buffers(
        pa.bool_(),
        n_rows,
        [pa.py_buffer(valid_bits), pa.py_buffer(result)],
        null_count,
    )