

Python pyarrow.BufferReader Method Code Examples

This article collects typical usage examples of the pyarrow.BufferReader method in Python. If you are wondering what pyarrow.BufferReader does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples from the pyarrow package itself.


Five code examples of the pyarrow.BufferReader method are shown below, ordered by popularity by default.
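Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what pa.BufferReader provides: a seekable, file-like view over in-memory bytes or a pa.Buffer.

import pyarrow as pa

# Wrap in-memory bytes as a zero-copy, seekable, file-like input stream.
data = b"some raw bytes"
reader = pa.BufferReader(data)

print(reader.size())   # total number of bytes available
print(reader.read(4))  # b'some'
reader.seek(0)         # the reader is seekable, unlike a plain network stream
print(reader.read())   # read the rest from the current position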

Example 1: _parquet_bytes_to_dict

# Required import: import pyarrow [as alias]
# Or: from pyarrow import BufferReader [as alias]
def _parquet_bytes_to_dict(column: str, index_buffer: bytes):
    reader = pa.BufferReader(index_buffer)
    # This could be done much more efficiently, but that would take a lot more
    # time to implement, so it will only be done on request.
    table = pq.read_table(reader)
    if ARROW_LARGER_EQ_0150:
        column_type = table.schema.field(column).type
    else:
        column_type = table.schema.field_by_name(column).type

    # `datetime.datetime` objects only carry microsecond precision, so Arrow
    # parses the type as `pa.timestamp("us")`. Since the values are normalized
    # to `numpy.datetime64[ns]` anyway, we do not care about this and load the
    # column type as `pa.timestamp("ns")`.
    if column_type == pa.timestamp("us"):
        column_type = pa.timestamp("ns")

    df = _fix_pyarrow_07992_table(table).to_pandas()  # Could eventually be phased out

    index_dct = dict(
        zip(df[column].values, (list(x) for x in df[_PARTITION_COLUMN_NAME].values))
    )
    return index_dct, column_type 
Developer: JDASoftwareGroup, Project: kartothek, Lines of code: 25, Source file: index.py
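
A hedged usage sketch for the helper above: the column names and the value of the kartothek-internal _PARTITION_COLUMN_NAME are assumptions made purely for illustration.

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

# Build parquet bytes of the shape _parquet_bytes_to_dict expects: one index
# column plus a list-valued partition column ("partition" is assumed here).
df = pd.DataFrame({"my_index": [1, 2], "partition": [["p0"], ["p0", "p1"]]})
sink = pa.BufferOutputStream()
pq.write_table(pa.Table.from_pandas(df), sink)
index_buffer = sink.getvalue().to_pybytes()

# index_dct, column_type = _parquet_bytes_to_dict("my_index", index_buffer)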

Example 2: write_mutable_tensor

# Required import: import pyarrow [as alias]
# Or: from pyarrow import BufferReader [as alias]
def write_mutable_tensor(self, session_id, name, payload_type, body):
    import pyarrow

    from ..serialize import dataserializer
    from ..tensor.core import Indexes
    session_uid = SessionActor.gen_uid(session_id)
    session_ref = self.get_actor_ref(session_uid)

    # The request body is framed as: an 8-byte int64 size prefix, the
    # JSON-encoded indexes of that size, then the payload itself.
    index_json_size = np.frombuffer(body[0:8], dtype=np.int64).item()
    index_json = json.loads(body[8:8+index_json_size].decode('ascii'))
    index = Indexes.from_json(index_json).indexes
    if payload_type is None:
        value = dataserializer.loads(body[8+index_json_size:])
    elif payload_type == 'tensor':
        # Payload is a single Arrow-serialized tensor.
        tensor_chunk_offset = 8 + index_json_size
        with pyarrow.BufferReader(body[tensor_chunk_offset:]) as reader:
            value = pyarrow.read_tensor(reader).to_numpy()
    elif payload_type == 'record_batch':
        # Payload is an 8-byte schema size, the serialized schema, then the
        # record batch encoded against that schema.
        schema_size = np.frombuffer(body[8+index_json_size:8+index_json_size+8], dtype=np.int64).item()
        schema_offset = 8 + index_json_size + 8
        with pyarrow.BufferReader(body[schema_offset:schema_offset+schema_size]) as reader:
            schema = pyarrow.read_schema(reader)
        record_batch_offset = schema_offset + schema_size
        with pyarrow.BufferReader(body[record_batch_offset:]) as reader:
            record_batch = pyarrow.read_record_batch(reader, schema)
            value = record_batch.to_pandas().to_records(index=False)
    else:
        raise ValueError('Not supported payload type: %s' % payload_type)
    return session_ref.write_mutable_tensor(name, index, value)
Developer: mars-project, Project: mars, Lines of code: 31, Source file: server.py
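
A hedged sketch of how a client could frame the 'tensor' payload this handler parses; the empty index list and the use of pyarrow.ipc.write_tensor (the current location of the tensor writer) are assumptions for illustration, not taken from the mars project.

import json

import numpy as np
import pyarrow

# 8-byte int64 size prefix + JSON-encoded indexes + Arrow-serialized tensor.
index_json = json.dumps([]).encode('ascii')    # "no slicing", purely illustrative
header = np.int64(len(index_json)).tobytes()   # native-endian, matching np.frombuffer above

sink = pyarrow.BufferOutputStream()
tensor = pyarrow.Tensor.from_numpy(np.arange(6, dtype=np.float64).reshape(2, 3))
pyarrow.ipc.write_tensor(tensor, sink)

body = header + index_json + sink.getvalue().to_pybytes()
# write_mutable_tensor(session_id, name, payload_type='tensor', body=body)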

Example 3: _deserialize

# Required import: import pyarrow [as alias]
# Or: from pyarrow import BufferReader [as alias]
def _deserialize(data: bytes, msgpacked_cols: List[str]) -> pd.DataFrame:
        """
        Data are returned as feather-packed pandas DataFrames.
        Due to limitations in pyarrow, some objects are msgpacked inside the DataFrame.
        """
        import pyarrow

        df = pd.read_feather(pyarrow.BufferReader(data))
        for col in msgpacked_cols:
            df[col] = df[col].apply(lambda element: deserialize(element, "msgpack-ext"))

        if "index" in df.columns:
            df.set_index("index", inplace=True)  # pandas.to_feather does not support indexes,
            # so we have to send indexless frames over the wire, and set the index here.
        return df 
Developer: MolSSI, Project: QCPortal, Lines of code: 17, Source file: dataset_view.py
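
For context, a hedged sketch of the producing side this deserializer implies: an indexless DataFrame is feather-encoded into an in-memory buffer and the raw bytes are shipped. The column names are illustrative, and the msgpacked-column handling is omitted.

import pandas as pd
import pyarrow
import pyarrow.feather

df = pd.DataFrame({"value": [1.0, 2.0]}).reset_index()  # feather needs an indexless frame

sink = pyarrow.BufferOutputStream()
pyarrow.feather.write_feather(df, sink)
data = sink.getvalue().to_pybytes()

# round_tripped = _deserialize(data, msgpacked_cols=[])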

Example 4: test_parquet_roundtrip

# Required import: import pyarrow [as alias]
# Or: from pyarrow import BufferReader [as alias]
def test_parquet_roundtrip(array_type):
    df = pd.DataFrame({"col": array_type(["A", "B"])})
    table = pa.Table.from_pandas(df)
    buf = pa.BufferOutputStream()
    pq.write_table(table, buf)
    reader = pa.BufferReader(buf.getvalue().to_pybytes())
    table = pq.read_table(reader)
    pdt.assert_frame_equal(df, table.to_pandas()) 
Developer: xhochy, Project: fletcher, Lines of code: 10, Source file: test_pyarrow_roundtrip.py
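
A small variation on the test above (a sketch, not part of the fletcher test suite): pa.BufferReader also accepts a pa.Buffer directly, so the to_pybytes() copy can be skipped.

def test_parquet_roundtrip_zero_copy(array_type):
    df = pd.DataFrame({"col": array_type(["A", "B"])})
    buf = pa.BufferOutputStream()
    pq.write_table(pa.Table.from_pandas(df), buf)
    reader = pa.BufferReader(buf.getvalue())  # pass the pa.Buffer, no bytes copy
    pdt.assert_frame_equal(df, pq.read_table(reader).to_pandas())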

Example 5: _bytes2schema

# Required import: import pyarrow [as alias]
# Or: from pyarrow import BufferReader [as alias]
def _bytes2schema(data):
    reader = pa.BufferReader(data)
    schema = pq.read_schema(reader)
    fields = []
    for idx in range(len(schema)):
        f = schema[idx]

        # Schema data recovered from parquet always carries timestamps with
        # microsecond granularity, while pandas uses nanosecond granularity,
        # so the two are re-aligned here.
        if f.type == pa.timestamp("us"):
            f = pa.field(f.name, pa.timestamp("ns"))

        fields.append(f)
    return pa.schema(fields, schema.metadata) 
Developer: JDASoftwareGroup, Project: kartothek, Lines of code: 16, Source file: common_metadata.py
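
A hedged usage sketch for the helper above; the column names are illustrative, and whether the recovered schema actually reports microsecond timestamps depends on the pyarrow and parquet format versions in play.

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

df = pd.DataFrame({"ts": pd.to_datetime(["2020-01-01"]), "x": [1]})
sink = pa.BufferOutputStream()
pq.write_table(pa.Table.from_pandas(df), sink, coerce_timestamps="us")

# schema = _bytes2schema(sink.getvalue().to_pybytes())
# any "us" timestamp fields come back normalized to pa.timestamp("ns")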


Note: The pyarrow.BufferReader examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by their respective authors, and copyright of the source code remains with those authors; for distribution and use, please refer to the corresponding project licenses. Do not reproduce without permission.