当前位置: 首页>>代码示例>>Python>>正文


Python pyarrow.array函数代码示例

本文整理汇总了Python中pyarrow.array函数的典型用法代码示例。如果您正苦于以下问题：pyarrow.array函数的具体用法是什么？pyarrow.array怎么用？有哪些pyarrow.array的使用例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了array函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_array_slice

def test_array_slice():
    """Check ``Array.slice`` and ``[]`` slicing against Python list slicing."""
    source = pa.array(range(10))
    length = len(source)

    # Offset only: everything from index 2 onwards.
    assert source.slice(2).equals(pa.array(range(2, 10)))

    # Offset plus length: four elements starting at index 2.
    assert source.slice(2, 4).equals(pa.array(range(2, 6)))

    # A zero offset gives back an array equal to the original.
    assert source.slice(0).equals(source)

    # An offset equal to the array length gives an empty array.
    assert len(source.slice(length)) == 0

    # A negative offset is expected to be rejected by ``slice``.
    with pytest.raises(IndexError):
        source.slice(-1)

    # Slice notation must agree with the equivalent ``slice`` calls.
    assert source[2:].equals(source.slice(2))
    assert source[2:5].equals(source.slice(2, 3))
    assert source[-5:].equals(source.slice(length - 5))

    # Steps other than 1 are expected to raise.
    for step in (-1, 2):
        with pytest.raises(IndexError):
            source[::step]

    # Exhaustively compare every start/stop pair (including out-of-range
    # and negative bounds) with the result of slicing a plain list.
    reference = source.to_pylist()
    bounds = range(-length * 2, length * 2)
    for lo in bounds:
        for hi in bounds:
            assert source[lo:hi].to_pylist() == reference[lo:hi]
开发者ID:CodingCat,项目名称:arrow,代码行数:33,代码来源:test_array.py

示例2: test_list_from_arrays

def test_list_from_arrays():
    """Build list arrays from offsets/values with ``ListArray.from_arrays``."""
    # Offsets and values given as Arrow arrays.
    raw_offsets = np.array([0, 2, 5, 8], dtype='i4')
    arrow_offsets = pa.array(raw_offsets, type='int32')
    items = [b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h']
    arrow_values = pa.array(items, type='binary')

    built = pa.ListArray.from_arrays(arrow_offsets, arrow_values)
    assert built.equals(pa.array([items[:2], items[2:5], items[5:8]]))

    # Offsets and values given as plain Python lists; a ``None`` offset
    # is expected to produce a null list entry at that position.
    letters = ['a', 'b', 'c', 'd', 'e', 'f']

    built = pa.ListArray.from_arrays([0, None, 2, 6], letters)
    assert built.equals(pa.array([letters[:2], None, letters[2:]]))

    # Same values, but with the null offset in the second-to-last slot.
    built = pa.ListArray.from_arrays([0, 2, None, 6], letters)
    assert built.equals(pa.array([letters[:2], letters[2:], None]))
开发者ID:CodingCat,项目名称:arrow,代码行数:26,代码来源:test_array.py

示例3: test_asarray

def test_asarray():
    """Check how ``np.asarray`` converts an Arrow array."""
    ints = pa.array(range(4))

    # Iterating the Arrow array yields Int64Value scalars, so numpy can
    # only store them in an object-dtype array.
    from_iteration = np.asarray(list(ints))
    assert from_iteration.dtype == np.dtype('O')
    assert type(from_iteration[0]) is pa.lib.Int64Value
    assert from_iteration.tolist() == [0, 1, 2, 3]

    # Passing the Arrow array itself gives a native int64 numpy array.
    direct = np.asarray(ints)
    assert direct.dtype == np.dtype('int64')
    assert direct.tolist() == [0, 1, 2, 3]

    # A target dtype may be requested explicitly.
    as_text = np.asarray(ints, dtype='str')
    assert as_text.tolist() == ['0', '1', '2', '3']

    # With a null present the Arrow type stays int64, but the numpy
    # result is float64 with NaN standing in for the null.
    with_null = pa.array([0, 1, 2, None])
    assert with_null.type == pa.int64()
    converted = np.asarray(with_null)
    assert converted.dtype == np.dtype('float64')
    *head, tail = converted.tolist()
    assert head == [0., 1., 2.]
    assert np.isnan(tail)
开发者ID:emkornfield,项目名称:arrow,代码行数:27,代码来源:test_array.py

示例4: dataframe_to_types

def dataframe_to_types(df, preserve_index, columns=None):
    """Determine an Arrow type for each column selected for conversion.

    Parameters
    ----------
    df : the frame to inspect (presumably a pandas DataFrame -- confirm
        against callers)
    preserve_index : forwarded to ``_get_columns_to_convert`` and
        ``construct_metadata``
    columns : optional column selection, forwarded to
        ``_get_columns_to_convert``

    Returns
    -------
    tuple of (all_names, types, metadata), where ``types`` holds one entry
    per column to convert and ``metadata`` is the result of
    ``construct_metadata``.
    """
    selection = _get_columns_to_convert(df, None, preserve_index, columns)
    (all_names, column_names, index_descriptors,
     index_columns, columns_to_convert, _) = selection

    def _arrow_type(column):
        # Try the cheap dtype-based inference first; only categorical
        # columns, or columns the inference cannot resolve, are actually
        # converted to an Arrow array to read off the type.
        raw = column.values
        if not _pandas_api.is_categorical(raw):
            raw, hint = get_datetimetz_type(raw, column.dtype, None)
            inferred = pa.lib._ndarray_to_arrow_type(raw, hint)
            if inferred is not None:
                return inferred
        return pa.array(column, from_pandas=True).type

    types = [_arrow_type(col) for col in columns_to_convert]

    metadata = construct_metadata(df, column_names, index_columns,
                                  index_descriptors, preserve_index, types)

    return all_names, types, metadata
开发者ID:laurentgo,项目名称:arrow,代码行数:25,代码来源:pandas_compat.py

示例5: test_buffers_nested

def test_buffers_nested():
    """Inspect the raw buffers behind a list array and a struct array."""

    def first_byte(buf):
        # First byte of a buffer, as an int, for comparing bitmaps.
        return bytearray(buf.to_pybytes())[0]

    # --- list<int64> with a null list and a null child element ---
    list_arr = pa.array([[1, 2], None, [3, None, 4, 5]],
                        type=pa.list_(pa.int64()))
    list_bufs = list_arr.buffers()
    assert len(list_bufs) == 4
    parent_bitmap, parent_offsets, child_bitmap, child_values = list_bufs
    # Parent: null bitmap and int32 offsets
    assert first_byte(parent_bitmap) == 0b00000101
    assert struct.unpack('4i', parent_offsets.to_pybytes()) == (0, 2, 2, 6)
    # Child: null bitmap and int64 values (the null slot is skipped with 8x)
    assert first_byte(child_bitmap) == 0b00110111
    assert struct.unpack('qqq8xqq',
                         child_values.to_pybytes()) == (1, 2, 3, 4, 5)

    # --- struct<a: int8, b: int16> with a null struct and null fields ---
    struct_type = pa.struct([pa.field('a', pa.int8()),
                             pa.field('b', pa.int16())])
    struct_arr = pa.array([(42, None), None, (None, 43)], type=struct_type)
    struct_bufs = struct_arr.buffers()
    assert len(struct_bufs) == 5
    top_bitmap, a_bitmap, a_values, b_bitmap, b_values = struct_bufs
    # Parent null bitmap
    assert first_byte(top_bitmap) == 0b00000101
    # Field 'a'
    assert first_byte(a_bitmap) == 0b00000001
    assert struct.unpack('bxx', a_values.to_pybytes()) == (42,)
    # Field 'b'
    assert first_byte(b_bitmap) == 0b00000100
    assert struct.unpack('4xh', b_values.to_pybytes()) == (43,)
开发者ID:CodingCat,项目名称:arrow,代码行数:33,代码来源:test_array.py

示例6: test_recordbatch_slice

def test_recordbatch_slice():
    """Check that ``RecordBatch.slice`` matches slicing each column."""
    names = ['c0', 'c1']
    columns = [
        pa.array(range(5)),
        pa.array([-10, -5, 0, 5, 10]),
    ]
    batch = pa.RecordBatch.from_arrays(columns, names)

    # Offset only.
    tail = batch.slice(2)
    assert tail.num_rows == 3
    tail_columns = [col.slice(2) for col in columns]
    assert tail.equals(pa.RecordBatch.from_arrays(tail_columns, names))

    # Offset plus length.
    window = batch.slice(2, 2)
    window_columns = [col.slice(2, 2) for col in columns]
    assert window.equals(pa.RecordBatch.from_arrays(window_columns, names))

    # A zero offset gives back a batch equal to the original.
    assert batch.slice(0).equals(batch)

    # An offset equal to the number of rows gives an empty batch.
    assert len(batch.slice(len(batch))) == 0

    # A negative offset is expected to be rejected.
    with pytest.raises(IndexError):
        batch.slice(-1)
开发者ID:hdfeos,项目名称:arrow,代码行数:30,代码来源:test_table.py

示例7: test_chunked_array_str

def test_chunked_array_str():
    data = [
        pa.array([1, 2, 3]),
        pa.array([4, 5, 6])
    ]
    data = pa.chunked_array(data)
    assert str(data) == """[
开发者ID:dremio,项目名称:arrow,代码行数:7,代码来源:test_table.py

示例8: test_cast_timestamp_to_int

def test_cast_timestamp_to_int():
    """Casting a microsecond timestamp array to 'i8' keeps the raw values."""
    raw = np.array([0, 1, 2], dtype='int64')
    stamps = pa.array(raw, type=pa.timestamp('us'))

    as_ints = stamps.cast('i8')

    assert as_ints.equals(pa.array([0, 1, 2], type='i8'))
开发者ID:CodingCat,项目名称:arrow,代码行数:7,代码来源:test_array.py

示例9: test_recordbatch_basics

def test_recordbatch_basics():
    """Exercise basic ``RecordBatch`` construction and accessors."""
    columns = [
        pa.array(range(5)),
        pa.array([-10, -5, 0, 5, 10]),
    ]

    # Built from column names only: no schema metadata is attached.
    batch = pa.RecordBatch.from_arrays(columns, ['c0', 'c1'])
    assert not batch.schema.metadata

    assert batch.num_rows == 5
    assert len(batch) == 5
    assert batch.num_columns == len(columns)

    expected_dict = OrderedDict([
        ('c0', [0, 1, 2, 3, 4]),
        ('c1', [-10, -5, 0, 5, 10]),
    ])
    assert batch.to_pydict() == expected_dict

    # Indexing past the last column must fail (bounds checking).
    with pytest.raises(IndexError):
        batch[2]

    # Built from an explicit schema: the batch reports that same schema.
    fields = [pa.field('c0', pa.int16()), pa.field('c1', pa.int32())]
    schema = pa.schema(fields, metadata={b'foo': b'bar'})
    batch = pa.RecordBatch.from_arrays(columns, schema)
    assert batch.schema == schema
开发者ID:dremio,项目名称:arrow,代码行数:27,代码来源:test_table.py

示例10: test_recordbatch_from_arrays_validate_lengths

def test_recordbatch_from_arrays_validate_lengths():
    """Columns of unequal length must be rejected (ARROW-2820)."""
    ids = pa.array([1])
    tags = pa.array(["tokyo", "like", "happy"])
    owners = pa.array(["derek"])

    with pytest.raises(ValueError):
        pa.RecordBatch.from_arrays([ids, tags, owners],
                                   ['id', 'tags', 'name'])
开发者ID:dremio,项目名称:arrow,代码行数:7,代码来源:test_table.py

示例11: test_chunked_array_asarray

def test_chunked_array_asarray():
    """Check how ``np.asarray`` converts a chunked array."""
    chunked = pa.chunked_array([pa.array([0]), pa.array([1, 2, 3])])

    # Chunks are concatenated into one int64 numpy array.
    flat = np.asarray(chunked)
    assert flat.dtype == np.dtype('int64')
    assert flat.tolist() == [0, 1, 2, 3]

    # A target dtype may be requested explicitly.
    as_text = np.asarray(chunked, dtype='str')
    assert as_text.tolist() == ['0', '1', '2', '3']

    # With a null present the result is float64, NaN replacing the null.
    chunked = pa.chunked_array([pa.array([1, None]), pa.array([1, 2, 3])])

    flat = np.asarray(chunked)
    assert flat.dtype == np.dtype('float64')
    first, second, *remainder = flat.tolist()
    assert first == 1.
    assert np.isnan(second)
    assert remainder == [1., 2., 3.]
开发者ID:dremio,项目名称:arrow,代码行数:28,代码来源:test_table.py

示例12: test_invalid_table_construct

def test_invalid_table_construct():
    """Building a table from columns of different lengths must fail."""
    source = np.array([0, 1], dtype=np.uint8)
    uint8_type = pa.uint8()
    # Two elements in the first column, one in the second.
    full_column = pa.array(source, type=uint8_type)
    short_column = pa.array(source[1:], type=uint8_type)

    with pytest.raises(pa.lib.ArrowInvalid):
        pa.Table.from_arrays([full_column, short_column],
                             names=["a1", "a2"])
开发者ID:emkornfield,项目名称:arrow,代码行数:7,代码来源:test_table.py

示例13: test_sequence_timestamp_from_int_with_unit

def test_sequence_timestamp_from_int_with_unit():
    """Convert the integer 1 to a timestamp in each unit and check its repr.

    Also checks that a sequence containing an arbitrary Python object is
    rejected for the nanosecond timestamp, date32 and date64 types.
    """
    data = [1]

    # One tick past the epoch, rendered at each unit's resolution.
    expected_by_unit = [
        ('s', "Timestamp('1970-01-01 00:00:01')"),
        ('ms', "Timestamp('1970-01-01 00:00:00.001000')"),
        ('us', "Timestamp('1970-01-01 00:00:00.000001')"),
        ('ns', "Timestamp('1970-01-01 00:00:00.000000001')"),
    ]
    for unit, expected_repr in expected_by_unit:
        ty = pa.timestamp(unit)
        arr = pa.array(data, type=ty)
        assert len(arr) == 1
        assert arr.type == ty
        assert str(arr[0]) == expected_repr

    class CustomClass():
        pass

    # Each conversion needs its own ``pytest.raises`` context: the context
    # exits at the first exception, so statements grouped under a single
    # ``with`` after the first raising call would never be executed.
    for ty in (pa.timestamp('ns'), pa.date32(), pa.date64()):
        with pytest.raises(pa.ArrowException):
            pa.array([1, CustomClass()], type=ty)
开发者ID:CodingCat,项目名称:arrow,代码行数:34,代码来源:test_convert_builtin.py

示例14: test_file_reader_writer

def test_file_reader_writer():
    """The old reader/writer names warn and return the new classes."""
    columns = [
        pa.array([1, 2, 3, 4]),
        pa.array(['foo', 'bar', 'baz', None]),
        pa.array([True, None, False, True]),
    ]
    batch = pa.RecordBatch.from_arrays(columns, ['f0', 'f1', 'f2'])

    # Deprecated writer aliases: each must emit a FutureWarning and hand
    # back an instance of the corresponding RecordBatch*Writer class.
    stream_sink = pa.BufferOutputStream()
    with pytest.warns(FutureWarning):
        stream_writer = pa.StreamWriter(stream_sink, batch.schema)
        assert isinstance(stream_writer, pa.RecordBatchStreamWriter)

    file_sink = pa.BufferOutputStream()
    with pytest.warns(FutureWarning):
        file_writer = pa.FileWriter(file_sink, batch.schema)
        assert isinstance(file_writer, pa.RecordBatchFileWriter)

    # Write one batch through each writer, then close them so the sinks
    # hold complete payloads for the readers below.
    writers = (file_writer, stream_writer)
    for writer in writers:
        writer.write_batch(batch)
    for writer in writers:
        writer.close()

    stream_payload = stream_sink.get_result()
    file_payload = file_sink.get_result()

    # Deprecated reader aliases behave the same way as the writers.
    with pytest.warns(FutureWarning):
        stream_reader = pa.StreamReader(stream_payload)
        assert isinstance(stream_reader, pa.RecordBatchStreamReader)

    with pytest.warns(FutureWarning):
        file_reader = pa.FileReader(file_payload)
        assert isinstance(file_reader, pa.RecordBatchFileReader)
开发者ID:hdfeos,项目名称:arrow,代码行数:35,代码来源:test_deprecations.py

示例15: test_table_basics

def test_table_basics():
    """Exercise basic ``Table`` construction, shape and column iteration."""
    arrays = [
        pa.array(range(5)),
        pa.array([-10, -5, 0, 5, 10]),
    ]
    table = pa.Table.from_arrays(arrays, names=('a', 'b'))
    table._validate()

    assert table.num_rows == 5
    assert len(table) == 5
    assert table.num_columns == 2
    assert table.shape == (5, 2)

    expected_dict = OrderedDict([
        ('a', [0, 1, 2, 3, 4]),
        ('b', [-10, -5, 0, 5, 10]),
    ])
    assert table.to_pydict() == expected_dict

    seen = []
    for column in table.itercolumns():
        seen.append(column)
        chunked = column.data

        # Every chunk yielded by the iterator is a real object.
        assert all(chunk is not None for chunk in chunked.iterchunks())

        # Chunk indices outside [0, num_chunks) must be rejected.
        for bad_index in (-1, chunked.num_chunks):
            with pytest.raises(IndexError):
                chunked.chunk(bad_index)

    # Iteration yields the same columns, in order, as ``table.columns``.
    assert table.columns == seen
开发者ID:dremio,项目名称:arrow,代码行数:29,代码来源:test_table.py


注:本文中的pyarrow.array函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。