当前位置: 首页>>代码示例>>Python>>正文


Python pyarrow.bool_方法代码示例

本文整理汇总了Python中pyarrow.bool_方法的典型用法代码示例。如果您正苦于以下问题:Python pyarrow.bool_方法的具体用法?Python pyarrow.bool_怎么用?Python pyarrow.bool_使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pyarrow的用法示例。


在下文中一共展示了pyarrow.bool_方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: all_true_like

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import bool_ [as 别名]
def all_true_like(arr: pa.Array) -> pa.Array:
    """Return a boolean array with all-True with the same size as the input and the same valid bitmap."""
    valid_buffer = arr.buffers()[0]
    if valid_buffer:
        valid_buffer = valid_buffer.slice(arr.offset // 8)

    output_offset = arr.offset % 8
    output_length = len(arr) + output_offset

    output_size = output_length // 8
    if output_length % 8 > 0:
        output_size += 1
    output = np.full(output_size, fill_value=255, dtype=np.uint8)

    return pa.Array.from_buffers(
        pa.bool_(),
        len(arr),
        [valid_buffer, pa.py_buffer(output)],
        arr.null_count,
        output_offset,
    ) 
开发者ID:xhochy,项目名称:fletcher,代码行数:23,代码来源:bool.py

示例2: test_iterate_over_bool_chunk

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import bool_ [as 别名]
def test_iterate_over_bool_chunk():
    random.seed(datetime.datetime.now())
    column_meta = {"logicalType": "BOOLEAN"}

    def bool_generator():
        return bool(random.getrandbits(1))

    iterate_over_test_chunk([pyarrow.bool_(), pyarrow.bool_()],
                            [column_meta, column_meta],
                            bool_generator) 
开发者ID:snowflakedb,项目名称:snowflake-connector-python,代码行数:12,代码来源:test_unit_arrow_chunk_iterator.py

示例3: __init__

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import bool_ [as 别名]
def __init__(self, values):
        if not isinstance(values, pa.ChunkedArray):
            raise ValueError

        assert values.type == pa.bool_()
        self._data = values
        self._dtype = ArrowBoolDtype() 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:9,代码来源:bool.py

示例4: setUp

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import bool_ [as 别名]
def setUp(self):
        self.sa_meta = sa.MetaData()
        self.data = [
            [17.124, 1.12, 3.14, 13.37],
            [1, 2, 3, 4],
            [1, 2, 3, 4],
            [1, 2, 3, 4],
            [True, None, False, True],
            ['string 1', 'string 2', None, 'string 3'],
            [datetime(2007, 7, 13, 1, 23, 34, 123456),
             None,
             datetime(2006, 1, 13, 12, 34, 56, 432539),
             datetime(2010, 8, 13, 5, 46, 57, 437699), ],
            ["Test Text", "Some#More#Test#  Text", "!@#$%%^&*&", None],
        ]
        self.table = sa.Table(
            'unit_test_table',
            self.sa_meta,
            sa.Column('real_col', sa.REAL),
            sa.Column('bigint_col', sa.BIGINT),
            sa.Column('int_col', sa.INTEGER),
            sa.Column('smallint_col', sa.SMALLINT),
            sa.Column('bool_col', sa.BOOLEAN),
            sa.Column('str_col', sa.VARCHAR),
            sa.Column('timestamp_col', sa.TIMESTAMP),
            sa.Column('plaintext_col', sa.TEXT),
        )

        self.expected_datatypes = [
            pa.float32(),
            pa.int64(),
            pa.int32(),
            pa.int16(),
            pa.bool_(),
            pa.string(),
            pa.timestamp('ns'),
            pa.string(),
        ] 
开发者ID:hellonarrativ,项目名称:spectrify,代码行数:40,代码来源:test_parquet.py

示例5: test_arrow_schema_convertion

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import bool_ [as 别名]
def test_arrow_schema_convertion():
    fields = [
        pa.field('string', pa.string()),
        pa.field('int8', pa.int8()),
        pa.field('int16', pa.int16()),
        pa.field('int32', pa.int32()),
        pa.field('int64', pa.int64()),
        pa.field('float', pa.float32()),
        pa.field('double', pa.float64()),
        pa.field('bool', pa.bool_(), False),
        pa.field('fixed_size_binary', pa.binary(10)),
        pa.field('variable_size_binary', pa.binary()),
        pa.field('decimal', pa.decimal128(3, 4)),
        pa.field('timestamp_s', pa.timestamp('s')),
        pa.field('timestamp_ns', pa.timestamp('ns')),
        pa.field('date_32', pa.date32()),
        pa.field('date_64', pa.date64())
    ]
    arrow_schema = pa.schema(fields)

    mock_dataset = _mock_parquet_dataset([], arrow_schema)

    unischema = Unischema.from_arrow_schema(mock_dataset)
    for name in arrow_schema.names:
        assert getattr(unischema, name).name == name
        assert getattr(unischema, name).codec is None

        if name == 'bool':
            assert not getattr(unischema, name).nullable
        else:
            assert getattr(unischema, name).nullable

    # Test schema preserve fields order
    field_name_list = [f.name for f in fields]
    assert list(unischema.fields.keys()) == field_name_list 
开发者ID:uber,项目名称:petastorm,代码行数:37,代码来源:test_unischema.py

示例6: to_arrow_type

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import bool_ [as 别名]
def to_arrow_type(dt):
    """ Convert Spark data type to pyarrow type
    """
    from distutils.version import LooseVersion
    import pyarrow as pa
    if type(dt) == BooleanType:
        arrow_type = pa.bool_()
    elif type(dt) == ByteType:
        arrow_type = pa.int8()
    elif type(dt) == ShortType:
        arrow_type = pa.int16()
    elif type(dt) == IntegerType:
        arrow_type = pa.int32()
    elif type(dt) == LongType:
        arrow_type = pa.int64()
    elif type(dt) == FloatType:
        arrow_type = pa.float32()
    elif type(dt) == DoubleType:
        arrow_type = pa.float64()
    elif type(dt) == DecimalType:
        arrow_type = pa.decimal128(dt.precision, dt.scale)
    elif type(dt) == StringType:
        arrow_type = pa.string()
    elif type(dt) == BinaryType:
        # TODO: remove version check once minimum pyarrow version is 0.10.0
        if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt) +
                            "\nPlease install pyarrow >= 0.10.0 for BinaryType support.")
        arrow_type = pa.binary()
    elif type(dt) == DateType:
        arrow_type = pa.date32()
    elif type(dt) == TimestampType:
        # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read
        arrow_type = pa.timestamp('us', tz='UTC')
    elif type(dt) == ArrayType:
        if type(dt.elementType) == TimestampType:
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
        arrow_type = pa.list_(to_arrow_type(dt.elementType))
    else:
        raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
    return arrow_type 
开发者ID:runawayhorse001,项目名称:LearningApacheSpark,代码行数:43,代码来源:types.py

示例7: test_bq_to_arrow_data_type_w_struct

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import bool_ [as 别名]
def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type):
    fields = (
        schema.SchemaField("field01", "STRING"),
        schema.SchemaField("field02", "BYTES"),
        schema.SchemaField("field03", "INTEGER"),
        schema.SchemaField("field04", "INT64"),
        schema.SchemaField("field05", "FLOAT"),
        schema.SchemaField("field06", "FLOAT64"),
        schema.SchemaField("field07", "NUMERIC"),
        schema.SchemaField("field08", "BOOLEAN"),
        schema.SchemaField("field09", "BOOL"),
        schema.SchemaField("field10", "TIMESTAMP"),
        schema.SchemaField("field11", "DATE"),
        schema.SchemaField("field12", "TIME"),
        schema.SchemaField("field13", "DATETIME"),
        schema.SchemaField("field14", "GEOGRAPHY"),
    )
    field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields)
    actual = module_under_test.bq_to_arrow_data_type(field)
    expected = pyarrow.struct(
        (
            pyarrow.field("field01", pyarrow.string()),
            pyarrow.field("field02", pyarrow.binary()),
            pyarrow.field("field03", pyarrow.int64()),
            pyarrow.field("field04", pyarrow.int64()),
            pyarrow.field("field05", pyarrow.float64()),
            pyarrow.field("field06", pyarrow.float64()),
            pyarrow.field("field07", module_under_test.pyarrow_numeric()),
            pyarrow.field("field08", pyarrow.bool_()),
            pyarrow.field("field09", pyarrow.bool_()),
            pyarrow.field("field10", module_under_test.pyarrow_timestamp()),
            pyarrow.field("field11", pyarrow.date32()),
            pyarrow.field("field12", module_under_test.pyarrow_time()),
            pyarrow.field("field13", module_under_test.pyarrow_datetime()),
            pyarrow.field("field14", pyarrow.string()),
        )
    )
    assert pyarrow.types.is_struct(actual)
    assert actual.num_children == len(fields)
    assert actual.equals(expected) 
开发者ID:googleapis,项目名称:python-bigquery,代码行数:42,代码来源:test__pandas_helpers.py

示例8: test_bq_to_arrow_data_type_w_array_struct

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import bool_ [as 别名]
def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type):
    fields = (
        schema.SchemaField("field01", "STRING"),
        schema.SchemaField("field02", "BYTES"),
        schema.SchemaField("field03", "INTEGER"),
        schema.SchemaField("field04", "INT64"),
        schema.SchemaField("field05", "FLOAT"),
        schema.SchemaField("field06", "FLOAT64"),
        schema.SchemaField("field07", "NUMERIC"),
        schema.SchemaField("field08", "BOOLEAN"),
        schema.SchemaField("field09", "BOOL"),
        schema.SchemaField("field10", "TIMESTAMP"),
        schema.SchemaField("field11", "DATE"),
        schema.SchemaField("field12", "TIME"),
        schema.SchemaField("field13", "DATETIME"),
        schema.SchemaField("field14", "GEOGRAPHY"),
    )
    field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields)
    actual = module_under_test.bq_to_arrow_data_type(field)
    expected_value_type = pyarrow.struct(
        (
            pyarrow.field("field01", pyarrow.string()),
            pyarrow.field("field02", pyarrow.binary()),
            pyarrow.field("field03", pyarrow.int64()),
            pyarrow.field("field04", pyarrow.int64()),
            pyarrow.field("field05", pyarrow.float64()),
            pyarrow.field("field06", pyarrow.float64()),
            pyarrow.field("field07", module_under_test.pyarrow_numeric()),
            pyarrow.field("field08", pyarrow.bool_()),
            pyarrow.field("field09", pyarrow.bool_()),
            pyarrow.field("field10", module_under_test.pyarrow_timestamp()),
            pyarrow.field("field11", pyarrow.date32()),
            pyarrow.field("field12", module_under_test.pyarrow_time()),
            pyarrow.field("field13", module_under_test.pyarrow_datetime()),
            pyarrow.field("field14", pyarrow.string()),
        )
    )
    assert pyarrow.types.is_list(actual)
    assert pyarrow.types.is_struct(actual.value_type)
    assert actual.value_type.num_children == len(fields)
    assert actual.value_type.equals(expected_value_type) 
开发者ID:googleapis,项目名称:python-bigquery,代码行数:43,代码来源:test__pandas_helpers.py

示例9: test_read_parquet

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import bool_ [as 别名]
def test_read_parquet(tmpdir, continuous):
    str_arr = pa.array(["a", None, "c"], pa.string())
    int_arr = pa.array([1, None, -2], pa.int32())
    bool_arr = pa.array([True, None, False], pa.bool_())
    table = pa.Table.from_arrays([str_arr, int_arr, bool_arr], ["str", "int", "bool"])

    pq.write_table(table, "df.parquet")
    result = fr.read_parquet("df.parquet", continuous=continuous)
    expected = fr.pandas_from_arrow(table, continuous=continuous)
    tm.assert_frame_equal(result, expected) 
开发者ID:xhochy,项目名称:fletcher,代码行数:12,代码来源:test_io.py

示例10: test_any_op

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import bool_ [as 别名]
def test_any_op(data, skipna):
    arrow = pa.array(data, type=pa.bool_())
    # TODO(pandas-0.26): Use pandas.BooleanArray
    # https://github.com/pandas-dev/pandas/issues/27709 / https://github.com/pandas-dev/pandas/issues/12863
    pandas = pd.Series(data, dtype=float)

    assert any_op(arrow, skipna) == pandas.any(skipna=skipna)

    # Split in the middle and check whether this still works
    if len(data) > 2:
        arrow = pa.chunked_array(
            [data[: len(data) // 2], data[len(data) // 2 :]], type=pa.bool_()
        )
        assert any_op(arrow, skipna) == pandas.any(skipna=skipna) 
开发者ID:xhochy,项目名称:fletcher,代码行数:16,代码来源:test_boolean.py

示例11: test_all_op

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import bool_ [as 别名]
def test_all_op(data, skipna):
    arrow = pa.array(data, type=pa.bool_())
    # https://github.com/pandas-dev/pandas/issues/27709 / https://github.com/pandas-dev/pandas/issues/12863
    pandas = pd.Series(data, dtype=float)

    assert all_op(arrow, skipna) == pandas.all(skipna=skipna)

    # Split in the middle and check whether this still works
    if len(data) > 2:
        arrow = pa.chunked_array(
            [data[: len(data) // 2], data[len(data) // 2 :]], type=pa.bool_()
        )
        assert all_op(arrow, skipna) == pandas.all(skipna=skipna) 
开发者ID:xhochy,项目名称:fletcher,代码行数:15,代码来源:test_boolean.py

示例12: test_contains_no_regex_ascii

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import bool_ [as 别名]
def test_contains_no_regex_ascii(data, pat, expected, fletcher_variant):
    fr_series = _fr_series_from_data(data, fletcher_variant)
    fr_expected = _fr_series_from_data(expected, fletcher_variant, pa.bool_())

    # Run over slices to check offset handling code
    for i in range(len(data)):
        ser = fr_series.tail(len(data) - i)
        expected = fr_expected.tail(len(data) - i)
        result = ser.fr_text.contains(pat, regex=False)
        tm.assert_series_equal(result, expected) 
开发者ID:xhochy,项目名称:fletcher,代码行数:12,代码来源:test_text.py

示例13: _text_contains_case_sensitive

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import bool_ [as 别名]
def _text_contains_case_sensitive(data: pa.Array, pat: str) -> pa.Array:
    """
    Check for each element in the data whether it contains the pattern ``pat``.

    This implementation does basic byte-by-byte comparison and is independent
    of any locales or encodings.
    """
    # Convert to UTF-8 bytes
    pat_bytes: bytes = pat.encode()

    # Initialise boolean (bit-packaed) output array.
    output_size = len(data) // 8
    if len(data) % 8 > 0:
        output_size += 1
    output = np.empty(output_size, dtype=np.uint8)
    if len(data) % 8 > 0:
        # Zero trailing bits
        output[-1] = 0

    offsets, data_buffer = _extract_string_buffers(data)

    if data.null_count == 0:
        valid_buffer = None
        _text_contains_case_sensitive_nonnull(
            len(data), offsets, data_buffer, pat_bytes, output
        )
    else:
        valid = _buffer_to_view(data.buffers()[0])
        _text_contains_case_sensitive_nulls(
            len(data), valid, data.offset, offsets, data_buffer, pat_bytes, output
        )
        valid_buffer = data.buffers()[0].slice(data.offset // 8)
        if data.offset % 8 != 0:
            valid_buffer = shift_unaligned_bitmap(
                valid_buffer, data.offset % 8, len(data)
            )

    return pa.Array.from_buffers(
        pa.bool_(), len(data), [valid_buffer, pa.py_buffer(output)], data.null_count
    ) 
开发者ID:xhochy,项目名称:fletcher,代码行数:42,代码来源:string.py

示例14: all_true

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import bool_ [as 别名]
def all_true(arr: pa.Array) -> pa.Array:
    """Return a boolean array with all-True, all-valid with the same size ."""
    output_length = len(arr) // 8
    if len(arr) % 8 != 0:
        output_length += 1

    buf = pa.py_buffer(np.full(output_length, 255, dtype=np.uint8))
    return pa.Array.from_buffers(pa.bool_(), len(arr), [buf, buf], 0) 
开发者ID:xhochy,项目名称:fletcher,代码行数:10,代码来源:bool.py

示例15: or_array_nparray

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import bool_ [as 别名]
def or_array_nparray(a: pa.Array, b: np.ndarray) -> pa.Array:
    """Perform ``pa.Array | np.ndarray``."""
    output_length = len(a) // 8
    if len(a) % 8 != 0:
        output_length += 1

    if a.null_count == 0:
        result = np.zeros(output_length, dtype=np.uint8)
        bitmap_or_unaligned_with_numpy_nonnull(
            len(a), a.buffers()[1], a.offset, b, result
        )
        return pa.Array.from_buffers(
            pa.bool_(), len(a), [None, pa.py_buffer(result)], 0
        )
    else:
        result = np.zeros(output_length, dtype=np.uint8)
        valid_bits = np.zeros(output_length, dtype=np.uint8)
        null_count = bitmap_or_unaligned_with_numpy(
            len(a), a.buffers()[0], a.buffers()[1], a.offset, b, result, valid_bits
        )
        return pa.Array.from_buffers(
            pa.bool_(),
            len(a),
            [pa.py_buffer(valid_bits), pa.py_buffer(result)],
            null_count,
        ) 
开发者ID:xhochy,项目名称:fletcher,代码行数:28,代码来源:bool.py


注:本文中的pyarrow.bool_方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。