当前位置: 首页>>代码示例>>Python>>正文


Python pyarrow.date32方法代码示例

本文整理汇总了Python中pyarrow.date32方法的典型用法代码示例。如果您正苦于以下问题：Python pyarrow.date32方法的具体用法？Python pyarrow.date32怎么用？Python pyarrow.date32使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pyarrow的用法示例。


在下文中一共展示了pyarrow.date32方法的11个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_iterate_over_date_chunk

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import date32 [as 别名]
def test_iterate_over_date_chunk():
    """Run the shared chunk-iterator check over two DATE (date32) columns.

    Both columns share the same column metadata and are filled with dates
    produced by a random generator.
    """
    # random.seed() only accepts None, int, float, str, bytes or bytearray.
    # Passing a datetime object was deprecated in Python 3.9 and raises
    # TypeError from 3.11 on, so seed with the float timestamp instead.
    random.seed(datetime.datetime.now().timestamp())
    column_meta = {
        "byteLength": "4",
        "logicalType": "DATE",
        "precision": "38",
        "scale": "0",
        "charLength": "0"
    }

    def date_generator():
        # Proleptic Gregorian ordinals 1..1000000 are all valid datetime.date values.
        return datetime.date.fromordinal(random.randint(1, 1000000))

    iterate_over_test_chunk([pyarrow.date32(), pyarrow.date32()],
                            [column_meta, column_meta],
                            date_generator)
开发者ID:snowflakedb,项目名称:snowflake-connector-python,代码行数:18,代码来源:test_unit_arrow_chunk_iterator.py

示例2: test_index_as_flat_series_date

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import date32 [as 别名]
def test_index_as_flat_series_date(dtype, date_as_object):
    """Check that a date32 index flattens into one series row per (date, partition) pair.

    The expected series is indexed by the dates (using the parametrized
    ``dtype``) and holds the partition labels as values.
    """
    first_day = datetime.date(2017, 1, 2)
    second_day = datetime.date(2018, 2, 3)

    index = ExplicitSecondaryIndex(
        column="col",
        index_dct={first_day: ["part_1", "part_2"], second_day: ["part_1"]},
        dtype=pa.date32(),
    )
    actual = index.as_flat_series(date_as_object=date_as_object).sort_index()

    # first_day maps to two partitions, so it appears twice in the flat index.
    expected_index = pd.Index(
        [first_day, first_day, second_day], dtype=dtype, name="col"
    )
    expected = pd.Series(
        ["part_1", "part_2", "part_1"], index=expected_index, name="partition"
    )
    assert_series_equal(actual, expected)
开发者ID:JDASoftwareGroup,项目名称:kartothek,代码行数:27,代码来源:test_index.py

示例3: test_arrow_schema_convertion

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import date32 [as 别名]
def test_arrow_schema_convertion():
    """Check that Unischema.from_arrow_schema mirrors names, nullability and field order.

    Every converted field must keep its name, have no codec, and be nullable
    unless the arrow field was declared non-nullable (only 'bool' here).
    """
    # (name, arrow type, nullable) -- 'bool' is the single non-nullable column.
    column_specs = [
        ('string', pa.string(), True),
        ('int8', pa.int8(), True),
        ('int16', pa.int16(), True),
        ('int32', pa.int32(), True),
        ('int64', pa.int64(), True),
        ('float', pa.float32(), True),
        ('double', pa.float64(), True),
        ('bool', pa.bool_(), False),
        ('fixed_size_binary', pa.binary(10), True),
        ('variable_size_binary', pa.binary(), True),
        ('decimal', pa.decimal128(3, 4), True),
        ('timestamp_s', pa.timestamp('s'), True),
        ('timestamp_ns', pa.timestamp('ns'), True),
        ('date_32', pa.date32(), True),
        ('date_64', pa.date64(), True),
    ]
    fields = [pa.field(name, arrow_type, nullable)
              for name, arrow_type, nullable in column_specs]
    arrow_schema = pa.schema(fields)

    mock_dataset = _mock_parquet_dataset([], arrow_schema)
    unischema = Unischema.from_arrow_schema(mock_dataset)

    for name in arrow_schema.names:
        converted = getattr(unischema, name)
        assert converted.name == name
        assert converted.codec is None
        assert bool(converted.nullable) is (name != 'bool')

    # Test schema preserve fields order
    assert list(unischema.fields.keys()) == [f.name for f in fields]
开发者ID:uber,项目名称:petastorm,代码行数:37,代码来源:test_unischema.py

示例4: to_arrow_type

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import date32 [as 别名]
def to_arrow_type(dt):
    """Convert a Spark data type to the corresponding pyarrow type.

    :param dt: Spark data type instance to convert.
    :return: the pyarrow data type that represents ``dt``.
    :raises TypeError: if ``dt`` has no supported Arrow counterpart, if it is
        an array of timestamps, or if it is a BinaryType and the installed
        pyarrow is older than 0.10.0.
    """
    import re
    import pyarrow as pa

    def _release(version):
        # Leading numeric release components, e.g. "0.10.0.dev42" -> (0, 10, 0).
        # Replaces distutils' LooseVersion: distutils was removed in Python 3.12.
        return tuple(int(part) for part in re.findall(r"\d+", version)[:3])

    # Types whose Arrow counterpart needs no parameters from ``dt``.
    simple_factories = {
        BooleanType: pa.bool_,
        ByteType: pa.int8,
        ShortType: pa.int16,
        IntegerType: pa.int32,
        LongType: pa.int64,
        FloatType: pa.float32,
        DoubleType: pa.float64,
        StringType: pa.string,
        DateType: pa.date32,
    }

    # Exact type match on purpose (not isinstance): subclasses of the listed
    # types are not matched and fall through to the TypeError at the end.
    kind = type(dt)
    if kind in simple_factories:
        return simple_factories[kind]()
    if kind == DecimalType:
        return pa.decimal128(dt.precision, dt.scale)
    if kind == BinaryType:
        # TODO: remove version check once minimum pyarrow version is 0.10.0
        if _release(pa.__version__) < (0, 10, 0):
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt) +
                            "\nPlease install pyarrow >= 0.10.0 for BinaryType support.")
        return pa.binary()
    if kind == TimestampType:
        # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read
        return pa.timestamp('us', tz='UTC')
    if kind == ArrayType:
        if type(dt.elementType) == TimestampType:
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
        return pa.list_(to_arrow_type(dt.elementType))
    raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
开发者ID:runawayhorse001,项目名称:LearningApacheSpark,代码行数:43,代码来源:types.py

示例5: test_bq_to_arrow_data_type_w_struct

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import date32 [as 别名]
def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type):
    """Check that a NULLABLE record field converts to the expected Arrow struct.

    The record has 14 sub-fields named field01..field14, one per BigQuery
    scalar type listed below; the resulting struct must have a matching
    child for each of them.
    """
    sub_field_names = ["field{:02d}".format(number) for number in range(1, 15)]
    sub_field_bq_types = (
        "STRING",
        "BYTES",
        "INTEGER",
        "INT64",
        "FLOAT",
        "FLOAT64",
        "NUMERIC",
        "BOOLEAN",
        "BOOL",
        "TIMESTAMP",
        "DATE",
        "TIME",
        "DATETIME",
        "GEOGRAPHY",
    )
    fields = tuple(
        schema.SchemaField(name, sub_type)
        for name, sub_type in zip(sub_field_names, sub_field_bq_types)
    )
    field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields)

    actual = module_under_test.bq_to_arrow_data_type(field)

    # Expected Arrow types, in the same order as sub_field_bq_types.
    expected_arrow_types = (
        pyarrow.string(),
        pyarrow.binary(),
        pyarrow.int64(),
        pyarrow.int64(),
        pyarrow.float64(),
        pyarrow.float64(),
        module_under_test.pyarrow_numeric(),
        pyarrow.bool_(),
        pyarrow.bool_(),
        module_under_test.pyarrow_timestamp(),
        pyarrow.date32(),
        module_under_test.pyarrow_time(),
        module_under_test.pyarrow_datetime(),
        pyarrow.string(),
    )
    expected = pyarrow.struct(
        tuple(
            pyarrow.field(name, arrow_type)
            for name, arrow_type in zip(sub_field_names, expected_arrow_types)
        )
    )

    assert pyarrow.types.is_struct(actual)
    assert actual.num_children == len(fields)
    assert actual.equals(expected)
开发者ID:googleapis,项目名称:python-bigquery,代码行数:42,代码来源:test__pandas_helpers.py

示例6: test_bq_to_arrow_data_type_w_array_struct

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import date32 [as 别名]
def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type):
    """Check that a REPEATED record field converts to an Arrow list of structs.

    The record has 14 sub-fields named field01..field14, one per BigQuery
    scalar type listed below; the list's value type must be a struct with a
    matching child for each of them.
    """
    sub_field_names = ["field{:02d}".format(number) for number in range(1, 15)]
    sub_field_bq_types = (
        "STRING",
        "BYTES",
        "INTEGER",
        "INT64",
        "FLOAT",
        "FLOAT64",
        "NUMERIC",
        "BOOLEAN",
        "BOOL",
        "TIMESTAMP",
        "DATE",
        "TIME",
        "DATETIME",
        "GEOGRAPHY",
    )
    fields = tuple(
        schema.SchemaField(name, sub_type)
        for name, sub_type in zip(sub_field_names, sub_field_bq_types)
    )
    field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields)

    actual = module_under_test.bq_to_arrow_data_type(field)

    # Expected Arrow types, in the same order as sub_field_bq_types.
    expected_arrow_types = (
        pyarrow.string(),
        pyarrow.binary(),
        pyarrow.int64(),
        pyarrow.int64(),
        pyarrow.float64(),
        pyarrow.float64(),
        module_under_test.pyarrow_numeric(),
        pyarrow.bool_(),
        pyarrow.bool_(),
        module_under_test.pyarrow_timestamp(),
        pyarrow.date32(),
        module_under_test.pyarrow_time(),
        module_under_test.pyarrow_datetime(),
        pyarrow.string(),
    )
    expected_value_type = pyarrow.struct(
        tuple(
            pyarrow.field(name, arrow_type)
            for name, arrow_type in zip(sub_field_names, expected_arrow_types)
        )
    )

    assert pyarrow.types.is_list(actual)
    value_type = actual.value_type
    assert pyarrow.types.is_struct(value_type)
    assert value_type.num_children == len(fields)
    assert value_type.equals(expected_value_type)
开发者ID:googleapis,项目名称:python-bigquery,代码行数:43,代码来源:test__pandas_helpers.py

示例7: _get_numba_typ_from_pa_typ

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import date32 [as 别名]
def _get_numba_typ_from_pa_typ(pa_typ):
    """Return the numba type used for values of the given pyarrow type.

    Booleans, fixed-width integers, 32/64-bit floats, strings, both date
    types and timestamps in s/ms/us/ns units are supported; every date and
    timestamp type maps to a nanosecond NPDatetime.

    Raises ValueError for any other pyarrow type.
    """
    import pyarrow as pa

    supported = {}
    # boolean
    supported[pa.bool_()] = types.bool_
    # signed int types
    supported[pa.int8()] = types.int8
    supported[pa.int16()] = types.int16
    supported[pa.int32()] = types.int32
    supported[pa.int64()] = types.int64
    # unsigned int types
    supported[pa.uint8()] = types.uint8
    supported[pa.uint16()] = types.uint16
    supported[pa.uint32()] = types.uint32
    supported[pa.uint64()] = types.uint64
    # float types (TODO: float16?)
    supported[pa.float32()] = types.float32
    supported[pa.float64()] = types.float64
    # String
    supported[pa.string()] = string_type
    # date
    for date_typ in (pa.date32(), pa.date64()):
        supported[date_typ] = types.NPDatetime('ns')
    # time (TODO: time32, time64, ...)
    for unit in ('ns', 'us', 'ms', 's'):
        supported[pa.timestamp(unit)] = types.NPDatetime('ns')

    if pa_typ in supported:
        return supported[pa_typ]
    raise ValueError("Arrow data type {} not supported yet".format(pa_typ))
开发者ID:IntelPython,项目名称:sdc,代码行数:34,代码来源:parquet_pio.py

示例8: test_observed_values_date_as_object

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import date32 [as 别名]
def test_observed_values_date_as_object(date_as_object):
    """Check the value type returned by observed_values for a date32 index.

    With ``date_as_object`` set the single observed value must equal the
    original ``datetime.date``; otherwise it must equal the matching
    numpy datetime64.
    """
    day = datetime.date(2020, 1, 1)
    index = ExplicitSecondaryIndex(
        column="col", dtype=pa.date32(), index_dct={day: ["part_label"]}
    )

    observed = index.observed_values(date_as_object=date_as_object)

    expected = day if date_as_object else pd.Timestamp(day).to_datetime64()
    assert len(observed) == 1
    assert observed[0] == expected
开发者ID:JDASoftwareGroup,项目名称:kartothek,代码行数:14,代码来源:test_index.py

示例9: test_date_conversion

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import date32 [as 别名]
def test_date_conversion():
    """
    Test converting DATE columns to days since epoch
    """
    iso_days = ["2018-01-01", "2018-01-02"]
    date_schema = pa.schema([pa.field("foo", pa.date32())])
    rows = [{"foo": iso_day} for iso_day in iso_days]

    converted_data = client.ingest_data(rows, date_schema)

    # Each ingested value must format back to the ISO string it came from.
    for position, iso_day in enumerate(iso_days):
        value = converted_data.to_pydict()['foo'][position]
        assert value.strftime("%Y-%m-%d") == iso_day
开发者ID:andrewgross,项目名称:json2parquet,代码行数:15,代码来源:test_client.py

示例10: test_store_schema_metadata

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import date32 [as 别名]
def test_store_schema_metadata(store, df_all_types):
    """Check that store_schema_metadata writes the expected _common_metadata file.

    The schema derived from ``df_all_types`` is stored for table
    "some_table" of dataset "some_uuid"; reading the file back must give
    the arrow schema listed below once metadata is stripped.
    """
    store_schema_metadata(
        schema=make_meta(df_all_types, origin="df_all_types"),
        dataset_uuid="some_uuid",
        store=store,
        table="some_table",
    )

    metadata_key = "some_uuid/some_table/_common_metadata"
    assert metadata_key in store.keys()
    stored_schema = pq.ParquetFile(store.open(metadata_key)).schema.to_arrow_schema()

    # Expected columns in schema order: (name, arrow type).
    expected_columns = [
        ("array_float32", pa.list_(pa.float64())),
        ("array_float64", pa.list_(pa.float64())),
        ("array_int16", pa.list_(pa.int64())),
        ("array_int32", pa.list_(pa.int64())),
        ("array_int64", pa.list_(pa.int64())),
        ("array_int8", pa.list_(pa.int64())),
        ("array_uint16", pa.list_(pa.uint64())),
        ("array_uint32", pa.list_(pa.uint64())),
        ("array_uint64", pa.list_(pa.uint64())),
        ("array_uint8", pa.list_(pa.uint64())),
        ("array_unicode", pa.list_(pa.string())),
        ("bool", pa.bool_()),
        ("byte", pa.binary()),
        ("date", pa.date32()),
        ("datetime64", pa.timestamp("us")),
        ("float32", pa.float64()),
        ("float64", pa.float64()),
        ("int16", pa.int64()),
        ("int32", pa.int64()),
        ("int64", pa.int64()),
        ("int8", pa.int64()),
        ("null", pa.null()),
        ("uint16", pa.uint64()),
        ("uint32", pa.uint64()),
        ("uint64", pa.uint64()),
        ("uint8", pa.uint64()),
        ("unicode", pa.string()),
    ]
    expected_schema = pa.schema(
        [pa.field(name, arrow_type) for name, arrow_type in expected_columns]
    )

    assert stored_schema.remove_metadata() == expected_schema
开发者ID:JDASoftwareGroup,项目名称:kartothek,代码行数:46,代码来源:test_common_metadata.py

示例11: _convert_data_with_schema

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import date32 [as 别名]
def _convert_data_with_schema(data, schema, date_format=None, field_aliases=None):
    """Convert row mappings into a ``pyarrow.RecordBatch`` laid out per ``schema``.

    :param data: iterable of row mappings; each row is read with
        ``row.get(column_name)``, so missing keys become ``None``.
    :param schema: pyarrow schema naming the columns and their target types.
    :param date_format: optional format passed to ``pd.to_datetime`` for
        timestamp columns.
    :param field_aliases: optional dict mapping schema column names to the
        names used in the resulting batch; ignored unless it is a dict.
    :return: a ``pyarrow.RecordBatch`` with one array per schema column.
    """
    # Pivot the rows into per-column lists. Seeding every schema column up
    # front means empty ``data`` yields empty columns rather than ``None``,
    # which the per-type branches below cannot iterate.
    column_data = {name: [] for name in schema.names}
    for row in data:
        for name in schema.names:
            column_data[name].append(row.get(name))

    use_aliases = isinstance(field_aliases, dict)
    array_data = []
    schema_names = []
    for column in schema:
        values = column_data.get(column.name, [])
        if isinstance(column.type, pa.lib.TimestampType):
            converted = []
            for raw in values:
                try:
                    converted.append(pd.to_datetime(raw, format=date_format))
                # Public alias of the exception; the private pd._libs.tslib
                # path is not a stable import location.
                except pd.errors.OutOfBoundsDatetime:
                    # Out-of-range values are clamped to the largest Timestamp.
                    converted.append(pd.Timestamp.max)
            array_data.append(pa.Array.from_pandas(pd.to_datetime(converted), type=pa.timestamp('ns')))
        elif column.type.id == pa.date32().id:
            # Materialise the values: on Python 3 ``map`` is a one-shot iterator.
            converted = [_date_converter(raw) for raw in values]
            array_data.append(pa.array(converted, type=pa.date32()))
        # Float types are ambiguous for conversions, need to specify the exact type
        elif column.type.id == pa.float64().id:
            array_data.append(pa.array(values, type=pa.float64()))
        elif column.type.id == pa.float32().id:
            # Python doesn't have a native float32 type
            # and PyArrow cannot cast float64 -> float32
            downcast = pd.to_numeric(values, downcast='float')
            array_data.append(pa.Array.from_pandas(downcast, type=pa.float32()))
        elif column.type.id == pa.int32().id:
            # PyArrow 0.8.0 can cast int64 -> int32
            as_int64 = pa.array(values, type=pa.int64())
            array_data.append(as_int64.cast(pa.int32()))
        elif column.type.id == pa.bool_().id:
            converted = [_boolean_converter(raw) for raw in values]
            array_data.append(pa.array(converted, type=column.type))
        else:
            array_data.append(pa.array(values, type=column.type))

        if use_aliases:
            schema_names.append(field_aliases.get(column.name, column.name))
        else:
            schema_names.append(column.name)
    return pa.RecordBatch.from_arrays(array_data, schema_names)
开发者ID:andrewgross,项目名称:json2parquet,代码行数:46,代码来源:client.py


注:本文中的pyarrow.date32方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。