当前位置: 首页>>代码示例>>Python>>正文


Python pyarrow.struct方法代码示例

本文整理汇总了Python中pyarrow.struct方法的典型用法代码示例。如果您正苦于以下问题：Python pyarrow.struct方法的具体用法是什么？Python pyarrow.struct怎么用？有哪些Python pyarrow.struct的使用示例？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pyarrow的用法示例。


下文一共展示了pyarrow.struct方法的11个代码示例，这些示例默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_simple

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import struct [as 别名]
def test_simple(self, factory):
    # Checks table_util.TotalByteSize on an entity built by `factory` from
    # one list column ("a1") and one struct column ("a2").

    # List column byte accounting (46 bytes in total):
    #   3 int64 values
    #   5 int32 offsets
    #   1 null bitmap byte for the outer ListArray
    #   1 null bitmap byte for the inner Int64Array
    list_type = pa.list_(pa.int64())
    list_array = pa.array([[1, 2], [None], None, None], type=list_type)

    # Struct column byte accounting (34 bytes in total):
    #   1 null bitmap byte for the outer StructArray
    #   1 null bitmap byte for the inner Int64Array
    #   4 int64 values
    struct_type = pa.struct([pa.field("a", pa.int64())])
    struct_array = pa.array(
        [{"a": 1}, {"a": 2}, {"a": None}, None],
        type=struct_type)

    entity = factory([list_array, struct_array], ["a1", "a2"])

    expected_total_bytes = 46 + 34
    self.assertEqual(expected_total_bytes, table_util.TotalByteSize(entity))
开发者ID:tensorflow,项目名称:tfx-bsl,代码行数:20,代码来源:table_util_test.py

示例2: test_sequence_feature_column_name_not_struct_in_schema

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import struct [as 别名]
def test_sequence_feature_column_name_not_struct_in_schema(self):
    # The schema declares a feature named "##SEQUENCE##" with type INT;
    # constructing the decoder with it is expected to raise RuntimeError.
    schema_text_proto = """
        feature {
          name: "##SEQUENCE##"
          type: INT
        }
        """
    parsed_schema = text_format.Parse(schema_text_proto, schema_pb2.Schema())
    serialized_schema = parsed_schema.SerializeToString()

    # Pattern the raised error message has to match.
    error_msg_regex = (
        "Found a feature in the schema with the sequence_feature_column_name "
        r"\(i.e., ##SEQUENCE##\) that is not a struct.*")

    with self.assertRaisesRegex(RuntimeError, error_msg_regex):
        sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
            _TEST_SEQUENCE_COLUMN_NAME, serialized_schema)
开发者ID:tensorflow,项目名称:tfx-bsl,代码行数:19,代码来源:sequence_example_coder_test.py

示例3: test_arrow_schema_arrow_1644_list_of_struct

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import struct [as 别名]
def test_arrow_schema_arrow_1644_list_of_struct():
    """Check that a list-of-struct column is absent from the resulting Unischema.

    The 'id' string column must still be exposed, while the
    'list_of_struct' column must not appear as an attribute.
    """
    struct_type = pa.struct([
        pa.field('a', pa.string()),
        pa.field('b', pa.int32()),
    ])
    columns = [
        pa.field('id', pa.string()),
        pa.field('list_of_struct', pa.list_(struct_type)),
    ]
    arrow_schema = pa.schema(columns)

    mock_dataset = _mock_parquet_dataset([], arrow_schema)
    unischema = Unischema.from_arrow_schema(mock_dataset)

    assert unischema.id.name == 'id'
    assert not hasattr(unischema, 'list_of_struct')
开发者ID:uber,项目名称:petastorm,代码行数:13,代码来源:test_unischema.py

示例4: test_arrow_schema_arrow_1644_list_of_list

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import struct [as 别名]
def test_arrow_schema_arrow_1644_list_of_list():
    """Check that a list-of-list-of-struct column is absent from the Unischema.

    The 'id' string column must still be exposed, while the
    'list_of_list' column must not appear as an attribute.
    """
    struct_type = pa.struct([
        pa.field('a', pa.string()),
        pa.field('b', pa.int32()),
    ])
    nested_list_type = pa.list_(pa.list_(struct_type))
    columns = [
        pa.field('id', pa.string()),
        pa.field('list_of_list', nested_list_type),
    ]
    arrow_schema = pa.schema(columns)

    mock_dataset = _mock_parquet_dataset([], arrow_schema)
    unischema = Unischema.from_arrow_schema(mock_dataset)

    assert unischema.id.name == 'id'
    assert not hasattr(unischema, 'list_of_list')
开发者ID:uber,项目名称:petastorm,代码行数:14,代码来源:test_unischema.py

示例5: test_arrow_schema_convertion_ignore

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import struct [as 别名]
def test_arrow_schema_convertion_ignore():
    """Check that omit_unsupported_fields=True leaves out the float16 column.

    The schema holds a float16 column (named 'list_of_int') and a struct
    column; after conversion 'list_of_int' must not be an attribute of the
    Unischema.
    """
    struct_type = pa.struct([
        pa.field('a', pa.string()),
        pa.field('b', pa.int32()),
    ])
    columns = [
        pa.field('list_of_int', pa.float16()),
        pa.field('struct', struct_type),
    ]
    arrow_schema = pa.schema(columns)

    mock_dataset = _mock_parquet_dataset([], arrow_schema)
    unischema = Unischema.from_arrow_schema(
        mock_dataset, omit_unsupported_fields=True)

    assert not hasattr(unischema, 'list_of_int')
开发者ID:uber,项目名称:petastorm,代码行数:12,代码来源:test_unischema.py

示例6: test_bq_to_arrow_data_type_w_struct

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import struct [as 别名]
def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type):
    """Check that a NULLABLE field with subfields maps to the expected struct.

    Builds a ``SchemaField`` of type ``bq_type`` carrying fourteen scalar
    subfields, converts it with ``bq_to_arrow_data_type`` and compares the
    result with a hand-written ``pyarrow.struct``.

    Args:
        module_under_test: Module providing ``bq_to_arrow_data_type`` and the
            ``pyarrow_numeric`` / ``pyarrow_timestamp`` / ``pyarrow_time`` /
            ``pyarrow_datetime`` type factories.
        bq_type: BigQuery type name given to the outer field.
    """
    fields = (
        schema.SchemaField("field01", "STRING"),
        schema.SchemaField("field02", "BYTES"),
        schema.SchemaField("field03", "INTEGER"),
        schema.SchemaField("field04", "INT64"),
        schema.SchemaField("field05", "FLOAT"),
        schema.SchemaField("field06", "FLOAT64"),
        schema.SchemaField("field07", "NUMERIC"),
        schema.SchemaField("field08", "BOOLEAN"),
        schema.SchemaField("field09", "BOOL"),
        schema.SchemaField("field10", "TIMESTAMP"),
        schema.SchemaField("field11", "DATE"),
        schema.SchemaField("field12", "TIME"),
        schema.SchemaField("field13", "DATETIME"),
        schema.SchemaField("field14", "GEOGRAPHY"),
    )
    field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields)
    actual = module_under_test.bq_to_arrow_data_type(field)
    # Expected Arrow field for each BigQuery subfield above, in the same order.
    expected = pyarrow.struct(
        (
            pyarrow.field("field01", pyarrow.string()),
            pyarrow.field("field02", pyarrow.binary()),
            pyarrow.field("field03", pyarrow.int64()),
            pyarrow.field("field04", pyarrow.int64()),
            pyarrow.field("field05", pyarrow.float64()),
            pyarrow.field("field06", pyarrow.float64()),
            pyarrow.field("field07", module_under_test.pyarrow_numeric()),
            pyarrow.field("field08", pyarrow.bool_()),
            pyarrow.field("field09", pyarrow.bool_()),
            pyarrow.field("field10", module_under_test.pyarrow_timestamp()),
            pyarrow.field("field11", pyarrow.date32()),
            pyarrow.field("field12", module_under_test.pyarrow_time()),
            pyarrow.field("field13", module_under_test.pyarrow_datetime()),
            pyarrow.field("field14", pyarrow.string()),
        )
    )
    assert pyarrow.types.is_struct(actual)
    # num_fields replaces the deprecated DataType.num_children accessor.
    assert actual.num_fields == len(fields)
    assert actual.equals(expected)
开发者ID:googleapis,项目名称:python-bigquery,代码行数:42,代码来源:test__pandas_helpers.py

示例7: test_bq_to_arrow_data_type_w_array_struct

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import struct [as 别名]
def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type):
    """Check that a REPEATED field with subfields maps to a list of structs.

    Builds a ``SchemaField`` of type ``bq_type`` in ``REPEATED`` mode carrying
    fourteen scalar subfields, converts it with ``bq_to_arrow_data_type`` and
    compares the list's value type with a hand-written ``pyarrow.struct``.

    Args:
        module_under_test: Module providing ``bq_to_arrow_data_type`` and the
            ``pyarrow_numeric`` / ``pyarrow_timestamp`` / ``pyarrow_time`` /
            ``pyarrow_datetime`` type factories.
        bq_type: BigQuery type name given to the outer field.
    """
    fields = (
        schema.SchemaField("field01", "STRING"),
        schema.SchemaField("field02", "BYTES"),
        schema.SchemaField("field03", "INTEGER"),
        schema.SchemaField("field04", "INT64"),
        schema.SchemaField("field05", "FLOAT"),
        schema.SchemaField("field06", "FLOAT64"),
        schema.SchemaField("field07", "NUMERIC"),
        schema.SchemaField("field08", "BOOLEAN"),
        schema.SchemaField("field09", "BOOL"),
        schema.SchemaField("field10", "TIMESTAMP"),
        schema.SchemaField("field11", "DATE"),
        schema.SchemaField("field12", "TIME"),
        schema.SchemaField("field13", "DATETIME"),
        schema.SchemaField("field14", "GEOGRAPHY"),
    )
    field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields)
    actual = module_under_test.bq_to_arrow_data_type(field)
    # Expected Arrow field for each BigQuery subfield above, in the same order.
    expected_value_type = pyarrow.struct(
        (
            pyarrow.field("field01", pyarrow.string()),
            pyarrow.field("field02", pyarrow.binary()),
            pyarrow.field("field03", pyarrow.int64()),
            pyarrow.field("field04", pyarrow.int64()),
            pyarrow.field("field05", pyarrow.float64()),
            pyarrow.field("field06", pyarrow.float64()),
            pyarrow.field("field07", module_under_test.pyarrow_numeric()),
            pyarrow.field("field08", pyarrow.bool_()),
            pyarrow.field("field09", pyarrow.bool_()),
            pyarrow.field("field10", module_under_test.pyarrow_timestamp()),
            pyarrow.field("field11", pyarrow.date32()),
            pyarrow.field("field12", module_under_test.pyarrow_time()),
            pyarrow.field("field13", module_under_test.pyarrow_datetime()),
            pyarrow.field("field14", pyarrow.string()),
        )
    )
    assert pyarrow.types.is_list(actual)
    assert pyarrow.types.is_struct(actual.value_type)
    # num_fields replaces the deprecated DataType.num_children accessor.
    assert actual.value_type.num_fields == len(fields)
    assert actual.value_type.equals(expected_value_type)
开发者ID:googleapis,项目名称:python-bigquery,代码行数:43,代码来源:test__pandas_helpers.py

示例8: bq_to_arrow_struct_data_type

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import struct [as 别名]
def bq_to_arrow_struct_data_type(field):
    """Build a pyarrow struct type from the subfields of ``field``.

    Every entry of ``field.fields`` is converted with ``bq_to_arrow_field``.

    Returns:
        ``pyarrow.struct`` over the converted subfields, or ``None`` as soon
        as one conversion yields a falsy result, so that the caller can fall
        back to type inference.
    """
    converted = []
    for child in field.fields:
        arrow_child = bq_to_arrow_field(child)
        if not arrow_child:
            # Subfield type could not be determined; give up on the struct.
            return None
        converted.append(arrow_child)
    return pyarrow.struct(converted)
开发者ID:googleapis,项目名称:python-bigquery,代码行数:13,代码来源:_pandas_helpers.py

示例9: test_iterate_over_timestamp_ntz_chunk

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import struct [as 别名]
def test_iterate_over_timestamp_ntz_chunk():
    """Run the chunk-iterator check on two TIMESTAMP_NTZ columns.

    A scale in 0..9 is drawn at random. For scale > 7 the Arrow column type is
    a struct of (epoch: int64, fraction: int32); otherwise it is a plain
    int64. Random values and the function computing the expected Python value
    are handed to ``iterate_over_test_chunk``.
    """
    # Seed with a float: random.seed() raises TypeError for a datetime object
    # on Python 3.11+ (such seeds were deprecated in 3.9).
    random.seed(datetime.datetime.now().timestamp())
    scale = random.randint(0, 9)
    column_meta = [
        {"logicalType": "TIMESTAMP_NTZ", "scale": str(scale)},
        {"logicalType": "TIMESTAMP_NTZ", "scale": str(scale)}
    ]
    data_type = pyarrow.struct([pyarrow.field('epoch', pyarrow.int64()),
                                pyarrow.field('fraction', pyarrow.int32())]) if scale > 7 else pyarrow.int64()

    def timestamp_ntz_generator(scale):
        """Return one random cell value in the layout matching ``scale``."""
        epoch = random.randint(-621355968, 2534023007)
        # For scale > 7 the fraction is scaled up to nine decimal digits.
        frac = random.randint(0, 10**scale - 1) * (10**(9 - scale)) if scale > 7 else random.randint(0, 10**scale - 1)
        if scale > 7:
            return {'epoch': epoch, 'fraction': frac}
        else:
            epoch = str(epoch)
            frac = str(frac)
            ZEROFILL = '000000000'
            # Left-pad the fraction with zeros to exactly `scale` digits.
            frac = ZEROFILL[:scale - len(frac)] + frac
            return int(epoch + frac) if scale else int(epoch)

    def expected_data_transform_ntz(_scale):
        """Return a function mapping a generated value to the expected datetime."""
        def expected_data_transform_ntz_impl(data, scale=_scale):
            if scale > 7:
                frac = data['fraction']
                epoch = data['epoch']
                if epoch < 0:
                    # Negative epochs: bump the epoch by one and use the
                    # complement of the fraction before concatenating digits.
                    epoch += 1
                    frac = 10**9 - frac
                frac = str(int(frac / 10**(9 - scale)))
                ZERO_FILL = '000000000'
                frac = ZERO_FILL[:scale - len(frac)] + frac
                data = int(str(epoch) + frac)

            microsec = str(data)
            if scale > 6:
                # Keep only the first six fractional digits.
                microsec = microsec[:-scale] + "." + microsec[-scale:-scale + 6]
            else:
                microsec = microsec[:-scale] + "." + microsec[-scale:] if scale else microsec

            if platform.system() == 'Windows':
                # NOTE(review): presumably avoids utcfromtimestamp() limits on
                # Windows by adding a timedelta to the epoch -- confirm.
                return datetime.datetime.utcfromtimestamp(0) + datetime.timedelta(seconds=(float(microsec)))
            else:
                return datetime.datetime.utcfromtimestamp(float(microsec))

        return expected_data_transform_ntz_impl

    iterate_over_test_chunk(
        [data_type, data_type],
        column_meta,
        lambda: timestamp_ntz_generator(scale),
        expected_data_transform_ntz(scale))
开发者ID:snowflakedb,项目名称:snowflake-connector-python,代码行数:52,代码来源:test_unit_arrow_chunk_iterator.py

示例10: test_iterate_over_timestamp_ltz_chunk

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import struct [as 别名]
def test_iterate_over_timestamp_ltz_chunk():
    """Run the chunk-iterator check on two TIMESTAMP_LTZ columns.

    A scale in 0..9 is drawn at random. For scale > 7 the Arrow column type is
    a struct of (epoch: int64, fraction: int32); otherwise it is a plain
    int64. Random values and the function computing the expected
    timezone-aware Python value are handed to ``iterate_over_test_chunk``.
    """
    # Seed with a float: random.seed() raises TypeError for a datetime object
    # on Python 3.11+ (such seeds were deprecated in 3.9).
    random.seed(datetime.datetime.now().timestamp())
    scale = random.randint(0, 9)
    column_meta = [
        {"logicalType": "TIMESTAMP_LTZ", "scale": str(scale)},
        {"logicalType": "TIMESTAMP_LTZ", "scale": str(scale)}
    ]
    data_type = pyarrow.struct([pyarrow.field('epoch', pyarrow.int64()),
                                pyarrow.field('fraction', pyarrow.int32())]) if scale > 7 else pyarrow.int64()

    def timestamp_ltz_generator(scale):
        """Return one random cell value in the layout matching ``scale``."""
        epoch = random.randint(-621355968, 2534023007)
        # For scale > 7 the fraction is scaled up to nine decimal digits.
        frac = random.randint(0, 10**scale - 1) * (10**(9 - scale)) if scale > 7 else random.randint(0, 10**scale - 1)
        if scale > 7:
            return {'epoch': epoch, 'fraction': frac}
        else:
            epoch = str(epoch)
            frac = str(frac)
            ZEROFILL = '000000000'
            # Left-pad the fraction with zeros to exactly `scale` digits.
            frac = ZEROFILL[:scale - len(frac)] + frac
            return int(epoch + frac) if scale else int(epoch)

    def expected_data_transform_ltz(_scale):
        """Return a function mapping a generated value to the expected datetime."""
        def expected_data_transform_ltz_impl(data, scale=_scale):
            tzinfo = get_timezone()   # can put a string parameter here in the future
            if scale > 7:
                frac = data['fraction']
                epoch = data['epoch']
                if epoch < 0:
                    # Negative epochs: bump the epoch by one and use the
                    # complement of the fraction before concatenating digits.
                    epoch += 1
                    frac = 10**9 - frac
                frac = str(int(frac / 10**(9 - scale)))
                ZERO_FILL = '000000000'
                frac = ZERO_FILL[:scale - len(frac)] + frac
                data = int(str(epoch) + frac)

            microsec = str(data)
            if scale > 6:
                # Keep only the first six fractional digits.
                microsec = microsec[:-scale] + "." + microsec[-scale:-scale + 6]
            else:
                microsec = microsec[:-scale] + "." + microsec[-scale:] if scale else microsec

            if platform.system() == 'Windows':
                # NOTE(review): presumably avoids fromtimestamp() limits on
                # Windows by adding a timedelta to the epoch -- confirm.
                t0 = datetime.datetime.utcfromtimestamp(0) + datetime.timedelta(seconds=(float(microsec)))
                return pytz.utc.localize(t0, is_dst=False).astimezone(tzinfo)
            else:
                return datetime.datetime.fromtimestamp(float(microsec), tz=tzinfo)

        return expected_data_transform_ltz_impl

    iterate_over_test_chunk(
        [data_type, data_type],
        column_meta,
        lambda: timestamp_ltz_generator(scale),
        expected_data_transform_ltz(scale))
开发者ID:snowflakedb,项目名称:snowflake-connector-python,代码行数:54,代码来源:test_unit_arrow_chunk_iterator.py

示例11: test_iterate_over_timestamp_tz_chunk

# 需要导入模块: import pyarrow [as 别名]
# 或者: from pyarrow import struct [as 别名]
def test_iterate_over_timestamp_tz_chunk():
    """Run the chunk-iterator check on two TIMESTAMP_TZ columns.

    A scale in 0..9 is drawn at random. For scale > 3 the Arrow column type is
    a struct of (epoch, timezone, fraction) and the metadata byteLength is
    "16"; otherwise the struct is (epoch, timezone) and byteLength is "8".
    Random values and the function computing the expected timezone-aware
    Python value are handed to ``iterate_over_test_chunk``.
    """
    # Seed with a float: random.seed() raises TypeError for a datetime object
    # on Python 3.11+ (such seeds were deprecated in 3.9).
    random.seed(datetime.datetime.now().timestamp())
    scale = random.randint(0, 9)
    column_meta = [
        {"byteLength": "16" if scale > 3 else "8", "logicalType": "TIMESTAMP_TZ", "scale": str(scale)},
        {"byteLength": "16" if scale > 3 else "8", "logicalType": "TIMESTAMP_TZ", "scale": str(scale)}
    ]

    type1 = pyarrow.struct([pyarrow.field('epoch', pyarrow.int64()),
                            pyarrow.field('timezone', pyarrow.int32()),
                            pyarrow.field('fraction', pyarrow.int32())])
    type2 = pyarrow.struct([pyarrow.field('epoch', pyarrow.int64()),
                            pyarrow.field('timezone', pyarrow.int32())])
    data_type = type1 if scale > 3 else type2

    def timestamp_tz_generator(scale):
        """Return one random cell value in the layout matching ``scale``."""
        epoch = random.randint(-621355968, 2534023007)
        # For scale > 3 the fraction is scaled up to nine decimal digits.
        frac = random.randint(0, 10**scale - 1) * (10**(9 - scale)) if scale > 3 else random.randint(0, 10**scale - 1)
        timezone = random.randint(1, 2879)
        if scale > 3:
            return {'epoch': epoch, 'timezone': timezone, 'fraction': frac}
        else:
            epoch = str(epoch)
            frac = str(frac)
            ZEROFILL = '000000000'
            # Left-pad the fraction with zeros to exactly `scale` digits.
            frac = ZEROFILL[:scale - len(frac)] + frac
            return {'epoch': int(epoch + frac) if scale else int(epoch), 'timezone': timezone}

    def expected_data_transform_tz(_scale):
        """Return a function mapping a generated value to the expected datetime."""
        def expected_data_transform_tz_impl(data, scale=_scale):
            timezone = data['timezone']
            # NOTE(review): 1440 presumably recentres a minutes offset around
            # zero (1440 minutes = one day) -- confirm against the helper.
            tzinfo = _generate_tzinfo_from_tzoffset(timezone - 1440)
            epoch = data['epoch']
            if scale > 3:
                frac = data['fraction']
                if epoch < 0:
                    # Negative epochs: bump the epoch by one and use the
                    # complement of the fraction before concatenating digits.
                    epoch += 1
                    frac = 10**9 - frac
                frac = str(int(frac / 10**(9 - scale)))
                ZERO_FILL = '000000000'
                frac = ZERO_FILL[:scale - len(frac)] + frac
                epoch = int(str(epoch) + frac)

            microsec = str(epoch)
            if scale > 6:
                # Keep only the first six fractional digits.
                microsec = microsec[:-scale] + "." + microsec[-scale:-scale + 6]
            else:
                microsec = microsec[:-scale] + "." + microsec[-scale:] if scale else microsec

            if platform.system() == 'Windows':
                # NOTE(review): presumably avoids fromtimestamp() limits on
                # Windows by adding a timedelta to the epoch -- confirm.
                t = datetime.datetime.utcfromtimestamp(0) + datetime.timedelta(seconds=(float(microsec)))
                if pytz.utc != tzinfo:
                    t += tzinfo.utcoffset(t)
                return t.replace(tzinfo=tzinfo)
            else:
                return datetime.datetime.fromtimestamp(float(microsec), tz=tzinfo)

        return expected_data_transform_tz_impl

    iterate_over_test_chunk(
        [data_type, data_type],
        column_meta,
        lambda: timestamp_tz_generator(scale),
        expected_data_transform_tz(scale))
开发者ID:snowflakedb,项目名称:snowflake-connector-python,代码行数:63,代码来源:test_unit_arrow_chunk_iterator.py


注：本文中的pyarrow.struct方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。