

Python pyarrow.field Method Code Examples

This article collects typical usage examples of the pyarrow.field method in Python. If you are wondering what pyarrow.field does, how to call it, or what real-world code using it looks like, the curated examples below should help. You can also explore further usage examples from the pyarrow package.


Fifteen code examples of the pyarrow.field method are shown below, sorted by popularity by default.
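Before diving into the examples, here is a minimal, self-contained sketch of the basics (the column names and metadata are illustrative, not taken from any example below): pyarrow.field pairs a column name with an Arrow data type, optionally marking nullability and attaching metadata, and such fields are assembled into a pyarrow.Schema.

import pyarrow as pa

# pa.field(name, type) pairs a column name with an Arrow data type;
# nullable defaults to True, metadata is an optional mapping.
id_field = pa.field("id", pa.int64(), nullable=False)
name_field = pa.field("name", pa.string(), metadata={"origin": "example"})

# Fields are the building blocks of a schema.
example_schema = pa.schema([id_field, name_field])
print(example_schema)                          # id: int64 not null; name: string
print(example_schema.field("name").metadata)   # {b'origin': b'example'}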

Example 1: __init__

# Required import: import pyarrow [as alias]
# or: from pyarrow import field [as alias]
def __init__(self, name, dataType, nullable=True, metadata=None):
        """
        >>> (StructField("f1", StringType(), True)
        ...      == StructField("f1", StringType(), True))
        True
        >>> (StructField("f1", StringType(), True)
        ...      == StructField("f2", StringType(), True))
        False
        """
        assert isinstance(dataType, DataType),\
            "dataType %s should be an instance of %s" % (dataType, DataType)
        # NOTE: the source module aliases basestring to str on Python 3,
        # so this check works under both Python versions.
        assert isinstance(name, basestring), "field name %s should be string" % (name)
        if not isinstance(name, str):
            name = name.encode('utf-8')
        self.name = name
        self.dataType = dataType
        self.nullable = nullable
        self.metadata = metadata or {} 
Author: runawayhorse001, Project: LearningApacheSpark, Lines: 20, Source: types.py

Example 2: __getitem__

# Required import: import pyarrow [as alias]
# or: from pyarrow import field [as alias]
def __getitem__(self, key):
        """Access fields by name or slice."""
        if isinstance(key, str):
            for field in self:
                if field.name == key:
                    return field
            raise KeyError('No StructField named {0}'.format(key))
        elif isinstance(key, int):
            try:
                return self.fields[key]
            except IndexError:
                raise IndexError('StructType index out of range')
        elif isinstance(key, slice):
            return StructType(self.fields[key])
        else:
            raise TypeError('StructType keys should be strings, integers or slices') 
Author: runawayhorse001, Project: LearningApacheSpark, Lines: 18, Source: types.py

Example 3: test_bq_to_arrow_data_type_w_struct_unknown_subfield

# Required import: import pyarrow [as alias]
# or: from pyarrow import field [as alias]
def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test):
    fields = (
        schema.SchemaField("field1", "STRING"),
        schema.SchemaField("field2", "INTEGER"),
        # Don't know what to convert UNKNOWN_TYPE to; let type inference work instead.
        schema.SchemaField("field3", "UNKNOWN_TYPE"),
    )
    field = schema.SchemaField("ignored_name", "RECORD", mode="NULLABLE", fields=fields)

    with warnings.catch_warnings(record=True) as warned:
        actual = module_under_test.bq_to_arrow_data_type(field)

    assert actual is None
    assert len(warned) == 1
    warning = warned[0]
    assert "field3" in str(warning) 
Author: googleapis, Project: python-bigquery, Lines: 19, Source: test__pandas_helpers.py

Example 4: test_dataframe_to_arrow_dict_sequence_schema

# Required import: import pyarrow [as alias]
# or: from pyarrow import field [as alias]
def test_dataframe_to_arrow_dict_sequence_schema(module_under_test):
    dict_schema = [
        {"name": "field01", "type": "STRING", "mode": "REQUIRED"},
        {"name": "field02", "type": "BOOL", "mode": "NULLABLE"},
    ]

    dataframe = pandas.DataFrame(
        {"field01": [u"hello", u"world"], "field02": [True, False]}
    )

    arrow_table = module_under_test.dataframe_to_arrow(dataframe, dict_schema)
    arrow_schema = arrow_table.schema

    expected_fields = [
        pyarrow.field("field01", "string", nullable=False),
        pyarrow.field("field02", "bool", nullable=True),
    ]
    assert list(arrow_schema) == expected_fields 
Author: googleapis, Project: python-bigquery, Lines: 20, Source: test__pandas_helpers.py
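A detail this test relies on: pyarrow.field also accepts a type name as a plain string, so pyarrow.field("field01", "string") resolves the alias to the same type as pyarrow.field("field01", pyarrow.string()). That is why the expected fields built from string aliases compare equal to the schema produced by dataframe_to_arrow.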

Example 5: test_range_partitioning

# Required import: import pyarrow [as alias]
# or: from pyarrow import field [as alias]
def test_range_partitioning(self):
        from google.cloud.bigquery.table import RangePartitioning
        from google.cloud.bigquery.table import PartitionRange

        table = self._make_one("proj.dset.tbl")
        assert table.range_partitioning is None

        table.range_partitioning = RangePartitioning(
            field="col1", range_=PartitionRange(start=-512, end=1024, interval=128)
        )
        assert table.range_partitioning.field == "col1"
        assert table.range_partitioning.range_.start == -512
        assert table.range_partitioning.range_.end == 1024
        assert table.range_partitioning.range_.interval == 128

        table.range_partitioning = None
        assert table.range_partitioning is None 
Author: googleapis, Project: python-bigquery, Lines: 19, Source: test_table.py

Example 6: test_time_partitioning_getter

# Required import: import pyarrow [as alias]
# or: from pyarrow import field [as alias]
def test_time_partitioning_getter(self):
        from google.cloud.bigquery.table import TimePartitioning
        from google.cloud.bigquery.table import TimePartitioningType

        dataset = DatasetReference(self.PROJECT, self.DS_ID)
        table_ref = dataset.table(self.TABLE_NAME)
        table = self._make_one(table_ref)

        table._properties["timePartitioning"] = {
            "type": "DAY",
            "field": "col1",
            "expirationMs": "123456",
            "requirePartitionFilter": False,
        }
        self.assertIsInstance(table.time_partitioning, TimePartitioning)
        self.assertEqual(table.time_partitioning.type_, TimePartitioningType.DAY)
        self.assertEqual(table.time_partitioning.field, "col1")
        self.assertEqual(table.time_partitioning.expiration_ms, 123456)

        with warnings.catch_warnings(record=True) as warned:
            self.assertFalse(table.time_partitioning.require_partition_filter)

        assert len(warned) == 1
        self.assertIs(warned[0].category, PendingDeprecationWarning) 
Author: googleapis, Project: python-bigquery, Lines: 26, Source: test_table.py

Example 7: test_time_partitioning_getter_w_empty

# Required import: import pyarrow [as alias]
# or: from pyarrow import field [as alias]
def test_time_partitioning_getter_w_empty(self):
        from google.cloud.bigquery.table import TimePartitioning

        dataset = DatasetReference(self.PROJECT, self.DS_ID)
        table_ref = dataset.table(self.TABLE_NAME)
        table = self._make_one(table_ref)

        # Even though there are required properties according to the API
        # specification, sometimes time partitioning is populated as an empty
        # object. See internal bug 131167013.
        table._properties["timePartitioning"] = {}
        self.assertIsInstance(table.time_partitioning, TimePartitioning)
        self.assertIsNone(table.time_partitioning.type_)
        self.assertIsNone(table.time_partitioning.field)
        self.assertIsNone(table.time_partitioning.expiration_ms)

        with warnings.catch_warnings(record=True) as warned:
            self.assertIsNone(table.time_partitioning.require_partition_filter)

        for warning in warned:
            self.assertIs(warning.category, PendingDeprecationWarning) 
Author: googleapis, Project: python-bigquery, Lines: 23, Source: test_table.py

Example 8: test_from_api_repr_explicit

# Required import: import pyarrow [as alias]
# or: from pyarrow import field [as alias]
def test_from_api_repr_explicit(self):
        from google.cloud.bigquery.table import TimePartitioningType

        klass = self._get_target_class()
        api_repr = {
            "type": "DAY",
            "field": "name",
            "expirationMs": "10000",
            "requirePartitionFilter": True,
        }
        time_partitioning = klass.from_api_repr(api_repr)

        self.assertEqual(time_partitioning.type_, TimePartitioningType.DAY)
        self.assertEqual(time_partitioning.field, "name")
        self.assertEqual(time_partitioning.expiration_ms, 10000)

        with warnings.catch_warnings(record=True) as warned:
            self.assertTrue(time_partitioning.require_partition_filter)

        self.assertIs(warned[0].category, PendingDeprecationWarning) 
Author: googleapis, Project: python-bigquery, Lines: 22, Source: test_table.py

Example 9: test_to_api_repr_explicit

# Required import: import pyarrow [as alias]
# or: from pyarrow import field [as alias]
def test_to_api_repr_explicit(self):
        from google.cloud.bigquery.table import TimePartitioningType

        time_partitioning = self._make_one(
            type_=TimePartitioningType.DAY, field="name", expiration_ms=10000
        )

        with warnings.catch_warnings(record=True) as warned:
            time_partitioning.require_partition_filter = True

        self.assertIs(warned[0].category, PendingDeprecationWarning)

        expected = {
            "type": "DAY",
            "field": "name",
            "expirationMs": "10000",
            "requirePartitionFilter": True,
        }
        self.assertEqual(time_partitioning.to_api_repr(), expected) 
Author: googleapis, Project: python-bigquery, Lines: 21, Source: test_table.py

Example 10: bq_to_arrow_data_type

# Required import: import pyarrow [as alias]
# or: from pyarrow import field [as alias]
def bq_to_arrow_data_type(field):
    """Return the Arrow data type, corresponding to a given BigQuery column.

    Returns:
        None: if default Arrow type inspection should be used.
    """
    if field.mode is not None and field.mode.upper() == "REPEATED":
        inner_type = bq_to_arrow_data_type(
            schema.SchemaField(field.name, field.field_type, fields=field.fields)
        )
        if inner_type:
            return pyarrow.list_(inner_type)
        return None

    field_type_upper = field.field_type.upper() if field.field_type else ""
    if field_type_upper in schema._STRUCT_TYPES:
        return bq_to_arrow_struct_data_type(field)

    data_type_constructor = BQ_TO_ARROW_SCALARS.get(field_type_upper)
    if data_type_constructor is None:
        return None
    return data_type_constructor() 
Author: googleapis, Project: python-bigquery, Lines: 24, Source: _pandas_helpers.py
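A minimal usage sketch for this function. This is hedged: it assumes the surrounding _pandas_helpers module (with its BQ_TO_ARROW_SCALARS mapping) and google-cloud-bigquery are importable, and the column names are made up for illustration.

import pyarrow
from google.cloud.bigquery import schema

# A scalar column resolves through the BQ_TO_ARROW_SCALARS mapping.
assert bq_to_arrow_data_type(schema.SchemaField("name", "STRING")) == pyarrow.string()

# A REPEATED column is wrapped in a list type of its element type.
tags = schema.SchemaField("tags", "STRING", mode="REPEATED")
assert bq_to_arrow_data_type(tags) == pyarrow.list_(pyarrow.string())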

Example 11: download_arrow_tabledata_list

# Required import: import pyarrow [as alias]
# or: from pyarrow import field [as alias]
def download_arrow_tabledata_list(pages, bq_schema):
    """Use tabledata.list to construct an iterable of RecordBatches.

    Args:
        pages (Iterator[:class:`google.api_core.page_iterator.Page`]):
            An iterator over the result pages.
        bq_schema (Sequence[Union[ \
            :class:`~google.cloud.bigquery.schema.SchemaField`, \
            Mapping[str, Any] \
        ]]):
            A description of the fields in the result pages.
    Yields:
        :class:`pyarrow.RecordBatch`
        The next page of records as a ``pyarrow`` record batch.
    """
    bq_schema = schema._to_schema_fields(bq_schema)
    column_names = bq_to_arrow_schema(bq_schema) or [field.name for field in bq_schema]
    arrow_types = [bq_to_arrow_data_type(field) for field in bq_schema]

    for page in pages:
        yield _tabledata_list_page_to_arrow(page, column_names, arrow_types) 
Author: googleapis, Project: python-bigquery, Lines: 23, Source: _pandas_helpers.py
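Note that column_names here is either a full pyarrow.Schema (when bq_to_arrow_schema succeeds) or, as a fallback, a plain list of field names; _tabledata_list_page_to_arrow (not shown in this article) is expected to handle both forms when constructing each RecordBatch.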

Example 12: download_dataframe_tabledata_list

# Required import: import pyarrow [as alias]
# or: from pyarrow import field [as alias]
def download_dataframe_tabledata_list(pages, bq_schema, dtypes):
    """Use (slower, but free) tabledata.list to construct a DataFrame.

    Args:
        pages (Iterator[:class:`google.api_core.page_iterator.Page`]):
            An iterator over the result pages.
        bq_schema (Sequence[Union[ \
            :class:`~google.cloud.bigquery.schema.SchemaField`, \
            Mapping[str, Any] \
        ]]):
            A description of the fields in the result pages.
        dtypes(Mapping[str, numpy.dtype]):
            The types of columns in result data to hint construction of the
            resulting DataFrame. Not all column types have to be specified.
    Yields:
        :class:`pandas.DataFrame`
        The next page of records as a ``pandas.DataFrame``.
    """
    bq_schema = schema._to_schema_fields(bq_schema)
    column_names = [field.name for field in bq_schema]
    for page in pages:
        yield _tabledata_list_page_to_dataframe(page, column_names, dtypes) 
Author: googleapis, Project: python-bigquery, Lines: 24, Source: _pandas_helpers.py

Example 13: get_pa_translated_schema

# Required import: import pyarrow [as alias]
# or: from pyarrow import field [as alias]
def get_pa_translated_schema(self):
        """Translates a BigQuery schema to an parquet schema.

        Returns: Translated parquet schema in pyarrow.Schema format.
        """

        type_conversions = {
            'STRING': pa.string(),
            'NUMERIC': pa.int64(),
        }

        # TODO(annarudy@google.com): add support for nested fields
        pa_schema_list = [
            pa.field(
                bq_field.name,
                type_conversions[bq_field.field_type],
            ) for bq_field in self.bq_schema
        ]

        return pa.schema(pa_schema_list) 
Author: GoogleCloudPlatform, Project: professional-services, Lines: 22, Source: parquet_util.py
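A short sketch of how such a translated schema might be used downstream (the data and file name are illustrative): a pyarrow.Table built against the schema can be written straight to a Parquet file.

import pyarrow as pa
import pyarrow.parquet as pq

# Build a table that conforms to the translated schema and write it out.
pa_schema = pa.schema([pa.field("name", pa.string()), pa.field("count", pa.int64())])
table = pa.Table.from_pydict({"name": ["a", "b"], "count": [1, 2]}, schema=pa_schema)
pq.write_table(table, "example.parquet")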

Example 14: test_validate_schema_non_overlapping_nulls

# Required import: import pyarrow [as alias]
# or: from pyarrow import field [as alias]
def test_validate_schema_non_overlapping_nulls(df_all_types_schema):
    """
    Test that two schemas with non-overlapping null columns are valid
    """
    first_ix = np.random.randint(len(df_all_types_schema))
    second_ix = first_ix
    while second_ix == first_ix:
        second_ix = np.random.randint(len(df_all_types_schema))

    first_null = pa.field(name=df_all_types_schema.names[first_ix], type=pa.null())
    first_schema = df_all_types_schema.set(first_ix, first_null)

    second_null = pa.field(name=df_all_types_schema.names[second_ix], type=pa.null())
    second_schema = df_all_types_schema.set(second_ix, second_null)

    for schemas in permutations([first_schema, second_schema]):
        reference_schema = validate_compatible(schemas)

        # The reference schema should be the original schema
        # with the columns reconstructed
        assert df_all_types_schema == reference_schema 
Author: JDASoftwareGroup, Project: kartothek, Lines: 23, Source: test_common_metadata.py
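A point this test depends on: pa.Schema.set(i, field) does not mutate the schema in place; it returns a new pyarrow.Schema with the field at index i replaced. That is why df_all_types_schema itself is unchanged and can serve as the reference in the final assertion.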

Example 15: test_make_meta_column_normalization_pyarrow_schema

# Required import: import pyarrow [as alias]
# or: from pyarrow import field [as alias]
def test_make_meta_column_normalization_pyarrow_schema():
    # GH228
    df = pd.DataFrame(
        [{"part": 1, "id": 1, "col1": "abc"}, {"part": 2, "id": 2, "col1": np.nan}],
        # Kartothek normalizes field order s.t. partition keys are first and the
        # rest is alphabetically. This is reverse.
        columns=["col1", "id", "part"],
    )
    schema = make_meta(
        pa.Schema.from_pandas(df), origin="gh228", partition_keys=["part"]
    )
    fields = [
        pa.field("part", pa.int64()),
        pa.field("col1", pa.string()),
        pa.field("id", pa.int64()),
    ]
    expected_schema = pa.schema(fields)

    assert schema.internal().equals(expected_schema, check_metadata=False) 
Author: JDASoftwareGroup, Project: kartothek, Lines: 21, Source: test_common_metadata.py
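The final assertion uses equals(..., check_metadata=False), which compares only the fields themselves and ignores schema-level and field-level metadata, such as the pandas metadata that pa.Schema.from_pandas attaches.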


Note: The pyarrow.field examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are drawn from open-source projects contributed by their respective authors; copyright of the source code remains with the original authors, and any distribution or use should comply with the corresponding project's license. Please do not reproduce without permission.