This article collects typical usage examples of the Python method pyarrow.schema. If you are wondering what pyarrow.schema does, how to call it, or what real-world code using it looks like, the curated examples below should help. You can also explore further usage examples from the pyarrow module itself.
The following presents 15 code examples of the pyarrow.schema method, sorted by popularity by default.
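As a quick orientation before the examples: pyarrow.schema builds a pyarrow.Schema from a list of (name, type) pairs or pyarrow.field objects. A minimal sketch (the field names here are illustrative, not from any example below):

import pyarrow as pa

# Build a schema from (name, type) pairs; pa.field(...) objects work too.
schema = pa.schema([
    ("id", pa.int64()),
    ("name", pa.string()),
    ("score", pa.float64()),
])
print(schema.names)  # ['id', 'name', 'score']
print(schema.types)  # [DataType(int64), DataType(string), DataType(double)]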
Example 1: _mock_parquet_dataset

# Required import: import pyarrow [as alias]
# Or: from pyarrow import schema [as alias]
from unittest import mock  # the standalone `mock` package also works

def _mock_parquet_dataset(partitions, arrow_schema):
    """Creates a pyarrow.ParquetDataset mock capable of returning:

        parquet_dataset.pieces[0].get_metadata(parquet_dataset.fs.open).schema.to_arrow_schema() == schema
        parquet_dataset.partitions = partitions

    :param partitions: expected to be a list of pa.parquet.PartitionSet
    :param arrow_schema: an instance of pyarrow.Schema to be assumed by the mock parquet dataset object.
    :return: a mock of pyarrow.parquet.ParquetDataset
    """
    piece_mock = mock.Mock()
    piece_mock.get_metadata().schema.to_arrow_schema.return_value = arrow_schema

    dataset_mock = mock.Mock()
    type(dataset_mock).pieces = mock.PropertyMock(return_value=[piece_mock])
    type(dataset_mock).partitions = partitions

    return dataset_mock
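A hedged usage sketch for the factory above; the schema and the empty partition list are placeholder values, not taken from the original test suite:

import pyarrow as pa

arrow_schema = pa.schema([("id", pa.int64()), ("value", pa.string())])
dataset_mock = _mock_parquet_dataset(partitions=[], arrow_schema=arrow_schema)

# The mock now answers the calls the docstring promises
# (a Mock accepts any arguments, so the fs.open argument may be omitted):
assert dataset_mock.pieces[0].get_metadata().schema.to_arrow_schema() == arrow_schema
assert dataset_mock.partitions == []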
Example 2: jsonValue

# Required import: import pyarrow [as alias]
# Or: from pyarrow import schema [as alias]
import base64

from pyspark.serializers import CloudPickleSerializer

# A method of pyspark.sql.types.UserDefinedType.
def jsonValue(self):
    if self.scalaUDT():
        assert self.module() != '__main__', 'UDT in __main__ cannot work with ScalaUDT'
        schema = {
            "type": "udt",
            "class": self.scalaUDT(),
            "pyClass": "%s.%s" % (self.module(), type(self).__name__),
            "sqlType": self.sqlType().jsonValue()
        }
    else:
        # Pickle the UDT class itself so it can be recreated when the schema is deserialized.
        ser = CloudPickleSerializer()
        b = ser.dumps(type(self))
        schema = {
            "type": "udt",
            "pyClass": "%s.%s" % (self.module(), type(self).__name__),
            "serializedClass": base64.b64encode(b).decode('utf8'),
            "sqlType": self.sqlType().jsonValue()
        }
    return schema
Example 3: _infer_schema

# Required import: import pyarrow [as alias]
# Or: from pyarrow import schema [as alias]
from pyspark.sql.types import StructField, StructType, _infer_type  # pyspark internals

def _infer_schema(row, names=None):
    """Infer the schema from dict/namedtuple/object"""
    if isinstance(row, dict):
        items = sorted(row.items())

    elif isinstance(row, (tuple, list)):
        if hasattr(row, "__fields__"):  # Row
            items = zip(row.__fields__, tuple(row))
        elif hasattr(row, "_fields"):  # namedtuple
            items = zip(row._fields, tuple(row))
        else:
            if names is None:
                names = ['_%d' % i for i in range(1, len(row) + 1)]
            elif len(names) < len(row):
                names.extend('_%d' % i for i in range(len(names) + 1, len(row) + 1))
            items = zip(names, row)

    elif hasattr(row, "__dict__"):  # object
        items = sorted(row.__dict__.items())

    else:
        raise TypeError("Can not infer schema for type: %s" % type(row))

    fields = [StructField(k, _infer_type(v), True) for k, v in items]
    return StructType(fields)
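A hedged usage sketch, assuming the pyspark internals imported above are in scope; the sample row is illustrative:

row = {"name": "Ada", "age": 36}
schema = _infer_schema(row)
print(schema)
# a StructType with an 'age' (LongType) and a 'name' (StringType) field;
# note that dict items are sorted by key, so 'age' comes before 'name'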
Example 4: test__row_from_mapping_w_invalid_schema

# Required import: import pyarrow [as alias]
# Or: from pyarrow import schema [as alias]
def test__row_from_mapping_w_invalid_schema(self):
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery.table import Table

    MAPPING = {
        "full_name": "Phred Phlyntstone",
        "age": 32,
        "colors": ["red", "green"],
        "bogus": "WHATEVER",
    }
    dataset = DatasetReference(self.PROJECT, self.DS_ID)
    table_ref = dataset.table(self.TABLE_NAME)
    full_name = SchemaField("full_name", "STRING", mode="REQUIRED")
    age = SchemaField("age", "INTEGER", mode="REQUIRED")
    colors = SchemaField("colors", "DATETIME", mode="REPEATED")
    bogus = SchemaField("joined", "STRING", mode="BOGUS")
    table = Table(table_ref, schema=[full_name, age, colors, bogus])

    with self.assertRaises(ValueError) as exc:
        self._call_fut(MAPPING, table.schema)

    self.assertIn("Unknown field mode: BOGUS", str(exc.exception))
Example 5: test__row_from_mapping_w_schema

# Required import: import pyarrow [as alias]
# Or: from pyarrow import schema [as alias]
def test__row_from_mapping_w_schema(self):
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery.table import Table

    MAPPING = {
        "full_name": "Phred Phlyntstone",
        "age": 32,
        "colors": ["red", "green"],
        "extra": "IGNORED",
    }
    dataset = DatasetReference(self.PROJECT, self.DS_ID)
    table_ref = dataset.table(self.TABLE_NAME)
    full_name = SchemaField("full_name", "STRING", mode="REQUIRED")
    age = SchemaField("age", "INTEGER", mode="REQUIRED")
    colors = SchemaField("colors", "DATETIME", mode="REPEATED")
    joined = SchemaField("joined", "STRING", mode="NULLABLE")
    table = Table(table_ref, schema=[full_name, age, colors, joined])

    self.assertEqual(
        self._call_fut(MAPPING, table.schema),
        ("Phred Phlyntstone", 32, ["red", "green"], None),
    )
Example 6: test_to_dataframe_iterable_error_if_pandas_is_none

# Required import: import pyarrow [as alias]
# Or: from pyarrow import schema [as alias]
def test_to_dataframe_iterable_error_if_pandas_is_none(self):
    from google.cloud.bigquery.schema import SchemaField

    schema = [
        SchemaField("name", "STRING", mode="REQUIRED"),
        SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    rows = [
        {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
        {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
    ]
    path = "/foo"
    api_request = mock.Mock(return_value={"rows": rows})
    row_iterator = self._make_one(_mock_client(), api_request, path, schema)

    with pytest.raises(ValueError, match="pandas"):
        row_iterator.to_dataframe_iterable()
Example 7: test_to_dataframe

# Required import: import pyarrow [as alias]
# Or: from pyarrow import schema [as alias]
def test_to_dataframe(self):
    from google.cloud.bigquery.schema import SchemaField

    schema = [
        SchemaField("name", "STRING", mode="REQUIRED"),
        SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    rows = [
        {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
        {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
        {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]},
        {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]},
    ]
    path = "/foo"
    api_request = mock.Mock(return_value={"rows": rows})
    row_iterator = self._make_one(_mock_client(), api_request, path, schema)

    df = row_iterator.to_dataframe(create_bqstorage_client=False)

    self.assertIsInstance(df, pandas.DataFrame)
    self.assertEqual(len(df), 4)  # verify the number of rows
    self.assertEqual(list(df), ["name", "age"])  # verify the column names
    self.assertEqual(df.name.dtype.name, "object")
    self.assertEqual(df.age.dtype.name, "int64")
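The rows payloads used throughout these tests follow the BigQuery tabledata.list REST format: each row is {"f": [{"v": value}, ...]}, one cell per schema field, with every value delivered as a string. A small hand-rolled decoding sketch (for illustration only, not part of the client library):

schema_names = ["name", "age"]
row = {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}
decoded = dict(zip(schema_names, (cell["v"] for cell in row["f"])))
print(decoded)  # {'name': 'Phred Phlyntstone', 'age': '32'} -- note the stringified age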
Example 8: test_to_dataframe_no_tqdm_no_progress_bar

# Required import: import pyarrow [as alias]
# Or: from pyarrow import schema [as alias]
import warnings

def test_to_dataframe_no_tqdm_no_progress_bar(self):
    from google.cloud.bigquery.schema import SchemaField

    schema = [
        SchemaField("name", "STRING", mode="REQUIRED"),
        SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    rows = [
        {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
        {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
        {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]},
        {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]},
    ]
    path = "/foo"
    api_request = mock.Mock(return_value={"rows": rows})
    row_iterator = self._make_one(_mock_client(), api_request, path, schema)

    with warnings.catch_warnings(record=True) as warned:
        df = row_iterator.to_dataframe(create_bqstorage_client=False)

    self.assertEqual(len(warned), 0)
    self.assertEqual(len(df), 4)
Example 9: test_to_dataframe_w_empty_results_wo_pyarrow

# Required import: import pyarrow [as alias]
# Or: from pyarrow import schema [as alias]
def test_to_dataframe_w_empty_results_wo_pyarrow(self):
    from google.cloud.bigquery.schema import SchemaField

    with mock.patch("google.cloud.bigquery.table.pyarrow", None):
        schema = [
            SchemaField("name", "STRING", mode="REQUIRED"),
            SchemaField("age", "INTEGER", mode="REQUIRED"),
        ]
        api_request = mock.Mock(return_value={"rows": []})
        row_iterator = self._make_one(_mock_client(), api_request, schema=schema)

        df = row_iterator.to_dataframe()

    self.assertIsInstance(df, pandas.DataFrame)
    self.assertEqual(len(df), 0)  # verify the number of rows
    self.assertEqual(list(df), ["name", "age"])  # verify the column names
Example 10: test_to_dataframe_w_no_results_wo_pyarrow

# Required import: import pyarrow [as alias]
# Or: from pyarrow import schema [as alias]
def test_to_dataframe_w_no_results_wo_pyarrow(self):
    from google.cloud.bigquery.schema import SchemaField

    with mock.patch("google.cloud.bigquery.table.pyarrow", None):
        schema = [
            SchemaField("name", "STRING", mode="REQUIRED"),
            SchemaField("age", "INTEGER", mode="REQUIRED"),
        ]
        api_request = mock.Mock(return_value={"rows": []})
        row_iterator = self._make_one(_mock_client(), api_request, schema=schema)

        def empty_iterable(dtypes=None):
            return []

        row_iterator.to_dataframe_iterable = empty_iterable

        df = row_iterator.to_dataframe()

    self.assertIsInstance(df, pandas.DataFrame)
    self.assertEqual(len(df), 0)  # verify the number of rows
    self.assertEqual(list(df), ["name", "age"])  # verify the column names
Example 11: test_to_dataframe_error_if_pandas_is_none

# Required import: import pyarrow [as alias]
# Or: from pyarrow import schema [as alias]
def test_to_dataframe_error_if_pandas_is_none(self):
    from google.cloud.bigquery.schema import SchemaField

    schema = [
        SchemaField("name", "STRING", mode="REQUIRED"),
        SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    rows = [
        {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
        {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
    ]
    path = "/foo"
    api_request = mock.Mock(return_value={"rows": rows})
    row_iterator = self._make_one(_mock_client(), api_request, path, schema)

    with self.assertRaises(ValueError):
        row_iterator.to_dataframe()
Example 12: test_to_dataframe_w_bqstorage_no_streams

# Required import: import pyarrow [as alias]
# Or: from pyarrow import schema [as alias]
def test_to_dataframe_w_bqstorage_no_streams(self):
    from google.cloud.bigquery import schema
    from google.cloud.bigquery import table as mut

    bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient)
    session = bigquery_storage_v1.types.ReadSession()
    bqstorage_client.create_read_session.return_value = session

    row_iterator = mut.RowIterator(
        _mock_client(),
        api_request=None,
        path=None,
        schema=[
            schema.SchemaField("colA", "INTEGER"),
            schema.SchemaField("colC", "FLOAT"),
            schema.SchemaField("colB", "STRING"),
        ],
        table=mut.TableReference.from_string("proj.dset.tbl"),
    )

    got = row_iterator.to_dataframe(bqstorage_client)

    column_names = ["colA", "colC", "colB"]
    self.assertEqual(list(got), column_names)
    self.assertTrue(got.empty)
Example 13: test_to_dataframe_w_bqstorage_partition

# Required import: import pyarrow [as alias]
# Or: from pyarrow import schema [as alias]
def test_to_dataframe_w_bqstorage_partition(self):
    from google.cloud.bigquery import schema
    from google.cloud.bigquery import table as mut

    bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient)

    row_iterator = mut.RowIterator(
        _mock_client(),
        None,  # api_request: ignored
        None,  # path: ignored
        [schema.SchemaField("colA", "IGNORED")],
        table=mut.TableReference.from_string("proj.dset.tbl$20181225"),
    )

    with pytest.raises(ValueError):
        row_iterator.to_dataframe(bqstorage_client)
Example 14: test_to_dataframe_w_bqstorage_snapshot

# Required import: import pyarrow [as alias]
# Or: from pyarrow import schema [as alias]
def test_to_dataframe_w_bqstorage_snapshot(self):
    from google.cloud.bigquery import schema
    from google.cloud.bigquery import table as mut

    bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient)

    row_iterator = mut.RowIterator(
        _mock_client(),
        None,  # api_request: ignored
        None,  # path: ignored
        [schema.SchemaField("colA", "IGNORED")],
        table=mut.TableReference.from_string("proj.dset.tbl@1234567890000"),
    )

    with pytest.raises(ValueError):
        row_iterator.to_dataframe(bqstorage_client)
Example 15: bq_to_arrow_data_type

# Required import: import pyarrow [as alias]
# Or: from pyarrow import schema [as alias]
# Assumes the surrounding module also provides:
#   from google.cloud.bigquery import schema
#   BQ_TO_ARROW_SCALARS (dict of BigQuery type name -> pyarrow type constructor)
#   bq_to_arrow_struct_data_type (companion helper for STRUCT/RECORD columns)
import pyarrow

def bq_to_arrow_data_type(field):
    """Return the Arrow data type corresponding to a given BigQuery column.

    Returns:
        None: if default Arrow type inspection should be used.
    """
    if field.mode is not None and field.mode.upper() == "REPEATED":
        inner_type = bq_to_arrow_data_type(
            schema.SchemaField(field.name, field.field_type, fields=field.fields)
        )
        if inner_type:
            return pyarrow.list_(inner_type)
        return None

    field_type_upper = field.field_type.upper() if field.field_type else ""
    if field_type_upper in schema._STRUCT_TYPES:
        return bq_to_arrow_struct_data_type(field)

    data_type_constructor = BQ_TO_ARROW_SCALARS.get(field_type_upper)
    if data_type_constructor is None:
        return None
    return data_type_constructor()
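A hedged usage sketch, assuming the module context described above (in google-cloud-bigquery's helpers, BQ_TO_ARROW_SCALARS maps, e.g., "INTEGER" to pyarrow.int64); the field name is illustrative:

from google.cloud.bigquery.schema import SchemaField

# A REPEATED column should come back wrapped in a pyarrow list type.
field = SchemaField("scores", "INTEGER", mode="REPEATED")
arrow_type = bq_to_arrow_data_type(field)
print(arrow_type)  # expected: list<item: int64>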