This page collects typical usage examples of the Python pyarrow.Table class. If you are wondering how to use pyarrow.Table in practice, the curated code examples below may help. You can also explore further usage examples from the pyarrow module.
The following shows 15 code examples of pyarrow.Table, sorted by popularity by default.
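Before working through the examples, here is a minimal sketch (not taken from any of the examples below) of constructing a pyarrow.Table directly from Python data; the column names and values are made up for illustration:
import pyarrow as pa

# Build a Table from a mapping of column name -> values; pyarrow infers the types.
table = pa.Table.from_pydict({
    "city": ["Tokyo", "Paris"],
    "population": [13_960_000, 2_161_000],
})
print(table.schema)    # city: string, population: int64
print(table.num_rows)  # 2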
Example 1: _make_arrow_dataset
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Table [as alias]
def _make_arrow_dataset(self, edges: pa.Table, nodes: pa.Table, name: str, description: str) -> ArrowUploader:
    au = ArrowUploader(
        server_base_path=PyGraphistry.protocol() + '://' + PyGraphistry.server(),
        edges=edges, nodes=nodes,
        name=name, description=description,
        metadata={
            'usertag': PyGraphistry._tag,
            'key': PyGraphistry.api_key(),
            'agent': 'pygraphistry',
            'apiversion': '3',
            'agentversion': sys.modules['graphistry'].__version__,
        },
        certificate_validation=PyGraphistry.certificate_validation())
    au.edge_encodings = au.g_to_edge_encodings(self)
    au.node_encodings = au.g_to_node_encodings(self)
    return au
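Example 1 receives its edges and nodes already as pa.Table objects. In typical use they would start as pandas DataFrames and be converted with pa.Table.from_pandas; a minimal sketch, with hypothetical DataFrames and column names:
import pandas as pd
import pyarrow as pa

# Hypothetical graph data; the column names are illustrative only.
edges_df = pd.DataFrame({"src": [0, 1], "dst": [1, 2], "weight": [0.5, 0.9]})
nodes_df = pd.DataFrame({"id": [0, 1, 2], "label": ["a", "b", "c"]})

# Convert each DataFrame to an Arrow Table, preserving column names
# and inferring Arrow types from the pandas dtypes.
edges = pa.Table.from_pandas(edges_df)
nodes = pa.Table.from_pandas(nodes_df)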
Example 2: tearDown
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Table [as alias]
def tearDown(self):
    def _still_in_use(bad_request):
        return any(
            error["reason"] == "resourceInUse" for error in bad_request._errors
        )

    retry_in_use = RetryErrors(BadRequest, error_predicate=_still_in_use)
    retry_storage_errors_conflict = RetryErrors(
        (Conflict, TooManyRequests, InternalServerError, ServiceUnavailable)
    )
    for doomed in self.to_delete:
        if isinstance(doomed, storage.Bucket):
            retry_storage_errors_conflict(doomed.delete)(force=True)
        elif isinstance(doomed, (Dataset, bigquery.DatasetReference)):
            retry_in_use(Config.CLIENT.delete_dataset)(doomed, delete_contents=True)
        elif isinstance(doomed, (Table, bigquery.TableReference)):
            retry_in_use(Config.CLIENT.delete_table)(doomed)
        else:
            doomed.delete()
Example 3: test_create_table_w_time_partitioning_w_clustering_fields
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Table [as alias]
def test_create_table_w_time_partitioning_w_clustering_fields(self):
    from google.cloud.bigquery.table import TimePartitioning
    from google.cloud.bigquery.table import TimePartitioningType

    dataset = self.temp_dataset(_make_dataset_id("create_table_tp_cf"))
    table_id = "test_table"
    table_arg = Table(
        dataset.table(table_id), schema=TIME_PARTITIONING_CLUSTERING_FIELDS_SCHEMA
    )
    self.assertFalse(_table_exists(table_arg))
    table_arg.time_partitioning = TimePartitioning(field="transaction_time")
    table_arg.clustering_fields = ["user_email", "store_code"]
    table = retry_403(Config.CLIENT.create_table)(table_arg)
    self.to_delete.insert(0, table)
    self.assertTrue(_table_exists(table))
    self.assertEqual(table.table_id, table_id)
    time_partitioning = table.time_partitioning
    self.assertEqual(time_partitioning.type_, TimePartitioningType.DAY)
    self.assertEqual(time_partitioning.field, "transaction_time")
    self.assertEqual(table.clustering_fields, ["user_email", "store_code"])
Example 4: test_update_table_schema
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Table [as alias]
def test_update_table_schema(self):
    dataset = self.temp_dataset(_make_dataset_id("update_table"))
    TABLE_NAME = "test_table"
    table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA)
    self.assertFalse(_table_exists(table_arg))
    table = retry_403(Config.CLIENT.create_table)(table_arg)
    self.to_delete.insert(0, table)
    self.assertTrue(_table_exists(table))
    voter = bigquery.SchemaField("voter", "BOOLEAN", mode="NULLABLE")
    schema = table.schema
    schema.append(voter)
    table.schema = schema
    updated_table = Config.CLIENT.update_table(table, ["schema"])
    self.assertEqual(len(updated_table.schema), len(schema))
    for found, expected in zip(updated_table.schema, schema):
        self.assertEqual(found.name, expected.name)
        self.assertEqual(found.field_type, expected.field_type)
        self.assertEqual(found.mode, expected.mode)
Example 5: test_job_cancel
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Table [as alias]
def test_job_cancel(self):
    DATASET_ID = _make_dataset_id("job_cancel")
    JOB_ID_PREFIX = "fetch_" + DATASET_ID
    TABLE_NAME = "test_table"
    QUERY = "SELECT * FROM %s.%s" % (DATASET_ID, TABLE_NAME)
    dataset = self.temp_dataset(DATASET_ID)
    table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA)
    table = retry_403(Config.CLIENT.create_table)(table_arg)
    self.to_delete.insert(0, table)
    job = Config.CLIENT.query(QUERY, job_id_prefix=JOB_ID_PREFIX)
    job.cancel()
    retry = RetryInstanceState(_job_done, max_tries=8)
    retry(job.reload)()
    # The `cancel` API doesn't leave any reliable traces on the status
    # of the job resource, so we can't really assert for them here.
    # The best we can do is note that the API call didn't raise an
    # error, and that the job completed (in the `retry()` above).
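Outside of a test harness, the same cancel-then-wait pattern can be written against the public job API; a minimal sketch, assuming client is a bigquery.Client() and the query is illustrative:
import time

job = client.query("SELECT 1")
job.cancel()
# cancel() is best-effort: the job may still finish normally, so poll
# done(), which refreshes the job state, until the job reaches a
# terminal state either way.
while not job.done():
    time.sleep(1)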
Example 6: test_time_partitioning_setter
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Table [as alias]
def test_time_partitioning_setter(self):
    from google.cloud.bigquery.table import TimePartitioning
    from google.cloud.bigquery.table import TimePartitioningType

    dataset = DatasetReference(self.PROJECT, self.DS_ID)
    table_ref = dataset.table(self.TABLE_NAME)
    table = self._make_one(table_ref)
    time_partitioning = TimePartitioning(type_=TimePartitioningType.HOUR)
    table.time_partitioning = time_partitioning
    self.assertEqual(table.time_partitioning.type_, TimePartitioningType.HOUR)
    # Both objects point to the same properties dict.
    self.assertIs(
        table._properties["timePartitioning"], time_partitioning._properties
    )
    time_partitioning.expiration_ms = 10000
    # Changes to the TimePartitioning object are reflected in the Table's properties.
    self.assertEqual(
        table.time_partitioning.expiration_ms, time_partitioning.expiration_ms
    )
Example 7: test__row_from_mapping_w_invalid_schema
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Table [as alias]
def test__row_from_mapping_w_invalid_schema(self):
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery.table import Table

    MAPPING = {
        "full_name": "Phred Phlyntstone",
        "age": 32,
        "colors": ["red", "green"],
        "bogus": "WHATEVER",
    }
    dataset = DatasetReference(self.PROJECT, self.DS_ID)
    table_ref = dataset.table(self.TABLE_NAME)
    full_name = SchemaField("full_name", "STRING", mode="REQUIRED")
    age = SchemaField("age", "INTEGER", mode="REQUIRED")
    colors = SchemaField("colors", "DATETIME", mode="REPEATED")
    bogus = SchemaField("joined", "STRING", mode="BOGUS")
    table = Table(table_ref, schema=[full_name, age, colors, bogus])
    with self.assertRaises(ValueError) as exc:
        self._call_fut(MAPPING, table.schema)
    self.assertIn("Unknown field mode: BOGUS", str(exc.exception))
Example 8: test__row_from_mapping_w_schema
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Table [as alias]
def test__row_from_mapping_w_schema(self):
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery.table import Table

    MAPPING = {
        "full_name": "Phred Phlyntstone",
        "age": 32,
        "colors": ["red", "green"],
        "extra": "IGNORED",
    }
    dataset = DatasetReference(self.PROJECT, self.DS_ID)
    table_ref = dataset.table(self.TABLE_NAME)
    full_name = SchemaField("full_name", "STRING", mode="REQUIRED")
    age = SchemaField("age", "INTEGER", mode="REQUIRED")
    colors = SchemaField("colors", "DATETIME", mode="REPEATED")
    joined = SchemaField("joined", "STRING", mode="NULLABLE")
    table = Table(table_ref, schema=[full_name, age, colors, joined])
    self.assertEqual(
        self._call_fut(MAPPING, table.schema),
        ("Phred Phlyntstone", 32, ["red", "green"], None),
    )
Example 9: test_to_dataframe_w_bqstorage_logs_session
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Table [as alias]
def test_to_dataframe_w_bqstorage_logs_session(self):
    from google.cloud.bigquery.table import Table

    bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient)
    session = bigquery_storage_v1.types.ReadSession()
    session.name = "projects/test-proj/locations/us/sessions/SOMESESSION"
    bqstorage_client.create_read_session.return_value = session
    mock_logger = mock.create_autospec(logging.Logger)
    row_iterator = self._make_one(
        _mock_client(), table=Table("debug-proj.debug_dset.debug_tbl")
    )
    with mock.patch("google.cloud.bigquery._pandas_helpers._LOGGER", mock_logger):
        row_iterator.to_dataframe(bqstorage_client=bqstorage_client)
    mock_logger.debug.assert_any_call(
        "Started reading table 'debug-proj.debug_dset.debug_tbl' "
        "with BQ Storage API session 'projects/test-proj/locations/us/sessions/SOMESESSION'."
    )
Example 10: test_table_reference_to_bqstorage_v1beta1
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Table [as alias]
def test_table_reference_to_bqstorage_v1beta1():
    from google.cloud.bigquery import table as mut

    # Can't use parametrized pytest because bigquery_storage_v1beta1 may not be
    # available.
    expected = bigquery_storage_v1beta1.types.TableReference(
        project_id="my-project", dataset_id="my_dataset", table_id="my_table"
    )
    cases = (
        "my-project.my_dataset.my_table",
        "my-project.my_dataset.my_table$20181225",
        "my-project.my_dataset.my_table@1234567890",
        "my-project.my_dataset.my_table$20181225@1234567890",
    )
    classes = (mut.TableReference, mut.Table, mut.TableListItem)
    for case, cls in itertools.product(cases, classes):
        got = cls.from_string(case).to_bqstorage(v1beta1=True)
        assert got == expected
Example 11: from_api_repr
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Table [as alias]
def from_api_repr(cls, resource):
    """Factory: construct a table reference given its API representation.

    Args:
        resource (Dict[str, object]):
            Table reference representation returned from the API.

    Returns:
        google.cloud.bigquery.table.TableReference:
            Table reference parsed from ``resource``.
    """
    from google.cloud.bigquery.dataset import DatasetReference

    project = resource["projectId"]
    dataset_id = resource["datasetId"]
    table_id = resource["tableId"]
    return cls(DatasetReference(project, dataset_id), table_id)
Example 12: schema
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Table [as alias]
def schema(self):
    """Sequence[Union[ \
        :class:`~google.cloud.bigquery.schema.SchemaField`, \
        Mapping[str, Any] \
    ]]:
        Table's schema.

    Raises:
        Exception:
            If ``schema`` is not a sequence, or if any item in the sequence
            is not a :class:`~google.cloud.bigquery.schema.SchemaField`
            instance or a compatible mapping representation of the field.
    """
    prop = self._properties.get("schema")
    if not prop:
        return []
    else:
        return _parse_schema_resource(prop)
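On the client side, the schema parsed by this property surfaces as a list of SchemaField objects; a minimal usage sketch, with a placeholder table ID:
from google.cloud import bigquery

client = bigquery.Client()
table = client.get_table("my-project.my_dataset.my_table")  # placeholder ID
for field in table.schema:
    # Each entry is a SchemaField carrying name, field_type, and mode.
    print(field.name, field.field_type, field.mode)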
Example 13: range_partitioning
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Table [as alias]
def range_partitioning(self):
    """Optional[google.cloud.bigquery.table.RangePartitioning]:
    Configures range-based partitioning for a table.

    .. note::
        **Beta**. The integer range partitioning feature is in a
        pre-release state and might change or have limited support.

    Only specify at most one of
    :attr:`~google.cloud.bigquery.table.Table.time_partitioning` or
    :attr:`~google.cloud.bigquery.table.Table.range_partitioning`.

    Raises:
        ValueError:
            If the value is not
            :class:`~google.cloud.bigquery.table.RangePartitioning` or
            :data:`None`.
    """
    resource = self._properties.get("rangePartitioning")
    if resource is not None:
        return RangePartitioning(_properties=resource)
Example 14: time_partitioning
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Table [as alias]
def time_partitioning(self):
    """Optional[google.cloud.bigquery.table.TimePartitioning]: Configures time-based
    partitioning for a table.

    Only specify at most one of
    :attr:`~google.cloud.bigquery.table.Table.time_partitioning` or
    :attr:`~google.cloud.bigquery.table.Table.range_partitioning`.

    Raises:
        ValueError:
            If the value is not
            :class:`~google.cloud.bigquery.table.TimePartitioning` or
            :data:`None`.
    """
    prop = self._properties.get("timePartitioning")
    if prop is not None:
        return TimePartitioning.from_api_repr(prop)
Example 15: from_string
# Required import: import pyarrow [as alias]
# Or: from pyarrow import Table [as alias]
def from_string(cls, full_table_id):
    """Construct a table from a fully-qualified table ID.

    Args:
        full_table_id (str):
            A fully-qualified table ID in standard SQL format. Must
            include a project ID, dataset ID, and table ID, each
            separated by ``.``.

    Returns:
        Table: Table parsed from ``full_table_id``.

    Examples:
        >>> Table.from_string('my-project.mydataset.mytable')
        Table(TableRef...(D...('my-project', 'mydataset'), 'mytable'))

    Raises:
        ValueError:
            If ``full_table_id`` is not a fully-qualified table ID in
            standard SQL format.
    """
    return cls(TableReference.from_string(full_table_id))