This article collects typical usage examples of Python's google.cloud.bigquery.DatasetReference class. If you are wondering what bigquery.DatasetReference is for, how to call it, or what working code that uses it looks like, the curated examples below should help. You can also explore further usage examples for the module it belongs to, google.cloud.bigquery.
The following presents 15 code examples of bigquery.DatasetReference, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
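All of the examples share the same basic pattern: build a DatasetReference from a project ID and a dataset ID, derive a TableReference from it with .table(), and hand that reference to a client call. As a minimal sketch of that pattern (assuming default application credentials and the public bigquery-public-data.samples dataset that several examples below also use):

from google.cloud import bigquery

client = bigquery.Client()  # uses default credentials and project
dataset_ref = bigquery.DatasetReference("bigquery-public-data", "samples")
table_ref = dataset_ref.table("shakespeare")  # TableReference within the dataset
table = client.get_table(table_ref)           # API request for table metadata
print(table.full_table_id, len(table.schema))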
Example 1: test_list_rows_as_dataframe
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import DatasetReference [as alias]
def test_list_rows_as_dataframe(client):
    # [START bigquery_list_rows_dataframe]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    project = "bigquery-public-data"
    dataset_id = "samples"
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table("shakespeare")
    table = client.get_table(table_ref)
    df = client.list_rows(table).to_dataframe()
    # [END bigquery_list_rows_dataframe]
    assert isinstance(df, pandas.DataFrame)
    assert len(list(df)) == len(table.schema)  # verify the number of columns
    assert len(df) == table.num_rows  # verify the number of rows
Example 2: tearDown
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import DatasetReference [as alias]
def tearDown(self):
    def _still_in_use(bad_request):
        return any(
            error["reason"] == "resourceInUse" for error in bad_request._errors
        )

    retry_in_use = RetryErrors(BadRequest, error_predicate=_still_in_use)
    retry_storage_errors_conflict = RetryErrors(
        (Conflict, TooManyRequests, InternalServerError, ServiceUnavailable)
    )
    for doomed in self.to_delete:
        if isinstance(doomed, storage.Bucket):
            retry_storage_errors_conflict(doomed.delete)(force=True)
        elif isinstance(doomed, (Dataset, bigquery.DatasetReference)):
            retry_in_use(Config.CLIENT.delete_dataset)(doomed, delete_contents=True)
        elif isinstance(doomed, (Table, bigquery.TableReference)):
            retry_in_use(Config.CLIENT.delete_table)(doomed)
        else:
            doomed.delete()
Example 3: test_get_table_w_public_dataset
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import DatasetReference [as alias]
def test_get_table_w_public_dataset(self):
    public = "bigquery-public-data"
    dataset_id = "samples"
    table_id = "shakespeare"
    table_ref = DatasetReference(public, dataset_id).table(table_id)

    # Get table with reference.
    table = Config.CLIENT.get_table(table_ref)
    self.assertEqual(table.table_id, table_id)
    self.assertEqual(table.dataset_id, dataset_id)
    self.assertEqual(table.project, public)
    schema_names = [field.name for field in table.schema]
    self.assertEqual(schema_names, ["word", "word_count", "corpus", "corpus_date"])

    # Get table with string.
    table = Config.CLIENT.get_table("{}.{}.{}".format(public, dataset_id, table_id))
    self.assertEqual(table.table_id, table_id)
    self.assertEqual(table.dataset_id, dataset_id)
    self.assertEqual(table.project, public)
Example 4: test_extract_table
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import DatasetReference [as alias]
def test_extract_table(self):
    local_id = unique_resource_id()
    bucket_name = "bq_extract_test" + local_id
    source_blob_name = "person_ages.csv"
    dataset_id = _make_dataset_id("load_gcs_then_extract")
    table_id = "test_table"
    project = Config.CLIENT.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    table_ref = dataset_ref.table(table_id)
    table = Table(table_ref)
    self.to_delete.insert(0, table)
    bucket = self._create_bucket(bucket_name)
    self._load_table_for_extract_table(bucket, source_blob_name, table_ref, ROWS)
    destination_blob_name = "person_ages_out.csv"
    destination = bucket.blob(destination_blob_name)
    destination_uri = "gs://{}/person_ages_out.csv".format(bucket_name)

    job = Config.CLIENT.extract_table(table_ref, destination_uri)
    job.result(timeout=100)

    self.to_delete.insert(0, destination)
    got_bytes = retry_storage_errors(destination.download_as_string)()
    got = got_bytes.decode("utf-8")
    self.assertIn("Bharney Rhubble", got)
Example 5: test_copy_table
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import DatasetReference [as alias]
def test_copy_table(self):
    # If we create a new table to copy from, the test won't work
    # because the new rows will be stored in the streaming buffer,
    # and copy jobs don't read the streaming buffer.
    # We could wait for the streaming buffer to empty, but that could
    # take minutes. Instead we copy a small public table.
    source_dataset = DatasetReference("bigquery-public-data", "samples")
    source_ref = source_dataset.table("shakespeare")
    dest_dataset = self.temp_dataset(_make_dataset_id("copy_table"))
    dest_ref = dest_dataset.table("destination_table")
    job_config = bigquery.CopyJobConfig()
    job = Config.CLIENT.copy_table(source_ref, dest_ref, job_config=job_config)
    job.result()
    dest_table = Config.CLIENT.get_table(dest_ref)
    self.to_delete.insert(0, dest_table)
    # Just check that we got some rows.
    got_rows = self._fetch_single_page(dest_table)
    self.assertTrue(len(got_rows) > 0)
Example 6: execute_transformation_query
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import DatasetReference [as alias]
def execute_transformation_query(bq_client):
    """Executes the transformation query into a new destination table.

    Args:
        bq_client: Object representing a reference to a BigQuery Client
    """
    dataset_ref = bq_client.get_dataset(bigquery.DatasetReference(
        project=config.config_vars['billing_project_id'],
        dataset_id=config.config_vars['output_dataset_id']))
    table_ref = dataset_ref.table(config.config_vars['output_table_name'])
    job_config = bigquery.QueryJobConfig()
    job_config.destination = table_ref
    job_config.write_disposition = bigquery.WriteDisposition().WRITE_TRUNCATE
    job_config.time_partitioning = bigquery.TimePartitioning(
        field='usage_start_time',
        expiration_ms=None)
    sql = file_to_string(config.config_vars['sql_file_path'])
    sql = sql.format(**config.config_vars)
    logging.info('Attempting query on all dates...')
    # Execute the query.
    query_job = bq_client.query(
        sql,
        job_config=job_config)
    query_job.result()  # Waits for the query to finish.
    logging.info('Transformation query complete. All partitions are updated.')
Example 7: __init__
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import DatasetReference [as alias]
def __init__(self):
    if constants.ON_LOCAL:
        return
    self._client = bigquery.Client()
    self._dataset_ref = bigquery.DatasetReference(
        self._client.project, constants.BIGQUERY_DATASET_NAME)
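The stored _dataset_ref is typically combined with a table name when the wrapper later writes rows (Example 8 mocks exactly that insert_rows path). A hedged standalone sketch of such a follow-up step; the dataset name, table name, and row tuple here are illustrative stand-ins, not taken from the original class:

import logging

from google.cloud import bigquery

client = bigquery.Client()
# 'loaner_dataset' and 'audit_log' are hypothetical names for illustration.
dataset_ref = bigquery.DatasetReference(client.project, 'loaner_dataset')
table = client.get_table(dataset_ref.table('audit_log'))   # fetch table and schema
errors = client.insert_rows(table, [('abc123', 'Enroll')])  # stream one row
if errors:
    logging.error('BigQuery insert failed: %s', errors)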
Example 8: setUp
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import DatasetReference [as alias]
def setUp(self):
    super(BigQueryClientTest, self).setUp()
    bq_patcher = mock.patch.object(gcloud_bq, 'Client', autospec=True)
    self.addCleanup(bq_patcher.stop)
    self.bq_mock = bq_patcher.start()
    self.dataset_ref = mock.Mock(spec=gcloud_bq.DatasetReference)
    self.table = mock.Mock(spec=gcloud_bq.Table)
    self.table.schema = []
    self.dataset_ref.table.return_value = self.table
    with mock.patch.object(
            bigquery.BigQueryClient, '__init__', return_value=None):
        self.client = bigquery.BigQueryClient()
        self.client._client = self.bq_mock()
        self.client._dataset_ref = self.dataset_ref
        self.client._client.insert_rows.return_value = None
        self.client._client.get_table.return_value = self.table
    self.nested_schema = [
        gcloud_bq.SchemaField('nested_string_attribute', 'STRING', 'NULLABLE')]
    self.entity_schema = [
        gcloud_bq.SchemaField('string_attribute', 'STRING', 'NULLABLE'),
        gcloud_bq.SchemaField('integer_attribute', 'INTEGER', 'NULLABLE'),
        gcloud_bq.SchemaField('boolean_attribute', 'BOOLEAN', 'NULLABLE'),
        gcloud_bq.SchemaField(
            'nested_attribute', 'RECORD', 'NULLABLE', fields=self.nested_schema)
    ]
    test_device = device_model.Device(
        serial_number='abc123', chrome_device_id='123123')
    test_device.put()
    test_row = bigquery_row_model.BigQueryRow.add(
        test_device, datetime.datetime.utcnow(),
        loanertest.USER_EMAIL, 'Enroll', 'This is a test')
    self.test_row_dict = test_row.to_json_dict()
    self.test_table = [(self.test_row_dict['ndb_key'],
                        self.test_row_dict['timestamp'],
                        self.test_row_dict['actor'],
                        self.test_row_dict['method'],
                        self.test_row_dict['summary'],
                        self.test_row_dict['entity'])]
Example 9: temp_dataset
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import DatasetReference [as alias]
def temp_dataset():
    from google.cloud import bigquery

    client = bigquery.Client()
    dataset_id = "temp_dataset_{}".format(int(time.time() * 1000))
    dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
    dataset = client.create_dataset(bigquery.Dataset(dataset_ref))
    yield dataset
    client.delete_dataset(dataset, delete_contents=True)
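This reads as a pytest yield fixture (the @pytest.fixture decorator was presumably stripped from the listing). A hedged sketch of a test consuming it, assuming the fixture is registered under the name temp_dataset; the table name is illustrative:

def test_create_table_in_temp_dataset(temp_dataset):
    from google.cloud import bigquery

    client = bigquery.Client()
    table_ref = temp_dataset.table("example_table")  # TableReference in the temp dataset
    table = client.create_table(bigquery.Table(table_ref))  # API request
    assert table.dataset_id == temp_dataset.dataset_id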
Example 10: test_query_succeed
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import DatasetReference [as alias]
def test_query_succeed(self, mock_client,
                       mock_kfp_context, mock_dump_json, mock_display):
    mock_kfp_context().__enter__().context_id.return_value = 'ctx1'
    mock_client().get_job.side_effect = exceptions.NotFound('not found')
    mock_dataset = bigquery.DatasetReference('project-1', 'dataset-1')
    mock_client().dataset.return_value = mock_dataset
    mock_client().get_dataset.side_effect = exceptions.NotFound('not found')
    mock_response = {
        'configuration': {
            'query': {
                'query': 'SELECT * FROM table_1'
            }
        }
    }
    mock_client().query.return_value.to_api_repr.return_value = mock_response

    result = query('SELECT * FROM table_1', 'project-1', 'dataset-1',
                   output_gcs_path='gs://output/path')

    self.assertEqual(mock_response, result)
    mock_client().create_dataset.assert_called()
    expected_job_config = bigquery.QueryJobConfig()
    expected_job_config.create_disposition = bigquery.job.CreateDisposition.CREATE_IF_NEEDED
    expected_job_config.write_disposition = bigquery.job.WriteDisposition.WRITE_TRUNCATE
    expected_job_config.destination = mock_dataset.table('query_ctx1')
    mock_client().query.assert_called_with('SELECT * FROM table_1', mock.ANY,
                                           job_id='query_ctx1')
    actual_job_config = mock_client().query.call_args_list[0][0][1]
    self.assertDictEqual(
        expected_job_config.to_api_repr(),
        actual_job_config.to_api_repr()
    )
    mock_client().extract_table.assert_called_with(
        mock_dataset.table('query_ctx1'),
        'gs://output/path')
Example 11: test_query_no_output_path
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import DatasetReference [as alias]
def test_query_no_output_path(self, mock_client,
                              mock_kfp_context, mock_dump_json, mock_display):
    mock_kfp_context().__enter__().context_id.return_value = 'ctx1'
    mock_client().get_job.side_effect = exceptions.NotFound('not found')
    mock_dataset = bigquery.DatasetReference('project-1', 'dataset-1')
    mock_client().dataset.return_value = mock_dataset
    mock_client().get_dataset.return_value = bigquery.Dataset(mock_dataset)
    mock_response = {
        'configuration': {
            'query': {
                'query': 'SELECT * FROM table_1'
            }
        }
    }
    mock_client().query.return_value.to_api_repr.return_value = mock_response

    result = query('SELECT * FROM table_1', 'project-1', 'dataset-1', 'table-1')

    self.assertEqual(mock_response, result)
    mock_client().create_dataset.assert_not_called()
    mock_client().extract_table.assert_not_called()
    expected_job_config = bigquery.QueryJobConfig()
    expected_job_config.create_disposition = bigquery.job.CreateDisposition.CREATE_IF_NEEDED
    expected_job_config.write_disposition = bigquery.job.WriteDisposition.WRITE_TRUNCATE
    expected_job_config.destination = mock_dataset.table('table-1')
    mock_client().query.assert_called_with('SELECT * FROM table_1', mock.ANY,
                                           job_id='query_ctx1')
    actual_job_config = mock_client().query.call_args_list[0][0][1]
    self.assertDictEqual(
        expected_job_config.to_api_repr(),
        actual_job_config.to_api_repr()
    )
Example 12: to_delete
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import DatasetReference [as alias]
def to_delete(client):
    doomed = []
    yield doomed
    for item in doomed:
        if isinstance(item, (bigquery.Dataset, bigquery.DatasetReference)):
            retry_429(client.delete_dataset)(item, delete_contents=True)
        elif isinstance(item, storage.Bucket):
            retry_storage_errors(item.delete)()
        else:
            retry_429(item.delete)()
Example 13: test_create_table_nested_repeated_schema
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import DatasetReference [as alias]
def test_create_table_nested_repeated_schema(client, to_delete):
    dataset_id = "create_table_nested_repeated_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_nested_repeated_schema]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = client.project
    # dataset_ref = bigquery.DatasetReference(project, 'my_dataset')
    schema = [
        bigquery.SchemaField("id", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("dob", "DATE", mode="NULLABLE"),
        bigquery.SchemaField(
            "addresses",
            "RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField("status", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("address", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("city", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("state", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("zip", "STRING", mode="NULLABLE"),
                bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"),
            ],
        ),
    ]
    table_ref = dataset_ref.table("my_table")
    table = bigquery.Table(table_ref, schema=schema)
    table = client.create_table(table)  # API request
    print("Created table {}".format(table.full_table_id))
    # [END bigquery_nested_repeated_schema]
Example 14: test_create_table_cmek
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import DatasetReference [as alias]
def test_create_table_cmek(client, to_delete):
    dataset_id = "create_table_cmek_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_create_table_cmek]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'
    table_ref = dataset.table("my_table")
    table = bigquery.Table(table_ref)

    # Set the encryption key to use for the table.
    # TODO: Replace this key with a key you have created in Cloud KMS.
    kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format(
        "cloud-samples-tests", "us", "test", "test"
    )
    table.encryption_configuration = bigquery.EncryptionConfiguration(
        kms_key_name=kms_key_name
    )

    table = client.create_table(table)  # API request
    assert table.encryption_configuration.kms_key_name == kms_key_name
    # [END bigquery_create_table_cmek]
Example 15: test_create_partitioned_table
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import DatasetReference [as alias]
def test_create_partitioned_table(client, to_delete):
    dataset_id = "create_table_partitioned_{}".format(_millis())
    project = client.project
    dataset_ref = bigquery.DatasetReference(project, dataset_id)
    dataset = client.create_dataset(dataset_ref)
    to_delete.append(dataset)

    # [START bigquery_create_table_partitioned]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project = client.project
    # dataset_ref = bigquery.DatasetReference(project, 'my_dataset')
    table_ref = dataset_ref.table("my_partitioned_table")
    schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
        bigquery.SchemaField("date", "DATE"),
    ]
    table = bigquery.Table(table_ref, schema=schema)
    table.time_partitioning = bigquery.TimePartitioning(
        type_=bigquery.TimePartitioningType.DAY,
        field="date",  # name of column to use for partitioning
        expiration_ms=7776000000,
    )  # 90 days

    table = client.create_table(table)

    print(
        "Created table {}, partitioned on column {}".format(
            table.table_id, table.time_partitioning.field
        )
    )
    # [END bigquery_create_table_partitioned]

    assert table.time_partitioning.type_ == "DAY"
    assert table.time_partitioning.field == "date"
    assert table.time_partitioning.expiration_ms == 7776000000
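Once the partitioned table exists, queries that filter on the partitioning column only scan the matching partitions. A hedged follow-up sketch, reusing project, dataset_id, and client from the example above; the SQL and the date literal are illustrative:

sql = (
    "SELECT name, post_abbr "
    "FROM `{}.{}.my_partitioned_table` "
    "WHERE date = '2020-01-01'"
).format(project, dataset_id)
rows = list(client.query(sql).result())  # waits for the query job to finish
print("Matched {} rows in the selected partition".format(len(rows)))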