This article collects typical usage examples of Python's google.cloud.bigquery.QueryJobConfig. If you have been wondering what bigquery.QueryJobConfig does and how to use it, the curated code samples below may help. You can also explore the google.cloud.bigquery module for more on where this class fits.
Fifteen code examples of bigquery.QueryJobConfig are shown below, sorted by popularity by default.
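QueryJobConfig collects the optional settings for a query job (SQL dialect, caching, destination table, query parameters, labels, and so on) and is handed to Client.query() through the job_config argument. The samples below use two interchangeable construction styles; here is a minimal sketch of both:

    from google.cloud import bigquery

    # Style 1: pass properties as keyword arguments to the constructor.
    config_a = bigquery.QueryJobConfig(use_legacy_sql=True, use_query_cache=False)

    # Style 2: build an empty config and assign properties afterwards.
    config_b = bigquery.QueryJobConfig()
    config_b.use_legacy_sql = True
    config_b.use_query_cache = False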
Example 1: test_client_library_legacy_query
def test_client_library_legacy_query():
    # [START bigquery_migration_client_library_query_legacy]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM [bigquery-public-data:usa_names.usa_1910_current]
        WHERE state = 'TX'
        LIMIT 100
    """
    query_config = bigquery.QueryJobConfig(use_legacy_sql=True)

    df = client.query(sql, job_config=query_config).to_dataframe()
    # [END bigquery_migration_client_library_query_legacy]

    assert len(df) > 0
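Note the bracketed [project:dataset.table] table reference, which is legacy SQL dialect syntax; that is why use_legacy_sql=True is required here. Standard SQL uses backtick-quoted project.dataset.table names instead, as in the next example.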
Example 2: test_client_library_query_with_parameters
def test_client_library_query_with_parameters():
    # [START bigquery_migration_client_library_query_parameters]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        WHERE state = @state
        LIMIT @limit
    """
    query_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter('state', 'STRING', 'TX'),
            bigquery.ScalarQueryParameter('limit', 'INTEGER', 100)
        ]
    )

    df = client.query(sql, job_config=query_config).to_dataframe()
    # [END bigquery_migration_client_library_query_parameters]

    assert len(df) > 0
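The names given to ScalarQueryParameter must match the @state and @limit placeholders in the query text ('INTEGER' is the legacy alias for the standard 'INT64' type and both are accepted). BigQuery also supports positional parameters, written as ? in the SQL with the parameter name set to None.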
Example 3: client_query_legacy_sql
def client_query_legacy_sql():
    # [START bigquery_query_legacy]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    query = (
        "SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] "
        'WHERE state = "TX" '
        "LIMIT 100"
    )

    # Set use_legacy_sql to True to use legacy SQL syntax.
    job_config = bigquery.QueryJobConfig(use_legacy_sql=True)

    # Start the query, passing in the extra configuration.
    query_job = client.query(query, job_config=job_config)  # Make an API request.

    print("The query data:")
    for row in query_job:
        print(row)
    # [END bigquery_query_legacy]
Example 4: query_no_cache
def query_no_cache():
    # [START bigquery_query_no_cache]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    job_config = bigquery.QueryJobConfig(use_query_cache=False)
    sql = """
        SELECT corpus
        FROM `bigquery-public-data.samples.shakespeare`
        GROUP BY corpus;
    """
    query_job = client.query(sql, job_config=job_config)  # Make an API request.

    for row in query_job:
        print(row)
    # [END bigquery_query_no_cache]
Example 5: client_query_dry_run
def client_query_dry_run():
    # [START bigquery_query_dry_run]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)

    # Start the query, passing in the extra configuration.
    query_job = client.query(
        (
            "SELECT name, COUNT(*) as name_count "
            "FROM `bigquery-public-data.usa_names.usa_1910_2013` "
            "WHERE state = 'WA' "
            "GROUP BY name"
        ),
        job_config=job_config,
    )  # Make an API request.

    # A dry run query completes immediately.
    print("This query will process {} bytes.".format(query_job.total_bytes_processed))
    # [END bigquery_query_dry_run]

    return query_job
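Because dry_run=True validates the query and estimates its cost without executing it, the job finishes immediately and total_bytes_processed can be read without calling result(). Setting use_query_cache=False ensures the estimate is not reported as zero bytes due to a cache hit.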
Example 6: client_query_destination_table
def client_query_destination_table(table_id):
    # [START bigquery_query_destination_table]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the destination table.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.QueryJobConfig(destination=table_id)

    sql = """
        SELECT corpus
        FROM `bigquery-public-data.samples.shakespeare`
        GROUP BY corpus;
    """

    # Start the query, passing in the extra configuration.
    query_job = client.query(sql, job_config=job_config)  # Make an API request.
    query_job.result()  # Wait for the job to complete.

    print("Query results loaded to the table {}".format(table_id))
    # [END bigquery_query_destination_table]
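By default, a query job writes to its destination with the WRITE_EMPTY disposition, so the job fails if the table already contains data. A minimal sketch of overriding that with write_disposition (the table ID here is a placeholder, as in the TODO above):

    from google.cloud import bigquery

    # Placeholder destination table ID.
    table_id = "your-project.your_dataset.your_table_name"

    # Replace any existing contents of the destination table on each run.
    job_config = bigquery.QueryJobConfig(
        destination=table_id,
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    )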
Example 7: create_job
def create_job():
    # [START bigquery_create_job]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    query_job = client.query(
        "SELECT country_name from `bigquery-public-data.utility_us.country_code_iso`",
        # Explicitly force job execution to be routed to a specific processing
        # location.
        location="US",
        # Specify a job configuration to set optional job resource properties.
        job_config=bigquery.QueryJobConfig(
            labels={"example-label": "example-value"}, maximum_bytes_billed=1000000
        ),
        # The client libraries automatically generate a job ID. Override the
        # generated ID with either the job_id_prefix or job_id parameters.
        job_id_prefix="code_sample_",
    )  # Make an API request.

    print("Started job: {}".format(query_job.job_id))
    # [END bigquery_create_job]

    return query_job
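Here maximum_bytes_billed acts as a cost guard: if the query would bill more than the configured limit (about 1 MB in this example), the job fails instead of running. Labels show up in job metadata and billing exports, and job_id_prefix makes the auto-generated job ID easier to find when listing jobs.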
Example 8: test_query_w_legacy_sql_types
def test_query_w_legacy_sql_types(self):
    # datetime and bigquery are imported at module level in the original test
    # suite; UTC is a UTC tzinfo instance and Config.CLIENT a shared client
    # fixture defined elsewhere in that module.
    naive = datetime.datetime(2016, 12, 5, 12, 41, 9)
    stamp = "%s %s" % (naive.date().isoformat(), naive.time().isoformat())
    zoned = naive.replace(tzinfo=UTC)
    examples = [
        {"sql": "SELECT 1", "expected": 1},
        {"sql": "SELECT 1.3", "expected": 1.3},
        {"sql": "SELECT TRUE", "expected": True},
        {"sql": 'SELECT "ABC"', "expected": "ABC"},
        {"sql": 'SELECT CAST("foo" AS BYTES)', "expected": b"foo"},
        {"sql": 'SELECT CAST("%s" AS TIMESTAMP)' % (stamp,), "expected": zoned},
    ]
    for example in examples:
        job_config = bigquery.QueryJobConfig()
        job_config.use_legacy_sql = True
        rows = list(Config.CLIENT.query(example["sql"], job_config=job_config))
        self.assertEqual(len(rows), 1)
        self.assertEqual(len(rows[0]), 1)
        self.assertEqual(rows[0][0], example["expected"])
Example 9: test_querying_data_w_timeout
def test_querying_data_w_timeout(self):
    # requests and bigquery are imported at module level in the original
    # test suite; Config.CLIENT is a shared client fixture.
    job_config = bigquery.QueryJobConfig()
    job_config.use_query_cache = False

    query_job = Config.CLIENT.query(
        """
        SELECT name, SUM(number) AS total_people
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        GROUP BY name
        """,
        location="US",
        job_config=job_config,
    )

    # Specify a very tight deadline to demonstrate that the timeout
    # actually has effect.
    with self.assertRaises(requests.exceptions.Timeout):
        query_job.done(timeout=0.1)

    # Now wait for the result using a more realistic deadline.
    query_job.result(timeout=30)
    self.assertTrue(query_job.done(timeout=30))
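The timeout argument to done() and result() bounds the underlying HTTP request, which is why an unrealistically small value surfaces as requests.exceptions.Timeout from the transport layer rather than as a BigQuery error.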
Example 10: default_query_job_config
@property
def default_query_job_config(self):
    """google.cloud.bigquery.job.QueryJobConfig: Default job
    configuration for queries.

    The context's :class:`~google.cloud.bigquery.job.QueryJobConfig` is
    used for queries. Some properties can be overridden with arguments to
    the magics.

    Example:
        Manually setting the default value for ``maximum_bytes_billed``
        to 100 MB:

        >>> from google.cloud.bigquery import magics
        >>> magics.context.default_query_job_config.maximum_bytes_billed = 100000000
    """
    return self._default_query_job_config
Example 11: execute_transformation_query
def execute_transformation_query(bq_client):
    """Executes transformation query to a new destination table.

    Args:
        bq_client: Object representing a reference to a BigQuery Client
    """
    # config and file_to_string are defined at module level in the
    # original source.
    dataset_ref = bq_client.get_dataset(bigquery.DatasetReference(
        project=config.config_vars['billing_project_id'],
        dataset_id=config.config_vars['output_dataset_id']))
    table_ref = dataset_ref.table(config.config_vars['output_table_name'])
    job_config = bigquery.QueryJobConfig()
    job_config.destination = table_ref
    # WriteDisposition constants are class attributes; no instance needed.
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
    job_config.time_partitioning = bigquery.TimePartitioning(
        field='usage_start_time',
        expiration_ms=None)
    sql = file_to_string(config.config_vars['sql_file_path'])
    sql = sql.format(**config.config_vars)
    logging.info('Attempting query on all dates...')
    # Execute Query
    query_job = bq_client.query(
        sql,
        job_config=job_config)
    query_job.result()  # Waits for the query to finish
    logging.info('Transformation query complete. All partitions are updated.')
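Setting time_partitioning with field='usage_start_time' makes the destination a column-partitioned table, and combining that with WRITE_TRUNCATE replaces the table's contents wholesale on each run, which is why the final log message reports that all partitions are updated.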
Example 12: testPartitionsAndUsageDates
def testPartitionsAndUsageDates(self):
    """Tests that the # of partitions is equal to the # of usage_start_times."""
    # bigquery and config are imported at module level in the original source.
    bq_client = bigquery.Client()
    job_config = bigquery.QueryJobConfig()
    usage_query = """
        SELECT COUNT(DISTINCT(DATE(usage_start_time))) AS cnt
        FROM `{billing_project_id}.{output_dataset_id}.{output_table_name}`
    """
    usage_query = usage_query.format(**config.config_vars)
    query_job = bq_client.query(usage_query, job_config=job_config)
    for row in query_job.result():
        output_result = row.cnt

    partition_query = """
        SELECT COUNT(DISTINCT(partition_id)) AS cnt
        FROM [{billing_project_id}.{output_dataset_id}.{output_table_name}$__PARTITIONS_SUMMARY__]
    """
    partition_query = partition_query.format(**config.config_vars)
    job_config = bigquery.QueryJobConfig()
    job_config.use_legacy_sql = True
    query_job = bq_client.query(partition_query, job_config=job_config)
    for row in query_job.result():
        partition_result = row.cnt

    assert output_result == partition_result
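The $__PARTITIONS_SUMMARY__ meta-table used by the second query is only available in the legacy SQL dialect, which is why the job_config is rebuilt with use_legacy_sql = True; the first query runs in standard SQL, the default.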
Example 13: get_max_ingest_timestamp
def get_max_ingest_timestamp(self):
    """Gets the max timestamp that was set during the latest merge.

    Returns:
        latest_merge as a timestamp in string format.
    """
    # INITIAL_TIMESTAMP is a module-level constant in the original source.
    get_last_max_ts_config = bigquery.QueryJobConfig()
    get_last_max_ts_config.use_legacy_sql = False
    get_last_max_timestamp_query = self.bq_client.query(
        query='SELECT max(ingestTimestamp) as max_ingest_timestamp '
              'FROM `{0:s}.{1:s}.{2:s}`'.format(self.project_id, self.dataset_id,
                                                self.temp_updates_table_id),
        job_config=get_last_max_ts_config,
        location='US')
    get_last_max_timestamp_query.result()
    results = list(get_last_max_timestamp_query)
    max_ingest_timestamp = results[0]['max_ingest_timestamp']
    if not max_ingest_timestamp:
        max_ingest_timestamp = INITIAL_TIMESTAMP
    return max_ingest_timestamp.strftime('%Y-%m-%d %H:%M:%S.%f %Z')
Example 14: merge_updates
def merge_updates(self, merge_updates_query):
    """Merges rows from the temp table into the final table.

    Args:
        merge_updates_query(str): Query for merging updates from the temp
            updates table to the final table.
    """
    logging.info('{0:s} Merging updates from {1:s} into {2:s}.'.format(
        str(datetime.datetime.now()), self.temp_updates_table_id,
        self.final_table_id))
    merge_updates_job_config = bigquery.QueryJobConfig()
    merge_updates_job_config.use_legacy_sql = False
    merge_updates_query_job = self.bq_client.query(
        query=merge_updates_query,
        location='US',
        job_config=merge_updates_job_config)
    merge_updates_query_job.result()
    logging.info('{0:s} Successfully merged updates into {1:s}.'.format(
        str(datetime.datetime.now()), self.final_table_id))
Example 15: get_table
def get_table(view):
    """Helper for determining what table underlies a user-facing view,
    since the Storage API can't read views."""
    # bigquery is imported at module level (from google.cloud import bigquery).
    bq = bigquery.Client()
    view = view.replace(":", ".")
    # partition filter is required, so try a couple options
    for partition_column in ["DATE(submission_timestamp)", "submission_date"]:
        try:
            job = bq.query(
                f"SELECT * FROM `{view}` WHERE {partition_column} = CURRENT_DATE",
                bigquery.QueryJobConfig(dry_run=True),
            )
            break
        except Exception:
            continue
    else:
        raise ValueError("could not determine partition column")
    assert len(job.referenced_tables) == 1, "View combines multiple tables"
    table = job.referenced_tables[0]
    return f"{table.project}:{table.dataset_id}.{table.table_id}"