This article collects typical usage examples of the Python method google.cloud.bigquery.LoadJobConfig. If you are wondering what bigquery.LoadJobConfig does, how to use it, or where to find examples of it in practice, the curated code samples below may help. You can also read more about the module it belongs to, google.cloud.bigquery.
The following 15 code examples of bigquery.LoadJobConfig are sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
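Before the examples, here is a minimal, self-contained sketch of the common LoadJobConfig pattern: build a config describing the source format and write behaviour, pass it to one of the client's load_table_from_* methods, and wait on the returned job. The table ID and GCS URI below are placeholders, not taken from any example on this page.

from google.cloud import bigquery

client = bigquery.Client()  # uses application-default credentials

# Describe how the load should behave before submitting the job.
job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV,
    skip_leading_rows=1,   # skip the CSV header row
    autodetect=True,       # infer the schema from the data
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

# Placeholder destination and source; replace with your own values.
table_id = "your-project.your_dataset.your_table"
uri = "gs://your-bucket/path/to/data.csv"

load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
load_job.result()  # block until the load job completes
print("Loaded {} rows.".format(client.get_table(table_id).num_rows))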
Example 1: df_to_bigquery
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def df_to_bigquery(df, table_id, dataset_id, client):
    table = get_bigquery_table(table_id, dataset_id, client)

    # set config: insert overwrite
    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.job.WriteDisposition.WRITE_TRUNCATE
    )

    # insert table
    job = client.load_table_from_dataframe(
        dataframe=df.compute().rename_axis("id"),
        destination=table,
        job_config=job_config
    )
    job.result()

    logger.info('%s rows loaded into %s.%s.%s.', job.output_rows, job.project, dataset_id, table_id)
    return table
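A hedged usage sketch for the helper above: the .compute() call suggests df is a Dask DataFrame, and get_bigquery_table is assumed to return a bigquery.Table reference from the surrounding project. The file path, dataset, and table names are placeholders.

import dask.dataframe as dd
from google.cloud import bigquery

client = bigquery.Client()
df = dd.read_csv("data/*.csv")  # hypothetical input files
table = df_to_bigquery(df, table_id="my_table", dataset_id="my_dataset", client=client)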
Example 2: load_to_gbq
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def load_to_gbq(client, data, bq_configuration):
    """
    Load data into BigQuery using *bq_configuration* settings.
    """
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job_config.source_format = "NEWLINE_DELIMITED_JSON"
    job_config.autodetect = True

    load_job = client.load_table_from_file(
        data,
        table_ref,
        job_config=job_config)  # API request
    print('Starting job {}'.format(load_job.job_id))

    load_job.result()  # Waits for table load to complete.
    print('Job finished.')
Example 3: load_to_gbq
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def load_to_gbq(filename, bq_configuration):
    """
    Load data into BigQuery using *bq_configuration* settings.
    """
    # construct a Client object pointing at the table in which data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job_config.source_format = bq_configuration["source_format"]
    job_config.autodetect = True
    if bq_configuration["source_format"].upper() == "CSV":
        job_config.skip_leading_rows = 1

    # upload the file to the BigQuery table
    with open(filename, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref, location=bq_configuration["location"], job_config=job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " + bq_configuration["project_id"] + "." +
          bq_configuration["dataset_id"] + "." + bq_configuration["table"] + ".")
    os.remove(filename)
Example 4: give_file_gbq
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def give_file_gbq(path_to_file, bq_configuration):
    """
    Upload the file at *path_to_file* to a BigQuery table using *bq_configuration* settings.
    """
    # construct a Client object pointing at the table in which data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table_id"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bq_configuration["source_format"].upper()
    job_config.write_disposition = bq_configuration["write_disposition"]
    if bq_configuration["source_format"].upper() == "CSV":
        job_config.field_delimiter = bq_configuration["delimiter"]
        job_config.skip_leading_rows = 1
    job_config.autodetect = True

    # upload the file to the BigQuery table
    with open(path_to_file, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref, location=bq_configuration["location"], job_config=job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " + bq_configuration["project_id"] + "." + bq_configuration["dataset_id"] + "." + bq_configuration["table_id"] + ".")
    os.remove(path_to_file)
Example 5: give_file_gbq
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def give_file_gbq(path_to_file, bq_configuration):
    """
    Upload the file at *path_to_file* to a BigQuery table using *bq_configuration* settings.
    """
    # construct a Client object pointing at the table in which data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table_id"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = "NEWLINE_DELIMITED_JSON"
    job_config.write_disposition = bq_configuration["write_disposition"]
    job_config.autodetect = True

    # upload the file to the BigQuery table
    with open(path_to_file, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref, location=bq_configuration["location"], job_config=job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " + bq_configuration["project_id"] + "." + bq_configuration["dataset_id"] + "." + bq_configuration["table_id"] + ".")
Example 6: load_table_uri_parquet
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def load_table_uri_parquet(table_id):
    # [START bigquery_load_table_gcs_parquet]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.PARQUET)

    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet"

    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_parquet]
Example 7: load_table_uri_orc
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def load_table_uri_orc(table_id):
    # [START bigquery_load_table_gcs_orc]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.ORC)

    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc"

    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_orc]
Example 8: load_table_uri_avro
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def load_table_uri_avro(table_id):
    # [START bigquery_load_table_gcs_avro]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.AVRO)

    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro"

    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_avro]
Example 9: csv_in_gcs_to_table
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def csv_in_gcs_to_table(bucket_name: str, object_name: str, dataset_id: str,
                        table_id: str,
                        schema: List[bigquery.SchemaField]) -> None:
    """Upload a CSV file to a BigQuery table.

    If the table already exists, its data is overwritten.

    Args:
        bucket_name: Bucket holding the object.
        object_name: Name of the object to be uploaded.
        dataset_id: Dataset ID where the table is located.
        table_id: String holding the ID of the table.
        schema: Schema of the table.
    """
    client = bigquery.Client()
    dataset_ref = client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.schema = schema
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
    uri = "gs://{}/{}".format(bucket_name, object_name)
    load_job = client.load_table_from_uri(uri,
                                          dataset_ref.table(table_id),
                                          job_config=job_config)
    load_job.result()
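A hedged usage sketch for the function above (it also needs `from typing import List` at module level); the bucket, object, dataset, table, and schema values are placeholders:

from google.cloud import bigquery

schema = [
    bigquery.SchemaField("name", "STRING"),
    bigquery.SchemaField("post_abbr", "STRING"),
]
csv_in_gcs_to_table("my-bucket", "states.csv", "my_dataset", "states", schema)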
Example 10: run_job
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def run_job(self, method_name, args, config_opts, config_default_opts):
    job_config = {
        "copy_table": gcbq.CopyJobConfig,
        "extract_table": gcbq.ExtractJobConfig,
        "load_table_from_file": gcbq.LoadJobConfig,
        "load_table_from_uri": gcbq.LoadJobConfig,
        "query": gcbq.QueryJobConfig,
    }[method_name]()

    for k, v in config_default_opts.items():
        setattr(job_config, k, v)
    for k, v in config_opts.items():
        setattr(job_config, k, v)

    method = getattr(self.gcbq_client, method_name)
    job = method(*args, job_config=job_config)

    if getattr(job_config, "dry_run", False):
        return []
    else:
        return job.result()
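A hedged usage sketch: the wrapper class that defines run_job is not shown here, so `wrapper` below is a hypothetical instance whose gcbq_client attribute holds a bigquery.Client; the query and options are placeholders.

rows = wrapper.run_job(
    "query",
    ["SELECT 1 AS x"],
    config_opts={"use_legacy_sql": False},
    config_default_opts={"dry_run": False},
)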
Example 11: _start_one_load_job
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def _start_one_load_job(self, suffix):
    # After issue #582 is resolved we can remove the create_disposition flag.
    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.AVRO,
        create_disposition='CREATE_NEVER')
    uri = self._avro_root_path + suffix + '-*'
    table_id = bigquery_util.compose_table_name(self._table_base_name, suffix)
    load_job = self._client.load_table_from_uri(
        uri, table_id, job_config=job_config)
    self._suffixes_to_load_jobs.update({suffix: load_job})
Example 12: to_bq_from_local_file
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def to_bq_from_local_file(temp_data_filename, bq_tablename, columns_to_export, append=True):
    # import the data into bigquery
    (dataset_id, table_id) = bq_tablename.split(".")

    setup_bigquery_creds()
    client = bigquery.Client()

    dataset_ref = client.dataset(dataset_id)
    table_ref = dataset_ref.table(table_id)

    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.skip_leading_rows = 1
    job_config.allow_quoted_newlines = True
    job_config.max_bad_records = 1000
    if append:
        job_config.autodetect = False
        job_config.write_disposition = 'WRITE_APPEND'
    else:
        job_config.autodetect = True
        job_config.write_disposition = 'WRITE_TRUNCATE'

    if "*" in columns_to_export or "," in columns_to_export:
        job_config.field_delimiter = ","
    else:
        job_config.field_delimiter = "þ"  # placeholder when there is only one column and we don't want to split it

    with open(temp_data_filename, 'rb') as source_file:
        job = client.load_table_from_file(
            source_file,
            bq_tablename,
            location='US',
            job_config=job_config)  # API request
    job.result()  # Waits for table load to complete.

    print('Loaded {} rows into {}:{}.'.format(job.output_rows, dataset_id, table_id))
Example 13: _create_job_config
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def _create_job_config(self, write_disposition):
    '''Create a BigQuery load job configuration.

    Args:
        write_disposition (str): JobConfig write disposition (e.g. WriteDisposition.WRITE_APPEND)

    Returns:
        job_config (LoadJobConfig): BigQuery load job configuration
    '''
    job_config = bigquery.LoadJobConfig()
    job_config.autodetect = True
    job_config.write_disposition = write_disposition
    job_config.source_format = bigquery.SourceFormat.CSV
    return job_config
Example 14: give_file_gbq
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def give_file_gbq(path_to_file, bq_configuration):
    """
    Upload the file at *path_to_file* to a BigQuery table using *bq_configuration* settings.
    """
    # construct a Client object pointing at the table in which data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table_id"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bq_configuration["source_format"].upper()
    job_config.write_disposition = bq_configuration["write_disposition"]
    if bq_configuration["source_format"].upper() == "CSV":
        job_config.field_delimiter = bq_configuration["delimiter"]
        job_config.skip_leading_rows = 1
    job_config.autodetect = True

    # upload the file to the BigQuery table
    with open(path_to_file, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref, location=bq_configuration["location"],
                                          job_config=job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " + bq_configuration["project_id"] + "." +
          bq_configuration["dataset_id"] + "." + bq_configuration["table_id"] + ".")
    os.remove(path_to_file)
Example 15: load_chunks
# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def load_chunks(
    client,
    dataframe,
    dataset_id,
    table_id,
    chunksize=None,
    schema=None,
    location=None,
):
    destination_table = client.dataset(dataset_id).table(table_id)

    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = "WRITE_APPEND"
    job_config.source_format = "CSV"
    job_config.allow_quoted_newlines = True

    if schema is None:
        schema = pandas_gbq.schema.generate_bq_schema(dataframe)

    schema = pandas_gbq.schema.add_default_nullable_mode(schema)

    job_config.schema = [
        bigquery.SchemaField.from_api_repr(field) for field in schema["fields"]
    ]

    chunks = encode_chunks(dataframe, chunksize=chunksize)
    for remaining_rows, chunk_buffer in chunks:
        try:
            yield remaining_rows
            client.load_table_from_file(
                chunk_buffer,
                destination_table,
                job_config=job_config,
                location=location,
            ).result()
        finally:
            chunk_buffer.close()
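A hedged consumption sketch for the generator above: load_chunks yields the number of rows still to be uploaded before each chunk is sent, so the caller must drain it for the uploads to run; encode_chunks and pandas_gbq.schema come from the same project, and the dataset and table names below are placeholders.

import pandas as pd
from google.cloud import bigquery

client = bigquery.Client()
frame = pd.DataFrame({"name": ["Alice", "Bob"], "score": [1.0, 2.0]})

for remaining_rows in load_chunks(client, frame, "my_dataset", "my_table", chunksize=500):
    print("{} rows remaining to upload".format(remaining_rows))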