

Python bigquery.LoadJobConfig Method Code Examples

This article collects typical usage examples of the Python method google.cloud.bigquery.LoadJobConfig. If you are wondering what bigquery.LoadJobConfig does, how to call it, or how it is used in practice, the curated code samples below may help. You can also explore other usage examples from the google.cloud.bigquery module.


Below are 15 code examples of the bigquery.LoadJobConfig method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.

Example 1: df_to_bigquery

# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def df_to_bigquery(df, table_id, dataset_id, client):
    table = get_bigquery_table(table_id, dataset_id, client)

    # set config: insert overwrite
    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.job.WriteDisposition.WRITE_TRUNCATE
    )

    # insert table
    job = client.load_table_from_dataframe(
        dataframe=df.compute().rename_axis("id"),
        destination=table,
        job_config=job_config
    )
    job.result()
    logger.info('%s rows loaded into %s.%s.%s.', job.output_rows, job.project, dataset_id, table_id)
    return table 
Author: yxtay, Project: recommender-tensorflow, Lines: 19, Source: gcp_utils.py
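
A minimal usage sketch for this helper, assuming a Dask DataFrame (the .compute() call implies one); the client, project, dataset, and table names below are placeholders:

import dask.dataframe as dd
import pandas as pd
from google.cloud import bigquery

# Hypothetical setup; get_bigquery_table is assumed to come from the same gcp_utils module.
client = bigquery.Client(project="my-project")
pdf = pd.DataFrame({"score": [0.1, 0.9]}, index=[1, 2])
ddf = dd.from_pandas(pdf, npartitions=1)
df_to_bigquery(ddf, "my_table", "my_dataset", client)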

Example 2: load_to_gbq

# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def load_to_gbq(client, data, bq_configuration):
    """
        Load data into BigQuery using *bq_configuration* settings.
    """
    # use the client passed by the caller rather than constructing a new one
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table"])

    # determine upload options
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job_config.source_format = "NEWLINE_DELIMITED_JSON"
    job_config.autodetect = True

    load_job = client.load_table_from_file(
        data,
        table_ref,
        job_config=job_config)  # API request
    print('Starting job {}'.format(load_job.job_id))

    load_job.result()  # Waits for table load to complete.
    print('Job finished.') 
Author: OWOX, Project: BigQuery-integrations, Lines: 24, Source: main.py
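
A hypothetical call for this variant: *data* can be any binary file-like object containing newline-delimited JSON, and the configuration keys below mirror the ones read above:

import io
import json
from google.cloud import bigquery

rows = [{"id": 1, "name": "alice"}, {"id": 2, "name": "bob"}]
buffer = io.BytesIO("\n".join(json.dumps(r) for r in rows).encode("utf-8"))
bq_configuration = {"project_id": "my-project", "dataset_id": "my_dataset", "table": "my_table"}
client = bigquery.Client(project=bq_configuration["project_id"])
load_to_gbq(client, buffer, bq_configuration)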

Example 3: load_to_gbq

# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def load_to_gbq(filename, bq_configuration):
    """
        Load data into BigQuery using *bq_configuration* settings.
    """
    # construct a Client object for the project in which the data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table"])

    # determine upload options
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = 'WRITE_TRUNCATE'
    job_config.source_format = bq_configuration["source_format"]
    job_config.autodetect = True
    if bq_configuration["source_format"].upper() == "CSV":
        job_config.skip_leading_rows = 1

    # upload the file to the BigQuery table
    with open(filename, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref,
                                          location=bq_configuration["location"],
                                          job_config=job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " + bq_configuration["project_id"] + "." +
          bq_configuration["dataset_id"] + "." + bq_configuration["table"] + ".")
    os.remove(filename) 
Author: OWOX, Project: BigQuery-integrations, Lines: 26, Source: main.py

Example 4: give_file_gbq

# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def give_file_gbq(path_to_file, bq_configuration):
    """
        Upload the file at *path_to_file* to a BigQuery table using *bq_configuration* settings.
    """
    # construct a Client object for the project in which the data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table_id"])

    # determine upload options
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bq_configuration["source_format"].upper()
    job_config.write_disposition = bq_configuration["write_disposition"]
    if bq_configuration["source_format"].upper() == "CSV":
        job_config.field_delimiter = bq_configuration["delimiter"]
        job_config.skip_leading_rows = 1
    job_config.autodetect = True

    # upload the file to the BigQuery table
    with open(path_to_file, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref,
                                          location=bq_configuration["location"],
                                          job_config=job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " + bq_configuration["project_id"] + "." + bq_configuration["dataset_id"] + "." + bq_configuration["table_id"] + ".")
    os.remove(path_to_file) 
Author: OWOX, Project: BigQuery-integrations, Lines: 26, Source: main.py
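
For reference, the write_disposition strings used throughout these examples correspond to constants on bigquery.WriteDisposition; a short summary:

from google.cloud import bigquery

bigquery.WriteDisposition.WRITE_TRUNCATE  # replace any existing table data
bigquery.WriteDisposition.WRITE_APPEND    # append rows to the existing table
bigquery.WriteDisposition.WRITE_EMPTY     # fail unless the destination table is empty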

Example 5: give_file_gbq

# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def give_file_gbq(path_to_file, bq_configuration):
    """
        Upload the file at *path_to_file* to a BigQuery table using *bq_configuration* settings.
    """
    # construct a Client object for the project in which the data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table_id"])

    # determine upload options
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = "NEWLINE_DELIMITED_JSON"
    job_config.write_disposition = bq_configuration["write_disposition"]
    job_config.autodetect = True

    # upload the file to the BigQuery table
    with open(path_to_file, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref,
                                          location=bq_configuration["location"],
                                          job_config=job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " + bq_configuration["project_id"] + "." + bq_configuration["dataset_id"] + "." + bq_configuration["table_id"] + ".") 
Author: OWOX, Project: BigQuery-integrations, Lines: 22, Source: main.py

Example 6: load_table_uri_parquet

# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def load_table_uri_parquet(table_id):
    # [START bigquery_load_table_gcs_parquet]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.PARQUET)
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet"

    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_parquet] 
Author: googleapis, Project: python-bigquery, Lines: 24, Source: load_table_uri_parquet.py

Example 7: load_table_uri_orc

# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def load_table_uri_orc(table_id):

    # [START bigquery_load_table_gcs_orc]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name

    job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.ORC)
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc"

    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_orc] 
Author: googleapis, Project: python-bigquery, Lines: 25, Source: load_table_uri_orc.py

Example 8: load_table_uri_avro

# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def load_table_uri_avro(table_id):

    # [START bigquery_load_table_gcs_avro]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name

    job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.AVRO)
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro"

    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_avro] 
Author: googleapis, Project: python-bigquery, Lines: 25, Source: load_table_uri_avro.py
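
Examples 6 through 8 differ only in the source format; since Parquet, ORC, and Avro are all self-describing, a single parameterized sketch (placeholder table id and URI) covers all three:

from google.cloud import bigquery

def load_table_uri(table_id, uri, source_format):
    """Generic variant of examples 6-8: only the source format differs."""
    client = bigquery.Client()
    job_config = bigquery.LoadJobConfig(source_format=source_format)
    client.load_table_from_uri(uri, table_id, job_config=job_config).result()
    print("Loaded {} rows.".format(client.get_table(table_id).num_rows))

# e.g. load_table_uri("my-project.my_dataset.us_states",
#                     "gs://cloud-samples-data/bigquery/us-states/us-states.orc",
#                     bigquery.SourceFormat.ORC)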

Example 9: csv_in_gcs_to_table

# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def csv_in_gcs_to_table(bucket_name: str, object_name: str, dataset_id: str,
                        table_id: str,
                        schema: List[bigquery.SchemaField]) -> None:
    """Upload CSV to BigQuery table.
        If the table already exists, it overwrites the table data.

    Args:
        bucket_name: Bucket name for holding the object
        object_name: Name of object to be uploaded
        dataset_id: Dataset id where the table is located.
        table_id: String holding the id of the table.
        schema: Schema of the table_id
    """
    client = bigquery.Client()
    dataset_ref = client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.schema = schema
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
    uri = "gs://{}/{}".format(bucket_name, object_name)
    load_job = client.load_table_from_uri(uri,
                                          dataset_ref.table(table_id),
                                          job_config=job_config)
    load_job.result() 
Author: GoogleCloudPlatform, Project: professional-services, Lines: 26, Source: helper_function.py
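
A hypothetical invocation; the schema fields below are illustrative and must match the columns of the uploaded CSV object:

from google.cloud import bigquery

schema = [
    bigquery.SchemaField("name", "STRING"),
    bigquery.SchemaField("post_abbr", "STRING"),
]
csv_in_gcs_to_table("my-bucket", "us-states.csv", "my_dataset", "us_states", schema)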

Example 10: run_job

# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def run_job(self, method_name, args, config_opts, config_default_opts):
        job_config = {
            "copy_table": gcbq.CopyJobConfig,
            "extract_table": gcbq.ExtractJobConfig,
            "load_table_from_file": gcbq.LoadJobConfig,
            "load_table_from_uri": gcbq.LoadJobConfig,
            "query": gcbq.QueryJobConfig,
        }[method_name]()

        for k, v in config_default_opts.items():
            setattr(job_config, k, v)
        for k, v in config_opts.items():
            setattr(job_config, k, v)

        method = getattr(self.gcbq_client, method_name)

        job = method(*args, job_config=job_config)
        if getattr(job_config, "dry_run", False):
            return []
        else:
            return job.result() 
Author: ebmdatalab, Project: openprescribing, Lines: 23, Source: bigquery.py
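
A hypothetical call against this dispatcher, assuming an instance named runner whose gcbq_client attribute is a google.cloud.bigquery Client; the options shown are placeholders:

rows = runner.run_job(
    "query",                               # selects gcbq.QueryJobConfig
    ["SELECT 1 AS x"],                     # positional args passed through to client.query(...)
    config_opts={"use_legacy_sql": False},
    config_default_opts={"dry_run": False},
)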

Example 11: _start_one_load_job

# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def _start_one_load_job(self, suffix):
    # After issue #582 is resolved we can remove the create_disposition flag.
    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.AVRO,
        create_disposition='CREATE_NEVER')
    uri = self._avro_root_path + suffix + '-*'
    table_id = bigquery_util.compose_table_name(self._table_base_name, suffix)
    load_job = self._client.load_table_from_uri(
        uri, table_id, job_config=job_config)
    self._suffixes_to_load_jobs.update({suffix: load_job}) 
Author: googlegenomics, Project: gcp-variant-transforms, Lines: 12, Source: avro_util.py
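
For context, the create_disposition flag passed above takes one of two constants; a brief summary of bigquery.CreateDisposition:

from google.cloud import bigquery

bigquery.CreateDisposition.CREATE_IF_NEEDED  # default: create the table if it is missing
bigquery.CreateDisposition.CREATE_NEVER      # require the destination table to already exist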

Example 12: to_bq_from_local_file

# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def to_bq_from_local_file(temp_data_filename, bq_tablename, columns_to_export, append=True):

    # import the data into bigquery
    (dataset_id, table_id) = bq_tablename.split(".")

    setup_bigquery_creds()
    client = bigquery.Client()
    dataset_ref = client.dataset(dataset_id)
    table_ref = dataset_ref.table(table_id)
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.skip_leading_rows = 1
    job_config.allow_quoted_newlines = True
    job_config.max_bad_records = 1000

    if append:
        job_config.autodetect = False
        job_config.write_disposition = 'WRITE_APPEND'
    else:
        job_config.autodetect = True
        job_config.write_disposition = 'WRITE_TRUNCATE'

    if "*" in columns_to_export or "," in columns_to_export:
        job_config.field_delimiter = ","
    else:
        job_config.field_delimiter = "þ"  # placeholder when only one column and don't want to split it

    with open(temp_data_filename, 'rb') as source_file:
        job = client.load_table_from_file(
            source_file,
            bq_tablename,
            location='US',
            job_config=job_config)  # API request

    job.result()  # Waits for table load to complete.
    print('Loaded {} rows into {}:{}.'.format(job.output_rows, dataset_id, table_id)) 
Author: ourresearch, Project: oadoi, Lines: 38, Source: bigquery_import.py
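
A hypothetical call; a comma in *columns_to_export* makes the loader treat the file as an ordinary comma-delimited CSV, otherwise the placeholder delimiter keeps a single column unsplit:

# Append a two-column CSV into "my_dataset.my_table" (placeholder names).
to_bq_from_local_file("/tmp/export.csv", "my_dataset.my_table", "id,name", append=True)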

Example 13: _create_job_config

# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def _create_job_config(self, write_disposition):
        '''create a BigQuery load job configuration

        Args:
            write_disposition (str): JobConfig write disposition (e.g. WriteDisposition.WRITE_APPEND)

        Returns:
            job_config (LoadJobConfig): BigQuery load job configuration

        '''
        job_config = bigquery.LoadJobConfig()
        job_config.autodetect = True
        job_config.write_disposition = write_disposition
        job_config.source_format = bigquery.SourceFormat.CSV
        return job_config 
Author: ww-tech, Project: lookml-tools, Lines: 17, Source: bq_writer.py
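
A hypothetical use, assuming writer is an instance of the class defining this method:

from google.cloud import bigquery

job_config = writer._create_job_config(bigquery.WriteDisposition.WRITE_APPEND)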

Example 14: give_file_gbq

# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def give_file_gbq(path_to_file, bq_configuration):
    """
        Download file from *path_to_file* to BigQuery table using *bq_configuration* settings.
    """
    # construct Client object with the path to the table in which data will be stored
    client = bigquery.Client(project=bq_configuration["project_id"])
    dataset_ref = client.dataset(bq_configuration["dataset_id"])
    table_ref = dataset_ref.table(bq_configuration["table_id"])

    # determine uploading options
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bq_configuration["source_format"].upper()
    job_config.write_disposition = bq_configuration["write_disposition"]
    if bq_configuration["source_format"].upper() == "CSV":
        job_config.field_delimiter = bq_configuration["delimiter"]
        job_config.skip_leading_rows = 1
    job_config.autodetect = True

    # upload the file to BigQuery table
    with open(path_to_file, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref, location=bq_configuration["location"],
                                          job_config=job_config)
    job.result()
    print("The Job " + job.job_id + " in status " + job.state + " for table " + bq_configuration["project_id"] + "." +
          bq_configuration["dataset_id"] + "." + bq_configuration["table_id"] + ".")
    os.remove(path_to_file) 
Author: OWOX, Project: BigQuery-integrations, Lines: 28, Source: main.py

Example 15: load_chunks

# Required import: from google.cloud import bigquery [as alias]
# Or: from google.cloud.bigquery import LoadJobConfig [as alias]
def load_chunks(
    client,
    dataframe,
    dataset_id,
    table_id,
    chunksize=None,
    schema=None,
    location=None,
):
    destination_table = client.dataset(dataset_id).table(table_id)
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = "WRITE_APPEND"
    job_config.source_format = "CSV"
    job_config.allow_quoted_newlines = True

    if schema is None:
        schema = pandas_gbq.schema.generate_bq_schema(dataframe)

    schema = pandas_gbq.schema.add_default_nullable_mode(schema)

    job_config.schema = [
        bigquery.SchemaField.from_api_repr(field) for field in schema["fields"]
    ]

    chunks = encode_chunks(dataframe, chunksize=chunksize)
    for remaining_rows, chunk_buffer in chunks:
        try:
            yield remaining_rows
            client.load_table_from_file(
                chunk_buffer,
                destination_table,
                job_config=job_config,
                location=location,
            ).result()
        finally:
            chunk_buffer.close() 
Author: pydata, Project: pandas-gbq, Lines: 38, Source: load.py
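
Because load_chunks is a generator that yields the number of rows still to be sent before each chunk is loaded, a caller drives it with a loop; a hypothetical driver (placeholder names, assuming encode_chunks and pandas_gbq are importable) might look like:

import pandas as pd
from google.cloud import bigquery

client = bigquery.Client(project="my-project")
df = pd.DataFrame({"id": range(10000), "value": range(10000)})
for remaining_rows in load_chunks(client, df, "my_dataset", "my_table", chunksize=2500):
    print("{} rows remaining".format(remaining_rows))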


Note: The google.cloud.bigquery.LoadJobConfig method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors. For distribution and use, please follow the corresponding project's License. Do not reproduce without permission.