当前位置: 首页>>代码示例>>Python>>正文


Python pandas.read_gbq方法代码示例

本文整理汇总了Python中pandas.read_gbq方法的典型用法代码示例。如果您想了解pandas.read_gbq方法的具体用法、调用方式或使用示例,那么这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该方法所属模块pandas的其他用法示例。


在下文中一共展示了pandas.read_gbq方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: read_df_from_bigquery

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def read_df_from_bigquery(full_table_path, project_id=None, num_samples=None):
  """Run the project's base query against a BigQuery table.

  The SQL text is `metadata.BASE_QUERY` formatted with the table path. A
  ` LIMIT n` clause is appended only when `num_samples` is truthy.

  Args:
    full_table_path: (string) value substituted for `{table}` in
      `metadata.BASE_QUERY`, in the format of
      [project_id.dataset_name.table_name].
    project_id: (string, Optional) project ID forwarded to `pd.read_gbq`.
    num_samples: (int, Optional) maximum number of rows to request. `None`
      or `0` leaves the query without a LIMIT clause.

  Returns:
    pandas.DataFrame holding the query result.
  """
  sql_parts = [metadata.BASE_QUERY.format(table=full_table_path)]
  if num_samples:
    sql_parts.append(' LIMIT {}'.format(num_samples))

  # No explicit credentials are passed; the standard SQL dialect is requested.
  return pd.read_gbq(
      ''.join(sql_parts), project_id=project_id, dialect='standard')
开发者ID:GoogleCloudPlatform,项目名称:cloudml-samples,代码行数:24,代码来源:utils.py

示例2: test_roundtrip

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def test_roundtrip(self):
        """Upload a frame with `to_gbq`, then check its row count via `read_gbq`.

        The frame is written in chunks of 10000 rows using a private key
        file, and the table is counted again with the same project and key.
        """
        table_name = DESTINATION_TABLE + "1"
        expected_rows = 20001

        frame = make_mixed_dataframe_v2(expected_rows)
        frame.to_gbq(
            table_name,
            _get_project_id(),
            chunksize=10000,
            private_key=_get_private_key_path(),
        )

        # Pause before reading back; presumably the uploaded rows are not
        # queryable immediately -- TODO confirm.
        sleep(30)

        count_sql = "SELECT COUNT(*) AS num_rows FROM {0}".format(table_name)
        counted = pd.read_gbq(
            count_sql,
            project_id=_get_project_id(),
            private_key=_get_private_key_path(),
        )
        assert counted['num_rows'][0] == expected_rows
开发者ID:birforce,项目名称:vnpy_crypto,代码行数:18,代码来源:test_gbq.py

示例3: test_pandas_gbq_query

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def test_pandas_gbq_query():
    """Run one standard-SQL query twice through `pandas.read_gbq`.

    The first call passes no project; the second passes `project_id`
    explicitly. Each resulting frame is asserted to be non-empty.

    The `[START ...]` / `[END ...]` comments delimit two spans that carry
    the same tag name. The first assert and the environment lookup sit
    between those spans, i.e. outside the tagged regions.
    """
    # [START bigquery_migration_pandas_gbq_query]
    import pandas

    sql = """
        SELECT name
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        WHERE state = 'TX'
        LIMIT 100
    """

    # Run a Standard SQL query using the environment's default project
    df = pandas.read_gbq(sql, dialect='standard')

    # Run a Standard SQL query with the project set explicitly
    project_id = 'your-project-id'
    # [END bigquery_migration_pandas_gbq_query]
    assert len(df) > 0
    # Overwrite the placeholder above with the value of the
    # GOOGLE_CLOUD_PROJECT environment variable (KeyError if it is unset).
    # NOTE(review): `os` is not imported inside this function; assumes a
    # module-level `import os` -- confirm.
    project_id = os.environ['GOOGLE_CLOUD_PROJECT']
    # [START bigquery_migration_pandas_gbq_query]
    df = pandas.read_gbq(sql, project_id=project_id, dialect='standard')
    # [END bigquery_migration_pandas_gbq_query]
    assert len(df) > 0 
开发者ID:GoogleCloudPlatform,项目名称:python-docs-samples,代码行数:25,代码来源:samples_test.py

示例4: test_pandas_gbq_query_with_parameters

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def test_pandas_gbq_query_with_parameters():
    """Run a query with named parameters through `pandas.read_gbq`.

    The SQL refers to `@state` and `@limit`; their types and values are
    supplied through the `configuration` dict (`parameterMode` set to
    `NAMED` plus a `queryParameters` list). The resulting frame is asserted
    to be non-empty.
    """
    # [START bigquery_migration_pandas_gbq_query_parameters]
    import pandas

    sql = """
        SELECT name
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        WHERE state = @state
        LIMIT @limit
    """
    query_config = {
        'query': {
            'parameterMode': 'NAMED',
            'queryParameters': [
                {
                    'name': 'state',
                    'parameterType': {'type': 'STRING'},
                    'parameterValue': {'value': 'TX'}
                },
                {
                    'name': 'limit',
                    'parameterType': {'type': 'INTEGER'},
                    'parameterValue': {'value': 100}
                }
            ]
        }
    }

    # Query parameters are a standard SQL feature, so request that dialect
    # explicitly instead of depending on the library's default.
    df = pandas.read_gbq(sql, dialect='standard', configuration=query_config)
    # [END bigquery_migration_pandas_gbq_query_parameters]
    assert len(df) > 0
开发者ID:GoogleCloudPlatform,项目名称:python-docs-samples,代码行数:33,代码来源:samples_test.py

示例5: load_inferred

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def load_inferred(inference_table, label_table, extractors):
    """Feed joined label/inference rows from BigQuery to every extractor.

    Builds a standard-SQL query joining `label_table` with the tables
    matching `inference_table*` on the vessel id, restricted to rows whose
    `split` is "Test". The query text is printed, then run against the
    'world-fishing-827' project.

    Args:
        inference_table: table name prefix; a `*` wildcard is appended.
        label_table: name of the table holding the labels.
        extractors: iterable of objects exposing `extract(row)` and
            `finalize()`. It is iterated once per result row and once more
            at the end, so it must be re-iterable.

    Returns:
        None. Results are accumulated inside the extractors.
    """
    sql_template = """

    SELECT inference_table.* except (ssvid), ssvid as id FROM 
    `{}` label_table
    JOIN
   `{}*` inference_table
    ON (cast(label_table.id as string) = inference_table.ssvid)
    where split = "Test"
    """
    sql = sql_template.format(label_table, inference_table)
    print(sql)
    rows = pd.read_gbq(sql, project_id='world-fishing-827', dialect='standard')

    # Every extractor sees every row before any of them is finalized.
    for record in rows.itertuples():
        for extractor in extractors:
            extractor.extract(record)
    for extractor in extractors:
        extractor.finalize()
开发者ID:GlobalFishingWatch,项目名称:vessel-classification,代码行数:23,代码来源:compute_vessel_metrics.py

示例6: load_class_weights

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def load_class_weights(inference_table):
    """Return the share of rows carrying each `max_label` value.

    The query runs against the 'world-fishing-827' project over the tables
    matching `inference_table*`. Rows with a null `max_label` are ignored,
    and each label's row count is divided by the total remaining row count.

    Args:
        inference_table: table name prefix; a `*` wildcard is appended.

    Returns:
        dict mapping label -> fraction, filled in the order the query
        returns rows (the query orders by fraction, descending).
    """
    sql_template = '''
        with

        core as (
        select * from `{}*`
        where max_label is not null
        ),

        count as (
        select count(*) as total from core
        )
        select max_label as label, count(*) / total as fraction
        from core
        cross join count
        group by label, total
        order by fraction desc
    '''
    sql = sql_template.format(inference_table)
    result = pd.read_gbq(sql, project_id='world-fishing-827', dialect='standard')

    weights = {}
    for row in result.itertuples():
        weights[row.label] = row.fraction
    return weights
开发者ID:GlobalFishingWatch,项目名称:vessel-classification,代码行数:23,代码来源:compute_vessel_metrics.py

示例7: get_data

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def get_data(self, type, station_ids, n_years):
        """Fetch GSOD weather rows for the given stations and tag locations.

        Args:
            type: column expression selected as `value` in the query. The
                name shadows the builtin but is kept so keyword callers
                keep working.
            station_ids: iterable of strings, joined with commas into the
                `IN (...)` list.
            n_years: number of years before 2010 to include; the query keeps
                rows with `2010 - n_years <= year < 2010`.

        Returns:
            pandas.DataFrame of the query result with an added `date`
            column (built from year/month/day), inner-merged with the
            module-level `stations` mapping on `station_number`, which adds
            a `location` column.

        Note:
            All arguments are interpolated directly into the SQL text, so
            they must come from trusted input.
        """
        query = """
            SELECT station_number, year, month, day, {type} as value, rain, snow
            FROM `publicdata.samples.gsod`
            WHERE station_number IN ({stns})
            AND year < 2010
            AND year >= {minyr}
        """.format(
            type=type,
            stns=','.join(station_ids),
            minyr=2010 - n_years
        )

        df = pd.read_gbq(query, project_id=PROJECT_ID, dialect='standard')
        df['date'] = pd.to_datetime(df[['year', 'month', 'day']])

        # Materialise the key view into a list so both columns are plain
        # lists of equal length, rather than handing pandas a live dict view.
        stations_df = pd.DataFrame({
            'location': list(stations.keys()),
            'station_number': [int(v) for v in stations.values()],
        })
        return pd.merge(df, stations_df, on='station_number')
开发者ID:adamhajari,项目名称:spyre,代码行数:24,代码来源:weatherhistoryapp.py

示例8: test_read_gbq_without_dialect_warns_future_change

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def test_read_gbq_without_dialect_warns_future_change(monkeypatch):
    """Check that `pd.read_gbq` called without `dialect` warns.

    Background from the original note: the default dialect is changing to
    standard SQL, see https://github.com/pydata/pandas-gbq/issues/195
    """
    # Replace pandas_gbq.read_gbq with a stub that returns a 1x1 frame.
    monkeypatch.setattr(
        pandas_gbq,
        'read_gbq',
        lambda *args, **kwargs: DataFrame([[1.0]]),
    )

    with tm.assert_produces_warning(FutureWarning):
        pd.read_gbq("SELECT 1")
开发者ID:Frank-qlu,项目名称:recruit,代码行数:12,代码来源:test_gbq.py

示例9: test_roundtrip

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def test_roundtrip(self):
        """Upload a frame with `to_gbq`, then check its row count via `read_gbq`.

        Credentials objects are used for both calls, the upload passes
        `chunksize=None`, and the count query runs in the standard dialect.
        """
        table_name = DESTINATION_TABLE + "1"
        expected_rows = 20001

        frame = make_mixed_dataframe_v2(expected_rows)
        frame.to_gbq(
            table_name,
            _get_project_id(),
            chunksize=None,
            credentials=_get_credentials(),
        )

        count_sql = "SELECT COUNT(*) AS num_rows FROM {0}".format(table_name)
        counted = pd.read_gbq(
            count_sql,
            project_id=_get_project_id(),
            credentials=_get_credentials(),
            dialect="standard",
        )
        assert counted['num_rows'][0] == expected_rows
开发者ID:Frank-qlu,项目名称:recruit,代码行数:17,代码来源:test_gbq.py

示例10: _run

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def _run(query, dialect='legacy'):
    """Run `query` through `pd.read_gbq` using settings from the environment.

    Args:
        query: SQL text to execute.
        dialect: SQL dialect forwarded to `pd.read_gbq`; defaults to
            'legacy'.

    Returns:
        Whatever `pd.read_gbq` returns for the query.

    Raises:
        KeyError: if GOOGLE_PROJECT_ID or GOOGLE_APPLICATION_CREDENTIALS is
            missing from the environment.
    """
    reader = pd.read_gbq
    project = os.environ['GOOGLE_PROJECT_ID']
    key = os.environ['GOOGLE_APPLICATION_CREDENTIALS']
    return reader(query, project_id=project, private_key=key, dialect=dialect)
开发者ID:GoogleCloudPlatform,项目名称:bigquery-bokeh-dashboard,代码行数:9,代码来源:utils.py

示例11: test_pandas_gbq_query_bqstorage

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def test_pandas_gbq_query_bqstorage():
    """Query a public table with `use_bqstorage_api=True` and check the result.

    The statements between the `[START ...]` and `[END ...]` comments form
    the tagged sample; the final assert, which only checks that the frame
    is non-empty, sits outside that span.
    """
    # [START bigquery_migration_pandas_gbq_query_bqstorage]
    import pandas

    sql = "SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`"

    # Use the BigQuery Storage API to download results more quickly.
    df = pandas.read_gbq(sql, dialect='standard', use_bqstorage_api=True)
    # [END bigquery_migration_pandas_gbq_query_bqstorage]
    assert len(df) > 0 
开发者ID:GoogleCloudPlatform,项目名称:python-docs-samples,代码行数:12,代码来源:samples_test.py

示例12: test_pandas_gbq_legacy_query

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def test_pandas_gbq_legacy_query():
    """Run a legacy-SQL query through `pandas.read_gbq` and check the result.

    The table is written in the bracketed `[project:dataset.table]` form and
    the call passes `dialect='legacy'`. The final assert, which only checks
    that the frame is non-empty, sits outside the `[START ...]` /
    `[END ...]` span.
    """
    # [START bigquery_migration_pandas_gbq_query_legacy]
    import pandas

    sql = """
        SELECT name
        FROM [bigquery-public-data:usa_names.usa_1910_current]
        WHERE state = 'TX'
        LIMIT 100
    """

    df = pandas.read_gbq(sql, dialect='legacy')
    # [END bigquery_migration_pandas_gbq_query_legacy]
    assert len(df) > 0 
开发者ID:GoogleCloudPlatform,项目名称:python-docs-samples,代码行数:16,代码来源:samples_test.py

示例13: get_reddit_data

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def get_reddit_data(project_id, subreddits, start_month, end_month, max_posts):
    """Fetch the top-scoring post titles for each requested subreddit.

    Args:
        project_id: project ID passed positionally to `pd.read_gbq`.
        subreddits: iterable of subreddit names; each is lower-cased and
            embedded in the query as a quoted literal.
        start_month: lower bound for `_TABLE_SUFFIX` (inclusive).
        end_month: upper bound for `_TABLE_SUFFIX` (inclusive).
        max_posts: keep posts whose per-subreddit rank by score is at most
            this value.

    Returns:
        pandas.DataFrame with columns `title` and `context_label`.
    """
    sql_template = '''
    # standardSQL
    SELECT
    title,
    subreddit AS context_label
    FROM (
    SELECT
        title,
        subreddit,
        ROW_NUMBER() OVER (PARTITION BY subreddit ORDER BY score DESC)
        AS rank_num
    FROM
        `fh-bigquery.reddit_posts.*`
    WHERE
        _TABLE_SUFFIX BETWEEN "{}" AND "{}"
        AND LOWER(subreddit) IN ({})
        )
    WHERE
    rank_num <= {}
    '''

    # Each lower-cased name is rendered as its Python repr and joined with
    # ", ", which is the text of a list's str() without the brackets.
    quoted_names = ', '.join(repr(name.lower()) for name in subreddits)
    sql = sql_template.format(start_month, end_month, quoted_names, max_posts)

    return pd.read_gbq(sql, project_id, dialect='standard')
开发者ID:minimaxir,项目名称:subreddit-generator,代码行数:31,代码来源:query.py

示例14: read_vessel_database_for_char_mmsi

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def read_vessel_database_for_char_mmsi(dbname, dataset):
    """Read vessel characteristics for ids that occur once in `dbname`.

    The query selects length, tonnage, engine power, crew size and a
    '|'-joined gear type label for rows where at least one of those
    features is present, skipping any `identity.ssvid` that appears more
    than once. It runs against the 'world-fishing-827' project.

    Args:
        dbname: table reference substituted for `{dbname}` in the query.
        dataset: not used by this function; kept so existing callers keep
            working.

    Returns:
        pandas.DataFrame with columns id, length, tonnage, engine_power,
        crew_size and label, ordered by id.

    Raises:
        Any exception from `pd.read_gbq` is re-raised after the query text
        has been printed.
    """
    # Pass the single placeholder explicitly rather than through locals(),
    # so the template's inputs are visible at the call site.
    query = '''
      with multi_id as (
        select identity.ssvid as id
        from {dbname}
        group by id
        having count(identity.ssvid) > 1
      )
      
      select identity.ssvid as id, 
             feature.length_m as length, 
             feature.tonnage_gt as tonnage, 
             feature.engine_power_kw as engine_power, 
             feature.crew as crew_size,
             array_to_string(feature.geartype, '|') as label
      from {dbname} a
      where (feature.length_m is not null or 
            feature.tonnage_gt is not null or 
            feature.engine_power_kw is not null or 
            feature.crew is not null or
            (feature.geartype is not null and array_length(feature.geartype) > 0)) and
            identity.ssvid not in (select * from multi_id)
            order by id
    '''.format(dbname=dbname)
    try:
        return pd.read_gbq(query, dialect='standard', project_id='world-fishing-827')
    except Exception:
        # Show the failing SQL for debugging, then let the error propagate.
        # Exception (not a bare except) leaves KeyboardInterrupt/SystemExit
        # untouched.
        print(query)
        raise
开发者ID:GlobalFishingWatch,项目名称:vessel-classification,代码行数:31,代码来源:create_train_info.py

示例15: load_inferred_fishing

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def load_inferred_fishing(table, id_list, project_id, threshold=True):
    """Collect inferred fishing ranges per vessel for the years 2012-2018.

    One legacy-SQL query is issued per year. A year whose query fails with
    a message containing 'matches no table' is skipped with a notice; any
    other query error is re-raised after the query text has been printed.

    Args:
        table: table prefix placed inside `TABLE_DATE_RANGE([...])`.
        id_list: iterable of vessel ids; each is double-quoted and joined
            with commas for the `IN (...)` clause.
        project_id: project ID forwarded to `pd.read_gbq`.
        threshold: when truthy, the stored score is the boolean
            `nnet_score > 0.5`; otherwise the raw `nnet_score`.

    Returns:
        defaultdict(list) mapping vessel id to a list of
        `FishingRange(score, start, end)`. Start and end have their tzinfo
        replaced with UTC.
    """
    query_template = """
    SELECT vessel_id as id, start_time, end_time, nnet_score FROM 
        TABLE_DATE_RANGE([{table}],
            TIMESTAMP('{year}-01-01'), TIMESTAMP('{year}-12-31'))
        WHERE vessel_id in ({ids})
    """
    quoted_ids = ','.join(map('"{}"'.format, id_list))
    ranges_by_id = defaultdict(list)

    for year in range(2012, 2019):
        sql = query_template.format(table=table, year=year, ids=quoted_ids)
        try:
            yearly = pd.read_gbq(sql, project_id=project_id, dialect='legacy')
        except pandas_gbq.gbq.GenericGBQException as err:
            if 'matches no table' not in err.args[0]:
                print(sql)
                raise
            # No table for this year: report it and move on.
            print('skipping', year)
            continue

        for row in yearly.itertuples():
            score = (row.nnet_score > 0.5) if threshold else row.nnet_score
            start = row.start_time.replace(tzinfo=pytz.utc)
            end = row.end_time.replace(tzinfo=pytz.utc)
            ranges_by_id[row.id].append(FishingRange(score, start, end))

    return ranges_by_id
开发者ID:GlobalFishingWatch,项目名称:vessel-classification,代码行数:33,代码来源:compute_fishing_metrics.py


注:本文中的pandas.read_gbq方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。