本文整理汇总了Python中pandas.read_gbq方法的典型用法代码示例。如果您正苦于以下问题：Python pandas.read_gbq方法的具体用法？Python pandas.read_gbq怎么用？Python pandas.read_gbq使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pandas的用法示例。
在下文中一共展示了pandas.read_gbq方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: read_df_from_bigquery
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def read_df_from_bigquery(full_table_path, project_id=None, num_samples=None):
    """Read rows of a BigQuery table into a pandas DataFrame.

    The query text comes from ``metadata.BASE_QUERY`` with ``full_table_path``
    substituted for its ``table`` placeholder. No train/validation split is
    performed here; the query result is returned as-is.

    Args:
      full_table_path: (string) full path of the table containing training data
        in the format of [project_id.dataset_name.table_name].
      project_id: (string, Optional) Google BigQuery Account project ID.
      num_samples: (int, Optional) Number of data samples to read. A falsy
        value (``None`` or ``0``) means no ``LIMIT`` clause is appended.

    Returns:
      pandas.DataFrame

    Raises:
      TypeError, ValueError: if ``num_samples`` is truthy but cannot be
        converted to an integer.
    """
    query = metadata.BASE_QUERY.format(table=full_table_path)
    if num_samples:
        # Coerce to int so that only a plain number can reach the SQL text.
        query += ' LIMIT {}'.format(int(num_samples))
    # No credentials are passed explicitly ("application default credentials"),
    # and the query is run with the standard SQL dialect.
    return pd.read_gbq(query, project_id=project_id, dialect='standard')
示例2: test_roundtrip
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def test_roundtrip(self):
    """Upload a 20001-row frame, then check the table reports that many rows."""
    row_count = 20001
    table_name = DESTINATION_TABLE + "1"

    frame = make_mixed_dataframe_v2(row_count)
    frame.to_gbq(
        table_name,
        _get_project_id(),
        chunksize=10000,
        private_key=_get_private_key_path(),
    )

    # NOTE(review): fixed 30 s pause before querying -- presumably waits for
    # the uploaded rows to become visible; confirm.
    sleep(30)

    count_sql = "SELECT COUNT(*) AS num_rows FROM {0}".format(table_name)
    result = pd.read_gbq(
        count_sql,
        project_id=_get_project_id(),
        private_key=_get_private_key_path(),
    )
    assert result['num_rows'][0] == row_count
示例3: test_pandas_gbq_query
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def test_pandas_gbq_query():
    """Run the same query with the default project, then an explicit one.

    Both calls must return at least one row. The ``[START]``/``[END]``
    markers are kept around the same statements as before.
    """
    # [START bigquery_migration_pandas_gbq_query]
    import pandas

    sql = """
SELECT name
FROM `bigquery-public-data.usa_names.usa_1910_current`
WHERE state = 'TX'
LIMIT 100
"""

    # Run a Standard SQL query using the environment's default project
    names = pandas.read_gbq(sql, dialect='standard')

    # Run a Standard SQL query with the project set explicitly
    project_id = 'your-project-id'
    # [END bigquery_migration_pandas_gbq_query]
    assert len(names) > 0

    # Replace the placeholder with the project named in the environment
    # before actually issuing the second query.
    project_id = os.environ['GOOGLE_CLOUD_PROJECT']
    # [START bigquery_migration_pandas_gbq_query]
    names = pandas.read_gbq(sql, project_id=project_id, dialect='standard')
    # [END bigquery_migration_pandas_gbq_query]
    assert len(names) > 0
示例4: test_pandas_gbq_query_with_parameters
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def test_pandas_gbq_query_with_parameters():
    """Run a query with named parameters and expect at least one row back."""
    # [START bigquery_migration_pandas_gbq_query_parameters]
    import pandas

    sql = """
SELECT name
FROM `bigquery-public-data.usa_names.usa_1910_current`
WHERE state = @state
LIMIT @limit
"""

    # One entry per @name placeholder used in the SQL text above.
    state_param = {
        'name': 'state',
        'parameterType': {'type': 'STRING'},
        'parameterValue': {'value': 'TX'},
    }
    limit_param = {
        'name': 'limit',
        'parameterType': {'type': 'INTEGER'},
        'parameterValue': {'value': 100},
    }
    query_config = {
        'query': {
            'parameterMode': 'NAMED',
            'queryParameters': [state_param, limit_param],
        }
    }

    df = pandas.read_gbq(sql, configuration=query_config)
    # [END bigquery_migration_pandas_gbq_query_parameters]
    assert len(df) > 0
示例5: load_inferred
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def load_inferred(inference_table, label_table, extractors):
    """Feed inference rows for the "Test" split to every extractor.

    The label table is joined to the inference tables (``inference_table``
    is used as a wildcard prefix) on the vessel id. Each resulting row is
    passed to ``extract`` on every extractor, and ``finalize`` is called on
    each extractor once all rows have been seen. The query text is printed
    before it is run. Nothing is returned.
    """
    template = """
SELECT inference_table.* except (ssvid), ssvid as id FROM
`{}` label_table
JOIN
`{}*` inference_table
ON (cast(label_table.id as string) = inference_table.ssvid)
where split = "Test"
"""
    # Placeholder order matters: label table first, inference prefix second.
    query = template.format(label_table, inference_table)
    print(query)

    inferred = pd.read_gbq(
        query, project_id='world-fishing-827', dialect='standard')

    for record in inferred.itertuples():
        for extractor in extractors:
            extractor.extract(record)

    for extractor in extractors:
        extractor.finalize()
示例6: load_class_weights
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def load_class_weights(inference_table):
    """Return a ``{label: fraction}`` mapping for the inference tables.

    ``fraction`` is the share of rows with a non-null ``max_label`` that
    carry a given label. ``inference_table`` is used as a wildcard prefix.
    """
    template = '''
with
core as (
select * from `{}*`
where max_label is not null
),
count as (
select count(*) as total from core
)
select max_label as label, count(*) / total as fraction
from core
cross join count
group by label, total
order by fraction desc
'''
    fractions = pd.read_gbq(
        template.format(inference_table),
        project_id='world-fishing-827',
        dialect='standard',
    )
    # Pair the two result columns row by row.
    return dict(zip(fractions['label'], fractions['fraction']))
示例7: get_data
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def get_data(self, type, station_ids, n_years):
    """Fetch daily GSOD rows for the given stations and attach locations.

    Args:
      type: name of the GSOD column to select; it is exposed as ``value``
        in the result. (The parameter shadows the builtin ``type``; the
        name is kept so existing keyword callers keep working.)
      station_ids: iterable of station numbers. Strings and integers are
        both accepted; each item is converted with ``str`` before being
        placed in the ``IN (...)`` list.
      n_years: number of years before 2010 to include
        (``2010 - n_years <= year < 2010``).

    Returns:
      pandas.DataFrame with the queried columns, a ``date`` column built
      from ``year``/``month``/``day``, and a ``location`` column obtained by
      merging with the module-level ``stations`` mapping on
      ``station_number``.
    """
    query = """
SELECT station_number, year, month, day, {type} as value, rain, snow
FROM `publicdata.samples.gsod`
WHERE station_number IN ({stns})
AND year < 2010
AND year >= {minyr}
""".format(
        type=type,
        # str() lets callers pass integer ids; plain ','.join() would raise
        # TypeError on anything that is not already a string.
        stns=','.join(str(station) for station in station_ids),
        minyr=2010 - n_years,
    )
    df = pd.read_gbq(query, project_id=PROJECT_ID, dialect='standard')
    df['date'] = pd.to_datetime(df[['year', 'month', 'day']])

    # Materialise the dict view so the column is built from a plain list.
    stations_df = pd.DataFrame({
        'location': list(stations.keys()),
        'station_number': [int(v) for v in stations.values()],
    })
    return pd.merge(df, stations_df, on='station_number')
示例8: test_read_gbq_without_dialect_warns_future_change
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def test_read_gbq_without_dialect_warns_future_change(monkeypatch):
    """``pd.read_gbq`` called without ``dialect`` must emit a FutureWarning."""
    # Default dialect is changing to standard SQL. See:
    # https://github.com/pydata/pandas-gbq/issues/195

    # Stub out the backend so the call never leaves the process; the stub
    # accepts any arguments and returns a one-cell frame.
    monkeypatch.setattr(
        pandas_gbq,
        'read_gbq',
        lambda *args, **kwargs: DataFrame([[1.0]]),
    )

    with tm.assert_produces_warning(FutureWarning):
        pd.read_gbq("SELECT 1")
示例9: test_roundtrip
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def test_roundtrip(self):
    """Upload a 20001-row frame, then check the table reports that many rows."""
    row_count = 20001
    table_name = DESTINATION_TABLE + "1"

    frame = make_mixed_dataframe_v2(row_count)
    frame.to_gbq(
        table_name,
        _get_project_id(),
        chunksize=None,
        credentials=_get_credentials(),
    )

    count_sql = "SELECT COUNT(*) AS num_rows FROM {0}".format(table_name)
    result = pd.read_gbq(
        count_sql,
        project_id=_get_project_id(),
        credentials=_get_credentials(),
        dialect="standard",
    )
    assert result['num_rows'][0] == row_count
示例10: _run
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def _run(query, dialect='legacy'):
    """Run ``query`` via ``pd.read_gbq`` using settings from the environment.

    The project id is read from ``GOOGLE_PROJECT_ID`` and the private key
    from ``GOOGLE_APPLICATION_CREDENTIALS``; a missing variable raises
    ``KeyError``. Returns whatever ``pd.read_gbq`` returns.
    """
    env = os.environ
    project = env['GOOGLE_PROJECT_ID']
    key = env['GOOGLE_APPLICATION_CREDENTIALS']
    return pd.read_gbq(
        query, project_id=project, private_key=key, dialect=dialect)
示例11: test_pandas_gbq_query_bqstorage
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def test_pandas_gbq_query_bqstorage():
    """Query with ``use_bqstorage_api=True`` and expect at least one row."""
    # [START bigquery_migration_pandas_gbq_query_bqstorage]
    import pandas

    sql = "SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`"

    # Use the BigQuery Storage API to download results more quickly.
    filings = pandas.read_gbq(
        sql,
        dialect='standard',
        use_bqstorage_api=True,
    )
    # [END bigquery_migration_pandas_gbq_query_bqstorage]
    assert len(filings) > 0
示例12: test_pandas_gbq_legacy_query
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def test_pandas_gbq_legacy_query():
    """Run a legacy-SQL query (bracketed table name) and expect rows back."""
    # [START bigquery_migration_pandas_gbq_query_legacy]
    import pandas

    sql = """
SELECT name
FROM [bigquery-public-data:usa_names.usa_1910_current]
WHERE state = 'TX'
LIMIT 100
"""

    names = pandas.read_gbq(sql, dialect='legacy')
    # [END bigquery_migration_pandas_gbq_query_legacy]
    assert len(names) > 0
示例13: get_reddit_data
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def get_reddit_data(project_id, subreddits, start_month, end_month, max_posts):
    """Return the top-scoring post titles per subreddit as a DataFrame.

    The query reads the ``fh-bigquery.reddit_posts.*`` tables whose suffix
    lies between ``start_month`` and ``end_month``, keeps posts whose
    lower-cased subreddit is in ``subreddits`` (also lower-cased here), ranks
    them by score within each subreddit, and keeps ranks up to ``max_posts``.
    The result has the columns ``title`` and ``context_label``.
    """
    template = '''
# standardSQL
SELECT
title,
subreddit AS context_label
FROM (
SELECT
title,
subreddit,
ROW_NUMBER() OVER (PARTITION BY subreddit ORDER BY score DESC)
AS rank_num
FROM
`fh-bigquery.reddit_posts.*`
WHERE
_TABLE_SUFFIX BETWEEN "{}" AND "{}"
AND LOWER(subreddit) IN ({})
)
WHERE
rank_num <= {}
'''
    # Python's repr of each name doubles as a quoted SQL string literal.
    quoted_names = ', '.join(repr(name.lower()) for name in subreddits)
    query = template.format(start_month, end_month, quoted_names, max_posts)
    return pd.read_gbq(query, project_id=project_id, dialect='standard')
示例14: read_vessel_database_for_char_mmsi
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def read_vessel_database_for_char_mmsi(dbname, dataset):
    """Query vessel characteristics for ids that occur only once in ``dbname``.

    Rows are kept when at least one of length, tonnage, engine power, crew or
    a non-empty gear type list is present, and the ``ssvid`` is not among the
    ids that appear more than once. ``dataset`` is accepted but not used by
    this function. If the query fails, the SQL text is printed and the
    original exception is re-raised.

    Returns:
      pandas.DataFrame with columns ``id``, ``length``, ``tonnage``,
      ``engine_power``, ``crew_size`` and ``label`` (gear types joined
      with ``|``), ordered by ``id``.
    """
    template = '''
with multi_id as (
select identity.ssvid as id
from {dbname}
group by id
having count(identity.ssvid) > 1
)
select identity.ssvid as id,
feature.length_m as length,
feature.tonnage_gt as tonnage,
feature.engine_power_kw as engine_power,
feature.crew as crew_size,
array_to_string(feature.geartype, '|') as label
from {dbname} a
where (feature.length_m is not null or
feature.tonnage_gt is not null or
feature.engine_power_kw is not null or
feature.crew is not null or
(feature.geartype is not null and array_length(feature.geartype) > 0)) and
identity.ssvid not in (select * from multi_id)
order by id
'''
    # Only {dbname} appears in the template, so pass it explicitly.
    query = template.format(dbname=dbname)
    try:
        return pd.read_gbq(
            query, dialect='standard', project_id='world-fishing-827')
    except BaseException:
        # Catch-all on purpose: show the failing SQL, then propagate unchanged.
        print(query)
        raise
示例15: load_inferred_fishing
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_gbq [as 别名]
def load_inferred_fishing(table, id_list, project_id, threshold=True):
    """Collect inferred fishing ranges per vessel for the years 2012-2018.

    One legacy-SQL query is issued per year. A year whose query fails with a
    message containing 'matches no table' is skipped; any other
    ``GenericGBQException`` prints the query and is re-raised.

    Args:
      table: table name placed inside ``TABLE_DATE_RANGE([...])``.
      id_list: vessel ids; each is wrapped in double quotes for the IN list.
      project_id: project passed to ``pd.read_gbq``.
      threshold: when true, the stored score is the boolean
        ``nnet_score > 0.5`` instead of the raw ``nnet_score``.

    Returns:
      defaultdict(list) mapping vessel id to a list of
      ``FishingRange(score, start, end)``, with start/end given a UTC tzinfo.
    """
    query_template = """
SELECT vessel_id as id, start_time, end_time, nnet_score FROM
TABLE_DATE_RANGE([{table}],
TIMESTAMP('{year}-01-01'), TIMESTAMP('{year}-12-31'))
WHERE vessel_id in ({ids})
"""
    quoted_ids = ','.join(map('"{}"'.format, id_list))
    ranges = defaultdict(list)

    for year in range(2012, 2019):
        query = query_template.format(table=table, year=year, ids=quoted_ids)
        try:
            rows = pd.read_gbq(query, project_id=project_id, dialect='legacy')
        except pandas_gbq.gbq.GenericGBQException as err:
            if 'matches no table' not in err.args[0]:
                print(query)
                raise
            # No table for this year: nothing to load, move on.
            print('skipping', year)
            continue

        for row in rows.itertuples():
            score = row.nnet_score > 0.5 if threshold else row.nnet_score
            # replace() attaches the UTC tzinfo without shifting the time.
            start = row.start_time.replace(tzinfo=pytz.utc)
            end = row.end_time.replace(tzinfo=pytz.utc)
            ranges[row.id].append(FishingRange(score, start, end))

    return ranges