This page collects typical usage examples of the Python method airflow.hooks.hive_hooks.HiveMetastoreHook.get_table. If you are wondering what exactly HiveMetastoreHook.get_table does and how to use it, the curated code examples below may help. You can also read more about the class it belongs to, airflow.hooks.hive_hooks.HiveMetastoreHook.
Four code examples of HiveMetastoreHook.get_table are shown below, sorted by popularity by default.
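Before the full examples, here is a minimal sketch of the basic call pattern, assuming a metastore connection named "metastore_default" and a hypothetical table "mydb.mytable": instantiate the hook with a metastore connection ID, pass the table name to get_table, and read the schema and location from the returned metastore Table object, which is what the examples below rely on.

from airflow.hooks.hive_hooks import HiveMetastoreHook

# "metastore_default" and "mydb.mytable" are placeholder values for illustration.
hook = HiveMetastoreHook(metastore_conn_id="metastore_default")

# get_table returns the Thrift Table object served by the Hive metastore;
# db defaults to "default", or a dotted name such as "mydb.mytable" can be passed.
table = hook.get_table(table_name="mytable", db="mydb")

# The examples below read the column schema and HDFS location from the
# table's storage descriptor (table.sd).
column_names = [col.name for col in table.sd.cols]
hdfs_location = table.sd.location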
Example 1: execute
# Required import: from airflow.hooks.hive_hooks import HiveMetastoreHook [as alias]
# Or: from airflow.hooks.hive_hooks.HiveMetastoreHook import get_table [as alias]
def execute(self, context):
    hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
    logging.info("Extracting data from Hive")
    hive_table = "druid." + context["task_instance_key_str"].replace(".", "_")
    sql = self.sql.strip().strip(";")

    # Materialize the query results into a temporary, uncompressed Hive table
    hql = """\
    set mapred.output.compress=false;
    set hive.exec.compress.output=false;
    DROP TABLE IF EXISTS {hive_table};
    CREATE TABLE {hive_table}
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    STORED AS TEXTFILE
    TBLPROPERTIES ('serialization.null.format' = '')
    AS
    {sql}
    """.format(**locals())
    logging.info("Running command:\n {}".format(hql))
    hive.run_cli(hql)

    # Read the temp table's column schema and HDFS location from the metastore
    m = HiveMetastoreHook(self.metastore_conn_id)
    t = m.get_table(hive_table)
    columns = [col.name for col in t.sd.cols]

    # Trim the URI down to the path that Druid's HDFS ingestion expects
    hdfs_uri = m.get_table(hive_table).sd.location
    pos = hdfs_uri.find("/user")
    static_path = hdfs_uri[pos:]

    schema, table = hive_table.split(".")

    druid = DruidHook(druid_ingest_conn_id=self.druid_ingest_conn_id)
    logging.info("Inserting rows into Druid")
    logging.info("HDFS path: " + static_path)

    try:
        druid.load_from_hdfs(
            datasource=self.druid_datasource,
            intervals=self.intervals,
            static_path=static_path,
            ts_dim=self.ts_dim,
            columns=columns,
            num_shards=self.num_shards,
            target_partition_size=self.target_partition_size,
            query_granularity=self.query_granularity,
            segment_granularity=self.segment_granularity,
            metric_spec=self.metric_spec,
            hadoop_dependency_coordinates=self.hadoop_dependency_coordinates,
        )
        logging.info("Load seems to have succeeded!")
    finally:
        # Always drop the temporary table, even if the Druid load fails
        logging.info("Cleaning up by dropping the temp Hive table {}".format(hive_table))
        hql = "DROP TABLE IF EXISTS {}".format(hive_table)
        hive.run_cli(hql)
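The object returned by get_table here is the Thrift Table struct from the Hive metastore, so the column names and the HDFS location both come from its storage descriptor. A minimal sketch of the attributes this example reads, with illustrative (not real) values:

# Sketch only: table name and printed values are made up for illustration.
t = m.get_table("druid.example_task_instance")

for col in t.sd.cols:          # storage descriptor -> column schema
    print(col.name, col.type)  # e.g. "event_ts", "string"

print(t.sd.location)           # e.g. "hdfs://namenode/user/hive/warehouse/druid.db/..."
# hdfs_uri.find("/user") strips the scheme and host so only the path is handed to Druid.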
Example 2: execute
# Required import: from airflow.hooks.hive_hooks import HiveMetastoreHook [as alias]
# Or: from airflow.hooks.hive_hooks.HiveMetastoreHook import get_table [as alias]
def execute(self, context):
    hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
    self.log.info("Extracting data from Hive")
    hive_table = 'druid.' + context['task_instance_key_str'].replace('.', '_')
    sql = self.sql.strip().strip(';')
    tblproperties = ''.join([", '{}' = '{}'".format(k, v)
                             for k, v in self.hive_tblproperties.items()])
    hql = """\
    SET mapred.output.compress=false;
    SET hive.exec.compress.output=false;
    DROP TABLE IF EXISTS {hive_table};
    CREATE TABLE {hive_table}
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    STORED AS TEXTFILE
    TBLPROPERTIES ('serialization.null.format' = ''{tblproperties})
    AS
    {sql}
    """.format(hive_table=hive_table, tblproperties=tblproperties, sql=sql)
    self.log.info("Running command:\n %s", hql)
    hive.run_cli(hql)

    m = HiveMetastoreHook(self.metastore_conn_id)

    # Get the Hive table and extract the columns
    t = m.get_table(hive_table)
    columns = [col.name for col in t.sd.cols]

    # Get the path on hdfs
    static_path = m.get_table(hive_table).sd.location

    schema, table = hive_table.split('.')

    druid = DruidHook(druid_ingest_conn_id=self.druid_ingest_conn_id)
    try:
        index_spec = self.construct_ingest_query(
            static_path=static_path,
            columns=columns,
        )
        self.log.info("Inserting rows into Druid, hdfs path: %s", static_path)
        druid.submit_indexing_job(index_spec)
        self.log.info("Load seems to have succeeded!")
    finally:
        self.log.info(
            "Cleaning up by dropping the temp Hive table %s",
            hive_table
        )
        hql = "DROP TABLE IF EXISTS {}".format(hive_table)
        hive.run_cli(hql)
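One detail worth noting in this variant is how the optional hive_tblproperties dict is folded into the TBLPROPERTIES clause: each key/value pair becomes a ", 'key' = 'value'" fragment appended after the mandatory serialization.null.format entry. A small standalone sketch of that join, using a made-up dict:

# Hypothetical input; the real dict comes from the operator's constructor.
hive_tblproperties = {"druid.datasource": "events", "external.table.purge": "true"}

tblproperties = ''.join(
    ", '{}' = '{}'".format(k, v) for k, v in hive_tblproperties.items()
)

# TBLPROPERTIES ('serialization.null.format' = ''{tblproperties}) then expands to e.g.:
# TBLPROPERTIES ('serialization.null.format' = '', 'druid.datasource' = 'events', 'external.table.purge' = 'true')
print(tblproperties)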
Example 3: table
# Required import: from airflow.hooks.hive_hooks import HiveMetastoreHook [as alias]
# Or: from airflow.hooks.hive_hooks.HiveMetastoreHook import get_table [as alias]
def table(self):
    table_name = request.args.get("table")
    m = HiveMetastoreHook(METASTORE_CONN_ID)
    table = m.get_table(table_name)
    return self.render(
        "metastore_browser/table.html",
        table=table, table_name=table_name, datetime=datetime, int=int)
Example 4: execute
# Required import: from airflow.hooks.hive_hooks import HiveMetastoreHook [as alias]
# Or: from airflow.hooks.hive_hooks.HiveMetastoreHook import get_table [as alias]
def execute(self, context=None):
    metastore = HiveMetastoreHook(metastore_conn_id=self.metastore_conn_id)
    table = metastore.get_table(table_name=self.table)
    # Map each column to its Hive type from the metastore schema
    field_types = {col.name: col.type for col in table.sd.cols}

    # Build one aggregation expression per column, plus an overall row count
    exprs = {
        ('', 'count'): 'COUNT(*)'
    }
    for col, col_type in list(field_types.items()):
        d = {}
        if self.assignment_func:
            d = self.assignment_func(col, col_type)
            if d is None:
                d = self.get_default_exprs(col, col_type)
        else:
            d = self.get_default_exprs(col, col_type)
        exprs.update(d)
    exprs.update(self.extra_exprs)
    exprs = OrderedDict(exprs)
    exprs_str = ",\n ".join([
        v + " AS " + k[0] + '__' + k[1]
        for k, v in exprs.items()])

    where_clause = ["{} = '{}'".format(k, v) for k, v in self.partition.items()]
    where_clause = " AND\n ".join(where_clause)
    sql = "SELECT {exprs_str} FROM {table} WHERE {where_clause};".format(
        exprs_str=exprs_str, table=self.table, where_clause=where_clause)

    # Run the stats query through Presto
    presto = PrestoHook(presto_conn_id=self.presto_conn_id)
    self.log.info('Executing SQL check: %s', sql)
    row = presto.get_first(hql=sql)
    self.log.info("Record: %s", row)
    if not row:
        raise AirflowException("The query returned None")

    part_json = json.dumps(self.partition, sort_keys=True)

    # Remove stats from a previous run of the same table/partition/dttm, if any
    self.log.info("Deleting rows from previous runs if they exist")
    mysql = MySqlHook(self.mysql_conn_id)
    sql = """
    SELECT 1 FROM hive_stats
    WHERE
        table_name='{table}' AND
        partition_repr='{part_json}' AND
        dttm='{dttm}'
    LIMIT 1;
    """.format(table=self.table, part_json=part_json, dttm=self.dttm)
    if mysql.get_records(sql):
        sql = """
        DELETE FROM hive_stats
        WHERE
            table_name='{table}' AND
            partition_repr='{part_json}' AND
            dttm='{dttm}';
        """.format(table=self.table, part_json=part_json, dttm=self.dttm)
        mysql.run(sql)

    # Pivot (column, metric) -> value pairs into rows for the hive_stats table
    self.log.info("Pivoting and loading cells into the Airflow db")
    rows = [(self.ds, self.dttm, self.table, part_json) + (r[0][0], r[0][1], r[1])
            for r in zip(exprs, row)]
    mysql.insert_rows(
        table='hive_stats',
        rows=rows,
        target_fields=[
            'ds',
            'dttm',
            'table_name',
            'partition_repr',
            'col',
            'metric',
            'value',
        ]
    )
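The final pivot in this example can be hard to read at a glance: exprs is an OrderedDict keyed by (column, metric) tuples, the Presto row comes back in the same order as the SELECT list, and zip pairs them back up. A tiny standalone sketch with made-up values (the stand-in variables below are hypothetical, not the operator's real state):

from collections import OrderedDict

# Hypothetical stand-ins for self.ds, self.dttm, self.table and part_json.
ds, dttm, table, part_json = "2015-01-01", "2015-01-01T00:00:00", "mydb.mytable", '{"ds": "2015-01-01"}'

exprs = OrderedDict([
    (('', 'count'), 'COUNT(*)'),
    (('user_id', 'non_null'), 'COUNT(user_id)'),
])
row = (1000, 990)  # one value per expression, in SELECT-list order

# Iterating an OrderedDict yields its keys, so zip pairs each (col, metric)
# key with the matching value from the Presto row.
rows = [(ds, dttm, table, part_json) + (k[0], k[1], v)
        for k, v in zip(exprs, row)]
# -> [('2015-01-01', ..., '', 'count', 1000),
#     ('2015-01-01', ..., 'user_id', 'non_null', 990)]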