

Python HiveMetastoreHook.get_table method code examples

This article collects typical usage examples of the Python method airflow.hooks.hive_hooks.HiveMetastoreHook.get_table. If you are unsure what HiveMetastoreHook.get_table does or how to use it, the curated code examples below should help; you can also explore other usage examples of airflow.hooks.hive_hooks.HiveMetastoreHook.


The following shows 4 code examples of the HiveMetastoreHook.get_table method, ordered by popularity by default.
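Before the full examples, here is a minimal sketch of the method in isolation. It assumes an Airflow metastore connection named 'metastore_default' and a hypothetical table airflow.static_babynames; get_table returns a Thrift Table object whose storage descriptor (sd) carries the column list and HDFS location that the examples below rely on.

from airflow.hooks.hive_hooks import HiveMetastoreHook

# Minimal sketch; the connection id and table name are assumed values.
hook = HiveMetastoreHook(metastore_conn_id='metastore_default')
t = hook.get_table(table_name='static_babynames', db='airflow')

# The returned Thrift Table exposes a storage descriptor (sd):
columns = [col.name for col in t.sd.cols]  # column names
location = t.sd.location                   # HDFS URI of the table data
print(columns, location)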

Example 1: execute

# Required import: from airflow.hooks.hive_hooks import HiveMetastoreHook
# This execute method (from hive_to_druid.py) also needs:
#   import logging
#   from airflow.hooks.hive_hooks import HiveCliHook
#   from airflow.hooks.druid_hook import DruidHook
    def execute(self, context):
        hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
        logging.info("Extracting data from Hive")
        hive_table = "druid." + context["task_instance_key_str"].replace(".", "_")
        sql = self.sql.strip().strip(";")
        hql = """\
        set mapred.output.compress=false;
        set hive.exec.compress.output=false;
        DROP TABLE IF EXISTS {hive_table};
        CREATE TABLE {hive_table}
        ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
        STORED AS TEXTFILE
        TBLPROPERTIES ('serialization.null.format' = '')
        AS
        {sql}
        """.format(
            **locals()
        )
        logging.info("Running command:\n {}".format(hql))
        hive.run_cli(hql)

        m = HiveMetastoreHook(self.metastore_conn_id)
        t = m.get_table(hive_table)

        columns = [col.name for col in t.sd.cols]

        # Reuse the Table object fetched above rather than making a second
        # metastore call; sd.location is the table's HDFS URI.
        hdfs_uri = t.sd.location
        pos = hdfs_uri.find("/user")
        static_path = hdfs_uri[pos:]

        schema, table = hive_table.split(".")

        druid = DruidHook(druid_ingest_conn_id=self.druid_ingest_conn_id)
        logging.info("Inserting rows into Druid")
        logging.info("HDFS path: " + static_path)

        try:
            druid.load_from_hdfs(
                datasource=self.druid_datasource,
                intervals=self.intervals,
                static_path=static_path,
                ts_dim=self.ts_dim,
                columns=columns,
                num_shards=self.num_shards,
                target_partition_size=self.target_partition_size,
                query_granularity=self.query_granularity,
                segment_granularity=self.segment_granularity,
                metric_spec=self.metric_spec,
                hadoop_dependency_coordinates=self.hadoop_dependency_coordinates,
            )
            logging.info("Load seems to have succeeded!")
        finally:
            logging.info("Cleaning up by dropping the temp " "Hive table {}".format(hive_table))
            hql = "DROP TABLE IF EXISTS {}".format(hive_table)
            hive.run_cli(hql)
Developer ID: asnir | Project: airflow | Lines: 57 | Source file: hive_to_druid.py
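Note the contrast with Example 2 below: this older variant computes a path relative to /user and hands it to DruidHook.load_from_hdfs, while the newer version builds an ingestion spec via construct_ingest_query and submits it with submit_indexing_job. Both stage the query result in a temporary Hive table and drop it in the finally block, so cleanup happens even when ingestion fails.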

Example 2: execute

# Required import: from airflow.hooks.hive_hooks import HiveMetastoreHook
# This execute method (from hive_to_druid.py) also needs:
#   from airflow.hooks.hive_hooks import HiveCliHook
#   from airflow.hooks.druid_hook import DruidHook
    def execute(self, context):
        hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
        self.log.info("Extracting data from Hive")
        hive_table = 'druid.' + context['task_instance_key_str'].replace('.', '_')
        sql = self.sql.strip().strip(';')
        tblproperties = ''.join([", '{}' = '{}'"
                                .format(k, v)
                                 for k, v in self.hive_tblproperties.items()])
        hql = """\
        SET mapred.output.compress=false;
        SET hive.exec.compress.output=false;
        DROP TABLE IF EXISTS {hive_table};
        CREATE TABLE {hive_table}
        ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
        STORED AS TEXTFILE
        TBLPROPERTIES ('serialization.null.format' = ''{tblproperties})
        AS
        {sql}
        """.format(hive_table=hive_table, tblproperties=tblproperties, sql=sql)
        self.log.info("Running command:\n %s", hql)
        hive.run_cli(hql)

        m = HiveMetastoreHook(self.metastore_conn_id)

        # Get the Hive table and extract the columns
        t = m.get_table(hive_table)
        columns = [col.name for col in t.sd.cols]

        # Get the path on HDFS (reuse the Table object fetched above)
        static_path = t.sd.location

        schema, table = hive_table.split('.')

        druid = DruidHook(druid_ingest_conn_id=self.druid_ingest_conn_id)

        try:
            index_spec = self.construct_ingest_query(
                static_path=static_path,
                columns=columns,
            )

            self.log.info("Inserting rows into Druid, hdfs path: %s", static_path)

            druid.submit_indexing_job(index_spec)

            self.log.info("Load seems to have succeeded!")
        finally:
            self.log.info(
                "Cleaning up by dropping the temp Hive table %s",
                hive_table
            )
            hql = "DROP TABLE IF EXISTS {}".format(hive_table)
            hive.run_cli(hql)
Developer ID: Fokko | Project: incubator-airflow | Lines: 55 | Source file: hive_to_druid.py
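The tblproperties fragment in Example 2 is plain string concatenation; a quick sketch with a hypothetical property shows what ends up inside the TBLPROPERTIES(...) clause:

# Hypothetical extra table properties passed in as self.hive_tblproperties:
hive_tblproperties = {'druid.datasource': 'my_datasource'}
tblproperties = ''.join([", '{}' = '{}'".format(k, v)
                         for k, v in hive_tblproperties.items()])
print(tblproperties)
# -> ", 'druid.datasource' = 'my_datasource'", appended after
#    'serialization.null.format' = '' in the CREATE TABLE statement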

Example 3: table

# Required import: from airflow.hooks.hive_hooks import HiveMetastoreHook
# This view method (from the metastore browser plugin's main.py) also needs:
#   from datetime import datetime
#   from flask import request
# METASTORE_CONN_ID is a module-level constant, e.g. 'metastore_default'
    def table(self):
        table_name = request.args.get("table")
        m = HiveMetastoreHook(METASTORE_CONN_ID)
        table = m.get_table(table_name)
        return self.render(
            "metastore_browser/table.html",
            table=table, table_name=table_name, datetime=datetime, int=int)
Developer ID: AdamUnger | Project: incubator-airflow | Lines: 9 | Source file: main.py
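Example 3 is a method of the metastore browser plugin's Flask-Admin view; a sketch of the surrounding class (the @expose route and BaseView base class are assumptions based on how Airflow 1.x plugins are typically written) shows where request, METASTORE_CONN_ID, and self.render come from:

from datetime import datetime
from flask import request
from flask_admin import BaseView, expose
from airflow.hooks.hive_hooks import HiveMetastoreHook

METASTORE_CONN_ID = 'metastore_default'  # assumed connection id

class MetastoreBrowserView(BaseView):
    @expose('/table/')
    def table(self):
        table_name = request.args.get("table")
        m = HiveMetastoreHook(METASTORE_CONN_ID)
        table = m.get_table(table_name)
        # Render the table details template with the Thrift Table object
        return self.render(
            "metastore_browser/table.html",
            table=table, table_name=table_name, datetime=datetime, int=int)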

Example 4: execute

# Required import: from airflow.hooks.hive_hooks import HiveMetastoreHook
# This execute method (from hive_stats_operator.py) also needs:
#   import json
#   from collections import OrderedDict
#   from airflow.exceptions import AirflowException
#   from airflow.hooks.presto_hook import PrestoHook
#   from airflow.hooks.mysql_hook import MySqlHook
    def execute(self, context=None):
        metastore = HiveMetastoreHook(metastore_conn_id=self.metastore_conn_id)
        table = metastore.get_table(table_name=self.table)
        field_types = {col.name: col.type for col in table.sd.cols}

        exprs = {
            ('', 'count'): 'COUNT(*)'
        }
        for col, col_type in list(field_types.items()):
            d = {}
            if self.assignment_func:
                d = self.assignment_func(col, col_type)
                if d is None:
                    d = self.get_default_exprs(col, col_type)
            else:
                d = self.get_default_exprs(col, col_type)
            exprs.update(d)
        exprs.update(self.extra_exprs)
        exprs = OrderedDict(exprs)
        exprs_str = ",\n        ".join([
            v + " AS " + k[0] + '__' + k[1]
            for k, v in exprs.items()])

        where_clause = ["{} = '{}'".format(k, v) for k, v in self.partition.items()]
        where_clause = " AND\n        ".join(where_clause)
        sql = "SELECT {exprs_str} FROM {table} WHERE {where_clause};".format(
            exprs_str=exprs_str, table=self.table, where_clause=where_clause)

        presto = PrestoHook(presto_conn_id=self.presto_conn_id)
        self.log.info('Executing SQL check: %s', sql)
        row = presto.get_first(hql=sql)
        self.log.info("Record: %s", row)
        if not row:
            raise AirflowException("The query returned None")

        part_json = json.dumps(self.partition, sort_keys=True)

        self.log.info("Deleting rows from previous runs if they exist")
        mysql = MySqlHook(self.mysql_conn_id)
        sql = """
        SELECT 1 FROM hive_stats
        WHERE
            table_name='{table}' AND
            partition_repr='{part_json}' AND
            dttm='{dttm}'
        LIMIT 1;
        """.format(table=self.table, part_json=part_json, dttm=self.dttm)
        if mysql.get_records(sql):
            sql = """
            DELETE FROM hive_stats
            WHERE
                table_name='{table}' AND
                partition_repr='{part_json}' AND
                dttm='{dttm}';
            """.format(table=self.table, part_json=part_json, dttm=self.dttm)
            mysql.run(sql)

        self.log.info("Pivoting and loading cells into the Airflow db")
        rows = [(self.ds, self.dttm, self.table, part_json) + (r[0][0], r[0][1], r[1])
                for r in zip(exprs, row)]
        mysql.insert_rows(
            table='hive_stats',
            rows=rows,
            target_fields=[
                'ds',
                'dttm',
                'table_name',
                'partition_repr',
                'col',
                'metric',
                'value',
            ]
        )
Developer ID: Fokko | Project: incubator-airflow | Lines: 75 | Source file: hive_stats_operator.py
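Example 4 is the execute method of Airflow's HiveStatsCollectionOperator; a minimal sketch of wiring it into a DAG follows (the dag id, table, partition, and schedule are assumed values for illustration):

from datetime import datetime
from airflow import DAG
from airflow.operators.hive_stats_operator import HiveStatsCollectionOperator

dag = DAG(
    dag_id='hive_stats_example',
    start_date=datetime(2018, 1, 1),
    schedule_interval='@daily',
)

# Assumed table and partition spec, for illustration only; the connection
# ids default to 'metastore_default', 'presto_default' and 'airflow_db'.
collect_stats = HiveStatsCollectionOperator(
    task_id='collect_stats',
    table='default.static_babynames',
    partition={'ds': '{{ ds }}'},
    dag=dag,
)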


Note: The airflow.hooks.hive_hooks.HiveMetastoreHook.get_table method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by various developers; copyright in the source code belongs to the original authors, and any use or redistribution must comply with the corresponding project licenses. Please do not republish without permission.