

Python hive_hooks.HiveMetastoreHook Class Code Examples

This article collects typical usage examples of the Python class airflow.hooks.hive_hooks.HiveMetastoreHook. If you are unsure what the HiveMetastoreHook class does or how to use it, the curated examples below should help.


Fifteen code examples of the HiveMetastoreHook class are shown below, sorted by popularity by default; examples that readers found useful rank higher.

Example 1: execute

    def execute(self, context):
        hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
        logging.info("Extracting data from Hive")
        hive_table = "druid." + context["task_instance_key_str"].replace(".", "_")
        sql = self.sql.strip().strip(";")
        hql = """\
        set mapred.output.compress=false;
        set hive.exec.compress.output=false;
        DROP TABLE IF EXISTS {hive_table};
        CREATE TABLE {hive_table}
        ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
        STORED AS TEXTFILE
        TBLPROPERTIES ('serialization.null.format' = '')
        AS
        {sql}
        """.format(
            **locals()
        )
        logging.info("Running command:\n {}".format(hql))
        hive.run_cli(hql)

        m = HiveMetastoreHook(self.metastore_conn_id)
        t = m.get_table(hive_table)

        columns = [col.name for col in t.sd.cols]

        hdfs_uri = m.get_table(hive_table).sd.location
        pos = hdfs_uri.find("/user")
        static_path = hdfs_uri[pos:]

        schema, table = hive_table.split(".")

        druid = DruidHook(druid_ingest_conn_id=self.druid_ingest_conn_id)
        logging.info("Inserting rows into Druid")
        logging.info("HDFS path: " + static_path)

        try:
            druid.load_from_hdfs(
                datasource=self.druid_datasource,
                intervals=self.intervals,
                static_path=static_path,
                ts_dim=self.ts_dim,
                columns=columns,
                num_shards=self.num_shards,
                target_partition_size=self.target_partition_size,
                query_granularity=self.query_granularity,
                segment_granularity=self.segment_granularity,
                metric_spec=self.metric_spec,
                hadoop_dependency_coordinates=self.hadoop_dependency_coordinates,
            )
            logging.info("Load seems to have succeeded!")
        finally:
            logging.info("Cleaning up by dropping the temp " "Hive table {}".format(hive_table))
            hql = "DROP TABLE IF EXISTS {}".format(hive_table)
            hive.run_cli(hql)
Author: asnir, Project: airflow, Lines: 55, Source: hive_to_druid.py
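
This execute() method belongs to Airflow's HiveToDruidTransfer operator (airflow.operators.hive_to_druid): it stages the query result in a temporary druid.* Hive table and drops it in the finally block even if ingestion fails. A minimal DAG-wiring sketch follows; the query, datasource, intervals, and connection IDs are illustrative assumptions, not values from the example:

    # Hypothetical DAG wiring for HiveToDruidTransfer; every literal here
    # (query, datasource, intervals, connection IDs) is an illustrative assumption.
    from datetime import datetime

    from airflow import DAG
    from airflow.operators.hive_to_druid import HiveToDruidTransfer

    dag = DAG('hive_to_druid_example',
              start_date=datetime(2015, 1, 1),
              schedule_interval='@daily')

    load_druid = HiveToDruidTransfer(
        task_id='hive_to_druid',
        sql="SELECT ds, country, metric FROM my_db.my_table",
        druid_datasource='my_datasource',
        ts_dim='ds',                          # timestamp dimension column
        intervals=['2015-01-01/2015-01-02'],  # ingestion interval(s)
        metastore_conn_id='metastore_default',
        hive_cli_conn_id='hive_cli_default',
        druid_ingest_conn_id='druid_ingest_default',
        dag=dag,
    )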

Example 2: max_partition

def max_partition(
        table, schema="default", field=None, filter_map=None,
        metastore_conn_id='metastore_default'):
    """
    Gets the max partition for a table.

    :param schema: The hive schema the table lives in
    :type schema: str
    :param table: The hive table you are interested in, supports the dot
        notation as in "my_database.my_table", if a dot is found,
        the schema param is disregarded
    :type table: str
    :param metastore_conn_id: The hive connection you are interested in.
        If your default is set you don't need to use this parameter.
    :type metastore_conn_id: str
    :param filter_map: partition_key:partition_value map used for partition filtering,
                       e.g. {'key1': 'value1', 'key2': 'value2'}.
                       Only partitions matching all partition_key:partition_value
                       pairs will be considered as candidates of max partition.
    :type filter_map: map
    :param field: the field to get the max value from. If there's only
        one partition field, this will be inferred
    :type field: str

    >>> max_partition('airflow.static_babynames_partitioned')
    '2015-01-01'
    """
    from airflow.hooks.hive_hooks import HiveMetastoreHook
    if '.' in table:
        schema, table = table.split('.')
    hh = HiveMetastoreHook(metastore_conn_id=metastore_conn_id)
    return hh.max_partition(
        schema=schema, table_name=table, field=field, filter_map=filter_map)
Author: Fokko, Project: incubator-airflow, Lines: 33, Source: hive.py
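
In practice this function is exposed as an Airflow template macro (airflow.macros.hive.max_partition), so it is usually called from a templated field rather than imported directly. A small sketch, assuming an existing dag object; only the table name comes from the doctest above:

    # Hypothetical templated usage; everything except the table name
    # is an illustrative assumption.
    from airflow.operators.bash_operator import BashOperator

    echo_latest = BashOperator(
        task_id='echo_latest_partition',
        bash_command=(
            "echo {{ macros.hive.max_partition("
            "'airflow.static_babynames_partitioned') }}"
        ),
        dag=dag,  # assumes an existing `dag` object
    )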

Example 3: test_get_max_partition_from_valid_part_specs_and_invalid_filter_map

    def test_get_max_partition_from_valid_part_specs_and_invalid_filter_map(self):
        with self.assertRaises(AirflowException):
            HiveMetastoreHook._get_max_partition_from_part_specs(
                [{'key1': 'value1', 'key2': 'value2'},
                 {'key1': 'value3', 'key2': 'value4'}],
                'key1',
                {'key3': 'value5'})
Author: AdamUnger, Project: incubator-airflow, Lines: 7, Source: test_hive_hook.py

Example 4: max_partition

def max_partition(
        table, schema="default", field=None, filter=None,
        metastore_conn_id='metastore_default'):
    '''
    Gets the max partition for a table.

    :param schema: The hive schema the table lives in
    :type schema: string
    :param table: The hive table you are interested in, supports the dot
        notation as in "my_database.my_table", if a dot is found,
        the schema param is disregarded
    :type table: string
    :param metastore_conn_id: The hive connection you are interested in.
        If your default is set you don't need to use this parameter.
    :type metastore_conn_id: string
    :param filter: filter on a subset of partition as in
        `sub_part='specific_value'`
    :type filter: string
    :param field: the field to get the max value from. If there's only
        one partition field, this will be inferred

    >>> max_partition('airflow.static_babynames_partitioned')
    '2015-01-01'
    '''
    from airflow.hooks.hive_hooks import HiveMetastoreHook
    if '.' in table:
        schema, table = table.split('.')
    hh = HiveMetastoreHook(metastore_conn_id=metastore_conn_id)
    return hh.max_partition(
        schema=schema, table_name=table, field=field, filter=filter)
Author: AndreiDev, Project: incubator-airflow, Lines: 30, Source: hive.py

Example 5: test_get_max_partition_from_valid_part_specs_and_none_partition_key

    def test_get_max_partition_from_valid_part_specs_and_none_partition_key(self):
        with self.assertRaises(AirflowException):
            HiveMetastoreHook._get_max_partition_from_part_specs(
                [{'key1': 'value1', 'key2': 'value2'},
                 {'key1': 'value3', 'key2': 'value4'}],
                None,
                self.VALID_FILTER_MAP)
Author: AdamUnger, Project: incubator-airflow, Lines: 7, Source: test_hive_hook.py

Example 6: table

    def table(self):
        table_name = request.args.get("table")
        m = HiveMetastoreHook(METASTORE_CONN_ID)
        table = m.get_table(table_name)
        return self.render(
            "metastore_browser/table.html",
            table=table, table_name=table_name, datetime=datetime, int=int)
Author: AdamUnger, Project: incubator-airflow, Lines: 7, Source: main.py

Example 7: execute

    def execute(self, context):
        hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
        self.log.info("Extracting data from Hive")
        hive_table = 'druid.' + context['task_instance_key_str'].replace('.', '_')
        sql = self.sql.strip().strip(';')
        tblproperties = ''.join([", '{}' = '{}'"
                                .format(k, v)
                                 for k, v in self.hive_tblproperties.items()])
        hql = """\
        SET mapred.output.compress=false;
        SET hive.exec.compress.output=false;
        DROP TABLE IF EXISTS {hive_table};
        CREATE TABLE {hive_table}
        ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
        STORED AS TEXTFILE
        TBLPROPERTIES ('serialization.null.format' = ''{tblproperties})
        AS
        {sql}
        """.format(hive_table=hive_table, tblproperties=tblproperties, sql=sql)
        self.log.info("Running command:\n %s", hql)
        hive.run_cli(hql)

        m = HiveMetastoreHook(self.metastore_conn_id)

        # Get the Hive table and extract the columns
        t = m.get_table(hive_table)
        columns = [col.name for col in t.sd.cols]

        # Get the path on hdfs
        static_path = m.get_table(hive_table).sd.location

        schema, table = hive_table.split('.')

        druid = DruidHook(druid_ingest_conn_id=self.druid_ingest_conn_id)

        try:
            index_spec = self.construct_ingest_query(
                static_path=static_path,
                columns=columns,
            )

            self.log.info("Inserting rows into Druid, hdfs path: %s", static_path)

            druid.submit_indexing_job(index_spec)

            self.log.info("Load seems to have succeeded!")
        finally:
            self.log.info(
                "Cleaning up by dropping the temp Hive table %s",
                hive_table
            )
            hql = "DROP TABLE IF EXISTS {}".format(hive_table)
            hive.run_cli(hql)
Author: Fokko, Project: incubator-airflow, Lines: 53, Source: hive_to_druid.py

Example 8: test_get_max_partition_from_valid_part_names

    def test_get_max_partition_from_valid_part_names(self):
        max_partition = \
            HiveMetastoreHook._get_max_partition_from_part_names(
                ['some_key=value1',
                 'some_key=value2',
                 'some_key=value3'],
                'some_key')
        self.assertEqual(max_partition, 'value3')
Author: iansuvak, Project: airflow, Lines: 7, Source: test_hive_hook.py

Example 9: test_get_max_partition_from_valid_part_specs

    def test_get_max_partition_from_valid_part_specs(self):
        max_partition = \
            HiveMetastoreHook._get_max_partition_from_part_specs(
                [{'key1': 'value1', 'key2': 'value2'},
                 {'key1': 'value3', 'key2': 'value4'}],
                'key1',
                self.VALID_FILTER_MAP)
        self.assertEqual(max_partition, b'value1')
Author: AdamUnger, Project: incubator-airflow, Lines: 8, Source: test_hive_hook.py

Example 10: test_get_max_partition_from_valid_part_specs_and_none_filter_map

    def test_get_max_partition_from_valid_part_specs_and_none_filter_map(self):
        max_partition = \
            HiveMetastoreHook._get_max_partition_from_part_specs(
                [{'key1': 'value1', 'key2': 'value2'},
                 {'key1': 'value3', 'key2': 'value4'}],
                'key1',
                None)

        # No partition will be filtered out.
        self.assertEqual(max_partition, b'value3')
Author: AdamUnger, Project: incubator-airflow, Lines: 10, Source: test_hive_hook.py
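
Taken together, examples 3, 5, 9, and 10 pin down the contract of the private _get_max_partition_from_part_specs helper: it raises AirflowException when the partition key is missing or a filter key is not a partition key, filters the part specs against filter_map (a None filter map matches everything), and returns the maximum value of the requested key among the survivors. A rough sketch of that contract, as an assumption-based reimplementation rather than the actual Airflow source:

    # Assumption-based sketch of the behavior the tests above exercise;
    # not the actual Airflow implementation.
    from airflow.exceptions import AirflowException

    def get_max_partition_from_part_specs(part_specs, partition_key, filter_map):
        if not part_specs:
            return None
        # The requested key must be one of the partition keys.
        if partition_key not in part_specs[0]:
            raise AirflowException(
                'Provided partition_key {} is not in part_specs.'.format(partition_key))
        # Every filter key must also be a partition key.
        if filter_map and not set(filter_map).issubset(part_specs[0]):
            raise AirflowException('filter_map contains unknown partition keys.')
        # A None filter_map filters nothing out.
        candidates = [
            spec[partition_key] for spec in part_specs
            if filter_map is None
            or all(spec.get(k) == v for k, v in filter_map.items())
        ]
        return max(candidates) if candidates else None

(The b'...' expectations in examples 9 and 10 reflect the Python 2 era these snippets come from, where str and bytes compare equal.)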

Example 11: poke_partition

    def poke_partition(self, partition):
        if not self.hook:
            from airflow.hooks.hive_hooks import HiveMetastoreHook
            self.hook = HiveMetastoreHook(
                metastore_conn_id=self.metastore_conn_id)

        schema, table, partition = self.parse_partition_name(partition)

        self.log.info('Poking for %s.%s/%s', schema, table, partition)
        return self.hook.check_for_named_partition(
            schema, table, partition)
Author: apache, Project: incubator-airflow, Lines: 11, Source: named_hive_partition_sensor.py
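
This poke_partition helper backs the NamedHivePartitionSensor, which waits on fully qualified partition names instead of a filter expression. A hedged usage sketch; the partition name and task_id are illustrative, and the import path matches Airflow 1.10.x:

    # Illustrative usage of NamedHivePartitionSensor; the partition name
    # and task_id are assumptions for the sketch.
    from airflow.sensors.named_hive_partition_sensor import NamedHivePartitionSensor

    wait_for_partition = NamedHivePartitionSensor(
        task_id='wait_for_partition',
        partition_names=['my_schema.my_table/ds=2015-01-01'],
        metastore_conn_id='metastore_default',
        dag=dag,  # assumes an existing `dag` object
    )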

Example 12: poke

    def poke(self, context):
        if '.' in self.table:
            self.schema, self.table = self.table.split('.')
        self.log.info(
            'Poking for table {self.schema}.{self.table}, '
            'partition {self.partition}'.format(**locals()))
        if not hasattr(self, 'hook'):
            from airflow.hooks.hive_hooks import HiveMetastoreHook
            self.hook = HiveMetastoreHook(
                metastore_conn_id=self.metastore_conn_id)
        return self.hook.check_for_partition(
            self.schema, self.table, self.partition)
Author: 7digital, Project: incubator-airflow, Lines: 12, Source: sensors.py

Example 13: poke

    def poke(self, context):
        if '.' in self.table:
            self.schema, self.table = self.table.split('.')
        self.log.info(
            'Poking for table %s.%s, partition %s', self.schema, self.table, self.partition
        )
        if not hasattr(self, 'hook'):
            from airflow.hooks.hive_hooks import HiveMetastoreHook
            self.hook = HiveMetastoreHook(
                metastore_conn_id=self.metastore_conn_id)
        return self.hook.check_for_partition(
            self.schema, self.table, self.partition)
Author: apache, Project: incubator-airflow, Lines: 12, Source: hive_partition_sensor.py

Example 14: HivePartitionSensor

class HivePartitionSensor(BaseSensorOperator):
    """
    Waits for a partition to show up in Hive.

    Note: Because ``partition`` supports general logical operators, it
    can be inefficient. Consider using NamedHivePartitionSensor instead if
    you don't need the full flexibility of HivePartitionSensor.

    :param table: The name of the table to wait for, supports the dot
        notation (my_database.my_table)
    :type table: string
    :param partition: The partition clause to wait for. This is passed as
        is to the metastore Thrift client ``get_partitions_by_filter`` method,
        and apparently supports SQL-like notation as in ``ds='2015-01-01'
        AND type='value'`` and comparison operators as in ``"ds>=2015-01-01"``
    :type partition: string
    :param metastore_conn_id: reference to the metastore thrift service
        connection id
    :type metastore_conn_id: str
    """
    template_fields = ('schema', 'table', 'partition',)
    ui_color = '#C5CAE9'

    @apply_defaults
    def __init__(
            self,
            table, partition="ds='{{ ds }}'",
            metastore_conn_id='metastore_default',
            schema='default',
            poke_interval=60*3,
            *args, **kwargs):
        super(HivePartitionSensor, self).__init__(
            poke_interval=poke_interval, *args, **kwargs)
        if not partition:
            partition = "ds='{{ ds }}'"
        self.metastore_conn_id = metastore_conn_id
        self.table = table
        self.partition = partition
        self.schema = schema

    def poke(self, context):
        if '.' in self.table:
            self.schema, self.table = self.table.split('.')
        self.log.info(
            'Poking for table {self.schema}.{self.table}, '
            'partition {self.partition}'.format(**locals()))
        if not hasattr(self, 'hook'):
            from airflow.hooks.hive_hooks import HiveMetastoreHook
            self.hook = HiveMetastoreHook(
                metastore_conn_id=self.metastore_conn_id)
        return self.hook.check_for_partition(
            self.schema, self.table, self.partition)
Author: 7digital, Project: incubator-airflow, Lines: 52, Source: sensors.py
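
A usage sketch for this sensor; the table, partition clause, and task_id are illustrative. The import path below is for Airflow 1.10.x, whereas the snippet itself comes from an older tree where the class lived in airflow/operators/sensors.py:

    # Illustrative usage of HivePartitionSensor; all literals here are
    # assumptions for the sketch.
    from airflow.sensors.hive_partition_sensor import HivePartitionSensor

    wait_for_ds = HivePartitionSensor(
        task_id='wait_for_ds_partition',
        table='my_database.my_table',
        partition="ds='{{ ds }}' AND type='value'",
        metastore_conn_id='metastore_default',
        dag=dag,  # assumes an existing `dag` object
    )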

Example 15: poke_partition

    def poke_partition(self, partition):
        if not self.hook:
            from airflow.hooks.hive_hooks import HiveMetastoreHook
            self.hook = HiveMetastoreHook(
                metastore_conn_id=self.metastore_conn_id)

        schema, table, partition = self.parse_partition_name(partition)

        self.log.info(
            'Poking for {schema}.{table}/{partition}'.format(**locals())
        )
        return self.hook.check_for_named_partition(
            schema, table, partition)
Author: wooga, Project: airflow, Lines: 13, Source: named_hive_partition_sensor.py


Note: The airflow.hooks.hive_hooks.HiveMetastoreHook class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their respective developers; copyright remains with the original authors, and distribution and use are subject to each project's License. Do not reproduce without permission.