

Python HiveCliHook.load_file method code examples

This article collects typical usage examples of the Python method airflow.hooks.hive_hooks.HiveCliHook.load_file. If you are unsure what HiveCliHook.load_file does or how to call it, the curated examples below should help. You can also explore other usage examples of airflow.hooks.hive_hooks.HiveCliHook.


The following presents 5 code examples of the HiveCliHook.load_file method, sorted by popularity by default.
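Before turning to the examples, here is a minimal sketch of calling load_file directly on a HiveCliHook. The connection id, file path, table name, and column types are illustrative assumptions; the keyword arguments mirror those used in the examples below, and the call assumes a working hive_cli_default connection and the Hive CLI on the worker.

from collections import OrderedDict

from airflow.hooks.hive_hooks import HiveCliHook

# Hypothetical connection id; 'hive_cli_default' ships with a vanilla Airflow install.
hive = HiveCliHook(hive_cli_conn_id='hive_cli_default')

# Ordered mapping of column name -> Hive type, used to CREATE the table when create=True.
field_dict = OrderedDict([('id', 'BIGINT'), ('name', 'STRING')])

hive.load_file(
    '/tmp/export.csv',                 # local delimited file to load
    'staging.my_table',                # target Hive table
    field_dict=field_dict,
    delimiter=',',
    create=True,                       # create the table if it does not exist
    recreate=False,                    # set True to drop and recreate it first
    partition={'ds': '2018-01-01'},    # optional static partition spec (dict)
)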

Example 1: execute

# Required import: from airflow.hooks.hive_hooks import HiveCliHook [as alias]
# Or: from airflow.hooks.hive_hooks.HiveCliHook import load_file [as alias]
    def execute(self, context):
        hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
        mssql = MsSqlHook(mssql_conn_id=self.mssql_conn_id)

        logging.info("Dumping Microsoft SQL Server query results to local file")
        conn = mssql.get_conn()
        cursor = conn.cursor()
        cursor.execute(self.sql)
        with NamedTemporaryFile("w") as f:
            # csv here is the unicodecsv package (imported as csv in the source
            # module), which is why writer() accepts an encoding argument.
            csv_writer = csv.writer(f, delimiter=self.delimiter, encoding='utf-8')
            field_dict = OrderedDict()
            col_count = 0
            for field in cursor.description:
                col_count += 1
                col_position = "Column{position}".format(position=col_count)
                field_dict[col_position if field[0] == '' else field[0]] = self.type_map(field[1])
            csv_writer.writerows(cursor)
            f.flush()
            cursor.close()
            conn.close()
            logging.info("Loading file into Hive")
            hive.load_file(
                f.name,
                self.hive_table,
                field_dict=field_dict,
                create=self.create,
                partition=self.partition,
                delimiter=self.delimiter,
                recreate=self.recreate)
Developer ID: bioteam, Project: incubator-airflow, Lines: 31, Source: mssql_to_hive.py
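The execute method above comes from the MsSqlToHiveTransfer operator. As a rough illustration, a task using it might be declared as follows; the import path, connection ids, query, and table name are assumptions (module locations vary across Airflow versions), while the keyword arguments mirror the attributes referenced in the snippet.

from airflow.operators.mssql_to_hive import MsSqlToHiveTransfer  # path is an assumption; varies by Airflow version

load_orders = MsSqlToHiveTransfer(
    task_id='mssql_orders_to_hive',
    sql='SELECT order_id, amount FROM dbo.orders',  # query dumped from SQL Server
    hive_table='staging.orders',                    # destination Hive table
    delimiter=',',
    create=True,
    recreate=False,
    partition={'ds': '2018-01-01'},                 # optional partition spec
    mssql_conn_id='mssql_default',
    hive_cli_conn_id='hive_cli_default',
    dag=dag,                                        # assumes an existing DAG object
)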

Example 2: execute

# Required import: from airflow.hooks.hive_hooks import HiveCliHook [as alias]
# Or: from airflow.hooks.hive_hooks.HiveCliHook import load_file [as alias]
    def execute(self, context):
        hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
        mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)

        self.log.info("Dumping MySQL query results to local file")
        conn = mysql.get_conn()
        cursor = conn.cursor()
        cursor.execute(self.sql)
        with NamedTemporaryFile("wb") as f:
            csv_writer = csv.writer(f, delimiter=self.delimiter, encoding="utf-8")
            field_dict = OrderedDict()
            for field in cursor.description:
                field_dict[field[0]] = self.type_map(field[1])
            csv_writer.writerows(cursor)
            f.flush()
            cursor.close()
            conn.close()
            self.log.info("Loading file into Hive")
            hive.load_file(
                f.name,
                self.hive_table,
                field_dict=field_dict,
                create=self.create,
                partition=self.partition,
                delimiter=self.delimiter,
                recreate=self.recreate,
                tblproperties=self.tblproperties)
Developer ID: 7digital, Project: incubator-airflow, Lines: 29, Source: mysql_to_hive.py
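Compared with the SQL Server variant, this MySQL version also forwards tblproperties to load_file, where it is applied to the created table. A hypothetical task definition (import path and connection ids are assumptions) could look like this:

from airflow.operators.mysql_to_hive import MySqlToHiveTransfer  # path is an assumption; varies by Airflow version

load_users = MySqlToHiveTransfer(
    task_id='mysql_users_to_hive',
    sql='SELECT id, email FROM users',
    hive_table='staging.users',
    delimiter=',',
    create=True,
    recreate=False,
    tblproperties={'airflow.source': 'mysql'},  # forwarded to load_file as table properties
    mysql_conn_id='mysql_default',
    hive_cli_conn_id='hive_cli_default',
    dag=dag,
)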

Example 3: test_load_file

# Required import: from airflow.hooks.hive_hooks import HiveCliHook [as alias]
# Or: from airflow.hooks.hive_hooks.HiveCliHook import load_file [as alias]
    def test_load_file(self, mock_run_cli):
        filepath = "/path/to/input/file"
        table = "output_table"

        hook = HiveCliHook()
        hook.load_file(filepath=filepath, table=table, create=False)

        query = (
            "LOAD DATA LOCAL INPATH '{filepath}' "
            "OVERWRITE INTO TABLE {table} \n"
            .format(filepath=filepath, table=table)
        )
        mock_run_cli.assert_called_with(query)
Developer ID: AdamUnger, Project: incubator-airflow, Lines: 15, Source: test_hive_hook.py
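The mock_run_cli parameter implies that this test is decorated with a patch of HiveCliHook.run_cli, the method load_file uses to execute the generated HiveQL; the listing omits that decorator. A self-contained version might look like the sketch below; it assumes the default hive_cli_default connection exists (as in Airflow's own test setup), and the patch target mirrors the module path used above.

import unittest
from unittest import mock

from airflow.hooks.hive_hooks import HiveCliHook


class TestHiveCliHookLoadFile(unittest.TestCase):

    @mock.patch('airflow.hooks.hive_hooks.HiveCliHook.run_cli')
    def test_load_file(self, mock_run_cli):
        filepath = "/path/to/input/file"
        table = "output_table"

        hook = HiveCliHook()
        hook.load_file(filepath=filepath, table=table, create=False)

        # With create=False, only the LOAD DATA statement should be issued.
        query = (
            "LOAD DATA LOCAL INPATH '{filepath}' "
            "OVERWRITE INTO TABLE {table} \n"
            .format(filepath=filepath, table=table)
        )
        mock_run_cli.assert_called_with(query)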

Example 4: S3ToHiveTransfer

# Required import: from airflow.hooks.hive_hooks import HiveCliHook [as alias]
# Or: from airflow.hooks.hive_hooks.HiveCliHook import load_file [as alias]

#......... part of the code omitted here .........
        self.hive_cli_conn_id = hive_cli_conn_id
        self.aws_conn_id = aws_conn_id
        self.input_compressed = input_compressed
        self.tblproperties = tblproperties

        if (self.check_headers and
                not (self.field_dict is not None and self.headers)):
            raise AirflowException("To check_headers provide " +
                                   "field_dict and headers")

    def execute(self, context):
        # Downloading file from S3
        self.s3 = S3Hook(aws_conn_id=self.aws_conn_id)
        self.hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
        self.log.info("Downloading S3 file")

        if self.wildcard_match:
            if not self.s3.check_for_wildcard_key(self.s3_key):
                raise AirflowException("No key matches {0}"
                                       .format(self.s3_key))
            s3_key_object = self.s3.get_wildcard_key(self.s3_key)
        else:
            if not self.s3.check_for_key(self.s3_key):
                raise AirflowException(
                    "The key {0} does not exists".format(self.s3_key))
            s3_key_object = self.s3.get_key(self.s3_key)
        root, file_ext = os.path.splitext(s3_key_object.key)
        with TemporaryDirectory(prefix='tmps32hive_') as tmp_dir,\
                NamedTemporaryFile(mode="wb",
                                   dir=tmp_dir,
                                   suffix=file_ext) as f:
            self.log.info("Dumping S3 key {0} contents to local file {1}"
                          .format(s3_key_object.key, f.name))
            s3_key_object.download_fileobj(f)
            f.flush()
            if not self.headers:
                self.log.info("Loading file %s into Hive", f.name)
                self.hive.load_file(
                    f.name,
                    self.hive_table,
                    field_dict=self.field_dict,
                    create=self.create,
                    partition=self.partition,
                    delimiter=self.delimiter,
                    recreate=self.recreate,
                    tblproperties=self.tblproperties)
            else:
                # Decompressing file
                if self.input_compressed:
                    self.log.info("Uncompressing file %s", f.name)
                    fn_uncompressed = uncompress_file(f.name,
                                                      file_ext,
                                                      tmp_dir)
                    self.log.info("Uncompressed to %s", fn_uncompressed)
                    # uncompressed file available now so deleting
                    # compressed file to save disk space
                    f.close()
                else:
                    fn_uncompressed = f.name

                # Testing if header matches field_dict
                if self.check_headers:
                    self.log.info("Matching file header against field_dict")
                    header_list = self._get_top_row_as_list(fn_uncompressed)
                    if not self._match_headers(header_list):
                        raise AirflowException("Header check failed")
Developer ID: 7digital, Project: incubator-airflow, Lines: 70, Source: s3_to_hive_operator.py
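The attributes referenced above (s3_key, field_dict, wildcard_match, input_compressed, aws_conn_id, tblproperties, and so on) suggest a task definition along the following lines. The import path, bucket, key, and connection ids are illustrative assumptions; the sketch exercises the compressed-input branch of the snippet.

from collections import OrderedDict

from airflow.operators.s3_to_hive_operator import S3ToHiveTransfer  # path is an assumption; varies by Airflow version

s3_events_to_hive = S3ToHiveTransfer(
    task_id='s3_events_to_hive',
    s3_key='s3://my-bucket/exports/events.csv.gz',  # hypothetical compressed export
    field_dict=OrderedDict([('event_id', 'BIGINT'), ('payload', 'STRING')]),
    hive_table='staging.events',
    delimiter=',',
    headers=True,            # first row holds column names
    check_headers=True,      # validate that row against field_dict
    input_compressed=True,   # triggers the uncompress_file branch above
    wildcard_match=False,
    aws_conn_id='aws_default',
    hive_cli_conn_id='hive_cli_default',
    dag=dag,                 # assumes an existing DAG object
)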

Example 5: S3ToHiveTransfer

# Required import: from airflow.hooks.hive_hooks import HiveCliHook [as alias]
# Or: from airflow.hooks.hive_hooks.HiveCliHook import load_file [as alias]

#......... part of the code omitted here .........
    :param s3_conn_id: source s3 connection
    :type s3_conn_id: str
    :param hive_conn_id: destination hive connection
    :type hive_conn_id: str
    """

    template_fields = ('s3_key', 'partition', 'hive_table')
    template_ext = ()
    ui_color = '#a0e08c'

    @apply_defaults
    def __init__(
            self,
            s3_key,
            field_dict,
            hive_table,
            delimiter=',',
            create=True,
            recreate=False,
            partition=None,
            headers=False,
            check_headers=False,
            wildcard_match=False,
            s3_conn_id='s3_default',
            hive_cli_conn_id='hive_cli_default',
            *args, **kwargs):
        super(S3ToHiveTransfer, self).__init__(*args, **kwargs)
        self.s3_key = s3_key
        self.field_dict = field_dict
        self.hive_table = hive_table
        self.delimiter = delimiter
        self.create = create
        self.recreate = recreate
        self.partition = partition
        self.headers = headers
        self.check_headers = check_headers
        self.wildcard_match = wildcard_match
        self.hive_cli_conn_id = hive_cli_conn_id
        self.s3_conn_id = s3_conn_id

    def execute(self, context):
        self.hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
        self.s3 = S3Hook(s3_conn_id=self.s3_conn_id)
        logging.info("Downloading S3 file")
        if self.wildcard_match:
            if not self.s3.check_for_wildcard_key(self.s3_key):
                raise AirflowException("No key matches {0}".format(self.s3_key))
            s3_key_object = self.s3.get_wildcard_key(self.s3_key)
        else:
            if not self.s3.check_for_key(self.s3_key):
                raise AirflowException(
                    "The key {0} does not exists".format(self.s3_key))
            s3_key_object = self.s3.get_key(self.s3_key)
        with NamedTemporaryFile("w") as f:
            logging.info("Dumping S3 key {0} contents to local"
                         " file {1}".format(s3_key_object.key, f.name))
            s3_key_object.get_contents_to_file(f)
            f.flush()
            self.s3.connection.close()
            if not self.headers:
                logging.info("Loading file into Hive")
                self.hive.load_file(
                    f.name,
                    self.hive_table,
                    field_dict=self.field_dict,
                    create=self.create,
                    partition=self.partition,
                    delimiter=self.delimiter,
                    recreate=self.recreate)
            else:
                with open(f.name, 'r') as tmpf:
                    if self.check_headers:
                        header_l = tmpf.readline()
                        header_line = header_l.rstrip()
                        header_list = header_line.split(self.delimiter)
                        field_names = list(self.field_dict.keys())
                        test_field_match = [h1.lower() == h2.lower() for h1, h2
                                            in zip(header_list, field_names)]
                        if not all(test_field_match):
                            logging.warning("Headers do not match field names"
                                            "File headers:\n {header_list}\n"
                                            "Field names: \n {field_names}\n"
                                            "".format(**locals()))
                            raise AirflowException("Headers do not match the "
                                            "field_dict keys")
                    with NamedTemporaryFile("w") as f_no_headers:
                        tmpf.seek(0)
                        next(tmpf)
                        for line in tmpf:
                            f_no_headers.write(line)
                        f_no_headers.flush()
                        logging.info("Loading file without headers into Hive")
                        self.hive.load_file(
                            f_no_headers.name,
                            self.hive_table,
                            field_dict=self.field_dict,
                            create=self.create,
                            partition=self.partition,
                            delimiter=self.delimiter,
                            recreate=self.recreate)
Developer ID: AndreiDev, Project: incubator-airflow, Lines: 104, Source: s3_to_hive_operator.py
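This older variant of S3ToHiveTransfer takes s3_conn_id instead of aws_conn_id and, when headers is set, validates the header row against field_dict and strips it before calling load_file. A minimal, hypothetical task for that path (import path, key, and connection ids are assumptions) might be:

from collections import OrderedDict

from airflow.operators.s3_to_hive_operator import S3ToHiveTransfer  # older Airflow releases; path is an assumption

s3_csv_to_hive = S3ToHiveTransfer(
    task_id='s3_csv_to_hive',
    s3_key='s3://my-bucket/exports/users.csv',  # hypothetical key whose first line is a header
    field_dict=OrderedDict([('id', 'BIGINT'), ('email', 'STRING')]),
    hive_table='staging.users',
    delimiter=',',
    headers=True,        # first line holds column names ...
    check_headers=True,  # ... and is checked against field_dict before being stripped
    s3_conn_id='s3_default',
    hive_cli_conn_id='hive_cli_default',
    dag=dag,
)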


Note: The airflow.hooks.hive_hooks.HiveCliHook.load_file examples in this article were compiled by 纯净天空 from GitHub, MSDocs and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by their respective authors, and the copyright belongs to those authors. Consult the corresponding project's License before distributing or reusing the code; do not reproduce without permission.