本文整理汇总了Python中airflow.hooks.hive_hooks.HiveMetastoreHook.check_for_named_partition方法的典型用法代码示例。如果您正苦于以下问题:Python HiveMetastoreHook.check_for_named_partition方法的具体用法?Python HiveMetastoreHook.check_for_named_partition怎么用?Python HiveMetastoreHook.check_for_named_partition使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类airflow.hooks.hive_hooks.HiveMetastoreHook的用法示例。
在下文中一共展示了HiveMetastoreHook.check_for_named_partition方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: NamedHivePartitionSensor
# 需要导入模块: from airflow.hooks.hive_hooks import HiveMetastoreHook [as 别名]
# 或者: from airflow.hooks.hive_hooks.HiveMetastoreHook import check_for_named_partition [as 别名]
class NamedHivePartitionSensor(BaseSensorOperator):
    """
    Waits for a set of partitions to show up in Hive.

    Partitions are checked in the order given. The index of the first
    partition that has not been found yet is kept on the instance between
    pokes, so partitions that were already found are not queried again.

    :param partition_names: List of fully qualified names of the
        partitions to wait for. A fully qualified name is of the
        form ``schema.table/pk1=pv1/pk2=pv2``, for example,
        default.users/ds=2016-01-01. This is passed as is to the metastore
        Thrift client ``get_partitions_by_name`` method. Note that
        you cannot use logical or comparison operators as in
        HivePartitionSensor.
    :type partition_names: list of strings
    :param metastore_conn_id: reference to the metastore thrift service
        connection id
    :type metastore_conn_id: str
    :param poke_interval: poke interval forwarded to the base sensor;
        defaults to ``60 * 3``
    :raises TypeError: if ``partition_names`` is a single string rather
        than a list of strings
    """

    template_fields = ('partition_names', )
    ui_color = '#8d99ae'

    @apply_defaults
    def __init__(
            self,
            partition_names,
            metastore_conn_id='metastore_default',
            poke_interval=60 * 3,
            *args,
            **kwargs):
        super(NamedHivePartitionSensor, self).__init__(
            poke_interval=poke_interval, *args, **kwargs)

        # A bare string would otherwise be iterated character by character.
        if isinstance(partition_names, basestring):
            raise TypeError('partition_names must be an array of strings')

        self.metastore_conn_id = metastore_conn_id
        self.partition_names = partition_names
        # Index of the first partition that has not been found yet.
        self.next_poke_idx = 0

    @classmethod
    def parse_partition_name(cls, partition):
        """
        Split a fully qualified partition name into its components.

        :param partition: name of the form ``schema.table/pk1=pv1/pk2=pv2``
        :return: tuple ``(schema, table, partition_spec)``
        :raises ValueError: if the name has no ``/`` or the part before the
            first ``/`` has no ``.``
        """
        # Split off the partition spec first: in the documented form the
        # first '/' ends the table name, whereas partition values may
        # themselves contain dots (e.g. ``ds=2016.01.01``). Splitting on '.'
        # first would mis-parse such names when the schema is missing
        # instead of rejecting them.
        try:
            qualified_table, partition_spec = partition.split('/', 1)
            schema, table = qualified_table.split('.', 1)
        except ValueError:
            raise ValueError('Could not parse ' + partition)
        return schema, table, partition_spec

    def poke(self, context):
        """
        Check the remaining partitions in order.

        :param context: task context supplied by the caller; not used here
        :return: ``True`` once every partition has been found, ``False`` as
            soon as one partition is still missing
        """
        # The hook is created lazily on the first poke and then reused.
        if not hasattr(self, 'hook'):
            from airflow.hooks.hive_hooks import HiveMetastoreHook
            self.hook = HiveMetastoreHook(
                metastore_conn_id=self.metastore_conn_id)

        def poke_partition(partition):
            schema, table, partition = self.parse_partition_name(partition)
            self.log.info('Poking for %s.%s/%s', schema, table, partition)
            return self.hook.check_for_named_partition(
                schema, table, partition)

        while self.next_poke_idx < len(self.partition_names):
            if not poke_partition(self.partition_names[self.next_poke_idx]):
                return False
            # Found: never query this partition again on later pokes.
            self.next_poke_idx += 1
        return True
示例2: NamedHivePartitionSensor
# 需要导入模块: from airflow.hooks.hive_hooks import HiveMetastoreHook [as 别名]
# 或者: from airflow.hooks.hive_hooks.HiveMetastoreHook import check_for_named_partition [as 别名]
class NamedHivePartitionSensor(BaseSensorOperator):
    """
    Waits for a set of partitions to show up in Hive.

    On every poke each partition that is still pending is checked; the
    ones that were found are dropped from ``partition_names``, and the
    sensor succeeds once that list is empty.

    :param partition_names: List of fully qualified names of the
        partitions to wait for. A fully qualified name is of the
        form ``schema.table/pk1=pv1/pk2=pv2``, for example,
        default.users/ds=2016-01-01. This is passed as is to the metastore
        Thrift client ``get_partitions_by_name`` method. Note that
        you cannot use logical or comparison operators as in
        HivePartitionSensor. If the ``schema.`` prefix is omitted,
        the schema ``default`` is used.
    :type partition_names: list[str]
    :param metastore_conn_id: reference to the metastore thrift service
        connection id
    :type metastore_conn_id: str
    :param poke_interval: poke interval forwarded to the base sensor;
        defaults to ``60 * 3``
    :param hook: optional ready-made hook object exposing
        ``check_for_named_partition``; when omitted, a ``HiveMetastoreHook``
        is created from ``metastore_conn_id`` on first use
    :raises TypeError: if ``partition_names`` is a single string rather
        than a list of strings
    """

    template_fields = ('partition_names',)
    ui_color = '#8d99ae'

    @apply_defaults
    def __init__(self,
                 partition_names,
                 metastore_conn_id='metastore_default',
                 poke_interval=60 * 3,
                 hook=None,
                 *args,
                 **kwargs):
        super().__init__(
            poke_interval=poke_interval, *args, **kwargs)

        # A bare string would otherwise be iterated character by character.
        # ``basestring`` is not a Python 3 builtin; ``(str, bytes)`` is the
        # Python 3 equivalent and needs no compatibility import.
        if isinstance(partition_names, (str, bytes)):
            raise TypeError('partition_names must be an array of strings')

        self.metastore_conn_id = metastore_conn_id
        self.partition_names = partition_names
        self.hook = hook
        # An explicit hook takes precedence, so a custom connection id
        # passed alongside it is never used; warn about the conflict.
        if self.hook and metastore_conn_id != 'metastore_default':
            self.log.warning(
                'A hook was passed but a non default metastore_conn_id=%s '
                'was used', metastore_conn_id
            )

    @staticmethod
    def parse_partition_name(partition):
        """
        Split a partition name into schema, table and partition spec.

        :param partition: name of the form ``schema.table/pk1=pv1/pk2=pv2``;
            the ``schema.`` prefix may be omitted
        :return: tuple ``(schema, table, partition_spec)``, with ``schema``
            set to ``'default'`` when the name carries no schema
        :raises ValueError: if the name contains no ``/``
        """
        # Split off the partition spec first: in the documented form the
        # first '/' ends the table name, whereas partition values may
        # themselves contain dots (e.g. ``ds=2016.01.01``). Splitting on '.'
        # first would treat such a dot as the schema separator whenever the
        # schema is omitted.
        table_and_spec = partition.split('/', 1)
        if len(table_and_spec) == 1:
            raise ValueError('Could not parse ' + partition +
                             ' into table, partition')
        qualified_table, partition_spec = table_and_spec

        schema_and_table = qualified_table.split('.', 1)
        if len(schema_and_table) == 1:
            schema, table = 'default', qualified_table
        else:
            schema, table = schema_and_table
        return schema, table, partition_spec

    def poke_partition(self, partition):
        """
        Check whether a single named partition exists.

        :param partition: partition name accepted by
            ``parse_partition_name``
        :return: the result of the hook's ``check_for_named_partition``
        """
        # The hook is created lazily unless one was injected at construction.
        if not self.hook:
            from airflow.hooks.hive_hooks import HiveMetastoreHook
            self.hook = HiveMetastoreHook(
                metastore_conn_id=self.metastore_conn_id)

        schema, table, partition = self.parse_partition_name(partition)

        self.log.info('Poking for %s.%s/%s', schema, table, partition)
        return self.hook.check_for_named_partition(
            schema, table, partition)

    def poke(self, context):
        """
        Check every pending partition and forget the ones that exist.

        :param context: task context supplied by the caller; not used here
        :return: ``True`` when no pending partitions remain
        """
        # Keep only the partitions that are still missing, so later pokes
        # do not query the ones that were already found.
        self.partition_names = [
            partition_name for partition_name in self.partition_names
            if not self.poke_partition(partition_name)
        ]
        return not self.partition_names