本文整理汇总了Python中pydruid.utils.filters.Filter.append方法的典型用法代码示例。如果您正苦于以下问题：Python Filter.append方法的具体用法？Python Filter.append怎么用？Python Filter.append使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pydruid.utils.filters.Filter的用法示例。
下文共展示了Filter.append方法的2个代码示例，这些示例默认按受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: query
# Required import: from pydruid.utils.filters import Filter [as alias]
# Alternatively: from pydruid.utils.filters.Filter import append [as alias]
def query( # druid
self, groupby, metrics,
granularity,
from_dttm, to_dttm,
filter=None, # noqa
is_timeseries=True,
timeseries_limit=None,
row_limit=None,
inner_from_dttm=None, inner_to_dttm=None,
extras=None, # noqa
select=None,): # noqa
"""Runs a query against Druid and returns a dataframe.
This query interface is common to SqlAlchemy and Druid
"""
# NOTE(review): this excerpt has lost its original indentation and its tail
# is omitted, so the block nesting below cannot be confirmed from the text.
# TODO refactor into using a TBD Query object
# Timestamp captured up front; where it is consumed is not visible here.
qry_start_dttm = datetime.now()
# The inner window falls back to the outer from/to window when not supplied.
inner_from_dttm = inner_from_dttm or from_dttm
inner_to_dttm = inner_to_dttm or to_dttm
# add tzinfo to native datetime with config
from_dttm = from_dttm.replace(tzinfo=config.get("DRUID_TZ"))
to_dttm = to_dttm.replace(tzinfo=config.get("DRUID_TZ"))
query_str = ""
# Index this object's metrics by name so each requested name can be looked up.
metrics_dict = {m.metric_name: m for m in self.metrics}
# all_metrics collects the metric names the aggregations must cover;
# post_aggs maps each 'postagg' metric name to the post-aggregator built for it.
all_metrics = []
post_aggs = {}
for metric_name in metrics:
metric = metrics_dict[metric_name]
if metric.metric_type != 'postagg':
all_metrics.append(metric_name)
else:
# For a 'postagg' metric, the names it reads are added to all_metrics:
# the 'fieldName' of every 'fieldAccess' entry in 'fields', plus 'fieldNames'.
conf = metric.json_obj
fields = conf.get('fields', [])
all_metrics += [
f.get('fieldName') for f in fields
if f.get('type') == 'fieldAccess']
all_metrics += conf.get('fieldNames', [])
if conf.get('type') == 'javascript':
post_aggs[metric_name] = JavascriptPostAggregator(
name=conf.get('name'),
field_names=conf.get('fieldNames'),
function=conf.get('function'))
else:
# Every other type is built with Postaggregator; 'fn' defaults to "/".
post_aggs[metric_name] = Postaggregator(
conf.get('fn', "/"),
conf.get('fields', []),
conf.get('name', ''))
# Keep only the aggregation specs whose metric name was collected above.
aggregations = {
m.metric_name: m.json_obj
for m in self.metrics
if m.metric_name in all_metrics
}
# A falsy granularity becomes "all". Any other value goes through
# utils.parse_human_timedelta and total_seconds() * 1000 (presumably milliseconds).
granularity = granularity or "all"
if granularity != "all":
granularity = utils.parse_human_timedelta(
granularity).total_seconds() * 1000
# A non-string granularity is wrapped in a {"type": "duration", ...} dict.
if not isinstance(granularity, string_types):
granularity = {"type": "duration", "duration": granularity}
# NOTE(review): extras defaults to None, so extras.get raises AttributeError
# whenever this line runs without an explicit extras dict -- confirm callers.
origin = extras.get('druid_time_origin')
if origin:
dttm = utils.parse_human_datetime(origin)
granularity['origin'] = dttm.isoformat()
# Query parameters gathered into one dict; its consumer is not visible here.
qry = dict(
datasource=self.datasource_name,
dimensions=groupby,
aggregations=aggregations,
granularity=granularity,
post_aggregations=post_aggs,
intervals=from_dttm.isoformat() + '/' + to_dttm.isoformat(),
)
# Translate (column, operator, value) triples into filter conditions.
# NOTE(review): filter defaults to None, which cannot be iterated -- confirm
# that callers always pass a sequence.
filters = None
for col, op, eq in filter:
# cond stays None when op is none of '==', '!=', 'in', 'not in'.
cond = None
if op == '==':
cond = Dimension(col) == eq
elif op == '!=':
cond = ~(Dimension(col) == eq)
elif op in ('in', 'not in'):
# eq is split on commas; more than one piece yields an "or" filter over
# the stripped pieces, otherwise a single equality on the raw eq value.
fields = []
splitted = eq.split(',')
if len(splitted) > 1:
for s in eq.split(','):
s = s.strip()
fields.append(Filter.build_filter(Dimension(col) == s))
cond = Filter(type="or", fields=fields)
else:
cond = Dimension(col) == eq
if op == 'not in':
cond = ~cond
# When earlier conditions exist, wrap the new and accumulated ones in an
# "and" filter; otherwise the new condition becomes the accumulated filter.
if filters:
filters = Filter(type="and", fields=[
Filter.build_filter(cond),
Filter.build_filter(filters)
])
else:
filters = cond
# ......... part of the code is omitted here .........
示例2: query
# Required import: from pydruid.utils.filters import Filter [as alias]
# Alternatively: from pydruid.utils.filters.Filter import append [as alias]
def query(
self, groupby, metrics,
granularity,
from_dttm, to_dttm,
limit_spec=None,
filter=None,
is_timeseries=True,
timeseries_limit=None,
row_limit=None,
inner_from_dttm=None, inner_to_dttm=None,
extras=None):
# NOTE(review): this excerpt has lost its original indentation and its tail
# is omitted, so the block nesting below cannot be confirmed from the text.
# Timestamp captured up front; where it is consumed is not visible here.
qry_start_dttm = datetime.now()
# The inner window falls back to the outer from/to window when not supplied.
inner_from_dttm = inner_from_dttm or from_dttm
inner_to_dttm = inner_to_dttm or to_dttm
# add tzinfo to native datetime with config
from_dttm = from_dttm.replace(tzinfo=config.get("DRUID_TZ"))
to_dttm = to_dttm.replace(tzinfo=config.get("DRUID_TZ"))
# Accumulates a textual trace of the queries issued further down.
query_str = ""
# Aggregation specs for those of this object's metrics whose name was requested.
aggregations = {
m.metric_name: m.json_obj
for m in self.metrics if m.metric_name in metrics
}
# Anything other than "all" goes through utils.parse_human_timedelta and
# total_seconds() * 1000 (presumably milliseconds).
if granularity != "all":
granularity = utils.parse_human_timedelta(
granularity).total_seconds() * 1000
# NOTE(review): basestring is a Python 2 builtin; under Python 3 it must be
# defined elsewhere in the file or this line raises NameError.
if not isinstance(granularity, basestring):
granularity = {"type": "duration", "duration": granularity}
# Base query parameters; 'filter' is added below when a filter was built.
qry = dict(
datasource=self.datasource_name,
dimensions=groupby,
aggregations=aggregations,
granularity=granularity,
intervals=from_dttm.isoformat() + '/' + to_dttm.isoformat(),
)
# Translate (column, operator, value) triples into filter conditions.
# NOTE(review): filter defaults to None, which cannot be iterated -- confirm
# that callers always pass a sequence.
filters = None
for col, op, eq in filter:
# cond stays None when op is none of '==', '!=', 'in', 'not in'.
cond = None
if op == '==':
cond = Dimension(col) == eq
elif op == '!=':
cond = ~(Dimension(col) == eq)
elif op in ('in', 'not in'):
# eq is split on commas; more than one piece yields an "or" filter over
# the stripped pieces, otherwise a single equality on the raw eq value.
fields = []
splitted = eq.split(',')
if len(splitted) > 1:
for s in eq.split(','):
s = s.strip()
fields.append(Filter.build_filter(Dimension(col) == s))
cond = Filter(type="or", fields=fields)
else:
cond = Dimension(col) == eq
if op == 'not in':
cond = ~cond
# When earlier conditions exist, wrap the new and accumulated ones in an
# "and" filter; otherwise the new condition becomes the accumulated filter.
if filters:
filters = Filter(type="and", fields=[
Filter.build_filter(cond),
Filter.build_filter(filters)
])
else:
filters = cond
if filters:
qry['filter'] = filters
client = self.cluster.get_pydruid_client()
# Keep a reference to the filter built above; `filters` is rebound below.
orig_filters = filters
if timeseries_limit and is_timeseries:
# Limit on the number of timeseries, doing a two-phases query
# Phase 1 works on a deep copy so the base qry dict is left untouched.
pre_qry = deepcopy(qry)
pre_qry['granularity'] = "all"
# NOTE(review): when `metrics` is empty the fallback self.metrics[0] is a
# metric object (it has .metric_name above), not a name -- confirm intent.
pre_qry['limit_spec'] = {
"type": "default",
"limit": timeseries_limit,
'intervals': inner_from_dttm.isoformat() + '/' + inner_to_dttm.isoformat(),
"columns": [{
"dimension": metrics[0] if metrics else self.metrics[0],
"direction": "descending",
}],
}
client.groupby(**pre_qry)
query_str += "// Two phase query\n// Phase 1\n"
query_str += json.dumps(client.query_dict, indent=2) + "\n"
# NOTE(review): the newline after "//" puts "Phase 2 ..." on a line without
# the "// " prefix used for the Phase 1 header.
query_str += "//\nPhase 2 (built based on phase one's results)\n"
df = client.export_pandas()
if df is not None and not df.empty:
dims = qry['dimensions']
# `filters` is rebound to a plain list here, so the .append call below is
# list.append, not a method of pydruid's Filter.
filters = []
for index, row in df.iterrows():
# One equality filter per dimension, using this row's value for it.
fields = []
for dim in dims:
f = Filter.build_filter(Dimension(dim) == row[dim])
fields.append(f)
if len(fields) > 1:
filt = Filter(type="and", fields=fields)
filters.append(Filter.build_filter(filt))
elif fields:
# ......... part of the code is omitted here .........