This article collects typical usage examples of the Python method pyspark.sql.functions.expr. If you have been wondering what functions.expr does, how to call it, or what real code that uses it looks like, the curated examples below may help. You can also explore further usages of the module this method belongs to, pyspark.sql.functions.
The following shows 15 code examples of the functions.expr method, sorted by popularity by default.
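Before the examples, here is a minimal, self-contained sketch (column names made up for the demo) of what pyspark.sql.functions.expr does: it parses a SQL expression string and returns a Column that can be used anywhere a Column is expected.

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, 'ab'), (2, 'cd')], ['id', 'txt'])

# F.expr turns a SQL expression string into a Column object
df.select(
    F.expr('id + 1').alias('id_plus_one'),
    F.expr('upper(txt)').alias('txt_upper'),
).show()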
Example 1: compile_aggregator
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_aggregator(t, expr, scope, fn, context=None, **kwargs):
    op = expr.op()
    src_col = t.translate(op.arg, scope)

    if getattr(op, 'where', None) is not None:
        condition = t.translate(op.where, scope)
        src_col = F.when(condition, src_col)

    col = fn(src_col)
    if context is None:
        # We are trying to compile an expr such as some_col.max()
        # to a Spark expression.
        # Here we get the root table df of that column and compile
        # the expr to:
        # df.select(max(some_col))
        return t.translate(expr.op().arg.op().table, scope).select(col)
    elif context == AggregationContext.WINDOW:
        window = kwargs['window']
        return col.over(window)
    else:
        return col
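The branches above compile to two different shapes of PySpark code; a hedged sketch (toy data and column names invented here) of what each looks like in plain PySpark:

from pyspark.sql import SparkSession, Window
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([('a', 1), ('a', 3), ('b', 2)], ['grp', 'val'])

# context is None: the aggregator collapses the frame to a single row
df.select(F.max('val')).show()

# AggregationContext.WINDOW: the same aggregator evaluated over a window
w = Window.partitionBy('grp')
df.select('grp', 'val', F.max('val').over(w).alias('grp_max')).show()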
Example 2: compile_cast
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_cast(t, expr, scope, **kwargs):
    op = expr.op()

    if isinstance(op.to, dtypes.Interval):
        if isinstance(op.arg.op(), ops.Literal):
            return interval(op.arg.op().value, op.to.unit)
        else:
            raise com.UnsupportedArgumentError(
                'Casting to intervals is only supported for literals '
                'in the PySpark backend. {} not allowed.'.format(type(op.arg))
            )

    if isinstance(op.to, dtypes.Array):
        cast_type = ibis_array_dtype_to_spark_dtype(op.to)
    else:
        cast_type = ibis_dtype_to_spark_dtype(op.to)

    src_column = t.translate(op.arg, scope)
    return src_column.cast(cast_type)
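The final call is ordinary Column.cast; a small illustration (toy DataFrame, types chosen only for the demo) of the two kinds of arguments it accepts:

from pyspark.sql import SparkSession, functions as F
from pyspark.sql.types import ArrayType, LongType

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([('1', ['2', '3'])], ['num', 'nums'])

# Column.cast accepts a DDL type string or a DataType instance, which is the
# kind of value the ibis_*_to_spark_dtype helpers above produce
df.select(
    df.num.cast('bigint'),
    df.nums.cast(ArrayType(LongType())),
).printSchema()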
Example 3: compile_literal
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_literal(t, expr, scope, raw=False, **kwargs):
    """If raw is True, don't wrap the result with F.lit()."""
    value = expr.op().value

    if raw:
        return value

    if isinstance(value, collections.abc.Set):
        # Don't wrap sets with F.lit
        if isinstance(value, frozenset):
            # Spark doesn't like frozenset
            return set(value)
        else:
            return value
    elif isinstance(value, list):
        return F.array(*[F.lit(v) for v in value])
    else:
        return F.lit(expr.op().value)
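The non-raw branches map onto ordinary PySpark literal construction; a brief sketch (values made up):

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()

# A scalar becomes F.lit(...); a Python list becomes F.array of literal columns
spark.range(1).select(
    F.lit(42).alias('scalar_lit'),
    F.array(*[F.lit(v) for v in [1, 2, 3]]).alias('array_lit'),
).show()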
Example 4: compile_notany
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_notany(t, expr, scope, *, context=None, window=None, **kwargs):
    # The code here is a little ugly because the translation differs
    # depending on the context.
    # When translating col.notany() (context is None), we return a dataframe,
    # so we need to negate the aggregator, i.e. df.select(~F.max(col)).
    # When translating col.notany().over(w), we need to negate the result
    # after the window translation, i.e. ~(F.max(col).over(w)).
    if context is None:

        def fn(col):
            return ~(F.max(col))

        return compile_aggregator(t, expr, scope, fn, context, **kwargs)
    else:
        return ~compile_any(
            t, expr, scope, context=context, window=window, **kwargs
        )
Example 5: compile_join
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_join(t, expr, scope, how):
    op = expr.op()
    left_df = t.translate(op.left, scope)
    right_df = t.translate(op.right, scope)

    pred_columns = []
    for pred in op.predicates:
        pred_op = pred.op()
        if not isinstance(pred_op, ops.Equals):
            raise NotImplementedError(
                "Only equality predicates are supported, but got {}".format(
                    type(pred_op)
                )
            )
        pred_columns.append(pred_op.left.get_name())

    return left_df.join(right_df, pred_columns, how)
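The returned value is a standard DataFrame.join on a list of column names; a toy example (data and names invented) of what the compiled call does:

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
left_df = spark.createDataFrame([(1, 'x'), (2, 'y')], ['id', 'a'])
right_df = spark.createDataFrame([(1, 'u'), (3, 'v')], ['id', 'b'])

# Joining on a list of column names keeps a single copy of each join column
left_df.join(right_df, ['id'], 'inner').show()
left_df.join(right_df, ['id'], 'left').show()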
Example 6: with_norm_query
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def with_norm_query(df: DataFrame) -> DataFrame:
    return df.withColumn(
        'norm_query',
        F.expr('stemmer(query, substring(wikiid, 1, 2))'))
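Note that stemmer(...) inside F.expr only resolves because a SQL-callable function with that name was registered on the session elsewhere in the project; that registration is not shown here. A minimal sketch of how such a function could be registered, with a placeholder implementation rather than the project's real stemmer:

from pyspark.sql import SparkSession
from pyspark.sql.types import StringType

spark = SparkSession.builder.getOrCreate()

def naive_stem(query, lang):
    # Placeholder only: the real function presumably does language-aware stemming
    return query.lower() if query is not None else None

# Registering the UDF makes it callable by name from SQL strings, i.e. from F.expr
spark.udf.register('stemmer', naive_stem, StringType())

spark.sql("SELECT stemmer('Running Shoes', 'en') AS norm_query").show()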
Example 7: with_exploded_hits
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def with_exploded_hits(df: DataFrame) -> DataFrame:
    return (
        df
        .select(
            F.posexplode('hit_page_ids').alias('hit_position', 'hit_page_id'),
            *df.columns)
        .drop('hit_page_ids')
        .withColumn('clicked', F.expr('array_contains(click_page_ids, hit_page_id)'))
        .drop('click_page_ids'))
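A self-contained toy run of the same pattern (data invented here), showing posexplode producing one row per hit and array_contains flagging which hits were clicked:

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame(
    [('q1', [10, 20, 30], [20])],
    ['query', 'hit_page_ids', 'click_page_ids'])

exploded = (
    df
    # posexplode yields (position, value) pairs, one output row per array element
    .select(
        F.posexplode('hit_page_ids').alias('hit_position', 'hit_page_id'),
        *df.columns)
    .drop('hit_page_ids')
    .withColumn('clicked', F.expr('array_contains(click_page_ids, hit_page_id)'))
    .drop('click_page_ids'))

exploded.show()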
Example 8: translate
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def translate(self, expr, scope, **kwargs):
    """
    Translate an Ibis expression into a PySpark object.

    All translated expressions are cached within scope. If an expression is
    found within scope, it is returned. Otherwise, it is translated and
    cached for future reference.

    :param expr: ibis expression
    :param scope: dictionary mapping from operation to translated result
    :param kwargs: parameters passed as keyword args (e.g. window)
    :return: translated PySpark DataFrame or Column object
    """
    # The operation node type the typed expression wraps
    op = expr.op()

    if op in scope:
        return scope[op]
    elif type(op) in self._registry:
        formatter = self._registry[type(op)]
        result = formatter(self, expr, scope, **kwargs)
        scope[op] = result
        return result
    else:
        raise com.OperationNotDefinedError(
            'No translation rule for {}'.format(type(op))
        )
Example 9: compile_datasource
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_datasource(t, expr, scope):
    op = expr.op()
    name, _, client = op.args
    return client._session.table(name)
Example 10: compile_sql_query_result
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_sql_query_result(t, expr, scope, **kwargs):
    op = expr.op()
    query, _, client = op.args
    return client._session.sql(query)
Example 11: compile_selection
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_selection(t, expr, scope, **kwargs):
    op = expr.op()
    src_table = t.translate(op.table, scope)

    col_in_selection_order = []
    for selection in op.selections:
        if isinstance(selection, types.TableExpr):
            col_in_selection_order.extend(selection.columns)
        elif isinstance(selection, (types.ColumnExpr, types.ScalarExpr)):
            col = t.translate(selection, scope=scope).alias(
                selection.get_name()
            )
            col_in_selection_order.append(col)
        else:
            raise NotImplementedError(
                f"Unrecognized type in selections: {type(selection)}"
            )

    if col_in_selection_order:
        src_table = src_table[col_in_selection_order]

    for predicate in op.predicates:
        col = t.translate(predicate, scope)
        src_table = src_table[col]

    if op.sort_keys:
        sort_cols = [t.translate(key, scope) for key in op.sort_keys]
        return src_table.sort(*sort_cols)
    else:
        return src_table
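The src_table[...] indexing above relies on DataFrame.__getitem__: a list of columns projects, and a boolean Column filters. A small illustration (toy data, order simplified) of those two behaviors:

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, 'a', True), (2, 'b', False)], ['id', 'name', 'flag'])

# df[boolean_column] filters, which is how each translated predicate is applied
kept = df[df.flag]

# df[list_of_columns] projects, mirroring the col_in_selection_order step
kept[[F.col('id'), F.col('name').alias('label')]].show()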
Example 12: compile_sort_key
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_sort_key(t, expr, scope, **kwargs):
    op = expr.op()
    col = t.translate(op.expr, scope)

    if op.ascending:
        return col.asc()
    else:
        return col.desc()
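The asc()/desc() results are the sort expressions later consumed by DataFrame.sort in compile_selection; for instance (toy data):

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, 'b'), (2, 'a')], ['id', 'name'])

# asc()/desc() produce sort expressions that DataFrame.sort accepts directly
df.sort(F.col('name').asc(), F.col('id').desc()).show()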
Example 13: compile_distinct
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_distinct(t, expr, scope, **kwargs):
    op = expr.op()
    root_table_expr = op.root_tables()[0].to_expr()
    src_table = t.translate(root_table_expr, scope)
    src_column_name = op.arg.get_name()
    return src_table.select(src_column_name).distinct()[src_column_name]
Example 14: compile_self_reference
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_self_reference(t, expr, scope, **kwargs):
    op = expr.op()
    return t.translate(op.table, scope)
Example 15: compile_limit
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_limit(t, expr, scope, **kwargs):
    op = expr.op()
    if op.offset != 0:
        raise com.UnsupportedArgumentError(
            'The PySpark backend does not support a non-zero offset for '
            'the limit operation. Got offset {}.'.format(op.offset)
        )
    df = t.translate(op.table, scope)
    return df.limit(op.n)