

Python functions.expr Method Code Examples

This article collects typical usage examples of the pyspark.sql.functions.expr method in Python. If you are wondering how exactly to use functions.expr, how to call it, or what real-world examples look like, the curated code samples below may help. You can also explore further usage examples for pyspark.sql.functions, the module this method belongs to.


A total of 15 code examples of the functions.expr method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
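
Before looking at the examples, a minimal, self-contained sketch of what pyspark.sql.functions.expr itself does may be useful: it parses a SQL expression string into a Column, so string expressions and the Column API can be mixed freely. The DataFrame and column names below are made up purely for illustration.

from pyspark.sql import SparkSession
import pyspark.sql.functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "label"])

# F.expr parses a SQL expression string into a Column, so the two
# selects below produce the same result.
df.select(F.expr("id + 1").alias("next_id")).show()
df.select((F.col("id") + 1).alias("next_id")).show()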

Example 1: compile_aggregator

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_aggregator(t, expr, scope, fn, context=None, **kwargs):
    op = expr.op()
    src_col = t.translate(op.arg, scope)

    if getattr(op, 'where', None) is not None:
        condition = t.translate(op.where, scope)
        src_col = F.when(condition, src_col)

    col = fn(src_col)
    if context is None:
        # We are trying to compile an expr such as some_col.max()
        # to a Spark expression.
        # Here we get the root table df of that column and compile
        # the expr to:
        # df.select(max(some_col))
        return t.translate(expr.op().arg.op().table, scope).select(col)
    elif context == AggregationContext.WINDOW:
        window = kwargs['window']
        return col.over(window)
    else:
        return col 
Developer: ibis-project, Project: ibis, Lines of code: 23, Source file: compiler.py

Example 2: compile_cast

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_cast(t, expr, scope, **kwargs):
    op = expr.op()

    if isinstance(op.to, dtypes.Interval):
        if isinstance(op.arg.op(), ops.Literal):
            return interval(op.arg.op().value, op.to.unit)
        else:
            raise com.UnsupportedArgumentError(
                'Casting to intervals is only supported for literals '
                'in the PySpark backend. {} not allowed.'.format(type(op.arg))
            )

    if isinstance(op.to, dtypes.Array):
        cast_type = ibis_array_dtype_to_spark_dtype(op.to)
    else:
        cast_type = ibis_dtype_to_spark_dtype(op.to)

    src_column = t.translate(op.arg, scope)
    return src_column.cast(cast_type) 
Developer: ibis-project, Project: ibis, Lines of code: 21, Source file: compiler.py

Example 3: compile_literal

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_literal(t, expr, scope, raw=False, **kwargs):
    """ If raw is True, don't wrap the result with F.lit()
    """
    value = expr.op().value

    if raw:
        return value

    if isinstance(value, collections.abc.Set):
        # Don't wrap set with F.lit
        if isinstance(value, frozenset):
            # Spark doesn't like frozenset
            return set(value)
        else:
            return value
    elif isinstance(value, list):
        return F.array(*[F.lit(v) for v in value])
    else:
        return F.lit(expr.op().value) 
Developer: ibis-project, Project: ibis, Lines of code: 21, Source file: compiler.py

Example 4: compile_notany

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_notany(t, expr, scope, *, context=None, window=None, **kwargs):
    # The code here is a little ugly because the translation differs
    # depending on the context.
    # When translating col.notany() (context is None), we return the dataframe,
    # so we need to negate the aggregator, i.e., df.select(~F.max(col)).
    # When translating col.notany().over(w), we need to negate the result
    # after the window translation, i.e., ~(F.max(col).over(w)).

    if context is None:

        def fn(col):
            return ~(F.max(col))

        return compile_aggregator(t, expr, scope, fn, context, **kwargs)
    else:
        return ~compile_any(
            t, expr, scope, context=context, window=window, **kwargs
        ) 
Developer: ibis-project, Project: ibis, Lines of code: 20, Source file: compiler.py

Example 5: compile_join

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_join(t, expr, scope, how):
    op = expr.op()

    left_df = t.translate(op.left, scope)
    right_df = t.translate(op.right, scope)

    pred_columns = []
    for pred in op.predicates:
        pred_op = pred.op()
        if not isinstance(pred_op, ops.Equals):
            raise NotImplementedError(
                "Only equality predicate is supported, but got {}".format(
                    type(pred_op)
                )
            )
        pred_columns.append(pred_op.left.get_name())

    return left_df.join(right_df, pred_columns, how) 
Developer: ibis-project, Project: ibis, Lines of code: 20, Source file: compiler.py

Example 6: with_norm_query

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def with_norm_query(df: DataFrame) -> DataFrame:
    return df.withColumn(
        'norm_query',
        F.expr('stemmer(query, substring(wikiid, 1, 2))')) 
Developer: wikimedia, Project: search-MjoLniR, Lines of code: 6, Source file: norm_query_clustering.py
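
The 'stemmer' call above works because F.expr can invoke any function visible to Spark SQL, including UDFs registered elsewhere in the project. The sketch below registers a hypothetical stand-in under the same name purely to show the mechanism; it is not the real search-MjoLniR stemmer, and the sample data is invented.

from pyspark.sql import SparkSession
import pyspark.sql.functions as F

spark = SparkSession.builder.getOrCreate()

# Hypothetical stand-in for the project's real 'stemmer': anything
# registered with spark.udf.register can be called from inside F.expr.
spark.udf.register('stemmer', lambda query, lang: query.strip().lower())

df = spark.createDataFrame([('Some Query ', 'enwiki')], ['query', 'wikiid'])
df.withColumn(
    'norm_query',
    F.expr('stemmer(query, substring(wikiid, 1, 2))')).show()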

Example 7: with_exploded_hits

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def with_exploded_hits(df: DataFrame) -> DataFrame:
    return (
        df
        .select(
            F.posexplode('hit_page_ids').alias('hit_position', 'hit_page_id'),
            *df.columns)
        .drop('hit_page_ids')
        .withColumn('clicked', F.expr('array_contains(click_page_ids, hit_page_id)'))
        .drop('click_page_ids')) 
Developer: wikimedia, Project: search-MjoLniR, Lines of code: 11, Source file: dbn.py
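
For readers unfamiliar with posexplode and array_contains, the sketch below runs the same transformation on a tiny invented DataFrame: each element of hit_page_ids becomes its own row with its position, and F.expr('array_contains(...)') marks whether that page id appears in click_page_ids. Column names follow the example above; the data is made up.

from pyspark.sql import SparkSession
import pyspark.sql.functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame(
    [('q1', [10, 11, 12], [11])],
    ['query', 'hit_page_ids', 'click_page_ids'])

# posexplode emits one row per array element together with its position;
# array_contains (via F.expr) then flags whether that page id was clicked.
exploded = (
    df
    .select(
        F.posexplode('hit_page_ids').alias('hit_position', 'hit_page_id'),
        *df.columns)
    .drop('hit_page_ids')
    .withColumn('clicked', F.expr('array_contains(click_page_ids, hit_page_id)'))
    .drop('click_page_ids'))
exploded.show()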

Example 8: translate

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def translate(self, expr, scope, **kwargs):
        """
        Translate Ibis expression into a PySpark object.

        All translated expressions are cached within scope. If an expression is
        found within scope, it's returned. Otherwise, it's translated and
        cached for future reference.

        :param expr: ibis expression
        :param scope: dictionary mapping from operation to translated result
        :param kwargs: parameters passed as keyword args (e.g. window)
        :return: translated PySpark DataFrame or Column object
        """
        # The operation node type the typed expression wraps
        op = expr.op()

        if op in scope:
            return scope[op]
        elif type(op) in self._registry:
            formatter = self._registry[type(op)]
            result = formatter(self, expr, scope, **kwargs)
            scope[op] = result
            return result
        else:
            raise com.OperationNotDefinedError(
                'No translation rule for {}'.format(type(op))
            ) 
Developer: ibis-project, Project: ibis, Lines of code: 29, Source file: compiler.py

Example 9: compile_datasource

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_datasource(t, expr, scope):
    op = expr.op()
    name, _, client = op.args
    return client._session.table(name) 
Developer: ibis-project, Project: ibis, Lines of code: 6, Source file: compiler.py

Example 10: compile_sql_query_result

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_sql_query_result(t, expr, scope, **kwargs):
    op = expr.op()
    query, _, client = op.args
    return client._session.sql(query) 
Developer: ibis-project, Project: ibis, Lines of code: 6, Source file: compiler.py

Example 11: compile_selection

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_selection(t, expr, scope, **kwargs):
    op = expr.op()

    src_table = t.translate(op.table, scope)

    col_in_selection_order = []
    for selection in op.selections:
        if isinstance(selection, types.TableExpr):
            col_in_selection_order.extend(selection.columns)
        elif isinstance(selection, (types.ColumnExpr, types.ScalarExpr)):
            col = t.translate(selection, scope=scope).alias(
                selection.get_name()
            )
            col_in_selection_order.append(col)
        else:
            raise NotImplementedError(
                f"Unrecoginized type in selections: {type(selection)}"
            )

    if col_in_selection_order:
        src_table = src_table[col_in_selection_order]

    for predicate in op.predicates:
        col = t.translate(predicate, scope)
        src_table = src_table[col]

    if op.sort_keys:
        sort_cols = [t.translate(key, scope) for key in op.sort_keys]

        return src_table.sort(*sort_cols)
    else:
        return src_table 
Developer: ibis-project, Project: ibis, Lines of code: 34, Source file: compiler.py

Example 12: compile_sort_key

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_sort_key(t, expr, scope, **kwargs):
    op = expr.op()
    col = t.translate(op.expr, scope)

    if op.ascending:
        return col.asc()
    else:
        return col.desc() 
Developer: ibis-project, Project: ibis, Lines of code: 10, Source file: compiler.py

Example 13: compile_distinct

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_distinct(t, expr, scope, **kwargs):
    op = expr.op()
    root_table_expr = op.root_tables()[0].to_expr()
    src_table = t.translate(root_table_expr, scope)
    src_column_name = op.arg.get_name()
    return src_table.select(src_column_name).distinct()[src_column_name] 
Developer: ibis-project, Project: ibis, Lines of code: 8, Source file: compiler.py

Example 14: compile_self_reference

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_self_reference(t, expr, scope, **kwargs):
    op = expr.op()
    return t.translate(op.table, scope) 
Developer: ibis-project, Project: ibis, Lines of code: 5, Source file: compiler.py

Example 15: compile_limit

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import expr [as alias]
def compile_limit(t, expr, scope, **kwargs):
    op = expr.op()
    if op.offset != 0:
        raise com.UnsupportedArgumentError(
            'PySpark backend does not support non-zero offset for '
            'limit operation. Got offset {}.'.format(op.offset)
        )
    df = t.translate(op.table, scope)
    return df.limit(op.n) 
Developer: ibis-project, Project: ibis, Lines of code: 11, Source file: compiler.py


Note: The pyspark.sql.functions.expr method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by many developers; copyright of the source code belongs to the original authors. For distribution and use, please refer to the license of the corresponding project. Do not reproduce without permission.