

Python functions.when Method Code Examples

This article collects typical usage examples of the pyspark.sql.functions.when method in Python. If you are wondering what functions.when does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples from the pyspark.sql.functions module.

The following presents 15 code examples of the functions.when method, sorted by popularity by default.
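
Before diving into the examples, here is a minimal, self-contained sketch of the basic when/otherwise pattern. The DataFrame, column names, and threshold below are invented purely for illustration and do not come from any of the projects cited later.

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, 85), (2, 55), (3, None)], ["id", "score"])

# Each when() adds a branch; otherwise() supplies the fallback value.
labeled = df.withColumn(
    "grade",
    F.when(F.col("score") >= 60, "pass")
    .when(F.col("score") < 60, "fail")
    .otherwise("unknown"),  # e.g. rows with a NULL score match neither branch
)
labeled.show()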

Example 1: compile_aggregator

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def compile_aggregator(t, expr, scope, fn, context=None, **kwargs):
    op = expr.op()
    src_col = t.translate(op.arg, scope)

    if getattr(op, 'where', None) is not None:
        condition = t.translate(op.where, scope)
        src_col = F.when(condition, src_col)

    col = fn(src_col)
    if context is None:
        # We are trying to compile an expr such as some_col.max()
        # to a Spark expression.
        # Here we get the root table df of that column and compile
        # the expr to:
        # df.select(max(some_col))
        return t.translate(expr.op().arg.op().table, scope).select(col)
    elif context == AggregationContext.WINDOW:
        window = kwargs['window']
        return col.over(window)
    else:
        return col 
Developer: ibis-project, Project: ibis, Lines: 23, Source: compiler.py
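
Example 1 relies on a property of when that is easy to miss: F.when(condition, src_col) with no otherwise() leaves non-matching rows as NULL, and Spark's aggregate functions skip NULLs, which is what makes the where-clause filtering work. A hedged sketch of the same pattern in plain PySpark, with an invented DataFrame:

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("a", 1), ("a", 9), ("b", 4)], ["key", "value"])

# Rows where key != "a" become NULL and are ignored by max().
filtered_max = F.max(F.when(F.col("key") == "a", F.col("value")))
df.select(filtered_max.alias("max_value_for_a")).show()  # -> 9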

Example 2: add_protein_fold_type

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def add_protein_fold_type(data, minThreshold, maxThreshold):
    '''
    Adds a column "foldType" that assigns one of the major secondary structure classes
    "alpha", "beta", "alpha+beta", or "other", based on the fraction of alpha/beta content.

    The simplified syntax used in this method relies on two imports:
        from pyspark.sql.functions import when
        from pyspark.sql.functions import col

    Args:
        data (Dataset<Row>): input dataset with alpha, beta composition
        minThreshold (float): below this threshold, the secondary structure is ignored
        maxThreshold (float): above this threshold, the secondary structure is ignored
    '''

    return data.withColumn(
        "foldType",
        when((col("alpha") > maxThreshold) & (col("beta") < minThreshold), "alpha")
        .when((col("beta") > maxThreshold) & (col("alpha") < minThreshold), "beta")
        .when((col("alpha") > maxThreshold) & (col("beta") > maxThreshold), "alpha+beta")
        .otherwise("other")
    )


Developer: sbl-sdsc, Project: mmtf-pyspark, Lines: 23, Source: ProteinChainClassification.ipynb.py
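
A hypothetical usage sketch for add_protein_fold_type; the chain IDs, alpha/beta fractions, and thresholds below are invented rather than taken from an mmtf-pyspark dataset:

from pyspark.sql import SparkSession
from pyspark.sql.functions import when, col

spark = SparkSession.builder.getOrCreate()
data = spark.createDataFrame(
    [("1ABC.A", 0.55, 0.05), ("2DEF.B", 0.10, 0.60), ("3GHI.C", 0.30, 0.30)],
    ["structureChainId", "alpha", "beta"],
)

# Expected foldType values with these made-up numbers: alpha, beta, alpha+beta.
add_protein_fold_type(data, minThreshold=0.15, maxThreshold=0.25).show()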

Example 3: add_protein_fold_type

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def add_protein_fold_type(data, minThreshold, maxThreshold):
    '''
    Adds a column "foldType" that assigns one of the major secondary structure classes
    "alpha", "beta", "alpha+beta", or "other", based on the fraction of alpha/beta content.

    The simplified syntax used in this method relies on two imports:
        from pyspark.sql.functions import when
        from pyspark.sql.functions import col

    Args:
        data (Dataset<Row>): input dataset with alpha, beta composition
        minThreshold (float): below this threshold, the secondary structure is ignored
        maxThreshold (float): above this threshold, the secondary structure is ignored
    '''

    return data.withColumn(
        "foldType",
        when((col("alpha") > maxThreshold) & (col("beta") < minThreshold), "alpha")
        .when((col("beta") > maxThreshold) & (col("alpha") < minThreshold), "beta")
        .when((col("alpha") > maxThreshold) & (col("beta") > minThreshold), "alpha+beta")
        .otherwise("other")
    )


Developer: sbl-sdsc, Project: mmtf-pyspark, Lines: 23, Source: ProteinFoldDatasetCreatorDemo.ipynb.py

Example 4: booleanize_null

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def booleanize_null(left_scol, scol, f):
    """
    Booleanize Null in Spark Column
    """
    comp_ops = [
        getattr(Column, "__{}__".format(comp_op))
        for comp_op in ["eq", "ne", "lt", "le", "ge", "gt"]
    ]

    if f in comp_ops:
        # if `f` is "!=", fill null with True otherwise False
        filler = f == Column.__ne__
        scol = F.when(scol.isNull(), filler).otherwise(scol)

    elif f == Column.__or__:
        scol = F.when(left_scol.isNull() | scol.isNull(), False).otherwise(scol)

    elif f == Column.__and__:
        scol = F.when(scol.isNull(), False).otherwise(scol)

    return scol 
Developer: databricks, Project: koalas, Lines: 23, Source: base.py
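
To see why the helper exists: in Spark SQL a comparison involving NULL evaluates to NULL rather than to a boolean, while pandas-style semantics expect False (or True for "!="). A hedged illustration with an invented single-column DataFrame:

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1,), (None,)], "x: int")

cmp = F.col("x") == 1  # NULL (not False) on the row where x is NULL
df.select(
    cmp.alias("raw_eq"),
    F.when(cmp.isNull(), False).otherwise(cmp).alias("booleanized"),
).show()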

Example 5: _is_monotonic_increasing

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def _is_monotonic_increasing(self):
        scol = self.spark.column
        window = Window.orderBy(NATURAL_ORDER_COLUMN_NAME).rowsBetween(-1, -1)
        prev = F.lag(scol, 1).over(window)

        cond = F.lit(True)
        for field in self.spark.data_type[::-1]:
            left = scol.getField(field.name)
            right = prev.getField(field.name)
            compare = MultiIndex._comparator_for_monotonic_increasing(field.dataType)
            cond = F.when(left.eqNullSafe(right), cond).otherwise(
                compare(left, right, spark.Column.__gt__)
            )

        cond = prev.isNull() | cond

        internal = InternalFrame(
            spark_frame=self._internal.spark_frame.select(
                self._internal.index_spark_columns + [cond]
            ),
            index_map=self._internal.index_map,
        )

        return first_series(DataFrame(internal)) 
Developer: databricks, Project: koalas, Lines: 26, Source: indexes.py
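
The MultiIndex logic above threads the comparison through F.when(left.eqNullSafe(right), cond) field by field; for a single column the same lag-and-compare idea reduces to the sketch below, where the column names and values are invented:

from pyspark.sql import SparkSession, Window
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, 10), (2, 20), (3, 20), (4, 15)], ["idx", "v"])

w = Window.orderBy("idx")
prev = F.lag("v", 1).over(w)

# The first row has no predecessor; every later row must not decrease.
step_ok = F.when(prev.isNull(), True).otherwise(F.col("v") >= prev)
df.select("idx", "v", step_ok.alias("monotonic_step")).show()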

Example 6: _is_monotonic_decreasing

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def _is_monotonic_decreasing(self):
        scol = self.spark.column
        window = Window.orderBy(NATURAL_ORDER_COLUMN_NAME).rowsBetween(-1, -1)
        prev = F.lag(scol, 1).over(window)

        cond = F.lit(True)
        for field in self.spark.data_type[::-1]:
            left = scol.getField(field.name)
            right = prev.getField(field.name)
            compare = MultiIndex._comparator_for_monotonic_decreasing(field.dataType)
            cond = F.when(left.eqNullSafe(right), cond).otherwise(
                compare(left, right, spark.Column.__lt__)
            )

        cond = prev.isNull() | cond

        internal = InternalFrame(
            spark_frame=self._internal.spark_frame.select(
                self._internal.index_spark_columns + [cond]
            ),
            index_map=self._internal.index_map,
        )

        return first_series(DataFrame(internal)) 
Developer: databricks, Project: koalas, Lines: 26, Source: indexes.py

Example 7: __getitem__

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def __getitem__(self, key):
        try:
            if (isinstance(key, slice) and any(type(n) == int for n in [key.start, key.stop])) or (
                type(key) == int
                and not isinstance(self.index.spark.data_type, (IntegerType, LongType))
            ):
                # pandas Series treats ints inside a slice as positions; a bare int key
                # is a label lookup only when the index itself is integer-typed,
                # otherwise it falls back to positional lookup.
                return self.iloc[key]
            return self.loc[key]
        except SparkPandasIndexingError:
            raise KeyError(
                "Key length ({}) exceeds index depth ({})".format(
                    len(key), len(self._internal.index_map)
                )
            ) 
Developer: databricks, Project: koalas, Lines: 18, Source: series.py

Example 8: compile_sign

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def compile_sign(t, expr, scope, **kwargs):
    op = expr.op()

    src_column = t.translate(op.arg, scope)

    return F.when(src_column == 0, F.lit(0.0)).otherwise(
        F.when(src_column > 0, F.lit(1.0)).otherwise(-1.0)
    ) 
Developer: ibis-project, Project: ibis, Lines: 10, Source: compiler.py
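
The same nested-when sign logic can be written directly against a DataFrame, outside of the ibis translator; the column name and values below are invented:

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(-3.0,), (0.0,), (2.5,)], "x: double")

sign = F.when(F.col("x") == 0, F.lit(0.0)).otherwise(
    F.when(F.col("x") > 0, F.lit(1.0)).otherwise(-1.0)
)
df.select("x", sign.alias("sign")).show()  # -> -1.0, 0.0, 1.0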

Example 9: compile_if_null

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def compile_if_null(t, expr, scope, **kwargs):
    op = expr.op()
    col = t.translate(op.arg, scope)
    ifnull_col = t.translate(op.ifnull_expr, scope)
    return F.when(col.isNull(), ifnull_col).otherwise(col) 
Developer: ibis-project, Project: ibis, Lines: 7, Source: compiler.py

Example 10: compile_null_if

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def compile_null_if(t, expr, scope, **kwargs):
    op = expr.op()
    col = t.translate(op.arg, scope)
    nullif_col = t.translate(op.null_if_expr, scope)
    return F.when(col == nullif_col, F.lit(None)).otherwise(col) 
Developer: ibis-project, Project: ibis, Lines: 7, Source: compiler.py
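
Examples 9 and 10 are mirror images of each other: compile_if_null replaces NULL with a fallback (equivalent to F.coalesce), while compile_null_if turns a sentinel value into NULL. A hedged sketch of both patterns on an invented DataFrame:

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("a",), (None,), ("N/A",)], "raw: string")

if_null = F.when(F.col("raw").isNull(), F.lit("missing")).otherwise(F.col("raw"))
null_if = F.when(F.col("raw") == "N/A", F.lit(None)).otherwise(F.col("raw"))
df.select("raw", if_null.alias("if_null"), null_if.alias("null_if")).show()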

Example 11: sum

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def sum(self):
        def sum(scol):
            return F.when(
                F.row_number().over(self._unbounded_window) >= self._min_periods,
                F.sum(scol).over(self._window),
            ).otherwise(F.lit(None))

        return self._apply_as_series_or_frame(sum) 
Developer: databricks, Project: koalas, Lines: 10, Source: window.py

Example 12: min

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def min(self):
        def min(scol):
            return F.when(
                F.row_number().over(self._unbounded_window) >= self._min_periods,
                F.min(scol).over(self._window),
            ).otherwise(F.lit(None))

        return self._apply_as_series_or_frame(min) 
Developer: databricks, Project: koalas, Lines: 10, Source: window.py

Example 13: max

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def max(self):
        def max(scol):
            return F.when(
                F.row_number().over(self._unbounded_window) >= self._min_periods,
                F.max(scol).over(self._window),
            ).otherwise(F.lit(None))

        return self._apply_as_series_or_frame(max) 
Developer: databricks, Project: koalas, Lines: 10, Source: window.py

Example 14: mean

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def mean(self):
        def mean(scol):
            return F.when(
                F.row_number().over(self._unbounded_window) >= self._min_periods,
                F.mean(scol).over(self._window),
            ).otherwise(F.lit(None))

        return self._apply_as_series_or_frame(mean) 
Developer: databricks, Project: koalas, Lines: 10, Source: window.py

Example 15: std

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def std(self):
        def std(scol):
            return F.when(
                F.row_number().over(self._unbounded_window) >= self._min_periods,
                F.stddev(scol).over(self._window),
            ).otherwise(F.lit(None))

        return self._apply_as_series_or_frame(std) 
Developer: databricks, Project: koalas, Lines: 10, Source: window.py
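
Examples 11 through 15 all share one pattern: compute the aggregate over a bounded rolling window, but only expose it once F.row_number() over an unbounded-preceding window shows that at least min_periods rows have been seen, returning NULL before that. A hedged sketch of the pattern applied directly to a Spark DataFrame, with invented data and a window of size 2:

from pyspark.sql import SparkSession, Window
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, 10.0), (2, 20.0), (3, 30.0)], ["idx", "v"])

min_periods = 2
window = Window.orderBy("idx").rowsBetween(-1, Window.currentRow)  # current row plus the one before it
unbounded = Window.orderBy("idx").rowsBetween(Window.unboundedPreceding, Window.currentRow)

rolling_sum = F.when(
    F.row_number().over(unbounded) >= min_periods,  # enough rows accumulated so far?
    F.sum("v").over(window),
).otherwise(F.lit(None))
df.select("idx", "v", rolling_sum.alias("rolling_sum")).show()  # first row -> NULL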


Note: The pyspark.sql.functions.when examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. Please consult each project's license before using or redistributing the code, and do not reproduce this article without permission.