This article collects typical usage examples of the Python method pyspark.sql.functions.when. If you are unsure how functions.when is used, or what it is good for, the curated examples below should help; you can also browse further usage examples from the enclosing module, pyspark.sql.functions.
15 code examples of functions.when are shown below, sorted by popularity by default.
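As a quick orientation, here is a minimal sketch of the basic when / otherwise pattern itself (the SparkSession, DataFrame, and column names below are invented for illustration):

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(35,), (17,)], ["age"])

# label each row; a row matching no when() branch and lacking otherwise() would get NULL
df.withColumn(
    "group", F.when(F.col("age") >= 18, "adult").otherwise("minor")
).show()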
Example 1: compile_aggregator
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def compile_aggregator(t, expr, scope, fn, context=None, **kwargs):
    op = expr.op()
    src_col = t.translate(op.arg, scope)

    if getattr(op, 'where', None) is not None:
        condition = t.translate(op.where, scope)
        src_col = F.when(condition, src_col)

    col = fn(src_col)
    if context is None:
        # We are trying to compile an expr such as some_col.max()
        # to a Spark expression.
        # Here we get the root table df of that column and compile
        # the expr to:
        # df.select(max(some_col))
        return t.translate(expr.op().arg.op().table, scope).select(col)
    elif context == AggregationContext.WINDOW:
        window = kwargs['window']
        return col.over(window)
    else:
        return col
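The F.when(condition, src_col) step above turns a filtered aggregate into a plain one: rows failing the condition become NULL, and Spark aggregation functions skip NULLs. A minimal standalone sketch of that pattern, with an invented DataFrame and column names:

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("a", 1), ("a", 5), ("b", 3)], ["grp", "val"])

# max(val) restricted to rows with val < 4; other rows become NULL and are ignored by max()
df.groupBy("grp").agg(
    F.max(F.when(F.col("val") < 4, F.col("val"))).alias("max_small")
).show()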
Example 2: add_protein_fold_type
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def add_protein_fold_type(data, minThreshold, maxThreshold):
    '''
    Adds a column "foldType" with the three major secondary structure classes
    ("alpha", "beta", "alpha+beta") plus "other", based upon the fraction of
    alpha/beta content.

    The simplified syntax used in this method relies on two imports:
        from pyspark.sql.functions import when
        from pyspark.sql.functions import col

    Attributes:
        data (Dataset<Row>): input dataset with alpha, beta composition
        minThreshold (float): below this threshold, the secondary structure is ignored
        maxThreshold (float): above this threshold, the secondary structure is ignored
    '''
    return data.withColumn(
        "foldType",
        when((col("alpha") > maxThreshold) & (col("beta") < minThreshold), "alpha")
        .when((col("beta") > maxThreshold) & (col("alpha") < minThreshold), "beta")
        .when((col("alpha") > maxThreshold) & (col("beta") > maxThreshold), "alpha+beta")
        .otherwise("other"),
    )
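A minimal usage sketch of the function above (the toy DataFrame and the threshold values are invented for illustration; in mmtf-pyspark the input comes from a secondary-structure dataset):

from pyspark.sql import SparkSession
from pyspark.sql.functions import when, col

spark = SparkSession.builder.getOrCreate()
data = spark.createDataFrame(
    [(0.70, 0.05), (0.10, 0.60), (0.40, 0.35)], ["alpha", "beta"]
)

# illustrative thresholds only
add_protein_fold_type(data, minThreshold=0.25, maxThreshold=0.5).show()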
Example 3: add_protein_fold_type
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def add_protein_fold_type(data, minThreshold, maxThreshold):
    '''
    Adds a column "foldType" with the three major secondary structure classes
    ("alpha", "beta", "alpha+beta") plus "other", based upon the fraction of
    alpha/beta content.

    The simplified syntax used in this method relies on two imports:
        from pyspark.sql.functions import when
        from pyspark.sql.functions import col

    Attributes:
        data (Dataset<Row>): input dataset with alpha, beta composition
        minThreshold (float): below this threshold, the secondary structure is ignored
        maxThreshold (float): above this threshold, the secondary structure is ignored
    '''
    return data.withColumn(
        "foldType",
        when((col("alpha") > maxThreshold) & (col("beta") < minThreshold), "alpha")
        .when((col("beta") > maxThreshold) & (col("alpha") < minThreshold), "beta")
        .when((col("alpha") > maxThreshold) & (col("beta") > minThreshold), "alpha+beta")
        .otherwise("other"),
    )
Example 4: booleanize_null
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def booleanize_null(left_scol, scol, f):
    """
    Booleanize Null in Spark Column
    """
    comp_ops = [
        getattr(Column, "__{}__".format(comp_op))
        for comp_op in ["eq", "ne", "lt", "le", "ge", "gt"]
    ]

    if f in comp_ops:
        # if `f` is "!=", fill null with True otherwise False
        filler = f == Column.__ne__
        scol = F.when(scol.isNull(), filler).otherwise(scol)

    elif f == Column.__or__:
        scol = F.when(left_scol.isNull() | scol.isNull(), False).otherwise(scol)

    elif f == Column.__and__:
        scol = F.when(scol.isNull(), False).otherwise(scol)

    return scol
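This helper comes from Koalas internals; it exists because a Spark comparison involving NULL yields NULL, whereas pandas expects a plain boolean. A minimal standalone sketch of the underlying idea, with an invented column name:

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1,), (None,)], ["x"])

# x == 1 evaluates to NULL on the NULL row; coerce that to False, as pandas would
df.select(
    (F.col("x") == 1).alias("raw_eq"),
    F.when(F.col("x").isNull(), False).otherwise(F.col("x") == 1).alias("booleanized"),
).show()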
Example 5: _is_monotonic_increasing
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def _is_monotonic_increasing(self):
    scol = self.spark.column
    window = Window.orderBy(NATURAL_ORDER_COLUMN_NAME).rowsBetween(-1, -1)
    prev = F.lag(scol, 1).over(window)

    cond = F.lit(True)
    for field in self.spark.data_type[::-1]:
        left = scol.getField(field.name)
        right = prev.getField(field.name)
        compare = MultiIndex._comparator_for_monotonic_increasing(field.dataType)
        cond = F.when(left.eqNullSafe(right), cond).otherwise(
            compare(left, right, spark.Column.__gt__)
        )

    cond = prev.isNull() | cond

    internal = InternalFrame(
        spark_frame=self._internal.spark_frame.select(
            self._internal.index_spark_columns + [cond]
        ),
        index_map=self._internal.index_map,
    )

    return first_series(DataFrame(internal))
Example 6: _is_monotonic_decreasing
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def _is_monotonic_decreasing(self):
    scol = self.spark.column
    window = Window.orderBy(NATURAL_ORDER_COLUMN_NAME).rowsBetween(-1, -1)
    prev = F.lag(scol, 1).over(window)

    cond = F.lit(True)
    for field in self.spark.data_type[::-1]:
        left = scol.getField(field.name)
        right = prev.getField(field.name)
        compare = MultiIndex._comparator_for_monotonic_decreasing(field.dataType)
        cond = F.when(left.eqNullSafe(right), cond).otherwise(
            compare(left, right, spark.Column.__lt__)
        )

    cond = prev.isNull() | cond

    internal = InternalFrame(
        spark_frame=self._internal.spark_frame.select(
            self._internal.index_spark_columns + [cond]
        ),
        index_map=self._internal.index_map,
    )

    return first_series(DataFrame(internal))
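Both of these methods compare every MultiIndex level with the previous row, field by field, combining the comparisons with F.when. A much-simplified sketch of the core idea for a single plain column (names invented; the Koalas InternalFrame plumbing is omitted):

from pyspark.sql import SparkSession, Window, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, 10), (2, 12), (3, 11)], ["ord", "val"])

w = Window.orderBy("ord").rowsBetween(-1, -1)
prev = F.lag("val", 1).over(w)

# a row is a monotonic step if it has no predecessor or is >= the previous value
df.select(
    "ord", "val",
    (prev.isNull() | (F.col("val") >= prev)).alias("monotonic_step"),
).show()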
Example 7: __getitem__
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def __getitem__(self, key):
    try:
        if (isinstance(key, slice) and any(type(n) == int for n in [key.start, key.stop])) or (
            type(key) == int
            and not isinstance(self.index.spark.data_type, (IntegerType, LongType))
        ):
            # pandas Series treats integer slices as positional; a plain int key is
            # looked up by position only when the index is not integer-typed,
            # otherwise it is treated as a label.
            return self.iloc[key]
        return self.loc[key]
    except SparkPandasIndexingError:
        raise KeyError(
            "Key length ({}) exceeds index depth ({})".format(
                len(key), len(self._internal.index_map)
            )
        )
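This example is the Koalas Series.__getitem__ routing logic rather than a direct use of when; it is included because the surrounding module imports when. A rough usage sketch of the behaviour it implements, assuming the databricks.koalas package is installed (the project has since been folded into pandas API on Spark):

import databricks.koalas as ks

s = ks.Series([10, 20, 30], index=["a", "b", "c"])

print(s["b"])    # non-int key: routed to label-based .loc
print(s[0:2])    # integer slice: routed to positional .iloc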
Example 8: compile_sign
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def compile_sign(t, expr, scope, **kwargs):
    op = expr.op()
    src_column = t.translate(op.arg, scope)

    return F.when(src_column == 0, F.lit(0.0)).otherwise(
        F.when(src_column > 0, F.lit(1.0)).otherwise(-1.0)
    )
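The same nested when logic, applied directly to a DataFrame column as a minimal sketch (column name invented); note that pyspark.sql.functions.signum already provides this:

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(-3.0,), (0.0,), (7.5,)], ["x"])

sign = F.when(F.col("x") == 0, F.lit(0.0)).otherwise(
    F.when(F.col("x") > 0, F.lit(1.0)).otherwise(-1.0)
)
df.select("x", sign.alias("sign_x"), F.signum("x").alias("builtin_signum")).show()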
Example 9: compile_if_null
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def compile_if_null(t, expr, scope, **kwargs):
    op = expr.op()
    col = t.translate(op.arg, scope)
    ifnull_col = t.translate(op.ifnull_expr, scope)
    return F.when(col.isNull(), ifnull_col).otherwise(col)
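Outside the ibis compiler, the same IFNULL pattern looks like the sketch below (invented column names); F.coalesce expresses it more directly:

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("x", None), (None, "y")], ["a", "b"])

df.select(
    F.when(F.col("a").isNull(), F.col("b")).otherwise(F.col("a")).alias("via_when"),
    F.coalesce(F.col("a"), F.col("b")).alias("via_coalesce"),
).show()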
Example 10: compile_null_if
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def compile_null_if(t, expr, scope, **kwargs):
    op = expr.op()
    col = t.translate(op.arg, scope)
    nullif_col = t.translate(op.null_if_expr, scope)
    return F.when(col == nullif_col, F.lit(None)).otherwise(col)
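And the reverse direction, NULLIF, as a minimal standalone sketch (invented column name and sentinel value): turn a sentinel into NULL.

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(-999,), (42,)], ["reading"])

# treat the sentinel -999 as missing
df.select(
    F.when(F.col("reading") == -999, F.lit(None)).otherwise(F.col("reading")).alias("cleaned")
).show()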
Example 11: sum
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def sum(self):
    def sum(scol):
        return F.when(
            F.row_number().over(self._unbounded_window) >= self._min_periods,
            F.sum(scol).over(self._window),
        ).otherwise(F.lit(None))

    return self._apply_as_series_or_frame(sum)
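Examples 11 through 15 all follow the same Koalas rolling-window pattern: compute the windowed aggregate, but return NULL until at least min_periods rows have been seen. A stripped-down sketch of that pattern in plain PySpark (the ordering column, window size, and min_periods value are invented):

from pyspark.sql import SparkSession, Window, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, 2.0), (2, 4.0), (3, 6.0)], ["ord", "val"])

min_periods = 2
unbounded = Window.orderBy("ord").rowsBetween(Window.unboundedPreceding, Window.currentRow)
rolling = Window.orderBy("ord").rowsBetween(-1, Window.currentRow)  # trailing window of 2 rows

df.select(
    "ord",
    F.when(
        F.row_number().over(unbounded) >= min_periods,
        F.sum("val").over(rolling),
    ).otherwise(F.lit(None)).alias("rolling_sum"),
).show()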
Example 12: min
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def min(self):
    def min(scol):
        return F.when(
            F.row_number().over(self._unbounded_window) >= self._min_periods,
            F.min(scol).over(self._window),
        ).otherwise(F.lit(None))

    return self._apply_as_series_or_frame(min)
Example 13: max
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def max(self):
    def max(scol):
        return F.when(
            F.row_number().over(self._unbounded_window) >= self._min_periods,
            F.max(scol).over(self._window),
        ).otherwise(F.lit(None))

    return self._apply_as_series_or_frame(max)
Example 14: mean
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def mean(self):
    def mean(scol):
        return F.when(
            F.row_number().over(self._unbounded_window) >= self._min_periods,
            F.mean(scol).over(self._window),
        ).otherwise(F.lit(None))

    return self._apply_as_series_or_frame(mean)
Example 15: std
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def std(self):
    def std(scol):
        return F.when(
            F.row_number().over(self._unbounded_window) >= self._min_periods,
            F.stddev(scol).over(self._window),
        ).otherwise(F.lit(None))

    return self._apply_as_series_or_frame(std)