This article collects typical usage examples of the Python method pyspark.sql.functions.when. If you are unsure how functions.when is used, or what it is good for, the curated examples below should help; you can also browse further usage examples from the enclosing module, pyspark.sql.functions.
15 code examples of functions.when are shown below, sorted by popularity by default.
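As a quick orientation, here is a minimal sketch of the basic when / otherwise pattern itself (the SparkSession, DataFrame, and column names below are invented for illustration):

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(35,), (17,)], ["age"])

# label each row; a row matching no when() branch and lacking otherwise() would get NULL
df.withColumn(
    "group", F.when(F.col("age") >= 18, "adult").otherwise("minor")
).show()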
Example 1: compile_aggregator
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def compile_aggregator(t, expr, scope, fn, context=None, **kwargs):
    op = expr.op()
    src_col = t.translate(op.arg, scope)

    if getattr(op, 'where', None) is not None:
        condition = t.translate(op.where, scope)
        src_col = F.when(condition, src_col)

    col = fn(src_col)
    if context is None:
        # We are trying to compile an expr such as some_col.max()
        # to a Spark expression.
        # Here we get the root table df of that column and compile
        # the expr to:
        # df.select(max(some_col))
        return t.translate(expr.op().arg.op().table, scope).select(col)
    elif context == AggregationContext.WINDOW:
        window = kwargs['window']
        return col.over(window)
    else:
        return col
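The F.when(condition, src_col) step above turns a filtered aggregate into a plain one: rows failing the condition become NULL, and Spark aggregation functions skip NULLs. A minimal standalone sketch of that pattern, with an invented DataFrame and column names:

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("a", 1), ("a", 5), ("b", 3)], ["grp", "val"])

# max(val) restricted to rows with val < 4; other rows become NULL and are ignored by max()
df.groupBy("grp").agg(
    F.max(F.when(F.col("val") < 4, F.col("val"))).alias("max_small")
).show()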
Example 2: add_protein_fold_type
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def add_protein_fold_type(data, minThreshold, maxThreshold):
    '''
    Adds a column "foldType" with the three major secondary structure classes
    ("alpha", "beta", "alpha+beta") plus "other", based upon the fraction of
    alpha/beta content.

    The simplified syntax used in this method relies on two imports:
        from pyspark.sql.functions import when
        from pyspark.sql.functions import col

    Attributes:
        data (Dataset<Row>): input dataset with alpha, beta composition
        minThreshold (float): below this threshold, the secondary structure is ignored
        maxThreshold (float): above this threshold, the secondary structure is ignored
    '''
    return data.withColumn(
        "foldType",
        when((col("alpha") > maxThreshold) & (col("beta") < minThreshold), "alpha")
        .when((col("beta") > maxThreshold) & (col("alpha") < minThreshold), "beta")
        .when((col("alpha") > maxThreshold) & (col("beta") > maxThreshold), "alpha+beta")
        .otherwise("other"),
    )
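A minimal usage sketch of the function above (the toy DataFrame and the threshold values are invented for illustration; in mmtf-pyspark the input comes from a secondary-structure dataset):

from pyspark.sql import SparkSession
from pyspark.sql.functions import when, col

spark = SparkSession.builder.getOrCreate()
data = spark.createDataFrame(
    [(0.70, 0.05), (0.10, 0.60), (0.40, 0.35)], ["alpha", "beta"]
)

# illustrative thresholds only
add_protein_fold_type(data, minThreshold=0.25, maxThreshold=0.5).show()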
Example 3: add_protein_fold_type
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def add_protein_fold_type(data, minThreshold, maxThreshold):
    '''
    Adds a column "foldType" with the three major secondary structure classes
    ("alpha", "beta", "alpha+beta") plus "other", based upon the fraction of
    alpha/beta content.

    The simplified syntax used in this method relies on two imports:
        from pyspark.sql.functions import when
        from pyspark.sql.functions import col

    Attributes:
        data (Dataset<Row>): input dataset with alpha, beta composition
        minThreshold (float): below this threshold, the secondary structure is ignored
        maxThreshold (float): above this threshold, the secondary structure is ignored
    '''
    return data.withColumn(
        "foldType",
        when((col("alpha") > maxThreshold) & (col("beta") < minThreshold), "alpha")
        .when((col("beta") > maxThreshold) & (col("alpha") < minThreshold), "beta")
        .when((col("alpha") > maxThreshold) & (col("beta") > minThreshold), "alpha+beta")
        .otherwise("other"),
    )
Example 4: booleanize_null
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def booleanize_null(left_scol, scol, f):
    """
    Booleanize Null in Spark Column
    """
    comp_ops = [
        getattr(Column, "__{}__".format(comp_op))
        for comp_op in ["eq", "ne", "lt", "le", "ge", "gt"]
    ]

    if f in comp_ops:
        # if `f` is "!=", fill null with True otherwise False
        filler = f == Column.__ne__
        scol = F.when(scol.isNull(), filler).otherwise(scol)

    elif f == Column.__or__:
        scol = F.when(left_scol.isNull() | scol.isNull(), False).otherwise(scol)

    elif f == Column.__and__:
        scol = F.when(scol.isNull(), False).otherwise(scol)

    return scol
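This helper comes from Koalas internals; it exists because a Spark comparison involving NULL yields NULL, whereas pandas expects a plain boolean. A minimal standalone sketch of the underlying idea, with an invented column name:

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1,), (None,)], ["x"])

# x == 1 evaluates to NULL on the NULL row; coerce that to False, as pandas would
df.select(
    (F.col("x") == 1).alias("raw_eq"),
    F.when(F.col("x").isNull(), False).otherwise(F.col("x") == 1).alias("booleanized"),
).show()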
Example 5: _is_monotonic_increasing
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def _is_monotonic_increasing(self):
    scol = self.spark.column
    window = Window.orderBy(NATURAL_ORDER_COLUMN_NAME).rowsBetween(-1, -1)
    prev = F.lag(scol, 1).over(window)

    cond = F.lit(True)
    for field in self.spark.data_type[::-1]:
        left = scol.getField(field.name)
        right = prev.getField(field.name)
        compare = MultiIndex._comparator_for_monotonic_increasing(field.dataType)
        cond = F.when(left.eqNullSafe(right), cond).otherwise(
            compare(left, right, spark.Column.__gt__)
        )

    cond = prev.isNull() | cond

    internal = InternalFrame(
        spark_frame=self._internal.spark_frame.select(
            self._internal.index_spark_columns + [cond]
        ),
        index_map=self._internal.index_map,
    )

    return first_series(DataFrame(internal))
Example 6: _is_monotonic_decreasing
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def _is_monotonic_decreasing(self):
    scol = self.spark.column
    window = Window.orderBy(NATURAL_ORDER_COLUMN_NAME).rowsBetween(-1, -1)
    prev = F.lag(scol, 1).over(window)

    cond = F.lit(True)
    for field in self.spark.data_type[::-1]:
        left = scol.getField(field.name)
        right = prev.getField(field.name)
        compare = MultiIndex._comparator_for_monotonic_decreasing(field.dataType)
        cond = F.when(left.eqNullSafe(right), cond).otherwise(
            compare(left, right, spark.Column.__lt__)
        )

    cond = prev.isNull() | cond

    internal = InternalFrame(
        spark_frame=self._internal.spark_frame.select(
            self._internal.index_spark_columns + [cond]
        ),
        index_map=self._internal.index_map,
    )

    return first_series(DataFrame(internal))
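Both of these methods compare every MultiIndex level with the previous row, field by field, combining the comparisons with F.when. A much-simplified sketch of the core idea for a single plain column (names invented; the Koalas InternalFrame plumbing is omitted):

from pyspark.sql import SparkSession, Window, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, 10), (2, 12), (3, 11)], ["ord", "val"])

w = Window.orderBy("ord").rowsBetween(-1, -1)
prev = F.lag("val", 1).over(w)

# a row is a monotonic step if it has no predecessor or is >= the previous value
df.select(
    "ord", "val",
    (prev.isNull() | (F.col("val") >= prev)).alias("monotonic_step"),
).show()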
Example 7: __getitem__
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def __getitem__(self, key):
    try:
        if (isinstance(key, slice) and any(type(n) == int for n in [key.start, key.stop])) or (
            type(key) == int
            and not isinstance(self.index.spark.data_type, (IntegerType, LongType))
        ):
            # pandas Series treats integer slices as positional; a plain int key is
            # looked up by position only when the index is not integer-typed,
            # otherwise it is treated as a label.
            return self.iloc[key]
        return self.loc[key]
    except SparkPandasIndexingError:
        raise KeyError(
            "Key length ({}) exceeds index depth ({})".format(
                len(key), len(self._internal.index_map)
            )
        )
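This example is the Koalas Series.__getitem__ routing logic rather than a direct use of when; it is included because the surrounding module imports when. A rough usage sketch of the behaviour it implements, assuming the databricks.koalas package is installed (the project has since been folded into pandas API on Spark):

import databricks.koalas as ks

s = ks.Series([10, 20, 30], index=["a", "b", "c"])

print(s["b"])    # non-int key: routed to label-based .loc
print(s[0:2])    # integer slice: routed to positional .iloc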
Example 8: compile_sign
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def compile_sign(t, expr, scope, **kwargs):
    op = expr.op()
    src_column = t.translate(op.arg, scope)

    return F.when(src_column == 0, F.lit(0.0)).otherwise(
        F.when(src_column > 0, F.lit(1.0)).otherwise(-1.0)
    )
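The same nested when logic, applied directly to a DataFrame column as a minimal sketch (column name invented); note that pyspark.sql.functions.signum already provides this:

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(-3.0,), (0.0,), (7.5,)], ["x"])

sign = F.when(F.col("x") == 0, F.lit(0.0)).otherwise(
    F.when(F.col("x") > 0, F.lit(1.0)).otherwise(-1.0)
)
df.select("x", sign.alias("sign_x"), F.signum("x").alias("builtin_signum")).show()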
Example 9: compile_if_null
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def compile_if_null(t, expr, scope, **kwargs):
    op = expr.op()
    col = t.translate(op.arg, scope)
    ifnull_col = t.translate(op.ifnull_expr, scope)
    return F.when(col.isNull(), ifnull_col).otherwise(col)
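Outside the ibis compiler, the same IFNULL pattern looks like the sketch below (invented column names); F.coalesce expresses it more directly:

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("x", None), (None, "y")], ["a", "b"])

df.select(
    F.when(F.col("a").isNull(), F.col("b")).otherwise(F.col("a")).alias("via_when"),
    F.coalesce(F.col("a"), F.col("b")).alias("via_coalesce"),
).show()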
Example 10: compile_null_if
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def compile_null_if(t, expr, scope, **kwargs):
    op = expr.op()
    col = t.translate(op.arg, scope)
    nullif_col = t.translate(op.null_if_expr, scope)
    return F.when(col == nullif_col, F.lit(None)).otherwise(col)
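And the reverse direction, NULLIF, as a minimal standalone sketch (invented column name and sentinel value): turn a sentinel into NULL.

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(-999,), (42,)], ["reading"])

# treat the sentinel -999 as missing
df.select(
    F.when(F.col("reading") == -999, F.lit(None)).otherwise(F.col("reading")).alias("cleaned")
).show()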
Example 11: sum
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def sum(self):
    def sum(scol):
        return F.when(
            F.row_number().over(self._unbounded_window) >= self._min_periods,
            F.sum(scol).over(self._window),
        ).otherwise(F.lit(None))

    return self._apply_as_series_or_frame(sum)
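Examples 11 through 15 all follow the same Koalas rolling-window pattern: compute the windowed aggregate, but return NULL until at least min_periods rows have been seen. A stripped-down sketch of that pattern in plain PySpark (the ordering column, window size, and min_periods value are invented):

from pyspark.sql import SparkSession, Window, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, 2.0), (2, 4.0), (3, 6.0)], ["ord", "val"])

min_periods = 2
unbounded = Window.orderBy("ord").rowsBetween(Window.unboundedPreceding, Window.currentRow)
rolling = Window.orderBy("ord").rowsBetween(-1, Window.currentRow)  # trailing window of 2 rows

df.select(
    "ord",
    F.when(
        F.row_number().over(unbounded) >= min_periods,
        F.sum("val").over(rolling),
    ).otherwise(F.lit(None)).alias("rolling_sum"),
).show()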
Example 12: min
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def min(self):
    def min(scol):
        return F.when(
            F.row_number().over(self._unbounded_window) >= self._min_periods,
            F.min(scol).over(self._window),
        ).otherwise(F.lit(None))

    return self._apply_as_series_or_frame(min)
Example 13: max
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def max(self):
    def max(scol):
        return F.when(
            F.row_number().over(self._unbounded_window) >= self._min_periods,
            F.max(scol).over(self._window),
        ).otherwise(F.lit(None))

    return self._apply_as_series_or_frame(max)
Example 14: mean
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def mean(self):
    def mean(scol):
        return F.when(
            F.row_number().over(self._unbounded_window) >= self._min_periods,
            F.mean(scol).over(self._window),
        ).otherwise(F.lit(None))

    return self._apply_as_series_or_frame(mean)
Example 15: std
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import when [as alias]
def std(self):
    def std(scol):
        return F.when(
            F.row_number().over(self._unbounded_window) >= self._min_periods,
            F.stddev(scol).over(self._window),
        ).otherwise(F.lit(None))

    return self._apply_as_series_or_frame(std)