This article collects typical usage examples of the pyspark.sql.functions.min method in Python. If you have been wondering what functions.min does, how to use it, or what calling it looks like in practice, the curated code examples below should help. You can also explore further usage examples from the module it belongs to, pyspark.sql.functions.
The following shows 15 code examples of functions.min, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
Example 1: is_multi_agg_with_relabel
# Module to import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import min [as alias]
def is_multi_agg_with_relabel(**kwargs):
"""
Check whether the kwargs passed to .agg look like multi-agg with relabeling.
Parameters
----------
**kwargs : dict
Returns
-------
bool
Examples
--------
>>> is_multi_agg_with_relabel(a='max')
False
>>> is_multi_agg_with_relabel(a_max=('a', 'max'),
... a_min=('a', 'min'))
True
>>> is_multi_agg_with_relabel()
False
"""
if not kwargs:
return False
return all(isinstance(v, tuple) and len(v) == 2 for v in kwargs.values())
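A quick standalone check of the helper above; the keyword and column names are made up for illustration and mirror pandas-style named aggregation, where each value is a (column, aggfunc) 2-tuple.
# Relabeled multi-agg: every value is a (column, aggfunc) 2-tuple
print(is_multi_agg_with_relabel(low=("price", "min"), high=("price", "max")))  # True
# Plain string spec, no relabeling
print(is_multi_agg_with_relabel(price="min"))  # False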
Example 2: get_sampled
# Module to import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import min [as alias]
def get_sampled(self, data):
from databricks.koalas import DataFrame, Series
fraction = get_option("plotting.sample_ratio")
if fraction is None:
fraction = 1 / (len(data) / get_option("plotting.max_rows"))
fraction = min(1.0, fraction)
self.fraction = fraction
if isinstance(data, (DataFrame, Series)):
if isinstance(data, Series):
data = data.to_frame()
sampled = data._internal.resolved_copy.spark_frame.sample(fraction=self.fraction)
return DataFrame(data._internal.with_new_sdf(sampled)).to_pandas()
else:
raise ValueError("Only DataFrame and Series are supported for plotting.")
Example 3: stats
# Module to import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import min [as alias]
def stats(self, columns):
"""Compute the stats for each column provided in columns.
Parameters
----------
columns : list of str, contains all columns to compute stats on.
"""
assert (not isinstance(columns, basestring)), "columns should be a " \
"list of strs, " \
"not a str!"
assert isinstance(columns, list), "columns should be a list!"
from pyspark.sql import functions as F
functions = [F.min, F.max, F.avg, F.count]
aggs = list(
self._flatmap(lambda column: map(lambda f: f(column), functions),
columns))
return PStats(self.from_schema_rdd(self._schema_rdd.agg(*aggs)))
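The same per-column min/max/avg/count aggregation can be written directly against a Spark DataFrame without the sparklingpandas wrapper; this is a sketch with a toy DataFrame, not the library's own code.
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
sdf = spark.createDataFrame([(1, 10.0), (2, 20.0), (3, 30.0)], ["a", "b"])
aggs = [f(c) for c in ["a", "b"] for f in (F.min, F.max, F.avg, F.count)]
sdf.agg(*aggs).show()  # one row holding min/max/avg/count for each column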
Example 4: min
# Module to import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import min [as alias]
def min(self):
"""Compute the min for each group."""
if self._can_use_new_school():
self._prep_spark_sql_groupby()
import pyspark.sql.functions as func
return self._use_aggregation(func.min)
self._prep_pandas_groupby()
myargs = self._myargs
mykwargs = self._mykwargs
def create_combiner(x):
return x.groupby(*myargs, **mykwargs).min()
def merge_value(x, y):
return x.append(create_combiner(y)).min()
def merge_combiner(x, y):
return x.append(y).min(level=0)
rddOfMin = self._sortIfNeeded(self._distributedRDD.combineByKey(
create_combiner,
merge_value,
merge_combiner)).values()
return DataFrame.fromDataFrameRDD(rddOfMin, self.sql_ctx)
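For comparison, this is roughly what the "new school" branch above delegates to: a plain Spark SQL grouped aggregation. The data is made up for illustration.
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
sdf = spark.createDataFrame([("a", 3), ("a", 1), ("b", 2)], ["key", "value"])
sdf.groupBy("key").agg(F.min("value").alias("min_value")).show()  # a -> 1, b -> 2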
Example 5: compile_all
# Module to import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import min [as alias]
def compile_all(t, expr, scope, context=None, **kwargs):
return compile_aggregator(t, expr, scope, F.min, context, **kwargs)
Example 6: compile_notall
# Module to import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import min [as alias]
def compile_notall(t, expr, scope, *, context=None, window=None, **kwargs):
# See comments for opts.NotAny for reasoning for the if/else
if context is None:
def fn(col):
return ~(F.min(col))
return compile_aggregator(t, expr, scope, fn, context, **kwargs)
else:
return ~compile_all(
t, expr, scope, context=context, window=window, **kwargs
)
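Both compilers lean on the fact that Spark's min over a BooleanType column behaves like a logical ALL (False sorts before True), so negating it yields NOT ALL. A small sketch, assuming the running Spark version supports ordering booleans:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
sdf = spark.createDataFrame([(True,), (True,), (False,)], ["flag"])
sdf.agg(
    F.min("flag").alias("all_flag"),        # False: not every flag is True
    (~F.min("flag")).alias("notall_flag"),  # True
).show()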
Example 7: compile_min
# Module to import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import min [as alias]
def compile_min(t, expr, scope, context=None, **kwargs):
return compile_aggregator(t, expr, scope, F.min, context, **kwargs)
Example 8: min
# Module to import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import min [as alias]
def min(self):
def min(scol):
return F.when(
F.row_number().over(self._unbounded_window) >= self._min_periods,
F.min(scol).over(self._window),
).otherwise(F.lit(None))
return self._apply_as_series_or_frame(min)
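In plain PySpark the same rolling min can be spelled out with explicit window specs; _window and _unbounded_window above are koalas internals, so the window definitions below are illustrative stand-ins (a 3-row trailing window ordered by idx, with min_periods=2).
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.window import Window

spark = SparkSession.builder.getOrCreate()
sdf = spark.createDataFrame(list(enumerate([5, 3, 4, 1, 2])), ["idx", "value"])
min_periods = 2
window = Window.orderBy("idx").rowsBetween(-2, Window.currentRow)
unbounded = Window.orderBy("idx").rowsBetween(Window.unboundedPreceding, Window.currentRow)
rolling_min = F.when(
    F.row_number().over(unbounded) >= min_periods,   # only once min_periods rows are visible
    F.min("value").over(window),                     # min over the trailing 3-row frame
).otherwise(F.lit(None))
sdf.select("idx", "value", rolling_min.alias("rolling_min")).show()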
Example 9: min
# Module to import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import min [as alias]
def min(self):
"""
Return the minimum value of the Index.
Returns
-------
scalar
Minimum value.
See Also
--------
Index.max : Return the maximum value of the object.
Series.min : Return the minimum value in a Series.
DataFrame.min : Return the minimum values in a DataFrame.
Examples
--------
>>> idx = ks.Index([3, 2, 1])
>>> idx.min()
1
>>> idx = ks.Index(['c', 'b', 'a'])
>>> idx.min()
'a'
For a MultiIndex, the minimum is determined lexicographically.
>>> idx = ks.MultiIndex.from_tuples([('a', 'x', 1), ('b', 'y', 2)])
>>> idx.min()
('a', 'x', 1)
"""
sdf = self._internal.spark_frame
min_row = sdf.select(F.min(F.struct(self._internal.index_spark_columns))).head()
result = tuple(min_row[0])
return result if len(result) > 1 else result[0]
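The struct trick works on any Spark DataFrame: min over a struct column compares its fields left to right, which is exactly the lexicographic minimum. A short sketch with made-up columns:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
sdf = spark.createDataFrame([("b", 2), ("a", 9), ("a", 1)], ["k1", "k2"])
row = sdf.select(F.min(F.struct("k1", "k2")).alias("m")).head()
print(tuple(row["m"]))  # ('a', 1)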
Example 10: max
# Module to import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import min [as alias]
def max(self):
"""
Return the maximum value of the Index.
Returns
-------
scalar
Maximum value.
See Also
--------
Index.min : Return the minimum value in an Index.
Series.max : Return the maximum value in a Series.
DataFrame.max : Return the maximum values in a DataFrame.
Examples
--------
>>> idx = ks.Index([3, 2, 1])
>>> idx.max()
3
>>> idx = ks.Index(['c', 'b', 'a'])
>>> idx.max()
'c'
For a MultiIndex, the maximum is determined lexicographically.
>>> idx = ks.MultiIndex.from_tuples([('a', 'x', 1), ('b', 'y', 2)])
>>> idx.max()
('b', 'y', 2)
"""
sdf = self._internal.spark_frame
max_row = sdf.select(F.max(F.struct(self._internal.index_spark_columns))).head()
result = tuple(max_row[0])
return result if len(result) > 1 else result[0]
Example 11: min
# Module to import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import min [as alias]
def min(self):
"""
Compute min of group values.
See Also
--------
databricks.koalas.Series.groupby
databricks.koalas.DataFrame.groupby
"""
return self._reduce_for_stat_function(F.min, only_numeric=False)
# TODO: sync the doc and implement `ddof`.
Example 12: _calc_whiskers
# Module to import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import min [as alias]
def _calc_whiskers(colname, outliers):
# Computes min and max values of non-outliers - the whiskers
minmax = (
outliers.filter("not __{}_outlier".format(colname))
.agg(F.min(colname).alias("min"), F.max(colname).alias("max"))
.toPandas()
)
return minmax.iloc[0][["min", "max"]].values
Example 13: _get_bins
# Module to import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import min [as alias]
def _get_bins(sdf, bins):
# 'data' is a Spark DataFrame that selects all columns.
if len(sdf.columns) > 1:
min_col = F.least(*map(F.min, sdf))
max_col = F.greatest(*map(F.max, sdf))
else:
min_col = F.min(sdf.columns[-1])
max_col = F.max(sdf.columns[-1])
boundaries = sdf.select(min_col, max_col).first()
# divides the boundaries into bins
if boundaries[0] == boundaries[1]:
boundaries = (boundaries[0] - 0.5, boundaries[1] + 0.5)
return np.linspace(boundaries[0], boundaries[1], bins + 1)
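A standalone version of the same bin computation on a toy two-column DataFrame; sdf.columns is passed explicitly here instead of iterating the DataFrame itself, and the bin count of 5 is arbitrary.
import numpy as np
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
sdf = spark.createDataFrame([(1.0, 10.0), (4.0, 2.0), (9.0, 7.0)], ["a", "b"])
bins = 5
min_col = F.least(*map(F.min, sdf.columns))
max_col = F.greatest(*map(F.max, sdf.columns))
lo, hi = sdf.select(min_col, max_col).first()
if lo == hi:
    lo, hi = lo - 0.5, hi + 0.5
print(np.linspace(lo, hi, bins + 1))  # 6 evenly spaced boundaries from 1.0 to 10.0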
Example 14: _reduce_for_stat_function
# Module to import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import min [as alias]
def _reduce_for_stat_function(self, sfun, name, axis=None, numeric_only=None):
"""
Applies sfun to the column and returns a scalar
Parameters
----------
sfun : the stats function to be used for aggregation
name : original pandas API name.
axis : used only for sanity check because series only support index axis.
numeric_only : not used by this implementation, but passed down by stats functions
"""
from inspect import signature
axis = validate_axis(axis)
if axis == 1:
raise ValueError("Series does not support columns axis.")
num_args = len(signature(sfun).parameters)
scol = self.spark.column
spark_type = self.spark.data_type
if isinstance(spark_type, BooleanType) and sfun.__name__ not in ("min", "max"):
# Stat functions cannot be used with boolean values by default
# Thus, cast to integer (true to 1 and false to 0)
# Exclude the min and max methods though since those work with booleans
scol = scol.cast("integer")
if num_args == 1:
# Only pass in the column if sfun accepts only one arg
scol = sfun(scol)
else: # must be 2
assert num_args == 2
# Pass in both the column and its data type if sfun accepts two args
scol = sfun(scol, spark_type)
return unpack_scalar(self._internal.spark_frame.select(scol))
Example 15: min
# Module to import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import min [as alias]
def min(self):
return self.from_spark_rdd(self._schema_rdd.min(), self.sql_ctx)