This article collects typical usage examples of the pyspark.sql.functions.max method in Python. If you have been wondering how functions.max is used in practice, what it is for, or what real code calling it looks like, the curated examples below should help. You can also explore further usage examples for the containing module, pyspark.sql.functions.
The following shows 15 code examples of functions.max, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
Example 1: compile_aggregator
# Required module import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def compile_aggregator(t, expr, scope, fn, context=None, **kwargs):
    op = expr.op()
    src_col = t.translate(op.arg, scope)

    if getattr(op, 'where', None) is not None:
        condition = t.translate(op.where, scope)
        src_col = F.when(condition, src_col)

    col = fn(src_col)
    if context is None:
        # We are trying to compile an expr such as some_col.max()
        # to a Spark expression.
        # Here we get the root table df of that column and compile
        # the expr to:
        # df.select(max(some_col))
        return t.translate(expr.op().arg.op().table, scope).select(col)
    elif context == AggregationContext.WINDOW:
        window = kwargs['window']
        return col.over(window)
    else:
        return col
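The F.when(condition, src_col) trick above is how a filtered aggregate is expressed without an explicit WHERE clause: rows that fail the condition become NULL, and F.max simply ignores NULLs. A minimal, self-contained sketch of that pattern (the DataFrame and column names are invented for illustration, not taken from the project above):

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("a", 1), ("a", 5), ("b", 3)], ["key", "value"])

# max(value) restricted to rows where key == 'a'; unmatched rows become NULL
filtered_max = df.select(F.max(F.when(F.col("key") == "a", F.col("value"))).alias("max_a"))
filtered_max.show()  # expected single value: 5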
Example 2: compile_notany
# Required module import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def compile_notany(t, expr, scope, *, context=None, window=None, **kwargs):
    # The code here is a little ugly because the translation differs
    # depending on context.
    # When translating col.notany() (context is None), we return the dataframe,
    # so we need to negate the aggregator, i.e., df.select(~F.max(col))
    # When translating col.notany().over(w), we need to negate the result
    # after the window translation, i.e., ~(F.max(col).over(w))
    if context is None:

        def fn(col):
            return ~(F.max(col))

        return compile_aggregator(t, expr, scope, fn, context, **kwargs)
    else:
        return ~compile_any(
            t, expr, scope, context=context, window=window, **kwargs
        )
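As a side note on why F.max doubles as an "any" aggregate here: in Spark SQL, True sorts above False, so the max of a boolean column is True exactly when some row is True, and negating it gives "not any". A small illustration (column name is made up):

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(True,), (False,), (False,)], ["flag"])

df.select(
    F.max("flag").alias("any_flag"),         # True if any row is True
    (~F.max("flag")).alias("not_any_flag"),  # the negated aggregate, as in compile_notany
).show()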
Example 3: get_latest_dataframe_id
# Required module import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def get_latest_dataframe_id(dataframe_metadata_df):
    """ Get the dataframe id of the dataframe on which the model has been trained.

        Args:
            dataframe_metadata_df (dataframe): Refer to listenbrainz_spark.schema.dataframe_metadata_schema

        Returns:
            dataframe id
    """
    # get timestamp of the most recently saved dataframe.
    timestamp = dataframe_metadata_df.select(func.max('dataframe_created').alias('recent_dataframe_timestamp')).take(1)[0]
    # get dataframe id corresponding to the most recent timestamp.
    df = dataframe_metadata_df.select('dataframe_id') \
                              .where(func.col('dataframe_created') == timestamp.recent_dataframe_timestamp).take(1)[0]
    return df.dataframe_id
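The two-step lookup above (aggregate the maximum timestamp, then filter on it) can be tried on a toy frame; the rows and ids below are hypothetical and only mirror the shape of dataframe_metadata_df:

from datetime import datetime
from pyspark.sql import SparkSession, functions as func

spark = SparkSession.builder.getOrCreate()
dataframe_metadata_df = spark.createDataFrame(
    [("df-1", datetime(2020, 1, 1)), ("df-2", datetime(2020, 6, 1))],
    ["dataframe_id", "dataframe_created"],
)

timestamp = dataframe_metadata_df.select(
    func.max("dataframe_created").alias("recent_dataframe_timestamp")
).take(1)[0]
latest = dataframe_metadata_df.select("dataframe_id") \
    .where(func.col("dataframe_created") == timestamp.recent_dataframe_timestamp).take(1)[0]
print(latest.dataframe_id)  # expected: 'df-2'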
Example 4: get_most_recent_model_id
# Required module import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def get_most_recent_model_id():
    """ Get the model id of the most recently created model.

        Returns:
            model_id (str): Model identification string.
    """
    try:
        model_metadata = utils.read_files_from_HDFS(path.MODEL_METADATA)
    except PathNotFoundException as err:
        current_app.logger.error(str(err), exc_info=True)
        sys.exit(-1)
    except FileNotFetchedException as err:
        current_app.logger.error(str(err), exc_info=True)
        sys.exit(-1)

    latest_ts = model_metadata.select(func.max('model_created').alias('model_created')).take(1)[0].model_created
    model_id = model_metadata.select('model_id') \
                             .where(col('model_created') == latest_ts).take(1)[0].model_id

    return model_id
Example 5: get_dates_to_generate_candidate_sets
# Required module import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def get_dates_to_generate_candidate_sets(mapped_df, recommendation_generation_window):
    """ Get the date window of listens to fetch for generating candidate sets.

        Args:
            mapped_df (dataframe): listens mapped with msid_mbid_mapping. Refer to candidate_sets.py
                                   for dataframe columns.
            recommendation_generation_window (int): number of days of listening history to use when
                                                    generating recommendations.

        Returns:
            from_date (datetime): date from which to start fetching listens.
            to_date (datetime): date up to which to fetch listens.
    """
    # get timestamp of latest listen in HDFS
    to_date = mapped_df.select(func.max('listened_at').alias('listened_at')).collect()[0].listened_at
    from_date = stats.adjust_days(to_date, recommendation_generation_window).replace(hour=0, minute=0, second=0)
    return from_date, to_date
Example 6: is_multi_agg_with_relabel
# Required module import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def is_multi_agg_with_relabel(**kwargs):
    """
    Check whether the kwargs passed to .agg look like multi-agg with relabeling.

    Parameters
    ----------
    **kwargs : dict

    Returns
    -------
    bool

    Examples
    --------
    >>> is_multi_agg_with_relabel(a='max')
    False
    >>> is_multi_agg_with_relabel(a_max=('a', 'max'),
    ...                           a_min=('a', 'min'))
    True
    >>> is_multi_agg_with_relabel()
    False
    """
    if not kwargs:
        return False
    return all(isinstance(v, tuple) and len(v) == 2 for v in kwargs.values())
Example 7: stats
# Required module import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def stats(self, columns):
    """Compute the stats for each column provided in columns.

    Parameters
    ----------
    columns : list of str, contains all columns to compute stats on.
    """
    assert (not isinstance(columns, basestring)), "columns should be a " \
                                                  "list of strs, " \
                                                  "not a str!"
    assert isinstance(columns, list), "columns should be a list!"

    from pyspark.sql import functions as F
    functions = [F.min, F.max, F.avg, F.count]
    aggs = list(
        self._flatmap(lambda column: map(lambda f: f(column), functions),
                      columns))
    return PStats(self.from_schema_rdd(self._schema_rdd.agg(*aggs)))
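The same idea, one aggregate expression per function/column pair evaluated in a single agg call, can be written directly against a plain PySpark DataFrame; the names below are illustrative only:

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, 10.0), (2, 30.0), (3, 20.0)], ["id", "score"])

functions = [F.min, F.max, F.avg, F.count]
columns = ["id", "score"]
aggs = [f(c) for c in columns for f in functions]

df.agg(*aggs).show()  # min/max/avg/count for both columns in a single pass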
Example 8: max
# Required module import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def max(self):
    """Compute the max for each group."""
    if self._can_use_new_school():
        self._prep_spark_sql_groupby()
        import pyspark.sql.functions as func
        return self._use_aggregation(func.max)
    self._prep_pandas_groupby()
    myargs = self._myargs
    mykwargs = self._mykwargs

    def create_combiner(x):
        return x.groupby(*myargs, **mykwargs).max()

    def merge_value(x, y):
        return x.append(create_combiner(y)).max()

    def merge_combiner(x, y):
        return x.append(y).max(level=0)

    rddOfMax = self._sortIfNeeded(self._distributedRDD.combineByKey(
        create_combiner,
        merge_value,
        merge_combiner)).values()
    return DataFrame.fromDataFrameRDD(rddOfMax, self.sql_ctx)
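For orientation, the "new school" branch above delegates the work to Spark SQL's own grouped aggregation. In plain PySpark, a grouped max looks like the following sketch; the column names are invented and this is only an approximation of what _use_aggregation(func.max) performs under the hood:

from pyspark.sql import SparkSession
import pyspark.sql.functions as func

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("a", 1), ("a", 4), ("b", 2)], ["key", "value"])

df.groupBy("key").agg(func.max("value").alias("max_value")).show()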
Example 9: to_pandas
# Required module import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def to_pandas(self, kind='hist'):
    """Returns a pandas dataframe from the Histogram object.

    This function computes the histogram in Spark if it has not been computed yet.

    Args:
        :kind: (:obj:`str`, optional):
            'hist' or 'density'. When using 'hist' this returns the histogram
            as a pandas dataframe. When using 'density' the index contains the bin centers, and the values
            in the dataframe are the scaled values. Defaults to 'hist'.

    Returns:
        A pandas DataFrame from the Histogram object.
    """
    self.build()
    if kind == 'hist':
        return pd.DataFrame(self.hist_dict).set_index([self._get_col_names()])
    elif kind == 'density':
        result = pd.DataFrame(self.hist_dict).set_index([self._get_bin_centers()])
        return result.apply(lambda x: x / x.max(), axis=0)
Example 10: compile_any
# Required module import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def compile_any(t, expr, scope, context=None, **kwargs):
    return compile_aggregator(t, expr, scope, F.max, context, **kwargs)
Example 11: compile_max
# Required module import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def compile_max(t, expr, scope, context=None, **kwargs):
    return compile_aggregator(t, expr, scope, F.max, context, **kwargs)
Example 12: test_aggregation
# Required module import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def test_aggregation(client):
    import pyspark.sql.functions as F

    table = client.table('basic_table')
    result = table.aggregate(table['id'].max()).compile()
    expected = table.compile().agg(F.max('id').alias('max'))

    tm.assert_frame_equal(result.toPandas(), expected.toPandas())
Example 13: make_daily_temperature_highs
# Required module import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def make_daily_temperature_highs(_, weather_samples: DataFrame) -> DataFrame:
    '''Computes the temperature high for each day'''
    valid_date = f.to_date(weather_samples['valid']).alias('valid_date')
    return weather_samples.groupBy(valid_date).agg(f.max('tmpf').alias('max_tmpf'))
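A toy invocation of make_daily_temperature_highs; the weather rows below are invented, and the leading `_` parameter, which the function ignores, is simply passed as None:

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
weather_samples = spark.createDataFrame(
    [("2021-07-01 09:00:00", 71.0),
     ("2021-07-01 15:00:00", 88.5),
     ("2021-07-02 14:00:00", 80.2)],
    ["valid", "tmpf"],
)

make_daily_temperature_highs(None, weather_samples).show()
# expected: one row per valid_date with that day's max_tmpf (88.5 and 80.2)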
Example 14: max
# Required module import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def max(self):
    def max(scol):
        return F.when(
            F.row_number().over(self._unbounded_window) >= self._min_periods,
            F.max(scol).over(self._window),
        ).otherwise(F.lit(None))

    return self._apply_as_series_or_frame(max)
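Stripped of the min_periods guard, the rolling max above is just F.max over a row-based window frame. A minimal illustration of that core, with a three-row trailing window chosen arbitrarily:

from pyspark.sql import SparkSession, Window, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, 3), (2, 1), (3, 5), (4, 2)], ["ts", "value"])

w = Window.orderBy("ts").rowsBetween(-2, Window.currentRow)
df.withColumn("rolling_max", F.max("value").over(w)).show()
# each row gets the max of itself and the two preceding rows (ordered by ts)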
Example 15: min
# Required module import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def min(self):
    """
    Return the minimum value of the Index.

    Returns
    -------
    scalar
        Minimum value.

    See Also
    --------
    Index.max : Return the maximum value of the object.
    Series.min : Return the minimum value in a Series.
    DataFrame.min : Return the minimum values in a DataFrame.

    Examples
    --------
    >>> idx = ks.Index([3, 2, 1])
    >>> idx.min()
    1

    >>> idx = ks.Index(['c', 'b', 'a'])
    >>> idx.min()
    'a'

    For a MultiIndex, the minimum is determined lexicographically.

    >>> idx = ks.MultiIndex.from_tuples([('a', 'x', 1), ('b', 'y', 2)])
    >>> idx.min()
    ('a', 'x', 1)
    """
    sdf = self._internal.spark_frame
    min_row = sdf.select(F.min(F.struct(self._internal.index_spark_columns))).head()
    result = tuple(min_row[0])
    return result if len(result) > 1 else result[0]
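The F.min(F.struct(...)) call is what makes the MultiIndex case lexicographic: structs compare field by field, so the minimum struct is the tuple-wise smallest row. A small stand-alone illustration with made-up columns:

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
sdf = spark.createDataFrame([("b", 1), ("a", 9), ("a", 2)], ["lvl1", "lvl2"])

min_row = sdf.select(F.min(F.struct("lvl1", "lvl2"))).head()
print(tuple(min_row[0]))  # expected: ('a', 2)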