

Python functions.max Method Code Examples

This article collects typical usage examples of the pyspark.sql.functions.max method in Python. If you are wondering what exactly functions.max does, how to call it, or what real uses of it look like, the curated code examples below may help. You can also explore further usage examples from the pyspark.sql.functions module that this method belongs to.


A total of 15 code examples of the functions.max method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
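
Before diving into the examples, here is a minimal, self-contained sketch of how pyspark.sql.functions.max is typically called. The DataFrame, column names, and values below are made up for illustration and are not taken from the projects referenced later.

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("a", 1), ("a", 3), ("b", 2)], ["key", "value"])

# Global maximum of a column.
df.select(F.max("value").alias("max_value")).show()

# Per-group maximum.
df.groupBy("key").agg(F.max("value").alias("max_value")).show()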

Example 1: compile_aggregator

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def compile_aggregator(t, expr, scope, fn, context=None, **kwargs):
    op = expr.op()
    src_col = t.translate(op.arg, scope)

    if getattr(op, 'where', None) is not None:
        condition = t.translate(op.where, scope)
        src_col = F.when(condition, src_col)

    col = fn(src_col)
    if context is None:
        # We are trying to compile an expr such as some_col.max()
        # to a Spark expression.
        # Here we get the root table df of that column and compile
        # the expr to:
        # df.select(max(some_col))
        return t.translate(expr.op().arg.op().table, scope).select(col)
    elif context == AggregationContext.WINDOW:
        window = kwargs['window']
        return col.over(window)
    else:
        return col 
Developer ID: ibis-project, Project: ibis, Lines of code: 23, Source file: compiler.py
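
The comment in compile_aggregator above describes how an optional filter is folded into the aggregate by wrapping the source column in F.when(...). Below is a hedged sketch of that filtered-aggregation pattern in plain PySpark; the DataFrame, column names, and condition are illustrative and not taken from ibis.

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, "x"), (5, "y"), (3, "x")], ["value", "tag"])

# Rows that fail the condition become NULL, and max() ignores NULLs,
# so this computes the maximum of "value" only where tag == "x".
df.select(F.max(F.when(F.col("tag") == "x", F.col("value"))).alias("max_x")).show()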

Example 2: compile_notany

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def compile_notany(t, expr, scope, *, context=None, window=None, **kwargs):
    # The code here is a little ugly because the translation differs
    # depending on the context.
    # When translating col.notany() (context is None), we return the dataframe,
    # so we need to negate the aggregator, i.e., df.select(~F.max(col)).
    # When translating col.notany().over(w), we need to negate the result
    # after the window translation, i.e., ~(F.max(col).over(w)).

    if context is None:

        def fn(col):
            return ~(F.max(col))

        return compile_aggregator(t, expr, scope, fn, context, **kwargs)
    else:
        return ~compile_any(
            t, expr, scope, context=context, window=window, **kwargs
        ) 
Developer ID: ibis-project, Project: ibis, Lines of code: 20, Source file: compiler.py
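
A hedged sketch of why negating the aggregator works as "not any": for a boolean column, F.max is True exactly when at least one row is True, so ~F.max(col) means "no row is True". The DataFrame and column name below are illustrative.

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(False,), (False,), (True,)], ["flag"])

# max("flag") is True because one row is True, so "not any" is False.
df.select((~F.max("flag")).alias("not_any")).show()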

Example 3: get_latest_dataframe_id

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def get_latest_dataframe_id(dataframe_metadata_df):
    """ Get dataframe id of dataframe on which model has been trained.

        Args:
            dataframe_metadata_df (dataframe): Refer to listenbrainz_spark.schema.dataframe_metadata_schema

        Returns:
            dataframe id
    """
    # get timestamp of recently saved dataframe.
    timestamp = dataframe_metadata_df.select(func.max('dataframe_created').alias('recent_dataframe_timestamp')).take(1)[0]
    # get dataframe id corresponding to most recent timestamp.
    df = dataframe_metadata_df.select('dataframe_id') \
                              .where(func.col('dataframe_created') == timestamp.recent_dataframe_timestamp).take(1)[0]

    return df.dataframe_id 
Developer ID: metabrainz, Project: listenbrainz-server, Lines of code: 18, Source file: train_models.py

Example 4: get_most_recent_model_id

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def get_most_recent_model_id():
    """ Get model id of recently created model.

        Returns:
            model_id (str): Model identification string.
    """
    try:
        model_metadata = utils.read_files_from_HDFS(path.MODEL_METADATA)
    except PathNotFoundException as err:
        current_app.logger.error(str(err), exc_info=True)
        sys.exit(-1)
    except FileNotFetchedException as err:
        current_app.logger.error(str(err), exc_info=True)
        sys.exit(-1)

    latest_ts = model_metadata.select(func.max('model_created').alias('model_created')).take(1)[0].model_created
    model_id = model_metadata.select('model_id') \
                             .where(col('model_created') == latest_ts).take(1)[0].model_id

    return model_id 
Developer ID: metabrainz, Project: listenbrainz-server, Lines of code: 22, Source file: recommend.py

Example 5: get_dates_to_generate_candidate_sets

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def get_dates_to_generate_candidate_sets(mapped_df, recommendation_generation_window):
    """ Get window to fetch listens to generate candidate sets.

        Args:
            mapped_df (dataframe): listens mapped with msid_mbid_mapping. Refer to candidate_sets.py
                                   for dataframe columns.
            recommendation_generation_window (int): number of days of listening history on which to generate recommendations.

        Returns:
            from_date (datetime): Date from which to start fetching listens.
            to_date (datetime): Date up to which to fetch listens.
    """
    # get timestamp of latest listen in HDFS
    to_date = mapped_df.select(func.max('listened_at').alias('listened_at')).collect()[0].listened_at
    from_date = stats.adjust_days(to_date, recommendation_generation_window).replace(hour=0, minute=0, second=0)
    return from_date, to_date 
Developer ID: metabrainz, Project: listenbrainz-server, Lines of code: 18, Source file: candidate_sets.py

Example 6: is_multi_agg_with_relabel

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def is_multi_agg_with_relabel(**kwargs):
    """
    Check whether the kwargs passed to .agg look like multi-agg with relabeling.

    Parameters
    ----------
    **kwargs : dict

    Returns
    -------
    bool

    Examples
    --------
    >>> is_multi_agg_with_relabel(a='max')
    False
    >>> is_multi_agg_with_relabel(a_max=('a', 'max'),
    ...                            a_min=('a', 'min'))
    True
    >>> is_multi_agg_with_relabel()
    False
    """
    if not kwargs:
        return False
    return all(isinstance(v, tuple) and len(v) == 2 for v in kwargs.values()) 
Developer ID: databricks, Project: koalas, Lines of code: 27, Source file: groupby.py
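
To make the "multi-agg with relabeling" shape concrete, here is a hedged sketch of how the new_name=(column, func) kwargs recognized by the helper above could be translated into plain PySpark aggregate expressions. The DataFrame, column names, and the funcs mapping are illustrative assumptions, not part of the koalas source.

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
sdf = spark.createDataFrame([("a", 1), ("a", 4), ("b", 2)], ["key", "a"])

# kwargs in the pandas-style relabeling form; is_multi_agg_with_relabel(**kwargs),
# as defined above, returns True for this shape.
kwargs = {"a_max": ("a", "max"), "a_min": ("a", "min")}

# One possible translation to Spark aggregates with aliases.
funcs = {"max": F.max, "min": F.min}
aggs = [funcs[fn](col).alias(name) for name, (col, fn) in kwargs.items()]
sdf.groupBy("key").agg(*aggs).show()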

Example 7: stats

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def stats(self, columns):
        """Compute the stats for each column provided in columns.
        Parameters
        ----------
        columns : list of str, contains all columns to compute stats on.
        """
        assert (not isinstance(columns, basestring)), "columns should be a " \
                                                      "list of strs,  " \
                                                      "not a str!"
        assert isinstance(columns, list), "columns should be a list!"

        from pyspark.sql import functions as F
        functions = [F.min, F.max, F.avg, F.count]
        aggs = list(
            self._flatmap(lambda column: map(lambda f: f(column), functions),
                          columns))
        return PStats(self.from_schema_rdd(self._schema_rdd.agg(*aggs))) 
Developer ID: sparklingpandas, Project: sparklingpandas, Lines of code: 19, Source file: dataframe.py
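
The stats method above builds one aggregate expression per (function, column) pair and hands them all to a single agg call. Below is a hedged, stand-alone sketch of the same idea in current PySpark, with an illustrative DataFrame.

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, 10.0), (2, 20.0), (3, 30.0)], ["a", "b"])

columns = ["a", "b"]
functions = [F.min, F.max, F.avg, F.count]
# One aggregate per column/function combination, all computed in a single pass.
aggs = [f(c) for c in columns for f in functions]
df.agg(*aggs).show()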

Example 8: max

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def max(self):
        """Compute the max for each group."""
        if self._can_use_new_school():
            self._prep_spark_sql_groupby()
            import pyspark.sql.functions as func
            return self._use_aggregation(func.max)
        self._prep_pandas_groupby()
        myargs = self._myargs
        mykwargs = self._mykwargs

        def create_combiner(x):
            return x.groupby(*myargs, **mykwargs).max()

        def merge_value(x, y):
            return x.append(create_combiner(y)).max()

        def merge_combiner(x, y):
            return x.append(y).max(level=0)

        rddOfMax = self._sortIfNeeded(self._distributedRDD.combineByKey(
            create_combiner,
            merge_value,
            merge_combiner)).values()
        return DataFrame.fromDataFrameRDD(rddOfMax, self.sql_ctx) 
Developer ID: sparklingpandas, Project: sparklingpandas, Lines of code: 26, Source file: groupby.py

Example 9: to_pandas

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def to_pandas(self, kind='hist'):
        """Returns a pandas dataframe from the Histogram object.

        This function computes the histogram in Spark if it has not been computed yet.

        Args:
            :kind: (:obj:`str`, optional):
                'hist' or 'density'. When using hist this returns the histogram object
                as pandas dataframe. When using density the index contains the bin centers, and the values in the
                DataFrame are the scaled values. Defaults to 'hist'

        Returns:
            A pandas DataFrame from the Histogram object.
        """
        self.build()
        if kind == 'hist':
            return pd.DataFrame(self.hist_dict).set_index([self._get_col_names()])
        elif kind == 'density':
            result = pd.DataFrame(self.hist_dict).set_index([self._get_bin_centers()])
            return result.apply(lambda x: x / x.max(), axis=0) 
Developer ID: Bergvca, Project: pyspark_dist_explore, Lines of code: 22, Source file: pyspark_dist_explore.py

Example 10: compile_any

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def compile_any(t, expr, scope, context=None, **kwargs):
    return compile_aggregator(t, expr, scope, F.max, context, **kwargs) 
Developer ID: ibis-project, Project: ibis, Lines of code: 4, Source file: compiler.py

Example 11: compile_max

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def compile_max(t, expr, scope, context=None, **kwargs):
    return compile_aggregator(t, expr, scope, F.max, context, **kwargs) 
Developer ID: ibis-project, Project: ibis, Lines of code: 4, Source file: compiler.py

Example 12: test_aggregation

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def test_aggregation(client):
    import pyspark.sql.functions as F

    table = client.table('basic_table')
    result = table.aggregate(table['id'].max()).compile()
    expected = table.compile().agg(F.max('id').alias('max'))

    tm.assert_frame_equal(result.toPandas(), expected.toPandas()) 
Developer ID: ibis-project, Project: ibis, Lines of code: 10, Source file: test_basic.py

Example 13: make_daily_temperature_highs

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def make_daily_temperature_highs(_, weather_samples: DataFrame) -> DataFrame:
    '''Computes the temperature high for each day'''
    valid_date = f.to_date(weather_samples['valid']).alias('valid_date')
    return weather_samples.groupBy(valid_date).agg(f.max('tmpf').alias('max_tmpf')) 
Developer ID: dagster-io, Project: dagster, Lines of code: 6, Source file: solids.py

Example 14: max

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def max(self):
        def max(scol):
            return F.when(
                F.row_number().over(self._unbounded_window) >= self._min_periods,
                F.max(scol).over(self._window),
            ).otherwise(F.lit(None))

        return self._apply_as_series_or_frame(max) 
Developer ID: databricks, Project: koalas, Lines of code: 10, Source file: window.py
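
A hedged sketch of the rolling-max idea above in plain PySpark: F.max over a row-based window, masked to NULL until min_periods rows have been seen. The window bounds, ordering column, and min_periods value are illustrative.

from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.window import Window

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame(list(enumerate([3, 1, 4, 1, 5])), ["idx", "value"])

min_periods = 2
window = Window.orderBy("idx").rowsBetween(-2, 0)  # rolling window of 3 rows
unbounded = Window.orderBy("idx")

# NULL until at least min_periods rows have accumulated, then the windowed max.
rolling_max = F.when(
    F.row_number().over(unbounded) >= min_periods,
    F.max("value").over(window),
).otherwise(F.lit(None))

df.withColumn("rolling_max", rolling_max).show()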

Example 15: min

# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import max [as alias]
def min(self):
        """
        Return the minimum value of the Index.

        Returns
        -------
        scalar
            Minimum value.

        See Also
        --------
        Index.max : Return the maximum value of the object.
        Series.min : Return the minimum value in a Series.
        DataFrame.min : Return the minimum values in a DataFrame.

        Examples
        --------
        >>> idx = ks.Index([3, 2, 1])
        >>> idx.min()
        1

        >>> idx = ks.Index(['c', 'b', 'a'])
        >>> idx.min()
        'a'

        For a MultiIndex, the minimum is determined lexicographically.

        >>> idx = ks.MultiIndex.from_tuples([('a', 'x', 1), ('b', 'y', 2)])
        >>> idx.min()
        ('a', 'x', 1)
        """
        sdf = self._internal.spark_frame
        min_row = sdf.select(F.min(F.struct(self._internal.index_spark_columns))).head()
        result = tuple(min_row[0])

        return result if len(result) > 1 else result[0] 
Developer ID: databricks, Project: koalas, Lines of code: 38, Source file: indexes.py
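
A hedged sketch of the struct trick used above: wrapping several columns in F.struct and taking the minimum compares them field by field, i.e. lexicographically. The DataFrame and column names below are illustrative, not taken from koalas.

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
sdf = spark.createDataFrame([("b", "y", 2), ("a", "x", 1)], ["l0", "l1", "l2"])

# min over a struct column orders rows by (l0, l1, l2) lexicographically.
row = sdf.select(F.min(F.struct("l0", "l1", "l2")).alias("m")).head()
print(tuple(row.m))  # ('a', 'x', 1)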


Note: The pyspark.sql.functions.max examples in this article were compiled by 純淨天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors. Please consult the corresponding project's license before distributing or using the code, and do not reproduce this article without permission.