当前位置: 首页>>代码示例>>Python>>正文


Python pandas.NamedAgg方法代码示例

本文整理汇总了Python中pandas.NamedAgg方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.NamedAgg方法的具体用法?Python pandas.NamedAgg怎么用?Python pandas.NamedAgg使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pandas的用法示例。


在下文中一共展示了pandas.NamedAgg方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _get_aggregate_funcs

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NamedAgg [as 别名]
def _get_aggregate_funcs(
    df: DataFrame, aggregates: Dict[str, Dict[str, Any]],
) -> Dict[str, NamedAgg]:
    """
    Converts a set of aggregate config objects into functions that pandas can use as
    aggregators. Currently only numpy aggregators are supported.

    :param df: DataFrame on which to perform aggregate operation.
    :param aggregates: Mapping from column name to aggregate config.
    :return: Mapping from metric name to function that takes a single input argument.
    """
    agg_funcs: Dict[str, NamedAgg] = {}
    for name, agg_obj in aggregates.items():
        column = agg_obj.get("column", name)
        if column not in df:
            raise QueryObjectValidationError(
                _(
                    "Column referenced by aggregate is undefined: %(column)s",
                    column=column,
                )
            )
        if "operator" not in agg_obj:
            raise QueryObjectValidationError(
                _("Operator undefined for aggregator: %(name)s", name=name,)
            )
        operator = agg_obj["operator"]
        if operator not in WHITELIST_NUMPY_FUNCTIONS or not hasattr(np, operator):
            raise QueryObjectValidationError(
                _("Invalid numpy function: %(operator)s", operator=operator,)
            )
        func = getattr(np, operator)
        options = agg_obj.get("options", {})
        agg_funcs[name] = NamedAgg(column=column, aggfunc=partial(func, **options))

    return agg_funcs 
开发者ID:apache,项目名称:incubator-superset,代码行数:37,代码来源:pandas_postprocessing.py

示例2: plot_lr_finders

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NamedAgg [as 别名]
def plot_lr_finders(lr_finders:List[LRFinder], lr_range:Optional[Union[float,Tuple]]=None, loss_range:Optional[Union[float,Tuple,str]]='auto',
                    log_y:Union[str,bool]='auto', settings:PlotSettings=PlotSettings()) -> None:
    r'''
    Plot mean loss evolution against learning rate for several :class:`~lumin.nn.callbacks.opt_callbacks.LRFinder callbacks as returned by
    :meth:`~lumin.nn.optimisation.hyper_param.fold_lr_find`.

    Arguments:
        lr_finders: list of :class:`~lumin.nn.callbacks.opt_callbacks.LRFinder callbacks used during training (e.g. as returned by
            :meth:`~lumin.nn.optimisation.hyper_param.fold_lr_find`)
        lr_range: limits the range of learning rates plotted on the x-axis: if float, maximum LR; if tuple, minimum & maximum LR
        loss_range: limits the range of losses plotted on the x-axis:
            if float, maximum loss;
            if tuple, minimum & maximum loss;
            if None, no limits;
            if 'auto', computes an upper limit automatically
        log_y: whether to plot y-axis as log. If 'auto', will set to log if maximal fractional difference in loss values is greater than 50
        settings: :class:`~lumin.plotting.plot_settings.PlotSettings` class to control figure appearance
    '''
    
    df = pd.DataFrame()
    for lrf in lr_finders: df = df.append(lrf.get_df(), ignore_index=True)
    if lr_range is not None:
        if isinstance(lr_range, float): lr_range = (0, lr_range)
        df = df[(df.LR >= lr_range[0]) & (df.LR < lr_range[1])]

    if loss_range == 'auto':  # Max loss = 1.1 * max mean-loss at LR less than LR at min mean-loss
        agg = df.groupby(by='LR').agg(mean_loss=pd.NamedAgg(column='Loss', aggfunc='mean'))
        agg.reset_index(inplace=True)
        argmin_lr = agg.loc[agg.mean_loss.idxmin(), 'LR']
        loss_range = [0.8*agg.loc[agg.LR < argmin_lr, 'mean_loss'].min(), 1.2*agg.loc[agg.LR < argmin_lr, 'mean_loss'].max()]

    with sns.axes_style('whitegrid'), sns.color_palette(settings.cat_palette):
        plt.figure(figsize=(settings.w_mid, settings.h_mid))
        sns.lineplot(x='LR', y='Loss', data=df, ci='sd')
        plt.xscale('log')
        if log_y == 'auto':
            if df.Loss.max()/df.Loss.min() > 50: plt.yscale('log')
        elif log_y:
            plt.yscale('log')
        plt.grid(b=True, which="both", axis="both")
        if loss_range is not None: plt.ylim((0,loss_range) if isinstance(loss_range, float) else loss_range)
        plt.xticks(fontsize=settings.tk_sz, color=settings.tk_col)
        plt.yticks(fontsize=settings.tk_sz, color=settings.tk_col)
        plt.xlabel("Learning rate", fontsize=settings.lbl_sz, color=settings.lbl_col)
        plt.ylabel("Loss", fontsize=settings.lbl_sz, color=settings.lbl_col)
        plt.show() 
开发者ID:GilesStrong,项目名称:lumin,代码行数:48,代码来源:training.py

示例3: test_aggregate_relabel

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NamedAgg [as 别名]
def test_aggregate_relabel(self):
        # this is to test named aggregation in groupby
        pdf = pd.DataFrame({"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]})
        kdf = ks.from_pandas(pdf)

        # different agg column, same function
        agg_pdf = pdf.groupby("group").agg(a_max=("A", "max"), b_max=("B", "max")).sort_index()
        agg_kdf = kdf.groupby("group").agg(a_max=("A", "max"), b_max=("B", "max")).sort_index()
        self.assert_eq(agg_pdf, agg_kdf)

        # same agg column, different functions
        agg_pdf = pdf.groupby("group").agg(b_max=("B", "max"), b_min=("B", "min")).sort_index()
        agg_kdf = kdf.groupby("group").agg(b_max=("B", "max"), b_min=("B", "min")).sort_index()
        self.assert_eq(agg_pdf, agg_kdf)

        # test on NamedAgg
        agg_pdf = (
            pdf.groupby("group").agg(b_max=pd.NamedAgg(column="B", aggfunc="max")).sort_index()
        )
        agg_kdf = (
            kdf.groupby("group").agg(b_max=ks.NamedAgg(column="B", aggfunc="max")).sort_index()
        )
        self.assert_eq(agg_kdf, agg_pdf)

        # test on NamedAgg multi columns aggregation
        agg_pdf = (
            pdf.groupby("group")
            .agg(
                b_max=pd.NamedAgg(column="B", aggfunc="max"),
                b_min=pd.NamedAgg(column="B", aggfunc="min"),
            )
            .sort_index()
        )
        agg_kdf = (
            kdf.groupby("group")
            .agg(
                b_max=ks.NamedAgg(column="B", aggfunc="max"),
                b_min=ks.NamedAgg(column="B", aggfunc="min"),
            )
            .sort_index()
        )
        self.assert_eq(agg_kdf, agg_pdf) 
开发者ID:databricks,项目名称:koalas,代码行数:44,代码来源:test_groupby.py

示例4: sanitize_blast_data

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NamedAgg [as 别名]
def sanitize_blast_data(data: pd.DataFrame, queries: pd.DataFrame, targets: pd.DataFrame,
                        qmult=3, tmult=1):

    if data[data.btop.isna()].shape[0] > 0:
        raise ValueError(data.loc[0])

    data["qseqid"] = data["qseqid"].str.replace(id_pattern, "\\1")
    data["sseqid"] = data["sseqid"].str.replace(id_pattern, "\\1")

    data = data.join(queries, on=["qseqid"]).join(targets, on=["sseqid"]).join(
        data.groupby(["qseqid", "sseqid"]).agg(
            min_evalue=pd.NamedAgg("evalue", np.min),
            max_bitscore=pd.NamedAgg("bitscore", np.max)
        )[["min_evalue", "max_bitscore"]], on=["qseqid", "sseqid"])

    for col in ["qstart", "qend", "sstart", "send", "qlength", "slength"]:
        assert ~(data[col].isna().any()), (col, data[data[col].isna()].shape[0], data.shape[0])
        try:
            data[col] = data[col].astype(int).values
        except ValueError as exc:
            raise ValueError("{}: {}".format(exc, col))

    for key, multiplier, (start, end), length in [
        ("query_frame", qmult, ("qstart", "qend"), "qlength"),
        ("target_frame", tmult, ("sstart", "send"), "slength")]:
        # Switch start and end when they are not in the correct order
        _ix = (data[start] > data[end])
        if multiplier > 1:
            data.loc[~_ix, key] = data[start] % multiplier
            data.loc[_ix, key] = -((data[length] - data[end] - 1) % multiplier)
            data.loc[(data[key] == 0) & ~_ix, key] = multiplier
            data.loc[(data[key] == 0) & _ix, key] = -multiplier
        else:
            data.loc[:, key] = 0
        data.loc[_ix, [start, end]] = data.loc[_ix, [end, start]].values
        data[start] -= 1
    # Get the minimum evalue for each group
    # data["aln_span"] = data.qend - data.qstart
    # Set the hsp_num
    data["sstart"] = data["sstart"].astype(int).values
    data["hsp_num"] = data.sort_values("bitscore", ascending=False).groupby(["qseqid", "sseqid"]).cumcount() + 1
    temp = data[["qseqid", "sseqid", "max_bitscore"]].drop_duplicates().sort_values(
        ["max_bitscore", "sseqid"], ascending=[False, True])
    temp["hit_num"] = temp.groupby(["qseqid"]).cumcount() + 1
    temp.set_index(["qseqid", "sseqid"], inplace=True)
    data = data.join(temp["hit_num"], on=["qseqid", "sseqid"])
    data = data.sort_values(["qid", "sid"])
    data.set_index(["qid", "sid"], drop=False, inplace=True)
    return data 
开发者ID:EI-CoreBioinformatics,项目名称:mikado,代码行数:51,代码来源:tabular_utils.py


注:本文中的pandas.NamedAgg方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。