本文整理汇总了Python中pandas.NamedAgg方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.NamedAgg方法的具体用法?Python pandas.NamedAgg怎么用?Python pandas.NamedAgg使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pandas的用法示例。
在下文中一共展示了pandas.NamedAgg方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _get_aggregate_funcs
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NamedAgg [as 别名]
def _get_aggregate_funcs(
df: DataFrame, aggregates: Dict[str, Dict[str, Any]],
) -> Dict[str, NamedAgg]:
"""
Converts a set of aggregate config objects into functions that pandas can use as
aggregators. Currently only numpy aggregators are supported.
:param df: DataFrame on which to perform aggregate operation.
:param aggregates: Mapping from column name to aggregate config.
:return: Mapping from metric name to function that takes a single input argument.
"""
agg_funcs: Dict[str, NamedAgg] = {}
for name, agg_obj in aggregates.items():
column = agg_obj.get("column", name)
if column not in df:
raise QueryObjectValidationError(
_(
"Column referenced by aggregate is undefined: %(column)s",
column=column,
)
)
if "operator" not in agg_obj:
raise QueryObjectValidationError(
_("Operator undefined for aggregator: %(name)s", name=name,)
)
operator = agg_obj["operator"]
if operator not in WHITELIST_NUMPY_FUNCTIONS or not hasattr(np, operator):
raise QueryObjectValidationError(
_("Invalid numpy function: %(operator)s", operator=operator,)
)
func = getattr(np, operator)
options = agg_obj.get("options", {})
agg_funcs[name] = NamedAgg(column=column, aggfunc=partial(func, **options))
return agg_funcs
示例2: plot_lr_finders
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NamedAgg [as 别名]
def plot_lr_finders(lr_finders:List[LRFinder], lr_range:Optional[Union[float,Tuple]]=None, loss_range:Optional[Union[float,Tuple,str]]='auto',
                    log_y:Union[str,bool]='auto', settings:PlotSettings=PlotSettings()) -> None:
    r'''
    Plot mean loss evolution against learning rate for several :class:`~lumin.nn.callbacks.opt_callbacks.LRFinder` callbacks as returned by
    :meth:`~lumin.nn.optimisation.hyper_param.fold_lr_find`.

    The results of every finder (``get_df()``, read through its `LR` and `Loss` columns) are pooled, and the mean loss per learning rate is
    drawn with a standard-deviation band on a logarithmic x-axis.

    Arguments:
        lr_finders: list of :class:`~lumin.nn.callbacks.opt_callbacks.LRFinder` callbacks used during training (e.g. as returned by
            :meth:`~lumin.nn.optimisation.hyper_param.fold_lr_find`)
        lr_range: limits the range of learning rates plotted on the x-axis: if a number, maximum LR; if tuple, minimum & maximum LR
        loss_range: limits the range of losses plotted on the y-axis:
            if a number, maximum loss;
            if tuple, minimum & maximum loss;
            if None, no limits;
            if 'auto', computes limits automatically from the mean loss at learning rates below the one with the lowest mean loss
        log_y: whether to plot y-axis as log. If 'auto', will set to log if maximal fractional difference in loss values is greater than 50
        settings: :class:`~lumin.plotting.plot_settings.PlotSettings` class to control figure appearance
    '''

    # DataFrame.append was removed in pandas 2.0 and copied the frame on every call; concatenate once instead.
    # pd.concat refuses an empty list, so keep the empty-frame starting point the loop used to provide.
    frames = [lrf.get_df() for lrf in lr_finders]
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    if lr_range is not None:
        # Accept integer limits as well as floats: a bare number is the upper bound
        if isinstance(lr_range, (int, float)): lr_range = (0, lr_range)
        df = df[(df.LR >= lr_range[0]) & (df.LR < lr_range[1])]

    if loss_range == 'auto':  # Limits = 0.8 * min and 1.2 * max of the mean loss at LRs below the LR of minimum mean loss
        agg = df.groupby(by='LR').agg(mean_loss=pd.NamedAgg(column='Loss', aggfunc='mean'))
        agg.reset_index(inplace=True)
        argmin_lr = agg.loc[agg.mean_loss.idxmin(), 'LR']
        pre_min_loss = agg.loc[agg.LR < argmin_lr, 'mean_loss']
        # If the minimum sits at the smallest LR there is nothing to derive limits from; NaN limits would make ylim fail
        loss_range = [0.8*pre_min_loss.min(), 1.2*pre_min_loss.max()] if len(pre_min_loss) > 0 else None

    with sns.axes_style('whitegrid'), sns.color_palette(settings.cat_palette):
        plt.figure(figsize=(settings.w_mid, settings.h_mid))
        sns.lineplot(x='LR', y='Loss', data=df, ci='sd')
        plt.xscale('log')
        if log_y == 'auto':
            if df.Loss.max()/df.Loss.min() > 50: plt.yscale('log')
        elif log_y:
            plt.yscale('log')
        # Passed positionally: the keyword was renamed from `b` to `visible` across matplotlib releases
        plt.grid(True, which="both", axis="both")
        if loss_range is not None: plt.ylim((0,loss_range) if isinstance(loss_range, (int, float)) else loss_range)
        plt.xticks(fontsize=settings.tk_sz, color=settings.tk_col)
        plt.yticks(fontsize=settings.tk_sz, color=settings.tk_col)
        plt.xlabel("Learning rate", fontsize=settings.lbl_sz, color=settings.lbl_col)
        plt.ylabel("Loss", fontsize=settings.lbl_sz, color=settings.lbl_col)
        plt.show()
示例3: test_aggregate_relabel
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NamedAgg [as 别名]
def test_aggregate_relabel(self):
    """
    Check that named aggregation in groupby gives the same result for a pandas
    frame and for the `ks` frame built from it with `ks.from_pandas`.

    Covers plain ``(column, aggfunc)`` tuples as well as ``NamedAgg`` objects,
    each with one and with several output columns.
    """
    pandas_frame = pd.DataFrame(
        {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
    )
    ks_frame = ks.from_pandas(pandas_frame)

    def aggregate(frame, **named_aggs):
        # Group, apply the relabelling aggregation and sort so row order is comparable.
        return frame.groupby("group").agg(**named_aggs).sort_index()

    # Tuple syntax: first different columns with the same function,
    # then the same column with different functions.
    tuple_specs = (
        {"a_max": ("A", "max"), "b_max": ("B", "max")},
        {"b_max": ("B", "max"), "b_min": ("B", "min")},
    )
    for spec in tuple_specs:
        expected = aggregate(pandas_frame, **spec)
        actual = aggregate(ks_frame, **spec)
        self.assert_eq(expected, actual)

    # NamedAgg syntax on column "B": a single output column, then several.
    named_agg_specs = (
        {"b_max": "max"},
        {"b_max": "max", "b_min": "min"},
    )
    for spec in named_agg_specs:
        expected = aggregate(
            pandas_frame,
            **{label: pd.NamedAgg(column="B", aggfunc=func) for label, func in spec.items()}
        )
        actual = aggregate(
            ks_frame,
            **{label: ks.NamedAgg(column="B", aggfunc=func) for label, func in spec.items()}
        )
        self.assert_eq(actual, expected)
示例4: sanitize_blast_data
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import NamedAgg [as 别名]
def sanitize_blast_data(data: pd.DataFrame, queries: pd.DataFrame, targets: pd.DataFrame,
                        qmult=3, tmult=1):
    """
    Clean a table of BLAST matches and annotate it with frame and ranking columns.

    Steps, in order:

    1. Rewrite ``qseqid``/``sseqid`` with the module-level ``id_pattern``, keeping
       only its first capture group.
    2. Join ``queries`` on ``qseqid`` and ``targets`` on ``sseqid``, and add the
       per (query, target) pair ``min_evalue`` and ``max_bitscore``.
    3. Cast the coordinate and length columns to integers.
    4. Compute ``query_frame``/``target_frame``, put start and end in ascending
       order and shift start down by one.
    5. Add ``hsp_num`` (rank within a pair by descending bitscore) and
       ``hit_num`` (rank of the target within a query by descending
       ``max_bitscore``, ties broken by ascending ``sseqid``).
    6. Sort by and index on ``qid``/``sid`` (presumably supplied by ``queries``
       and ``targets`` -- confirm against the callers).

    Note that the ``qseqid`` and ``sseqid`` columns of the frame passed in are
    overwritten in place before the function starts working on new frames.

    :param data: BLAST matches; reads the columns btop, qseqid, sseqid, evalue,
        bitscore, qstart, qend, sstart, send, qlength and slength.
    :param queries: frame joined onto ``data`` by ``qseqid``.
    :param targets: frame joined onto ``data`` by ``sseqid``.
    :param qmult: frame multiplier for the query; values above 1 enable the
        frame computation, otherwise ``query_frame`` is set to 0.
    :param tmult: same as ``qmult`` for the target and ``target_frame``.
    :return: the annotated frame, indexed by (``qid``, ``sid``) with both kept as columns.
    :raises ValueError: if any row lacks a BTOP value, or a coordinate/length
        column cannot be converted to integers.
    :raises AssertionError: if a coordinate/length column contains missing values.
    """
    missing_btop = data.btop.isna()
    if missing_btop.any():
        # Report an actual offending row; looking up label 0 fails with KeyError
        # whenever the index does not contain it.
        raise ValueError("Missing BTOP in {} BLAST row(s); first offending row:\n{}".format(
            int(missing_btop.sum()), data[missing_btop].iloc[0]))

    # regex=True is spelled out because pandas 2.0 switched the default to literal matching.
    data["qseqid"] = data["qseqid"].str.replace(id_pattern, "\\1", regex=True)
    data["sseqid"] = data["sseqid"].str.replace(id_pattern, "\\1", regex=True)

    # String aggregator names select the same groupby reductions that np.min/np.max
    # were mapped to, without the deprecation warning newer pandas emits for the callables.
    pair_stats = data.groupby(["qseqid", "sseqid"]).agg(
        min_evalue=pd.NamedAgg("evalue", "min"),
        max_bitscore=pd.NamedAgg("bitscore", "max")
    )[["min_evalue", "max_bitscore"]]
    data = data.join(queries, on=["qseqid"]).join(targets, on=["sseqid"]).join(
        pair_stats, on=["qseqid", "sseqid"])

    for col in ["qstart", "qend", "sstart", "send", "qlength", "slength"]:
        # `not` instead of `~`: bitwise negation of a plain bool is always truthy.
        assert not data[col].isna().any(), (col, data[data[col].isna()].shape[0], data.shape[0])
        try:
            data[col] = data[col].astype(int).values
        except ValueError as exc:
            raise ValueError("{}: {}".format(exc, col)) from exc

    for key, multiplier, (start, end), length in [
            ("query_frame", qmult, ("qstart", "qend"), "qlength"),
            ("target_frame", tmult, ("sstart", "send"), "slength")]:
        # Rows whose start lies after their end
        _ix = (data[start] > data[end])
        if multiplier > 1:
            data.loc[~_ix, key] = data[start] % multiplier
            data.loc[_ix, key] = -((data[length] - data[end] - 1) % multiplier)
            # A remainder of 0 stands for the last frame: +/- multiplier rather than 0
            data.loc[(data[key] == 0) & ~_ix, key] = multiplier
            data.loc[(data[key] == 0) & _ix, key] = -multiplier
        else:
            data.loc[:, key] = 0
        # Switch start and end when they are not in the correct order
        data.loc[_ix, [start, end]] = data.loc[_ix, [end, start]].values
        data[start] -= 1

    # Set the hsp_num
    data["sstart"] = data["sstart"].astype(int).values
    data["hsp_num"] = data.sort_values("bitscore", ascending=False).groupby(["qseqid", "sseqid"]).cumcount() + 1

    # Set the hit_num: one row per (query, target) pair, best pair first within each query
    temp = data[["qseqid", "sseqid", "max_bitscore"]].drop_duplicates().sort_values(
        ["max_bitscore", "sseqid"], ascending=[False, True])
    temp["hit_num"] = temp.groupby(["qseqid"]).cumcount() + 1
    temp.set_index(["qseqid", "sseqid"], inplace=True)
    data = data.join(temp["hit_num"], on=["qseqid", "sseqid"])

    data = data.sort_values(["qid", "sid"])
    data.set_index(["qid", "sid"], drop=False, inplace=True)
    return data