本文整理汇总了 Python 中 pandas.cut 方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.cut 方法的具体用法?Python pandas.cut 怎么用?Python pandas.cut 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 pandas 的用法示例。
在下文中一共展示了pandas.cut方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: quality_over_time
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def quality_over_time(dfs, path, figformat, title, plot_settings=None):
    """Draw a violin plot of basecall quality per time bin and save it.

    Parameters
    ----------
    dfs
        Data handed to ``sns.violinplot`` as ``data``; its "timebin" column
        is used for the x axis and its "quals" column for the y axis.
    path : str
        Prefix to which ``"TimeQualityViolinPlot." + figformat`` is appended
        to build the output path.
    figformat : str
        File extension; also passed to ``Plot.save`` as ``format``.
    title
        Axes title. When falsy, the title of the ``Plot`` object is used.
    plot_settings : dict, optional
        Extra keyword arguments forwarded to ``sns.set``.

    Returns
    -------
    The ``Plot`` object with its ``fig`` attribute set, after ``save`` was
    called on it.
    """
    # A None sentinel avoids sharing one mutable dict object between calls.
    settings = {} if plot_settings is None else plot_settings
    time_qual = Plot(path=path + "TimeQualityViolinPlot." + figformat,
                     title="Violin plot of quality over time")
    sns.set(style="white", **settings)
    ax = sns.violinplot(x="timebin",
                        y="quals",
                        data=dfs,
                        inner=None,
                        cut=0,
                        linewidth=0)
    ax.set(xlabel='Interval (hours)',
           ylabel="Basecall quality",
           title=title or time_qual.title)
    plt.xticks(rotation=45, ha='center', fontsize=8)
    time_qual.fig = ax.get_figure()
    time_qual.save(format=figformat)
    # Close every open pyplot figure once the plot has been saved.
    plt.close("all")
    return time_qual
示例2: sequencing_speed_over_time
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def sequencing_speed_over_time(dfs, path, figformat, title, plot_settings=None):
    """Draw a violin plot of sequencing speed per time bin and save it.

    Speed is computed as ``lengths / duration`` for every row whose
    "duration" is non-zero.

    Parameters
    ----------
    dfs
        Table with "lengths" and "duration" columns. When it has no
        "timebin" column, one is added in place from ``add_time_bins(dfs)``.
    path : str
        Prefix to which ``"TimeSequencingSpeed_ViolinPlot." + figformat`` is
        appended to build the output path.
    figformat : str
        File extension; also passed to ``Plot.save`` as ``format``.
    title
        Axes title. When falsy, the title of the ``Plot`` object is used.
    plot_settings : dict, optional
        Extra keyword arguments forwarded to ``sns.set``.

    Returns
    -------
    The ``Plot`` object with its ``fig`` attribute set, after ``save`` was
    called on it.
    """
    # A None sentinel avoids sharing one mutable dict object between calls.
    settings = {} if plot_settings is None else plot_settings
    time_duration = Plot(path=path + "TimeSequencingSpeed_ViolinPlot." + figformat,
                         title="Violin plot of sequencing speed over time")
    sns.set(style="white", **settings)
    if "timebin" not in dfs:
        dfs['timebin'] = add_time_bins(dfs)
    # Rows with zero duration are skipped so the division below is defined.
    mask = dfs['duration'] != 0
    ax = sns.violinplot(x=dfs.loc[mask, "timebin"],
                        y=dfs.loc[mask, "lengths"] / dfs.loc[mask, "duration"],
                        inner=None,
                        cut=0,
                        linewidth=0)
    ax.set(xlabel='Interval (hours)',
           ylabel="Sequencing speed (nucleotides/second)",
           title=title or time_duration.title)
    plt.xticks(rotation=45, ha='center', fontsize=8)
    time_duration.fig = ax.get_figure()
    time_duration.save(format=figformat)
    # Close every open pyplot figure once the plot has been saved.
    plt.close("all")
    return time_duration
示例3: test_slicing
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def test_slicing(self):
cat = Series(Categorical([1, 2, 3, 4]))
reversed = cat[::-1]
exp = np.array([4, 3, 2, 1], dtype=np.int64)
tm.assert_numpy_array_equal(reversed.__array__(), exp)
df = DataFrame({'value': (np.arange(100) + 1).astype('int64')})
df['D'] = pd.cut(df.value, bins=[0, 25, 50, 75, 100])
expected = Series([11, Interval(0, 25)], index=['value', 'D'], name=10)
result = df.iloc[10]
tm.assert_series_equal(result, expected)
expected = DataFrame({'value': np.arange(11, 21).astype('int64')},
index=np.arange(10, 20).astype('int64'))
expected['D'] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100])
result = df.iloc[10:20]
tm.assert_frame_equal(result, expected)
expected = Series([9, Interval(0, 25)], index=['value', 'D'], name=8)
result = df.loc[8]
tm.assert_series_equal(result, expected)
示例4: test_observed_codes_remap
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def test_observed_codes_remap(observed):
    """Group by a ``pd.cut`` key plus a plain column and compare the mean
    aggregation with the expected frame for the given ``observed`` flag."""
    df = pd.DataFrame(
        {'C1': [3, 3, 4, 5], 'C2': [1, 2, 3, 4], 'C3': [10, 100, 200, 34]}
    )
    cat_key = pd.cut(df['C1'], [1, 2, 3, 6])
    cat_key.name = "cat"

    expected = DataFrame(
        {"C1": [3, 3, 4, 5], "C3": [10, 100, 200, 34]},
        index=MultiIndex.from_arrays(
            [cat_key, [1, 2, 3, 4]], names=["cat", "C2"]
        ),
    )
    if not observed:
        # With observed=False the expected frame is rebuilt by the helper
        # from the grouper values.
        expected = cartesian_product_for_groupers(
            expected, [cat_key.values, [1, 2, 3, 4]], ['cat', 'C2']
        )

    grouped = df.groupby([cat_key, 'C2'], observed=observed)
    tm.assert_frame_equal(grouped.agg('mean'), expected)
示例5: test_sort
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def test_sort():
    """Counts grouped on a ``pd.cut`` column with categorical labels come
    back already ordered by bin."""
    # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby # noqa: flake8
    # The grouped Series has to be properly sorted so that a bar plot of it
    # gets a sorted x axis, e.g.
    # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar')
    frame = DataFrame({'value': np.random.randint(0, 10000, 100)})

    names = []
    for low in range(0, 10000, 500):
        names.append("{0} - {1}".format(low, low + 499))
    bin_labels = Categorical(names, names)

    frame = frame.sort_values(by=['value'], ascending=True)
    frame['value_group'] = pd.cut(
        frame.value, range(0, 10500, 500), right=False, labels=bin_labels
    )

    counts = frame.groupby(['value_group'], observed=False)['value_group'].count()

    def lower_bound(label):
        # Labels look like "500 - 999"; order by the leading number.
        return float(label.split()[0])

    reordered = counts[sorted(counts.index, key=lower_bound)]
    reordered.index = CategoricalIndex(reordered.index, name=reordered.index.name)
    tm.assert_series_equal(counts, reordered)
示例6: test_sort_index_intervalindex
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def test_sort_index_intervalindex(self):
# this is a de-facto sort via unstack
# confirming that we sort in the order of the bins
y = Series(np.random.randn(100))
x1 = Series(np.sign(np.random.randn(100)))
x2 = pd.cut(Series(np.random.randn(100)),
bins=[-3, -0.5, 0, 0.5, 3])
model = pd.concat([y, x1, x2], axis=1, keys=['Y', 'X1', 'X2'])
result = model.groupby(['X1', 'X2'], observed=True).mean().unstack()
expected = IntervalIndex.from_tuples(
[(-3.0, -0.5), (-0.5, 0.0),
(0.0, 0.5), (0.5, 3.0)],
closed='right')
result = result.columns.levels[1].categories
tm.assert_index_equal(result, expected)
示例7: test_to_excel_interval_no_labels
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def test_to_excel_interval_no_labels(self, *_):
    """Round-trip a frame with an unlabelled ``pd.cut`` column through Excel.

    The frame is written to ``self.path`` and read back; the interval
    column is expected to come back as its string form.
    """
    # see gh-19242
    #
    # Test writing Interval without labels.
    frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)),
                      dtype=np.int64)
    expected = frame.copy()

    frame["new"] = pd.cut(frame[0], 10)
    expected["new"] = pd.cut(expected[0], 10).astype(str)

    # sheet_name is passed by keyword: positional use is deprecated in
    # recent pandas releases.
    frame.to_excel(self.path, sheet_name="test1")
    # The context manager closes the workbook handle even when the read or
    # the comparison below fails.
    with ExcelFile(self.path) as reader:
        recons = read_excel(reader, sheet_name="test1", index_col=0)
    tm.assert_frame_equal(expected, recons)
示例8: test_to_excel_interval_labels
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def test_to_excel_interval_labels(self, *_):
    """Round-trip a frame with a labelled ``pd.cut`` column through Excel.

    The frame is written to ``self.path`` and read back; the categorical
    column is expected to come back as its plain label values.
    """
    # see gh-19242
    #
    # Test writing Interval with labels.
    frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)),
                      dtype=np.int64)
    expected = frame.copy()
    intervals = pd.cut(frame[0], 10, labels=["A", "B", "C", "D", "E",
                                             "F", "G", "H", "I", "J"])
    frame["new"] = intervals
    expected["new"] = pd.Series(list(intervals))

    # sheet_name is passed by keyword: positional use is deprecated in
    # recent pandas releases.
    frame.to_excel(self.path, sheet_name="test1")
    # The context manager closes the workbook handle even when the read or
    # the comparison below fails.
    with ExcelFile(self.path) as reader:
        recons = read_excel(reader, sheet_name="test1", index_col=0)
    tm.assert_frame_equal(expected, recons)
示例9: generate_final_dataset
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def generate_final_dataset(self):
    """Derive bin edges from ``self.woe_summary`` and bin ``self.dataset``.

    Side effects:
      * ``self.bucket`` is set to True when ``self.sign == False`` and to
        False otherwise; it is later used as the ``right`` argument of
        ``pd.cut``.
      * ``self.woe_summary`` gains a ``<column>_shift`` column (the
        ``self.column`` values shifted by one row, with the vacated cell
        filled with -inf or +inf) and a ``labels`` column produced by
        ``self.generate_bin_labels`` applied row-wise.
      * ``self.bins`` becomes the sorted ``self.column`` values of
        ``self.woe_summary`` plus -inf and +inf.
      * ``self.dataset["bins"]`` holds the string form of the interval each
        ``self.column`` value falls in, with a leading "[" and a trailing
        ")" removed.

    Returns None.
    """
    # Keep the original equality test (instead of ``not self.sign``) so a
    # value such as None is still routed to the ``else`` branch.
    sign_is_false = self.sign == False  # noqa: E712
    if sign_is_false:
        shift_var = 1
        self.bucket = True
    else:
        shift_var = -1
        self.bucket = False

    shift_col = self.column + "_shift"
    self.woe_summary[shift_col] = self.woe_summary[self.column].shift(shift_var)

    # Shifting leaves one NaN cell (first row for +1, last row for -1);
    # replace it with the matching infinite bound.
    if sign_is_false:
        self.woe_summary.loc[0, shift_col] = -np.inf
    else:
        self.woe_summary.loc[len(self.woe_summary) - 1, shift_col] = np.inf

    # ``np.inf`` is used because the ``np.Inf`` alias was removed in NumPy 2.0.
    self.bins = np.sort(list(self.woe_summary[self.column]) + [np.inf, -np.inf])

    self.woe_summary["labels"] = self.woe_summary.apply(self.generate_bin_labels, axis=1)

    self.dataset["bins"] = pd.cut(self.dataset[self.column], self.bins,
                                  right=self.bucket, precision=0)
    self.dataset["bins"] = self.dataset["bins"].astype(str)
    # Only "[" on the left and ")" on the right are stripped, so intervals
    # closed on the right, "(a, b]", pass through unchanged.
    self.dataset["bins"] = self.dataset["bins"].str.lstrip("[").str.rstrip(")")
示例10: __call__
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def __call__(self, inp):
    """Build the output series for a value-counts operation on ``inp``.

    A throwaway ``value_counts`` on ``build_series(inp)`` supplies the
    output dtype and, when no bins are configured, the index metadata.
    When ``self._bins`` is set, ``inp`` is first passed through ``cut``,
    ``self._bins`` is cleared and ``self._convert_index_to_interval`` is
    switched on.

    Raises
    ------
    TypeError
        When ``cut`` rejects the input with a TypeError.
    """
    probe = build_series(inp).value_counts(normalize=self.normalize)

    if self._bins is None:
        index_value = parse_index(probe.index, store_data=False)
    else:
        from .cut import cut

        # cut
        try:
            inp = cut(inp, self._bins, include_lowest=True)
        except TypeError:  # pragma: no cover
            raise TypeError("bins argument only works with numeric data.")

        self._bins = None
        self._convert_index_to_interval = True
        index_value = parse_index(pd.CategoricalIndex([]),
                                  inp, store_data=False)

    return self.new_series([inp], shape=(np.nan,),
                           index_value=index_value,
                           name=inp.name, dtype=probe.dtype)
示例11: execute
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def execute(cls, ctx, op: "DataFrameValueCounts"):
    """Compute the operand's result and store it in ``ctx``.

    In the map stage the input is passed through unchanged; in any other
    stage ``value_counts`` is run on the input. When
    ``op.convert_index_to_interval`` is set, normalisation is applied by
    dividing by the number of input rows after counting, and the result
    index is cast to an interval dtype.
    """
    source = ctx[op.input.key]
    to_interval = op.convert_index_to_interval

    if op.stage != OperandStage.map:
        # When the index will be converted, count without normalising and
        # divide by the input length afterwards.
        result = source.value_counts(
            normalize=False if to_interval else op.normalize,
            sort=op.sort, ascending=op.ascending,
            bins=op.bins, dropna=op.dropna)
        if to_interval and op.normalize:
            result /= source.shape[0]
    else:
        result = source

    if to_interval:
        # convert CategoricalDtype which generated in `cut`
        # to IntervalDtype
        result.index = result.index.astype('interval')

    ctx[op.outputs[0].key] = result
示例12: execute
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def execute(cls, ctx, op):
    """Run ``pd.cut`` for the operand and store the output(s) in ``ctx``.

    ``op.bins`` and ``op.labels`` are looked up in ``ctx`` when they are
    ``Base``/``Entity`` objects and used as-is otherwise. With
    ``op.retbins`` set, the two elements of the ``pd.cut`` result go to the
    first and second output respectively.
    """
    def _resolve(arg):
        # Graph objects are replaced by their computed value from ctx.
        return ctx[arg.key] if isinstance(arg, (Base, Entity)) else arg

    data = ctx[op.input.key]
    bins = _resolve(op.bins)
    labels = _resolve(op.labels)

    options = dict(right=op.right, retbins=op.retbins, precision=op.precision,
                   include_lowest=op.include_lowest, duplicates=op.duplicates)
    try:
        ret = pd.cut(data, bins, labels=labels, **options)
    except ValueError:
        # fail due to buffer source array is read-only
        ret = pd.cut(data.copy(), bins, labels=labels, **options)

    if not op.retbins:
        ctx[op.outputs[0].key] = ret
        return

    # pragma: no cover
    ctx[op.outputs[0].key] = ret[0]
    ctx[op.outputs[1].key] = ret[1]
示例13: calibration_plot
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def calibration_plot(preds, truth):
    """Show a calibration plot for the win probability model.

    Predictions are grouped into 100 bins and, for each bin, the share of
    plays that were wins is plotted (as a percentage) against the bin
    index, next to a dashed reference diagonal. A perfectly calibrated
    model means that plays with a win probability of n% win about n% of
    the time.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    frame = pd.DataFrame({'pred': preds, 'win': truth})
    # An integer bin count makes pd.cut split the observed range of the
    # predictions into equal-width bins; labels=False yields bin indices.
    # NOTE(review): bin index equals "percent" only if preds span roughly
    # [0, 1] - confirm against callers.
    frame['pred_bin'] = pd.cut(frame.pred, 100, labels=False)
    win_rate = frame.groupby('pred_bin')['win'].mean()

    bin_ids = win_rate.index.values
    win_pct = [100 * rate for rate in win_rate.values]
    diagonal = np.arange(0, 100)

    plt.figure()
    plt.plot(bin_ids, win_pct, color='SteelBlue')
    plt.plot(diagonal, diagonal, 'k--', alpha=0.3)
    plt.xlim([0.0, 100])
    plt.ylim([0.0, 100])
    plt.xlabel('Estimated win probability')
    plt.ylabel('True win percentage')
    plt.title('Win probability calibration, binned by percent')
    plt.show()
示例14: test_category_label
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def test_category_label(alltypes, df):
    """Labelled buckets of ``double_col`` must match ``pd.cut`` on the
    reference frame with the same bins, labels and ``right=False``."""
    edges = [0, 10, 25, 50, 100]
    names = ['a', 'b', 'c', 'd']

    labelled = alltypes.double_col.bucket(edges).label(names)
    raw = labelled.execute()

    # Any warning raised while building the categorical is silenced.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        result = pd.Series(pd.Categorical(raw, ordered=True))
    result.name = 'double_col'

    expected = pd.cut(df.double_col, edges, labels=names, right=False)
    tm.assert_series_equal(result, expected)
示例15: setup_params
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def setup_params(self, data):
    """Return a copy of ``self.params`` with defaults filled in.

    * ``maxwidth`` defaults to 0.9 times ``resolution(data['x'], False)``.
    * ``bins`` defaults to 50 when neither ``binwidth`` nor ``bins`` is set.
    * ``random_state`` becomes the ``np.random`` module when None, or a
      ``np.random.RandomState`` seeded with the value when it is an int;
      any other value is left as given.
    * The keys ``kernel``, ``cut``, ``gridsize``, ``clip`` and ``n`` are
      always overwritten with fixed values.

    ``self.params`` itself is not modified.
    """
    params = self.params.copy()
    seed = params['random_state']

    if params['maxwidth'] is None:
        params['maxwidth'] = resolution(data['x'], False) * 0.9

    no_binning_given = (params['binwidth'] is None
                        and self.params['bins'] is None)
    if no_binning_given:
        params['bins'] = 50

    if isinstance(seed, int):
        params['random_state'] = np.random.RandomState(seed)
    elif seed is None:
        params['random_state'] = np.random

    # Required by compute_density
    params.update(
        kernel='gau',  # It has to be a gaussian kernel
        cut=0,
        gridsize=None,
        clip=(-np.inf, np.inf),
        n=512,
    )
    return params