当前位置: 首页>>代码示例>>Python>>正文


Python pandas.cut方法代码示例

本文整理汇总了Python中pandas.cut方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.cut方法的具体用法?Python pandas.cut怎么用?Python pandas.cut使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pandas的用法示例。


在下文中一共展示了pandas.cut方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。注意:示例1、2、15中的cut只是seaborn/plotnine的参数,示例10、11涉及的是Mars项目自身的cut实现,它们并未直接调用pandas.cut。

示例1: quality_over_time

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def quality_over_time(dfs, path, figformat, title, plot_settings=None):
    """Save a violin plot of basecall quality for each time bin.

    Args:
        dfs: Data passed to seaborn as ``data``; it must provide the
            "timebin" (x axis) and "quals" (y axis) columns.
        path: Prefix prepended to the output file name.
        figformat: Extension appended to the file name and format handed
            to ``Plot.save``.
        title: Plot title; when falsy the title of the ``Plot`` object
            is used instead.
        plot_settings: Optional mapping of extra keyword arguments for
            ``sns.set``. ``None`` (the default) adds no extra settings.

    Returns:
        The ``Plot`` object, with ``fig`` set and already saved.
    """
    # A None default avoids a single dict object shared by every call.
    settings = {} if plot_settings is None else plot_settings
    time_qual = Plot(path=path + "TimeQualityViolinPlot." + figformat,
                     title="Violin plot of quality over time")
    sns.set(style="white", **settings)
    # NOTE: ``cut=0`` is seaborn's violinplot argument, not pandas.cut.
    ax = sns.violinplot(x="timebin",
                        y="quals",
                        data=dfs,
                        inner=None,
                        cut=0,
                        linewidth=0)
    ax.set(xlabel='Interval (hours)',
           ylabel="Basecall quality",
           title=title or time_qual.title)
    plt.xticks(rotation=45, ha='center', fontsize=8)
    time_qual.fig = ax.get_figure()
    time_qual.save(format=figformat)
    # Close all pyplot figures once this one has been written out.
    plt.close("all")
    return time_qual
开发者ID:wdecoster,项目名称:NanoPlot,代码行数:20,代码来源:timeplots.py

示例2: sequencing_speed_over_time

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def sequencing_speed_over_time(dfs, path, figformat, title, plot_settings=None):
    """Save a violin plot of sequencing speed for each time bin.

    Speed is computed as ``lengths / duration`` for every row whose
    duration is non-zero.

    Args:
        dfs: DataFrame with "lengths" and "duration" columns. If it has
            no "timebin" column, one is added IN PLACE from
            ``add_time_bins(dfs)``.
        path: Prefix prepended to the output file name.
        figformat: Extension appended to the file name and format handed
            to ``Plot.save``.
        title: Plot title; when falsy the title of the ``Plot`` object
            is used instead.
        plot_settings: Optional mapping of extra keyword arguments for
            ``sns.set``. ``None`` (the default) adds no extra settings.

    Returns:
        The ``Plot`` object, with ``fig`` set and already saved.
    """
    # A None default avoids a single dict object shared by every call.
    settings = {} if plot_settings is None else plot_settings
    time_duration = Plot(path=path + "TimeSequencingSpeed_ViolinPlot." + figformat,
                         title="Violin plot of sequencing speed over time")
    sns.set(style="white", **settings)
    if "timebin" not in dfs:
        dfs['timebin'] = add_time_bins(dfs)
    # Rows with a zero duration are excluded so the division is defined.
    mask = dfs['duration'] != 0
    # NOTE: ``cut=0`` is seaborn's violinplot argument, not pandas.cut.
    ax = sns.violinplot(x=dfs.loc[mask, "timebin"],
                        y=dfs.loc[mask, "lengths"] / dfs.loc[mask, "duration"],
                        inner=None,
                        cut=0,
                        linewidth=0)
    ax.set(xlabel='Interval (hours)',
           ylabel="Sequencing speed (nucleotides/second)",
           title=title or time_duration.title)
    plt.xticks(rotation=45, ha='center', fontsize=8)
    time_duration.fig = ax.get_figure()
    time_duration.save(format=figformat)
    # Close all pyplot figures once this one has been written out.
    plt.close("all")
    return time_duration
开发者ID:wdecoster,项目名称:NanoPlot,代码行数:22,代码来源:timeplots.py

示例3: test_slicing

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def test_slicing(self):
        cat = Series(Categorical([1, 2, 3, 4]))
        reversed = cat[::-1]
        exp = np.array([4, 3, 2, 1], dtype=np.int64)
        tm.assert_numpy_array_equal(reversed.__array__(), exp)

        df = DataFrame({'value': (np.arange(100) + 1).astype('int64')})
        df['D'] = pd.cut(df.value, bins=[0, 25, 50, 75, 100])

        expected = Series([11, Interval(0, 25)], index=['value', 'D'], name=10)
        result = df.iloc[10]
        tm.assert_series_equal(result, expected)

        expected = DataFrame({'value': np.arange(11, 21).astype('int64')},
                             index=np.arange(10, 20).astype('int64'))
        expected['D'] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100])
        result = df.iloc[10:20]
        tm.assert_frame_equal(result, expected)

        expected = Series([9, Interval(0, 25)], index=['value', 'D'], name=8)
        result = df.loc[8]
        tm.assert_series_equal(result, expected) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:24,代码来源:test_categorical.py

示例4: test_observed_codes_remap

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def test_observed_codes_remap(observed):
    """Group by a ``pd.cut`` result plus a plain column and check the mean.

    ``observed`` is forwarded to ``groupby``. When it is falsy the
    expected frame is expanded with ``cartesian_product_for_groupers``
    before the comparison.
    """
    frame = pd.DataFrame({'C1': [3, 3, 4, 5],
                          'C2': [1, 2, 3, 4],
                          'C3': [10, 100, 200, 34]})
    binned = pd.cut(frame['C1'], [1, 2, 3, 6])
    binned.name = "cat"

    expected = DataFrame(
        {"C1": [3, 3, 4, 5], "C3": [10, 100, 200, 34]},
        index=MultiIndex.from_arrays([binned, [1, 2, 3, 4]],
                                     names=["cat", "C2"]))
    if not observed:
        expected = cartesian_product_for_groupers(
            expected,
            [binned.values, [1, 2, 3, 4]],
            ['cat', 'C2'])

    result = frame.groupby([binned, 'C2'], observed=observed).agg('mean')
    tm.assert_frame_equal(result, expected)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:21,代码来源:test_categorical.py

示例5: test_sort

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def test_sort():
    """Check that counts grouped by ``pd.cut`` labels come back in bin order.

    The grouped result is compared with the same result re-ordered by
    the numeric lower bound parsed from each label.
    """
    # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby  # noqa: flake8
    # This should result in a properly sorted Series so that the plot
    # has a sorted x axis
    # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar')

    def _lower_bound(label):
        # Labels look like "500 - 999"; the first token is the lower bound.
        return float(label.split()[0])

    labels = ["{0} - {1}".format(start, start + 499)
              for start in range(0, 10000, 500)]
    cat_labels = Categorical(labels, categories=labels)

    df = DataFrame({'value': np.random.randint(0, 10000, 100)})
    df = df.sort_values(by=['value'], ascending=True)
    df['value_group'] = pd.cut(df.value, range(0, 10500, 500),
                               right=False, labels=cat_labels)

    res = df.groupby(['value_group'], observed=False)['value_group'].count()
    ordered_labels = sorted(res.index, key=_lower_bound)
    exp = res[ordered_labels]
    exp.index = CategoricalIndex(exp.index, name=exp.index.name)
    tm.assert_series_equal(res, exp)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:21,代码来源:test_categorical.py

示例6: test_sort_index_intervalindex

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def test_sort_index_intervalindex(self):
        # this is a de-facto sort via unstack
        # confirming that we sort in the order of the bins
        y = Series(np.random.randn(100))
        x1 = Series(np.sign(np.random.randn(100)))
        x2 = pd.cut(Series(np.random.randn(100)),
                    bins=[-3, -0.5, 0, 0.5, 3])
        model = pd.concat([y, x1, x2], axis=1, keys=['Y', 'X1', 'X2'])

        result = model.groupby(['X1', 'X2'], observed=True).mean().unstack()
        expected = IntervalIndex.from_tuples(
            [(-3.0, -0.5), (-0.5, 0.0),
             (0.0, 0.5), (0.5, 3.0)],
            closed='right')
        result = result.columns.levels[1].categories
        tm.assert_index_equal(result, expected) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:18,代码来源:test_sorting.py

示例7: test_to_excel_interval_no_labels

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def test_to_excel_interval_no_labels(self, *_):
        """Round-trip a frame with an unlabeled ``pd.cut`` column through Excel.

        The frame is written to ``self.path`` and read back; the interval
        column is expected to come back as the string form of each
        interval. Extra positional arguments are accepted and ignored.
        """
        # see gh-19242
        #
        # Test writing Interval without labels.
        frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)),
                          dtype=np.int64)
        expected = frame.copy()

        frame["new"] = pd.cut(frame[0], 10)
        expected["new"] = pd.cut(expected[0], 10).astype(str)

        frame.to_excel(self.path, sheet_name="test1")
        # Use ExcelFile as a context manager so the workbook handle is
        # closed even when the read or the assertion below fails.
        with ExcelFile(self.path) as reader:
            recons = read_excel(reader, sheet_name="test1", index_col=0)

        tm.assert_frame_equal(expected, recons)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:18,代码来源:test_excel.py

示例8: test_to_excel_interval_labels

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def test_to_excel_interval_labels(self, *_):
        """Round-trip a frame with a labeled ``pd.cut`` column through Excel.

        The frame is written to ``self.path`` and read back; the labeled
        column is expected to come back as the plain label values. Extra
        positional arguments are accepted and ignored.
        """
        # see gh-19242
        #
        # Test writing Interval with labels.
        frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)),
                          dtype=np.int64)
        expected = frame.copy()
        intervals = pd.cut(frame[0], 10, labels=["A", "B", "C", "D", "E",
                                                 "F", "G", "H", "I", "J"])
        frame["new"] = intervals
        expected["new"] = pd.Series(list(intervals))

        frame.to_excel(self.path, sheet_name="test1")
        # Use ExcelFile as a context manager so the workbook handle is
        # closed even when the read or the assertion below fails.
        with ExcelFile(self.path) as reader:
            recons = read_excel(reader, sheet_name="test1", index_col=0)

        tm.assert_frame_equal(expected, recons)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:19,代码来源:test_excel.py

示例9: generate_final_dataset

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def generate_final_dataset(self):
        """Derive the final bin edges and add a string "bins" column.

        Reads ``self.sign``, ``self.column``, ``self.woe_summary`` and
        ``self.dataset``. Sets ``self.bucket`` and ``self.bins``, adds
        the "<column>_shift" and "labels" columns to ``self.woe_summary``
        and the "bins" column to ``self.dataset``. Returns nothing.
        """
        # Evaluate the flag once. The equality test is kept on purpose:
        # unlike ``not self.sign`` it sends a value such as None to the
        # else branch, exactly as before.
        sign_is_false = self.sign == False  # noqa: E712
        if sign_is_false:
            shift_var, self.bucket = 1, True
        else:
            shift_var, self.bucket = -1, False

        shift_col = self.column + "_shift"
        self.woe_summary[shift_col] = self.woe_summary[self.column].shift(shift_var)

        # shift() leaves a missing value at one end; replace it with the
        # matching infinity.
        if sign_is_false:
            self.woe_summary.loc[0, shift_col] = -np.inf
        else:
            self.woe_summary.loc[len(self.woe_summary) - 1, shift_col] = np.inf

        # Both branches used the same edges. ``np.inf`` replaces the
        # ``np.Inf`` alias, which no longer exists in NumPy 2.0.
        self.bins = np.sort(list(self.woe_summary[self.column]) + [np.inf, -np.inf])

        self.woe_summary["labels"] = self.woe_summary.apply(self.generate_bin_labels, axis=1)

        self.dataset["bins"] = pd.cut(self.dataset[self.column], self.bins, right=self.bucket, precision=0)

        # Store intervals as text, dropping a leading '[' and trailing ')'
        # (the bracket style produced when right=False).
        self.dataset["bins"] = self.dataset["bins"].astype(str)
        self.dataset['bins'] = self.dataset['bins'].map(lambda x: x.lstrip('[').rstrip(')'))
开发者ID:jstephenj14,项目名称:Monotonic-WOE-Binning-Algorithm,代码行数:25,代码来源:monotonic_woe_binning.py

示例10: __call__

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def __call__(self, inp):
        """Create the output series object for this operand.

        ``build_series(inp).value_counts(...)`` supplies the output dtype
        and, when no bins are set, the output index. When ``self._bins``
        is set, ``inp`` is first passed through ``cut``, the bins are
        cleared, ``self._convert_index_to_interval`` is switched on, and
        an empty ``CategoricalIndex`` is used for the index value.

        Raises:
            TypeError: if ``cut`` raises ``TypeError`` for ``inp``.
        """
        # NOTE(review): build_series presumably yields a small stand-in
        # pandas Series for ``inp``; confirm against its definition.
        test_series = build_series(inp).value_counts(normalize=self.normalize)

        if self._bins is None:
            index_value = parse_index(test_series.index, store_data=False)
        else:
            from .cut import cut

            # cut
            try:
                inp = cut(inp, self._bins, include_lowest=True)
            except TypeError:  # pragma: no cover
                raise TypeError("bins argument only works with numeric data.")

            self._bins = None
            self._convert_index_to_interval = True
            index_value = parse_index(pd.CategoricalIndex([]),
                                      inp, store_data=False)

        # The length is not known at this point, hence the NaN shape.
        return self.new_series([inp], shape=(np.nan,),
                               index_value=index_value,
                               name=inp.name, dtype=test_series.dtype)
开发者ID:mars-project,项目名称:mars,代码行数:23,代码来源:value_counts.py

示例11: execute

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def execute(cls, ctx, op: "DataFrameValueCounts"):
        """Compute the value-counts result and store it in ``ctx``.

        In the map stage the input is passed through unchanged. In any
        other stage ``value_counts`` is called on the input. When
        ``op.convert_index_to_interval`` is set, counting is done without
        normalization and, if ``op.normalize`` is set, the counts are
        divided by the number of input rows.
        """
        data = ctx[op.input.key]
        to_interval = op.convert_index_to_interval

        if op.stage == OperandStage.map:
            result = data
        else:
            # With interval conversion, normalization is applied manually
            # below using the input's row count.
            result = data.value_counts(
                normalize=False if to_interval else op.normalize,
                sort=op.sort, ascending=op.ascending,
                bins=op.bins, dropna=op.dropna)
            if to_interval and op.normalize:
                result /= data.shape[0]

        if to_interval:
            # convert CategoricalDtype which generated in `cut`
            # to IntervalDtype
            result.index = result.index.astype('interval')
        ctx[op.outputs[0].key] = result
开发者ID:mars-project,项目名称:mars,代码行数:22,代码来源:value_counts.py

示例12: execute

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def execute(cls, ctx, op):
        """Run ``pd.cut`` for this operand and store the output(s) in ``ctx``.

        ``op.bins`` and ``op.labels`` are looked up in ``ctx`` when they
        are ``Base``/``Entity`` objects and used as given otherwise. When
        ``op.retbins`` is set, both parts of the result are stored under
        the first and second output keys.
        """
        def _materialize(arg):
            # Base/Entity operands are fetched from ctx by their key.
            return ctx[arg.key] if isinstance(arg, (Base, Entity)) else arg

        x = ctx[op.input.key]
        bins = _materialize(op.bins)
        labels = _materialize(op.labels)

        options = dict(labels=labels, right=op.right, retbins=op.retbins,
                       precision=op.precision,
                       include_lowest=op.include_lowest,
                       duplicates=op.duplicates)
        try:
            ret = pd.cut(x, bins, **options)
        except ValueError:
            # fail due to buffer source array is read-only
            ret = pd.cut(x.copy(), bins, **options)

        if op.retbins:  # pragma: no cover
            binned, edges = ret
            ctx[op.outputs[0].key] = binned
            ctx[op.outputs[1].key] = edges
        else:
            ctx[op.outputs[0].key] = ret
开发者ID:mars-project,项目名称:mars,代码行数:19,代码来源:cut.py

示例13: calibration_plot

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def calibration_plot(preds, truth):
    """Show a calibration plot for the win probability model.

    Predictions are split into 100 bins and the share of wins in each
    bin is plotted (as a percentage) against the bin number. A dashed
    diagonal marks perfect calibration, where plays with a win
    probability of n% are won about n% of the time.

    Args:
        preds: Predicted win probabilities.
        truth: Actual outcomes, averaged per bin to get the win rate.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    cal_df = pd.DataFrame({'pred': preds, 'win': truth})
    # labels=False makes pd.cut return integer bin codes.
    # NOTE(review): an integer bin count spans the observed min..max of
    # ``pred``, so a bin code equals a percentage only when the
    # predictions cover the whole 0..1 range; confirm this is intended.
    cal_df['pred_bin'] = pd.cut(cal_df.pred, 100, labels=False)

    win_rate = cal_df.groupby('pred_bin')['win'].mean()
    win_pct = [100 * rate for rate in win_rate.values]
    diagonal = np.arange(0, 100)

    plt.figure()
    plt.plot(win_rate.index.values, win_pct, color='SteelBlue')
    plt.plot(diagonal, diagonal, 'k--', alpha=0.3)
    plt.xlim([0.0, 100])
    plt.ylim([0.0, 100])
    plt.xlabel('Estimated win probability')
    plt.ylabel('True win percentage')
    plt.title('Win probability calibration, binned by percent')
    plt.show()
开发者ID:TheUpshot,项目名称:4thdownbot-model,代码行数:27,代码来源:model_train.py

示例14: test_category_label

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def test_category_label(alltypes, df):
    """Compare a labeled bucket expression with ``pd.cut`` on the same column.

    ``alltypes.double_col`` is bucketed with left-closed bins and
    labeled, then executed. The result, wrapped as an ordered
    categorical Series, must equal
    ``pd.cut(df.double_col, ..., right=False)`` with the same labels.
    """
    bins = [0, 10, 25, 50, 100]
    labels = ['a', 'b', 'c', 'd']

    expr = alltypes.double_col.bucket(bins).label(labels)
    result = expr.execute()

    # The Categorical construction has to run INSIDE the with-block:
    # catch_warnings() restores the previous filters on exit, so code
    # placed after the block is not covered by the 'ignore' filter.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        result = pd.Series(pd.Categorical(result, ordered=True))

    result.name = 'double_col'

    expected = pd.cut(df.double_col, bins, labels=labels, right=False)

    tm.assert_series_equal(result, expected)
开发者ID:ibis-project,项目名称:ibis,代码行数:21,代码来源:test_functions.py

示例15: setup_params

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import cut [as 别名]
def setup_params(self, data):
        """Return a copy of ``self.params`` with defaults filled in.

        ``self.params`` itself is left untouched. On the copy:

        - "maxwidth" defaults to ``resolution(data['x'], False) * 0.9``
        - "bins" defaults to 50 when both "binwidth" and "bins" are None
        - "random_state" becomes ``np.random`` when None, or a
          ``RandomState`` seeded with the value when it is an int
        - the density settings "kernel", "cut", "gridsize", "clip" and
          "n" are always overwritten with fixed values
        """
        params = self.params.copy()
        seed = params['random_state']

        if params['maxwidth'] is None:
            params['maxwidth'] = resolution(data['x'], False) * 0.9

        no_binning_given = (params['binwidth'] is None
                            and self.params['bins'] is None)
        if no_binning_given:
            params['bins'] = 50

        if seed is None:
            params['random_state'] = np.random
        elif isinstance(seed, int):
            params['random_state'] = np.random.RandomState(seed)

        # Required by compute_density
        params.update({
            'kernel': 'gau',  # It has to be a gaussian kernel
            'cut': 0,
            'gridsize': None,
            'clip': (-np.inf, np.inf),
            'n': 512,
        })
        return params
开发者ID:has2k1,项目名称:plotnine,代码行数:24,代码来源:stat_sina.py


注:本文中的pandas.cut方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。