当前位置: 首页>>代码示例>>Python>>正文


Python types.CategoricalDtype方法代码示例

本文整理汇总了 Python 中 pandas.api.types.CategoricalDtype 的典型用法代码示例。如果您正苦于以下问题:Python types.CategoricalDtype 的具体用法?Python types.CategoricalDtype 怎么用?Python types.CategoricalDtype 使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 pandas.api.types 的用法示例。


在下文中一共展示了 types.CategoricalDtype 的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: setup_method

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def setup_method(self, method):
        """Create the four categorical-indexed frames used as fixtures.

        Each frame has an int64 column ``A`` holding 0..5 and is indexed by a
        categorical column ``B``.  ``method`` is accepted but not used.
        """
        letters = list('aabbca')
        numbers = [1, 1, 2, 1, 3, 2]

        # String keys with categories c, a, b.
        key = Series(letters).astype(CDT(list('cab')))
        frame = DataFrame({'A': np.arange(6, dtype='int64'), 'B': key})
        self.df = frame.set_index('B')

        # Same keys, plus a category 'e' that never occurs in the data.
        key = Series(letters).astype(CDT(list('cabe')))
        frame = DataFrame({'A': np.arange(6, dtype='int64'), 'B': key})
        self.df2 = frame.set_index('B')

        # Integer keys with ordered categories 3, 2, 1.
        key = Series(numbers).astype(CDT([3, 2, 1], ordered=True))
        frame = DataFrame({'A': np.arange(6, dtype='int64'), 'B': key})
        self.df3 = frame.set_index('B')

        # Integer keys with the same categories, unordered.
        key = Series(numbers).astype(CDT([3, 2, 1], ordered=False))
        frame = DataFrame({'A': np.arange(6, dtype='int64'), 'B': key})
        self.df4 = frame.set_index('B')
开发者ID:Frank-qlu,项目名称:recruit,代码行数:18,代码来源:test_categorical.py

示例2: test_astype_categoricaldtype

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_astype_categoricaldtype(self):
        s = Series(['a', 'b', 'a'])
        result = s.astype(CategoricalDtype(['a', 'b'], ordered=True))
        expected = Series(Categorical(['a', 'b', 'a'], ordered=True))
        tm.assert_series_equal(result, expected)

        result = s.astype(CategoricalDtype(['a', 'b'], ordered=False))
        expected = Series(Categorical(['a', 'b', 'a'], ordered=False))
        tm.assert_series_equal(result, expected)

        result = s.astype(CategoricalDtype(['a', 'b', 'c'], ordered=False))
        expected = Series(Categorical(['a', 'b', 'a'],
                                      categories=['a', 'b', 'c'],
                                      ordered=False))
        tm.assert_series_equal(result, expected)
        tm.assert_index_equal(result.cat.categories, Index(['a', 'b', 'c'])) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:18,代码来源:test_dtypes.py

示例3: test_constructor_categorical_dtype

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_constructor_categorical_dtype(self):
        result = pd.Series(['a', 'b'],
                           dtype=CategoricalDtype(['a', 'b', 'c'],
                                                  ordered=True))
        assert is_categorical_dtype(result) is True
        tm.assert_index_equal(result.cat.categories, pd.Index(['a', 'b', 'c']))
        assert result.cat.ordered

        result = pd.Series(['a', 'b'], dtype=CategoricalDtype(['b', 'a']))
        assert is_categorical_dtype(result)
        tm.assert_index_equal(result.cat.categories, pd.Index(['b', 'a']))
        assert result.cat.ordered is False

        # GH 19565 - Check broadcasting of scalar with Categorical dtype
        result = Series('a', index=[0, 1],
                        dtype=CategoricalDtype(['a', 'b'], ordered=True))
        expected = Series(['a', 'a'], index=[0, 1],
                          dtype=CategoricalDtype(['a', 'b'], ordered=True))
        tm.assert_series_equal(result, expected, check_categorical=True) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:21,代码来源:test_constructors.py

示例4: all_stack

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def all_stack(fold=BUZZER_DEV_FOLD):
    """Plot one stacked-area panel per buzzer model and save it as a PDF.

    Calls ``stack`` for the RNN, MLP and Threshold output directories,
    combines the three results into one frame, and draws ``Frequency``
    against ``Position`` filled by ``Buzzing``, faceted by ``Model``.

    Parameters
    ----------
    fold : str
        Passed to ``stack`` and used in the output file name
        ``output/buzzer/<fold>_stack.pdf``.
    """
    # Local import: this function previously needed no ``pd`` binding, so do
    # not assume the module provides one.
    import pandas as pd

    # ``DataFrame.append`` was removed in pandas 2.0; a single concat with
    # ``ignore_index=True`` yields the same rows in the same order.
    frames = [
        stack('output/buzzer/RNNBuzzer', 'RNN', fold),
        stack('output/buzzer/MLPBuzzer', 'MLP', fold),
        stack('output/buzzer/ThresholdBuzzer', 'Threshold', fold),
    ]
    df = pd.concat(frames, ignore_index=True)

    # An explicit category list fixes the order of the Model values
    # (presumably this controls the facet order - confirm against plotnine).
    model_type = CategoricalDtype(
        categories=['Threshold', 'MLP', 'RNN'])
    df['Model'] = df['Model'].astype(model_type)
    p = (
        ggplot(df)
        + geom_area(aes(x='Position', y='Frequency', fill='Buzzing'))
        + facet_grid('~ Model')
        + theme_fs()
        + theme(aspect_ratio=1)
        + scale_fill_brewer(type='div', palette=7)
    )
    p.save('output/buzzer/{}_stack.pdf'.format(fold))
开发者ID:Pinafore,项目名称:qb,代码行数:20,代码来源:plot.py

示例5: execute

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def execute(cls, ctx, op):
        """Run the astype operand: cast the input and store it in ``ctx``.

        The input is read from ``ctx[op.inputs[0].key]`` and the result is
        written to ``ctx[op.outputs[0].key]``.  When ``op.category_cols`` is
        set, the values stored under the keys of ``op.inputs[1:]`` are paired
        with those columns and wrapped in ``CategoricalDtype``.
        """
        source = ctx[op.inputs[0].key]

        if isinstance(op.dtype_values, dict):
            # Keep only the entries naming columns present in this input.
            target = {name: dt for name, dt in op.dtype_values.items()
                      if name in source.columns}
            if op.category_cols is not None:
                category_values = [ctx[inp.key] for inp in op.inputs[1:]]
                target.update({
                    name: CategoricalDtype(values)
                    for name, values in zip(op.category_cols, category_values)
                })
        elif op.category_cols is not None:
            category_values = [ctx[inp.key] for inp in op.inputs[1:]]
            target = {
                name: CategoricalDtype(values)
                for name, values in zip(op.category_cols, category_values)
            }
        else:
            # No dict and no category columns: pass the dtype spec through.
            target = op.dtype_values

        ctx[op.outputs[0].key] = source.astype(target, errors=op.errors)
开发者ID:mars-project,项目名称:mars,代码行数:21,代码来源:astype.py

示例6: __call__

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def __call__(self, df):
        """Derive the output dtype(s) of the cast and build the result object.

        The dtypes are found by applying ``astype`` to an empty series or
        frame built from the input's dtypes.  Any dtype that changes into a
        categorical is recorded as a bare ``CategoricalDtype()``
        (presumably because the categories are not known yet - confirm).
        Returns whatever ``self.new_series`` / ``self.new_dataframe`` returns.
        """
        if not isinstance(df, SERIES_TYPE):
            probe = build_empty_df(df.dtypes).astype(
                self.dtype_values, errors=self.errors)
            resolved = [
                CategoricalDtype()
                if new_dt != old_dt and isinstance(new_dt, CategoricalDtype)
                else new_dt
                for old_dt, new_dt in zip(df.dtypes, probe.dtypes)
            ]
            return self.new_dataframe(
                [df], shape=df.shape,
                dtypes=pd.Series(resolved, index=probe.dtypes.index),
                index_value=df.index_value,
                columns_value=df.columns_value)

        probe = build_empty_series(df.dtype).astype(
            self.dtype_values, errors=self.errors)
        if probe.dtype != df.dtype:
            if isinstance(probe.dtype, CategoricalDtype):
                dtype = CategoricalDtype()
            else:
                dtype = probe.dtype
        else:  # pragma: no cover
            dtype = df.dtype
        return self.new_series([df], shape=df.shape, dtype=dtype,
                               name=df.name, index_value=df.index_value)
开发者ID:mars-project,项目名称:mars,代码行数:26,代码来源:astype.py

示例7: test_datetime_bin

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_datetime_bin(self):
        data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')]
        bin_data = ['2012-12-12', '2012-12-14', '2012-12-16']
        expected = (
            Series(IntervalIndex([
                Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])),
                Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2]))]))
            .astype(CDT(ordered=True)))

        for conv in [Timestamp, Timestamp, np.datetime64]:
            bins = [conv(v) for v in bin_data]
            result = cut(data, bins=bins)
            tm.assert_series_equal(Series(result), expected)

        bin_pydatetime = [Timestamp(v).to_pydatetime() for v in bin_data]
        result = cut(data, bins=bin_pydatetime)
        tm.assert_series_equal(Series(result), expected)

        bins = to_datetime(bin_data)
        result = cut(data, bins=bin_pydatetime)
        tm.assert_series_equal(Series(result), expected) 
开发者ID:birforce,项目名称:vnpy_crypto,代码行数:23,代码来源:test_tile.py

示例8: test_merging_with_bool_or_int_cateorical_column

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_merging_with_bool_or_int_cateorical_column(self, category_column,
                                                        categories,
                                                        expected_categories,
                                                        ordered):
        # GH 17187
        # merging with a boolean/int categorical column
        df1 = pd.DataFrame({'id': [1, 2, 3, 4],
                            'cat': category_column})
        df1['cat'] = df1['cat'].astype(CDT(categories, ordered=ordered))
        df2 = pd.DataFrame({'id': [2, 4], 'num': [1, 9]})
        result = df1.merge(df2)
        expected = pd.DataFrame({'id': [2, 4], 'cat': expected_categories,
                                 'num': [1, 9]})
        expected['cat'] = expected['cat'].astype(
            CDT(categories, ordered=ordered))
        assert_frame_equal(expected, result) 
开发者ID:birforce,项目名称:vnpy_crypto,代码行数:18,代码来源:test_merge.py

示例9: _set_pattern_columns

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def _set_pattern_columns(self, path_column):
        """Add one categorical column per field of ``self.pattern``.

        The field values come from calling ``reverse_formats`` on the pattern
        and the categories of ``path_column``.  Each row's category code is
        mapped to the field value at that position.  ``self._dataframe`` is
        replaced by a copy carrying the new columns.
        """
        try:
            # CategoricalDtype allows specifying known categories when
            # creating objects. It was added in pandas 0.21.0.
            from pandas.api.types import CategoricalDtype
        except ImportError:
            CategoricalDtype = None

        col = self._dataframe[path_column]
        paths = col.cat.categories

        column_by_field = {}
        for field, values in reverse_formats(self.pattern, paths).items():
            # Position i in ``values`` belongs to category code i.
            decoded = col.cat.codes.map(dict(enumerate(values)))
            if CategoricalDtype is None:
                target = "category"
            else:
                target = CategoricalDtype(set(values))
            column_by_field[field] = decoded.astype(target)

        self._dataframe = self._dataframe.assign(**column_by_field)
开发者ID:intake,项目名称:intake,代码行数:22,代码来源:csv.py

示例10: test_join_cat2

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_join_cat2(self):
        # test setting NaN in categorical array
        def test_impl():
            ct_dtype = CategoricalDtype(['A', 'B', 'C'])
            dtypes = {'C1': np.int, 'C2': ct_dtype, 'C3': str}
            df1 = pd.read_csv("csv_data_cat1.csv",
                              names=['C1', 'C2', 'C3'],
                              dtype=dtypes,
                              )
            n = len(df1)
            df2 = pd.DataFrame({'C1': 2 * np.arange(n) + 1, 'AAA': n + np.arange(n) + 1.0})
            df3 = df1.merge(df2, on='C1', how='right')
            return df3

        hpat_func = self.jit(test_impl)
        pd.testing.assert_frame_equal(
            hpat_func().sort_values('C1').reset_index(drop=True),
            test_impl().sort_values('C1').reset_index(drop=True)) 
开发者ID:IntelPython,项目名称:sdc,代码行数:20,代码来源:test_join.py

示例11: test_join_cat_parallel1

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_join_cat_parallel1(self):
        # TODO: cat as keys
        def test_impl():
            ct_dtype = CategoricalDtype(['A', 'B', 'C'])
            dtypes = {'C1': np.int, 'C2': ct_dtype, 'C3': str}
            df1 = pd.read_csv("csv_data_cat1.csv",
                              names=['C1', 'C2', 'C3'],
                              dtype=dtypes,
                              )
            n = len(df1)
            df2 = pd.DataFrame({'C1': 2 * np.arange(n) + 1, 'AAA': n + np.arange(n) + 1.0})
            df3 = df1.merge(df2, on='C1')
            return df3

        hpat_func = self.jit(distributed=['df3'])(test_impl)
        # TODO: check results
        self.assertTrue((hpat_func().columns == test_impl().columns).all()) 
开发者ID:IntelPython,项目名称:sdc,代码行数:19,代码来源:test_join.py

示例12: test_qcut_return_intervals

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_qcut_return_intervals():
    """Check that ``qcut`` with explicit quantiles returns interval categories."""
    values = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
    quantiles = [0, 0.333, 0.666, 1]
    actual = qcut(values, quantiles)

    # Three bins with three values each, in ascending order.
    levels = np.array([
        Interval(-0.001, 2.664),
        Interval(2.664, 5.328),
        Interval(5.328, 8),
    ])
    positions = [0, 0, 0, 1, 1, 1, 2, 2, 2]
    wanted = Series(levels.take(positions)).astype(CDT(ordered=True))
    tm.assert_series_equal(actual, wanted)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:11,代码来源:test_qcut.py

示例13: test_single_quantile

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_single_quantile(data, start, end, length, labels):
    # see gh-15431
    ser = Series([data] * length)
    result = qcut(ser, 1, labels=labels)

    if labels is None:
        intervals = IntervalIndex([Interval(start, end)] *
                                  length, closed="right")
        expected = Series(intervals).astype(CDT(ordered=True))
    else:
        expected = Series([0] * length)

    tm.assert_series_equal(result, expected) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:15,代码来源:test_qcut.py

示例14: test_datetime_tz_qcut

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_datetime_tz_qcut(bins):
    # see gh-19872
    tz = "US/Eastern"
    ser = Series(date_range("20130101", periods=3, tz=tz))

    result = qcut(ser, bins)
    expected = Series(IntervalIndex([
        Interval(Timestamp("2012-12-31 23:59:59.999999999", tz=tz),
                 Timestamp("2013-01-01 16:00:00", tz=tz)),
        Interval(Timestamp("2013-01-01 16:00:00", tz=tz),
                 Timestamp("2013-01-02 08:00:00", tz=tz)),
        Interval(Timestamp("2013-01-02 08:00:00", tz=tz),
                 Timestamp("2013-01-03 00:00:00", tz=tz))])).astype(
        CDT(ordered=True))
    tm.assert_series_equal(result, expected) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:17,代码来源:test_qcut.py

示例15: left

# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def left():
    """Return a seeded 10-row frame with columns ``X`` and ``Y``.

    ``X`` is categorical with categories foo, bar; ``Y`` holds plain strings.
    The global NumPy random state is re-seeded on every call.
    """
    np.random.seed(1234)
    # Draw X before Y so the random stream is consumed in the same order.
    x_draw = np.random.choice(['foo', 'bar'], size=(10,))
    x_column = Series(x_draw).astype(CDT(['foo', 'bar']))
    y_column = np.random.choice(['one', 'two', 'three'], size=(10,))
    return DataFrame({'X': x_column, 'Y': y_column})
开发者ID:Frank-qlu,项目名称:recruit,代码行数:9,代码来源:test_merge.py


注:本文中的pandas.api.types.CategoricalDtype方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。