本文整理汇总了Python中pandas.api.types.CategoricalDtype类的典型用法代码示例。如果您正苦于以下问题:Python types.CategoricalDtype类的具体用法?Python types.CategoricalDtype怎么用?Python types.CategoricalDtype使用的例子?那么, 这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块pandas.api.types的用法示例。
在下文中一共展示了types.CategoricalDtype类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: setup_method
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def setup_method(self, method):
    """Create the four categorical-indexed frames used by the tests.

    Each frame has an int64 column ``A`` holding 0..5 and is indexed by a
    categorical column ``B``:

    * ``self.df``  -- letters ``aabbca`` with categories ``c, a, b``
    * ``self.df2`` -- the same letters plus an unused category ``e``
    * ``self.df3`` -- integers with ordered categories ``3, 2, 1``
    * ``self.df4`` -- the same integers with unordered categories

    ``method`` is accepted but not used.
    """
    letters = list('aabbca')
    numbers = [1, 1, 2, 1, 3, 2]

    def indexed_frame(values, cat_dtype):
        # Column 'A' is rebuilt on every call so the frames share no data.
        key_column = Series(values).astype(cat_dtype)
        frame = DataFrame({'A': np.arange(6, dtype='int64'),
                           'B': key_column})
        return frame.set_index('B')

    self.df = indexed_frame(letters, CDT(list('cab')))
    self.df2 = indexed_frame(letters, CDT(list('cabe')))
    self.df3 = indexed_frame(numbers, CDT([3, 2, 1], ordered=True))
    self.df4 = indexed_frame(numbers, CDT([3, 2, 1], ordered=False))
示例2: test_astype_categoricaldtype
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_astype_categoricaldtype(self):
s = Series(['a', 'b', 'a'])
result = s.astype(CategoricalDtype(['a', 'b'], ordered=True))
expected = Series(Categorical(['a', 'b', 'a'], ordered=True))
tm.assert_series_equal(result, expected)
result = s.astype(CategoricalDtype(['a', 'b'], ordered=False))
expected = Series(Categorical(['a', 'b', 'a'], ordered=False))
tm.assert_series_equal(result, expected)
result = s.astype(CategoricalDtype(['a', 'b', 'c'], ordered=False))
expected = Series(Categorical(['a', 'b', 'a'],
categories=['a', 'b', 'c'],
ordered=False))
tm.assert_series_equal(result, expected)
tm.assert_index_equal(result.cat.categories, Index(['a', 'b', 'c']))
示例3: test_constructor_categorical_dtype
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_constructor_categorical_dtype(self):
result = pd.Series(['a', 'b'],
dtype=CategoricalDtype(['a', 'b', 'c'],
ordered=True))
assert is_categorical_dtype(result) is True
tm.assert_index_equal(result.cat.categories, pd.Index(['a', 'b', 'c']))
assert result.cat.ordered
result = pd.Series(['a', 'b'], dtype=CategoricalDtype(['b', 'a']))
assert is_categorical_dtype(result)
tm.assert_index_equal(result.cat.categories, pd.Index(['b', 'a']))
assert result.cat.ordered is False
# GH 19565 - Check broadcasting of scalar with Categorical dtype
result = Series('a', index=[0, 1],
dtype=CategoricalDtype(['a', 'b'], ordered=True))
expected = Series(['a', 'a'], index=[0, 1],
dtype=CategoricalDtype(['a', 'b'], ordered=True))
tm.assert_series_equal(result, expected, check_categorical=True)
示例4: all_stack
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def all_stack(fold=BUZZER_DEV_FOLD):
    """Plot stacked buzzing frequencies for the three buzzer models.

    Collects one frame per model via ``stack``, concatenates them, fixes
    the facet order to Threshold, MLP, RNN through a categorical ``Model``
    column, and saves an area plot to ``output/buzzer/<fold>_stack.pdf``.

    The frames returned by ``stack`` are assumed to carry ``Model``,
    ``Position``, ``Frequency`` and ``Buzzing`` columns, since those are
    the ones referenced below -- confirm against ``stack``.
    """
    # Imported locally so the block does not depend on a module-level alias.
    import pandas as pd

    df_rnn = stack('output/buzzer/RNNBuzzer', 'RNN', fold)
    df_mlp = stack('output/buzzer/MLPBuzzer', 'MLP', fold)
    df_thr = stack('output/buzzer/ThresholdBuzzer', 'Threshold', fold)

    # DataFrame.append was removed in pandas 2.0; one concat call yields the
    # same rows in the same order with a fresh 0..n-1 index.
    df = pd.concat([df_rnn, df_mlp, df_thr], ignore_index=True)

    # The category order (not the row order) decides the facet order.
    model_type = CategoricalDtype(
        categories=['Threshold', 'MLP', 'RNN'])
    df['Model'] = df['Model'].astype(model_type)

    p = (
        ggplot(df)
        + geom_area(aes(x='Position', y='Frequency', fill='Buzzing'))
        + facet_grid('~ Model')
        + theme_fs()
        + theme(aspect_ratio=1)
        + scale_fill_brewer(type='div', palette=7)
    )
    p.save('output/buzzer/{}_stack.pdf'.format(fold))
示例5: execute
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def execute(cls, ctx, op):
    """Cast the operand's input data and store the result in ``ctx``.

    The input is read from ``ctx[op.inputs[0].key]`` and the cast result is
    written to ``ctx[op.outputs[0].key]``.

    * If ``op.dtype_values`` is a dict, only the entries whose key is a
      column of the input are used.
    * If ``op.category_cols`` is set, each listed column is cast to a
      ``CategoricalDtype`` built from the matching extra input
      (``op.inputs[1:]``, looked up in ``ctx``). For a non-dict
      ``op.dtype_values`` these are then the only columns that are cast.
    * Otherwise ``op.dtype_values`` is passed to ``astype`` unchanged.
    """
    source = ctx[op.inputs[0].key]
    has_categories = op.category_cols is not None

    if isinstance(op.dtype_values, dict):
        # Keep only the requested dtypes whose column exists in the input.
        target = {name: wanted
                  for name, wanted in op.dtype_values.items()
                  if name in source.columns}
    elif has_categories:
        target = {}
    else:
        target = op.dtype_values

    if has_categories:
        # The remaining inputs hold the category values, one per column.
        uniques = [ctx[extra.key] for extra in op.inputs[1:]]
        for col, unique_values in zip(op.category_cols, uniques):
            target[col] = CategoricalDtype(unique_values)

    ctx[op.outputs[0].key] = source.astype(target, errors=op.errors)
示例6: __call__
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def __call__(self, df):
    """Declare the result of casting ``df`` with ``self.dtype_values``.

    The resulting dtype(s) are found by applying ``astype`` to an empty
    object with the same dtype(s) as ``df``. A dtype that changed into a
    categorical one is recorded as an empty ``CategoricalDtype()``
    (presumably because the concrete categories are not known at this
    point -- confirm against the execution side).

    Returns whatever ``self.new_series`` (for series input) or
    ``self.new_dataframe`` (otherwise) returns, keeping the shape, index
    and, where applicable, name/columns of ``df``.
    """
    if not isinstance(df, SERIES_TYPE):
        probe = build_empty_df(df.dtypes).astype(
            self.dtype_values, errors=self.errors)
        resolved = [
            CategoricalDtype()
            if after != before and isinstance(after, CategoricalDtype)
            else after
            for before, after in zip(df.dtypes, probe.dtypes)
        ]
        dtypes = pd.Series(resolved, index=probe.dtypes.index)
        return self.new_dataframe([df], shape=df.shape, dtypes=dtypes,
                                  index_value=df.index_value,
                                  columns_value=df.columns_value)

    probe = build_empty_series(df.dtype).astype(
        self.dtype_values, errors=self.errors)
    if probe.dtype != df.dtype:
        if isinstance(probe.dtype, CategoricalDtype):
            dtype = CategoricalDtype()
        else:
            dtype = probe.dtype
    else:  # pragma: no cover
        dtype = df.dtype
    return self.new_series([df], shape=df.shape, dtype=dtype,
                           name=df.name, index_value=df.index_value)
示例7: test_datetime_bin
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_datetime_bin(self):
data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')]
bin_data = ['2012-12-12', '2012-12-14', '2012-12-16']
expected = (
Series(IntervalIndex([
Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])),
Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2]))]))
.astype(CDT(ordered=True)))
for conv in [Timestamp, Timestamp, np.datetime64]:
bins = [conv(v) for v in bin_data]
result = cut(data, bins=bins)
tm.assert_series_equal(Series(result), expected)
bin_pydatetime = [Timestamp(v).to_pydatetime() for v in bin_data]
result = cut(data, bins=bin_pydatetime)
tm.assert_series_equal(Series(result), expected)
bins = to_datetime(bin_data)
result = cut(data, bins=bin_pydatetime)
tm.assert_series_equal(Series(result), expected)
示例8: test_merging_with_bool_or_int_cateorical_column
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_merging_with_bool_or_int_cateorical_column(self, category_column,
categories,
expected_categories,
ordered):
# GH 17187
# merging with a boolean/int categorical column
df1 = pd.DataFrame({'id': [1, 2, 3, 4],
'cat': category_column})
df1['cat'] = df1['cat'].astype(CDT(categories, ordered=ordered))
df2 = pd.DataFrame({'id': [2, 4], 'num': [1, 9]})
result = df1.merge(df2)
expected = pd.DataFrame({'id': [2, 4], 'cat': expected_categories,
'num': [1, 9]})
expected['cat'] = expected['cat'].astype(
CDT(categories, ordered=ordered))
assert_frame_equal(expected, result)
示例9: _set_pattern_columns
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def _set_pattern_columns(self, path_column):
    """Add one categorical column per field of ``self.pattern``.

    The categories of ``self._dataframe[path_column]`` are passed to
    ``reverse_formats`` together with ``self.pattern``; for every field it
    returns, the path codes are mapped to that field's values and the
    result is assigned to ``self._dataframe`` under the field name.
    """
    try:
        # CategoricalDtype allows specifying known categories when
        # creating objects. It was added in pandas 0.21.0.
        from pandas.api.types import CategoricalDtype
    except ImportError:
        has_cdt = False
    else:
        has_cdt = True

    col = self._dataframe[path_column]
    paths = col.cat.categories

    column_by_field = {}
    for field, values in reverse_formats(self.pattern, paths).items():
        # values[i] belongs to the i-th path category, so mapping the codes
        # through enumerate(values) yields the field value for each row.
        mapped = col.cat.codes.map(dict(enumerate(values)))
        if has_cdt:
            target = CategoricalDtype(set(values))
        else:
            target = "category"
        column_by_field[field] = mapped.astype(target)

    self._dataframe = self._dataframe.assign(**column_by_field)
示例10: test_join_cat2
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_join_cat2(self):
# test setting NaN in categorical array
def test_impl():
ct_dtype = CategoricalDtype(['A', 'B', 'C'])
dtypes = {'C1': np.int, 'C2': ct_dtype, 'C3': str}
df1 = pd.read_csv("csv_data_cat1.csv",
names=['C1', 'C2', 'C3'],
dtype=dtypes,
)
n = len(df1)
df2 = pd.DataFrame({'C1': 2 * np.arange(n) + 1, 'AAA': n + np.arange(n) + 1.0})
df3 = df1.merge(df2, on='C1', how='right')
return df3
hpat_func = self.jit(test_impl)
pd.testing.assert_frame_equal(
hpat_func().sort_values('C1').reset_index(drop=True),
test_impl().sort_values('C1').reset_index(drop=True))
示例11: test_join_cat_parallel1
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_join_cat_parallel1(self):
# TODO: cat as keys
def test_impl():
ct_dtype = CategoricalDtype(['A', 'B', 'C'])
dtypes = {'C1': np.int, 'C2': ct_dtype, 'C3': str}
df1 = pd.read_csv("csv_data_cat1.csv",
names=['C1', 'C2', 'C3'],
dtype=dtypes,
)
n = len(df1)
df2 = pd.DataFrame({'C1': 2 * np.arange(n) + 1, 'AAA': n + np.arange(n) + 1.0})
df3 = df1.merge(df2, on='C1')
return df3
hpat_func = self.jit(distributed=['df3'])(test_impl)
# TODO: check results
self.assertTrue((hpat_func().columns == test_impl().columns).all())
示例12: test_qcut_return_intervals
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_qcut_return_intervals():
    """Check that ``qcut`` with explicit quantiles returns ordered intervals.

    Nine consecutive integers split at the 0.333 / 0.666 quantiles must
    fall three apiece into the three expected intervals.
    """
    ser = Series(list(range(9)))
    res = qcut(ser, [0, 0.333, 0.666, 1])

    levels = np.array([Interval(-0.001, 2.664),
                       Interval(2.664, 5.328),
                       Interval(5.328, 8)])
    # Each interval is expected for three consecutive values.
    per_value = np.repeat(levels, 3)
    exp = Series(per_value).astype(CDT(ordered=True))

    tm.assert_series_equal(res, exp)
示例13: test_single_quantile
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_single_quantile(data, start, end, length, labels):
# see gh-15431
ser = Series([data] * length)
result = qcut(ser, 1, labels=labels)
if labels is None:
intervals = IntervalIndex([Interval(start, end)] *
length, closed="right")
expected = Series(intervals).astype(CDT(ordered=True))
else:
expected = Series([0] * length)
tm.assert_series_equal(result, expected)
示例14: test_datetime_tz_qcut
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def test_datetime_tz_qcut(bins):
# see gh-19872
tz = "US/Eastern"
ser = Series(date_range("20130101", periods=3, tz=tz))
result = qcut(ser, bins)
expected = Series(IntervalIndex([
Interval(Timestamp("2012-12-31 23:59:59.999999999", tz=tz),
Timestamp("2013-01-01 16:00:00", tz=tz)),
Interval(Timestamp("2013-01-01 16:00:00", tz=tz),
Timestamp("2013-01-02 08:00:00", tz=tz)),
Interval(Timestamp("2013-01-02 08:00:00", tz=tz),
Timestamp("2013-01-03 00:00:00", tz=tz))])).astype(
CDT(ordered=True))
tm.assert_series_equal(result, expected)
示例15: left
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import CategoricalDtype [as 别名]
def left():
    """Return a reproducible 10-row frame with a categorical ``X`` column.

    ``X`` is drawn from ``foo``/``bar`` and cast to a categorical with
    exactly those categories; ``Y`` is drawn from ``one``/``two``/``three``.
    The global NumPy random state is seeded with 1234 first, so repeated
    calls return the same data.
    """
    np.random.seed(1234)
    # Draw X before Y: the order of the draws determines the values.
    x_raw = np.random.choice(['foo', 'bar'], size=(10,))
    y_raw = np.random.choice(['one', 'two', 'three'], size=(10,))
    x_column = Series(x_raw).astype(CDT(['foo', 'bar']))
    return DataFrame({'X': x_column, 'Y': y_raw})