本文整理匯總了Python中pandas.api.types.is_categorical_dtype方法的典型用法代碼示例。如果您正苦於以下問題:Python types.is_categorical_dtype方法的具體用法?Python types.is_categorical_dtype怎麽用?Python types.is_categorical_dtype使用的例子?那麽,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類pandas.api.types的用法示例。
在下文中一共展示了types.is_categorical_dtype方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: write_series
# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def write_series(group, key, series, dataset_kwargs=MappingProxyType({})):
    """Write a pandas Series (or Index) into an h5py group under ``key``.

    Object-dtype values are stored as variable-length strings, categoricals
    are split into a codes dataset plus a referenced categories dataset, and
    everything else is written as a plain dataset.

    NOTE: ``group`` must be an h5py group, otherwise categoricals won't write.
    """
    if series.dtype == object:
        # Object dtype is assumed to hold strings.
        str_dtype = h5py.special_dtype(vlen=str)
        group.create_dataset(key, data=series.values, dtype=str_dtype, **dataset_kwargs)
    elif is_categorical_dtype(series):
        # Works for both a categorical Series and a CategoricalIndex.
        cat = series.values
        cat_key = f"__categories/{key}"
        write_array(group, cat_key, cat.categories.values, dataset_kwargs=dataset_kwargs)
        write_array(group, key, cat.codes, dataset_kwargs=dataset_kwargs)
        # Link the codes dataset to its categories via an h5py object reference.
        group[key].attrs["categories"] = group[cat_key].ref
        group[cat_key].attrs["ordered"] = cat.ordered
    else:
        group[key] = series.values
示例2: write_series
# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def write_series(group, key, series, dataset_kwargs=MappingProxyType({})):
    """Write a pandas Series into a zarr group under ``key``.

    Strings use a variable-length UTF-8 codec, categoricals are stored as
    codes plus a separate categories array (linked by key name), and any
    other dtype is written directly.
    """
    if series.dtype == object:
        # Object dtype is assumed to hold strings; use a vlen UTF-8 codec.
        group.create_dataset(
            key,
            shape=series.shape,
            dtype=object,
            object_codec=numcodecs.VLenUTF8(),
            **dataset_kwargs,
        )
        group[key][:] = series.values
    elif is_categorical_dtype(series):
        # Works for both a categorical Series and a CategoricalIndex.
        cat = series.values
        cat_key = f"__categories/{key}"
        write_array(group, cat_key, cat.categories.values, dataset_kwargs=dataset_kwargs)
        write_array(group, key, cat.codes, dataset_kwargs=dataset_kwargs)
        group[key].attrs["categories"] = cat_key
        # np.bool_ is not JSON serializable; coerce to a plain bool for the attrs.
        group[cat_key].attrs["ordered"] = bool(cat.ordered)
    else:
        group[key] = series.values
示例3: _fit
# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def _fit(self, X: DataFrameType):
    """Determine which columns to categorize and record their categories.

    Returns a tuple ``(columns, categories)`` where ``categories`` maps each
    column name to either its CategoricalDtype (when pandas supports it) or
    a ``(categories, ordered)`` pair.
    """
    if self.columns is not None:
        columns = self.columns
    else:
        columns = X.select_dtypes(include=["object", "category"]).columns
    categories = {}
    for name in columns:
        col = X[name]
        if not is_categorical_dtype(col):
            # This shouldn't ever be hit on a dask.array, since the object
            # columns would have been converted to known categoricals already.
            col = pd.Series(col, index=X.index).astype("category")
        categories[name] = (
            col.dtype if _HAS_CTD else (col.cat.categories, col.cat.ordered)
        )
    return columns, categories
示例4: _add_group_columns
# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def _add_group_columns(data, gdf):
    """
    Add group columns to data with a value from the grouped dataframe

    It is assumed that the grouped dataframe contains a single group

    >>> data = pd.DataFrame({
    ...     'x': [5, 6, 7]})
    >>> gdf = GroupedDataFrame({
    ...     'g': list('aaa'),
    ...     'x': range(3)}, groups=['g'])
    >>> _add_group_columns(data, gdf)
       g  x
    0  a  5
    1  a  6
    2  a  7
    """
    if not isinstance(gdf, GroupedDataFrame):
        return data

    n = len(data)
    for i, col in enumerate(gdf.plydata_groups):
        if col in data:
            continue
        # The group has a single value for this column; broadcast it.
        group_values = [gdf[col].iloc[0]] * n
        # Be careful to maintain the dtype of the group column.
        if pdtypes.is_categorical_dtype(gdf[col]):
            col_values = pd.Categorical(
                group_values,
                categories=gdf[col].cat.categories,
                ordered=gdf[col].cat.ordered,
            )
        else:
            col_values = pd.Series(
                group_values, index=data.index, dtype=gdf[col].dtype
            )
        # Group columns come first
        data.insert(i, col, col_values)
    return data
示例5: test_summarize
# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def test_summarize():
    """summarize() aggregates, respects groups, and preserves category dtype."""
    df = pd.DataFrame({'x': [1, 5, 2, 2, 4, 0, 4],
                       'y': [1, 2, 3, 4, 5, 6, 5],
                       'z': [1, 3, 3, 4, 5, 5, 5]})

    result = df >> summarize('np.sum(x)', max='np.max(x)')
    assert result.loc[0, 'max'] == np.max(df['x'])
    assert result.loc[0, 'np.sum(x)'] == np.sum(df['x'])

    result = df >> group_by('y', 'z') >> summarize(mean_x='np.mean(x)')
    assert 'y' in result
    assert 'z' in result
    assert all(result['mean_x'] == [1, 5, 2, 2, 4, 0])

    # (Name, Expression) tuples
    result = df >> summarize(('sum', 'np.sum(x)'), ('max', 'np.max(x)'))
    assert 'sum' in result
    assert 'max' in result

    # Branches
    result = df >> group_by('y') >> summarize('np.sum(z)', constant=1)
    assert 'y' in result
    assert result.loc[0, 'constant'] == 1

    # Category stays category
    df1 = df.copy()
    df1['z'] = pd.Categorical(df1['z'])
    result = df1 >> group_by('y', 'z') >> summarize(mean_x='np.mean(x)')
    # np.int was removed in NumPy 1.24; compare against the builtin-int dtype.
    assert result['y'].dtype == np.dtype(int)
    assert pdtypes.is_categorical_dtype(result['z'])
示例6: test_group_by_all
# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def test_group_by_all():
    """group_by_all() groups every column, optionally adding converted copies."""
    df = pd.DataFrame({
        'alpha': list('aaabbb'),
        'beta': list('babruq'),
        'theta': list('cdecde'),
        'x': [1, 2, 3, 4, 5, 6],
        'y': [6, 5, 4, 3, 2, 1],
        'z': [7, 9, 11, 8, 10, 12]
    })

    # No conversion: every column becomes a group column.
    result = df >> group_by_all()
    assert len(result.columns) == len(df.columns)
    assert len(result.plydata_groups) == len(df.columns)

    # In-place conversion: column count is unchanged.
    result = df >> group_by_all(pd.Categorical)
    assert len(result.columns) == len(df.columns)
    assert len(result.plydata_groups) == len(df.columns)

    # Named conversion: each column gains a '<name>_cat' categorical twin.
    result = df >> group_by_all(dict(cat=pd.Categorical))
    assert len(result.columns) == len(df.columns)*2
    for col in df.columns:
        twin = '{}_cat'.format(col)
        assert not pdtypes.is_categorical_dtype(result[col])
        assert pdtypes.is_categorical_dtype(result[twin])

    # An existing group column ('x') is excluded from conversion.
    result = (df
              >> group_by('x')
              >> group_by_all(dict(cat=pd.Categorical)))
    assert result.plydata_groups == [
        '{}_cat'.format(col) for col in df.columns if col != 'x']
    assert len(result.columns) == len(df.columns)*2-1
    assert 'x_cat' not in result
示例7: get_errors
# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def get_errors(self, series: pd.Series, column: 'column.Column'):
    """Return a list of ValidationWarnings for entries failing validation.

    Uses the child class's ``validate`` function; when the column allows
    empty entries, empty/null values are excluded from the failures.
    """
    # Rows where the subclass validator reports a failure.
    failed = ~self.validate(series)
    if column.allow_empty:
        # Only non-empty failures count. Explicitly check for categorical
        # first because issubdtype will FAIL on a category dtype; those and
        # numeric series use a null check instead of a string-length check.
        if is_categorical_dtype(series) or is_numeric_dtype(series):
            non_empty = ~series.isnull()
        else:
            non_empty = series.str.len() > 0
        validated = non_empty & failed
    else:
        validated = failed
    # Report each failing element along with its index (usually a row number).
    return [
        ValidationWarning(
            message=self.message,
            value=series[i],
            row=i,
            column=series.name
        )
        for i in series.index[validated]
    ]
示例8: _id_var
# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def _id_var(x, drop=False):
    """
    Assign ids to items in x. If two items
    are the same, they get the same id.

    Parameters
    ----------
    x : array-like
        items to associate ids with
    drop : bool
        Whether to drop unused factor levels

    Returns
    -------
    list[int]
        1-based integer id per item (empty list for empty input).
    """
    if len(x) == 0:
        return []
    categorical = pdtypes.is_categorical_dtype(x)
    if categorical:
        if drop:
            x = x.cat.remove_unused_categories()
            lst = list(x.cat.codes + 1)
        else:
            # Only float NaN is detected here; other null types pass through.
            has_nan = any(np.isnan(i) for i in x if isinstance(i, float))
            if has_nan:
                # NaNs are -1, we give them the highest code
                nan_code = -1
                new_nan_code = np.max(x.cat.codes) + 1
                # NOTE(review): this iterates the category *values* of x but
                # compares them against the sentinel code -1; it looks like it
                # was meant to iterate x.cat.codes — confirm against upstream.
                lst = [val if val != nan_code else new_nan_code for val in x]
            else:
                lst = list(x.cat.codes + 1)
    else:
        try:
            # Ids follow the sorted order of the unique levels.
            levels = np.sort(np.unique(x))
        except TypeError:
            # x probably has NANs
            levels = multitype_sort(set(x))
        lst = match(x, levels)
        lst = [item + 1 for item in lst]
    return lst
示例9: get_var_type
# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def get_var_type(col):
    """
    Return var_type (for KDEMultivariate) of the column

    Parameters
    ----------
    col : pandas.Series
        A dataframe column.

    Returns
    -------
    out : str
        One of ['c', 'o', 'u'].

    See Also
    --------
    The origin of the character codes is
    :class:`statsmodels.nonparametric.kernel_density.KDEMultivariate`.
    """
    if pdtypes.is_numeric_dtype(col):
        return 'c'  # continuous
    if pdtypes.is_categorical_dtype(col):
        # 'o' for ordered categoricals, 'u' for unordered ones.
        return 'o' if col.cat.ordered else 'u'
    # Unordered if unsure, e.g. string columns that are not categorical.
    return 'u'
示例10: fix_known_differences
# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def fix_known_differences(orig, result):
    """
    Helper function for reducing anndata's to only the elements we expect to be
    equivalent after concatenation.

    Only for the case where orig is the ground truth result of what
    concatenation should be.
    """
    orig, result = orig.copy(), result.copy()

    result.obs.drop(columns=["batch"], inplace=True)
    result.strings_to_categoricals()  # Should this be implicit in concatenation?

    # TODO
    # * merge varm, varp similar to uns
    # * merge obsp, but some information should be lost
    del orig.varm
    del orig.varp
    del orig.obsp  # TODO

    # Ordered categoricals lose their orderedness in concatenation;
    # restore the original ordered dtype on those obs columns.
    for k, dtype in orig.obs.dtypes.items():
        if is_categorical_dtype(dtype) and dtype.ordered:
            result.obs[k] = result.obs[k].astype(dtype)

    return orig, result
示例11: describe
# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def describe(data):
    """Generate summary statistics for every column of ``data``.

    For each column the summary contains:
      - dtype: variable type as reported by ``type_of_var``
      - max / min: extreme values (numeric, datetime) or the most/least
        frequent level (categorical)
      - mean: mean value (numeric) or a mid-frequency level (categorical)
      - missing_pct: fraction of missing entries
      - std/nuniue: standard deviation (numeric) or unique-level count
        (categorical)
    """
    data = pd.DataFrame(data)
    n_sample = len(data)
    var_type = type_of_var(data, copy=True)
    summary = pd.DataFrame(
        columns=data.columns,
        index=['dtype', 'max', 'min', 'mean', 'missing_pct', 'std/nuniue'],
    )
    for c in data.columns:
        missing_pct = 1 - data[c].count() / n_sample
        if var_type[c] == 'number':
            max_value, min_value, mean_value = data[c].max(), data[c].min(), data[c].mean()
            std_value = data[c].std()
            summary.loc[:, c] = [var_type[c], max_value, min_value, mean_value,
                                 missing_pct, std_value]
        elif var_type[c] == 'category' or is_categorical_dtype(data[c].dtype):
            tmp = data[c].value_counts()
            # Use idxmax/idxmin to get the *labels* of the most/least frequent
            # levels; Series.argmax/argmin now return positions, not labels.
            max_value, min_value = tmp.idxmax(), tmp.idxmin()
            mean_value_index = tmp[tmp == tmp.median()].index
            mean_value = mean_value_index[0] if len(mean_value_index) > 0 else np.nan
            summary.loc[:, c] = [var_type[c], max_value, min_value, mean_value,
                                 missing_pct, len(tmp)]
        elif var_type[c] == 'datetime':
            max_value, min_value = data[c].max(), data[c].min()
            summary.loc[:, c] = [var_type[c], max_value, min_value, np.nan,
                                 missing_pct, np.nan]
        else:
            summary.loc[:, c] = [var_type[c], np.nan, np.nan, np.nan,
                                 missing_pct, np.nan]
    return summary
示例12: fit
# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def fit(
    self, X: DataFrameType, y: Optional[Union[ArrayLike, SeriesType]] = None
) -> "OrdinalEncoder":
    """Determine the categorical columns to be encoded.

    Parameters
    ----------
    X : pandas.DataFrame or dask.dataframe.DataFrame
    y : ignored

    Returns
    -------
    self
    """
    self.columns_ = X.columns
    if self.columns is None:
        # Auto-detect: every category-dtype column gets encoded.
        columns = X.select_dtypes(include=["category"]).columns
    else:
        columns = self.columns
        for column in columns:
            assert is_categorical_dtype(X[column]), "Must be categorical"
    self.categorical_columns_ = columns
    self.non_categorical_columns_ = X.columns.drop(self.categorical_columns_)
    # Record per-column category information so transform can reproduce it.
    if _HAS_CTD:
        dtypes = {col: X[col].dtype for col in self.categorical_columns_}
    else:
        dtypes = {
            col: (X[col].cat.categories, X[col].cat.ordered)
            for col in self.categorical_columns_
        }
    self.dtypes_ = dtypes
    return self
示例13: test_ce
# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def test_ce(self):
    """Categorizer converts object columns and leaves numeric ones alone."""
    ce = dpp.Categorizer()
    original = raw.copy()
    trn = ce.fit_transform(raw)
    for name in ("A", "B", "C"):
        assert is_categorical_dtype(trn[name])
    assert trn["D"].dtype == np.dtype("int64")
    tm.assert_index_equal(ce.columns_, pd.Index(["A", "B", "C"]))
    # The input frame must not have been mutated.
    tm.assert_frame_equal(raw, original)
示例14: test_dask
# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def test_dask(self):
    """Categorizer works the same on a partitioned dask DataFrame."""
    ddf = dd.from_pandas(raw, npartitions=2)
    ce = dpp.Categorizer()
    trn = ce.fit_transform(ddf)
    for name in ("A", "B", "C"):
        assert is_categorical_dtype(trn[name])
    assert trn["D"].dtype == np.dtype("int64")
    tm.assert_index_equal(ce.columns_, pd.Index(["A", "B", "C"]))
示例15: test_upload_pandas_categorical_ipc
# 需要導入模塊: from pandas.api import types [as 別名]
# 或者: from pandas.api.types import is_categorical_dtype [as 別名]
def test_upload_pandas_categorical_ipc(self, con):
    """Round-trip a categorical column through load_table and select_ipc."""
    con.execute("DROP TABLE IF EXISTS test_categorical;")
    df = pd.DataFrame({"A": ["a", "b", "c", "a"]})
    df["B"] = df["A"].astype('category')

    # test that table created correctly when it doesn't exist on server
    con.load_table("test_categorical", df)
    ans = con.execute("select * from test_categorical").fetchall()
    assert ans == [('a', 'a'), ('b', 'b'), ('c', 'c'), ('a', 'a')]

    assert con.get_table_details("test_categorical") == [
        ColumnDetails(
            name='A',
            type='STR',
            nullable=True,
            precision=0,
            scale=0,
            comp_param=32,
            encoding='DICT',
            is_array=False,
        ),
        ColumnDetails(
            name='B',
            type='STR',
            nullable=True,
            precision=0,
            scale=0,
            comp_param=32,
            encoding='DICT',
            is_array=False,
        ),
    ]

    # load row-wise
    con.load_table("test_categorical", df, method="rows")
    # load columnar
    con.load_table("test_categorical", df, method="columnar")
    # load arrow
    con.load_table("test_categorical", df, method="arrow")

    # test end result
    df_ipc = con.select_ipc("select * from test_categorical")
    assert df_ipc.shape == (16, 2)

    # DataFrame.append was removed in pandas 2.0; build the expected
    # four-times-repeated frame with pd.concat instead.
    res = pd.concat([df, df, df, df]).reset_index(drop=True)
    res["A"] = res["A"].astype('category')
    res["B"] = res["B"].astype('category')
    assert pd.DataFrame.equals(df_ipc, res)

    # test that input df wasn't mutated
    # original input is object, categorical
    # to load via Arrow, converted internally to object, object
    assert is_object_dtype(df["A"])
    assert is_categorical_dtype(df["B"])
    con.execute("DROP TABLE IF EXISTS test_categorical;")