本文整理汇总了Python中pandas.api.types.is_string_dtype方法的典型用法代码示例。如果您正苦于以下问题：Python types.is_string_dtype方法的具体用法？Python types.is_string_dtype怎么用？Python types.is_string_dtype使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pandas.api.types的用法示例。
在下文中一共展示了types.is_string_dtype方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: generate_plotly_dim_dict
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_string_dtype [as 别名]
def generate_plotly_dim_dict(df, field):
    """Build one plotly dimension dict for column ``field`` of ``df``.

    Numeric columns are passed through unchanged as ``"values"``. String
    columns are encoded as integer codes, where a value's code is its
    position in ``column.unique()``; ``"tickvals"`` and ``"ticktext"`` map
    the codes back to the original strings.

    Args:
        df: Object indexable by ``field`` that yields a pandas column.
        field: Column name; also stored as the dimension ``"label"``.

    Returns:
        dict with keys ``"label"`` and ``"values"``; string columns
        additionally get ``"tickvals"`` and ``"ticktext"``.

    Raises:
        TypeError: If the column is neither numeric nor string typed.
    """
    column = df[field]
    dim_dict = {"label": field}
    if is_numeric_dtype(column):
        dim_dict["values"] = column
    elif is_string_dtype(column):
        texts = column.unique()
        # One dict lookup per row instead of scanning `texts` for every row.
        code_of = {text: code for code, text in enumerate(texts)}
        dim_dict["values"] = [code_of[x] for x in column]
        dim_dict["tickvals"] = list(range(len(texts)))
        dim_dict["ticktext"] = texts
    else:
        # TypeError is still caught by callers using `except Exception`.
        raise TypeError("Unidentifiable Type")
    return dim_dict
示例2: fit
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_string_dtype [as 别名]
def fit(self, X, y):
    """Fit the matching imputer and record its parameters and strategy.

    The dtype that selects the imputer is taken from ``X`` when ``y`` is
    None (univariate fit) and from ``y`` otherwise (predictive fit). In
    both cases the chosen imputer is fitted with ``fit(X, y)``.

    Args:
        X (pd.Series): Dataset to fit the imputer, or predictors
        y (pd.Series): None, or dataset to fit predictors

    Returns:
        self. Instance of the class, with ``statistics_`` set. Both entries
        of ``statistics_`` stay None when the inspected data is neither
        numeric nor string typed.
    """
    # Univariate fit inspects X itself; predictive fit inspects the target y.
    target = X if y is None else y

    fitted = {"param": None, "strategy": None}
    if is_numeric_dtype(target):
        imputer = self.num_imputer
        fitted = {"param": imputer.fit(X, y), "strategy": imputer.strategy}
    if is_string_dtype(target):
        imputer = self.cat_imputer
        fitted = {"param": imputer.fit(X, y), "strategy": imputer.strategy}

    self.statistics_ = fitted
    return self
示例3: _not_cat_series
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_string_dtype [as 别名]
def _not_cat_series(m, s):
"""Private method to detect Series that are not categorical."""
if not is_string_dtype(s):
t = s.dtype
err = f"{m} not appropriate for Series {s.name} of type {t}."
raise TypeError(err)
示例4: strings_to_categoricals
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_string_dtype [as 别名]
def strings_to_categoricals(adata):
    """Convert low-cardinality annotation columns to categoricals in place.

    In ``adata.obs`` string, integer and boolean columns are considered; in
    ``adata.var`` only string columns are. A column is replaced by its
    ``pandas.Categorical`` only when it has more than one category and fewer
    categories than ``min(len(column), 100)``.
    """
    from pandas.api.types import is_string_dtype, is_integer_dtype, is_bool_dtype
    from pandas import Categorical

    def obs_candidate(values):
        # obs accepts string, integer and boolean columns.
        return (
            is_string_dtype(values)
            or is_integer_dtype(values)
            or is_bool_dtype(values)
        )

    def categorize(frame, is_candidate):
        # Select the columns first, then overwrite the qualifying ones.
        candidates = [name for name in frame.columns if is_candidate(frame[name])]
        for name in candidates:
            as_cat = Categorical(frame[name])
            if 1 < len(as_cat.categories) < min(len(as_cat), 100):
                frame[name] = as_cat

    categorize(adata.obs, obs_candidate)
    categorize(adata.var, is_string_dtype)
示例5: _
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_string_dtype [as 别名]
def _(anno, length, index_names):
anno = anno.copy()
if not is_string_dtype(anno.index):
warnings.warn("Transforming to str index.", ImplicitModificationWarning)
anno.index = anno.index.astype(str)
return anno
示例6: strings_to_categoricals
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_string_dtype [as 别名]
def strings_to_categoricals(self, df: Optional[pd.DataFrame] = None):
    """\
    Transform string annotations to categoricals.

    Only affects string annotations that lead to less categories than the
    total number of observations.

    Params
    ------
    df
        If `df` is `None`, modifies both :attr:`obs` and :attr:`var`,
        otherwise modifies `df` inplace.

    Raises
    ------
    RuntimeError
        If `df` is `None`, the object is a backed view, and a column would
        have to be converted.

    Notes
    -----
    Turns the view of an :class:`~anndata.AnnData` into an actual
    :class:`~anndata.AnnData`.
    """
    dont_modify = False  # only necessary for backed views
    if df is None:
        frames = [self.obs, self.var]
        if self.is_view and self.isbacked:
            dont_modify = True
    else:
        frames = [df]
    for frame in frames:
        # Columns that are string typed but not yet categorical.
        string_cols = [
            key
            for key in frame.columns
            if is_string_dtype(frame[key]) and not is_categorical(frame[key])
        ]
        for key in string_cols:
            # make sure we only have strings
            # (could be that there are np.nans (float), -666, "-666", for instance)
            c = frame[key].astype("U")
            # make a categorical with naturally sorted categories
            c = pd.Categorical(c, categories=natsorted(np.unique(c)))
            # every value is unique: a categorical would not save anything
            if len(c.categories) >= len(c):
                continue
            if dont_modify:
                raise RuntimeError(
                    "Please call `.strings_to_categoricals()` on full "
                    "AnnData, not on this view. You might encounter this "
                    "error message while copying or writing to disk."
                )
            if self.is_view:
                warnings.warn(
                    "Initializing view as actual.", ImplicitModificationWarning
                )
            # If `self` is a view, it will be actualized in the next line,
            # therefore the previous warning
            frame[key] = c
            logger.info(f"... storing {key!r} as categorical")
示例7: woepoints_ply1
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_string_dtype [as 别名]
def woepoints_ply1(dtx, binx, x_i, woe_points):
    '''
    Transform original values into woe or points for one variable.

    Params
    ------
    dtx: DataFrame holding the column `x_i` to transform.
    binx: binning table with a string column 'bin' and a column named by
      `woe_points`. Presumably the rows of the full bins table for
      variable `x_i` -- confirm against the caller.
    x_i: name of the variable, i.e. the column of `dtx` to transform.
    woe_points: name of the `binx` column to map values to
      ("woe" or "points").

    Returns
    ------
    DataFrame with a single column named '<x_i>_<woe_points>', indexed by
    the index values of `dtx` in sorted order.
    '''
    # Expand each bin into one row per member (members are joined by '%,%'
    # inside 'bin'), then attach the bin's woe/points value.
    # Resulting columns: bin, V1 (single member), V2 (woe/points value).
    # https://stackoverflow.com/questions/12680754/split-explode-pandas-dataframe-string-entry-to-separate-rows
    binx = pd.merge(
        binx[['bin']].assign(v1=binx['bin'].str.split('%,%')).explode('v1'),
        binx[['bin', woe_points]],
        how='left', on='bin'
    ).rename(columns={'v1':'V1',woe_points:'V2'})
    # dtx
    ## cut numeric variable
    if is_numeric_dtype(dtx[x_i]):
        # Members without '[' are not interval labels (`sv` presumably means
        # "special values" -- confirm); they are matched by raw value below.
        is_sv = pd.Series(not bool(re.search(r'\[', str(i))) for i in binx.V1)
        binx_sv = binx.loc[is_sv]
        binx_other = binx.loc[~is_sv]
        # create bin column
        # Break points: left endpoints parsed from the '[a,b)' labels plus
        # -inf/inf; np.unique de-duplicates and sorts them.
        breaks_binx_other = np.unique(list(map(float, ['-inf']+[re.match(r'.*\[(.*),.+\).*', str(i)).group(1) for i in binx_other['bin']]+['inf'])))
        # Rebuild '[lo,hi)' labels from consecutive break points.
        labels = ['[{},{})'.format(breaks_binx_other[i], breaks_binx_other[i+1]) for i in range(len(breaks_binx_other)-1)]
        # Cut into left-closed intervals, then convert every label to str
        # while leaving NaN (detected via i != i) untouched.
        dtx = dtx.assign(xi_bin = lambda x: pd.cut(x[x_i], breaks_binx_other, right=False, labels=labels))\
          .assign(xi_bin = lambda x: [i if (i != i) else str(i) for i in x['xi_bin']])
        # Rows whose raw value is listed among the non-interval members get
        # the string form of that raw value as their bin.
        mask = dtx[x_i].isin(binx_sv['V1'])
        dtx.loc[mask,'xi_bin'] = dtx.loc[mask, x_i].astype(str)
        # Keep only the bin label, stored under the variable's own name.
        dtx = dtx[['xi_bin']].rename(columns={'xi_bin':x_i})
    ## to character, na to missing
    if not is_string_dtype(dtx[x_i]):
        dtx.loc[:,x_i] = dtx.loc[:,x_i].astype(str).replace('nan', 'missing')
    # Remaining NaN become 'missing'; rowid records the index so row order
    # can be restored after the merge.
    dtx = dtx.replace(np.nan, 'missing').assign(rowid = dtx.index).sort_values('rowid')
    # rename binx: the member column takes the variable's name (merge key),
    # the value column becomes '<x_i>_<woe_points>'
    binx.columns = ['bin', x_i, '_'.join([x_i,woe_points])]
    # merge: left join on the variable, re-sort by rowid, re-attach the index
    dtx_suffix = pd.merge(dtx, binx, how='left', on=x_i).sort_values('rowid')\
      .set_index(dtx.index)[['_'.join([x_i,woe_points])]]
    return dtx_suffix