本文整理汇总了 Python 中 pandas.api.types.is_numeric_dtype 方法的典型用法代码示例。如果您正苦于以下问题:Python types.is_numeric_dtype 方法的具体用法?Python types.is_numeric_dtype 怎么用?Python types.is_numeric_dtype 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 pandas.api.types 的用法示例。
在下文中一共展示了types.is_numeric_dtype方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: oob_dependences
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def oob_dependences(rf, X_train, n_samples=5000):
    """
    Score how well each numeric feature is predicted by the remaining columns.

    Each numeric column of X_train is used in turn as the target: rf is refit
    on all other columns and its ``oob_score_`` after that fit is recorded as
    the column's dependence. Rows are first passed through
    ``sample_rows(X_train, n_samples)``.

    :param rf: estimator exposing ``fit(X, y)`` and ``oob_score_``; it is
               refit once per numeric column, so its fitted state is replaced.
    :param X_train: dataframe of features.
    :param n_samples: row budget handed to ``sample_rows`` (default 5000).
    :return: DataFrame indexed by 'Feature' with a single 'Dependence'
             column, sorted from highest to lowest dependence.
    """
    # Numeric columns are detected on the full frame, before sampling.
    numeric_features = [name for name in X_train if is_numeric_dtype(X_train[name])]
    sampled = sample_rows(X_train, n_samples)

    dependence = pd.DataFrame(columns=['Feature', 'Dependence']).set_index('Feature')
    for target_name in numeric_features:
        predictors = sampled.drop(target_name, axis=1)
        target = sampled[target_name]
        rf.fit(predictors, target)
        dependence.loc[target_name] = rf.oob_score_

    return dependence.sort_values('Dependence', ascending=False)
示例2: _get_columns_info
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def _get_columns_info(self, stats):
    """Split the columns of ``self.df`` into type buckets.

    Buckets are filled in a fixed order and each later bucket only sees the
    columns not claimed by an earlier one: constant (1 unique value), bool
    (2 unique values), numeric dtype, datetime64 dtype, then unique
    (``uniques == counts``) versus categorical (everything left).

    Every bucket is written *and read back* through the ``self.TYPE_*``
    constants, so the method no longer depends on those constants being
    spelled 'constant', 'bool', 'numeric' and 'date'.

    :param stats: per-column statistics indexable as ``stats['uniques']`` and
                  ``stats['counts']``, each labelled by column name.
    :return: dict mapping each ``self.TYPE_*`` key to a ``pd.Index`` of
             column names.
    """
    uniques = stats['uniques']
    column_info = {}
    column_info[self.TYPE_CONSTANT] = uniques[uniques == 1].index
    column_info[self.TYPE_BOOL] = uniques[uniques == 2].index

    already_typed = column_info[self.TYPE_CONSTANT].union(column_info[self.TYPE_BOOL])
    rest_columns = self.get_columns(self.df, self.EXCLUDE, already_typed)
    column_info[self.TYPE_NUMERIC] = pd.Index(
        [c for c in rest_columns if types.is_numeric_dtype(self.df[c])])

    rest_columns = self.get_columns(
        self.df[rest_columns], self.EXCLUDE, column_info[self.TYPE_NUMERIC])
    column_info[self.TYPE_DATE] = pd.Index(
        [c for c in rest_columns if types.is_datetime64_dtype(self.df[c])])

    rest_columns = self.get_columns(
        self.df[rest_columns], self.EXCLUDE, column_info[self.TYPE_DATE])
    rest_uniques = uniques[rest_columns]
    # A leftover column whose unique count equals its value count has no repeats.
    unique_columns = rest_uniques == stats['counts'][rest_columns]
    column_info[self.TYPE_UNIQUE] = rest_uniques[unique_columns].index
    column_info[self.TYPE_CATEGORICAL] = rest_uniques[~unique_columns].index
    return column_info
示例3: generate_plotly_dim_dict
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def generate_plotly_dim_dict(df, field):
    """Build a dimension dict (label/values[/tickvals/ticktext]) for one column.

    Numeric columns are passed through unchanged as ``values``. String
    columns are encoded as integer codes in order of first appearance, with
    ``tickvals``/``ticktext`` describing the code -> text mapping.

    The code lookup uses a dict built once from the unique values instead of
    scanning the unique array for every row, so encoding is linear in the
    number of rows.

    :param df: dataframe holding the column.
    :param field: column name; also used as the ``label``.
    :return: dict with keys ``label`` and ``values``, plus ``tickvals`` and
             ``ticktext`` for string columns.
    :raises TypeError: (an ``Exception`` subclass, message unchanged) when the
                       column is neither numeric nor string typed.
    """
    column = df[field]
    dim_dict = {"label": field}
    if is_numeric_dtype(column):
        dim_dict["values"] = column
    elif is_string_dtype(column):
        texts = column.unique()
        # unique() keeps first-appearance order, so enumerate() yields the
        # same codes the positional search produced.
        code_of = {text: code for code, text in enumerate(texts)}
        dim_dict["values"] = [code_of[x] for x in column]
        dim_dict["tickvals"] = list(range(len(texts)))
        dim_dict["ticktext"] = texts
    else:
        raise TypeError("Unidentifiable Type")
    return dim_dict
示例4: merger_data
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def merger_data(data, var, unique_num, is_merge_high=True):
    """Bin a high-cardinality numeric column with ``toad.merge``.

    When ``data[var]`` is numeric and has more than ``unique_num`` distinct
    values, rows equal to -9999999 are set aside, the remaining values are
    optionally clipped at the 0.99 quantile and binned with
    ``toad.merge(method='step', n_bins=unique_num)``, and the set-aside rows
    are stacked back underneath. Otherwise the column is returned untouched.

    Uses ``pd.concat`` for the stacking; ``DataFrame.append`` no longer
    exists in pandas 2.x.

    :param data: dataframe containing ``var`` and a 'target' column.
    :param var: name of the column to bin.
    :param unique_num: cardinality threshold, also the requested bin count.
    :param is_merge_high: clip the upper tail before binning when true.
    :return: ``(binned_values, target, bins)``; ``(data[var], None, None)``
             when no binning is performed. The returned target is reordered
             (non-sentinel rows first) so it lines up with ``binned_values``.
    """
    if not (is_numeric_dtype(data[var]) and data[var].nunique() > unique_num):
        return data[var], None, None

    # NOTE(review): -9999999 looks like this dataset's missing-value marker
    # (the original locals are named data_miss/data_nomiss) - confirm.
    sentinel = -9999999
    data_miss = data[data[var] == sentinel]
    data_nomiss = data[data[var] != sentinel]

    merge_high_data = data_nomiss[var]
    if is_merge_high:
        merge_high_data = toad.utils.clip(data_nomiss[var], quantile=(None, .99))
    data_index, bins = toad.merge(merge_high_data, method='step',
                                  return_splits=True, n_bins=unique_num)

    temp = pd.DataFrame(data_index, columns=[var])
    temp = pd.concat([temp, data_miss[[var]]], ignore_index=True)[var]
    target = pd.concat([data_nomiss, data_miss], ignore_index=True)['target']
    return temp, target, bins
# 两两指标做透视表
示例5: check_cateCols_uniqueValues
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def check_cateCols_uniqueValues(dat, var_skip = None):
    """Warn interactively about non-numeric columns with 50+ unique values.

    Non-numeric columns (minus those named in ``var_skip``) whose number of
    distinct values is at least 50 are listed on stdout, and the user is
    asked whether to continue. Nothing is printed or asked when no such
    column exists.

    Changes from the previous version: a non-integer answer re-prompts
    instead of crashing with ValueError, and the listed columns keep their
    dataframe order instead of an arbitrary set order.

    :param dat: dataframe to inspect.
    :param var_skip: column name(s) to leave out of the check; passed through
                     ``str_to_list`` before use.
    :return: None.
    :raises SystemExit: with code 0 when the user answers 2 (no).
    """
    # character columns with too many unique values
    char_cols = [col for col in list(dat) if not is_numeric_dtype(dat[col])]
    if var_skip is not None:
        skipped = set(str_to_list(var_skip))
        char_cols = [col for col in char_cols if col not in skipped]
    char_cols_too_many_unique = [col for col in char_cols if len(dat[col].unique()) >= 50]
    if not char_cols_too_many_unique:
        return None

    print('>>> There are {} variables have too many unique non-numberic values, which might cause the binning process slow. Please double check the following variables: \n{}'.format(len(char_cols_too_many_unique), ', '.join(map(str, char_cols_too_many_unique))))
    print('>>> Continue the binning process?')
    print('1: yes \n2: no')
    cont = None
    while cont not in (1, 2):
        try:
            cont = int(input("Selection: "))
        except ValueError:
            # Not an integer: treat like any other invalid choice and ask again.
            cont = None
    if cont == 2:
        raise SystemExit(0)
    return None
# replace blank by NA
#' @import data.table
#'
示例6: check_data_types
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def check_data_types(self, data_frame: pd.DataFrame) -> None:
    """
    Split ``self.input_columns`` into numeric and string columns and record
    the type of the output column.

    Sets ``self.numeric_columns``, ``self.string_columns`` and
    ``self.output_type`` ('numeric' or 'string'), then logs both column
    lists at debug level. ``string_columns`` now follows the order of
    ``self.input_columns`` (duplicates removed) instead of the arbitrary
    iteration order of a set difference.

    :param data_frame: frame containing the input columns and the output column
    :return: None
    """
    self.numeric_columns = [c for c in self.input_columns if is_numeric_dtype(data_frame[c])]
    numeric = set(self.numeric_columns)
    # dict.fromkeys de-duplicates like the former set arithmetic did, but
    # keeps first-seen order so the result is stable between runs.
    self.string_columns = list(dict.fromkeys(c for c in self.input_columns if c not in numeric))
    self.output_type = 'numeric' if is_numeric_dtype(data_frame[self.output_column]) else 'string'

    logger.debug(
        "Assuming {} numeric input columns: {}".format(len(self.numeric_columns),
                                                       ", ".join(self.numeric_columns)))
    logger.debug("Assuming {} string input columns: {}".format(len(self.string_columns),
                                                              ", ".join(self.string_columns)))
示例7: prepare_data
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def prepare_data(self, candles_df, ohlc):
    """Validate the candle frame and store a numeric working copy on self.

    Checks run in this order: ``candles_df`` is a DataFrame, it has at least
    ``self.required_count`` rows, ``ohlc`` has exactly four entries, and all
    four are columns of the frame. On success the column names are stored in
    ``self.open_column`` / ``high_column`` / ``low_column`` /
    ``close_column``, a copy of the frame is stored in ``self.data`` with
    any non-numeric OHLC column converted via ``pd.to_numeric``, and
    ``self.is_data_prepared`` is set to True.

    All four columns are now converted from the working copy; previously the
    high column alone was read from the caller's frame.

    :param candles_df: candle data; not modified.
    :param ohlc: four column names in open, high, low, close order.
    :raises Exception: with the same messages as before when a check fails.
    """
    if not isinstance(candles_df, pd.DataFrame):
        raise Exception('Candles must be in Panda data frame type')
    if len(candles_df) < self.required_count:
        raise Exception('{0} requires at least {1} data'.format(self.name,
                                                                self.required_count))
    if not (ohlc and len(ohlc) == 4):
        raise Exception('Provide list of four elements indicating columns in strings. '
                        'Default: [open, high, low, close]')
    if not set(ohlc).issubset(candles_df.columns):
        raise Exception('Provided columns does not exist in given data frame')

    self.open_column, self.high_column, self.low_column, self.close_column = ohlc
    self.data = candles_df.copy()
    # Same conversion order as before: close, open, low, high.
    for name in (self.close_column, self.open_column, self.low_column, self.high_column):
        if not is_numeric_dtype(self.data[name]):
            self.data[name] = pd.to_numeric(self.data[name])
    self.is_data_prepared = True
示例8: feature_dependence_matrix
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def feature_dependence_matrix(rf, X_train, n_samples=5000):
    """
    Build a feature-by-feature dependence matrix.

    Each numeric column of X_train is used in turn as the target: rf is refit
    on the remaining columns, and the permutation importances of those
    columns (from ``permutation_importances_raw`` with
    ``oob_regression_r2_score``) become that target's row. The target's own
    slot in the row is filled with 1.0 and the 'Dependence' column holds
    ``rf.oob_score_`` for that fit.

    The row and the self-importance slot are now addressed by the target's
    label / position in ``X_train.columns``. They were previously addressed
    by the position in the numeric-only list, which misaligned the matrix
    whenever a non-numeric column preceded a numeric one.

    :param rf: estimator exposing ``fit`` and ``oob_score_``; refit per column.
    :param X_train: dataframe of features.
    :param n_samples: row budget passed to ``sample_rows`` and to
                      ``permutation_importances_raw`` (default 5000).
    :return: a non-symmetric data frame indexed by every column of X_train,
             with columns 'Dependence' followed by every column of X_train.
             Rows of non-numeric columns stay empty.
    """
    numcols = [col for col in X_train if is_numeric_dtype(X_train[col])]
    X_train = sample_rows(X_train, n_samples)
    # NOTE(review): assumes sample_rows keeps the column set unchanged - confirm.
    all_columns = X_train.columns.tolist()
    df_dep = pd.DataFrame(index=X_train.columns, columns=['Dependence'] + all_columns)
    for col in numcols:
        position = all_columns.index(col)
        X, y = X_train.drop(col, axis=1), X_train[col]
        rf.fit(X, y)
        imp = permutation_importances_raw(rf, X, y, oob_regression_r2_score, n_samples)
        # Re-insert the dropped target at its original column position.
        imp = np.insert(imp, position, 1.0)
        df_dep.loc[col] = np.insert(imp, 0, rf.oob_score_)  # add overall dependence
    return df_dep
示例9: is_numeric_dtype
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def is_numeric_dtype(arr_or_dtype):
    """Return True when the argument's dtype is a numeric or boolean type.

    Crude stand-in that inspects ``dtype.type``. Accepts array-likes
    (anything with a ``.dtype``) and, as the parameter name promises, dtype
    objects themselves (anything with a ``.type``). Datetime and timedelta
    types are rejected explicitly because ``np.timedelta64`` is a subclass
    of ``np.number``. Anything without a usable scalar type yields False.

    :param arr_or_dtype: array-like or dtype object.
    :return: bool
    """
    dtype = getattr(arr_or_dtype, "dtype", arr_or_dtype)
    tipo = getattr(dtype, "type", type(None))
    if not isinstance(tipo, type):
        # issubclass() would raise TypeError on a non-class.
        return False
    return (issubclass(tipo, (np.number, np.bool_)) and
            not issubclass(tipo, (np.datetime64, np.timedelta64)))
示例10: get_errors
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def get_errors(self, series: pd.Series, column: 'column.Column'):
    """Return a ValidationWarning for every element that fails validation.

    ``self.validate(series)`` supplies a boolean mask of passing elements.
    When ``column.allow_empty`` is set, empty elements are never reported:
    "empty" means null for categorical and numeric series, and a
    zero-length string otherwise.

    The categorical check uses ``isinstance(series.dtype,
    pd.CategoricalDtype)`` in place of the deprecated
    ``is_categorical_dtype``.

    :param series: the column's data.
    :param column: column definition; only ``allow_empty`` is read here.
    :return: list of ValidationWarning, one per failing element, carrying
             ``self.message``, the element, its index label and the series
             name.
    """
    # Mask of elements that FAIL the child class's validate().
    simple_validation = ~self.validate(series)
    if column.allow_empty:
        # Categorical and numeric series have no .str accessor, so their
        # emptiness has to be tested with isnull() instead of string length.
        if isinstance(series.dtype, pd.CategoricalDtype) or is_numeric_dtype(series):
            failed = ~series.isnull() & simple_validation
        else:
            failed = (series.str.len() > 0) & simple_validation
    else:
        failed = simple_validation

    # Index labels of the failing elements; the label is reported as the row.
    indices = series.index[failed]
    return [
        ValidationWarning(
            message=self.message,
            value=series[i],
            row=i,
            column=series.name
        )
        for i in indices
    ]
示例11: fit
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def fit(self, X, y):
    """Fit the Imputer to the dataset and determine the right approach.

    The dtype of ``X`` (when ``y`` is None, the univariate case) or of ``y``
    (the predictive case) selects the imputer: ``self.num_imputer`` for
    numeric data, ``self.cat_imputer`` for string data. The chosen imputer
    is fit with ``(X, y)`` and its result and strategy are stored in
    ``self.statistics_``. For any other dtype both entries stay None.

    Args:
        X (pd.Series): Dataset to fit the imputer, or predictors
        y (pd.Series): None, or dataset to fit predictors

    Returns:
        self. Instance of the class.
    """
    # start off with stats blank
    stats = {"param": None, "strategy": None}

    # The series whose dtype decides between the numeric and categorical path.
    reference = X if y is None else y
    if is_numeric_dtype(reference):
        stats = {"param": self.num_imputer.fit(X, y),
                 "strategy": self.num_imputer.strategy}
    if is_string_dtype(reference):
        stats = {"param": self.cat_imputer.fit(X, y),
                 "strategy": self.cat_imputer.strategy}

    self.statistics_ = stats
    return self
示例12: _not_num_series
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def _not_num_series(m, s):
    """Private guard: raise TypeError when Series ``s`` is not numeric.

    ``m`` is only used as the leading text of the error message. Returns
    None when ``s`` has a numeric dtype.
    """
    if is_numeric_dtype(s):
        return
    t = s.dtype
    err = f"{m} not appropriate for Series {s.name} of type {t}."
    raise TypeError(err)
示例13: _process_chart_y
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def _process_chart_y(self, data, x, y, single_y):
    """This should happen after _process_chart_x

    Resolve the y dimension(s). An explicit ``y`` wins, then ``self.y``.
    Otherwise y is inferred from the columns of ``data`` that are not ``x``
    and not listed in ``self.by``, ``self.groupby`` or ``self.grid``; when
    several candidates remain and at least one is numeric, only the numeric
    ones are kept. Returns a single name when exactly one candidate is left
    or ``single_y`` is set, else the list of candidates.
    """
    y = y or self.y
    if y is not None:
        return y

    excluded = [x] + self.by + self.groupby + self.grid
    candidates = [name for name in data.columns if name not in excluded]
    if len(candidates) > 1:
        # if columns have different dtypes, only include numeric columns
        from pandas.api.types import is_numeric_dtype as isnum
        numeric = [name for name in candidates if isnum(data[name])]
        if len(numeric) >= 1:
            candidates = numeric

    if len(candidates) == 1 or single_y:
        return candidates[0]
    return candidates
示例14: get_var_type
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def get_var_type(col):
    """
    Return var_type (for KDEMultivariate) of the column

    The categorical test is an ``isinstance`` check against
    ``CategoricalDtype`` rather than the deprecated
    ``is_categorical_dtype`` helper.

    Parameters
    ----------
    col : pandas.Series
        A dataframe column.

    Returns
    -------
    out : str
        One of ['c', 'o', 'u']: 'c' for numeric columns, 'o' for ordered
        categoricals, 'u' for unordered categoricals and everything else.

    See Also
    --------
    The origin of the character codes is
    :class:`statsmodels.nonparametric.kernel_density.KDEMultivariate`.
    """
    if pdtypes.is_numeric_dtype(col):
        # continuous
        return 'c'
    if isinstance(getattr(col, 'dtype', None), pdtypes.CategoricalDtype):
        # ordered or unordered
        return 'o' if col.cat.ordered else 'u'
    # unordered if unsure, e.g string columns that
    # are not categorical
    return 'u'
示例15: assert_equal_ndarray
# 需要导入模块: from pandas.api import types [as 别名]
# 或者: from pandas.api.types import is_numeric_dtype [as 别名]
def assert_equal_ndarray(a, b, exact=False, elem_name=None):
    """Assert that ``a`` equals ``b`` after ``b`` is passed through ``asarray``.

    With ``exact`` set, elements are compared with ``==``. Otherwise numeric
    inputs must have equal shapes and be ``np.allclose`` (NaNs compare
    equal), inputs whose dtypes have fields (more than one for ``a``, at
    least one for ``b``) are compared as DataFrames via ``assert_equal``,
    and anything else falls back to ``==``. Failure messages come from
    ``format_msg(elem_name)``.
    """
    b = asarray(b)

    if exact:
        assert np.all(a == b), format_msg(elem_name)
        return

    if is_numeric_dtype(a) and is_numeric_dtype(b):
        assert a.shape == b.shape, format_msg(elem_name)
        assert np.allclose(a, b, equal_nan=True), format_msg(elem_name)
        return

    # Structured dtype: len() of a dtype is its number of fields.
    both_structured = (
        hasattr(a, "dtype")
        and hasattr(b, "dtype")
        and len(a.dtype) > 1
        and len(b.dtype) > 0
    )
    if both_structured:
        assert_equal(pd.DataFrame(a), pd.DataFrame(b), exact, elem_name)
    else:
        assert np.all(a == b), format_msg(elem_name)