本文整理汇总了Python中sklearn.impute.SimpleImputer类的典型用法代码示例。如果您正苦于以下问题:Python impute.SimpleImputer类的具体用法?Python impute.SimpleImputer怎么用?Python impute.SimpleImputer使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块sklearn.impute的用法示例。
在下文中一共展示了impute.SimpleImputer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_simple_imputation_add_indicator_sparse_matrix
# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_simple_imputation_add_indicator_sparse_matrix(arr_type):
    """Check ``add_indicator=True`` imputation on a sparse input.

    ``arr_type`` is called on a nested list to build the input matrix.
    The transformed output must be sparse and match the expected dense
    array: three imputed data columns followed by three indicator columns
    flagging where a value was missing.
    """
    nan = np.nan
    sparse_input = arr_type([
        [nan, 1, 5],
        [2, nan, 1],
        [6, 3, nan],
        [1, 2, 9],
    ])
    # Columns 0-2: data with each NaN replaced by the mean of the observed
    # entries of its column; columns 3-5: 1 where the entry was missing.
    expected = np.array([
        [3., 1., 5., 1., 0., 0.],
        [2., 2., 1., 0., 1., 0.],
        [6., 3., 5., 0., 0., 1.],
        [1., 2., 9., 0., 0., 0.],
    ])

    transformed = SimpleImputer(
        missing_values=np.nan, add_indicator=True
    ).fit_transform(sparse_input)

    assert sparse.issparse(transformed)
    assert transformed.shape == expected.shape
    assert_allclose(transformed.toarray(), expected)
示例2: test_imputation_most_frequent
# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_imputation_most_frequent():
    """Check the ``most_frequent`` strategy with ``-1`` as the missing marker.

    The first input column holds only the marker, so its expected statistic
    is NaN and the column is absent from the expected output.
    """
    data = np.array([
        [-1, -1, 0, 5],
        [-1, 2, -1, 3],
        [-1, 1, 3, -1],
        [-1, 2, 3, 7],
    ])
    expected = np.array([
        [2, 0, 5],
        [2, 3, 3],
        [1, 3, 3],
        [2, 3, 7],
    ])
    expected_statistics = [np.nan, 2, 3, 3]

    # NOTE: scipy.stats.mode (used by SimpleImputer) returns the lowest of
    # the most frequent values, not the first one as its documentation
    # promises. If a scipy update makes this test fail, SimpleImputer must be
    # adjusted to stay consistent with the new (correct) behaviour.
    _check_statistics(
        data,
        expected,
        "most_frequent",
        expected_statistics,
        missing_values=-1,
    )
示例3: test_imputation_most_frequent_objects
# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_imputation_most_frequent_objects(marker):
    """Check the ``most_frequent`` strategy on an object-dtype array.

    ``marker`` is the value treated as missing. The first column contains
    only the marker and is absent from the expected output.
    """
    data = np.array(
        [
            [marker, marker, "a", "f"],
            [marker, "c", marker, "d"],
            [marker, "b", "d", marker],
            [marker, "c", "d", "h"],
        ],
        dtype=object,
    )
    expected = np.array(
        [
            ["c", "a", "f"],
            ["c", "d", "d"],
            ["b", "d", "d"],
            ["c", "d", "h"],
        ],
        dtype=object,
    )

    fitted = SimpleImputer(
        missing_values=marker, strategy="most_frequent"
    ).fit(data)
    assert_array_equal(fitted.transform(data), expected)
示例4: test_imputation_most_frequent_pandas
# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_imputation_most_frequent_pandas(dtype):
    """Check the ``most_frequent`` strategy on a pandas DataFrame.

    The frame is parsed from an inline CSV using ``dtype``; the test is
    skipped when pandas is not installed. ``Cat4`` has no values at all and
    does not appear in the expected output.
    """
    pd = pytest.importorskip("pandas")

    csv_text = ("Cat1,Cat2,Cat3,Cat4\n"
                ",i,x,\n"
                "a,,y,\n"
                "a,j,,\n"
                "b,j,x,")
    frame = pd.read_csv(io.StringIO(csv_text), dtype=dtype)

    expected = np.array(
        [
            ["a", "i", "x"],
            ["a", "j", "y"],
            ["a", "j", "x"],
            ["b", "j", "x"],
        ],
        dtype=object,
    )

    transformed = SimpleImputer(strategy="most_frequent").fit_transform(frame)
    assert_array_equal(transformed, expected)
示例5: test_imputation_constant_float
# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_imputation_constant_float(array_constructor):
    """Check the ``constant`` strategy on float data with ``fill_value=-1``.

    ``array_constructor`` is applied to both the input and the expected
    result before the imputer runs, so the comparison is done on whatever
    container type it returns.
    """
    nan = np.nan
    dense_input = np.array([
        [nan, 1.1, 0, nan],
        [1.2, nan, 1.3, nan],
        [0, 0, nan, nan],
        [1.4, 1.5, 0, nan],
    ])
    dense_expected = np.array([
        [-1, 1.1, 0, -1],
        [1.2, -1, 1.3, -1],
        [0, 0, -1, -1],
        [1.4, 1.5, 0, -1],
    ])

    converted_input = array_constructor(dense_input)
    converted_expected = array_constructor(dense_expected)

    transformed = SimpleImputer(
        strategy="constant", fill_value=-1
    ).fit_transform(converted_input)
    assert_allclose_dense_sparse(transformed, converted_expected)
示例6: test_imputation_constant_object
# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_imputation_constant_object(marker):
    """Check the ``constant`` strategy on object data.

    Every occurrence of ``marker`` must be replaced by the string
    ``"missing"``, including the last column, which holds only markers.
    """
    fill = "missing"
    data = np.array(
        [
            [marker, "a", "b", marker],
            ["c", marker, "d", marker],
            ["e", "f", marker, marker],
            ["g", "h", "i", marker],
        ],
        dtype=object,
    )
    expected = np.array(
        [
            [fill, "a", "b", fill],
            ["c", fill, "d", fill],
            ["e", "f", fill, fill],
            ["g", "h", "i", fill],
        ],
        dtype=object,
    )

    transformed = SimpleImputer(
        missing_values=marker, strategy="constant", fill_value="missing"
    ).fit_transform(data)
    assert_array_equal(transformed, expected)
示例7: test_imputation_constant_pandas
# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_imputation_constant_pandas(dtype):
    """Check the ``constant`` strategy on a pandas DataFrame.

    No ``fill_value`` is given; the expected output shows every empty cell
    replaced by the string ``"missing_value"``. The test is skipped when
    pandas is not installed.
    """
    pd = pytest.importorskip("pandas")

    csv_text = ("Cat1,Cat2,Cat3,Cat4\n"
                ",i,x,\n"
                "a,,y,\n"
                "a,j,,\n"
                "b,j,x,")
    frame = pd.read_csv(io.StringIO(csv_text), dtype=dtype)

    filler = "missing_value"
    expected = np.array(
        [
            [filler, "i", "x", filler],
            ["a", filler, "y", filler],
            ["a", "j", filler, filler],
            ["b", "j", "x", filler],
        ],
        dtype=object,
    )

    transformed = SimpleImputer(strategy="constant").fit_transform(frame)
    assert_array_equal(transformed, expected)
示例8: test_imputation_pipeline_grid_search
# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_imputation_pipeline_grid_search():
    """Smoke-test SimpleImputer inside a Pipeline tuned by GridSearchCV.

    The test only checks that fitting completes; no result is asserted.
    """
    features = sparse_random_matrix(100, 100, density=0.10)
    # Treat the first stored value of the sparse matrix as the missing marker.
    marker = features.data[0]

    steps = [
        ('imputer', SimpleImputer(missing_values=marker)),
        ('tree', tree.DecisionTreeRegressor(random_state=0)),
    ]
    model = Pipeline(steps)
    param_grid = {'imputer__strategy': ["mean", "median", "most_frequent"]}

    targets = sparse_random_matrix(100, 1, density=0.10).toarray()
    GridSearchCV(model, param_grid).fit(features, targets)
示例9: _impute_values
# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def _impute_values(self, features):
    """Fill in missing entries of a feature matrix.

    A median ``SimpleImputer`` is created and fitted on ``features`` the
    first time this is called (while ``self._fitted_imputer`` is ``None``);
    later calls reuse the stored imputer without refitting.

    Parameters
    ----------
    features: array-like {n_samples, n_features}
        A feature matrix

    Returns
    -------
    array-like {n_samples, n_features}
        The result of the stored imputer's ``transform`` on ``features``.
    """
    if self.verbosity > 1:
        print('Imputing missing values in feature set')

    imputer = self._fitted_imputer
    if imputer is None:
        imputer = SimpleImputer(strategy="median")
        # Store before fitting, as the attribute is the cache for later calls.
        self._fitted_imputer = imputer
        imputer.fit(features)

    return imputer.transform(features)
示例10: get_estimator
# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def get_estimator():
    """Build an unfitted preprocessing + logistic-regression pipeline.

    The categorical columns are one-hot encoded (unknown categories are
    ignored) and the numerical columns have missing values replaced by the
    constant ``-1``.

    Returns
    -------
    Pipeline
        Two steps: ``'transformer'`` (the column transformer) and
        ``'classifier'`` (a ``LogisticRegression``).
    """
    column_steps = (
        (
            OneHotEncoder(handle_unknown='ignore'),
            ['Sex', 'Pclass', 'Embarked'],
        ),
        (
            SimpleImputer(strategy='constant', fill_value=-1),
            ['Age', 'SibSp', 'Parch', 'Fare'],
        ),
    )
    return Pipeline([
        ('transformer', make_column_transformer(*column_steps)),
        ('classifier', LogisticRegression()),
    ])
示例11: test_simple_imputer_float_inputs
# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_simple_imputer_float_inputs(self):
    """Convert a fitted mean-strategy SimpleImputer to ONNX and check it.

    Verifies that the converted graph has exactly one node and one output
    whose last dimension is 2, then hands the data and both models to
    ``dump_data_and_model``.
    """
    samples = [[1, 2], [np.nan, 3], [7, 6]]
    imputer = SimpleImputer(strategy="mean", fill_value="nan")
    imputer.fit(samples)

    initial_types = [("input", FloatTensorType([None, 2]))]
    onnx_model = convert_sklearn(
        imputer,
        "scikit-learn simple imputer",
        initial_types,
        target_opset=TARGET_OPSET,
    )

    graph = onnx_model.graph
    self.assertTrue(graph.node is not None)
    # The graph should consist of a single node.
    self.assertEqual(len(graph.node), 1)

    # There should be one output, with two columns.
    graph_outputs = graph.output
    self.assertEqual(len(graph_outputs), 1)
    last_dim = graph_outputs[0].type.tensor_type.shape.dim[-1]
    self.assertEqual(last_dim.dim_value, 2)

    dump_data_and_model(
        np.array(samples, dtype=np.float32),
        imputer,
        onnx_model,
        basename="SklearnSimpleImputerMeanFloat32",
    )
示例12: __load_dataset__
# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def __load_dataset__(self):
    """Load the Stata training file and populate ``self.X`` and ``self.Y``.

    Each row tuple is sliced by position: elements 4-7 are the orderings
    and elements 13-32 are the context features, where the string ``"."``
    marks a missing value. Features are imputed with ``SimpleImputer``'s
    defaults, transformed with ``log(x + 1)`` and standardized. Orderings
    are shifted down by one and passed to ``ranking_ordering_conversion``.
    Finally ``self.__check_dataset_validity__()`` is called.
    """
    df = pd.read_stata(self.train_file)

    orderings = []
    features = []
    for row in df.itertuples():
        orderings.append(row[4:8])
        # np.nan instead of np.NAN: the upper-case alias was removed in
        # NumPy 2.0.
        features.append(
            [float(value) if value != "." else np.nan for value in row[13:33]]
        )

    X = SimpleImputer().fit_transform(np.array(features))
    # Element-wise log(x + 1) over the whole matrix. Looping over
    # len(features[0]) columns instead would fail if the imputer dropped a
    # column that contained only missing values.
    X = np.log(X + 1)
    self.X = StandardScaler().fit_transform(X)

    # NOTE(review): presumably converts 1-based ranks to 0-based - confirm.
    orderings = np.array(orderings) - 1
    self.Y = ranking_ordering_conversion(orderings)
    self.__check_dataset_validity__()
示例13: fit
# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def fit(self, hyperparameter_config, X, train_indices, dataset_info):
    """Impute missing values in ``X`` and return the fitted transformer.

    Sparse datasets are skipped and get a result dict with ``None``
    entries. For dense data, columns that are entirely NaN are removed,
    then a ``ColumnTransformer`` is fitted on ``X[train_indices]`` and
    applied to all of ``X``. Numerical columns use the strategy from the
    hyperparameter config; categorical columns are filled with a constant.

    ``dataset_info.categorical_features`` is updated in place: first
    filtered to the surviving columns, then sorted after the transform.

    Parameters
    ----------
    hyperparameter_config
        Raw configuration, wrapped in ``ConfigWrapper`` under this node's
        name; must provide the key ``'strategy'``.
    X
        Two-dimensional feature array.
    train_indices
        Row indices of ``X`` used to fit the imputers.
    dataset_info
        Object exposing ``is_sparse`` and ``categorical_features``.

    Returns
    -------
    dict
        For sparse data: ``'imputation_preprocessor'`` and
        ``'all_nan_columns'``, both ``None``. Otherwise ``'X'``,
        ``'imputation_preprocessor'``, ``'dataset_info'`` and
        ``'all_nan_columns'``.
    """
    hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config)

    if dataset_info.is_sparse:
        return {'imputation_preprocessor': None, 'all_nan_columns': None}

    # Remove columns that contain nothing but NaN and keep the categorical
    # flags aligned with the remaining columns.
    all_nan = np.all(np.isnan(X), axis=0)
    X = X[:, ~all_nan]
    dataset_info.categorical_features = [
        dataset_info.categorical_features[i]
        for i, is_nan in enumerate(all_nan)
        if not is_nan
    ]

    strategy = hyperparameter_config['strategy']
    # One above the largest observed value. Sparse input has already
    # returned above, so no sparse fallback is needed here.
    fill_value = int(np.nanmax(X)) + 1

    numerical_imputer = SimpleImputer(strategy=strategy, copy=False)
    categorical_imputer = SimpleImputer(strategy='constant', copy=False, fill_value=fill_value)

    is_categorical = dataset_info.categorical_features
    numerical_columns = [i for i, c in enumerate(is_categorical) if not c]
    categorical_columns = [i for i, c in enumerate(is_categorical) if c]

    transformer = ColumnTransformer(
        transformers=[
            ('numerical_imputer', numerical_imputer, numerical_columns),
            ('categorical_imputer', categorical_imputer, categorical_columns),
        ])
    transformer.fit(X[train_indices])
    X = transformer.transform(X)

    # The transformer lists the numerical columns before the categorical
    # ones; sorting the flags puts the falsy entries first.
    dataset_info.categorical_features = sorted(dataset_info.categorical_features)
    return { 'X': X, 'imputation_preprocessor': transformer, 'dataset_info': dataset_info , 'all_nan_columns': all_nan}
示例14: test_imputer
# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_imputer(self):
    """Convert a fitted mean ``Imputer`` to CoreML, then to ONNX.

    The test returns early, without asserting anything, when the CoreML
    converter reports the model as not supported. Any other conversion
    error is propagated.
    """
    try:
        model = Imputer(missing_values='NaN', strategy='mean', axis=0)
    except TypeError:
        # This Imputer rejects these constructor arguments; build it
        # without ``axis`` and set the attribute afterwards.
        model = Imputer(missing_values=np.nan, strategy='mean')
        model.axis = 0
    data = [[1, 2], [np.nan, 3], [7, 6]]
    model.fit(data)

    from onnxmltools.convert.coreml.convert import convert
    import coremltools  # noqa

    try:
        model_coreml = coremltools.converters.sklearn.convert(model)
    except ValueError as e:
        if 'not supported' in str(e):
            # Python 2.7 + scikit-learn 0.22
            return
        # Re-raise anything else; otherwise ``model_coreml`` would be
        # unbound below and the real error hidden behind a NameError.
        raise

    model_onnx = convert(model_coreml.get_spec())
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(np.array(data, dtype=np.float32),
                        model, model_onnx, basename="CmlImputerMeanFloat32")
示例15: _check_statistics
# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def _check_statistics(X, X_true,
                      strategy, statistics, missing_values):
    """Utility function for testing imputation for a given strategy.

    Runs the same check on a dense array and on a CSC sparse matrix and
    verifies that:

    - the fitted ``statistics_`` (mean, median, mode) equal ``statistics``
    - transforming ``X`` yields ``X_true``

    Parameters
    ----------
    X : ndarray
        Input data containing ``missing_values`` markers.
    X_true : ndarray
        Expected result of imputing ``X``.
    strategy : str
        Passed to ``SimpleImputer(strategy=...)``.
    statistics : array-like
        Expected value of ``imputer.statistics_``.
    missing_values
        Placeholder that marks missing entries in ``X``.
    """
    err_msg = "Parameters: strategy = %s, missing_values = %s, " \
              "sparse = {0}" % (strategy, missing_values)

    # Exact comparison unless floats are involved.
    assert_ae = assert_array_equal
    if X.dtype.kind == 'f' or X_true.dtype.kind == 'f':
        assert_ae = assert_array_almost_equal

    # Normal matrix
    # ``missing_values`` is passed by keyword because SimpleImputer's
    # constructor parameters are keyword-only in current scikit-learn.
    imputer = SimpleImputer(missing_values=missing_values, strategy=strategy)
    X_trans = imputer.fit(X).transform(X.copy())
    assert_ae(imputer.statistics_, statistics,
              err_msg=err_msg.format(False))
    assert_ae(X_trans, X_true, err_msg=err_msg.format(False))

    # Sparse matrix
    imputer = SimpleImputer(missing_values=missing_values, strategy=strategy)
    imputer.fit(sparse.csc_matrix(X))
    X_trans = imputer.transform(sparse.csc_matrix(X.copy()))

    # Densify before comparing if the transform returned a sparse result.
    if sparse.issparse(X_trans):
        X_trans = X_trans.toarray()

    assert_ae(imputer.statistics_, statistics,
              err_msg=err_msg.format(True))
    assert_ae(X_trans, X_true, err_msg=err_msg.format(True))