本文整理汇总了Python中sklearn.impute.SimpleImputer类的典型用法代码示例。如果您正苦于以下问题：Python SimpleImputer类的具体用法？Python SimpleImputer怎么用？Python SimpleImputer使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了SimpleImputer类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_changed_only
def test_changed_only():
    """Check that ``print_changed_only=True`` shortens estimator reprs.

    With the flag enabled, the repr of an estimator is expected to list only
    the parameters that were given non-default values. The global flag is
    switched back to ``False`` in a ``finally`` clause so that a failing
    assertion cannot leak the setting into other tests.
    """
    set_config(print_changed_only=True)
    try:
        lr = LogisticRegression(C=99)
        assert repr(lr) == "LogisticRegression(C=99)"

        # Check with a repr that doesn't fit on a single line.
        lr = LogisticRegression(C=99, class_weight=.4, fit_intercept=False,
                                tol=1234, verbose=True)
        # The continuation line is aligned under the first argument; the
        # string is spelled out explicitly so that alignment cannot be lost
        # to source re-indentation.
        expected = (
            "LogisticRegression(C=99, class_weight=0.4, "
            "fit_intercept=False, tol=1234,\n"
            "                   verbose=True)"
        )
        assert repr(lr) == expected

        imputer = SimpleImputer(missing_values=0)
        assert repr(imputer) == "SimpleImputer(missing_values=0)"

        # missing_values defaults to NaN; float('NaN') must be treated as
        # that default and therefore be omitted from the repr.
        imputer = SimpleImputer(missing_values=float('NaN'))
        assert repr(imputer) == "SimpleImputer()"
    finally:
        set_config(print_changed_only=False)
示例2: test_imputation_error_invalid_strategy
def test_imputation_error_invalid_strategy(strategy):
    """An unsupported ``strategy`` must raise a ValueError mentioning it.

    ``strategy`` is supplied by the caller (presumably a pytest
    parametrization that is not visible here).
    """
    data = np.ones((3, 5))
    data[0, 0] = np.nan  # a single missing entry in an otherwise full matrix

    # Construction stays inside the context manager, as in the original, so
    # the error may come from either the constructor or fit_transform.
    with pytest.raises(ValueError, match=str(strategy)):
        SimpleImputer(strategy=strategy).fit_transform(data)
示例3: test_imputation_deletion_warning
def test_imputation_deletion_warning(strategy):
    """A column made entirely of NaN must trigger a "Deleting" UserWarning."""
    data = np.ones((3, 5))
    data[:, 0] = np.nan  # first column has no observed value at all

    with pytest.warns(UserWarning, match="Deleting"):
        SimpleImputer(strategy=strategy, verbose=True).fit_transform(data)
示例4: test_imputation_mean_median_error_invalid_type
def test_imputation_mean_median_error_invalid_type(strategy, dtype):
    """Imputing mixed string/number data must fail with "non-numeric data"."""
    rows = [
        ["a", "b", 3],
        [4, "e", 6],
        ["g", "h", 9],
    ]
    mixed = np.array(rows, dtype=dtype)

    with pytest.raises(ValueError, match="non-numeric data"):
        SimpleImputer(strategy=strategy).fit_transform(mixed)
示例5: test_imputation_constant_error_invalid_type
def test_imputation_constant_error_invalid_type(X_data, missing_value):
    """A string ``fill_value`` on float data must raise a ValueError.

    The matrix is filled with ``X_data`` and one cell is set to
    ``missing_value``; imputing it with the constant ``"x"`` is expected to
    fail with a message matching "imputing numerical".
    """
    data = np.full((3, 5), X_data, dtype=float)
    data[0, 0] = missing_value

    with pytest.raises(ValueError, match="imputing numerical"):
        SimpleImputer(
            missing_values=missing_value,
            strategy="constant",
            fill_value="x",
        ).fit_transform(data)
示例6: test_imputation_shape
def test_imputation_shape():
    """Verify the shape of the imputed matrix for different strategies.

    Every other row of a 10x2 matrix is set to NaN; after imputation the
    result must still be 10x2 for both dense and CSR-sparse input.
    """
    X = np.random.randn(10, 2)
    X[::2] = np.nan  # rows 0, 2, 4, ... are entirely missing

    for strategy in ['mean', 'median', 'most_frequent']:
        imputer = SimpleImputer(strategy=strategy)

        # Plain asserts replace the legacy assert_equal helper: pytest
        # rewrites them to give a detailed failure message.
        X_imputed = imputer.fit_transform(X)
        assert X_imputed.shape == (10, 2)

        X_imputed = imputer.fit_transform(sparse.csr_matrix(X))
        assert X_imputed.shape == (10, 2)
示例7: data_preprocessing
def data_preprocessing(dataset):
    """Split *dataset* into features and target, mean-imputing one feature.

    Parameters
    ----------
    dataset : object exposing ``.iloc`` (e.g. a pandas DataFrame)
        Columns 2 through 12 are taken as features and column 1 as the
        target.

    Returns
    -------
    X : ndarray
        Feature matrix whose column at index 3 has had its NaN entries
        replaced by the column mean.
    Y : ndarray
        Target values.
    """
    from sklearn.impute import SimpleImputer

    X = dataset.iloc[:, 2:13].values
    Y = dataset.iloc[:, 1].values

    # SimpleImputer only accepts 2-D input, so the column is selected with
    # the slice 3:4 (shape (n, 1)) rather than the index 3 (shape (n,)),
    # which would raise "Expected 2D array, got 1D array instead".
    imputer = SimpleImputer(strategy="mean", missing_values=np.nan)
    X[:, 3:4] = imputer.fit_transform(X[:, 3:4])

    return X, Y
示例8: test_imputation_const_mostf_error_invalid_types
def test_imputation_const_mostf_error_invalid_types(strategy, dtype):
    """Unsupported dtypes must be rejected with a ValueError.

    A matrix mixing NaN and strings is built with the given ``dtype``;
    fitting and transforming it is expected to fail with a message matching
    "SimpleImputer does not support data".
    """
    rows = [
        [np.nan, np.nan, "a", "f"],
        [np.nan, "c", np.nan, "d"],
        [np.nan, "b", "d", np.nan],
        [np.nan, "c", "d", "h"],
    ]
    data = np.array(rows, dtype=dtype)

    expected_message = "SimpleImputer does not support data"
    with pytest.raises(ValueError, match=expected_message):
        SimpleImputer(strategy=strategy).fit(data).transform(data)
示例9: test_imputation_shape
def test_imputation_shape():
    """Verify the shape of the imputed matrix for different strategies.

    Checks SimpleImputer on CSR-sparse and dense input, then
    IterativeImputer initialised with the same strategy on dense input.
    """
    X = np.random.randn(10, 2)
    X[::2] = np.nan  # rows 0, 2, 4, ... are entirely missing
    expected_shape = (10, 2)

    for strategy in ['mean', 'median', 'most_frequent', "constant"]:
        simple = SimpleImputer(strategy=strategy)
        from_sparse = simple.fit_transform(sparse.csr_matrix(X))
        assert from_sparse.shape == expected_shape
        from_dense = simple.fit_transform(X)
        assert from_dense.shape == expected_shape

        iterative = IterativeImputer(initial_strategy=strategy)
        from_iterative = iterative.fit_transform(X)
        assert from_iterative.shape == expected_shape
示例10: test_imputation_pickle
def test_imputation_pickle():
    """A fitted imputer must transform identically after a pickle round-trip."""
    import pickle

    X = sparse_random_matrix(100, 100, density=0.10)

    for strategy in ("mean", "median", "most_frequent"):
        fitted = SimpleImputer(missing_values=0, strategy=strategy)
        fitted.fit(X)

        # Round-trip through pickle; only this test's own bytes are loaded.
        restored = pickle.loads(pickle.dumps(fitted))

        failure_message = ("Fail to transform the data after pickling "
                           "(strategy = %s)" % (strategy))
        assert_array_almost_equal(
            fitted.transform(X.copy()),
            restored.transform(X.copy()),
            err_msg=failure_message,
        )
示例11: test_imputation_add_indicator
def test_imputation_add_indicator(marker):
    """``add_indicator=True`` must append missing-value indicator columns.

    ``marker`` is the value treated as missing. The fourth input column
    consists solely of ``marker``; the expected output holds four imputed
    columns followed by four indicator columns.
    """
    raw = np.array([
        [marker, 1, 5, marker, 1],
        [2, marker, 1, marker, 2],
        [6, 3, marker, marker, 3],
        [1, 2, 9, marker, 4],
    ])
    expected = np.array([
        [3., 1., 5., 1., 1., 0., 0., 1.],
        [2., 2., 1., 2., 0., 1., 0., 1.],
        [6., 3., 5., 3., 0., 0., 1., 1.],
        [1., 2., 9., 4., 0., 0., 0., 1.],
    ])

    imputer = SimpleImputer(missing_values=marker, add_indicator=True)
    transformed = imputer.fit_transform(raw)

    assert_allclose(transformed, expected)
    # The indicator must flag exactly the columns that contained `marker`.
    flagged_columns = imputer.indicator_.features_
    assert_array_equal(flagged_columns, np.array([0, 1, 2, 3]))
示例12: test_simple_imputation_add_indicator_sparse_matrix
def test_simple_imputation_add_indicator_sparse_matrix(arr_type):
    """Sparse input with ``add_indicator=True`` must yield a sparse result.

    ``arr_type`` is a callable that builds the input container from a nested
    list (presumably a scipy sparse-matrix constructor, judging by the test
    name -- confirm against the parametrization).
    """
    sparse_input = arr_type([
        [np.nan, 1, 5],
        [2, np.nan, 1],
        [6, 3, np.nan],
        [1, 2, 9],
    ])
    # Three imputed columns followed by three indicator columns.
    expected = np.array([
        [3., 1., 5., 1., 0., 0.],
        [2., 2., 1., 0., 1., 0.],
        [6., 3., 5., 0., 0., 1.],
        [1., 2., 9., 0., 0., 0.],
    ])

    imputer = SimpleImputer(missing_values=np.nan, add_indicator=True)
    transformed = imputer.fit_transform(sparse_input)

    assert sparse.issparse(transformed)
    assert transformed.shape == expected.shape
    assert_allclose(transformed.toarray(), expected)
示例13: __call__
def __call__(self, data):
    """Return a copy of *data* with missing feature values imputed.

    SqlTable input is delegated to ``Impute()``. Any other table is run
    through a ``SimpleImputer`` configured with ``self.strategy``, and a new
    domain is built from the attributes that still have a valid statistic.
    """
    from Orange.data.sql.table import SqlTable

    if isinstance(data, SqlTable):
        return Impute()(data)

    imputer = SimpleImputer(strategy=self.strategy)
    imputed_X = imputer.fit_transform(data.X)

    # Build replacement variables carrying an appropriate `compute_value`,
    # skipping attributes whose `imputer.statistics_` entry is NaN (per the
    # original author's note: all-NaN columns, which the imputer already
    # drops from the transformed X).
    features = []
    for var, value in zip(data.domain.attributes, imputer.statistics_):
        if np.isnan(value):
            continue
        features.append(impute.Average()(data, var, value))
    assert imputed_X.shape[1] == len(features)

    domain = Orange.data.Domain(
        features, data.domain.class_vars, data.domain.metas
    )
    new_data = data.transform(domain)
    new_data.X = imputed_X
    return new_data
示例14: test_imputation_copy
def test_imputation_copy():
    """Check when ``transform`` copies its input and when it works in place.

    In each scenario the transformed output is mutated and then compared
    with the input: if they still agree, no copy was made.
    """
    base = sparse_random_matrix(5, 5, density=0.75, random_state=0)

    # copy=True, dense => copy
    X = base.copy().toarray()
    imp = SimpleImputer(missing_values=0, strategy="mean", copy=True)
    X_out = imp.fit(X).transform(X)
    X_out[0, 0] = -1
    assert not np.all(X == X_out)

    # copy=True, sparse csr => copy
    X = base.copy()
    imp = SimpleImputer(
        missing_values=X.data[0], strategy="mean", copy=True
    )
    X_out = imp.fit(X).transform(X)
    X_out.data[0] = -1
    assert not np.all(X.data == X_out.data)

    # copy=False, dense => no copy
    X = base.copy().toarray()
    imp = SimpleImputer(missing_values=0, strategy="mean", copy=False)
    X_out = imp.fit(X).transform(X)
    X_out[0, 0] = -1
    assert_array_almost_equal(X, X_out)

    # copy=False, sparse csc => no copy
    X = base.copy().tocsc()
    imp = SimpleImputer(
        missing_values=X.data[0], strategy="mean", copy=False
    )
    X_out = imp.fit(X).transform(X)
    X_out.data[0] = -1
    assert_array_almost_equal(X.data, X_out.data)

    # copy=False, sparse csr => copy
    X = base.copy()
    imp = SimpleImputer(
        missing_values=X.data[0], strategy="mean", copy=False
    )
    X_out = imp.fit(X).transform(X)
    X_out.data[0] = -1
    assert not np.all(X.data == X_out.data)
示例15: test_iterative_imputer_missing_at_transform
def test_iterative_imputer_missing_at_transform(strategy):
    """A feature complete at fit time must fall back to the initial imputer.

    Column 0 has no missing value (0) in the training data but does have one
    in the test data; IterativeImputer's output for that column must match
    a SimpleImputer fitted with the same strategy.
    """
    rng = np.random.RandomState(0)
    n_samples, n_features = 100, 10
    shape = (n_samples, n_features)

    X_train = rng.randint(low=0, high=3, size=shape)
    X_test = rng.randint(low=0, high=3, size=shape)
    X_train[:, 0] = 1  # definitely no missing values in 0th column
    X_test[0, 0] = 0  # definitely missing value in 0th column

    iterative = IterativeImputer(
        missing_values=0,
        max_iter=1,
        initial_strategy=strategy,
        random_state=rng,
    ).fit(X_train)
    simple = SimpleImputer(missing_values=0, strategy=strategy).fit(X_train)

    # If there were no missing values at time of fit, the iterative imputer
    # only uses the initial imputer for that feature at transform.
    iterative_col = iterative.transform(X_test)[:, 0]
    simple_col = simple.transform(X_test)[:, 0]
    assert_allclose(iterative_col, simple_col)