本文整理汇总了Python中sklearn.impute.SimpleImputer.fit方法的典型用法代码示例。如果您正苦于以下问题:Python SimpleImputer.fit方法的具体用法?Python SimpleImputer.fit怎么用?Python SimpleImputer.fit使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.impute.SimpleImputer的用法示例。
在下文中一共展示了SimpleImputer.fit方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_imputation_error_sparse_0
# 需要导入模块: from sklearn.impute import SimpleImputer [as 别名]
# 或者: from sklearn.impute.SimpleImputer import fit [as 别名]
def test_imputation_error_sparse_0(strategy):
    """Sparse input combined with ``missing_values=0`` must be rejected.

    Both ``fit`` and ``transform`` are expected to raise a ``ValueError``
    whose message contains "Provide a dense array" when handed a sparse
    matrix, while fitting on the dense equivalent is expected to succeed.

    Parameters
    ----------
    strategy : str
        Imputation strategy forwarded to ``SimpleImputer``.
    """
    expected_msg = "Provide a dense array"

    dense = np.ones((3, 5))
    dense[0] = 0  # the first row holds the value treated as "missing"
    X = sparse.csc_matrix(dense)

    imputer = SimpleImputer(strategy=strategy, missing_values=0)

    with pytest.raises(ValueError, match=expected_msg):
        imputer.fit(X)

    # Fit on dense data first so that transform() runs on a fitted imputer.
    imputer.fit(X.toarray())
    with pytest.raises(ValueError, match=expected_msg):
        imputer.transform(X)
示例2: test_imputation_const_mostf_error_invalid_types
# 需要导入模块: from sklearn.impute import SimpleImputer [as 别名]
# 或者: from sklearn.impute.SimpleImputer import fit [as 别名]
def test_imputation_const_mostf_error_invalid_types(strategy, dtype):
    """Imputing string data stored under an unsupported dtype must fail.

    A small table mixing ``np.nan`` and strings is built with the given
    ``dtype``; creating, fitting and applying a ``SimpleImputer`` with the
    given ``strategy`` is expected to raise a ``ValueError`` whose message
    contains "SimpleImputer does not support data".

    Parameters
    ----------
    strategy : str
        Imputation strategy forwarded to ``SimpleImputer`` (the original
        comment names "most_frequent" and "constant").
    dtype : object
        dtype used to build the input array.
    """
    rows = [
        [np.nan, np.nan, "a", "f"],
        [np.nan, "c", np.nan, "d"],
        [np.nan, "b", "d", np.nan],
        [np.nan, "c", "d", "h"],
    ]
    X = np.array(rows, dtype=dtype)

    with pytest.raises(ValueError,
                       match="SimpleImputer does not support data"):
        imputer = SimpleImputer(strategy=strategy)
        imputer.fit(X).transform(X)
示例3: test_imputation_pickle
# 需要导入模块: from sklearn.impute import SimpleImputer [as 别名]
# 或者: from sklearn.impute.SimpleImputer import fit [as 别名]
def test_imputation_pickle():
    """A fitted imputer must transform identically after a pickle round trip.

    For each of the "mean", "median" and "most_frequent" strategies an
    imputer is fitted, serialised with ``pickle`` and restored; the
    original and the restored imputer must then produce (almost) equal
    output on copies of the same input.
    """
    import pickle

    # NOTE(review): this fits on the output of sparse_random_matrix with
    # missing_values=0, while test_imputation_error_sparse_0 expects that
    # combination to raise for sparse input -- confirm which scikit-learn
    # version this test targets.
    X = sparse_random_matrix(100, 100, density=0.10)

    for strategy in ("mean", "median", "most_frequent"):
        fitted = SimpleImputer(missing_values=0, strategy=strategy)
        fitted.fit(X)

        restored = pickle.loads(pickle.dumps(fitted))

        from_original = fitted.transform(X.copy())
        from_restored = restored.transform(X.copy())
        failure_msg = ("Fail to transform the data after pickling "
                       "(strategy = %s)" % (strategy))
        assert_array_almost_equal(from_original, from_restored,
                                  err_msg=failure_msg)
示例4: test_imputation_copy
# 需要导入模块: from sklearn.impute import SimpleImputer [as 别名]
# 或者: from sklearn.impute.SimpleImputer import fit [as 别名]
def test_imputation_copy():
    """Check when ``transform`` returns a copy and when it works in place.

    Each scenario fits and transforms the same data, overwrites one entry
    of the result and then checks whether the input changed with it:
    the input must stay untouched when a copy is expected, and must follow
    the modification when no copy is expected.
    """
    base = sparse_random_matrix(5, 5, density=0.75, random_state=0)

    # copy=True, dense => copy
    X = base.copy().toarray()
    imputer = SimpleImputer(missing_values=0, strategy="mean", copy=True)
    imputer.fit(X)
    transformed = imputer.transform(X)
    transformed[0, 0] = -1
    assert not np.all(X == transformed)

    # copy=True, sparse csr => copy
    X = base.copy()
    imputer = SimpleImputer(missing_values=X.data[0], strategy="mean",
                            copy=True)
    imputer.fit(X)
    transformed = imputer.transform(X)
    transformed.data[0] = -1
    assert not np.all(X.data == transformed.data)

    # copy=False, dense => no copy
    X = base.copy().toarray()
    imputer = SimpleImputer(missing_values=0, strategy="mean", copy=False)
    imputer.fit(X)
    transformed = imputer.transform(X)
    transformed[0, 0] = -1
    assert_array_almost_equal(X, transformed)

    # copy=False, sparse csc => no copy
    X = base.copy().tocsc()
    imputer = SimpleImputer(missing_values=X.data[0], strategy="mean",
                            copy=False)
    imputer.fit(X)
    transformed = imputer.transform(X)
    transformed.data[0] = -1
    assert_array_almost_equal(X.data, transformed.data)

    # copy=False, sparse csr => copy
    X = base.copy()
    imputer = SimpleImputer(missing_values=X.data[0], strategy="mean",
                            copy=False)
    imputer.fit(X)
    transformed = imputer.transform(X)
    transformed.data[0] = -1
    assert not np.all(X.data == transformed.data)
示例5: data_preprocessing
# 需要导入模块: from sklearn.impute import SimpleImputer [as 别名]
# 或者: from sklearn.impute.SimpleImputer import fit [as 别名]
def data_preprocessing(dataset):
    """Slice *dataset* into ``X``/``Y`` arrays and mean-impute one column.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Input table (the original comment suggests it is loaded with
        ``pd.read_csv('data/train.csv')``). Columns 2..12 are taken as
        ``X`` and column 1 as ``Y`` -- presumably features and target;
        verify against the caller.

    Notes
    -----
    NOTE(review): the visible snippet ends after the imputation step and
    returns nothing; ``Y`` is not used here. Confirm against the full
    source whether ``X`` and ``Y`` should be returned.
    """
    from sklearn.impute import SimpleImputer

    X = dataset.iloc[:, 2:13].values
    Y = dataset.iloc[:, 1].values

    # Replace missing values in column 3 of X with the column mean.
    # SimpleImputer only accepts 2-D input, so the column is selected with
    # a slice (shape (n_samples, 1)) instead of an integer index, which
    # would yield a 1-D array and make fit()/transform() raise ValueError.
    imputer = SimpleImputer(strategy="mean", missing_values=np.nan)
    X[:, 3:4] = imputer.fit_transform(X[:, 3:4])
示例6: _check_statistics
# 需要导入模块: from sklearn.impute import SimpleImputer [as 别名]
# 或者: from sklearn.impute.SimpleImputer import fit [as 别名]
def _check_statistics(X, X_true,
                      strategy, statistics, missing_values):
    """Utility function for testing imputation for a given strategy.

    The same checks are run twice: once on ``X`` as given and once on
    ``X`` converted to a CSC sparse matrix.

    Check that:
        - the fitted ``statistics_`` (mean, median, mode) match
          ``statistics``
        - the transformed data matches ``X_true``

    Parameters
    ----------
    X : ndarray
        Input data containing missing values.
    X_true : ndarray
        Expected result of imputing ``X``.
    strategy : str
        Imputation strategy forwarded to ``SimpleImputer``.
    statistics : array-like
        Expected value of ``imputer.statistics_`` after fitting.
    missing_values : object
        Placeholder for missing values forwarded to ``SimpleImputer``.
    """
    # The %-formatting fills in strategy/missing_values now; the {0}/{1}
    # placeholders are filled later with str.format for each scenario.
    err_msg = "Parameters: strategy = %s, missing_values = %s, " \
              "axis = {0}, sparse = {1}" % (strategy, missing_values)

    # Exact comparison unless floating point data is involved.
    assert_ae = assert_array_equal
    if X.dtype.kind == 'f' or X_true.dtype.kind == 'f':
        assert_ae = assert_array_almost_equal

    # Normal matrix
    # missing_values is passed by keyword: SimpleImputer's parameters are
    # keyword-only in current scikit-learn releases.
    imputer = SimpleImputer(missing_values=missing_values, strategy=strategy)
    X_trans = imputer.fit(X).transform(X.copy())
    assert_ae(imputer.statistics_, statistics,
              err_msg=err_msg.format(0, False))
    assert_ae(X_trans, X_true, err_msg=err_msg.format(0, False))

    # Sparse matrix
    imputer = SimpleImputer(missing_values=missing_values, strategy=strategy)
    imputer.fit(sparse.csc_matrix(X))
    X_trans = imputer.transform(sparse.csc_matrix(X.copy()))

    # Densify so the result can be compared against X_true.
    if sparse.issparse(X_trans):
        X_trans = X_trans.toarray()

    assert_ae(imputer.statistics_, statistics,
              err_msg=err_msg.format(0, True))
    assert_ae(X_trans, X_true, err_msg=err_msg.format(0, True))
示例7: test_imputation_most_frequent_objects
# 需要导入模块: from sklearn.impute import SimpleImputer [as 别名]
# 或者: from sklearn.impute.SimpleImputer import fit [as 别名]
def test_imputation_most_frequent_objects(marker):
    """Check "most_frequent" imputation on an object-dtype array.

    ``marker`` is used as the missing-value placeholder. The input has
    four columns, the first of which contains only ``marker``; the
    expected output has three columns with every remaining ``marker``
    replaced.

    Parameters
    ----------
    marker : object
        Value that denotes a missing entry in the input array.
    """
    raw = np.array([
        [marker, marker, "a", "f"],
        [marker, "c", marker, "d"],
        [marker, "b", "d", marker],
        [marker, "c", "d", "h"],
    ], dtype=object)

    expected = np.array([
        ["c", "a", "f"],
        ["c", "d", "d"],
        ["b", "d", "d"],
        ["c", "d", "h"],
    ], dtype=object)

    imputer = SimpleImputer(missing_values=marker, strategy="most_frequent")
    imputer.fit(raw)
    imputed = imputer.transform(raw)

    assert_array_equal(imputed, expected)
示例8: scatter_matrix
# 需要导入模块: from sklearn.impute import SimpleImputer [as 别名]
# 或者: from sklearn.impute.SimpleImputer import fit [as 别名]
# plt.show()

# ``pandas.tools.plotting`` was removed from pandas; ``scatter_matrix`` is
# exposed from ``pandas.plotting``. Fall back to the legacy location so the
# script keeps working on very old pandas installations.
try:
    from pandas.plotting import scatter_matrix
except ImportError:
    from pandas.tools.plotting import scatter_matrix
attributes = ["median_house_value", "median_income", "total_rooms", "housing_median_age"]
# scatter_matrix(housing[attributes], figsize=(12, 8))
# plt.show()

# Add ratio columns derived from the existing columns.
housing["rooms_per_household"] = housing["total_rooms"]/housing["households"]
housing["bedrooms_per_room"] = housing["total_bedrooms"]/housing["total_rooms"]
housing["population_per_household"] = housing["population"]/housing["households"]

# Drop the "ocean_proximity" column before imputing with the median strategy.
housing_num = housing.drop("ocean_proximity", axis=1)
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy="median")
imputer.fit(housing_num)
X = imputer.transform(housing_num)
# Wrap the transformed result in a DataFrame with the original column names.
housing_tr = pd.DataFrame(X, columns=housing_num.columns)

# from sklearn.preprocessing import LabelEncoder
# encoder = LabelEncoder()
housing_cat = housing["ocean_proximity"]
# housing_cat_encoded = encoder.fit_transform(housing_cat)
# from sklearn.preprocessing import OneHotEncoder
# encoder = OneHotEncoder()
# housing_cat_1hot = encoder.fit_transform(housing_cat_encoded.reshape(-1,1))
from sklearn.preprocessing import LabelBinarizer
encoder = LabelBinarizer()