本文整理汇总了Python中sklearn.impute.SimpleImputer.transform方法的典型用法代码示例。如果您正苦于以下问题：Python SimpleImputer.transform方法的具体用法？Python SimpleImputer.transform怎么用？Python SimpleImputer.transform使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.impute.SimpleImputer的用法示例。
在下文中一共展示了SimpleImputer.transform方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_imputation_error_sparse_0
# 需要导入模块: from sklearn.impute import SimpleImputer [as 别名]
# 或者: from sklearn.impute.SimpleImputer import transform [as 别名]
def test_imputation_error_sparse_0(strategy):
    """Check that sparse input is rejected when ``missing_values`` is 0.

    Both ``fit`` and ``transform`` are expected to raise a ``ValueError``
    whose message matches "Provide a dense array" when handed a sparse
    matrix; fitting on the dense equivalent in between must succeed.
    """
    dense = np.ones((3, 5))
    dense[0] = 0
    X_sparse = sparse.csc_matrix(dense)

    imputer = SimpleImputer(strategy=strategy, missing_values=0)
    expected_message = "Provide a dense array"

    with pytest.raises(ValueError, match=expected_message):
        imputer.fit(X_sparse)

    # Fit on the dense data so that transform can be exercised afterwards.
    imputer.fit(X_sparse.toarray())
    with pytest.raises(ValueError, match=expected_message):
        imputer.transform(X_sparse)
示例2: data_preprocessing
# 需要导入模块: from sklearn.impute import SimpleImputer [as 别名]
# 或者: from sklearn.impute.SimpleImputer import transform [as 别名]
def data_preprocessing(dataset):
    """Split *dataset* into features/target and mean-impute one feature.

    Positional columns 2..12 of ``dataset`` become the feature matrix ``X``
    and positional column 1 becomes the target ``Y`` (``dataset`` must
    support ``.iloc``). NaN entries in feature column 3 of ``X`` are then
    replaced by that column's mean.

    NOTE(review): this excerpt ends without returning ``X`` or ``Y``; the
    original function presumably continues -- confirm against the full
    source before relying on a return value.
    """
    X = dataset.iloc[:, 2:13].values
    Y = dataset.iloc[:, 1].values

    # replace missing data
    from sklearn.impute import SimpleImputer
    imputer = SimpleImputer(strategy="mean", missing_values=np.nan)

    # SimpleImputer only accepts 2-D input; slicing with 3:4 (instead of
    # indexing with 3) keeps the column as shape (n_samples, 1), which
    # avoids the "Expected 2D array, got 1D array" ValueError.
    column = X[:, 3:4]
    imputer = imputer.fit(column)
    X[:, 3:4] = imputer.transform(column)
示例3: test_imputation_pickle
# 需要导入模块: from sklearn.impute import SimpleImputer [as 别名]
# 或者: from sklearn.impute.SimpleImputer import transform [as 别名]
def test_imputation_pickle():
    """Check that a fitted imputer transforms identically after pickling.

    For each supported strategy the imputer is fitted, round-tripped
    through ``pickle``, and the transform outputs of the original and the
    restored object are compared.
    """
    import pickle

    X = sparse_random_matrix(100, 100, density=0.10)

    for strategy in ["mean", "median", "most_frequent"]:
        imputer = SimpleImputer(missing_values=0, strategy=strategy)
        imputer.fit(X)

        # Serialize and immediately restore the fitted estimator.
        restored = pickle.loads(pickle.dumps(imputer))

        # Each transform gets its own copy of X.
        from_original = imputer.transform(X.copy())
        from_restored = restored.transform(X.copy())
        failure_message = ("Fail to transform the data after pickling "
                           "(strategy = %s)" % (strategy))

        assert_array_almost_equal(
            from_original,
            from_restored,
            err_msg=failure_message,
        )
示例4: test_iterative_imputer_missing_at_transform
# 需要导入模块: from sklearn.impute import SimpleImputer [as 别名]
# 或者: from sklearn.impute.SimpleImputer import transform [as 别名]
def test_iterative_imputer_missing_at_transform(strategy):
    """Compare IterativeImputer with SimpleImputer on a fully observed column.

    Column 0 has no missing values (0 marks "missing") in the training data
    but does have one in the test data. The test asserts that the iterative
    imputer's output for that column equals the output of a SimpleImputer
    configured with the same strategy.
    """
    rng = np.random.RandomState(0)
    shape = (100, 10)  # (n samples, d features)

    # Draw train before test so the random stream is consumed in a fixed order.
    X_train = rng.randint(low=0, high=3, size=shape)
    X_test = rng.randint(low=0, high=3, size=shape)

    X_train[:, 0] = 1  # definitely no missing values in 0th column
    X_test[0, 0] = 0  # definitely missing value in 0th column

    imputer = IterativeImputer(
        missing_values=0,
        max_iter=1,
        initial_strategy=strategy,
        random_state=rng,
    )
    imputer = imputer.fit(X_train)

    initial_imputer = SimpleImputer(missing_values=0, strategy=strategy)
    initial_imputer = initial_imputer.fit(X_train)

    # if there were no missing values at time of fit, then imputer will
    # only use the initial imputer for that feature at transform
    iterative_first_col = imputer.transform(X_test)[:, 0]
    simple_first_col = initial_imputer.transform(X_test)[:, 0]
    assert_allclose(iterative_first_col, simple_first_col)
示例5: test_mice_missing_at_transform
# 需要导入模块: from sklearn.impute import SimpleImputer [as 别名]
# 或者: from sklearn.impute.SimpleImputer import transform [as 别名]
def test_mice_missing_at_transform(strategy):
    """Compare MICEImputer with SimpleImputer on a fully observed column.

    Column 0 has no missing values (0 marks "missing") in the training data
    but does have one in the test data. The test asserts that the MICE
    output for that column is exactly equal to the output of a
    SimpleImputer configured with the same strategy.
    """
    rng = np.random.RandomState(0)
    shape = (100, 10)  # (n samples, d features)

    # Draw train before test so the random stream is consumed in a fixed order.
    X_train = rng.randint(low=0, high=3, size=shape)
    X_test = rng.randint(low=0, high=3, size=shape)

    X_train[:, 0] = 1  # definitely no missing values in 0th column
    X_test[0, 0] = 0  # definitely missing value in 0th column

    mice = MICEImputer(
        missing_values=0,
        n_imputations=1,
        n_burn_in=1,
        initial_strategy=strategy,
        random_state=rng,
    )
    mice = mice.fit(X_train)

    initial_imputer = SimpleImputer(missing_values=0, strategy=strategy)
    initial_imputer = initial_imputer.fit(X_train)

    # if there were no missing values at time of fit, then mice will
    # only use the initial imputer for that feature at transform
    mice_first_col = mice.transform(X_test)[:, 0]
    simple_first_col = initial_imputer.transform(X_test)[:, 0]
    assert np.all(mice_first_col == simple_first_col)
示例6: _check_statistics
# 需要导入模块: from sklearn.impute import SimpleImputer [as 别名]
# 或者: from sklearn.impute.SimpleImputer import transform [as 别名]
def _check_statistics(X, X_true,
                      strategy, statistics, missing_values):
    """Utility function for testing imputation for a given strategy.

    Test:
        - with dense and sparse (CSC) input

    Check that:
        - the statistics (mean, median, mode) are correct
        - the missing values are imputed correctly

    ``X`` is the input containing missing values, ``X_true`` the expected
    imputed result and ``statistics`` the expected ``imputer.statistics_``.
    The "axis" field of the error message is always formatted as 0.
    """
    err_msg = "Parameters: strategy = %s, missing_values = %s, " \
              "axis = {0}, sparse = {1}" % (strategy, missing_values)

    # Exact comparison unless floating point data is involved.
    assert_ae = assert_array_equal
    if X.dtype.kind == 'f' or X_true.dtype.kind == 'f':
        assert_ae = assert_array_almost_equal

    # Normal matrix
    # missing_values is passed by keyword: SimpleImputer's constructor
    # parameters are keyword-only in current scikit-learn releases, so the
    # positional form raises TypeError there.
    imputer = SimpleImputer(missing_values=missing_values, strategy=strategy)
    X_trans = imputer.fit(X).transform(X.copy())
    assert_ae(imputer.statistics_, statistics,
              err_msg=err_msg.format(0, False))
    assert_ae(X_trans, X_true, err_msg=err_msg.format(0, False))

    # Sparse matrix
    imputer = SimpleImputer(missing_values=missing_values, strategy=strategy)
    imputer.fit(sparse.csc_matrix(X))
    X_trans = imputer.transform(sparse.csc_matrix(X.copy()))

    # Densify a sparse result so it can be compared with X_true.
    if sparse.issparse(X_trans):
        X_trans = X_trans.toarray()

    assert_ae(imputer.statistics_, statistics,
              err_msg=err_msg.format(0, True))
    assert_ae(X_trans, X_true, err_msg=err_msg.format(0, True))
示例7: scatter_matrix
# 需要导入模块: from sklearn.impute import SimpleImputer [as 别名]
# 或者: from sklearn.impute.SimpleImputer import transform [as 别名]
# Feature engineering and preprocessing for the `housing` DataFrame:
# derive ratio features, median-impute the numeric columns, and one-hot
# encode the categorical "ocean_proximity" column.

# `pandas.tools.plotting` has been removed from pandas; `scatter_matrix`
# is importable from `pandas.plotting`.
from pandas.plotting import scatter_matrix
attributes = ["median_house_value", "median_income", "total_rooms", "housing_median_age"]
# scatter_matrix(housing[attributes], figsize=(12, 8))
# plt.show()

# Derived per-household / per-room ratio features.
housing["rooms_per_household"] = housing["total_rooms"]/housing["households"]
housing["bedrooms_per_room"] = housing["total_bedrooms"]/housing["total_rooms"]
housing["population_per_household"]=housing["population"]/housing["households"]

# The median strategy is applied to every remaining column, so the text
# column is dropped first.
housing_num = housing.drop("ocean_proximity", axis=1)
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy="median")
imputer.fit(housing_num)
X = imputer.transform(housing_num)
# The transform result is wrapped back into a DataFrame with the original
# column labels.
housing_tr = pd.DataFrame(X, columns=housing_num.columns)

# from sklearn.preprocessing import LabelEncoder
# encoder = LabelEncoder()
housing_cat = housing["ocean_proximity"]
# housing_cat_encoded = encoder.fit_transform(housing_cat)
# from sklearn.preprocessing import OneHotEncoder
# encoder = OneHotEncoder()
# housing_cat_1hot = encoder.fit_transform(housing_cat_encoded.reshape(-1,1))

# LabelBinarizer goes from text labels to one-hot vectors in a single step.
from sklearn.preprocessing import LabelBinarizer
encoder = LabelBinarizer()
housing_cat_1hot = encoder.fit_transform(housing_cat)