本文整理汇总了Python中sklearn.preprocessing.Imputer类的典型用法代码示例。如果您正苦于以下问题:Python preprocessing.Imputer的具体用法?Python preprocessing.Imputer怎么用?Python preprocessing.Imputer使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块sklearn.preprocessing的用法示例。
注意:Imputer自scikit-learn 0.20起已被弃用,并在0.22版本中移除;新代码应改用sklearn.impute.SimpleImputer。
在下文中一共展示了preprocessing.Imputer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: setUpClass
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def setUpClass(self):
    """
    Set up the unit test by loading the dataset and training a model.

    Loads the dataset returned by ``load_boston``, injects one missing value
    into feature column 8, fits a ``most_frequent`` ``Imputer`` on that single
    column, and stores both the dataset and the fitted model on ``self``.
    """
    from sklearn.datasets import load_boston

    scikit_data = load_boston()
    scikit_model = Imputer(strategy="most_frequent", axis=0)

    # Use the canonical ``np.nan``; the ``np.NaN`` alias was removed in NumPy 2.0.
    scikit_data["data"][1, 8] = np.nan

    # Keep column 8 as an (n_samples, 1) matrix rather than a flat vector.
    input_data = scikit_data["data"][:, 8].reshape(-1, 1)
    scikit_model.fit(input_data, scikit_data["target"])

    # Save the data and the model
    self.scikit_data = scikit_data
    self.scikit_model = scikit_model
示例2: readFile
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def readFile(inpath):
    """Load a comma-separated numeric file and mean-impute its missing values.

    Args:
        inpath: path of the CSV file to read; every field is parsed as a
            64-bit float.

    Returns:
        The array produced by ``Imputer.fit_transform`` on the loaded data.

    Raises:
        ValueError: if ``inpath`` is not an existing file, or if the imputer
            statistic of the last column (treated here as the target) is NaN.
    """
    if not os.path.isfile(inpath):
        raise ValueError("File does not exist")

    # Open the file in a context manager so the handle is closed even when
    # parsing fails (the handle used to be leaked).
    with open(inpath, 'r') as handle:
        dataset = genfromtxt(handle, delimiter=',', dtype='f8')

    # fill in the missing values with the mean of each column
    imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
    transformedData = imp.fit_transform(dataset)

    # Columns whose fitted statistic is NaN could not be imputed.
    nanColumns = np.where(np.isnan(imp.statistics_))[0]

    # The last column is the target; refuse to continue if it is unusable.
    if dataset.shape[1] - 1 in nanColumns:
        raise ValueError("The target variable contains only nan values or inf")
    return transformedData
#parameters: vector 'target' which is the target variable
#returns: the dataset which includes the previous values of the target
示例3: test_imputer_float_inputs
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def test_imputer_float_inputs(self):
    """Convert a fitted mean ``Imputer`` to ONNX and check the resulting graph."""
    samples = [[1, 2], [np.nan, 3], [7, 6]]
    model = Imputer(missing_values="NaN", strategy="mean", axis=0)
    model.fit(samples)

    input_spec = [("input", FloatTensorType([None, 2]))]
    model_onnx = convert_sklearn(model, "scikit-learn imputer", input_spec)
    graph = model_onnx.graph

    self.assertTrue(graph.node is not None)
    # the graph should contain exactly one node
    self.assertEqual(len(graph.node), 1)

    # that node is the Imputer; it must expose one output whose last
    # dimension matches the two input features
    outputs = graph.output
    self.assertEqual(len(outputs), 1)
    last_dim = outputs[0].type.tensor_type.shape.dim[-1]
    self.assertEqual(last_dim.dim_value, 2)

    float_samples = np.array(samples, dtype=np.float32)
    dump_data_and_model(
        float_samples,
        model,
        model_onnx,
        basename="SklearnImputerMeanFloat32",
    )
示例4: test_simple_imputer_float_inputs
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def test_simple_imputer_float_inputs(self):
    """Convert a fitted mean ``SimpleImputer`` to ONNX and check the resulting graph."""
    samples = [[1, 2], [np.nan, 3], [7, 6]]
    model = SimpleImputer(strategy="mean", fill_value="nan")
    model.fit(samples)

    input_spec = [("input", FloatTensorType([None, 2]))]
    model_onnx = convert_sklearn(
        model, "scikit-learn simple imputer", input_spec,
        target_opset=TARGET_OPSET)
    graph = model_onnx.graph

    self.assertTrue(graph.node is not None)
    # the graph should contain exactly one node
    self.assertEqual(len(graph.node), 1)

    # that node is the Imputer; it must expose one output whose last
    # dimension matches the two input features
    outputs = graph.output
    self.assertEqual(len(outputs), 1)
    last_dim = outputs[0].type.tensor_type.shape.dim[-1]
    self.assertEqual(last_dim.dim_value, 2)

    float_samples = np.array(samples, dtype=np.float32)
    dump_data_and_model(
        float_samples, model, model_onnx,
        basename="SklearnSimpleImputerMeanFloat32")
示例5: test_objectmapper
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def test_objectmapper(self):
    """Check that ``df.preprocessing`` exposes the very same objects as ``pp``."""
    df = pdml.ModelFrame([])

    # Attribute names compared one by one, in the original order.
    names = (
        'Binarizer',
        'FunctionTransformer',
        'Imputer',
        'KernelCenterer',
        'LabelBinarizer',
        'LabelEncoder',
        'MultiLabelBinarizer',
        'MaxAbsScaler',
        'MinMaxScaler',
        'Normalizer',
        'OneHotEncoder',
        'PolynomialFeatures',
        'RobustScaler',
        'StandardScaler',
    )
    for name in names:
        self.assertIs(getattr(df.preprocessing, name), getattr(pp, name))
示例6: test_transform_1d_frame_int
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def test_transform_1d_frame_int(self):
    """Compare accessor-built and ``pp``-built estimators on a one-column integer frame."""
    values = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])
    labels = pd.Index('a b c d e f g h i'.split(' '))
    df = pdml.ModelFrame(values, index=labels, columns=['X'])
    self.assertEqual(len(df.columns), 1)

    # the reference estimators are fed a 2d column vector
    values = values.reshape(-1, 1)

    models = ['Binarizer', 'Imputer', 'StandardScaler']
    if not pd.compat.PY3:
        # MinMaxScaler raises TypeError in ufunc on PY3, so it is only
        # exercised on the other branch
        models.append('MinMaxScaler')

    # Each check gets a freshly constructed pair of estimators.
    checks = (self._assert_transform, self._assert_fit_transform)
    for model in models:
        for check in checks:
            mod1 = getattr(df.preprocessing, model)()
            mod2 = getattr(pp, model)()
            check(df, values, mod1, mod2)
示例7: test_Imputer
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def test_Imputer(self):
    """Impute a ``ModelSeries`` via fit + transform and via fit_transform."""
    s = pdml.ModelSeries(np.array([1, np.nan, 3, 2]))
    expected = np.array([1, 2, 3, 2])

    # Path 1: explicit fit followed by transform on the same estimator.
    fitted = s.pp.Imputer(axis=0)
    s.fit(fitted)
    transformed = s.transform(fitted)
    self.assertIsInstance(transformed, pdml.ModelSeries)
    self.assert_numpy_array_almost_equal(transformed.values, expected)

    # Path 2: one-shot fit_transform on a fresh estimator.
    fresh = s.pp.Imputer(axis=0)
    combined = s.fit_transform(fresh)
    self.assertIsInstance(combined, pdml.ModelSeries)
    self.assert_numpy_array_almost_equal(combined.values, expected)
示例8: FeatureCombination
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def FeatureCombination(Df, s='', num_feature=2):
    """Replace the columns whose names start with ``s`` by PCA components.

    The selected columns are imputed, scaled and projected with PCA; the
    projections are stored as ``<s>_1`` ... ``<s>_<num_feature>`` and the
    selected source columns are removed. As a side effect, every column of
    ``Df`` with dtype ``object`` is label-encoded in place.

    Args:
        Df: DataFrame to transform; it is modified in place.
        s: column-name prefix selecting the features to combine. The default
            ``''`` matches every column.
        num_feature: value passed to ``PCA``; also the number of output
            columns created.

    Returns:
        The same ``Df`` object, after modification.
    """
    feature_set = [c for c in Df.columns if c.startswith(s)]
    print('combining', len(feature_set), 'features')

    # NOTE(review): the feature matrix is extracted before the label encoding
    # below, so object-typed feature columns presumably reach the imputer
    # un-encoded -- confirm whether that is intended.
    data = Df[feature_set].values

    for c in Df.columns:
        if Df[c].dtype == 'object':
            column_values = list(Df[c].values)
            lbl = preprocessing.LabelEncoder()
            lbl.fit(column_values)
            Df[c] = lbl.transform(column_values)

    imp = preprocessing.Imputer()
    data = imp.fit_transform(data)
    data = preprocessing.scale(data)

    pca = PCA(num_feature)
    pca.fit(data)
    print('explained_variance_ratio_:', pca.explained_variance_ratio_)
    trans = pca.transform(data)

    # Drop the source columns before writing the components: an output name
    # such as 'f_1' may collide with a source column, and writing first would
    # let the drop below discard the freshly computed component.
    # ``axis`` must be passed by keyword; pandas 2.0 rejects the positional form.
    Df.drop(feature_set, axis=1, inplace=True)
    for i in range(num_feature):
        Df[s+'_%d'%(i+1)] = trans[:, i]
    return Df
示例9: test_imputer
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def test_imputer(self):
    """Round-trip a mean imputer through Core ML and into ONNX.

    The test returns early (without asserting) when the Core ML converter
    reports that the model is not supported.
    """
    # Older estimators accept ``axis``; newer ones raise TypeError for it, in
    # which case the attribute is set by hand after construction.
    try:
        model = Imputer(missing_values='NaN', strategy='mean', axis=0)
    except TypeError:
        model = Imputer(missing_values=np.nan, strategy='mean')
        model.axis = 0
    data = [[1, 2], [np.nan, 3], [7, 6]]
    model.fit(data)

    from onnxmltools.convert.coreml.convert import convert
    import coremltools  # noqa

    try:
        model_coreml = coremltools.converters.sklearn.convert(model)
    except ValueError as e:
        if 'not supported' in str(e):
            # Python 2.7 + scikit-learn 0.22
            return
        # Any other conversion failure is a real error. Swallowing it would
        # only surface later as a NameError on ``model_coreml``.
        raise

    model_onnx = convert(model_coreml.get_spec())
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(np.array(data, dtype=np.float32),
                        model, model_onnx, basename="CmlImputerMeanFloat32")
示例10: impute_data
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def impute_data(self,x):
    """Return ``x`` after fitting and applying a mean-strategy ``Imputer`` (axis=0) for NaN values."""
    mean_imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)
    filled = mean_imputer.fit_transform(x)
    return filled
示例11: __init__
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def __init__(self, params, dataset):
    """Initializes a UMAPTransformer object.

    Fits an imputer and a scaler on ``dataset.X``, then fits a UMAP mapper
    on the scaled features using ``dataset.y`` as the target.

    Args:
        params (Namespace): Contains parameters used to instantiate the transformer.
        dataset (Dataset): Dataset used to "train" the projection mapping.
    """
    # TODO: decide whether to make n_epochs a parameter
    default_n_epochs = 500

    is_classification = params.prediction_type == 'classification'
    target_metric = 'categorical' if is_classification else 'l2'

    self.scaler = RobustScaler()
    # Use Imputer to replace missing values (NaNs) with means for each column
    self.imputer = Imputer()

    # Impute first, then scale the imputed matrix.
    imputed_X = self.imputer.fit_transform(dataset.X)
    scaled_X = self.scaler.fit_transform(imputed_X)

    umap_kwargs = dict(
        n_neighbors=params.umap_neighbors,
        n_components=params.umap_dim,
        metric=params.umap_metric,
        target_metric=target_metric,
        target_weight=params.umap_targ_wt,
        min_dist=params.umap_min_dist,
        n_epochs=default_n_epochs,
    )
    self.mapper = umap.UMAP(**umap_kwargs)

    # TODO: How to deal with multitask data?
    flat_y = dataset.y.flatten()
    self.mapper.fit(scaled_X, y=flat_y)
# ****************************************************************************************
示例12: get_clf_pipeline
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def get_clf_pipeline():
    """Build the classification pipeline: feature selection, median imputation, scaling, classifier."""
    booster = GradientBoostingClassifier(
        loss='deviance',
        learning_rate=0.01,
        n_estimators=3000,
        subsample=0.6,
        min_samples_split=12,
        min_samples_leaf=12,
        max_depth=6,
        random_state=1357,
        verbose=0,
    )
    clf = models.DefaultClassifier(booster)

    # Steps are appended in execution order.
    steps = []
    steps.append(('features', models.FeatureSelector()))
    steps.append(('Impute', Imputer(strategy='median')))
    steps.append(('scaler', StandardScaler()))
    steps.append(('clf', clf))
    return Pipeline(steps)
示例13: get_reg_pipeline
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def get_reg_pipeline():
    """Build the regression pipeline: feature selection, median imputation, scaling, regressor."""
    booster = GradientBoostingRegressor(
        loss='ls',
        learning_rate=0.0075,
        n_estimators=5000,
        subsample=0.5,
        min_samples_split=20,
        min_samples_leaf=20,
        max_leaf_nodes=30,
        random_state=9753,
        verbose=0,
    )
    clf = models.PartialRegressor(booster)

    # Steps are appended in execution order.
    steps = []
    steps.append(('features', models.FeatureSelector()))
    steps.append(('Impute', Imputer(strategy='median')))
    steps.append(('scaler', StandardScaler()))
    steps.append(('clf', clf))
    return Pipeline(steps)
示例14: __init__
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def __init__(self, max_iter=10, initial_strategy='mean', tol=1e-3, f_model="RandomForest"):
    """Store the configuration and build the imputer used for the initial fill.

    Args:
        max_iter: stored as ``self.max_iter``.
        initial_strategy: stored as ``self.initial_strategy`` and passed as
            the ``strategy`` of the initial ``Imputer``.
        tol: stored as ``self.tol``.
        f_model: stored as ``self.f_model``.
    """
    # Plain configuration values.
    self.max_iter, self.tol, self.f_model = max_iter, tol, f_model

    # The initial strategy is kept and also used to build the first imputer.
    self.initial_strategy = initial_strategy
    self.initial_imputer = Imputer(strategy=self.initial_strategy)
示例15: data_handlemissing
# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def data_handlemissing(dataframe, pipeline):
    """Drop or fill missing values according to ``pipeline['options']``.

    Supported ``pipeline['options']['type']`` values:

    * ``"dropcolumns"`` / ``"droprows"``: drop columns / rows in place, driven
      by ``options['thresh']``: ``-1`` drops those that are entirely missing,
      ``0`` drops those with any missing value, and a positive number keeps
      only those with at least that many non-missing values.
    * ``"fillmissing"``: impute with ``Imputer`` using ``options['strategy']``
      and return a new DataFrame built from the imputed array (the original
      column labels are reused; the index is not carried over).

    Any other type leaves the data untouched.

    Args:
        dataframe: the pandas DataFrame to clean.
        pipeline: mapping holding the ``'options'`` dictionary described above.

    Returns:
        The cleaned DataFrame (the same object for the drop modes).

    Raises:
        Exception: any failure is re-raised with a ``"data_handlemissing: "``
            prefix, chained to the original error.
    """
    try:
        options = pipeline['options']
        kind = options['type']

        if kind in ("dropcolumns", "droprows"):
            axis = 1 if kind == "dropcolumns" else 0
            thresh = options['thresh']
            if thresh == -1:
                dataframe.dropna(axis=axis, how="all", inplace=True)
            elif thresh == 0:
                dataframe.dropna(axis=axis, how="any", inplace=True)
            elif thresh > 0:
                # ``inplace=True`` was missing for rows, which silently
                # discarded the result and left the frame unchanged.
                dataframe.dropna(axis=axis, thresh=thresh, inplace=True)
        elif kind == "fillmissing":
            strategy = options['strategy']
            imp = Imputer(missing_values='NaN', strategy=strategy, axis=0)
            array = imp.fit_transform(dataframe.values)
            dataframe = pandas.DataFrame(array, columns=dataframe.columns)
        return dataframe
    except Exception as e:
        raise Exception("data_handlemissing: " + str(e)) from e