当前位置: 首页>>代码示例>>Python>>正文


Python preprocessing.Imputer方法代码示例

本文整理汇总了Python中sklearn.preprocessing.Imputer方法的典型用法代码示例。如果您正苦于以下问题:Python preprocessing.Imputer方法的具体用法?Python preprocessing.Imputer怎么用?Python preprocessing.Imputer使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.preprocessing的用法示例。


在下文中一共展示了preprocessing.Imputer方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: setUpClass

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.

        Loads the Boston dataset, overwrites one entry of feature column 8
        with NaN, and fits a ``most_frequent`` imputer on that single column.
        The dataset and the fitted model are stored on ``self``.
        """
        from sklearn.datasets import load_boston

        scikit_data = load_boston()
        scikit_model = Imputer(strategy="most_frequent", axis=0)
        # Use the canonical lowercase spelling: the ``np.NaN`` alias was
        # removed in NumPy 2.0, while ``np.nan`` works on every version.
        scikit_data["data"][1, 8] = np.nan

        # Reshape the selected column to a 2-D array with a single column.
        input_data = scikit_data["data"][:, 8].reshape(-1, 1)
        scikit_model.fit(input_data, scikit_data["target"])

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model
开发者ID:apple,项目名称:coremltools,代码行数:18,代码来源:test_categorical_imputer.py

示例2: readFile

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def readFile(inpath):
    """Load a comma-separated numeric file and mean-impute its missing values.

    Parameters:
        inpath: path of the comma-separated file to read.

    Returns:
        The array produced by ``Imputer.fit_transform`` on the parsed data.

    Raises:
        ValueError: if ``inpath`` is not an existing file, or if the imputer
            statistic of the last column (the target) is NaN.
    """
    if not os.path.isfile(inpath):
        raise ValueError("File does not exist")

    # Read inside a context manager so the file handle is always closed;
    # the previous version leaked the handle returned by open().
    with open(inpath, 'r') as handle:
        dataset = genfromtxt(handle, delimiter=',', dtype='f8')

    # fill in the missing values with the mean of each column
    imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
    transformedData = imp.fit_transform(dataset)

    # Columns whose fitted statistic is NaN could not be imputed.
    nan_columns = np.where(np.isnan(imp.statistics_))[0]
    target_index = dataset.shape[1] - 1  # the target is the last column
    if target_index in nan_columns:
        raise ValueError("The target variable contains only nan values or inf")
    return transformedData
    
#parameters: vector 'target' which is the target variable
#returns: the dataset which includes the previous values of the target 
开发者ID:h-cel,项目名称:ClimateVegetationDynamics_GrangerCausality,代码行数:18,代码来源:GC_script.py

示例3: test_imputer_float_inputs

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def test_imputer_float_inputs(self):
        """Convert a fitted mean ``Imputer`` to ONNX and check the graph."""
        samples = [[1, 2], [np.nan, 3], [7, 6]]
        imputer = Imputer(missing_values="NaN", strategy="mean", axis=0)
        imputer.fit(samples)

        input_types = [("input", FloatTensorType([None, 2]))]
        onnx_model = convert_sklearn(imputer, "scikit-learn imputer",
                                     input_types)
        self.assertTrue(onnx_model.graph.node is not None)

        # the graph should contain exactly one node
        node_count = len(onnx_model.graph.node)
        self.assertEqual(node_count, 1)

        # the single graph output must have 2 as its last dimension
        graph_outputs = onnx_model.graph.output
        self.assertEqual(len(graph_outputs), 1)
        last_dim = graph_outputs[0].type.tensor_type.shape.dim[-1]
        self.assertEqual(last_dim.dim_value, 2)

        float_samples = np.array(samples, dtype=np.float32)
        dump_data_and_model(
            float_samples,
            imputer,
            onnx_model,
            basename="SklearnImputerMeanFloat32",
        )
开发者ID:onnx,项目名称:sklearn-onnx,代码行数:25,代码来源:test_sklearn_imputer_converter.py

示例4: test_simple_imputer_float_inputs

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def test_simple_imputer_float_inputs(self):
        """Convert a fitted mean ``SimpleImputer`` to ONNX and check the graph."""
        samples = [[1, 2], [np.nan, 3], [7, 6]]
        imputer = SimpleImputer(strategy="mean", fill_value="nan")
        imputer.fit(samples)

        input_types = [("input", FloatTensorType([None, 2]))]
        onnx_model = convert_sklearn(
            imputer,
            "scikit-learn simple imputer",
            input_types,
            target_opset=TARGET_OPSET)
        self.assertTrue(onnx_model.graph.node is not None)

        # the graph should contain exactly one node
        node_count = len(onnx_model.graph.node)
        self.assertEqual(node_count, 1)

        # the single graph output must have 2 as its last dimension
        graph_outputs = onnx_model.graph.output
        self.assertEqual(len(graph_outputs), 1)
        last_dim = graph_outputs[0].type.tensor_type.shape.dim[-1]
        self.assertEqual(last_dim.dim_value, 2)

        float_samples = np.array(samples, dtype=np.float32)
        dump_data_and_model(
            float_samples,
            imputer, onnx_model,
            basename="SklearnSimpleImputerMeanFloat32")
开发者ID:onnx,项目名称:sklearn-onnx,代码行数:26,代码来源:test_sklearn_imputer_converter.py

示例5: test_objectmapper

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def test_objectmapper(self):
        """Check that the frame's preprocessing accessor exposes the same classes as ``pp``."""
        df = pdml.ModelFrame([])
        # Checked in this order; each accessor attribute must be the very
        # same object as the attribute of the same name on ``pp``.
        class_names = (
            'Binarizer',
            'FunctionTransformer',
            'Imputer',
            'KernelCenterer',
            'LabelBinarizer',
            'LabelEncoder',
            'MultiLabelBinarizer',
            'MaxAbsScaler',
            'MinMaxScaler',
            'Normalizer',
            'OneHotEncoder',
            'PolynomialFeatures',
            'RobustScaler',
            'StandardScaler',
        )
        for class_name in class_names:
            exposed = getattr(df.preprocessing, class_name)
            reference = getattr(pp, class_name)
            self.assertIs(exposed, reference)
开发者ID:pandas-ml,项目名称:pandas-ml,代码行数:19,代码来源:test_preprocessing.py

示例6: test_transform_1d_frame_int

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def test_transform_1d_frame_int(self):
        """Compare accessor-built and ``pp``-built transformers on a one-column integer frame."""
        arr = np.array([1, 2, 3] * 3)
        idx = pd.Index(list('abcdefghi'))
        df = pdml.ModelFrame(arr, index=idx, columns=['X'])
        self.assertEqual(len(df.columns), 1)

        # reshape arr to 2d
        arr = arr.reshape(-1, 1)

        models = ['Binarizer', 'Imputer', 'StandardScaler']
        if not pd.compat.PY3:
            # On Python 3, MinMaxScaler raises TypeError in ufunc, so it is
            # only exercised on Python 2.
            models.append('MinMaxScaler')

        for model in models:
            # Fresh estimator pairs are built for each of the two checks.
            for check_name in ('_assert_transform', '_assert_fit_transform'):
                mod1 = getattr(df.preprocessing, model)()
                mod2 = getattr(pp, model)()
                getattr(self, check_name)(df, arr, mod1, mod2)
开发者ID:pandas-ml,项目名称:pandas-ml,代码行数:26,代码来源:test_preprocessing.py

示例7: test_Imputer

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def test_Imputer(self):
        """Impute a NaN in a ``ModelSeries`` via fit/transform and via fit_transform."""
        series = pdml.ModelSeries(np.array([1, np.nan, 3, 2]))
        expected = np.array([1, 2, 3, 2])

        # Two-step path: fit, then transform.
        imputer = series.pp.Imputer(axis=0)
        series.fit(imputer)
        transformed = series.transform(imputer)
        self.assertIsInstance(transformed, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(transformed.values, expected)

        # One-step path with a fresh estimator.
        imputer = series.pp.Imputer(axis=0)
        transformed = series.fit_transform(imputer)
        self.assertIsInstance(transformed, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(transformed.values, expected)
开发者ID:pandas-ml,项目名称:pandas-ml,代码行数:20,代码来源:test_preprocessing.py

示例8: FeatureCombination

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def FeatureCombination(Df, s='', num_feature=2):
    """Replace the columns whose names start with ``s`` by PCA components.

    The selected columns are imputed, scaled and projected with PCA; the
    resulting components are stored as ``s + '_1'`` ... ``s + '_<num_feature>'``
    and the original selected columns are dropped. ``Df`` is modified in
    place and also returned.

    Parameters:
        Df: data frame to transform (mutated in place).
        s: column-name prefix selecting the features to combine; the default
            empty string selects every column.
        num_feature: number of PCA components to keep.

    Returns:
        The same ``Df`` object, with the selected columns replaced.

    Side effects:
        Every column of dtype ``object`` in ``Df`` is label-encoded in place,
        and progress information is printed.
    """
    feature_set = [c for c in Df.columns if c.startswith(s)]
    print('combining', len(feature_set), 'features')
    # NOTE(review): the values are extracted before the label encoding
    # below, so object columns inside the feature set reach the imputer
    # unencoded -- confirm this ordering is intended.
    data = Df[feature_set].values

    for c in Df.columns:
        if Df[c].dtype == 'object':
            lbl = preprocessing.LabelEncoder()
            column_values = list(Df[c].values)
            lbl.fit(column_values)
            Df[c] = lbl.transform(column_values)

    imp = preprocessing.Imputer()
    data = imp.fit_transform(data)
    data = preprocessing.scale(data)
    pca = PCA(num_feature)
    pca.fit(data)
    print('explained_variance_ratio_:', pca.explained_variance_ratio_)
    trans = pca.transform(data)
    for i in range(num_feature):
        Df[s+'_%d'%(i+1)] = trans[:,i]
    # Pass ``axis`` by keyword: the positional form is rejected by
    # pandas >= 2.0, while the keyword works on every version.
    Df.drop(feature_set, axis=1, inplace=True)
    return Df
开发者ID:LenzDu,项目名称:Kaggle-Competition-Sberbank,代码行数:26,代码来源:Utils.py

示例9: test_imputer

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def test_imputer(self):
        """Convert a fitted mean ``Imputer`` through Core ML to ONNX.

        The test returns early when coremltools reports that the model is
        not supported; any other conversion error is propagated.
        """
        try:
            model = Imputer(missing_values='NaN', strategy='mean', axis=0)
        except TypeError:
            # Fallback for an Imputer whose constructor rejects ``axis``:
            # build it without the argument and set the attribute afterwards.
            model = Imputer(missing_values=np.nan, strategy='mean')
            model.axis = 0
        data = [[1, 2], [np.nan, 3], [7, 6]]
        model.fit(data)
        from onnxmltools.convert.coreml.convert import convert
        import coremltools  # noqa
        try:
            model_coreml = coremltools.converters.sklearn.convert(model)
        except ValueError as e:
            if 'not supported' in str(e):
                # Python 2.7 + scikit-learn 0.22
                return
            # Any other ValueError is a genuine failure. Previously it was
            # swallowed and the test died with an unrelated NameError on
            # ``model_coreml`` below.
            raise
        model_onnx = convert(model_coreml.get_spec())
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(np.array(data, dtype=np.float32),
                            model, model_onnx, basename="CmlImputerMeanFloat32")
开发者ID:onnx,项目名称:onnxmltools,代码行数:22,代码来源:test_cml_ImputerConverter.py

示例10: impute_data

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def impute_data(self,x):
        """Return ``x`` with NaN entries replaced by a column-wise mean imputer."""
        mean_imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)
        imputed = mean_imputer.fit_transform(x)
        return imputed
开发者ID:lacava,项目名称:few,代码行数:6,代码来源:few.py

示例11: __init__

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def __init__(self, params, dataset):
        """Build and fit the imputer, scaler and UMAP mapper for a dataset.

        Args:
            params (Namespace): Contains parameters used to instantiate the transformer.
            dataset (Dataset): Dataset used to "train" the projection mapping.
        """
        # TODO: decide whether to make n_epochs a parameter
        default_n_epochs = 500

        # The target metric depends on the prediction type.
        is_classification = params.prediction_type == 'classification'
        target_metric = 'categorical' if is_classification else 'l2'

        self.scaler = RobustScaler()
        # Use Imputer to replace missing values (NaNs) with means for each column
        self.imputer = Imputer()
        imputed_X = self.imputer.fit_transform(dataset.X)
        scaled_X = self.scaler.fit_transform(imputed_X)

        umap_settings = dict(
            n_neighbors=params.umap_neighbors,
            n_components=params.umap_dim,
            metric=params.umap_metric,
            target_metric=target_metric,
            target_weight=params.umap_targ_wt,
            min_dist=params.umap_min_dist,
            n_epochs=default_n_epochs,
        )
        self.mapper = umap.UMAP(**umap_settings)
        # TODO: How to deal with multitask data?
        flat_targets = dataset.y.flatten()
        self.mapper.fit(scaled_X, y=flat_targets)

    # **************************************************************************************** 
开发者ID:ATOMconsortium,项目名称:AMPL,代码行数:33,代码来源:transformations.py

示例12: get_clf_pipeline

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def get_clf_pipeline():
    """Build the classification pipeline: feature selection, median imputation, scaling, classifier."""
    booster = GradientBoostingClassifier(
        loss='deviance',
        learning_rate=0.01,
        n_estimators=3000,
        subsample=0.6,
        min_samples_split=12,
        min_samples_leaf=12,
        max_depth=6,
        random_state=1357,
        verbose=0,
    )
    classifier = models.DefaultClassifier(booster)
    return Pipeline([
        ('features', models.FeatureSelector()),
        ('Impute', Imputer(strategy='median')),
        ('scaler', StandardScaler()),
        ('clf', classifier),
    ])
开发者ID:songgc,项目名称:loan-default-prediction,代码行数:14,代码来源:train_predict.py

示例13: get_reg_pipeline

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def get_reg_pipeline():
    """Build the regression pipeline: feature selection, median imputation, scaling, regressor."""
    booster = GradientBoostingRegressor(
        loss='ls',
        learning_rate=0.0075,
        n_estimators=5000,
        subsample=0.5,
        min_samples_split=20,
        min_samples_leaf=20,
        max_leaf_nodes=30,
        random_state=9753,
        verbose=0,
    )
    regressor = models.PartialRegressor(booster)
    return Pipeline([
        ('features', models.FeatureSelector()),
        ('Impute', Imputer(strategy='median')),
        ('scaler', StandardScaler()),
        ('clf', regressor),
    ])
开发者ID:songgc,项目名称:loan-default-prediction,代码行数:13,代码来源:train_predict.py

示例14: __init__

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def __init__(self, max_iter=10, initial_strategy='mean', tol=1e-3, f_model="RandomForest"):
        """Store the settings and create the imputer used for the initial fill.

        The initial imputer is built with ``initial_strategy`` as its
        strategy; all four arguments are kept as attributes of the same name.
        """
        # Plain configuration values.
        self.max_iter = max_iter
        self.tol = tol
        self.f_model = f_model

        # Initial imputation set-up.
        self.initial_strategy = initial_strategy
        self.initial_imputer = Imputer(strategy=initial_strategy)
开发者ID:log0ymxm,项目名称:predictive_imputer,代码行数:8,代码来源:predictive_imputer.py

示例15: data_handlemissing

# 需要导入模块: from sklearn import preprocessing [as 别名]
# 或者: from sklearn.preprocessing import Imputer [as 别名]
def data_handlemissing(dataframe, pipeline):
    """Drop or fill missing values as described by ``pipeline['options']``.

    ``pipeline['options']['type']`` selects the action:

    * ``"dropcolumns"`` / ``"droprows"``: drop columns / rows in place,
      controlled by ``options['thresh']``: ``-1`` drops those that are
      entirely NaN, ``0`` drops those containing any NaN, and a positive
      value is passed to ``dropna`` as its ``thresh`` argument.
    * ``"fillmissing"``: impute column-wise with ``options['strategy']``
      and return a new frame with the same column labels.

    Any other type leaves the frame untouched.

    Parameters:
        dataframe: the pandas DataFrame to clean (mutated in place by the
            drop actions).
        pipeline: mapping holding the ``'options'`` described above.

    Returns:
        The cleaned DataFrame.

    Raises:
        Exception: any failure is re-raised with a ``"data_handlemissing: "``
            prefix on its message.
    """
    try:
        options = pipeline['options']
        action = options['type']
        if action in ("dropcolumns", "droprows"):
            axis = 1 if action == "dropcolumns" else 0
            thresh = options['thresh']
            if thresh == -1:
                dataframe.dropna(axis=axis, how="all", inplace=True)
            elif thresh == 0:
                dataframe.dropna(axis=axis, how="any", inplace=True)
            elif thresh > 0:
                # inplace=True was missing for rows, so the filtered result
                # was discarded and no row was ever dropped.
                dataframe.dropna(axis=axis, thresh=thresh, inplace=True)
        elif action == "fillmissing":
            strategy = options['strategy']
            imp = Imputer(missing_values='NaN', strategy=strategy, axis=0)
            array = imp.fit_transform(dataframe.values)
            dataframe = pandas.DataFrame(array, columns = dataframe.columns)

        return dataframe
    except Exception as e:
        raise Exception("data_handlemissing: " + str(e))
开发者ID:tech-quantum,项目名称:sia-cog,代码行数:29,代码来源:pipelinecomponents.py


注:本文中的sklearn.preprocessing.Imputer方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。