Python impute.SimpleImputer方法代码示例

本文整理汇总了Python中sklearn.impute.SimpleImputer类的典型用法代码示例。如果您正苦于以下问题：Python impute.SimpleImputer的具体用法？Python impute.SimpleImputer怎么用？Python impute.SimpleImputer使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块sklearn.impute的用法示例。

在下文中一共展示了impute.SimpleImputer方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_simple_imputation_add_indicator_sparse_matrix

# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_simple_imputation_add_indicator_sparse_matrix(arr_type):
    """Check ``add_indicator=True`` on sparse input.

    ``arr_type`` is the constructor used to build the sparse input from a
    nested list. The expected result has the three original columns with
    each NaN replaced by the mean of the observed values in its column,
    followed by three 0/1 indicator columns flagging where the NaNs were.
    """
    nan = np.nan
    rows = [
        [nan, 1, 5],
        [2, nan, 1],
        [6, 3, nan],
        [1, 2, 9],
    ]
    expected = np.array([
        [3., 1., 5., 1., 0., 0.],
        [2., 2., 1., 0., 1., 0.],
        [6., 3., 5., 0., 0., 1.],
        [1., 2., 9., 0., 0., 0.],
    ])

    X_sparse = arr_type(rows)
    transformed = SimpleImputer(
        missing_values=np.nan, add_indicator=True
    ).fit_transform(X_sparse)

    # The output must stay sparse and match the dense reference.
    assert sparse.issparse(transformed)
    assert transformed.shape == expected.shape
    assert_allclose(transformed.toarray(), expected)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:22，代码来源:test_impute.py

示例2: test_imputation_most_frequent

# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_imputation_most_frequent():
    """Exercise the ``most_frequent`` strategy with ``-1`` as the missing marker.

    The first column consists only of the marker, so its expected statistic
    is NaN and the column is absent from the expected output.
    """
    missing = -1
    X = np.array([
        [missing, missing, 0, 5],
        [missing, 2, missing, 3],
        [missing, 1, 3, missing],
        [missing, 2, 3, 7],
    ])

    X_true = np.array([
        [2, 0, 5],
        [2, 3, 3],
        [1, 3, 3],
        [2, 3, 7],
    ])

    # scipy.stats.mode, used in SimpleImputer, does not return the first most
    # frequent value as its documentation promises, but the lowest one. If
    # this test starts failing after a scipy update, SimpleImputer has to be
    # adapted to the new (correct) behaviour.
    expected_statistics = [np.nan, 2, 3, 3]
    _check_statistics(X, X_true, "most_frequent", expected_statistics, missing)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:23，代码来源:test_impute.py

示例3: test_imputation_most_frequent_objects

# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_imputation_most_frequent_objects(marker):
    """Exercise the ``most_frequent`` strategy on an object-dtype array.

    ``marker`` is the value treated as missing. The first column holds only
    the marker and is therefore absent from the expected output.
    """
    m = marker
    X = np.array([
        [m, m, "a", "f"],
        [m, "c", m, "d"],
        [m, "b", "d", m],
        [m, "c", "d", "h"],
    ], dtype=object)

    expected = np.array([
        ["c", "a", "f"],
        ["c", "d", "d"],
        ["b", "d", "d"],
        ["c", "d", "h"],
    ], dtype=object)

    imputer = SimpleImputer(missing_values=marker, strategy="most_frequent")
    imputer.fit(X)
    X_trans = imputer.transform(X)

    assert_array_equal(X_trans, expected)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:23，代码来源:test_impute.py

示例4: test_imputation_most_frequent_pandas

# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_imputation_most_frequent_pandas(dtype):
    """Exercise the ``most_frequent`` strategy on a pandas DataFrame.

    ``dtype`` is forwarded to ``pd.read_csv``. The test is skipped when
    pandas is not installed. Column ``Cat4`` is empty in every row and does
    not appear in the expected output.
    """
    pd = pytest.importorskip("pandas")

    csv_text = ("Cat1,Cat2,Cat3,Cat4\n"
                ",i,x,\n"
                "a,,y,\n"
                "a,j,,\n"
                "b,j,x,")
    df = pd.read_csv(io.StringIO(csv_text), dtype=dtype)

    expected = np.array([
        ["a", "i", "x"],
        ["a", "j", "y"],
        ["a", "j", "x"],
        ["b", "j", "x"],
    ], dtype=object)

    X_trans = SimpleImputer(strategy="most_frequent").fit_transform(df)

    assert_array_equal(X_trans, expected)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:25，代码来源:test_impute.py

示例5: test_imputation_constant_float

# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_imputation_constant_float(array_constructor):
    """Exercise the ``constant`` strategy on float data.

    ``array_constructor`` wraps both the input and the expected array, so
    the same check runs for whichever container type it produces. Every NaN
    is expected to become ``-1``, including the all-NaN last column.
    """
    nan = np.nan
    dense_input = np.array([
        [nan, 1.1, 0, nan],
        [1.2, nan, 1.3, nan],
        [0, 0, nan, nan],
        [1.4, 1.5, 0, nan],
    ])
    dense_expected = np.array([
        [-1, 1.1, 0, -1],
        [1.2, -1, 1.3, -1],
        [0, 0, -1, -1],
        [1.4, 1.5, 0, -1],
    ])

    X = array_constructor(dense_input)
    X_true = array_constructor(dense_expected)

    X_trans = SimpleImputer(strategy="constant", fill_value=-1).fit_transform(X)

    assert_allclose_dense_sparse(X_trans, X_true)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:26，代码来源:test_impute.py

示例6: test_imputation_constant_object

# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_imputation_constant_object(marker):
    """Exercise the ``constant`` strategy on an object-dtype array.

    ``marker`` is the value treated as missing; each occurrence is expected
    to be replaced by the string ``"missing"``.
    """
    m = marker
    fill = "missing"

    X = np.array([
        [m, "a", "b", m],
        ["c", m, "d", m],
        ["e", "f", m, m],
        ["g", "h", "i", m],
    ], dtype=object)

    expected = np.array([
        [fill, "a", "b", fill],
        ["c", fill, "d", fill],
        ["e", "f", fill, fill],
        ["g", "h", "i", fill],
    ], dtype=object)

    imputer = SimpleImputer(
        missing_values=marker, strategy="constant", fill_value="missing"
    )
    X_trans = imputer.fit_transform(X)

    assert_array_equal(X_trans, expected)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:23，代码来源:test_impute.py

示例7: test_imputation_constant_pandas

# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_imputation_constant_pandas(dtype):
    """Exercise the ``constant`` strategy on a pandas DataFrame.

    ``dtype`` is forwarded to ``pd.read_csv``. No ``fill_value`` is passed,
    and the expected output shows every empty cell as the string
    ``"missing_value"``. The test is skipped when pandas is not installed.
    """
    pd = pytest.importorskip("pandas")

    csv_text = ("Cat1,Cat2,Cat3,Cat4\n"
                ",i,x,\n"
                "a,,y,\n"
                "a,j,,\n"
                "b,j,x,")
    df = pd.read_csv(io.StringIO(csv_text), dtype=dtype)

    fill = "missing_value"
    expected = np.array([
        [fill, "i", "x", fill],
        ["a", fill, "y", fill],
        ["a", "j", fill, fill],
        ["b", "j", "x", fill],
    ], dtype=object)

    X_trans = SimpleImputer(strategy="constant").fit_transform(df)

    assert_array_equal(X_trans, expected)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:25，代码来源:test_impute.py

示例8: test_imputation_pipeline_grid_search

# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_imputation_pipeline_grid_search():
    """Smoke-test ``SimpleImputer`` inside a ``Pipeline`` tuned by ``GridSearchCV``.

    There are no assertions; the test passes if the grid search fits
    without raising for each of the three strategies.
    """
    X = sparse_random_matrix(100, 100, density=0.10)
    # Use the first stored entry of the sparse matrix as the "missing" marker.
    missing_values = X.data[0]

    steps = [
        ('imputer', SimpleImputer(missing_values=missing_values)),
        ('tree', tree.DecisionTreeRegressor(random_state=0)),
    ]
    pipeline = Pipeline(steps)

    parameters = {'imputer__strategy': ["mean", "median", "most_frequent"]}

    Y = sparse_random_matrix(100, 1, density=0.10).toarray()
    GridSearchCV(pipeline, parameters).fit(X, Y)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:19，代码来源:test_impute.py

示例9: _impute_values

# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def _impute_values(self, features):
        """Fill in missing values of a feature matrix with a median imputer.

        The imputer is created and fitted on ``features`` the first time this
        method runs (while ``self._fitted_imputer`` is ``None``); later calls
        reuse the stored imputer and only transform.

        Parameters
        ----------
        features: array-like {n_samples, n_features}
            A feature matrix

        Returns
        -------
        array-like {n_samples, n_features}
        """
        if self.verbosity > 1:
            print('Imputing missing values in feature set')

        imputer = self._fitted_imputer
        if imputer is None:
            imputer = SimpleImputer(strategy="median")
            # Store the imputer before fitting, as the original code did.
            self._fitted_imputer = imputer
            imputer.fit(features)

        return imputer.transform(features)

开发者ID:EpistasisLab，项目名称:tpot，代码行数:22，代码来源:base.py

示例10: get_estimator

# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def get_estimator():
    """Build the preprocessing + logistic-regression pipeline.

    The categorical columns are one-hot encoded (unknown categories are
    ignored) and the numerical columns have missing values replaced by the
    constant ``-1``.

    Returns
    -------
    Pipeline
        An unfitted pipeline with steps ``'transformer'`` and ``'classifier'``.
    """
    categorical_cols = ['Sex', 'Pclass', 'Embarked']
    numerical_cols = ['Age', 'SibSp', 'Parch', 'Fare']

    encoder = OneHotEncoder(handle_unknown='ignore')
    imputer = SimpleImputer(strategy='constant', fill_value=-1)
    preprocessor = make_column_transformer(
        (encoder, categorical_cols),
        (imputer, numerical_cols),
    )

    steps = [
        ('transformer', preprocessor),
        ('classifier', LogisticRegression()),
    ]
    return Pipeline(steps)

开发者ID:paris-saclay-cds，项目名称:ramp-workflow，代码行数:18，代码来源:estimator.py

示例11: test_simple_imputer_float_inputs

# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_simple_imputer_float_inputs(self):
        """Convert a fitted mean-strategy ``SimpleImputer`` to ONNX and check it.

        Verifies that the converted graph has exactly one node and one output
        whose last dimension is 2, then passes the data and both models to
        ``dump_data_and_model``.
        """
        data = [[1, 2], [np.nan, 3], [7, 6]]
        model = SimpleImputer(strategy="mean", fill_value="nan")
        model.fit(data)

        input_types = [("input", FloatTensorType([None, 2]))]
        model_onnx = convert_sklearn(
            model,
            "scikit-learn simple imputer",
            input_types,
            target_opset=TARGET_OPSET)
        graph = model_onnx.graph
        self.assertTrue(graph.node is not None)

        # the graph should contain exactly one node
        self.assertEqual(len(graph.node), 1)

        # the single output should keep the two input columns
        outputs = graph.output
        self.assertEqual(len(outputs), 1)
        last_dim = outputs[0].type.tensor_type.shape.dim[-1]
        self.assertEqual(last_dim.dim_value, 2)

        float_data = np.array(data, dtype=np.float32)
        dump_data_and_model(
            float_data,
            model, model_onnx,
            basename="SklearnSimpleImputerMeanFloat32")

开发者ID:onnx，项目名称:sklearn-onnx，代码行数:26，代码来源:test_sklearn_imputer_converter.py

示例12: __load_dataset__

# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def __load_dataset__(self):
        """Read the Stata file at ``self.train_file`` and populate ``self.X`` / ``self.Y``.

        For every row, tuple positions 4-7 are collected as the ordering and
        positions 13-32 as the context features, where the string ``"."`` is
        read as a missing value. Features are imputed with a default
        ``SimpleImputer``, transformed with ``log(x + 1)`` and standardised.
        Orderings are shifted by -1 and converted with
        ``ranking_ordering_conversion``. Finally the dataset validity check
        is run.
        """
        df = pd.io.stata.read_stata(self.train_file)
        orderings = []
        features = []
        for row in df.itertuples():
            orderings.append(row[4:8])
            # ``np.nan`` instead of ``np.NAN``: the upper-case alias was
            # removed in NumPy 2.0.
            features.append(
                [float(i) if i != "." else np.nan for i in row[13:33]])
        X = np.array(features)
        X = SimpleImputer().fit_transform(X)
        # Apply log(x + 1) to the whole imputed array at once. The previous
        # per-column loop indexed by the pre-imputation column count and so
        # raised IndexError if the imputer had dropped an all-NaN column.
        X = np.log(X + 1)
        self.X = StandardScaler().fit_transform(X)
        orderings = np.array(orderings) - 1
        self.Y = ranking_ordering_conversion(orderings)
        self.__check_dataset_validity__()

开发者ID:kiudee，项目名称:cs-ranking，代码行数:18，代码来源:survey_dataset_reader.py

示例13: fit

# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def fit(self, hyperparameter_config, X, train_indices, dataset_info):
        """Fit the imputation step and return the imputed data.

        For sparse datasets nothing is fitted and a dict with ``None``
        entries is returned. Otherwise all-NaN columns are removed, a
        ``ColumnTransformer`` is fitted on ``X[train_indices]`` and applied
        to all of ``X``. Numerical columns use the configured strategy.
        Categorical columns are filled with a constant one above the integer
        part of the largest non-NaN value in ``X``.

        ``dataset_info.categorical_features`` is modified in place: entries
        of removed columns are dropped and the list is finally sorted.

        Returns
        -------
        dict
            Either ``imputation_preprocessor`` / ``all_nan_columns`` set to
            ``None`` (sparse case), or the keys ``X``,
            ``imputation_preprocessor``, ``dataset_info`` and
            ``all_nan_columns``.
        """
        hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config)

        if dataset_info.is_sparse:
            return {'imputation_preprocessor': None, 'all_nan_columns': None}

        # Drop the columns that contain nothing but NaN.
        all_nan = np.all(np.isnan(X), axis=0)
        X = X[:, ~all_nan]
        dataset_info.categorical_features = [
            dataset_info.categorical_features[col]
            for col, dropped in enumerate(all_nan)
            if not dropped
        ]

        strategy = hyperparameter_config['strategy']
        if not dataset_info.is_sparse:
            fill_value = int(np.nanmax(X)) + 1
        else:
            fill_value = 0

        numerical_imputer = SimpleImputer(strategy=strategy, copy=False)
        categorical_imputer = SimpleImputer(strategy='constant', copy=False, fill_value=fill_value)

        is_categorical = dataset_info.categorical_features
        numerical_columns = [col for col, flag in enumerate(is_categorical) if not flag]
        categorical_columns = [col for col, flag in enumerate(is_categorical) if flag]
        transformer = ColumnTransformer(
            transformers=[
                ('numerical_imputer', numerical_imputer, numerical_columns),
                ('categorical_imputer', categorical_imputer, categorical_columns),
            ])
        transformer.fit(X[train_indices])
        X = transformer.transform(X)

        dataset_info.categorical_features = sorted(dataset_info.categorical_features)
        return {
            'X': X,
            'imputation_preprocessor': transformer,
            'dataset_info': dataset_info,
            'all_nan_columns': all_nan,
        }

开发者ID:automl，项目名称:Auto-PyTorch，代码行数:25，代码来源:imputation.py

示例14: test_imputer

# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def test_imputer(self):
        """Convert a mean imputer via CoreML to ONNX and dump the result.

        The imputer is first built with the legacy signature
        (``missing_values='NaN'``, ``axis=0``). If that raises ``TypeError``
        it is rebuilt without ``axis`` and the attribute is set by hand.
        The test returns early when the CoreML converter reports the model
        as "not supported"; any other ``ValueError`` is re-raised.
        """
        try:
            model = Imputer(missing_values='NaN', strategy='mean', axis=0)
        except TypeError:
            model = Imputer(missing_values=np.nan, strategy='mean')
            model.axis = 0
        data = [[1, 2], [np.nan, 3], [7, 6]]
        model.fit(data)
        from onnxmltools.convert.coreml.convert import convert
        import coremltools  # noqa
        try:
            model_coreml = coremltools.converters.sklearn.convert(model)
        except ValueError as e:
            if 'not supported' in str(e):
                # Python 2.7 + scikit-learn 0.22
                return
            # Any other conversion error is a real failure. Swallowing it
            # would only surface later as an unrelated UnboundLocalError on
            # ``model_coreml``.
            raise
        model_onnx = convert(model_coreml.get_spec())
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(np.array(data, dtype=np.float32),
                            model, model_onnx, basename="CmlImputerMeanFloat32")

开发者ID:onnx，项目名称:onnxmltools，代码行数:22，代码来源:test_cml_ImputerConverter.py

示例15: _check_statistics

# 需要导入模块: from sklearn import impute [as 别名]
# 或者: from sklearn.impute import SimpleImputer [as 别名]
def _check_statistics(X, X_true,
                      strategy, statistics, missing_values):
    """Utility function for testing imputation for a given strategy.

    Runs the same checks on the dense array and on its CSC sparse version.

    Check that:
        - the statistics (mean, median, mode) are correct
        - the missing values are imputed correctly

    Parameters
    ----------
    X : ndarray
        Input data containing ``missing_values`` markers.
    X_true : ndarray
        Expected result of imputing ``X``.
    strategy : str
        Value passed as ``SimpleImputer(strategy=...)``.
    statistics : array-like
        Expected content of ``imputer.statistics_`` after fitting.
    missing_values : object
        Marker passed as ``SimpleImputer(missing_values=...)``.
    """

    err_msg = ("Parameters: strategy = %s, missing_values = %s, "
               "sparse = {0}" % (strategy, missing_values))

    # Exact comparison by default; approximate when floats are involved.
    assert_ae = assert_array_equal

    if X.dtype.kind == 'f' or X_true.dtype.kind == 'f':
        assert_ae = assert_array_almost_equal

    # Normal matrix
    # ``missing_values`` is passed by keyword: recent scikit-learn releases
    # make the constructor parameters keyword-only.
    imputer = SimpleImputer(missing_values=missing_values, strategy=strategy)
    X_trans = imputer.fit(X).transform(X.copy())
    assert_ae(imputer.statistics_, statistics,
              err_msg=err_msg.format(False))
    assert_ae(X_trans, X_true, err_msg=err_msg.format(False))

    # Sparse matrix
    imputer = SimpleImputer(missing_values=missing_values, strategy=strategy)
    imputer.fit(sparse.csc_matrix(X))
    X_trans = imputer.transform(sparse.csc_matrix(X.copy()))

    if sparse.issparse(X_trans):
        X_trans = X_trans.toarray()

    assert_ae(imputer.statistics_, statistics,
              err_msg=err_msg.format(True))
    assert_ae(X_trans, X_true, err_msg=err_msg.format(True))

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:38，代码来源:test_impute.py

注：本文中的sklearn.impute.SimpleImputer方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。