當前位置: 首頁>>代碼示例>>Python>>正文


Python imputation.Imputer類代碼示例

本文整理匯總了Python中sklearn.preprocessing.imputation.Imputer的典型用法代碼示例。如果您正苦於以下問題:Python Imputer類的具體用法?Python Imputer怎麽用?Python Imputer使用的例子?那麽, 這裏精選的類代碼示例或許可以為您提供幫助。


在下文中一共展示了Imputer類的10個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: _impute

def _impute(features, imputer=True):
    """
    Helper function that uses the safest imputing method to remove null values, in terms of compatibility with the data size
    @param features: the feature values that need to be imputed
    @type features: numpy.array
    @param imputer: whether or not the scikit imputing method should be used
    @type imputer: boolean
    @return: the modified feature values
    @rtype: numpy.array
    """
    if not imputer: #run imputer only if enabled (default)
        return np.nan_to_num(features)
    else:
        imp = Imputer(missing_values='NaN', strategy='mean', axis=0, verbose=2)
        try:
            impfeatures = imp.fit_transform(features)
        except ValueError as exc:
            #catch errors with illegal values (e.g. strings)
            log.warning("Exception trying to run scikit imputation: {}".format(exc))
            impfeatures = features
        #show size for debugging purposes
        #log.debug("Featurevectors {} after imputation: {}".format(impfeatures.shape, features))i

        #we don't want shgrid_scores_ape to change, so if this happens, then just replace nans with zero and infinites
        if impfeatures.shape == features.shape:
            features = impfeatures
        else:
            log.warning("Imputer failed, filtering NaN based on numpy converter")
            features = np.nan_to_num(features)
    return features
開發者ID:lefterav,項目名稱:qualitative,代碼行數:30,代碼來源:ranking.py

示例2: setUp

    def setUp(self):
        self.cwd = os.getcwd()
        tests_dir = __file__
        os.chdir(os.path.dirname(tests_dir))

        decoder = arff.ArffDecoder()
        with open(os.path.join("datasets", "dataset.arff")) as fh:
            dataset = decoder.decode(fh, encode_nominal=True)

        # -1 because the last attribute is the class
        self.attribute_types = [
            'numeric' if type(type_) != list else 'nominal'
            for name, type_ in dataset['attributes'][:-1]]
        self.categorical = [True if attribute == 'nominal' else False
                            for attribute in self.attribute_types]

        data = np.array(dataset['data'], dtype=np.float64)
        X = data[:,:-1]
        y = data[:,-1].reshape((-1,))

        ohe = OneHotEncoder(self.categorical)
        X_transformed = ohe.fit_transform(X)
        imp = Imputer(copy=False)
        X_transformed = imp.fit_transform(X_transformed)
        center = not scipy.sparse.isspmatrix((X_transformed))
        standard_scaler = StandardScaler(with_mean=center)
        X_transformed = standard_scaler.fit_transform(X_transformed)
        X_transformed = X_transformed.todense()

        # Transform the array which indicates the categorical metafeatures
        number_numerical = np.sum(~np.array(self.categorical))
        categorical_transformed = [True] * (X_transformed.shape[1] -
                                            number_numerical) + \
                                  [False] * number_numerical
        self.categorical_transformed = categorical_transformed

        self.X = X
        self.X_transformed = X_transformed
        self.y = y
        self.mf = meta_features.metafeatures
        self.helpers = meta_features.helper_functions

        # Precompute some helper functions
        self.helpers.set_value("PCA", self.helpers["PCA"]
            (self.X_transformed, self.y))
        self.helpers.set_value("MissingValues", self.helpers[
            "MissingValues"](self.X, self.y, self.categorical))
        self.helpers.set_value("NumSymbols", self.helpers["NumSymbols"](
            self.X, self.y, self.categorical))
        self.helpers.set_value("ClassOccurences",
                               self.helpers["ClassOccurences"](self.X, self.y))
        self.helpers.set_value("Skewnesses",
            self.helpers["Skewnesses"](self.X_transformed, self.y,
                                       self.categorical_transformed))
        self.helpers.set_value("Kurtosisses",
            self.helpers["Kurtosisses"](self.X_transformed, self.y,
                                        self.categorical_transformed))
開發者ID:Bryan-LL,項目名稱:auto-sklearn,代碼行數:57,代碼來源:test_meta_features.py

示例3: test_imputation_shape

def test_imputation_shape():
    # Verify the shapes of the imputed matrix for different strategies.
    X = np.random.randn(10, 2)
    X[::2] = np.nan

    for strategy in ["mean", "median", "most_frequent"]:
        imputer = Imputer(strategy=strategy)
        X_imputed = imputer.fit_transform(X)
        assert_equal(X_imputed.shape, (10, 2))
        X_imputed = imputer.fit_transform(sparse.csr_matrix(X))
        assert_equal(X_imputed.shape, (10, 2))
開發者ID:abhisg,項目名稱:scikit-learn,代碼行數:11,代碼來源:test_imputation.py

示例4: test_imputation_pickle

def test_imputation_pickle():
    """Test for pickling imputers."""
    import pickle

    l = 100
    X = sparse_random_matrix(l, l, density=0.10)

    for strategy in ["mean", "median", "most_frequent"]:
        imputer = Imputer(missing_values=0, strategy=strategy)
        imputer.fit(X)

        imputer_pickled = pickle.loads(pickle.dumps(imputer))

        assert_array_equal(imputer.transform(X.copy()),
                           imputer_pickled.transform(X.copy()),
                           "Fail to transform the data after pickling "
                           "(strategy = %s)" % (strategy))
開發者ID:DanielWeitzenfeld,項目名稱:scikit-learn,代碼行數:17,代碼來源:test_imputation.py

示例5: check_indicator

def check_indicator(X, expected_imputed_features, axis):
    n_samples, n_features = X.shape
    imputer = Imputer(missing_values=-1, strategy='mean', axis=axis)
    imputer_with_in = clone(imputer).set_params(add_indicator_features=True)
    Xt = imputer.fit_transform(X)
    Xt_with_in = imputer_with_in.fit_transform(X)
    imputed_features_mask = X[:, expected_imputed_features] == -1
    n_features_new = Xt.shape[1]
    n_imputed_features = len(imputer_with_in.imputed_features_)
    assert_array_equal(imputer.imputed_features_, expected_imputed_features)
    assert_array_equal(imputer_with_in.imputed_features_,
                       expected_imputed_features)
    assert_equal(Xt_with_in.shape,
                 (n_samples, n_features_new + n_imputed_features))
    assert_array_equal(Xt_with_in, np.hstack((Xt, imputed_features_mask)))
    imputer_with_in = clone(imputer).set_params(add_indicator_features=True)
    assert_array_equal(Xt_with_in,
                       imputer_with_in.fit_transform(sparse.csc_matrix(X)).A)
    assert_array_equal(Xt_with_in,
                       imputer_with_in.fit_transform(sparse.csr_matrix(X)).A)
開發者ID:0664j35t3r,項目名稱:scikit-learn,代碼行數:20,代碼來源:test_imputation.py

示例6: test_imputation_copy

def test_imputation_copy():
    """Test imputation with copy=True."""
    l = 5

    # Test default behaviour and with copy=True
    for params in [{}, {'copy': True}]:
        X = sparse_random_matrix(l, l, density=0.75, random_state=0)

        # Dense
        imputer = Imputer(missing_values=0, strategy="mean", **params)
        Xt = imputer.fit(X).transform(X)
        Xt[0, 0] = np.nan
        # Check that the objects are different and that they don't use
        # the same buffer
        assert_false(np.all(X.todense() == Xt))

        # Sparse
        imputer = Imputer(missing_values=0, strategy="mean", **params)
        X = X.todense()
        Xt = imputer.fit(X).transform(X)
        Xt[0, 0] = np.nan
        # Check that the objects are different and that they don't use
        # the same buffer
        assert_false(np.all(X == Xt))
開發者ID:99plus2,項目名稱:scikit-learn,代碼行數:24,代碼來源:test_imputation.py

示例7: test_imputation_copy

def test_imputation_copy():
    # Test imputation with copy
    X_orig = sparse_random_matrix(5, 5, density=0.75, random_state=0)

    # copy=True, dense => copy
    X = X_orig.copy().toarray()
    imputer = Imputer(missing_values=0, strategy="mean", copy=True)
    Xt = imputer.fit(X).transform(X)
    Xt[0, 0] = -1
    assert_false(np.all(X == Xt))

    # copy=True, sparse csr => copy
    X = X_orig.copy()
    imputer = Imputer(missing_values=X.data[0], strategy="mean", copy=True)
    Xt = imputer.fit(X).transform(X)
    Xt.data[0] = -1
    assert_false(np.all(X.data == Xt.data))

    # copy=False, dense => no copy
    X = X_orig.copy().toarray()
    imputer = Imputer(missing_values=0, strategy="mean", copy=False)
    Xt = imputer.fit(X).transform(X)
    Xt[0, 0] = -1
    assert_true(np.all(X == Xt))

    # copy=False, sparse csr, axis=1 => no copy
    X = X_orig.copy()
    imputer = Imputer(missing_values=X.data[0], strategy="mean", copy=False, axis=1)
    Xt = imputer.fit(X).transform(X)
    Xt.data[0] = -1
    assert_true(np.all(X.data == Xt.data))

    # copy=False, sparse csc, axis=0 => no copy
    X = X_orig.copy().tocsc()
    imputer = Imputer(missing_values=X.data[0], strategy="mean", copy=False, axis=0)
    Xt = imputer.fit(X).transform(X)
    Xt.data[0] = -1
    assert_true(np.all(X.data == Xt.data))

    # copy=False, sparse csr, axis=0 => copy
    X = X_orig.copy()
    imputer = Imputer(missing_values=X.data[0], strategy="mean", copy=False, axis=0)
    Xt = imputer.fit(X).transform(X)
    Xt.data[0] = -1
    assert_false(np.all(X.data == Xt.data))

    # copy=False, sparse csc, axis=1 => copy
    X = X_orig.copy().tocsc()
    imputer = Imputer(missing_values=X.data[0], strategy="mean", copy=False, axis=1)
    Xt = imputer.fit(X).transform(X)
    Xt.data[0] = -1
    assert_false(np.all(X.data == Xt.data))

    # copy=False, sparse csr, axis=1, missing_values=0 => copy
    X = X_orig.copy()
    imputer = Imputer(missing_values=0, strategy="mean", copy=False, axis=1)
    Xt = imputer.fit(X).transform(X)
    assert_false(sparse.issparse(Xt))
開發者ID:abhisg,項目名稱:scikit-learn,代碼行數:58,代碼來源:test_imputation.py

示例8: _check_statistics

def _check_statistics(X, X_true, strategy, statistics, missing_values):
    """Utility function for testing imputation for a given strategy.

    Test:
        - along the two axes
        - with dense and sparse arrays

    Check that:
        - the statistics (mean, median, mode) are correct
        - the missing values are imputed correctly"""

    err_msg = "Parameters: strategy = %s, missing_values = %s, " "axis = {0}, sparse = {1}" % (strategy, missing_values)

    # Normal matrix, axis = 0
    imputer = Imputer(missing_values, strategy=strategy, axis=0)
    X_trans = imputer.fit(X).transform(X.copy())
    assert_array_equal(imputer.statistics_, statistics, err_msg.format(0, False))
    assert_array_equal(X_trans, X_true, err_msg.format(0, False))

    # Normal matrix, axis = 1
    imputer = Imputer(missing_values, strategy=strategy, axis=1)
    imputer.fit(X.transpose())
    if np.isnan(statistics).any():
        assert_raises(ValueError, imputer.transform, X.copy().transpose())
    else:
        X_trans = imputer.transform(X.copy().transpose())
        assert_array_equal(X_trans, X_true.transpose(), err_msg.format(1, False))

    # Sparse matrix, axis = 0
    imputer = Imputer(missing_values, strategy=strategy, axis=0)
    imputer.fit(sparse.csc_matrix(X))
    X_trans = imputer.transform(sparse.csc_matrix(X.copy()))

    if sparse.issparse(X_trans):
        X_trans = X_trans.toarray()

    assert_array_equal(imputer.statistics_, statistics, err_msg.format(0, True))
    assert_array_equal(X_trans, X_true, err_msg.format(0, True))

    # Sparse matrix, axis = 1
    imputer = Imputer(missing_values, strategy=strategy, axis=1)
    imputer.fit(sparse.csc_matrix(X.transpose()))
    if np.isnan(statistics).any():
        assert_raises(ValueError, imputer.transform, sparse.csc_matrix(X.copy().transpose()))
    else:
        X_trans = imputer.transform(sparse.csc_matrix(X.copy().transpose()))

        if sparse.issparse(X_trans):
            X_trans = X_trans.toarray()

        assert_array_equal(X_trans, X_true.transpose(), err_msg.format(1, True))
開發者ID:abhisg,項目名稱:scikit-learn,代碼行數:51,代碼來源:test_imputation.py

示例9: setUp

    def setUp(self):
        self.cwd = os.getcwd()
        tests_dir = __file__
        os.chdir(os.path.dirname(tests_dir))

        decoder = arff.ArffDecoder()
        with open(os.path.join("datasets", "dataset.arff")) as fh:
            dataset = decoder.decode(fh, encode_nominal=True)

        # -1 because the last attribute is the class
        self.attribute_types = [
            'numeric' if type(type_) != list else 'nominal'
            for name, type_ in dataset['attributes'][:-1]]
        self.categorical = [True if attribute == 'nominal' else False
                            for attribute in self.attribute_types]

        data = np.array(dataset['data'], dtype=np.float64)
        X = data[:, :-1]
        y = data[:, -1].reshape((-1,))

        # First, swap NaNs and zeros, because when converting an encoded
        # dense matrix to sparse, the values which are encoded to zero are lost
        X_sparse = X.copy()
        NaNs = ~np.isfinite(X_sparse)
        X_sparse[NaNs] = 0
        X_sparse = sparse.csr_matrix(X_sparse)

        ohe = OneHotEncoder(self.categorical)
        X_transformed = X_sparse.copy()
        X_transformed = ohe.fit_transform(X_transformed)
        imp = Imputer(copy=False)
        X_transformed = imp.fit_transform(X_transformed)
        standard_scaler = StandardScaler()
        X_transformed = standard_scaler.fit_transform(X_transformed)

        # Transform the array which indicates the categorical metafeatures
        number_numerical = np.sum(~np.array(self.categorical))
        categorical_transformed = [True] * (X_transformed.shape[1] -
                                            number_numerical) + \
                                  [False] * number_numerical
        self.categorical_transformed = categorical_transformed

        self.X = X_sparse
        self.X_transformed = X_transformed
        self.y = y
        self.mf = meta_features.metafeatures
        self.helpers = meta_features.helper_functions

        # Precompute some helper functions
        self.helpers.set_value("PCA", self.helpers["PCA"]
            (self.X_transformed, self.y))
        self.helpers.set_value("MissingValues", self.helpers[
            "MissingValues"](self.X, self.y, self.categorical))
        self.mf.set_value("NumberOfMissingValues",
            self.mf["NumberOfMissingValues"](self.X, self.y, self.categorical))
        self.helpers.set_value("NumSymbols", self.helpers["NumSymbols"](
            self.X, self.y, self.categorical))
        self.helpers.set_value("ClassOccurences",
            self.helpers["ClassOccurences"](self.X, self.y))
        self.helpers.set_value("Skewnesses",
            self.helpers["Skewnesses"](self.X_transformed, self.y,
                                       self.categorical_transformed))
        self.helpers.set_value("Kurtosisses",
            self.helpers["Kurtosisses"](self.X_transformed, self.y,
                                        self.categorical_transformed))
開發者ID:Mahgoobi,項目名稱:auto-sklearn,代碼行數:65,代碼來源:test_meta_features_sparse.py

示例10: print

        count += 1
        if count % 1000 == 0:
            print(count)
        val = noncat_matrix[x, y]
        if val - math.floor(val) != 0.0:
            for i in range(20):
                if abs(abs(val) * i - math.ceil(abs(val) * i)) < 0.001:
                    X[x, 2 * y] = math.ceil(abs(val) * i)
                    X[x, 2 * y + 1] = i
    return X


# категории
print("building train")
train_cat_matr = train_df.ix[:, 0:CAT_COUNT].as_matrix()
imp = Imputer(missing_values="NaN", strategy="most_frequent", axis=0)
train_cat_matr = imp.fit_transform(train_cat_matr)
# imp2 = Imputer(missing_values='NaN', strategy='median')
train_noncat_matr = train_df.ix[:, CAT_COUNT:].fillna(0).as_matrix()
# train_noncat_matr = train_df.ix[:, CAT_COUNT:].as_matrix()
# train_noncat_matr = imp2.fit_transform(train_noncat_matr)
# allf = np.hstack((train_cat_matr, train_noncat_matr))


print("building test")
test_df.ix[:, 0:CAT_COUNT] = test_set_to_encode
test_cat_matr = test_df.ix[:, 0:CAT_COUNT].as_matrix()
test_cat_matr = imp.transform(test_cat_matr)
test_noncat_matr = test_df.ix[:, CAT_COUNT:].fillna(0).as_matrix()
# test_noncat_matr = test_df.ix[:, CAT_COUNT:].as_matrix()
# test_noncat_matr = imp2.transform(test_noncat_matr)
開發者ID:SammyVimes,項目名稱:storage_systems__classifier,代碼行數:31,代碼來源:netcover.py


注:本文中的sklearn.preprocessing.imputation.Imputer類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。