当前位置: 首页>>代码示例>>Python>>正文


Python EmpiricalCovariance.mahalanobis方法代码示例

本文整理汇总了 Python 中 sklearn.covariance.EmpiricalCovariance.mahalanobis 方法的典型用法代码示例。如果您正在寻找以下问题的答案：EmpiricalCovariance.mahalanobis 方法的具体用法是什么？该怎么调用？有哪些实际使用的例子？那么这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该方法所属的类 sklearn.covariance.EmpiricalCovariance 的用法示例。


下文一共展示了 EmpiricalCovariance.mahalanobis 方法的 13 个代码示例，这些示例默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞，您的评价将有助于系统推荐出更好的 Python 代码示例。

示例1: test_covariance

# 需要导入模块: from sklearn.covariance import EmpiricalCovariance [as 别名]
# 注意: mahalanobis 是 EmpiricalCovariance 的实例方法, 无法单独导入; 请先拟合实例, 再调用 cov.mahalanobis(X)
def test_covariance():
    """Exercise EmpiricalCovariance on the module-level dataset ``X``.

    Checks the fitted covariance against ``empirical_covariance``, the
    ``error_norm`` variants, the range of the Mahalanobis distances, the
    single-feature case and integer-typed input.
    """
    # Fit on the full dataset and compare with the functional estimate.
    estimator = EmpiricalCovariance()
    estimator.fit(X)
    assert_array_almost_equal(
        empirical_covariance(X), estimator.covariance_, 4)
    # The error norm of the estimate against itself must vanish for every
    # supported option.
    for options in ({}, {'norm': 'spectral'}, {'norm': 'frobenius'},
                    {'scaling': False}, {'squared': False}):
        assert_almost_equal(
            estimator.error_norm(empirical_covariance(X), **options), 0)

    # Mahalanobis distances of the training data must stay inside (50, 250).
    distances = estimator.mahalanobis(X)
    assert np.amax(distances) < 250
    assert np.amin(distances) > 50

    # Same checks with a single feature (first column as a column vector).
    single_feature = X[:, 0].reshape((-1, 1))
    estimator = EmpiricalCovariance()
    estimator.fit(single_feature)
    assert_array_almost_equal(
        empirical_covariance(single_feature), estimator.covariance_, 4)
    for options in ({}, {'norm': 'spectral'}):
        assert_almost_equal(
            estimator.error_norm(
                empirical_covariance(single_feature), **options), 0)

    # Integer input must produce the expected floating-point covariance.
    int_samples = np.asarray([[0, 1], [1, 0]])
    expected = np.asarray([[0.25, -0.25], [-0.25, 0.25]])
    assert_array_almost_equal(empirical_covariance(int_samples), expected)
开发者ID:forkloop,项目名称:scikit-learn,代码行数:37,代码来源:test_covariance.py

示例2: OneClassMahalanobis

# 需要导入模块: from sklearn.covariance import EmpiricalCovariance [as 别名]
# 注意: mahalanobis 是 EmpiricalCovariance 的实例方法, 无法单独导入; 请先拟合实例, 再调用 cov.mahalanobis(X)
class OneClassMahalanobis(BaseClassifier):
    """One-class detector that thresholds Mahalanobis distances.

    ``fit`` estimates an empirical covariance on the samples that survive a
    per-feature tail trim, then stores as cutoff the ``perc_keep``-th
    percentile of the distances of *all* training samples.  ``predict``
    labels samples beyond the cutoff ``-1`` and all others ``+1``.
    """
    _fit_params = ['perc_keep']
    _predict_params = []

    def __init__(self, *args, **kwargs):
        # BaseClassifier.__init__ is not invoked here (the call was already
        # commented out).  ``perc_keep`` is mandatory: a missing key raises
        # KeyError.
        self.perc_keep = kwargs["perc_keep"]

    def fit(self, data):
        """Fit the covariance model and derive the distance cutoff.

        Parameters
        ----------
        data : 2-D numpy array, one row per sample and one column per feature
            (it is indexed as ``data[:, d]`` and ``data[mask, :]``).
        """
        nu = 0.01  # fraction trimmed per feature, nu / 2 in each tail
        n_sample = data.shape[0]
        n_feature = data.shape[1]
        n_trim = int(n_sample * nu / 2)

        # Boolean mask of the rows that are inside the trimmed range for
        # every single feature.
        keep = numpy.ones(n_sample, dtype=bool)
        for d in range(n_feature):
            feature = data[:, d]
            s_feature = numpy.sort(feature)
            low = s_feature[n_trim]
            # Symmetric upper bound.  The former index ``n_sample - n_trim``
            # ran past the end of the array whenever n_trim was 0 (small
            # sample counts) and trimmed one value less than the lower tail
            # otherwise.
            upp = s_feature[n_sample - 1 - n_trim]
            keep &= ~numpy.logical_or(feature > upp, feature < low)

        data_ = data[keep, :]

        self.cov = EmpiricalCovariance().fit(data_)

        # The cutoff is taken over all training samples, trimmed or not.
        dist = self.cov.mahalanobis(data)

        self.cutoff = numpy.percentile(dist, self.perc_keep)
        print(self.cutoff)

    def predict(self, data):
        """Return ``-1`` for samples beyond the cutoff and ``+1`` otherwise.

        The distances are also cached on ``self.mahal_dist``.
        """
        mahal_dist = self.cov.mahalanobis(data)
        self.mahal_dist = mahal_dist
        is_outlier = mahal_dist > self.cutoff
        # Single formatted string: prints the same text under Python 2 and 3.
        print("%s %s %s %s of %s" % (mahal_dist.min(), mahal_dist.max(),
                                     self.cutoff, is_outlier.sum(),
                                     len(mahal_dist)))

        # Signed dtype on purpose: multiplying a uint8 array by -2 relied on
        # value-based promotion and fails under NumPy 2 promotion rules.
        return is_outlier.astype(numpy.int16) * -2 + 1

    def decision_function(self, data=None):
        """Return Mahalanobis distances (larger means more outlying).

        With ``data=None`` the distances cached by the last ``predict`` call
        are returned; otherwise they are computed for ``data``.
        """
        if data is None:
            return self.mahal_dist
        return self.cov.mahalanobis(data)
开发者ID:CellH5,项目名称:cellh5apps,代码行数:47,代码来源:learner.py

示例3: Mahalanobis

# 需要导入模块: from sklearn.covariance import EmpiricalCovariance [as 别名]
# 注意: mahalanobis 是 EmpiricalCovariance 的实例方法, 无法单独导入; 请先拟合实例, 再调用 cov.mahalanobis(X)
class Mahalanobis(BaseEstimator):
    """Mahalanobis-distance scorer built on a covariance estimate.

    Parameters
    ----------
    robust : bool, default=False
        If True, use the Minimum Covariance Determinant estimator
        (``MinCovDet``); otherwise use ``EmpiricalCovariance``.
    """
    def __init__(self, robust=False):
        # Keep the constructor argument as an attribute so that
        # BaseEstimator.get_params() / clone() / repr() can read it back.
        self.robust = robust
        if not robust:
            from sklearn.covariance import EmpiricalCovariance as CovarianceEstimator
        else:
            from sklearn.covariance import MinCovDet as CovarianceEstimator
        self.model = CovarianceEstimator()
        self.cov = None

    def fit(self, X, y=None, **params):
        """Fit the covariance model on the training data.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training data, where n_samples is the number of samples and
            n_features is the number of features.
        y : ignored
        **params : ignored

        Returns
        -------
        self : object
            Returns self.
        """
        # ``self.cov`` holds whatever the underlying estimator's fit returns.
        self.cov = self.model.fit(X)
        return self

    def score(self, X, y=None):
        """Return the negated, power-compressed Mahalanobis distances.

        The observations are centered here with the fitted ``location_``
        before being handed to the covariance model, and the resulting
        distances are raised to the power 0.33 and negated, so larger scores
        mean observations closer to the model.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            The observations whose Mahalanobis distances are computed.
        y : ignored

        Returns
        -------
        score : array, shape = [n_samples,]
            ``-(distance ** 0.33)`` for every observation.
        """
        # NOTE(review): whether ``mahalanobis`` subtracts ``location_`` itself
        # depends on the scikit-learn release; if it does, X is centered
        # twice here -- confirm against the installed version.
        distances = self.model.mahalanobis(X - self.model.location_)
        return -(distances ** 0.33)
开发者ID:pborky,项目名称:pynfsa,代码行数:55,代码来源:models.py

示例4: test_covariance

# 需要导入模块: from sklearn.covariance import EmpiricalCovariance [as 别名]
# 注意: mahalanobis 是 EmpiricalCovariance 的实例方法, 无法单独导入; 请先拟合实例, 再调用 cov.mahalanobis(X)
def test_covariance():
    """Exercise EmpiricalCovariance on the module-level dataset ``X``.

    Covers the fitted covariance, every ``error_norm`` option (including the
    rejection of an unknown norm), Mahalanobis distances, the single-feature
    and single-sample cases, integer input and ``assume_centered=True``.
    """
    # Fit on the full dataset and compare with the functional estimate.
    estimator = EmpiricalCovariance()
    estimator.fit(X)
    reference = empirical_covariance(X)
    assert_array_almost_equal(reference, estimator.covariance_, 4)
    # The error norm of the estimate against itself must vanish.
    for options in ({}, {'norm': 'spectral'}, {'norm': 'frobenius'},
                    {'scaling': False}, {'squared': False}):
        assert_almost_equal(estimator.error_norm(reference, **options), 0)
    # An unknown norm name must be rejected.
    assert_raises(NotImplementedError,
                  estimator.error_norm, reference, norm='foo')

    # Mahalanobis distances of the training data must be strictly positive.
    distances = estimator.mahalanobis(X)
    print(np.amin(distances), np.amax(distances))
    assert np.amin(distances) > 0

    # Single feature: first column reshaped to a column vector.
    single_feature = X[:, 0].reshape((-1, 1))
    estimator = EmpiricalCovariance()
    estimator.fit(single_feature)
    assert_array_almost_equal(
        empirical_covariance(single_feature), estimator.covariance_, 4)
    for options in ({}, {'norm': 'spectral'}):
        assert_almost_equal(
            estimator.error_norm(
                empirical_covariance(single_feature), **options), 0)

    # Single sample: fitting must warn and give an all-zero covariance.
    # FIXME: the purpose of this case was flagged as unclear by the author.
    lone_sample = np.arange(5)
    estimator = EmpiricalCovariance()
    assert_warns(UserWarning, estimator.fit, lone_sample)
    assert_array_almost_equal(
        estimator.covariance_, np.zeros(shape=(5, 5), dtype=np.float64))

    # Integer input must produce the expected floating-point covariance.
    int_samples = np.asarray([[0, 1], [1, 0]])
    expected = np.asarray([[0.25, -0.25], [-0.25, 0.25]])
    assert_array_almost_equal(empirical_covariance(int_samples), expected)

    # With assume_centered=True the fitted location must be exactly zero.
    estimator = EmpiricalCovariance(assume_centered=True)
    estimator.fit(X)
    assert_array_equal(estimator.location_, np.zeros(X.shape[1]))
开发者ID:HapeMask,项目名称:scikit-learn,代码行数:53,代码来源:test_covariance.py

示例5: mahalanobis_plot

# 需要导入模块: from sklearn.covariance import EmpiricalCovariance [as 别名]
# 注意: mahalanobis 是 EmpiricalCovariance 的实例方法, 无法单独导入; 请先拟合实例, 再调用 cov.mahalanobis(X)
def mahalanobis_plot(ctry=None, df=None, weighted=True, inliers=False):
    """Scatter a dataset and overlay empirical and robust distance contours.

    Modelled on the scikit-learn outlier-detection example:
    http://scikit-learn.org/0.13/modules/outlier_detection.html#fitting-an-elliptic-envelop

    Parameters
    ----------
    ctry : country key, optional
        Used to load the data via ``load_res`` when ``df`` is not given and
        to look up the plot title in ``country_code``.
    df : DataFrame, optional
        Data to plot; takes precedence over loading by ``ctry``.
    weighted : bool
        Forwarded to ``load_res`` when the data is loaded by country.
    inliers : bool
        If True, the data is first reduced with ``get_inliers``.

    Returns
    -------
    (fig, ax1, ctry) : the figure, its single axes and the country key.

    Raises
    ------
    ValueError
        If neither ``ctry`` nor ``df`` is supplied.
    """
    if df is None and ctry is None:
        raise ValueError('Either the country or a dataframe must be supplied')
    elif df is None:
        df = load_res(ctry, weighted=weighted)
    if inliers:
        df = get_inliers(df=df)
    X = df.values

    # Fit both estimators on the full data set.
    robust_cov = MinCovDet().fit(X)
    emp_cov = EmpiricalCovariance().fit(X)

    # Single-axes figure showing the first two columns of the data.
    fig = plt.figure()
    fig.subplots_adjust(hspace=-.1, wspace=.4, top=.95, bottom=.05)
    ax1 = fig.add_subplot(1, 1, 1)
    ax1.scatter(X[:, 0], X[:, 1], alpha=.5, color='k', marker='.')
    # A dataframe may be supplied without a country; in that case there is
    # no key to look up, so the title is left unset instead of failing.
    if ctry is not None:
        ax1.set_title(country_code[ctry])

    # 100 x 100 evaluation grid spanning the current axis limits.  The grid
    # points have two coordinates, so the estimators must have been fitted
    # on two-column data for the calls below to work.
    x_min, x_max = ax1.get_xlim()[0], ax1.get_xlim()[1]
    y_min, y_max = ax1.get_ylim()[0], ax1.get_ylim()[1]
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))
    zz = np.c_[xx.ravel(), yy.ravel()]

    # Contours of the square root of the empirical distances.
    mahal_emp_cov = emp_cov.mahalanobis(zz)
    mahal_emp_cov = mahal_emp_cov.reshape(xx.shape)
    emp_cov_contour = ax1.contour(xx, yy, np.sqrt(mahal_emp_cov),
                                  cmap=plt.cm.PuBu_r,
                                  linestyles='dashed')

    # Contours of the square root of the robust (MCD) distances.
    mahal_robust_cov = robust_cov.mahalanobis(zz)
    mahal_robust_cov = mahal_robust_cov.reshape(xx.shape)
    robust_contour = ax1.contour(xx, yy, np.sqrt(mahal_robust_cov),
                                 cmap=plt.cm.YlOrBr_r, linestyles='dotted')

    # The second contour line of each set serves as the legend handle.
    # NOTE(review): ``ContourSet.collections`` may be unavailable in recent
    # Matplotlib releases -- confirm against the installed version.
    ax1.legend([emp_cov_contour.collections[1], robust_contour.collections[1]],
               ['MLE dist', 'robust dist'],
               loc="upper right", borderaxespad=0)
    ax1.grid()
    return (fig, ax1, ctry)
开发者ID:RaoUmer,项目名称:data-wrangling,代码行数:52,代码来源:outliers_after_weighting.py

示例6: test_covariance

# 需要导入模块: from sklearn.covariance import EmpiricalCovariance [as 别名]
# 注意: mahalanobis 是 EmpiricalCovariance 的实例方法, 无法单独导入; 请先拟合实例, 再调用 cov.mahalanobis(X)
def test_covariance():
    """Exercise EmpiricalCovariance on the module-level dataset ``X``.

    Covers the fitted covariance, every ``error_norm`` option (including the
    rejection of an unknown norm), Mahalanobis distances, the single-feature
    and single-sample cases, integer input and ``assume_centered=True``.
    """
    # Fit on the full dataset and compare with the functional estimate.
    model = EmpiricalCovariance()
    model.fit(X)
    baseline = empirical_covariance(X)
    assert_array_almost_equal(baseline, model.covariance_, 4)
    # The error norm of the estimate against itself must vanish.
    norm_options = (
        {},
        {"norm": "spectral"},
        {"norm": "frobenius"},
        {"scaling": False},
        {"squared": False},
    )
    for kwargs in norm_options:
        assert_almost_equal(model.error_norm(baseline, **kwargs), 0)
    # An unknown norm name must be rejected.
    assert_raises(NotImplementedError, model.error_norm, baseline, norm="foo")

    # Mahalanobis distances of the training data must be strictly positive.
    dists = model.mahalanobis(X)
    print(np.amin(dists), np.amax(dists))
    assert np.amin(dists) > 0

    # Single feature: first column reshaped to a column vector.
    column = X[:, 0].reshape((-1, 1))
    model = EmpiricalCovariance()
    model.fit(column)
    assert_array_almost_equal(empirical_covariance(column), model.covariance_, 4)
    for kwargs in ({}, {"norm": "spectral"}):
        assert_almost_equal(
            model.error_norm(empirical_covariance(column), **kwargs), 0)

    # Single sample: only checks that fitting completes; any warnings
    # raised meanwhile are recorded and discarded.
    one_sample = np.arange(5)
    model = EmpiricalCovariance()
    with warnings.catch_warnings(record=True):
        model.fit(one_sample)

    # Integer input must produce the expected floating-point covariance.
    int_input = np.asarray([[0, 1], [1, 0]])
    expected = np.asarray([[0.25, -0.25], [-0.25, 0.25]])
    assert_array_almost_equal(empirical_covariance(int_input), expected)

    # With assume_centered=True the fitted location must be exactly zero.
    model = EmpiricalCovariance(assume_centered=True)
    model.fit(X)
    assert_array_equal(model.location_, np.zeros(X.shape[1]))
开发者ID:mugiro,项目名称:elm-python,代码行数:45,代码来源:test_covariance.py

示例7: OneClassMahalanobis

# 需要导入模块: from sklearn.covariance import EmpiricalCovariance [as 别名]
# 注意: mahalanobis 是 EmpiricalCovariance 的实例方法, 无法单独导入; 请先拟合实例, 再调用 cov.mahalanobis(X)
class OneClassMahalanobis(BaseClassifier):
    """One-class detector using a chi-square cutoff on Mahalanobis distances.

    ``fit`` estimates an empirical covariance; ``predict`` labels a sample
    ``-1`` when its distance exceeds the 0.95 quantile of a chi-square
    distribution with as many degrees of freedom as there are features, and
    ``+1`` otherwise.
    """
    _fit_params = []

    def __init__(self, *args, **kwargs):
        # No configuration; arguments are accepted and ignored.
        pass

    def fit(self, data):
        """Fit the empirical covariance model on ``data``."""
        # A robust alternative (MinCovDet) was tried and left disabled by
        # the original author.
        self.cov = EmpiricalCovariance().fit(data)

    def predict(self, data):
        """Return ``-1`` for samples beyond the cutoff and ``+1`` otherwise.

        The distances are also cached on ``self.mahal_emp_cov``.
        """
        mahal_emp_cov = self.cov.mahalanobis(data)
        d = data.shape[1]
        thres = scipy.stats.chi2.ppf(0.95, d)

        self.mahal_emp_cov = mahal_emp_cov

        # True -> 1 -> -1 (outlier); False -> 0 -> +1 (inlier).
        return (mahal_emp_cov > thres).astype(numpy.int32) * -2 + 1

    def decision_function(self, data):
        """Return the Mahalanobis distances of ``data``.

        Computed from the argument rather than read from the cache left by
        the last ``predict`` call, so the result always matches ``data`` and
        the method also works before ``predict`` has been called.
        """
        return self.cov.mahalanobis(data)
开发者ID:sommerc,项目名称:cellh5apps,代码行数:22,代码来源:learner.py

示例8:

# 需要导入模块: from sklearn.covariance import EmpiricalCovariance [as 别名]
# 注意: mahalanobis 是 EmpiricalCovariance 的实例方法, 无法单独导入; 请先拟合实例, 再调用 cov.mahalanobis(X)
# Show data set
# Excerpt of a larger script: X, n_outliers, emp_cov and robust_cov are
# defined before this fragment.
subfig1 = plt.subplot(3, 1, 1)
# All samples are drawn in black first ...
inlier_plot = subfig1.scatter(X[:, 0], X[:, 1],
                              color='black', label='inliers')
# ... then the last n_outliers rows are drawn again in red on top.
outlier_plot = subfig1.scatter(X[:, 0][-n_outliers:], X[:, 1][-n_outliers:],
                               color='red', label='outliers')
# Keep the automatic left limit and fix the right limit at 11.
subfig1.set_xlim(subfig1.get_xlim()[0], 11.)
subfig1.set_title("Mahalanobis distances of a contaminated data set:")

# Show contours of the distance functions
# 100 x 100 grid spanning the current axis limits, flattened to one
# (x, y) pair per row.
xx, yy = np.meshgrid(np.linspace(plt.xlim()[0], plt.xlim()[1], 100),
                     np.linspace(plt.ylim()[0], plt.ylim()[1], 100))
zz = np.c_[xx.ravel(), yy.ravel()]

# Distances from the empirical estimate, reshaped back to the grid; their
# square root is what gets contoured.
mahal_emp_cov = emp_cov.mahalanobis(zz)
mahal_emp_cov = mahal_emp_cov.reshape(xx.shape)
emp_cov_contour = subfig1.contour(xx, yy, np.sqrt(mahal_emp_cov),
                                  cmap=plt.cm.PuBu_r,
                                  linestyles='dashed')

# Same for the robust estimate.
mahal_robust_cov = robust_cov.mahalanobis(zz)
mahal_robust_cov = mahal_robust_cov.reshape(xx.shape)
robust_contour = subfig1.contour(xx, yy, np.sqrt(mahal_robust_cov),
                                 cmap=plt.cm.YlOrBr_r, linestyles='dotted')

# The second contour line of each set serves as its legend handle.
subfig1.legend([emp_cov_contour.collections[1], robust_contour.collections[1],
                inlier_plot, outlier_plot],
               ['MLE dist', 'robust dist', 'inliers', 'outliers'],
               loc="upper right", borderaxespad=0)
# Empty tick list: no x ticks are shown.
plt.xticks(())
开发者ID:tkskow,项目名称:ProsjektOppgave,代码行数:32,代码来源:mahalanobis_test.py

示例9:

# 需要导入模块: from sklearn.covariance import EmpiricalCovariance [as 别名]
# 注意: mahalanobis 是 EmpiricalCovariance 的实例方法, 无法单独导入; 请先拟合实例, 再调用 cov.mahalanobis(X)
# Show data set
# Excerpt of a larger script: X, n_outliers, emp_cov and robust_cov are
# defined before this fragment; ``pl`` is the plotting module alias.
subfig1 = pl.subplot(3, 1, 1)
# All samples are drawn in black first ...
inlier_plot = subfig1.scatter(X[:, 0], X[:, 1],
                              color='black', label='inliers')
# ... then the last n_outliers rows are drawn again in red on top.
outlier_plot = subfig1.scatter(X[:, 0][-n_outliers:], X[:, 1][-n_outliers:],
                               color='red', label='outliers')
# Keep the automatic left limit and fix the right limit at 11.
subfig1.set_xlim(subfig1.get_xlim()[0], 11.)
subfig1.set_title("Mahalanobis distances of a contaminated data set:")

# Show contours of the distance functions
# 100 x 100 grid spanning the current axis limits, flattened to one
# (x, y) pair per row.
xx, yy = np.meshgrid(np.linspace(pl.xlim()[0], pl.xlim()[1], 100),
                     np.linspace(pl.ylim()[0], pl.ylim()[1], 100))
zz = np.c_[xx.ravel(), yy.ravel()]

# Distances from the empirical estimate, reshaped back to the grid; their
# square root is what gets contoured.
mahal_emp_cov = emp_cov.mahalanobis(zz)
mahal_emp_cov = mahal_emp_cov.reshape(xx.shape)
emp_cov_contour = subfig1.contour(xx, yy, np.sqrt(mahal_emp_cov),
                                  cmap=pl.cm.PuBu_r,
                                  linestyles='dashed')

# Same for the robust estimate.
mahal_robust_cov = robust_cov.mahalanobis(zz)
mahal_robust_cov = mahal_robust_cov.reshape(xx.shape)
robust_contour = subfig1.contour(xx, yy, np.sqrt(mahal_robust_cov),
                                 cmap=pl.cm.YlOrBr_r, linestyles='dotted')

# The second contour line of each set serves as its legend handle.
subfig1.legend([emp_cov_contour.collections[1], robust_contour.collections[1],
                inlier_plot, outlier_plot],
               ['MLE dist', 'robust dist', 'inliers', 'outliers'],
               loc="upper right", borderaxespad=0)
# Empty tick list: no x ticks are shown.
pl.xticks(())
开发者ID:FH235918,项目名称:scikit-learn,代码行数:32,代码来源:plot_mahalanobis_distances.py

示例10: main

# 需要导入模块: from sklearn.covariance import EmpiricalCovariance [as 别名]
# 注意: mahalanobis 是 EmpiricalCovariance 的实例方法, 无法单独导入; 请先拟合实例, 再调用 cov.mahalanobis(X)
def main():
    """Plot robust and empirical Mahalanobis distances for a CSV dataset.

    Command-line arguments: the CSV file, ``--plot`` (``train`` or ``grid``)
    and ``--plotdims`` (2 or 3).  The visible part of the function loads the
    data, reduces it to two PCA components when it has more than two columns,
    saves the result as ``outliers.npy`` next to the script, and draws one
    figure per covariance estimator.

    NOTE: the tail of this function is omitted in this listing; only the
    visible part is documented.
    """
    parser = argparse.ArgumentParser(
        description='Plot outlier-like distances for a 2-dimensional dataset')
    parser.add_argument(
        'dataset', type=argparse.FileType('r'),
        help='a CSV file containing the dataset')
    parser.add_argument(
        '--plot', type=str, choices=['train', 'grid'], default='grid',
        help='plot the dataset or a grid evenly distributed over its span')
    parser.add_argument(
        '--plotdims', type=int, choices=[2, 3], default=2,
        help='the number of dimensions to plot')

    args = parser.parse_args()

    # The dataset argument is an already opened file object.
    X = np.loadtxt(args.dataset, delimiter=',')
    fig = plt.figure()

    # Stays a NullTransformer unless the data needs reducing below.
    xformer = NullTransformer()

    # More than two columns: project onto the first two PCA components.
    if X.shape[1] > 2:
        xformer = PCA(n_components=2)
        X = xformer.fit_transform(X)

    # Both branches scatter the first two columns; they differ only in the
    # marker styling.
    if args.plotdims == 2:
        plt.scatter(X[:, 0], X[:, 1], s=60, linewidth='0')
    else:
        plt.scatter(X[:, 0], X[:, 1])
    plt.show(block=False)

    # Save the (possibly PCA-reduced) data next to this script.
    path_to_script = os.path.realpath(__file__)
    dir_of_script = os.path.dirname(path_to_script)
    dataset_path = dir_of_script + '/outliers.npy'
    np.save(dataset_path, X)
    
    ###########################################################################
    # Train autoencoder with the n samples until convergence.  Run
    # evenly distributed samples through the autoencoder and compute
    # their reconstruction error.
    ###########################################################################
    # NOTE(review): no autoencoder is trained in the visible code; the banner
    # above looks stale -- confirm against the omitted tail.

    # Widen the overall value range of X by half its span on each side.
    maxseq_orig = np.max(X)
    minseq_orig = np.min(X)
    seqrange = np.abs(maxseq_orig - minseq_orig)
    maxseq = maxseq_orig + 0.5 * seqrange
    minseq = minseq_orig - 0.5 * seqrange
    print("minseq", minseq, "maxseq", maxseq)
    if args.plot == 'grid':
        # 50 x 50 grid of (x, y) pairs over the widened range.
        seq = np.linspace(minseq, maxseq, num=50, endpoint=True)
        Xplot = np.array([_ for _ in product(seq, seq)])
    else:
        Xplot = X

    # Both estimators are fitted on X and evaluated on the plotted points.
    robust_cov = MinCovDet().fit(X)
    robust_md = robust_cov.mahalanobis(Xplot)

    empirical_cov = EmpiricalCovariance().fit(X)
    empirical_md = empirical_cov.mahalanobis(Xplot)

    # Assume Xplot is at least 2-dimensional.
    # NOTE(review): Xplot is either the two-column grid or X, which the PCA
    # step above limits to two columns, so the bh_sne branch looks
    # unreachable -- confirm.
    if Xplot.shape[1] > 2:
        Xplot2d = bh_sne(Xplot)
    else:
        Xplot2d = Xplot

    # Min-max rescale each distance vector to [0, 1], ignoring NaNs.
    robust_md01 = robust_md - np.nanmin(robust_md)
    robust_md01 = robust_md01 / np.nanmax(robust_md01)

    empirical_md01 = empirical_md - np.nanmin(empirical_md)
    empirical_md01 = empirical_md01 / np.nanmax(empirical_md01)

    # Figure for the robust estimator: coloured scatter in 2-D, or a
    # triangulated surface with the rescaled distance as height in 3-D.
    fig = plt.figure()
    if args.plotdims == 2:
        ax = fig.add_subplot(1, 1, 1)
        ax.scatter(Xplot2d[:, 0], Xplot2d[:, 1], 
            cmap=plt.cm.jet, c=robust_md01, s=60, linewidth='0')
    else:
        ax = fig.add_subplot(1, 1, 1, projection='3d')
        ax.plot_trisurf(Xplot2d[:, 0], Xplot2d[:, 1], robust_md01,
            cmap=plt.cm.jet, color=robust_md01)
        ax.set_zlabel('Mahalanobis distance')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_title('Mahalanobis distance (robust covariance)')

    # Same figure for the empirical estimator.
    fig = plt.figure()
    if args.plotdims == 2:
        ax = fig.add_subplot(1, 1, 1)
        ax.scatter(Xplot2d[:, 0], Xplot2d[:, 1], 
            cmap=plt.cm.jet, c=empirical_md01, s=60, linewidth='0')
    else:
        ax = fig.add_subplot(1, 1, 1, projection='3d')
        ax.plot_trisurf(Xplot2d[:, 0], Xplot2d[:, 1], empirical_md01,
            cmap=plt.cm.jet, color=empirical_md01)
        ax.set_zlabel('Mahalanobis distance')

    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_title('Mahalanobis distance (empirical covariance)')
    
#.........这里部分代码省略.........
开发者ID:Libardo1,项目名称:modeling,代码行数:103,代码来源:outliers.py

示例11:

# 需要导入模块: from sklearn.covariance import EmpiricalCovariance [as 别名]
# 注意: mahalanobis 是 EmpiricalCovariance 的实例方法, 无法单独导入; 请先拟合实例, 再调用 cov.mahalanobis(X)
# Excerpt of a larger script: fig, offset_left, X, n_samples, n_outliers,
# emp_cov and robust_cov are defined before this fragment.
offset_bottom = fig.subplotpars.bottom
width = fig.subplotpars.right - offset_left
# Three stacked panels: raw data, empirical distances, robust distances.
subfig1 = pl.subplot(3, 1, 1)
subfig2 = pl.subplot(3, 1, 2)
subfig3 = pl.subplot(3, 1, 3)

# Show data set
# All samples in black, then the last n_outliers rows again in red on top.
subfig1.scatter(X[:, 0], X[:, 1], color='black', label='inliers')
subfig1.scatter(X[:, 0][-n_outliers:], X[:, 1][-n_outliers:],
                color='red', label='outliers')
# Keep the automatic left limit and fix the right limit at 11.
subfig1.set_xlim(subfig1.get_xlim()[0], 11.)
subfig1.set_title("Mahalanobis distances of a contaminated data set:")
subfig1.legend(loc="upper right")

# Empirical covariance -based Mahalanobis distances
# Distance of every sample plotted against its index; the last n_outliers
# samples are redrawn in red.
subfig2.scatter(np.arange(n_samples), emp_cov.mahalanobis(X),
                color='black', label='inliers')
subfig2.scatter(np.arange(n_samples)[-n_outliers:],
                emp_cov.mahalanobis(X)[-n_outliers:],
                color='red', label='outliers')
subfig2.set_ylabel("Mahal. dist.")
subfig2.set_title("1. from empirical estimates")
# Place the panel explicitly: [left, bottom, width, height].
subfig2.axes.set_position(pos=[offset_left, 0.39, width, .2])

# MCD-based Mahalanobis distances
# Same layout as the panel above, using the robust estimator.
subfig3.scatter(np.arange(n_samples), robust_cov.mahalanobis(X),
                color='black', label='inliers')
subfig3.scatter(np.arange(n_samples)[-n_outliers:],
                robust_cov.mahalanobis(X)[-n_outliers:],
                color='red', label='outliers')
subfig3.set_ylabel("Mahal. dist.")
开发者ID:bvtrach,项目名称:scikit-learn,代码行数:33,代码来源:plot_mahalanobis_distances.py

示例12: EmpiricalCovariance

# 需要导入模块: from sklearn.covariance import EmpiricalCovariance [as 别名]
# 注意: mahalanobis 是 EmpiricalCovariance 的实例方法, 无法单独导入; 请先拟合实例, 再调用 cov.mahalanobis(X)
# Excerpt of a larger script: X, n_samples, n_outliers and robust_cov are
# defined before this fragment, and the listing is cut off after it.
# Fit the empirical (non-robust) covariance estimator on the full data.
emp_cov = EmpiricalCovariance().fit(X)


# Display results
fig = pl.figure()

# Show data set
# All samples in black, then the last n_outliers rows again in red on top.
subfig1 = pl.subplot(3, 1, 1)
subfig1.scatter(X[:, 0], X[:, 1], color='black', label='inliers')
subfig1.scatter(X[:, 0][-n_outliers:], X[:, 1][-n_outliers:],
                color='red', label='outliers')
# Keep the automatic left limit and fix the right limit at 11.
subfig1.set_xlim(subfig1.get_xlim()[0], 11.)
subfig1.set_title("Mahalanobis distances of a contaminated data set:")
subfig1.legend(loc="upper right")

# Empirical distances raised to the power 0.33 (approximately a cube root,
# matching the axis label below).
emp_mahal = emp_cov.mahalanobis(X) ** (0.33)
subfig2 = pl.subplot(2, 2, 3)
# One box for all but the last n_outliers samples, one for those samples.
subfig2.boxplot([emp_mahal[:-n_outliers], emp_mahal[-n_outliers:]], widths=.25)
# Individual values drawn as '+' markers just right of each box.
subfig2.plot(1.26 * np.ones(n_samples - n_outliers),
             emp_mahal[:-n_outliers], '+k', markeredgewidth=1)
subfig2.plot(2.26 * np.ones(n_outliers),
             emp_mahal[-n_outliers:], '+k', markeredgewidth=1)
subfig2.axes.set_xticklabels(('inliers', 'outliers'), size=11)
subfig2.set_ylabel(r"$\sqrt[3]{\rm{(Mahal. dist.)}}$")
subfig2.set_title("1. from non-robust estimates\n(Maximum Likelihood)")

# Same transformation and box plot for the robust distances.
robust_mahal = robust_cov.mahalanobis(X) ** (0.33)
subfig3 = pl.subplot(2, 2, 4)
subfig3.boxplot([robust_mahal[:-n_outliers], robust_mahal[-n_outliers:]],
                widths=.25)
subfig3.plot(1.26 * np.ones(n_samples - n_outliers),
开发者ID:aravindgd,项目名称:scikit-learn,代码行数:33,代码来源:plot_mahalanobis_distances.py

示例13: ECDF

# 需要导入模块: from sklearn.covariance import EmpiricalCovariance [as 别名]
# 注意: mahalanobis 是 EmpiricalCovariance 的实例方法, 无法单独导入; 请先拟合实例, 再调用 cov.mahalanobis(X)
# save for heuristic correction
# Excerpt of a larger script: df_train and df_test are defined earlier.
# The raw test-set 'var15' is captured before the column is overwritten
# below.
age = df_test['var15']
# The ECDF is built from the training column only and then applied to both
# sets, so train and test share the same mapping.
age_ecdf = ECDF(df_train['var15'])
df_train['var15'] = age_ecdf(df_train['var15'])
df_test['var15'] = age_ecdf(df_test['var15'])

# feature engineering
# Replace the sentinel value -999999 in 'var3' with 2.0 and add a per-row
# count of zero-valued cells, independently for each data set.
df_train.loc[df_train['var3'] == -999999.000000, 'var3'] = 2.0
df_train['num_zeros'] = (df_train == 0).sum(axis=1)
# The mask must come from df_test itself: the previous version built it from
# df_train, so test rows were selected by the training set's values.
df_test.loc[df_test['var3'] == -999999.000000, 'var3'] = 2.0
df_test['num_zeros'] = (df_test == 0).sum(axis=1)

# outliers
# Fit an empirical covariance on the training frame and drop every training
# row whose Mahalanobis distance reaches the hard-coded limit of 40000; the
# same boolean mask is applied to df_target so rows stay aligned.
ec = EmpiricalCovariance()
ec = ec.fit(df_train)
m2 = ec.mahalanobis(df_train)
df_train = df_train[m2 < 40000]
df_target = df_target[m2 < 40000]

# clip
# df_test = df_test.clip(df_train.min(), df_train.max(), axis=1)

# standard preprocessing
# Two-step pipeline: drop the columns listed in ZERO_VARIANCE_COLUMNS and
# CORRELATED_COLUMNS, then standardize what remains.
prep = Pipeline([
    ('cd', ColumnDropper(drop=ZERO_VARIANCE_COLUMNS + CORRELATED_COLUMNS)),
    ('std', StandardScaler())
])

# The pipeline is fitted on the training data only and reused unchanged on
# the test data.
X_train = prep.fit_transform(df_train)
X_test = prep.transform(df_test)
y_train = df_target.values
开发者ID:dwyatte,项目名称:kaggle-santander,代码行数:33,代码来源:submission_stack.py


注:本文中的sklearn.covariance.EmpiricalCovariance.mahalanobis方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。