当前位置: 首页>>代码示例>>Python>>正文


Python MinCovDet.mahalanobis方法代码示例

本文整理汇总了Python中sklearn.covariance.MinCovDet.mahalanobis方法的典型用法代码示例。如果您正苦于以下问题:Python MinCovDet.mahalanobis方法的具体用法?Python MinCovDet.mahalanobis怎么用?Python MinCovDet.mahalanobis使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.covariance.MinCovDet的用法示例。


在下文中一共展示了MinCovDet.mahalanobis方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: getMahalanobisRobust

# 需要导入模块: from sklearn.covariance import MinCovDet [as 别名]
# 或者: from sklearn.covariance.MinCovDet import mahalanobis [as 别名]
def getMahalanobisRobust(dat, critical_alpha = 0.01, good_rows = np.zeros(0)):

    '''Calculate the Mahalanobis distance from the sample vector.'''
    
    
    if good_rows.size == 0:
        good_rows = np.any(~np.isnan(dat), axis=1);
    
    #import pdb
    #pdb.set_trace()

    try:

        robust_cov = MinCovDet().fit(dat[good_rows])
        mahalanobis_dist = np.sqrt(robust_cov.mahalanobis(dat))
    except ValueError:
        #this step will fail if the covariance matrix is not singular. This happens if the data is not 
        #a unimodal symetric distribution. For example there is too many small noisy particles. Therefore
        #I will take a safe option and return zeros in the mahalanobis distance if this is the case.
        mahalanobis_dist = np.zeros(dat.shape[0])

    #critial distance of the maholanobis distance using the chi-square distirbution
    #https://en.wikiversity.org/wiki/Mahalanobis%27_distance
    #http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chi2.html
    maha_lim = chi2.ppf(1-critical_alpha, dat.shape[1])
    outliers = mahalanobis_dist>maha_lim
    
    return mahalanobis_dist, outliers, maha_lim
开发者ID:KezhiLi,项目名称:Multiworm_Tracking,代码行数:30,代码来源:getFilteredSkels.py

示例2: launch_mcd_on_dataset

# 需要导入模块: from sklearn.covariance import MinCovDet [as 别名]
# 或者: from sklearn.covariance.MinCovDet import mahalanobis [as 别名]
def launch_mcd_on_dataset(n_samples, n_features, n_outliers, tol_loc, tol_cov,
                          tol_support):

    rand_gen = np.random.RandomState(0)
    data = rand_gen.randn(n_samples, n_features)
    # add some outliers
    outliers_index = rand_gen.permutation(n_samples)[:n_outliers]
    outliers_offset = 10. * \
        (rand_gen.randint(2, size=(n_outliers, n_features)) - 0.5)
    data[outliers_index] += outliers_offset
    inliers_mask = np.ones(n_samples).astype(bool)
    inliers_mask[outliers_index] = False

    pure_data = data[inliers_mask]
    # compute MCD by fitting an object
    mcd_fit = MinCovDet(random_state=rand_gen).fit(data)
    T = mcd_fit.location_
    S = mcd_fit.covariance_
    H = mcd_fit.support_
    # compare with the estimates learnt from the inliers
    error_location = np.mean((pure_data.mean(0) - T) ** 2)
    assert(error_location < tol_loc)
    error_cov = np.mean((empirical_covariance(pure_data) - S) ** 2)
    assert(error_cov < tol_cov)
    assert(np.sum(H) >= tol_support)
    assert_array_almost_equal(mcd_fit.mahalanobis(data), mcd_fit.dist_)
开发者ID:JeongSeonGyo,项目名称:EnergyData,代码行数:28,代码来源:test_robust_covariance.py

示例3: mahalanobis_plot

# 需要导入模块: from sklearn.covariance import MinCovDet [as 别名]
# 或者: from sklearn.covariance.MinCovDet import mahalanobis [as 别名]
def mahalanobis_plot(ctry=None, df=None, weighted=True, inliers=False):
    """
    See http://scikit-learn.org/0.13/modules/outlier_detection.html#\
        fitting-an-elliptic-envelop

    for details.
    """
    if df is None and ctry is None:
        raise ValueError('Either the country or a dataframe must be supplied')
    elif df is None:
        df = load_res(ctry, weighted=weighted)
    if inliers:
        df = get_inliers(df=df)
    X = df.values
    robust_cov = MinCovDet().fit(X)
    #-----------------------------------------------------------------------------
    # compare estimators learnt from the full data set with true parameters
    emp_cov = EmpiricalCovariance().fit(X)
    #-----------------------------------------------------------------------------
    # Display results
    fig = plt.figure()
    fig.subplots_adjust(hspace=-.1, wspace=.4, top=.95, bottom=.05)
    #-----------------------------------------------------------------------------
    # Show data set
    ax1 = fig.add_subplot(1, 1, 1)
    ax1.scatter(X[:, 0], X[:, 1], alpha=.5, color='k', marker='.')
    ax1.set_title(country_code[ctry])
    #-----------------------------------------------------------------------------
    # Show contours of the distance functions
    xx, yy = np.meshgrid(np.linspace(ax1.get_xlim()[0], ax1.get_xlim()[1],
                                     100),
                         np.linspace(ax1.get_ylim()[0], ax1.get_ylim()[1],
                                     100))
    zz = np.c_[xx.ravel(), yy.ravel()]
    #-----------------------------------------------------------------------------
    mahal_emp_cov = emp_cov.mahalanobis(zz)
    mahal_emp_cov = mahal_emp_cov.reshape(xx.shape)
    emp_cov_contour = ax1.contour(xx, yy, np.sqrt(mahal_emp_cov),
                                  cmap=plt.cm.PuBu_r,
                                  linestyles='dashed')
    #-----------------------------------------------------------------------------
    mahal_robust_cov = robust_cov.mahalanobis(zz)
    mahal_robust_cov = mahal_robust_cov.reshape(xx.shape)
    robust_contour = ax1.contour(xx, yy, np.sqrt(mahal_robust_cov),
                                 cmap=plt.cm.YlOrBr_r, linestyles='dotted')
    ax1.legend([emp_cov_contour.collections[1], robust_contour.collections[1]],
               ['MLE dist', 'robust dist'],
               loc="upper right", borderaxespad=0)
    ax1.grid()
    return (fig, ax1, ctry)
开发者ID:RaoUmer,项目名称:data-wrangling,代码行数:52,代码来源:outliers_after_weighting.py

示例4: estimateGaussian

# 需要导入模块: from sklearn.covariance import MinCovDet [as 别名]
# 或者: from sklearn.covariance.MinCovDet import mahalanobis [as 别名]
def estimateGaussian(nb_objects_init, nb_objects_final, thr, who, genes, siRNA,
                     loadingFolder = '../resultData/thrivisions/predictions',
                     threshold=0.05,):
    
    arr=np.vstack((thr, nb_objects_init, nb_objects_final)).T    
    #deleting siRNAs that have only one experiment
    print len(siRNA)
    all_=Counter(siRNA);siRNA = np.array(siRNA)
    toDelsi=filter(lambda x: all_[x]==1, all_)
    toDelInd=[]
    for si in toDelsi:
        toDelInd.extend(np.where(siRNA==si)[0])
    print len(toDelInd)
    dd=dict(zip(range(4), [arr, who, genes, siRNA]))
    for array_ in dd:
        dd[array_]=np.delete(dd[array_],toDelInd,0 )
    arr, who, genes, siRNA = [dd[el] for el in range(4)]
    
    print arr.shape
    
    arr_ctrl=arr[np.where(np.array(genes)=='ctrl')]
    ctrlcov=MinCovDet().fit(arr_ctrl)
    
    robdist= ctrlcov.mahalanobis(arr)*np.sign(arr[:,0]-np.mean(arr[:,0]))
    new_siRNA=np.array(siRNA)[np.where((genes!='ctrl')&(robdist>0))]
    pval,qval =empiricalPvalues(np.absolute(robdist[np.where(genes=='ctrl')])[:, np.newaxis],\
                           robdist[np.where((genes!='ctrl')&(robdist>0))][:, np.newaxis],\
                           folder=loadingFolder, name="thrivision", sup=True, also_pval=True)
    assert new_siRNA.shape==qval.shape
    hits=Counter(new_siRNA[np.where(qval<threshold)[0]])
    
    hits=filter(lambda x: float(hits[x])/all_[x]>=0.5, hits)
    gene_hits = [genes[list(siRNA).index(el)] for el in hits]
    gene_hits=Counter(gene_hits)
    
    return robdist, pval,qval, hits, gene_hits
开发者ID:PeterJackNaylor,项目名称:Xb_screen,代码行数:38,代码来源:thrivision.py

示例5:

# 需要导入模块: from sklearn.covariance import MinCovDet [as 别名]
# 或者: from sklearn.covariance.MinCovDet import mahalanobis [as 别名]
                               color='red', label='outliers')
subfig1.set_xlim(subfig1.get_xlim()[0], 11.)
subfig1.set_title("Mahalanobis distances of a contaminated data set:")

# Show contours of the distance functions
xx, yy = np.meshgrid(np.linspace(plt.xlim()[0], plt.xlim()[1], 100),
                     np.linspace(plt.ylim()[0], plt.ylim()[1], 100))
zz = np.c_[xx.ravel(), yy.ravel()]

mahal_emp_cov = emp_cov.mahalanobis(zz)
mahal_emp_cov = mahal_emp_cov.reshape(xx.shape)
emp_cov_contour = subfig1.contour(xx, yy, np.sqrt(mahal_emp_cov),
                                  cmap=plt.cm.PuBu_r,
                                  linestyles='dashed')

mahal_robust_cov = robust_cov.mahalanobis(zz)
mahal_robust_cov = mahal_robust_cov.reshape(xx.shape)
robust_contour = subfig1.contour(xx, yy, np.sqrt(mahal_robust_cov),
                                 cmap=plt.cm.YlOrBr_r, linestyles='dotted')

subfig1.legend([emp_cov_contour.collections[1], robust_contour.collections[1],
                inlier_plot, outlier_plot],
               ['MLE dist', 'robust dist', 'inliers', 'outliers'],
               loc="upper right", borderaxespad=0)
plt.xticks(())
plt.yticks(())

# Plot the scores for each point
# emp_mahal = emp_cov.mahalanobis(X - np.mean(X, 0)) ** (0.33)
# subfig2 = plt.subplot(2, 2, 3)
# subfig2.boxplot([emp_mahal[:-n_outliers], emp_mahal[-n_outliers:]], widths=.25)
开发者ID:tkskow,项目名称:ProsjektOppgave,代码行数:33,代码来源:mahalanobis_test.py

示例6:

# 需要导入模块: from sklearn.covariance import MinCovDet [as 别名]
# 或者: from sklearn.covariance.MinCovDet import mahalanobis [as 别名]
                               color='red', label='outliers')
subfig1.set_xlim(subfig1.get_xlim()[0], 11.)
subfig1.set_title("Mahalanobis distances of a contaminated data set:")

# Show contours of the distance functions
xx, yy = np.meshgrid(np.linspace(pl.xlim()[0], pl.xlim()[1], 100),
                     np.linspace(pl.ylim()[0], pl.ylim()[1], 100))
zz = np.c_[xx.ravel(), yy.ravel()]

mahal_emp_cov = emp_cov.mahalanobis(zz)
mahal_emp_cov = mahal_emp_cov.reshape(xx.shape)
emp_cov_contour = subfig1.contour(xx, yy, np.sqrt(mahal_emp_cov),
                                  cmap=pl.cm.PuBu_r,
                                  linestyles='dashed')

mahal_robust_cov = robust_cov.mahalanobis(zz)
mahal_robust_cov = mahal_robust_cov.reshape(xx.shape)
robust_contour = subfig1.contour(xx, yy, np.sqrt(mahal_robust_cov),
                                 cmap=pl.cm.YlOrBr_r, linestyles='dotted')

subfig1.legend([emp_cov_contour.collections[1], robust_contour.collections[1],
                inlier_plot, outlier_plot],
               ['MLE dist', 'robust dist', 'inliers', 'outliers'],
               loc="upper right", borderaxespad=0)
pl.xticks(())
pl.yticks(())

# Plot the scores for each point
emp_mahal = emp_cov.mahalanobis(X - np.mean(X, 0)) ** (0.33)
subfig2 = pl.subplot(2, 2, 3)
subfig2.boxplot([emp_mahal[:-n_outliers], emp_mahal[-n_outliers:]], widths=.25)
开发者ID:FH235918,项目名称:scikit-learn,代码行数:33,代码来源:plot_mahalanobis_distances.py

示例7: main

# 需要导入模块: from sklearn.covariance import MinCovDet [as 别名]
# 或者: from sklearn.covariance.MinCovDet import mahalanobis [as 别名]
def main():
    parser = argparse.ArgumentParser(
        description='Plot outlier-like distances for a 2-dimensional dataset')
    parser.add_argument(
        'dataset', type=argparse.FileType('r'),
        help='a CSV file containing the dataset')
    parser.add_argument(
        '--plot', type=str, choices=['train', 'grid'], default='grid',
        help='plot the dataset or a grid evenly distributed over its span')
    parser.add_argument(
        '--plotdims', type=int, choices=[2, 3], default=2,
        help='the number of dimensions to plot')

    args = parser.parse_args()

    X = np.loadtxt(args.dataset, delimiter=',')
    fig = plt.figure()

    xformer = NullTransformer()

    if X.shape[1] > 2:
        xformer = PCA(n_components=2)
        X = xformer.fit_transform(X)

    if args.plotdims == 2:
        plt.scatter(X[:, 0], X[:, 1], s=60, linewidth='0')
    else:
        plt.scatter(X[:, 0], X[:, 1])
    plt.show(block=False)

    path_to_script = os.path.realpath(__file__)
    dir_of_script = os.path.dirname(path_to_script)
    dataset_path = dir_of_script + '/outliers.npy'
    np.save(dataset_path, X)
    
    ###########################################################################
    # Train autoencoder with the n samples until convergence.  Run
    # evenly distributed samples through the autoencoder and compute
    # their reconstruction error.
    ###########################################################################

    maxseq_orig = np.max(X)
    minseq_orig = np.min(X)
    seqrange = np.abs(maxseq_orig - minseq_orig)
    maxseq = maxseq_orig + 0.5 * seqrange
    minseq = minseq_orig - 0.5 * seqrange
    print("minseq", minseq, "maxseq", maxseq)
    if args.plot == 'grid':
        seq = np.linspace(minseq, maxseq, num=50, endpoint=True)
        Xplot = np.array([_ for _ in product(seq, seq)])
    else:
        Xplot = X

    robust_cov = MinCovDet().fit(X)
    robust_md = robust_cov.mahalanobis(Xplot)

    empirical_cov = EmpiricalCovariance().fit(X)
    empirical_md = empirical_cov.mahalanobis(Xplot)

    # Assume Xplot is at least 2-dimensional.
    if Xplot.shape[1] > 2:
        Xplot2d = bh_sne(Xplot)
    else:
        Xplot2d = Xplot

    robust_md01 = robust_md - np.nanmin(robust_md)
    robust_md01 = robust_md01 / np.nanmax(robust_md01)

    empirical_md01 = empirical_md - np.nanmin(empirical_md)
    empirical_md01 = empirical_md01 / np.nanmax(empirical_md01)

    fig = plt.figure()
    if args.plotdims == 2:
        ax = fig.add_subplot(1, 1, 1)
        ax.scatter(Xplot2d[:, 0], Xplot2d[:, 1], 
            cmap=plt.cm.jet, c=robust_md01, s=60, linewidth='0')
    else:
        ax = fig.add_subplot(1, 1, 1, projection='3d')
        ax.plot_trisurf(Xplot2d[:, 0], Xplot2d[:, 1], robust_md01,
            cmap=plt.cm.jet, color=robust_md01)
        ax.set_zlabel('Mahalanobis distance')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_title('Mahalanobis distance (robust covariance)')

    fig = plt.figure()
    if args.plotdims == 2:
        ax = fig.add_subplot(1, 1, 1)
        ax.scatter(Xplot2d[:, 0], Xplot2d[:, 1], 
            cmap=plt.cm.jet, c=empirical_md01, s=60, linewidth='0')
    else:
        ax = fig.add_subplot(1, 1, 1, projection='3d')
        ax.plot_trisurf(Xplot2d[:, 0], Xplot2d[:, 1], empirical_md01,
            cmap=plt.cm.jet, color=empirical_md01)
        ax.set_zlabel('Mahalanobis distance')

    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_title('Mahalanobis distance (empirical covariance)')
    
#.........这里部分代码省略.........
开发者ID:Libardo1,项目名称:modeling,代码行数:103,代码来源:outliers.py

示例8: ols

# 需要导入模块: from sklearn.covariance import MinCovDet [as 别名]
# 或者: from sklearn.covariance.MinCovDet import mahalanobis [as 别名]
lm2 = ols('word_diff ~ Age + C(Centre_ID)',
         data=clean_st,subset=subset).fit()

print(lm2.summary())

# <markdowncell>

# # Snippets. Might come back to this later:

# <codecell>

from scipy.stats import pearsonr
from sklearn.covariance import MinCovDet

# just look at what's interesting for now, and drop the NAs involved
clean = st_v_merged.loc[:,['norm_diff','Interview_Suggested_Ranking_numerical_']]
clean = clean.dropna(axis=0)

# calculate robust covariance estimate, calculate what's too far away
mcd = MinCovDet()
mcd.fit(clean)

pearsonr(clean.iloc[:,0],clean.iloc[:,1])

# <codecell>

d = mcd.mahalanobis(clean)
d.sort()
d

开发者ID:kenben,项目名称:Suas,代码行数:31,代码来源:volunteer_quickLook.py

示例9: __init__

# 需要导入模块: from sklearn.covariance import MinCovDet [as 别名]
# 或者: from sklearn.covariance.MinCovDet import mahalanobis [as 别名]
class Grid:
    def __init__(self, dim=10, noise=0.1, outliers=0):
        self.points = create_grid(dim, noise, outliers)
        self.polardata = np.zeros((len(self.points), 2))
        self.polar_cov = 0
        self.inlier_points = np.zeros((len(self.points), 2))
        self.inlier_indicies = np.zeros((len(self.points), 1))
        self.normalized_points = np.zeros((len(self.points), 2))
        self.theta = 0
        self.step_size = 1
        self.linedata = np.zeros((3*len(self.points), 2))
        self.normalized_point_ids = []
        self.bounds = [0, 0, 0, 0]

    def step(self, rotation=0):
        self.points = rotate(self.points, rotation)

    def analyze(self, mahalanobis_tolerance=2):
        self.inlier_points = np.zeros((len(self.points), 2))
        for id1 in range(len(self.points)):
            id2 = closest_point(self.points, self.points[id1], id1)[0]

            #keep lines fro plotting purposes
            self.linedata[3*id1] = self.points[id1]
            self.linedata[3*id1+1] = self.points[id2]
            self.linedata[3*id1+2] = [None, None]

            # we are repeating every pi/2, so we compress the angle space by 4x
            a = 4*math.atan2((self.points[id1, 1] - self.points[id2, 1]), (self.points[id1, 0] - self.points[id2, 0]))
            r = np.linalg.norm(self.points[id1] - self.points[id2])
            self.polardata[id1] = [r*math.cos(a), r*math.sin(a)]

        #find the minimal covariance inlier cluster
        self.polar_cov = MinCovDet().fit(self.polardata)

        # extract the grid angle and size.  angle is divided by 4 because
        # we previously scaled it up to repeat every 90 deg
        self.theta = math.atan2(-self.polar_cov.location_[1], self.polar_cov.location_[0])/4
        self.step_size = np.linalg.norm(self.polar_cov.location_)

        # extract inlier points
        polar_mahal = self.polar_cov.mahalanobis(self.polardata)**(0.33)
        inlier_count = 0
        for i in range(len(polar_mahal)):
            if polar_mahal[i] < mahalanobis_tolerance: # stdev tolerance to outliers
                self.inlier_points[inlier_count] = self.points[i]
                self.inlier_indicies[inlier_count] = i
                inlier_count += 1

        self.normalized_points = rotate(self.inlier_points[:inlier_count], -self.theta)/self.step_size

        #enumerate grid IDs
        origin_id = closest_point(self.normalized_points, np.mean(self.normalized_points))[0]
        self.normalized_points = self.normalized_points - self.normalized_points[origin_id]
        inlier_count = 0

        self.bounds = [sys.maxint, sys.maxint, -sys.maxint, -sys.maxint]
        for p in self.normalized_points:
            x = round(p[0])
            y = round(p[1])
            d = np.linalg.norm(p-[x, y])
            if d < 0.4: #tolerance from unit position
                self.normalized_points[inlier_count] = [x, y]
                if (x < self.bounds[0]):
                    self.bounds[0] = x
                if (x > self.bounds[2]):
                    self.bounds[2] = x
                if (y < self.bounds[1]):
                    self.bounds[1] = y
                if (y > self.bounds[3]):
                    self.bounds[3] = y
                inlier_count += 1

        self.normalized_points = self.normalized_points[:inlier_count]
开发者ID:jcl5m1,项目名称:CVToolsPython,代码行数:76,代码来源:GridTest.py

示例10:

# 需要导入模块: from sklearn.covariance import MinCovDet [as 别名]
# 或者: from sklearn.covariance.MinCovDet import mahalanobis [as 别名]
subfig1.scatter(X[:, 0][-n_outliers:], X[:, 1][-n_outliers:],
                color='red', label='outliers')
subfig1.set_xlim(subfig1.get_xlim()[0], 11.)
subfig1.set_title("Mahalanobis distances of a contaminated data set:")
subfig1.legend(loc="upper right")

emp_mahal = emp_cov.mahalanobis(X) ** (0.33)
subfig2 = pl.subplot(2, 2, 3)
subfig2.boxplot([emp_mahal[:-n_outliers], emp_mahal[-n_outliers:]], widths=.25)
subfig2.plot(1.26 * np.ones(n_samples - n_outliers),
             emp_mahal[:-n_outliers], '+k', markeredgewidth=1)
subfig2.plot(2.26 * np.ones(n_outliers),
             emp_mahal[-n_outliers:], '+k', markeredgewidth=1)
subfig2.axes.set_xticklabels(('inliers', 'outliers'), size=11)
subfig2.set_ylabel(r"$\sqrt[3]{\rm{(Mahal. dist.)}}$")
subfig2.set_title("1. from non-robust estimates\n(Maximum Likelihood)")

robust_mahal = robust_cov.mahalanobis(X) ** (0.33)
subfig3 = pl.subplot(2, 2, 4)
subfig3.boxplot([robust_mahal[:-n_outliers], robust_mahal[-n_outliers:]],
                widths=.25)
subfig3.plot(1.26 * np.ones(n_samples - n_outliers),
             robust_mahal[:-n_outliers], '+k', markeredgewidth=1)
subfig3.plot(2.26 * np.ones(n_outliers),
             robust_mahal[-n_outliers:], '+k', markeredgewidth=1)
subfig3.axes.set_xticklabels(('inliers', 'outliers'), size=11)
subfig3.set_ylabel(r"$\sqrt[3]{\rm{(Mahal. dist.)}}$")
subfig3.set_title("2. from robust estimates\n(Minimum Covariance Determinant)")

pl.show()
开发者ID:aravindgd,项目名称:scikit-learn,代码行数:32,代码来源:plot_mahalanobis_distances.py


注:本文中的sklearn.covariance.MinCovDet.mahalanobis方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。