This article collects typical usage examples of the MinCovDet.fit method from Python's sklearn.covariance module. If you are wondering what MinCovDet.fit does, how to call it, or want to see it in real code, the hand-picked samples below should help. You can also read further about the class it belongs to, sklearn.covariance.MinCovDet.
Three code examples of MinCovDet.fit are shown below, sorted by popularity by default.
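Before the examples, here is a minimal sketch of the method itself (standard scikit-learn estimator API; the toy data below is illustrative, not taken from the examples):

import numpy as np
from sklearn.covariance import MinCovDet

X = np.random.RandomState(0).normal(size=(100, 2))
mcd = MinCovDet(random_state=0).fit(X)  # fit returns the fitted estimator
print(mcd.location_)    # robust estimate of the data's location
print(mcd.covariance_)  # robust covariance estimate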
Example 1: test_mcd_issue1127
# Required import: from sklearn.covariance import MinCovDet [as alias]
# Or: from sklearn.covariance.MinCovDet import fit [as alias]
import numpy as np
from sklearn.covariance import MinCovDet

def test_mcd_issue1127():
    # Check that the code does not break with X.shape = (3, 1)
    # (i.e. n_support = n_samples)
    rnd = np.random.RandomState(0)
    X = rnd.normal(size=(3, 1))
    mcd = MinCovDet()
    mcd.fit(X)
Example 2: ols
# Required import: from sklearn.covariance import MinCovDet [as alias]
# Or: from sklearn.covariance.MinCovDet import fit [as alias]
# ols comes from statsmodels; clean_st, subset, and st_v_merged are defined in earlier cells
from statsmodels.formula.api import ols

lm2 = ols('word_diff ~ Age + C(Centre_ID)',
          data=clean_st, subset=subset).fit()
print(lm2.summary())
# <markdowncell>
# # Snippets. Might come back to this later:
# <codecell>
from scipy.stats import pearsonr
from sklearn.covariance import MinCovDet
# just look at what's interesting for now, and drop the NAs involved
clean = st_v_merged.loc[:, ['norm_diff', 'Interview_Suggested_Ranking_numerical_']]
clean = clean.dropna(axis=0)
# calculate a robust covariance estimate, then work out what is too far away
mcd = MinCovDet()
mcd.fit(clean)
pearsonr(clean.iloc[:, 0], clean.iloc[:, 1])
# <codecell>
d = mcd.mahalanobis(clean)  # squared Mahalanobis distance of each row
d.sort()                    # in-place sort; d now holds the sorted distances
d
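A possible next step for the snippet above, shown as a hedged sketch: sklearn's mahalanobis returns squared distances, so they can be compared against a chi-square quantile to drop the extreme rows before recomputing the correlation. The 97.5% cutoff is an assumed choice, not part of the original:

from scipy.stats import chi2

d2 = mcd.mahalanobis(clean)                      # squared distances, one per row
keep = d2 <= chi2.ppf(0.975, df=clean.shape[1])  # assumed 97.5% cutoff
pearsonr(clean[keep].iloc[:, 0], clean[keep].iloc[:, 1])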
Example 3: Outlier_detection
# Required import: from sklearn.covariance import MinCovDet [as alias]
# Or: from sklearn.covariance.MinCovDet import fit [as alias]
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from scipy.spatial import distance
from sklearn.covariance import MinCovDet as MCD


class Outlier_detection(object):
    def __init__(self, support_fraction=0.95, verbose=True, chi2_percentile=0.995):
        self.verbose = verbose
        self.support_fraction = support_fraction
        self.chi2 = stats.chi2
        self.mcd = MCD(store_precision=True, support_fraction=support_fraction)
        self.chi2_percentile = chi2_percentile
    def fit(self, X):
        """Fit the MCD estimator, print summary stats (if verbose is on), and
        store a boolean mask of the extreme observations in self.iextreme_values."""
        self.mcd.fit(X)
        mahalanobis = lambda p: distance.mahalanobis(p, self.mcd.location_, self.mcd.precision_)
        d = np.array([mahalanobis(p) for p in X])  # Mahalanobis distance of each row
        self.d2 = d ** 2  # squared Mahalanobis distances
        n, self.degrees_of_freedom_ = X.shape
        self.iextreme_values = self.d2 > self.chi2.ppf(self.chi2_percentile, self.degrees_of_freedom_)
        if self.verbose:
            print("%.3f proportion of outliers at %.1f%% chi2 percentile," % (self.iextreme_values.sum() / float(n), 100 * self.chi2_percentile))
            print("with support fraction %.2f." % self.support_fraction)
        return self
    def plot(self, log=False, sort=False):
        """
        Because plotting is always fun.
        log: plot log(distance-squared) instead of distance-squared
        sort: sort the data by distance before plotting
        """
        n = self.d2.shape[0]
        fig = plt.figure()
        x = np.arange(n)
        ax = fig.add_subplot(111)
        transform = (lambda x: x) if not log else (lambda x: np.log(x))
        chi_line = self.chi2.ppf(self.chi2_percentile, self.degrees_of_freedom_)
        chi_line = transform(chi_line)
        d2 = transform(self.d2)
        if sort:
            isort = np.argsort(d2)
            d2 = d2[isort]                         # sort the distances...
            extreme = self.iextreme_values[isort]  # ...and keep the outlier mask aligned
            ax.scatter(x, d2, alpha=0.7, facecolors='none')
            plt.plot(x, transform(self.chi2.ppf(np.linspace(0, 1, n), self.degrees_of_freedom_)),
                     c="r", label="distribution assuming normal")
        else:
            extreme = self.iextreme_values
            ax.scatter(x, d2)
        ax.scatter(x[extreme], d2[extreme], color="r")  # mark the flagged outliers in red
        ax.hlines(chi_line, 0, n,
                  label=r"%.1f%% $\chi^2$ quantile" % (100 * self.chi2_percentile), linestyles="dotted")
        ax.legend()
        ax.set_ylabel("distance squared")
        ax.set_xlabel("observation")
        ax.set_xlim(0, self.d2.shape[0])
        plt.show()
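A hypothetical usage sketch for the class above (the synthetic data and parameter values are illustrative, not from the original):

import numpy as np

rng = np.random.RandomState(42)
X = np.vstack([rng.normal(size=(95, 2)),           # well-behaved bulk
               rng.normal(loc=6.0, size=(5, 2))])  # a few planted outliers

detector = Outlier_detection(support_fraction=0.95, chi2_percentile=0.995).fit(X)
outlier_idx = np.where(detector.iextreme_values)[0]  # indices flagged as extreme
detector.plot(sort=True)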