This article collects typical usage examples of the sklearn.covariance.EllipticEnvelope.fit method in Python. If you have been wondering what EllipticEnvelope.fit does, how to call it, or how it is used in practice, the curated code examples below may help. You can also read more about its containing class, sklearn.covariance.EllipticEnvelope.
The following presents 15 code examples of EllipticEnvelope.fit, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
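Before the collected examples, here is a minimal, self-contained sketch of the typical fit/predict workflow. It is only an illustration: the synthetic data and the contamination value are made up and do not come from any of the examples below.

import numpy as np
from sklearn.covariance import EllipticEnvelope

rng = np.random.RandomState(42)
X = rng.randn(200, 2)                        # mostly "normal" points
X[:10] += 6                                  # shift a few points so they act as outliers

clf = EllipticEnvelope(contamination=0.05)   # expected fraction of outliers
clf.fit(X)                                   # fit the robust covariance estimate
labels = clf.predict(X)                      # +1 for inliers, -1 for outliers
scores = clf.decision_function(X)            # negative scores indicate outliers
dists = clf.mahalanobis(X)                   # squared Mahalanobis distances

Note that several of the examples below were written against older scikit-learn releases and Python 2 (decision_function accepting raw_values or raw_mahalanobis keywords, print statements, xrange), so minor adjustments may be needed on current versions.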
Example 1: calc
# Required import: from sklearn.covariance import EllipticEnvelope [as alias]
# Or: from sklearn.covariance.EllipticEnvelope import fit [as alias]
def calc(self,outliers_fraction):
data, dqs, raw = self.get_data()
clf = EllipticEnvelope(contamination=outliers_fraction)
X = list(zip(data['Tbandwidth'], data['Tlatency'], data['Tframerate']))  # rows of (bandwidth, latency, framerate)
clf.fit(X)
#data['y_pred'] = clf.decision_function(X).ravel()
#data['y_pred'] = clf.decision_function(X).ravel()
#threshold = np.percentile(data['y_pred'],100 * outliers_fraction)
data['MDist'] = clf.mahalanobis(X)
#picking "bad" outliers, not good ones
outliers = chi2_outliers(data, [.8,.9,.95], 3)
#print outliers
outliers = [i[i['Tbandwidth']<i['Tlatency']] for i in outliers]
#outliers = data[data['y_pred']<threshold]
#data['y_pred'] = data['y_pred'] > threshold
#outliers = [x[['ticketid','MDist']].merge(raw, how='inner').drop_duplicates() for x in outliers]
#print raw
#outliers = [raw[raw['ticketid'].isin(j['ticketid'])] for j in outliers]
outliers = [k[k['Tframerate']<(k['Tframerate'].mean()+k['Tframerate'].std())] for k in outliers] # making sure we don't remove aberrantly good framerates
outliers = [t.sort_values(by='MDist', ascending=False).drop_duplicates().drop(['Tbandwidth','Tlatency','Tframerate'],axis=1) for t in outliers]
#dqs = raw[raw['ticketid'].isin(dqs['ticketid'])]
#data = data.sort_values('MDist', ascending=False).drop_duplicates()
return outliers, dqs, data.sort_values(by='MDist', ascending=False).drop_duplicates().drop(['Tbandwidth','Tlatency','Tframerate'],axis=1)
Example 2: clean_series
# Required import: from sklearn.covariance import EllipticEnvelope [as alias]
# Or: from sklearn.covariance.EllipticEnvelope import fit [as alias]
def clean_series(self, token, discard=5):
"""
Remove outliers from the ratio series for a token.
Args:
discard (int): Drop the most outlying X% of the data.
Returns: OrderedDict{year: wpm}
"""
series = self.ratios[token]
X = np.array(list(series.values()))[:, np.newaxis]
env = EllipticEnvelope()
env.fit(X)
# Score each data point.
y_pred = env.decision_function(X).ravel()
# Get the discard threshold.
threshold = stats.scoreatpercentile(y_pred, discard)
return OrderedDict([
(year, ratio)
for (year, ratio), pred in zip(series.items(), y_pred)
if pred > threshold
])
Example 3: filter_remove_outlayers
# Required import: from sklearn.covariance import EllipticEnvelope [as alias]
# Or: from sklearn.covariance.EllipticEnvelope import fit [as alias]
def filter_remove_outlayers(self, flat, minimum_value=0):
"""
Remove outliers using the elliptic envelope from scikit-learn
:param flat:
:param minimum_value:
:return:
"""
from sklearn.covariance import EllipticEnvelope
flat0 = flat.copy()
flat0[np.isnan(flat)] = 0
x,y = np.nonzero(flat0)
# print np.prod(flat.shape)
# print len(y)
z = flat[(x,y)]
data = np.asarray([x,y,z]).T
clf = EllipticEnvelope(contamination=.1)
clf.fit(data)
y_pred = clf.decision_function(data)
out_inds = y_pred < minimum_value
flat[(x[out_inds], y[out_inds])] = np.NaN
return flat
Example 4: outlier_removal2
# Required import: from sklearn.covariance import EllipticEnvelope [as alias]
# Or: from sklearn.covariance.EllipticEnvelope import fit [as alias]
def outlier_removal2(features, samples, cv_predict):
outliers_fraction = 0.1
print cv_predict.shape
print samples.shape
test = np.column_stack((cv_predict, samples))
#clf = EllipticEnvelope(contamination=.1)
clf = EllipticEnvelope(contamination=.1)
#clf = svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,
# kernel="rbf", gamma=0.1)
clf.fit(test)
y_pred = clf.decision_function(test).ravel()
threshold = stats.scoreatpercentile(y_pred,
100 * outliers_fraction)
y_pred_new = y_pred > threshold
print y_pred_new
#print samples[y_pred_new]
print samples.shape
print samples[y_pred_new].shape
print features.shape
print features[y_pred_new].shape
return features[y_pred_new], samples[y_pred_new]
Example 5: plot
# Required import: from sklearn.covariance import EllipticEnvelope [as alias]
# Or: from sklearn.covariance.EllipticEnvelope import fit [as alias]
def plot(X, y):
proj = TSNE().fit_transform(X)
e = EllipticEnvelope(assume_centered=True, contamination=.25) # Outlier detection
e.fit(X)
good = np.where(e.predict(X) == 1)
X = X[good]
y = y[good]
proj = proj[good]  # keep the t-SNE projection aligned with the filtered labels
scatter(proj, y)
Example 6: filterOut
# Required import: from sklearn.covariance import EllipticEnvelope [as alias]
# Or: from sklearn.covariance.EllipticEnvelope import fit [as alias]
def filterOut(x):
x = np.array(x)
outliers_fraction=0.05
#clf = svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05, kernel="rbf", gamma=0.1)
clf = EllipticEnvelope(contamination=outliers_fraction)
clf.fit(x)
y_pred = clf.decision_function(x).ravel()
threshold = stats.scoreatpercentile(y_pred,
100 * outliers_fraction)
y_pred = y_pred > threshold
return y_pred
Example 7: test_outlier_detection
# Required import: from sklearn.covariance import EllipticEnvelope [as alias]
# Or: from sklearn.covariance.EllipticEnvelope import fit [as alias]
def test_outlier_detection():
"""
"""
rnd = np.random.RandomState(0)
X = rnd.randn(100, 10)
clf = EllipticEnvelope(contamination=0.1)
clf.fit(X)
y_pred = clf.predict(X)
assert_array_almost_equal(clf.decision_function(X, raw_mahalanobis=True), clf.mahalanobis(X - clf.location_))
assert_almost_equal(clf.score(X, np.ones(100)), (100 - y_pred[y_pred == -1].size) / 100.0)
Example 8: module4
# Required import: from sklearn.covariance import EllipticEnvelope [as alias]
# Or: from sklearn.covariance.EllipticEnvelope import fit [as alias]
def module4(self):
'''
Detect outliers from the input one-dimensional array using anomaly detection
'''
# get data
img = cv2.imread('../saliency_detection/image/pearl.png')
b,g,r = cv2.split(img)
B,G,R = map(lambda x,y,z: x*1. - (y*1. + z*1.)/2., [b,g,r],[r,r,g],[g,b,b])
Y = (r*1. + g*1.)/2. - np.abs(r*1. - g*1.)/2. - b*1.
# set negative values to 0
R[R<0] = 0
G[G<0] = 0
B[B<0] = 0
Y[Y<0] = 0
rg = cv2.absdiff(R,G)
by = cv2.absdiff(B,Y)
img1 = rg
img2 = by
rg, by = map(lambda x:x.reshape((len(b[0])*len(b[:,0]),1)),[rg,by])
data = np.hstack((rg,by))
data = data.astype(np.float64)
data = np.delete(data, range( 0,len(data[:,0]),2),0)
# grid
xx1, yy1 = np.meshgrid(np.linspace(-10, 300, 500), np.linspace(-10, 300, 500))
# fit the model to find the decision boundary  # a larger contamination gives a smaller ellipse
clf = EllipticEnvelope(support_fraction=1, contamination=0.01)
print 'data.shape =>',data.shape
print 'learning...'
clf.fit(data) # fit # seems to fail if zeros are present
print 'complete learning!'
# classify the data with the fitted model and draw the ellipse
z1 = clf.decision_function(np.c_[xx1.ravel(), yy1.ravel()])
z1 = z1.reshape(xx1.shape)
plt.contour(xx1,yy1,z1,levels=[0],linewidths=2,colors='r')
# plot
plt.scatter(data[:,0],data[:,1],color= 'black')
plt.title("Outlier detection")
plt.xlim((xx1.min(), xx1.max()))
plt.ylim((yy1.min(), yy1.max()))
plt.pause(.001)
# plt.show()
cv2.imshow('rg',img1/np.amax(img1))
cv2.imshow('by',img2/np.amax(img2))
Example 9: test_outlier_detection
# Required import: from sklearn.covariance import EllipticEnvelope [as alias]
# Or: from sklearn.covariance.EllipticEnvelope import fit [as alias]
def test_outlier_detection():
rnd = np.random.RandomState(0)
X = rnd.randn(100, 10)
clf = EllipticEnvelope(contamination=0.1)
assert_raises(NotFittedError, clf.predict, X)
assert_raises(NotFittedError, clf.decision_function, X)
clf.fit(X)
y_pred = clf.predict(X)
decision = clf.decision_function(X, raw_values=True)
decision_transformed = clf.decision_function(X, raw_values=False)
assert_array_almost_equal(decision, clf.mahalanobis(X))
assert_array_almost_equal(clf.mahalanobis(X), clf.dist_)
assert_almost_equal(clf.score(X, np.ones(100)), (100 - y_pred[y_pred == -1].size) / 100.0)
assert sum(y_pred == -1) == sum(decision_transformed < 0)
Example 10: ellipticenvelope
# Required import: from sklearn.covariance import EllipticEnvelope [as alias]
# Or: from sklearn.covariance.EllipticEnvelope import fit [as alias]
def ellipticenvelope(data, fraction=0.02):
    elenv = EllipticEnvelope(contamination=fraction)
    elenv.fit(data)
    score = elenv.predict(data)  # +1 for inliers, -1 for outliers
    numeration = [[i] for i in xrange(1, len(data)+1, 1)]
    numeration = np.array(numeration)
    y = np.hstack((numeration, score.reshape(-1, 1)))
    # keep only the row numbers predicted as outliers (-1)
    anomalies = np.array([[num] for num, s in y if s == -1])
    return anomalies
Example 11: labelValidSkeletons
# Required import: from sklearn.covariance import EllipticEnvelope [as alias]
# Or: from sklearn.covariance.EllipticEnvelope import fit [as alias]
def labelValidSkeletons(skel_file, valid_index, trajectories_data, fit_contamination = 0.05):
#calculate valid widths if they were not used
calculate_widths(skel_file)
#calculate classifier for the outliers
X4fit = nodes2Array(skel_file, valid_index)
clf = EllipticEnvelope(contamination = fit_contamination)
clf.fit(X4fit)
#calculate outliers using the fitted classifier
X = nodes2Array(skel_file) #use all the indexes
y_pred = clf.decision_function(X).ravel() #less than zero would be an outlier
#labeled rows of valid individual skeletons as GOOD_SKE
trajectories_data['auto_label'] = ((y_pred>0).astype(np.int))*wlab['GOOD_SKE'] #+ wlab['BAD']*np.isnan(y_prev)
saveLabelData(skel_file, trajectories_data)
Example 12: test_elliptic_envelope
# Required import: from sklearn.covariance import EllipticEnvelope [as alias]
# Or: from sklearn.covariance.EllipticEnvelope import fit [as alias]
def test_elliptic_envelope():
rnd = np.random.RandomState(0)
X = rnd.randn(100, 10)
clf = EllipticEnvelope(contamination=0.1)
assert_raises(NotFittedError, clf.predict, X)
assert_raises(NotFittedError, clf.decision_function, X)
clf.fit(X)
y_pred = clf.predict(X)
scores = clf.score_samples(X)
decisions = clf.decision_function(X)
assert_array_almost_equal(
scores, -clf.mahalanobis(X))
assert_array_almost_equal(clf.mahalanobis(X), clf.dist_)
assert_almost_equal(clf.score(X, np.ones(100)),
(100 - y_pred[y_pred == -1].size) / 100.)
assert(sum(y_pred == -1) == sum(decisions < 0))
Example 13: labelValidSkeletons
# Required import: from sklearn.covariance import EllipticEnvelope [as alias]
# Or: from sklearn.covariance.EllipticEnvelope import fit [as alias]
def labelValidSkeletons(skel_file):
calculate_widths(skel_file)
#get valid rows using the trajectory displacement and the skeletonization success
valid_index, trajectories_data = getValidIndexes(skel_file)
#calculate classifier for the outliers
X4fit = nodes2Array(skel_file, valid_index)
clf = EllipticEnvelope(contamination=.1)
clf.fit(X4fit)
#calculate outliers using the fitted classifier
X = nodes2Array(skel_file)
y_pred = clf.decision_function(X).ravel() #less than zero would be an outlier
#labeled rows of valid individual skeletons as GOOD_SKE
trajectories_data['auto_label'] = ((y_pred>0).astype(np.int))*wlab['GOOD_SKE'] #+ wlab['BAD']*np.isnan(y_prev)
saveLabelData(skel_file, trajectories_data)
Example 14: model_2_determine_test_data_similarity
# Required import: from sklearn.covariance import EllipticEnvelope [as alias]
# Or: from sklearn.covariance.EllipticEnvelope import fit [as alias]
def model_2_determine_test_data_similarity(self, model):
    clf_EE = {}
    model_EE = {}
    for i in range(len(model)):
        clf = EllipticEnvelope(contamination=0.01, support_fraction=1)
        clf_EE[i] = clf
        EEmodel = clf.fit(model[i])
        model_EE[i] = EEmodel
    return clf_EE, model_EE
Example 15: anomaly_detection
# Required import: from sklearn.covariance import EllipticEnvelope [as alias]
# Or: from sklearn.covariance.EllipticEnvelope import fit [as alias]
def anomaly_detection(features, labels):
# In this function, I try to use an anomaly detection method (multivariate Gaussian distribution) to identify POIs
non_pois = features[labels==0]
pois = features[labels==1]
print "non poi size", non_pois.shape, pois.shape, features.shape
## Splitting data into train, test and cross-validation sets for anomaly detection
split1 = produce_spliting_array(non_pois.shape[0], .75 )
X_train = non_pois[split1==1]
X_intermediate = non_pois[split1==0]
print "size intermediate", X_intermediate.shape
split2 = produce_spliting_array(X_intermediate.shape[0], .5 )
X_test = X_intermediate[split2==1]
label_test = np.zeros((X_test.shape[0],), dtype=np.int) - 1
X_cv = X_intermediate[split2==0]
label_cv = np.zeros((X_cv.shape[0],), dtype=np.int) - 1
split3 = produce_spliting_array(pois.shape[0], .5 )
X_test = np.vstack((X_test, pois[split3==1]))
label_test = np.hstack((label_test, np.ones(sum(split3), dtype=np.int)))
X_cv = np.vstack((X_cv, pois[split3==0]))
label_cv = np.hstack((label_cv, np.ones(sum(split3==0), dtype=np.int)))
print "size X_train", X_train.shape
print "size test data", X_test.shape, label_test.shape
print "size cv data", X_cv.shape, label_cv.shape
print "size splits", len(split1), len(split2), len(split3)
from sklearn.covariance import EllipticEnvelope
detector = EllipticEnvelope(contamination=.85)
detector.fit(X_train)
pred_cv = detector.predict(X_cv)
print pred_cv
print label_cv
print detector.score(X_cv, label_cv)