本文整理汇总了Python中sklearn.ensemble.IsolationForest.decision_function方法的典型用法代码示例。如果您正苦于以下问题:Python IsolationForest.decision_function方法的具体用法?Python IsolationForest.decision_function怎么用?Python IsolationForest.decision_function使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.IsolationForest
的用法示例。
在下文中一共展示了IsolationForest.decision_function方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_score_samples
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import decision_function [as 别名]
def test_score_samples():
    """score_samples must equal decision_function + offset_, for both a
    fixed-contamination and a default-constructed IsolationForest, and the
    two models must agree on score_samples for the same query point."""
    train_data = [[1, 1], [1, 2], [2, 1]]
    query = [[2., 2.]]
    model_a = IsolationForest(contamination=0.1).fit(train_data)
    model_b = IsolationForest().fit(train_data)
    # the invariant holds regardless of how contamination was set
    for model in (model_a, model_b):
        assert_array_equal(model.score_samples(query),
                           model.decision_function(query) + model.offset_)
    # score_samples itself is contamination-independent
    assert_array_equal(model_a.score_samples(query),
                       model_b.score_samples(query))
示例2: test_iforest_works
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import decision_function [as 别名]
def test_iforest_works(contamination):
    """IsolationForest must flag the two planted outliers in a toy sample.

    ``rng`` is a module-level random state shared by the test file.
    """
    # toy sample: the last two points lie far from the inlier cluster
    samples = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1],
               [6, 3], [-4, 7]]
    forest = IsolationForest(random_state=rng, contamination=contamination)
    forest.fit(samples)
    # negate so that higher score == more abnormal
    anomaly_score = -forest.decision_function(samples)
    labels = forest.predict(samples)
    # every outlier must score strictly higher than every inlier
    assert_greater(np.min(anomaly_score[-2:]), np.max(anomaly_score[:-2]))
    assert_array_equal(labels, [1] * 6 + [-1] * 2)
示例3: outlier_removal
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import decision_function [as 别名]
def outlier_removal(df, col, method, params):
    """Fit an outlier detector on ``df[col]`` and append score/label columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the feature column(s); result columns are added under
        ``('meta', ...)`` keys.
    col : label
        Column (or column group) to run detection on.
    method : str
        Either ``'Isolation Forest'`` or ``'Local Outlier Factor'``.
    params : dict
        Keyword arguments forwarded to the detector constructor.

    Returns
    -------
    (df, detector)
        The augmented frame and the fitted detector object.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    values = np.array(df[col])
    # BUG FIX: the original used two independent `if`s whose `else` executed
    # a no-op comparison (`method == None`, comparison instead of assignment)
    # and left `do_outlier_removal` unbound for unknown methods, producing an
    # UnboundLocalError later. Use an elif chain and fail fast instead.
    if method == 'Isolation Forest':
        do_outlier_removal = IsolationForest(**params)
    elif method == 'Local Outlier Factor':
        do_outlier_removal = LocalOutlierFactor(**params)
    else:
        raise ValueError('Unknown outlier removal method: %r' % (method,))
    do_outlier_removal.fit(values)
    if method == 'Isolation Forest':
        outlier_scores = do_outlier_removal.decision_function(values)
        df[('meta', 'Outlier Scores - ' + method + str(params))] = outlier_scores
        is_outlier = do_outlier_removal.predict(values)
        df[('meta', 'Outliers - ' + method + str(params))] = is_outlier
    if method == 'Local Outlier Factor':
        # LOF labels come from fit_predict; negative_outlier_factor_ is the
        # per-sample outlier factor computed during fitting
        is_outlier = do_outlier_removal.fit_predict(values)
        df[('meta', 'Outliers - ' + method + str(params))] = is_outlier
        df[('meta', 'Outlier Factor - ' + method + str(params))] = do_outlier_removal.negative_outlier_factor_
    return df, do_outlier_removal
示例4: test_iforest_performance
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import decision_function [as 别名]
def test_iforest_performance():
    """Test that Isolation Forest separates Gaussian inliers from uniform
    outliers with ROC AUC above 0.98."""
    # Generate train/test data from a tight Gaussian blob
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)
    # BUG FIX: the original assigned X_train twice; the first value
    # (np.r_[X + 2, X - 2]) was dead and has been removed. np.r_ draws no
    # random numbers, so removal does not perturb the RNG stream.
    X_train = X[:100]
    # Generate some abnormal novel observations over a wide uniform box
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X_test = np.r_[X[100:], X_outliers]
    y_test = np.array([0] * 20 + [1] * 20)
    # fit the model
    clf = IsolationForest(max_samples=100, random_state=rng).fit(X_train)
    # predict scores; negate so higher = more abnormal
    y_pred = -clf.decision_function(X_test)
    # allow at most a few ranking errors (false positives or negatives)
    assert_greater(roc_auc_score(y_test, y_pred), 0.98)
示例5: print
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import decision_function [as 别名]
# --- benchmark-script fragment ---
# NOTE(review): X, y, n_samples_train, fit_time, predict_time, max_time,
# tpr, x_axis, time(), interp1d, roc_curve and precision_recall_curve are
# all defined/imported outside this excerpt; indentation was flattened by
# the page scraper.
X_test = X[n_samples_train:, :]
y_train = y[:n_samples_train]
y_test = y[n_samples_train:]
# # training only on normal data:
# X_train = X_train[y_train == 0]
# y_train = y_train[y_train == 0]
print('IsolationForest processing...')
model = IsolationForest()
# time fitting and scoring separately, accumulating across datasets
tstart = time()
model.fit(X_train)
fit_time += time() - tstart
tstart = time()
scoring = -model.decision_function(X_test)  # the lower,the more normal
predict_time += time() - tstart
fpr_, tpr_, thresholds_ = roc_curve(y_test, scoring)
# abort the benchmark when this dataset exceeds the time budget
if predict_time + fit_time > max_time:
raise TimeoutError
# resample the ROC curve onto the shared x_axis and accumulate TPR
f = interp1d(fpr_, tpr_)
tpr += f(x_axis)
tpr[0] = 0.
precision_, recall_ = precision_recall_curve(y_test, scoring)[:2]
# cluster: old version of scipy -> interpol1d needs sorted x_input
arg_sorted = recall_.argsort()
recall_ = recall_[arg_sorted]
示例6: test_behaviour_param
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import decision_function [as 别名]
def test_behaviour_param():
    """The 'old' and 'new' behaviour settings must yield identical
    decision_function values on the same training data."""
    train_data = [[1, 1], [1, 2], [2, 1]]
    query = [[2., 2.]]
    legacy = IsolationForest(behaviour='old').fit(train_data)
    modern = IsolationForest(behaviour='new', contamination='auto').fit(train_data)
    assert_array_equal(legacy.decision_function(query),
                       modern.decision_function(query))
示例7: LocalOutlierFactor
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import decision_function [as 别名]
# --- fragment: EM/MV criterion benchmark setup ---
# NOTE(review): X, X_train, X_test, alpha_min, alpha_max, n_generated,
# n_features, ocsvm_max_train, n_samples_train, em, t_max and plt come
# from outside this excerpt; indentation was flattened by the scraper.
lof = LocalOutlierFactor(n_neighbors=20)
ocsvm = OneClassSVM()
# bounding box of the data; its volume normalises the EM 't' axis
lim_inf = X.min(axis=0)
lim_sup = X.max(axis=0)
volume_support = (lim_sup - lim_inf).prod()
t = np.arange(0, 100 / volume_support, 0.01 / volume_support)
axis_alpha = np.arange(alpha_min, alpha_max, 0.0001)
# uniform background sample over the bounding box, used by the EM/MV criteria
unif = np.random.uniform(lim_inf, lim_sup,
size=(n_generated, n_features))
# fit:
print('IsolationForest processing...')
iforest = IsolationForest()
iforest.fit(X_train)
s_X_iforest = iforest.decision_function(X_test)
print('LocalOutlierFactor processing...')
lof = LocalOutlierFactor(n_neighbors=20)
lof.fit(X_train)
# NOTE(review): LocalOutlierFactor exposes decision_function only when
# constructed with novelty=True in sklearn >= 0.20 — confirm the sklearn
# version this was written against.
s_X_lof = lof.decision_function(X_test)
print('OneClassSVM processing...')
ocsvm = OneClassSVM()
# cap the OCSVM training-set size; kernel SVM fitting scales poorly
ocsvm.fit(X_train[:min(ocsvm_max_train, n_samples_train - 1)])
s_X_ocsvm = ocsvm.decision_function(X_test).reshape(1, -1)[0]
s_unif_iforest = iforest.decision_function(unif)
s_unif_lof = lof.decision_function(unif)
s_unif_ocsvm = ocsvm.decision_function(unif).reshape(1, -1)[0]
plt.subplot(121)
# NOTE(review): this call is cut off mid-argument-list in the excerpt
auc_iforest, em_iforest, amax_iforest = em(t, t_max,
volume_support,
s_unif_iforest,
示例8: IsolationForest
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import decision_function [as 别名]
# --- fragment: IsolationForest contour-plot demo ---
# NOTE(review): rng and X_train are defined outside this excerpt;
# indentation was flattened by the scraper.
# Generate some regular novel observations
X = 0.3 * rng.randn(20, 2)
X_test = np.r_[X + 2, X - 2]
# Generate some abnormal novel observations
X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
# fit the model
clf = IsolationForest(max_samples=100, random_state=rng)
clf.fit(X_train)
y_pred_train = clf.predict(X_train)
y_pred_test = clf.predict(X_test)
y_pred_outliers = clf.predict(X_outliers)
# plot the line, the samples, and the nearest vectors to the plane
xx, yy = np.meshgrid(np.linspace(-5, 5, 50), np.linspace(-5, 5, 50))
# evaluate the decision function on a 50x50 grid for the filled contour
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.title("IsolationForest")
plt.contourf(xx, yy, Z, cmap=plt.cm.Blues_r)
b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white')
b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green')
c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red')
plt.axis('tight')
plt.xlim((-5, 5))
plt.ylim((-5, 5))
plt.legend([b1, b2, c],
["training observations",
"new regular observations", "new abnormal observations"],
loc="upper left")
示例9: Eval
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import decision_function [as 别名]
#.........这里部分代码省略.........
#that doesn't matter
# --- fragment: tail of an evaluation routine (its `def` line is omitted
# by the example scraper). NOTE(review): trainDf, classedDf, ftu, opts,
# enhance_flow, build_vectorizers, featureize, interpolate, auc,
# roc_curve, plt, time, Start and features all come from outside this
# excerpt; indentation was flattened by the scraper. ---
# training rows get a dummy class of 0 (unlabeled)
trainDf['class']=0;
#spliting into training and evaluation sets
classedDf['is_train']=False
trainDf['is_train']=True
enhancedDf = enhance_flow(pd.concat([trainDf,classedDf], ignore_index=True), ftu)
# construct some vectorizers based on the data in the DF. We need to vectorize future log files the exact same way so we
# will be saving these vectorizers to a file.
vectorizers = build_vectorizers(enhancedDf, ftu, max_features=opts.maxfeaturesperbag, ngram_size=opts.ngramsize, verbose=opts.verbose)
#use the vectorizers to featureize our DF into a numeric feature dataframe
featureMatrix = featureize(enhancedDf, ftu, vectorizers, verbose=opts.verbose)
#add the class column back in (it wasn't featurized by itself)
featureMatrix['class'] = enhancedDf['class']
featureMatrix['is_train'] = enhancedDf['is_train']
#split out the train and test df's into separate objects
train, test = featureMatrix[featureMatrix['is_train']==True], featureMatrix[featureMatrix['is_train']==False]
#drop the is_train column, we don't need it anymore
train = train.drop('is_train', axis=1)
test = test.drop('is_train', axis=1)
#print('Calculating features')
Trees=opts.numtrees
Samples=opts.numsamples
clf = IsolationForest(n_estimators=Trees, max_samples=Samples)
clf.fit(train.drop('class', axis=1))
testnoclass = test.drop('class', axis=1)
print('Predicting')
# suppress pandas SettingWithCopy warning before adding the column below
test.is_copy = False
# shift scores by 0.5 so they land roughly in [0, 1]
test['prediction'] = clf.decision_function(testnoclass) + 0.5
print('Analyzing')
#get the class-1 (outlier/anomaly) rows from the feature matrix, and drop the prediction so we can investigate them
##From Here
# integrate the ROC curve only over the low-FPR band [0.001, 0.01]
Left=0.001
Right=0.01
fpr, tpr, thresholds = roc_curve(test['class'], test['prediction'], pos_label=0)
F=interpolate.interp1d(fpr, tpr, assume_sorted=True)
x=np.logspace(np.log10(Left), np.log10(Right))
y=F(x)
roc_auc=auc(x, y)
plt.figure()
plt.xscale('log')
plt.plot(fpr, tpr, color='b')
plt.plot(x,y, color='r')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
# diagonal reference line for a random classifier
plt.plot(plt.xlim(), plt.ylim(), ls="--", c=".3")
plt.savefig("fig3.png")
plt.clf()
plt.close('all')
print('Area Under the Curve = %.6f' %(roc_auc))
Min, Sec= divmod( int(time.time() - Start), 60 )
#print Min, Sec
# append run parameters and results to a cumulative results log
target= open('Results.txt', 'a')
target.write(str(Trees)+' ')
target.write(str(Samples)+' ')
target.write(str(Min)+' ')
target.write(str(Sec)+' ')
target.write(str(roc_auc))
target.write("\n")
target.write(str(features))
target.write("\n")
target.write("\n")
target.close()
print("Minutes: %d, Seconds: %d" % (int(Min), int(Sec)) )
return roc_auc
示例10: IForest
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import decision_function [as 别名]
#.........这里部分代码省略.........
labels_ : int, either 0 or 1
The binary labels of the training data. 0 stands for inliers
and 1 for outliers/anomalies. It is generated by applying
``threshold_`` on ``decision_scores_``.
"""
def __init__(self, n_estimators=100,
             max_samples="auto",
             contamination=0.1,
             max_features=1.,
             bootstrap=False,
             n_jobs=1,
             random_state=None,
             verbose=0):
    """Store the IsolationForest hyper-parameters on the instance.

    All arguments are forwarded verbatim to
    ``sklearn.ensemble.IsolationForest`` when ``fit`` is called;
    ``contamination`` is handled by the base detector class.
    """
    super(IForest, self).__init__(contamination=contamination)
    # sklearn convention: constructor only records parameters, no work here
    self.random_state = random_state
    self.verbose = verbose
    self.n_estimators = n_estimators
    self.max_samples = max_samples
    self.max_features = max_features
    self.bootstrap = bootstrap
    self.n_jobs = n_jobs
def fit(self, X, y=None):
    """Fit detector. y is optional for unsupervised methods.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.
    y : numpy array of shape (n_samples,), optional (default=None)
        The ground truth of the input samples (labels).
    """
    # validate inputs X and y (optional)
    X = check_array(X)
    self._set_n_classes(y)
    # build the wrapped sklearn detector from the stored hyper-parameters
    self.detector_ = IsolationForest(
        n_estimators=self.n_estimators,
        max_samples=self.max_samples,
        contamination=self.contamination,
        max_features=self.max_features,
        bootstrap=self.bootstrap,
        n_jobs=self.n_jobs,
        random_state=self.random_state,
        verbose=self.verbose,
    )
    self.detector_.fit(X=X, y=None, sample_weight=None)
    # sklearn scores: higher = more normal; invert so outliers score higher
    raw_scores = self.detector_.decision_function(X)
    self.decision_scores_ = invert_order(raw_scores)
    self._process_decision_scores()
    return self
def decision_function(self, X):
    """Predict raw anomaly scores of X using the fitted detector.

    The wrapped sklearn scores are inverted so that, per this class's
    convention, outliers receive the larger anomaly scores.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The training input samples. Sparse matrices are accepted only
        if they are supported by the base estimator.

    Returns
    -------
    anomaly_scores : numpy array of shape (n_samples,)
        The anomaly score of the input samples.
    """
    check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_'])
    # flip the sign convention: sklearn uses lower = more abnormal
    raw_scores = self.detector_.decision_function(X)
    return invert_order(raw_scores)
@property
def estimators_(self):
    """The collection of fitted sub-estimators, delegated to the
    wrapped scikit-learn IsolationForest."""
    return self.detector_.estimators_
@property
def estimators_samples_(self):
    """The in-bag sample subset drawn for each base estimator,
    delegated to the wrapped scikit-learn IsolationForest."""
    return self.detector_.estimators_samples_
@property
def max_samples_(self):
    """The actual number of samples used per tree, delegated to the
    wrapped scikit-learn IsolationForest."""
    return self.detector_.max_samples_