本文整理汇总了 Python 中 sklearn.ensemble.IsolationForest 类的典型用法代码示例。如果您正苦于以下问题:Python IsolationForest 类的具体用法?Python IsolationForest 怎么用?Python IsolationForest 使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 IsolationForest 类的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: _predict_self
def _predict_self(self):
    """Fit an IsolationForest on ``self.num_X`` and label those same samples.

    The forest's ``contamination`` is taken from ``self.frac``.

    Returns whatever ``IsolationForest.predict`` yields for ``self.num_X``.
    """
    detector = IsolationForest(contamination=self.frac)
    return detector.fit(self.num_X).predict(self.num_X)
示例2: test_iforest_sparse
def test_iforest_sparse():
    """Check IForest for various parameter settings on sparse input.

    For every combination in the parameter grid and for both CSC and CSR
    formats, a forest trained and evaluated on sparse data must predict
    exactly what an identically seeded forest predicts on dense data.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data[:50], boston.target[:50], random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "bootstrap": [True, False]})

    for sparse_format in [csc_matrix, csr_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)

        for params in grid:
            # Trained on sparse format
            sparse_classifier = IsolationForest(
                n_estimators=10, random_state=1, **params).fit(X_train_sparse)
            sparse_results = sparse_classifier.predict(X_test_sparse)

            # Trained on dense format
            dense_classifier = IsolationForest(
                n_estimators=10, random_state=1, **params).fit(X_train)
            dense_results = dense_classifier.predict(X_test)

            # One comparison is enough; the original repeated this exact
            # assertion twice in a row.
            assert_array_equal(sparse_results, dense_results)
示例3: outlier_rejection
def outlier_rejection(X, y):
    """Return only the samples an IsolationForest predicts as 1.

    A forest (``max_samples=100``, ``contamination=0.4``, seeded with the
    ``rng`` name taken from the enclosing scope) is fitted on ``X``. The
    same boolean mask is then applied to both ``X`` and ``y``.
    """
    forest = IsolationForest(max_samples=100, contamination=0.4,
                             random_state=rng)
    forest.fit(X)
    keep = forest.predict(X) == 1
    return X[keep], y[keep]
示例4: test_iforest_subsampled_features
def test_iforest_subsampled_features():
    """Non-regression test for #5732, which failed at predict.

    Passes as long as fitting with ``max_features=0.8`` and then predicting
    does not raise.
    """
    rng = check_random_state(0)
    split = train_test_split(boston.data[:50], boston.target[:50],
                             random_state=rng)
    X_train, X_test, y_train, y_test = split

    forest = IsolationForest(max_features=0.8)
    forest.fit(X_train, y_train)
    forest.predict(X_test)
示例5: outlier_rejection
def outlier_rejection(X, y):
    """This will be our function used to resample our dataset.

    Fits an IsolationForest on ``X`` and keeps the rows of ``X`` and the
    entries of ``y`` whose prediction equals 1. ``rng`` is read from the
    enclosing scope.
    """
    detector = IsolationForest(
        max_samples=100,
        contamination=0.4,
        random_state=rng,
    )
    detector.fit(X)
    is_inlier = detector.predict(X) == 1
    return X[is_inlier], y[is_inlier]
示例6: IsolationForest_calulate
def IsolationForest_calulate(train_data_one, test_data):
    """Return the fraction of ``test_data`` predicted as 1 (inlier).

    An IsolationForest with default settings is fitted on
    ``train_data_one`` and then used to label ``test_data``.

    Returns:
        float: number of predictions equal to 1 divided by the total
        number of predictions.
    """
    # Build the anomaly-detection model.
    clf = IsolationForest()
    # Train the anomaly-detection model.
    clf.fit(train_data_one)
    # Predict labels for the test data.
    predictions = clf.predict(test_data)
    # Count the inliers without building a throwaway list. float() keeps
    # this a true division on Python 2, where int / int would truncate the
    # ratio to 0 or 1.
    inlier_count = sum(1 for label in predictions if label == 1)
    return inlier_count / float(len(predictions))
示例7: test_iforest_works
def test_iforest_works():
    """Check that the two trailing toy samples are separated from the rest.

    The smallest prediction among the last two samples must be greater than
    the largest prediction among the first six. ``rng`` is read from the
    enclosing scope.
    """
    # toy sample (the last two samples are outliers)
    samples = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1],
               [6, 3], [-4, 7]]

    # Test IsolationForest
    forest = IsolationForest(random_state=rng)
    forest.fit(samples)
    pred = forest.predict(samples)

    # assert detect outliers:
    lowest_of_last_two = np.min(pred[-2:])
    highest_of_first_six = np.max(pred[:-2])
    assert_greater(lowest_of_last_two, highest_of_first_six)
示例8: test_iforest_works
def test_iforest_works(contamination):
    """Check scores and labels on a toy set whose last two points are outliers.

    The negated decision function of the last two samples must exceed that
    of every other sample, and ``predict`` must return 1 for the first six
    samples and -1 for the last two. ``rng`` is read from the enclosing scope.
    """
    # toy sample (the last two samples are outliers)
    samples = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1],
               [6, 3], [-4, 7]]

    # Test IsolationForest
    forest = IsolationForest(random_state=rng, contamination=contamination)
    forest.fit(samples)
    negated_scores = -forest.decision_function(samples)
    labels = forest.predict(samples)

    # assert detect outliers:
    assert_greater(np.min(negated_scores[-2:]), np.max(negated_scores[:-2]))
    expected_labels = [1] * 6 + [-1] * 2
    assert_array_equal(labels, expected_labels)
示例9: fit
def fit(self, X, y=None):
    """Fit detector. y is optional for unsupervised methods.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    y : numpy array of shape (n_samples,), optional (default=None)
        The ground truth of the input samples (labels).

    Returns
    -------
    self
        This instance, with ``detector_`` and ``decision_scores_`` set.
    """
    # validate inputs X and y (optional)
    X = check_array(X)
    self._set_n_classes(y)

    # Forward this wrapper's hyper-parameters to the wrapped forest.
    forest_params = dict(
        n_estimators=self.n_estimators,
        max_samples=self.max_samples,
        contamination=self.contamination,
        max_features=self.max_features,
        bootstrap=self.bootstrap,
        n_jobs=self.n_jobs,
        random_state=self.random_state,
        verbose=self.verbose,
    )
    self.detector_ = IsolationForest(**forest_params)
    self.detector_.fit(X=X, y=None, sample_weight=None)

    # invert decision_scores_. Outliers comes with higher outlier scores.
    raw_scores = self.detector_.decision_function(X)
    self.decision_scores_ = invert_order(raw_scores)
    self._process_decision_scores()
    return self
示例10: isolationForest
def isolationForest(self, settings, mname, data):
    '''
    Build an IsolationForest from a settings dictionary, fit it on ``data``,
    print its predictions for ``data`` and serialize the fitted model.

    String-encoded values in ``settings`` are normalized in place before the
    model is created. The process exits with status 1 if the model cannot
    be instantiated or fitted.

    :param settings: -> settings dictionary
    :param mname: -> name of serialized cluster
    :param data: -> samples passed to both fit and predict
    :return: -> isolation forest instance
    :example settings: -> {n_estimators:100, max_samples:100, contamination:0.1, bootstrap:False,
                    max_features:1.0, n_jobs:1, random_state:None, verbose:0}
    '''
    # Normalize values that may arrive as strings.
    if settings['random_state'] == 'None':
        settings['random_state'] = None
    if isinstance(settings['bootstrap'], str):
        settings['bootstrap'] = str2Bool(settings['bootstrap'])
    if isinstance(settings['verbose'], str):
        settings['verbose'] = str2Bool(settings['verbose'])
    if settings['max_samples'] != 'auto':
        settings['max_samples'] = int(settings['max_samples'])

    # items() and parenthesized single-argument print() behave the same on
    # Python 2 and Python 3; iteritems() and the print statement are
    # Python-2-only.
    for k, v in settings.items():
        logger.info('[%s] : [INFO] IsolationForest %s set to %s',
                    datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), k, v)
        print("IsolationForest %s set to %s" % (k, v))

    try:
        clf = IsolationForest(n_estimators=int(settings['n_estimators']),
                              max_samples=settings['max_samples'],
                              contamination=float(settings['contamination']),
                              bootstrap=settings['bootstrap'],
                              max_features=float(settings['max_features']),
                              n_jobs=int(settings['n_jobs']),
                              random_state=settings['random_state'],
                              verbose=settings['verbose'])
    except Exception as inst:
        logger.error('[%s] : [ERROR] Cannot instanciate isolation forest with %s and %s',
                     datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst), inst.args)
        print("Error while instanciating isolation forest with %s and %s" % (type(inst), inst.args))
        sys.exit(1)

    try:
        clf.fit(data)
    except Exception as inst:
        logger.error('[%s] : [ERROR] Cannot fit isolation forest model with %s and %s',
                     datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst), inst.args)
        sys.exit(1)

    predict = clf.predict(data)
    print("Anomaly Array:")
    print(predict)
    self.__serializemodel(clf, 'isoforest', mname)
    return clf
示例11: test_score_samples
def test_score_samples():
    """Check score_samples against decision_function and across forests.

    For a forest built with ``contamination=0.1`` and for a default one,
    ``score_samples`` must equal ``decision_function + offset_`` on the
    probe point; the two forests must also agree on ``score_samples``.
    """
    X_train = [[1, 1], [1, 2], [2, 1]]
    probe = [[2., 2.]]
    clf1 = IsolationForest(contamination=0.1).fit(X_train)
    clf2 = IsolationForest().fit(X_train)

    for forest in (clf1, clf2):
        assert_array_equal(forest.score_samples(probe),
                           forest.decision_function(probe) + forest.offset_)
    assert_array_equal(clf1.score_samples(probe),
                       clf2.score_samples(probe))
示例12: predict
def predict(self, X, window=DEFAULT_WINDOW):
    """
    Predict if a particular sample is an outlier or not.

    Each value of ``X`` is paired with a position index; an IsolationForest
    is fitted on those pairs and the label of the last pair decides the
    result.

    :param X: the time series to detect of
    :type X: pandas.Series
    :param window: the length of window
    :type window: int
    :return: 1 denotes normal, 0 denotes abnormal.
    """
    # Index feature: two runs of 0..2*window followed by one run of 0..window.
    double_window = list(range(0, 2 * window + 1))
    x_train = double_window + double_window + list(range(0, window + 1))
    # zip() is a one-shot iterator on Python 3. Materialize it so the same
    # samples can be passed to both fit() and predict(). zip stops at the
    # shorter of x_train and X.
    sample_features = list(zip(x_train, X))
    # Keyword arguments keep each value bound to the intended parameter
    # regardless of the constructor's positional order.
    clf = IsolationForest(n_estimators=self.n_estimators,
                          max_samples=self.max_samples,
                          contamination=self.contamination,
                          max_features=self.max_feature,
                          bootstrap=self.bootstrap,
                          n_jobs=self.n_jobs,
                          random_state=self.random_state,
                          verbose=self.verbose)
    clf.fit(sample_features)
    predict_res = clf.predict(sample_features)
    # The forest labels outliers as -1; map that to this method's 0.
    if predict_res[-1] == -1:
        return 0
    return 1
示例13: test_iforest_parallel_regression
def test_iforest_parallel_regression():
    """Check parallel regression.

    Predictions of a forest fitted with ``n_jobs=3`` must match whether it
    predicts with ``n_jobs=1`` or ``n_jobs=2``, and must match a forest
    fitted with ``n_jobs=1`` and the same seed.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)

    ensemble = IsolationForest(n_jobs=3, random_state=0).fit(X_train)

    # Predict with the same fitted forest under two different n_jobs values.
    predictions = []
    for n_jobs in (1, 2):
        ensemble.set_params(n_jobs=n_jobs)
        predictions.append(ensemble.predict(X_test))
    y1, y2 = predictions
    assert_array_almost_equal(y1, y2)

    ensemble = IsolationForest(n_jobs=1, random_state=0).fit(X_train)
    y3 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y3)
示例14: test_iforest_performance
def test_iforest_performance():
    """Test Isolation Forest performs well"""
    # Generate train/test data
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)
    # Train on the first 100 points. The original also assigned
    # np.r_[X + 2, X - 2] to X_train first, but that value was overwritten
    # immediately and never used.
    X_train = X[:100]

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    # 20 held-out regular points (label 0) followed by 20 outliers (label 1)
    X_test = np.r_[X[100:], X_outliers]
    y_test = np.array([0] * 20 + [1] * 20)

    # fit the model
    clf = IsolationForest(max_samples=100, random_state=rng).fit(X_train)

    # predict scores (the lower, the more normal)
    y_pred = - clf.decision_function(X_test)

    # check that the ROC AUC of the scores is greater than 0.98
    assert_greater(roc_auc_score(y_test, y_pred), 0.98)
示例15: test_iforest_warm_start
def test_iforest_warm_start():
    """Test iterative addition of iTrees to an iForest.

    With ``warm_start=True``, raising ``n_estimators`` from 10 to 20 and
    refitting must leave 20 trees and keep the first tree object unchanged.
    """
    rng = check_random_state(0)
    X = rng.randn(20, 2)

    # fit first 10 trees
    forest = IsolationForest(
        n_estimators=10,
        max_samples=20,
        random_state=rng,
        warm_start=True,
    )
    forest.fit(X)
    # remember the 1st tree
    first_tree = forest.estimators_[0]

    # fit another 10 trees
    forest.set_params(n_estimators=20)
    forest.fit(X)

    # expecting 20 fitted trees and no overwritten trees
    assert len(forest.estimators_) == 20
    assert forest.estimators_[0] is first_tree