本文整理汇总了Python中sklearn.ensemble.IsolationForest.fit方法的典型用法代码示例。如果您正苦于以下问题：Python IsolationForest.fit方法的具体用法？Python IsolationForest.fit怎么用？Python IsolationForest.fit使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.IsolationForest的用法示例。
在下文中一共展示了IsolationForest.fit方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: outlier_rejection
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import fit [as 别名]
def outlier_rejection(X, y):
    """Keep only the samples that an isolation forest predicts as ``1``.

    An ``IsolationForest`` is fitted on ``X``; the rows of ``X`` whose
    prediction equals ``1`` are returned together with the matching
    entries of ``y``.
    """
    forest = IsolationForest(max_samples=100, contamination=0.4,
                             random_state=rng)
    forest.fit(X)
    keep = forest.predict(X) == 1
    return X[keep], y[keep]
示例2: test_iforest_subsampled_features
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import fit [as 别名]
def test_iforest_subsampled_features():
    """Non-regression test for #5732, which failed at predict."""
    random_state = check_random_state(0)
    # Only the first 50 samples of the dataset are used.
    features = boston.data[:50]
    target = boston.target[:50]
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, random_state=random_state)
    forest = IsolationForest(max_features=0.8)
    forest.fit(X_train, y_train)
    # The call itself is the check: it must not raise.
    forest.predict(X_test)
示例3: _predict_self
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import fit [as 别名]
def _predict_self(self):
    """Fit an isolation forest on ``self.num_X`` and predict those same rows.

    ``self.frac`` is used as the ``contamination`` of the forest.

    :return: the output of ``predict`` for ``self.num_X``
    """
    forest = IsolationForest(contamination=self.frac)
    forest.fit(self.num_X)
    labels = forest.predict(self.num_X)
    return labels
示例4: outlier_rejection
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import fit [as 别名]
def outlier_rejection(X, y):
    """Resample the dataset by dropping rows not predicted as ``1``.

    The isolation forest is fitted on ``X`` itself; the returned pair
    holds the retained rows of ``X`` and the corresponding entries of ``y``.
    """
    detector = IsolationForest(
        max_samples=100,
        contamination=0.4,
        random_state=rng,
    )
    detector.fit(X)
    labels = detector.predict(X)
    retained = labels == 1
    return X[retained], y[retained]
示例5: IsolationForest_calulate
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import fit [as 别名]
def IsolationForest_calulate(train_data_one,test_data):
    """Return the fraction of ``test_data`` samples predicted as ``1``.

    A default ``IsolationForest`` is fitted on ``train_data_one`` and then
    used to label ``test_data``.
    """
    # Train the anomaly detection model.
    detector = IsolationForest()
    detector.fit(train_data_one)
    # Label the test samples.
    labels = detector.predict(test_data)
    # Share of samples labelled 1 among all predictions.
    hits = sum(1 for label in labels if label == 1)
    return hits / len(labels)
示例6: test_iforest_works
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import fit [as 别名]
def test_iforest_works():
    """Check that the two toy outliers get the most abnormal scores."""
    # toy sample (the last two samples are outliers)
    X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [6, 3], [-4, 7]]
    # Test IsolationForest
    clf = IsolationForest(random_state=rng)
    clf.fit(X)
    # predict() only yields +1/-1 labels (with -1 for outliers), so comparing
    # its output with "outliers greater than inliers" is inverted. Compare the
    # negated decision function instead, where larger means more abnormal.
    anomaly_score = -clf.decision_function(X)
    # assert detect outliers:
    assert_greater(np.min(anomaly_score[-2:]), np.max(anomaly_score[:-2]))
示例7: test_iforest_works
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import fit [as 别名]
def test_iforest_works(contamination):
    """Check scores and labels of IsolationForest on a tiny toy dataset."""
    # Toy sample: six regular points followed by two outliers.
    X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [6, 3], [-4, 7]]
    forest = IsolationForest(random_state=rng, contamination=contamination)
    forest.fit(X)
    # Negated so that the comparison below reads "outliers score higher".
    anomaly_score = -forest.decision_function(X)
    labels = forest.predict(X)
    outlier_scores = anomaly_score[-2:]
    regular_scores = anomaly_score[:-2]
    # The outliers must be separated from the regular points ...
    assert_greater(np.min(outlier_scores), np.max(regular_scores))
    # ... and labelled -1 while every regular point is labelled 1.
    assert_array_equal(labels, 6 * [1] + 2 * [-1])
示例8: isolationForest
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import fit [as 别名]
def isolationForest(self, settings, mname, data):
    '''
    Build an IsolationForest from ``settings``, fit it on ``data``, print the
    predictions for ``data`` and serialize the fitted model under ``mname``.

    Values in ``settings`` are normalized in place before use, so the
    caller's dictionary is modified. The process exits with status 1 when
    the model cannot be instantiated or fitted.

    :param settings: -> settings dictionary
    :param mname: -> name of serialized cluster
    :param data: -> samples the model is fitted on and then predicted for
    :return: -> isolation forest instance
    :example settings: -> {n_estimators:100, max_samples:100, contamination:0.1, bootstrap:False,
                        max_features:1.0, n_jobs:1, random_state:None, verbose:0}
    '''
    def timestamp():
        # Wall-clock prefix shared by every log record emitted below.
        return datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')

    # Coerce values that may have been supplied as strings.
    if settings['random_state'] == 'None':
        settings['random_state'] = None
    if isinstance(settings['bootstrap'], str):
        settings['bootstrap'] = str2Bool(settings['bootstrap'])
    if isinstance(settings['verbose'], str):
        settings['verbose'] = str2Bool(settings['verbose'])
    if settings['max_samples'] != 'auto':
        settings['max_samples'] = int(settings['max_samples'])

    # items() and the parenthesised single-argument print run unchanged on
    # both Python 2 and Python 3.
    for k, v in settings.items():
        logger.info('[%s] : [INFO] IsolationForest %s set to %s',
                    timestamp(), k, v)
        print("IsolationForest %s set to %s" % (k, v))

    try:
        clf = IsolationForest(n_estimators=int(settings['n_estimators']),
                              max_samples=settings['max_samples'],
                              contamination=float(settings['contamination']),
                              bootstrap=settings['bootstrap'],
                              max_features=float(settings['max_features']),
                              n_jobs=int(settings['n_jobs']),
                              random_state=settings['random_state'],
                              verbose=settings['verbose'])
    except Exception as inst:
        logger.error('[%s] : [ERROR] Cannot instanciate isolation forest with %s and %s',
                     timestamp(), type(inst), inst.args)
        print("Error while instanciating isolation forest with %s and %s" % (type(inst), inst.args))
        sys.exit(1)

    try:
        clf.fit(data)
    except Exception as inst:
        logger.error('[%s] : [ERROR] Cannot fit isolation forest model with %s and %s',
                     timestamp(), type(inst), inst.args)
        sys.exit(1)

    predict = clf.predict(data)
    print("Anomaly Array:")
    print(predict)
    self.__serializemodel(clf, 'isoforest', mname)
    return clf
示例9: outlier_removal
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import fit [as 别名]
def outlier_removal(df, col, method, params):
    """Flag outliers in ``df[col]`` and store the results in ``df``.

    The result columns are added to ``df`` in place under ``('meta', ...)``
    keys whose second element contains ``method`` and ``str(params)``.

    :param df: frame holding the data; it is modified and returned
    :param col: selector passed to ``df[...]`` to obtain the values to check
    :param method: ``'Isolation Forest'`` or ``'Local Outlier Factor'``
    :param params: keyword arguments forwarded to the chosen estimator
    :return: tuple ``(df, fitted estimator)``
    :raises ValueError: if ``method`` is not one of the supported names
    """
    if method == 'Isolation Forest':
        do_outlier_removal = IsolationForest(**params)
    elif method == 'Local Outlier Factor':
        do_outlier_removal = LocalOutlierFactor(**params)
    else:
        # Fail with a clear message instead of hitting an unbound local below.
        raise ValueError(
            "Unknown outlier removal method %r; expected 'Isolation Forest' "
            "or 'Local Outlier Factor'" % (method,))

    values = np.array(df[col])
    suffix = method + str(params)
    if method == 'Isolation Forest':
        do_outlier_removal.fit(values)
        outlier_scores = do_outlier_removal.decision_function(values)
        df[('meta', 'Outlier Scores - ' + suffix)] = outlier_scores
        is_outlier = do_outlier_removal.predict(values)
        df[('meta', 'Outliers - ' + suffix)] = is_outlier
    else:
        # fit_predict fits the model itself, so no separate fit() is needed.
        is_outlier = do_outlier_removal.fit_predict(values)
        df[('meta', 'Outliers - ' + suffix)] = is_outlier
        df[('meta', 'Outlier Factor - ' + suffix)] = do_outlier_removal.negative_outlier_factor_
    return df, do_outlier_removal
示例10: test_iforest_warm_start
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import fit [as 别名]
def test_iforest_warm_start():
    """Test iterative addition of iTrees to an iForest """
    random_state = check_random_state(0)
    X = random_state.randn(20, 2)

    # First round: grow 10 trees with warm_start enabled.
    forest = IsolationForest(
        n_estimators=10,
        max_samples=20,
        random_state=random_state,
        warm_start=True,
    )
    forest.fit(X)
    # Keep a handle on the first tree to detect overwriting later.
    first_tree = forest.estimators_[0]

    # Second round: raise the target to 20 trees and fit again.
    forest.set_params(n_estimators=20)
    forest.fit(X)

    # 20 trees are expected and the earlier ones must be untouched.
    assert len(forest.estimators_) == 20
    assert forest.estimators_[0] is first_tree
示例11: predict
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import fit [as 别名]
def predict(self, X, window=DEFAULT_WINDOW):
    """
    Predict if a particular sample is an outlier or not.

    An isolation forest is fitted on ``(index, value)`` pairs built from
    ``X``, and the label predicted for the last pair decides the result.

    :param X: the time series to detect of
    :type X: pandas.Series
    :param window: the length of window
    :type window: int
    :return: 1 denotes normal, 0 denotes abnormal.
    """
    # Positional feature: two runs of 0..2*window followed by one run of
    # 0..window.
    long_run = list(range(0, 2 * window + 1))
    x_train = long_run + long_run + list(range(0, window + 1))
    # Materialize the pairs: on Python 3 zip() is a one-shot iterator, which
    # cannot be converted to an array by fit() and would already be exhausted
    # when predict() is called.
    sample_features = list(zip(x_train, X))
    # Keyword arguments keep the call valid on scikit-learn releases where
    # the constructor parameters are keyword-only.
    clf = IsolationForest(n_estimators=self.n_estimators,
                          max_samples=self.max_samples,
                          contamination=self.contamination,
                          max_features=self.max_feature,
                          bootstrap=self.bootstrap,
                          n_jobs=self.n_jobs,
                          random_state=self.random_state,
                          verbose=self.verbose)
    clf.fit(sample_features)
    predict_res = clf.predict(sample_features)
    # The forest labels outliers -1; map that to 0 and anything else to 1.
    if predict_res[-1] == -1:
        return 0
    return 1
示例12: len
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import fit [as 别名]
# Randomly flag roughly 75% of the rows as training data.
featureMatrix['is_train'] = np.random.uniform(0, 1, len(featureMatrix)) <= .75
# Split the train and test rows into separate frames and drop the helper
# column, which is not needed afterwards. The explicit copy() makes `test` an
# independent frame, so the 'prediction' column can be added below without a
# SettingWithCopyWarning and without relying on the `is_copy` attribute,
# which current pandas no longer provides.
in_train = featureMatrix['is_train']
train = featureMatrix[in_train].drop('is_train', axis=1)
test = featureMatrix[~in_train].drop('is_train', axis=1).copy()
# Create the isolation forest.
clf = IsolationForest(n_estimators=opts.numtrees)
# Train on the training set without the class column, so the labels are not
# used as a feature.
print('\nTraining')
clf.fit(train.drop('class', axis=1))
# Remove the 'answers' from the test set.
testnoclass = test.drop('class', axis=1)
print('\nPredicting (class 1 is normal, class -1 is malicious)')
# Evaluate our results on the test set.
test['prediction'] = clf.predict(testnoclass)
# Blank separator line; a bare `print` is a no-op expression on Python 3.
print('')
# Group by class (the real answers) and prediction (what the forest said).
# We want these values to match for 'good' answers.
results = test.groupby(['class', 'prediction'])
resultsagg = results.size()
print(resultsagg)
示例13: ohEncoding
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import fit [as 别名]
data = data.drop(cols, axis=1)
data = data.join(vecData)
return data, vecData, vec
# One-hot encode the selected columns in place of the originals.
df, t, v = ohEncoding(df, col, replace=True)
# Parenthesised single-argument prints run on both Python 2 and Python 3.
print("Shape after encoding")
# Print the shape itself; type(df.shape) would only ever report `tuple`.
print(df.shape)
# The label column must not be visible to the unsupervised model.
df_unlabeled = df.drop("Anomaly", axis=1)
print("Shape of the dataframe without anomaly column: ")
print(df_unlabeled.shape)
clf = IsolationForest(max_samples=6444, verbose=1, n_jobs=-1,
                      contamination=0.255555, bootstrap=True, max_features=9)
clf.fit(df_unlabeled)
pred = clf.predict(df_unlabeled)
# Row positions predicted as anomalous (-1) and as normal (1).
anomalies = np.argwhere(pred == -1)
normal = np.argwhere(pred == 1)
# Store the prediction next to the original data.
df['ISO1'] = pred
# Counters for the row iteration that follows this excerpt
# (presumably labelled vs. detected anomalies - confirm against the loop).
nLabAno = 0
nDetAno = 0
示例14: print
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import fit [as 别名]
# Disabled: reordering of the samples through an index array.
# X = X[indices]
# y = y[indices]
# Positional split: the first n_samples_train rows are used for training,
# the remaining rows for testing.
X_train = X[:n_samples_train, :]
X_test = X[n_samples_train:, :]
y_train = y[:n_samples_train]
y_test = y[n_samples_train:]
# Disabled: training only on normal data.
# X_train = X_train[y_train == 0]
# y_train = y_train[y_train == 0]
print('IsolationForest processing...')
model = IsolationForest()
# Time the fit and add it to the running total (fit_time is initialised
# outside this excerpt).
tstart = time()
model.fit(X_train)
fit_time += time() - tstart
# Time the scoring of the test set in the same way.
tstart = time()
scoring = -model.decision_function(X_test) # the lower, the more normal
predict_time += time() - tstart
fpr_, tpr_, thresholds_ = roc_curve(y_test, scoring)
# Abort once the accumulated fit and predict time exceeds the budget.
if predict_time + fit_time > max_time:
    raise TimeoutError
# Interpolate the ROC curve onto the common x_axis grid and accumulate it
# into tpr; the first point is then forced to 0.
f = interp1d(fpr_, tpr_)
tpr += f(x_axis)
tpr[0] = 0.
# Keep only precision and recall; the thresholds are discarded.
precision_, recall_ = precision_recall_curve(y_test, scoring)[:2]
示例15: IsolationForest
# 需要导入模块: from sklearn.ensemble import IsolationForest [as 别名]
# 或者: from sklearn.ensemble.IsolationForest import fit [as 别名]
# NOTE(review): these three estimators are created again below before being
# fitted, so the instances built here look unused within this excerpt.
iforest = IsolationForest()
lof = LocalOutlierFactor(n_neighbors=20)
ocsvm = OneClassSVM()
# Per-feature minimum and maximum of X, and the volume of the axis-aligned
# box they span.
lim_inf = X.min(axis=0)
lim_sup = X.max(axis=0)
volume_support = (lim_sup - lim_inf).prod()
# Grid from 0 up to 100 / volume_support in steps of 0.01 / volume_support.
t = np.arange(0, 100 / volume_support, 0.01 / volume_support)
# Grid from alpha_min up to alpha_max in steps of 0.0001.
axis_alpha = np.arange(alpha_min, alpha_max, 0.0001)
# n_generated points drawn uniformly inside the box [lim_inf, lim_sup].
unif = np.random.uniform(lim_inf, lim_sup,
size=(n_generated, n_features))
# fit:
print('IsolationForest processing...')
iforest = IsolationForest()
iforest.fit(X_train)
s_X_iforest = iforest.decision_function(X_test)
print('LocalOutlierFactor processing...')
lof = LocalOutlierFactor(n_neighbors=20)
lof.fit(X_train)
# NOTE(review): recent scikit-learn only exposes decision_function on
# LocalOutlierFactor when it is built with novelty=True - confirm the
# targeted version.
s_X_lof = lof.decision_function(X_test)
print('OneClassSVM processing...')
ocsvm = OneClassSVM()
# OneClassSVM is fitted on at most ocsvm_max_train training rows.
ocsvm.fit(X_train[:min(ocsvm_max_train, n_samples_train - 1)])
# reshape(1, -1)[0] flattens the scores to a 1-D array.
s_X_ocsvm = ocsvm.decision_function(X_test).reshape(1, -1)[0]
# Score the uniform sample with each fitted model.
s_unif_iforest = iforest.decision_function(unif)
s_unif_lof = lof.decision_function(unif)
s_unif_ocsvm = ocsvm.decision_function(unif).reshape(1, -1)[0]
plt.subplot(121)
auc_iforest, em_iforest, amax_iforest = em(t, t_max,
volume_support,