

Python IsolationForest.decision_function Method Code Examples

This article collects typical usage examples of the Python method sklearn.ensemble.IsolationForest.decision_function. If you are wondering what IsolationForest.decision_function does, how to call it, or what it looks like in real code, the selected examples below should help. You can also browse further usage examples for the class it belongs to, sklearn.ensemble.IsolationForest.


The following presents 10 code examples of the IsolationForest.decision_function method, ordered by popularity.
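
Before diving into the examples, here is a minimal, self-contained sketch of the basic call pattern. It is not taken from any of the projects below; the data and variable names are purely illustrative. In recent scikit-learn versions, decision_function returns lower scores for more anomalous samples, and negative scores correspond to points that predict() labels as outliers (-1):

import numpy as np
from sklearn.ensemble import IsolationForest

rng = np.random.RandomState(42)
X_train = 0.3 * rng.randn(100, 2)               # mostly "normal" points
X_new = np.array([[0.1, 0.2], [4.0, 4.0]])      # the second point should look anomalous

clf = IsolationForest(random_state=rng).fit(X_train)

scores = clf.decision_function(X_new)   # lower score = more anomalous; negative = outlier
labels = clf.predict(X_new)             # +1 for inliers, -1 for outliers
print(scores, labels)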

Example 1: test_score_samples

# Required import: from sklearn.ensemble import IsolationForest [as alias]
# Or: from sklearn.ensemble.IsolationForest import decision_function [as alias]
def test_score_samples():
    X_train = [[1, 1], [1, 2], [2, 1]]
    clf1 = IsolationForest(contamination=0.1).fit(X_train)
    clf2 = IsolationForest().fit(X_train)
    assert_array_equal(clf1.score_samples([[2., 2.]]),
                       clf1.decision_function([[2., 2.]]) + clf1.offset_)
    assert_array_equal(clf2.score_samples([[2., 2.]]),
                       clf2.decision_function([[2., 2.]]) + clf2.offset_)
    assert_array_equal(clf1.score_samples([[2., 2.]]),
                       clf2.score_samples([[2., 2.]]))
Author: AlexisMignon, Project: scikit-learn, Lines: 12, Source: test_iforest.py
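
Example 1 exercises the documented relationship between score_samples and decision_function in scikit-learn: score_samples(X) equals decision_function(X) plus the fitted offset_. A minimal sketch of the same identity outside the test suite, assuming a recent scikit-learn (variable names are illustrative):

import numpy as np
from sklearn.ensemble import IsolationForest

X_train = [[1, 1], [1, 2], [2, 1]]
clf = IsolationForest(contamination=0.1, random_state=0).fit(X_train)

raw = clf.score_samples([[2.0, 2.0]])           # raw anomaly score
shifted = clf.decision_function([[2.0, 2.0]])   # same score shifted by offset_
assert np.allclose(raw, shifted + clf.offset_)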

Example 2: test_iforest_works

# Required import: from sklearn.ensemble import IsolationForest [as alias]
# Or: from sklearn.ensemble.IsolationForest import decision_function [as alias]
def test_iforest_works(contamination):
    # toy sample (the last two samples are outliers)
    X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [6, 3], [-4, 7]]

    # Test IsolationForest
    clf = IsolationForest(random_state=rng, contamination=contamination)
    clf.fit(X)
    decision_func = -clf.decision_function(X)
    pred = clf.predict(X)
    # assert detect outliers:
    assert_greater(np.min(decision_func[-2:]), np.max(decision_func[:-2]))
    assert_array_equal(pred, 6 * [1] + 2 * [-1])
Author: manhhomienbienthuy, Project: scikit-learn, Lines: 14, Source: test_iforest.py

Example 3: outlier_removal

# Required import: from sklearn.ensemble import IsolationForest [as alias]
# Or: from sklearn.ensemble.IsolationForest import decision_function [as alias]
def outlier_removal(df, col, method, params):
    # configure the requested outlier detector
    if method == 'Isolation Forest':
        do_outlier_removal = IsolationForest(**params)
    elif method == 'Local Outlier Factor':
        do_outlier_removal = LocalOutlierFactor(**params)
    else:
        raise ValueError('Unsupported outlier removal method: %s' % method)
    do_outlier_removal.fit(np.array(df[col]))
    if method == 'Isolation Forest':
        outlier_scores = do_outlier_removal.decision_function(np.array(df[col]))
        df[('meta', 'Outlier Scores - ' + method + str(params))] = outlier_scores
        is_outlier = do_outlier_removal.predict(np.array(df[col]))
        df[('meta', 'Outliers - ' + method + str(params))] = is_outlier
    if method == 'Local Outlier Factor':
        is_outlier = do_outlier_removal.fit_predict(np.array(df[col]))
        df[('meta', 'Outliers - ' + method + str(params))] = is_outlier
        df[('meta', 'Outlier Factor - ' + method + str(params))] = do_outlier_removal.negative_outlier_factor_
    return df, do_outlier_removal
Author: USGS-Astrogeology, Project: PySAT, Lines: 20, Source: outlier_removal.py

Example 4: test_iforest_performance

# Required import: from sklearn.ensemble import IsolationForest [as alias]
# Or: from sklearn.ensemble.IsolationForest import decision_function [as alias]
def test_iforest_performance():
    """Test Isolation Forest performs well"""

    # Generate train/test data
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)
    X_train = np.r_[X + 2, X - 2]
    X_train = X[:100]

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X_test = np.r_[X[100:], X_outliers]
    y_test = np.array([0] * 20 + [1] * 20)

    # fit the model
    clf = IsolationForest(max_samples=100, random_state=rng).fit(X_train)

    # predict scores (the lower, the more normal)
    y_pred = - clf.decision_function(X_test)

    # check that the model separates outliers well (high ROC AUC on the test scores)
    assert_greater(roc_auc_score(y_test, y_pred), 0.98)
Author: AndyMelendezCuesta, Project: scikit-learn, Lines: 24, Source: test_iforest.py

Example 5: print

# Required import: from sklearn.ensemble import IsolationForest [as alias]
# Or: from sklearn.ensemble.IsolationForest import decision_function [as alias]
            X_test = X[n_samples_train:, :]
            y_train = y[:n_samples_train]
            y_test = y[n_samples_train:]

            # # training only on normal data:
            # X_train = X_train[y_train == 0]
            # y_train = y_train[y_train == 0]

            print('IsolationForest processing...')
            model = IsolationForest()
            tstart = time()
            model.fit(X_train)
            fit_time += time() - tstart
            tstart = time()

            scoring = -model.decision_function(X_test)  # the lower, the more normal
            predict_time += time() - tstart
            fpr_, tpr_, thresholds_ = roc_curve(y_test, scoring)

            if predict_time + fit_time > max_time:
                raise TimeoutError

            f = interp1d(fpr_, tpr_)
            tpr += f(x_axis)
            tpr[0] = 0.

            precision_, recall_ = precision_recall_curve(y_test, scoring)[:2]

            # cluster: old version of scipy -> interpol1d needs sorted x_input
            arg_sorted = recall_.argsort()
            recall_ = recall_[arg_sorted]
Author: ngoix, Project: OCRF, Lines: 33, Source: bench_isolation_forest.py

Example 6: test_behaviour_param

# Required import: from sklearn.ensemble import IsolationForest [as alias]
# Or: from sklearn.ensemble.IsolationForest import decision_function [as alias]
def test_behaviour_param():
    X_train = [[1, 1], [1, 2], [2, 1]]
    clf1 = IsolationForest(behaviour='old').fit(X_train)
    clf2 = IsolationForest(behaviour='new', contamination='auto').fit(X_train)
    assert_array_equal(clf1.decision_function([[2., 2.]]),
                       clf2.decision_function([[2., 2.]]))
Author: MartinThoma, Project: scikit-learn, Lines: 8, Source: test_iforest.py
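
A note on Example 6: the behaviour parameter was a transitional option and, to my knowledge, was deprecated around scikit-learn 0.22 and removed in later releases, so the snippet above only runs on older versions. On recent versions the "new" behaviour is the only one and contamination='auto' is the default, so an equivalent construction is simply:

from sklearn.ensemble import IsolationForest

X_train = [[1, 1], [1, 2], [2, 1]]
clf = IsolationForest(contamination='auto').fit(X_train)   # no behaviour argument needed
print(clf.decision_function([[2.0, 2.0]]))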

Example 7: LocalOutlierFactor

# Required import: from sklearn.ensemble import IsolationForest [as alias]
# Or: from sklearn.ensemble.IsolationForest import decision_function [as alias]
    lof = LocalOutlierFactor(n_neighbors=20)
    ocsvm = OneClassSVM()

    lim_inf = X.min(axis=0)
    lim_sup = X.max(axis=0)
    volume_support = (lim_sup - lim_inf).prod()
    t = np.arange(0, 100 / volume_support, 0.01 / volume_support)
    axis_alpha = np.arange(alpha_min, alpha_max, 0.0001)
    unif = np.random.uniform(lim_inf, lim_sup,
                             size=(n_generated, n_features))

    # fit:
    print('IsolationForest processing...')
    iforest = IsolationForest()
    iforest.fit(X_train)
    s_X_iforest = iforest.decision_function(X_test)
    print('LocalOutlierFactor processing...')
    lof = LocalOutlierFactor(n_neighbors=20)
    lof.fit(X_train)
    s_X_lof = lof.decision_function(X_test)
    print('OneClassSVM processing...')
    ocsvm = OneClassSVM()
    ocsvm.fit(X_train[:min(ocsvm_max_train, n_samples_train - 1)])
    s_X_ocsvm = ocsvm.decision_function(X_test).reshape(1, -1)[0]
    s_unif_iforest = iforest.decision_function(unif)
    s_unif_lof = lof.decision_function(unif)
    s_unif_ocsvm = ocsvm.decision_function(unif).reshape(1, -1)[0]
    plt.subplot(121)
    auc_iforest, em_iforest, amax_iforest = em(t, t_max,
                                               volume_support,
                                               s_unif_iforest,
Author: ngoix, Project: EMMV_benchmarks, Lines: 33, Source: em_bench.py

Example 8: IsolationForest

# Required import: from sklearn.ensemble import IsolationForest [as alias]
# Or: from sklearn.ensemble.IsolationForest import decision_function [as alias]
# Generate some regular novel observations
X = 0.3 * rng.randn(20, 2)
X_test = np.r_[X + 2, X - 2]
# Generate some abnormal novel observations
X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))

# fit the model
clf = IsolationForest(max_samples=100, random_state=rng)
clf.fit(X_train)
y_pred_train = clf.predict(X_train)
y_pred_test = clf.predict(X_test)
y_pred_outliers = clf.predict(X_outliers)

# plot the line, the samples, and the nearest vectors to the plane
xx, yy = np.meshgrid(np.linspace(-5, 5, 50), np.linspace(-5, 5, 50))
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.title("IsolationForest")
plt.contourf(xx, yy, Z, cmap=plt.cm.Blues_r)

b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white')
b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green')
c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red')
plt.axis('tight')
plt.xlim((-5, 5))
plt.ylim((-5, 5))
plt.legend([b1, b2, c],
           ["training observations",
            "new regular observations", "new abnormal observations"],
           loc="upper left")
Author: 0664j35t3r, Project: scikit-learn, Lines: 33, Source: plot_isolation_forest.py

Example 9: Eval

# Required import: from sklearn.ensemble import IsolationForest [as alias]
# Or: from sklearn.ensemble.IsolationForest import decision_function [as alias]

#.........part of the code omitted here.........
    #that doesn't matter
    trainDf['class'] = 0


    # splitting into training and evaluation sets
    classedDf['is_train']=False
    trainDf['is_train']=True

    enhancedDf = enhance_flow(pd.concat([trainDf,classedDf], ignore_index=True), ftu)
    # construct some vectorizers based on the data in the DF. We need to vectorize future log files the exact same way so we
    # will be saving these vectorizers to a file.

    vectorizers = build_vectorizers(enhancedDf, ftu, max_features=opts.maxfeaturesperbag, ngram_size=opts.ngramsize, verbose=opts.verbose)

    #use the vectorizers to featureize our DF into a numeric feature dataframe
    featureMatrix = featureize(enhancedDf, ftu, vectorizers, verbose=opts.verbose)

    #add the class column back in (it wasn't featurized by itself)
    featureMatrix['class'] = enhancedDf['class']
    featureMatrix['is_train'] = enhancedDf['is_train']


    #split out the train and test df's into separate objects
    train, test = featureMatrix[featureMatrix['is_train']==True], featureMatrix[featureMatrix['is_train']==False]

    #drop the is_train column, we don't need it anymore
    train = train.drop('is_train', axis=1)
    test = test.drop('is_train', axis=1)


    #print('Calculating features')


    Trees=opts.numtrees
    Samples=opts.numsamples
    clf = IsolationForest(n_estimators=Trees, max_samples=Samples)

    
    clf.fit(train.drop('class', axis=1))

    testnoclass = test.drop('class', axis=1)

    print('Predicting')

    test.is_copy = False

    test['prediction'] = clf.decision_function(testnoclass) + 0.5

    print('Analyzing')
    #get the class-1 (outlier/anomaly) rows from the feature matrix, and drop the prediction so we can investigate them

    ##From Here
    Left=0.001 
    Right=0.01
    
    fpr, tpr, thresholds = roc_curve(test['class'], test['prediction'], pos_label=0)
    
    F=interpolate.interp1d(fpr, tpr, assume_sorted=True)
    x=np.logspace(np.log10(Left), np.log10(Right))
    y=F(x)
    roc_auc=auc(x, y)

    plt.figure()
    plt.xscale('log')

    plt.plot(fpr, tpr, color='b')
    plt.plot(x,y, color='r')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')


    plt.plot(plt.xlim(), plt.ylim(), ls="--", c=".3")
    plt.savefig("fig3.png")
    plt.clf()
    plt.close('all')


    print('Area Under the Curve = %.6f' %(roc_auc))



    Min, Sec= divmod( int(time.time() - Start), 60 )
    #print Min, Sec

    target= open('Results.txt', 'a')
    target.write(str(Trees)+' ')
    target.write(str(Samples)+' ')
    target.write(str(Min)+' ')
    target.write(str(Sec)+' ')
    target.write(str(roc_auc))
    target.write("\n")
    target.write(str(features))
    target.write("\n")
    target.write("\n")
    target.close()

    
    print("Minutes: %d, Seconds: %d" % (int(Min), int(Sec)) )
    return roc_auc 
Author: DKarev, Project: isolation-forest, Lines: 104, Source: find_params_iforest.py

Example 10: IForest

# Required import: from sklearn.ensemble import IsolationForest [as alias]
# Or: from sklearn.ensemble.IsolationForest import decision_function [as alias]

#.........part of the code omitted here.........

    labels_ : int, either 0 or 1
        The binary labels of the training data. 0 stands for inliers
        and 1 for outliers/anomalies. It is generated by applying
        ``threshold_`` on ``decision_scores_``.
    """

    def __init__(self, n_estimators=100,
                 max_samples="auto",
                 contamination=0.1,
                 max_features=1.,
                 bootstrap=False,
                 n_jobs=1,
                 random_state=None,
                 verbose=0):
        super(IForest, self).__init__(contamination=contamination)
        self.n_estimators = n_estimators
        self.max_samples = max_samples
        self.max_features = max_features
        self.bootstrap = bootstrap
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.verbose = verbose

    def fit(self, X, y=None):
        """Fit detector. y is optional for unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,), optional (default=None)
            The ground truth of the input samples (labels).
        """
        # validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        self.detector_ = IsolationForest(n_estimators=self.n_estimators,
                                         max_samples=self.max_samples,
                                         contamination=self.contamination,
                                         max_features=self.max_features,
                                         bootstrap=self.bootstrap,
                                         n_jobs=self.n_jobs,
                                         random_state=self.random_state,
                                         verbose=self.verbose)
        self.detector_.fit(X=X,
                           y=None,
                           sample_weight=None)

        # invert decision_scores_. Outliers come with higher outlier scores.
        self.decision_scores_ = invert_order(
            self.detector_.decision_function(X))
        self._process_decision_scores()
        return self

    def decision_function(self, X):
        """Predict raw anomaly score of X using the fitted detector.

        The anomaly score of an input sample is computed based on different
        detector algorithms. For consistency, outliers are assigned with
        larger anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only
            if they are supported by the base estimator.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_'])
        # invert outlier scores. Outliers come with higher outlier scores
        return invert_order(self.detector_.decision_function(X))

    @property
    def estimators_(self):
        """The collection of fitted sub-estimators.
        Decorator for scikit-learn Isolation Forest attributes.
        """
        return self.detector_.estimators_

    @property
    def estimators_samples_(self):
        """The subset of drawn samples (i.e., the in-bag samples) for
        each base estimator.
        Decorator for scikit-learn Isolation Forest attributes.
        """
        return self.detector_.estimators_samples_

    @property
    def max_samples_(self):
        """The actual number of samples.
        Decorator for scikit-learn Isolation Forest attributes.
        """
        return self.detector_.max_samples_
Author: flaviassantos, Project: pyod, Lines: 104, Source: iforest.py
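
Note that the PyOD wrapper in Example 10 inverts scikit-learn's scores, so its decision_function returns larger values for more anomalous samples, the opposite sign convention from the scikit-learn examples earlier on this page. A minimal usage sketch, assuming the usual PyOD import path (pyod.models.iforest) and illustrative data:

import numpy as np
from pyod.models.iforest import IForest

rng = np.random.RandomState(0)
X_train = 0.3 * rng.randn(100, 2)

clf = IForest(n_estimators=100, contamination=0.1)
clf.fit(X_train)

scores = clf.decision_function(X_train)   # higher score = more anomalous (PyOD convention)
labels = clf.labels_                      # 0 = inlier, 1 = outlier (set during fit)
print(scores[:5], labels[:5])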


Note: The sklearn.ensemble.IsolationForest.decision_function examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by their respective developers; copyright of the source code remains with the original authors, and distribution and use are subject to each project's license. Do not reproduce without permission.