当前位置: 首页>>代码示例>>Python>>正文


Python RandomForestClassifier.decision_function方法代码示例

本文整理汇总了Python中sklearn.ensemble.RandomForestClassifier.decision_function方法的典型用法代码示例。如果您正苦于以下问题:Python RandomForestClassifier.decision_function方法的具体用法?Python RandomForestClassifier.decision_function怎么用?Python RandomForestClassifier.decision_function使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.ensemble.RandomForestClassifier的用法示例。


在下文中一共展示了RandomForestClassifier.decision_function方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: RandomForestClassifier

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import decision_function [as 别名]
# Train a random forest on the diabetes-readmission features and dump
# "actual,predicted" pairs for offline evaluation.
#rf = RandomForestClassifier(n_estimators=100, max_features='auto', n_jobs=4)
rf = RandomForestClassifier(n_estimators=100, max_features=15, n_jobs=4, max_depth=8)
#rf = RandomForestClassifier(n_estimators=100, max_features='auto', n_jobs=4, max_depth=5)

# training (timed)
st = time.time()
print("training started")
rf.fit(x_train, y_train)
print("training ended")
et = time.time()
tt = et - st
print("Training Time = " + str(tt) + "\n")

# predictions
pred = rf.predict(x_test)
# BUG FIX: RandomForestClassifier has no decision_function() -- calling it
# raises AttributeError. Use predict_proba() as the per-class score instead.
y_score = rf.predict_proba(x_test)
# NOTE(review): handle is left open on purpose -- code past this excerpt may
# still write to it; close it once the last write is done.
out = open('../results/rf_combi_yes.txt', 'w')

# validation: count exact matches between actual and predicted labels
total = y_test.size
good = 0
bad = 0
for a, p in zip(y_test, pred):
    out.write(str(a) + ',' + str(p) + '\n')
    if str(a) == str(p):
        good += 1
    else:
        bad += 1
开发者ID:raybenchen,项目名称:DiabeticsReadmissionPrediction,代码行数:33,代码来源:rf_combi_yes.py

示例2: perform_experiment

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import decision_function [as 别名]
def perform_experiment(train_fs, test_fs, avstats_in, binarize,
                       classifier='RF', subsample=False):
    """Run a rolling per-period train/test experiment.

    Parameters:
        train_fs, test_fs: parallel sequences of svmlight file paths; pair i
            is the training and test set for period i.
        avstats_in: mapping of file SHA256 -> {'report': {av_name: detected}}
            holding per-AV detection verdicts for known files.
        binarize: if True, replace every feature value with 1 (presence only).
        classifier: 'RF' (random forest) or 'SVM' (RBF-kernel SVC).
        subsample: falsy to use all training rows, or a float fraction of
            rows to sample each period.

    Returns:
        (res, key_dates, avstats): concatenated per-period results from
        experiment_stats(), the start date of each test period, and a dict
        of detection counts per AV plus 'Total' and 'Hidost'.

    Raises:
        ValueError: if `classifier` is neither 'RF' nor 'SVM'.
    """
    print('Performing experiment')
    res = []
    key_dates = []
    avstats = collections.defaultdict(int)
    for w, (f_tr, f_te) in enumerate(zip(train_fs, test_fs), start=1):
        # Load test dates to delimit and label the period.
        dates = numpy.array(load_dates(f_te))
        week_s, week_e = dates.min(), dates.max()
        key_dates.append(week_s)
        print('\nPeriod {} [{} - {}]'.format(w, week_s, week_e))

        # Load training data (silence svmlight parser warnings).
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            X_tr, y_tr = datasets.load_svmlight_file(f_tr)
        print(X_tr.shape)
        if subsample:
            # NOTE(review): numpy.random.choice samples WITH replacement by
            # default, so rows may repeat; pass replace=False if a true
            # subset is intended -- confirm against the original experiment.
            new_size = int(round(X_tr.shape[0] * subsample))
            subsam = numpy.random.choice(X_tr.shape[0], new_size)
            X_tr = X_tr[subsam, :]
            y_tr = y_tr[subsam]
        if binarize:
            X_tr.data = numpy.ones_like(X_tr.data)
        X_tr = X_tr.toarray()

        # Train this period's classifier.
        if classifier == 'RF':
            clf = RFC(n_estimators=200, n_jobs=1 if subsample else -1)
        elif classifier == 'SVM':
            clf = SVC(kernel='rbf', gamma=0.0025, C=12)
        else:
            # BUG FIX: an unrecognized value previously left `clf` (and
            # later `y_val`) unbound, failing with a confusing NameError.
            raise ValueError('Unknown classifier: {!r}'.format(classifier))
        sample_weight = None
        print('Training set size: {}'.format(X_tr.shape))
        clf.fit(X_tr, y_tr, sample_weight=sample_weight)
        tr_n_feats = X_tr.shape[1]
        del X_tr  # free the dense training matrix before loading test data

        # Load and classify test data, forcing the training feature count so
        # the matrices align.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            X_te, y_te = datasets.load_svmlight_file(f_te, n_features=tr_n_feats)
        if binarize:
            X_te.data = numpy.ones_like(X_te.data)
        X_te = X_te.toarray()
        print('Test set size: {}'.format(X_te.shape))
        y_pr = clf.predict(X_te)
        if classifier == 'RF':
            # RF has no decision_function; use positive-class probability.
            y_val = clf.predict_proba(X_te)[:, 1]
        else:  # 'SVM' (only other value allowed past the check above)
            y_val = clf.decision_function(X_te)
        del X_te

        # Evaluate this period's results.
        res.append(experiment_stats(y_tr, y_te, y_pr, y_val))

        # SHA256 IDs of the test files labelled positive (malicious).
        fileIDs = numpy.array(
            load_SHA256_sums(f_te))[numpy.where(y_te > 0.5)]

        # Update per-AV detection counts over those files.
        for fid in fileIDs:
            avstats['Total'] += 1
            if fid in avstats_in:
                # BUG FIX: dict.iteritems() is Python 2 only; .items() works
                # everywhere and matches the py3 idioms used above.
                for av, det in avstats_in[fid]['report'].items():
                    if det:
                        avstats[av] += 1
        del fileIDs
        # Count this classifier's own true positives under the 'Hidost' key.
        avstats['Hidost'] += numpy.logical_and(y_te == y_pr, y_te > 0.5).sum()
    res = numpy.concatenate(res)
    return res, key_dates, avstats

示例3: main

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import decision_function [as 别名]
def main():
    train_size = 0.8


    X_train, X_valid, y_train, y_valid, scaler = load_train_data(train_size=train_size, scale_it=True, square_root_it=True)
    X_test, X_test_ids = load_test_data(scaler=scaler, square_root_it=True)

    full_X_train, _, full_y_train, _, full_scaler = load_train_data(full_train=True, scale_it=True, square_root_it=True)
    X_test_for_full, X_test_ids = load_test_data(scaler=full_scaler, square_root_it=True)


    # logistic
    # loss = ~0.6...
    # clf = LogisticRegression()
    # clf.fit(X_train, y_train)
    # clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    # clf_isotonic.fit(X_train, y_train)
    # y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    # log_loss_mc(y_valid, y_valid_predicted)
    

    # gnb
    # loss = ~1.6...
    # clf = GaussianNB()
    # clf.fit(X_train, y_train)
    # clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    # clf_isotonic.fit(X_train, y_train)
    # y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    # log_loss_mc(y_valid, y_valid_predicted)
    

    # rf
    # when n_estimators=100, without calibration, loss = ~0.6
    # when n_estimators=100, with calibration, loss = ~0.483
    clf = RandomForestClassifier(n_estimators=600, n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)
    

    # linear svc
    clf = LinearSVC(C=1.0, verbose=2)
    clf.fit(X_train, y_train)
    prob_pos = clf.decision_function(X_valid)
    prob_pos = \
            (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
    y_valid_predicted = prob_pos
    log_loss_mc(y_valid, y_valid_predicted)
    clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)


    # well, non-linear svc
    clf = SVC(C=1.0, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, shrinking=True, probability=False, cache_size=2000, class_weight=None, verbose=True, max_iter=-1)
    clf.fit(X_train, y_train)
    prob_pos = clf.decision_function(X_valid)
    prob_pos = \
            (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
    y_valid_predicted = prob_pos
    log_loss_mc(y_valid, y_valid_predicted)
    # http://stackoverflow.com/questions/29873981/error-with-sklearn-calibratedclassifiercv-and-svm
    clf_isotonic = CalibratedClassifierCV(OneVsRestClassifier(clf), cv=5, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)


    # non-linear svc using sigmoidal
    # http://stackoverflow.com/questions/29873981/error-with-sklearn-calibratedclassifiercv-and-svm
    # probability=True
    clf = SVC(C=1.0, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, shrinking=True, probability=True, cache_size=2000, class_weight=None, verbose=True, max_iter=-1)
    clf.fit(X_train, y_train)
    y_valid_predicted = clf.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)


    # nusvc, wtf?
    clf = NuSVC(nu=0.5, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=2000, verbose=True, max_iter=-1, random_state=None)
    clf.fit(X_train, y_train)
    prob_pos = clf.decision_function(X_valid)
    prob_pos = \
            (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
    y_valid_predicted = prob_pos
    log_loss_mc(y_valid, y_valid_predicted)
    # http://stackoverflow.com/questions/29873981/error-with-sklearn-calibratedclassifiercv-and-svm
    clf_isotonic = CalibratedClassifierCV(OneVsRestClassifier(clf), cv=5, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)


    # nusvc using sigmoidal?
    clf = NuSVC(nu=0.5, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, shrinking=True, probability=True, tol=0.001, cache_size=2000, verbose=True, max_iter=-1, random_state=None)
    clf.fit(X_train, y_train)
    y_valid_predicted = clf.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)
#.........这里部分代码省略.........
开发者ID:statsrocks,项目名称:kaggle-otto,代码行数:103,代码来源:m_sk.py


注:本文中的sklearn.ensemble.RandomForestClassifier.decision_function方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。