This article collects typical usage examples of the Python method sklearn.ensemble.RandomForestClassifier.decision_function. If you have been wondering what RandomForestClassifier.decision_function does, how to call it, or where to find real uses of it, the curated code samples below may help. You can also explore further examples of the containing class, sklearn.ensemble.RandomForestClassifier.
Below are 3 code examples of RandomForestClassifier.decision_function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
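One caveat before the examples: current scikit-learn releases do not actually define decision_function on RandomForestClassifier (the attribute lookup raises AttributeError), and predict_proba is the conventional substitute for a continuous score. A minimal sketch of that substitution, on a synthetic dataset used purely for illustration:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_samples=200, n_features=10, random_state=0)
rf = RandomForestClassifier(n_estimators=100, random_state=0).fit(X, y)

# RandomForestClassifier exposes no decision_function...
assert not hasattr(rf, 'decision_function')
# ...so the positive-class probability serves as the analogous score.
y_score = rf.predict_proba(X)[:, 1]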
Example 1: RandomForestClassifier
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Alternatively: from sklearn.ensemble.RandomForestClassifier import decision_function [as alias]
# Also assumed from the surrounding script: import time, plus prepared x_train, y_train, x_test, y_test arrays
#rf = RandomForestClassifier(n_estimators=100, max_features='auto', n_jobs=4)
rf = RandomForestClassifier(n_estimators=100, max_features=15, n_jobs=4, max_depth=8)
#rf = RandomForestClassifier(n_estimators=100, max_features='auto', n_jobs=4, max_depth=5)
# training
st = time.time()
print("training started")
rf.fit(x_train, y_train)
print("training ended")
et = time.time()
tt = et - st
print("Training Time = " + str(tt) + "\n")
# predictions
pred = rf.predict(x_test)
# NOTE: current scikit-learn releases do not implement decision_function on
# RandomForestClassifier, so this call raises AttributeError there; the usual
# substitute is rf.predict_proba(x_test)[:, 1].
y_score = rf.decision_function(x_test)
out = open('../results/rf_combi_yes.txt', 'w')
# validation
total = y_test.size
good = 0
bad = 0
for i in range(total):
    a = y_test[i]
    p = pred[i]
    line = str(a) + ',' + str(p) + '\n'
    out.write(line)
    if str(a) == str(p):
        good += 1
    else:
        bad += 1
out.close()
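Example 1 tallies correct and incorrect predictions but stops before reporting them. A minimal follow-up, reusing the good, bad, and total counters from the loop above, might be:

accuracy = good / float(total)  # float() avoids integer division under Python 2
print("accuracy = %.4f (%d good, %d bad)" % (accuracy, good, bad))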
Example 2: perform_experiment
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Alternatively: from sklearn.ensemble.RandomForestClassifier import decision_function [as alias]
# Also assumed from the surrounding module: numpy, warnings, collections,
# sklearn.datasets (as datasets), the aliases RFC and SVC, and the project
# helpers load_dates, load_SHA256_sums, and experiment_stats
def perform_experiment(train_fs, test_fs, avstats_in, binarize,
                       classifier='RF', subsample=False):
    print('Performing experiment')
    res = []
    key_dates = []
    avstats = collections.defaultdict(int)
    for w, (f_tr, f_te) in enumerate(zip(train_fs, test_fs), start=1):
        # Load test dates
        dates = numpy.array(load_dates(f_te))
        week_s, week_e = dates.min(), dates.max()
        key_dates.append(week_s)
        print('\nPeriod {} [{} - {}]'.format(w, week_s, week_e))
        # Load training data
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            X_tr, y_tr = datasets.load_svmlight_file(f_tr)
        print(X_tr.shape)
        if subsample:
            new_size = int(round(X_tr.shape[0] * subsample))
            # numpy.random.choice samples with replacement by default
            subsam = numpy.random.choice(X_tr.shape[0], new_size)
            X_tr = X_tr[subsam, :]
            y_tr = y_tr[subsam]
        if binarize:
            X_tr.data = numpy.ones_like(X_tr.data)
            X_tr = X_tr.toarray()
        # Train classifier
        if classifier == 'RF':
            clf = RFC(n_estimators=200, n_jobs=1 if subsample else -1)
        elif classifier == 'SVM':
            clf = SVC(kernel='rbf', gamma=0.0025, C=12)
        sample_weight = None
        print('Training set size: {}'.format(X_tr.shape))
        clf.fit(X_tr, y_tr, sample_weight=sample_weight)
        tr_n_feats = X_tr.shape[1]
        del X_tr
        # Load and classify test data
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            X_te, y_te = datasets.load_svmlight_file(f_te, n_features=tr_n_feats)
        if binarize:
            X_te.data = numpy.ones_like(X_te.data)
            X_te = X_te.toarray()
        print('Test set size: {}'.format(X_te.shape))
        y_pr = clf.predict(X_te)
        if classifier == 'RF':
            # Random forests expose no decision_function; the positive-class
            # probability is used as the continuous score instead
            y_val = clf.predict_proba(X_te)[:, 1]
        elif classifier == 'SVM':
            y_val = clf.decision_function(X_te)
        del X_te
        # Evaluate experimental results
        res.append(experiment_stats(y_tr, y_te, y_pr, y_val))
        # Load file IDs of the true positives
        fileIDs = numpy.array(
            load_SHA256_sums(f_te))[numpy.where(y_te > 0.5)]
        # Update AV detection results
        for fid in fileIDs:
            avstats['Total'] += 1
            if fid in avstats_in:
                for av, det in avstats_in[fid]['report'].items():
                    if det:
                        avstats[av] += 1
        del fileIDs
        avstats['Hidost'] += numpy.logical_and(y_te == y_pr, y_te > 0.5).sum()
    res = numpy.concatenate(res)
    return res, key_dates, avstats
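A hypothetical driver for perform_experiment might look like the sketch below; the file paths, the structure of avstats_in, and the number of weekly periods are assumptions inferred from the code above, not part of the original listing:

# Hypothetical invocation; paths and the avstats_in layout are assumed.
train_fs = ['data/week-%02d-train.libsvm' % i for i in range(1, 5)]
test_fs = ['data/week-%02d-test.libsvm' % i for i in range(1, 5)]
avstats_in = {}  # maps SHA256 -> {'report': {av_name: detected_bool}}
res, key_dates, avstats = perform_experiment(
    train_fs, test_fs, avstats_in, binarize=True, classifier='RF')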
Example 3: main
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Alternatively: from sklearn.ensemble.RandomForestClassifier import decision_function [as alias]
# Also assumed from the surrounding module: CalibratedClassifierCV, LinearSVC,
# SVC, NuSVC, OneVsRestClassifier, and the project helpers load_train_data,
# load_test_data, and log_loss_mc
def main():
    train_size = 0.8
    X_train, X_valid, y_train, y_valid, scaler = load_train_data(train_size=train_size, scale_it=True, square_root_it=True)
    X_test, X_test_ids = load_test_data(scaler=scaler, square_root_it=True)
    full_X_train, _, full_y_train, _, full_scaler = load_train_data(full_train=True, scale_it=True, square_root_it=True)
    X_test_for_full, X_test_ids = load_test_data(scaler=full_scaler, square_root_it=True)
    # logistic regression
    # loss = ~0.6...
    # clf = LogisticRegression()
    # clf.fit(X_train, y_train)
    # clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    # clf_isotonic.fit(X_train, y_train)
    # y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    # log_loss_mc(y_valid, y_valid_predicted)
    # Gaussian naive Bayes
    # loss = ~1.6...
    # clf = GaussianNB()
    # clf.fit(X_train, y_train)
    # clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    # clf_isotonic.fit(X_train, y_train)
    # y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    # log_loss_mc(y_valid, y_valid_predicted)
    # random forest
    # with n_estimators=100 and no calibration, loss = ~0.6
    # with n_estimators=100 and isotonic calibration, loss = ~0.483
    clf = RandomForestClassifier(n_estimators=600, n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)
    # linear SVC
    clf = LinearSVC(C=1.0, verbose=2)
    clf.fit(X_train, y_train)
    prob_pos = clf.decision_function(X_valid)
    prob_pos = \
        (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
    y_valid_predicted = prob_pos
    log_loss_mc(y_valid, y_valid_predicted)
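    # (added remark) Min-max rescaling of decision_function margins squeezes
    # them into [0, 1], but the result is not a calibrated probability; that
    # is why the isotonic CalibratedClassifierCV variants in this example
    # tend to reach a noticeably better multiclass log loss.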
    clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)
    # non-linear (RBF) SVC
    # note: the original gamma=0.0 meant "auto" in older scikit-learn
    # releases; gamma='auto' is used here and below
    clf = SVC(C=1.0, kernel='rbf', degree=3, gamma='auto', coef0=0.0, shrinking=True, probability=False, cache_size=2000, class_weight=None, verbose=True, max_iter=-1)
    clf.fit(X_train, y_train)
    prob_pos = clf.decision_function(X_valid)
    prob_pos = \
        (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
    y_valid_predicted = prob_pos
    log_loss_mc(y_valid, y_valid_predicted)
    # http://stackoverflow.com/questions/29873981/error-with-sklearn-calibratedclassifiercv-and-svm
    clf_isotonic = CalibratedClassifierCV(OneVsRestClassifier(clf), cv=5, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)
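    # (added note) CalibratedClassifierCV needs one score column per class;
    # wrapping SVC in OneVsRestClassifier, as suggested in the Stack Overflow
    # thread linked above, likely sidesteps the shape mismatch from SVC's
    # one-vs-one decision_function on this multiclass task.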
    # non-linear SVC with built-in sigmoid (Platt) calibration
    # http://stackoverflow.com/questions/29873981/error-with-sklearn-calibratedclassifiercv-and-svm
    # probability=True
    clf = SVC(C=1.0, kernel='rbf', degree=3, gamma='auto', coef0=0.0, shrinking=True, probability=True, cache_size=2000, class_weight=None, verbose=True, max_iter=-1)
    clf.fit(X_train, y_train)
    y_valid_predicted = clf.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)
    # NuSVC with the same min-max normalization
    clf = NuSVC(nu=0.5, kernel='rbf', degree=3, gamma='auto', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=2000, verbose=True, max_iter=-1, random_state=None)
    clf.fit(X_train, y_train)
    prob_pos = clf.decision_function(X_valid)
    prob_pos = \
        (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
    y_valid_predicted = prob_pos
    log_loss_mc(y_valid, y_valid_predicted)
    # http://stackoverflow.com/questions/29873981/error-with-sklearn-calibratedclassifiercv-and-svm
    clf_isotonic = CalibratedClassifierCV(OneVsRestClassifier(clf), cv=5, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)
    # NuSVC with built-in sigmoid calibration (probability=True)
    clf = NuSVC(nu=0.5, kernel='rbf', degree=3, gamma='auto', coef0=0.0, shrinking=True, probability=True, tol=0.001, cache_size=2000, verbose=True, max_iter=-1, random_state=None)
    clf.fit(X_train, y_train)
    y_valid_predicted = clf.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)
# ... (the rest of this example is omitted) ...