本文整理汇总了Python中sklearn.ensemble.RandomForestClassifier.predict_proba方法的典型用法代码示例。如果您正苦于以下问题：Python RandomForestClassifier.predict_proba方法的具体用法？Python RandomForestClassifier.predict_proba怎么用？Python RandomForestClassifier.predict_proba使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.RandomForestClassifier的用法示例。
在下文中一共展示了RandomForestClassifier.predict_proba方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: learning_curve
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def learning_curve():
n = 50000
nsteps = 10
full = cu.get_sample_data_frame(n)
data = full.ix[0 : int(n * 0.6) - 1].reset_index()
cval = full.ix[int(n * 0.6) : int(n * 0.8) - 1].reset_index()
test = full.ix[int(n * 0.8) : n - 1].reset_index()
step = len(data) / nsteps
ndata = len(data)
mvec = range(step, ndata + step, step)
test_features = features.extract_features(test)
data_error = []
cval_error = []
for i in range(len(mvec)):
m = mvec[i]
print "running for size", m
train = data.ix[0 : m - 1].reset_index()
fea = features.extract_features(train)
rf = RandomForestClassifier(n_estimators=50, verbose=0, compute_importances=False, n_jobs=5)
rf.fit(fea, train["OpenStatus"])
new_priors = cu.load_priors("train.csv")
old_priors = cu.compute_priors(train.OpenStatus)
# predict train
probs = rf.predict_proba(fea)
# probs = cu.cap_and_update_priors(old_priors, probs, new_priors, 0.001)
y_true = compute_y_true(train)
score = multiclass_log_loss(y_true, probs)
data_error.append(score)
# predict cval
probs = rf.predict_proba(test_features)
# probs = cu.cap_and_update_priors(old_priors, probs, new_priors, 0.001)
y_true = compute_y_true(test)
score = multiclass_log_loss(y_true, probs)
cval_error.append(score)
return mvec, data_error, cval_error
示例2: rforests
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def rforests(trainx, trainy, test, n_estimators=100, k=5):
    """Fit a random forest and return the k most probable classes per row.

    Args:
        trainx: Training feature matrix.
        trainy: Training labels; flattened with ``np.ravel`` before fitting.
        test: Feature matrix to predict for.
        n_estimators: Number of trees in the forest.
        k: Number of top-ranked classes to keep for every row.

    Returns:
        Tuple ``(forest, pred_train, pred_test)``. Each prediction array has
        one row per sample and k columns holding the ``dicts.country`` mapping
        of the k most probable class labels, most probable first.
    """
    trainy = np.ravel(trainy)
    forest = RandomForestClassifier(n_estimators)
    forest.fit(trainx, trainy)

    col_names = ["country_destination_" + str(i + 1) for i in range(k)]

    def top_k_countries(feature_matrix):
        # Rank the classes of every row and translate them to country names.
        probs = forest.predict_proba(feature_matrix)
        # argsort is ascending, so the last k columns are the k largest.
        top_columns = np.argsort(probs)[:, -k:]
        # predict_proba columns are ordered like forest.classes_; translate
        # column positions into real labels so the mapping stays right even
        # when the training labels are not exactly 0..n_classes-1.
        top_labels = forest.classes_[top_columns]
        frame = pd.DataFrame(top_labels, columns=col_names)
        for name in col_names:
            # presumably dicts.country maps a class label to a country name
            # -- verify against the dicts module.
            frame[name] = frame[name].map(dicts.country)
        # Reverse the columns so the most probable class comes first.
        return np.fliplr(frame)

    pred_train = top_k_countries(trainx)
    pred_test = top_k_countries(test)
    return forest, pred_train, pred_test
示例3: main
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def main(job_id, params):
print job_id, params
params = get_params(params)
print job_id, params
crimes = np.load(DATA_FILE)
model = RandomForestClassifier(n_estimators=params['n_estimators'],
criterion=params['criterion'],
max_depth=None if params['max_depth'] < 1 else params['max_depth'],
min_samples_split=params['min_samples_split'],
min_samples_leaf=params['min_samples_leaf'],
max_features=params['max_features'],
min_weight_fraction_leaf=0.0,
max_leaf_nodes=None, bootstrap=True, oob_score=False, n_jobs=4,
random_state=42, verbose=0, warm_start=False, class_weight=None)
model.fit(crimes['features_train'], crimes['labels_train'])
loss_train = log_loss(crimes['labels_train'], model.predict_proba(crimes['features_train']))
loss_val = log_loss(crimes['labels_val'], model.predict_proba(crimes['features_val']))
loss_all = log_loss(crimes['labels'], model.predict_proba(crimes['features']))
print 'loss_all: ', loss_all
print 'loss_train: ', loss_train
print 'loss_val: ', loss_val
return loss_val
示例4: MyRandomForest
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
class MyRandomForest(MyClassifier):
def __init__(self, params=dict()):
self._params = params
self._rf = RandomForestClassifier(**(self._params))
def update_params(self, updates):
self._params.update(updates)
self._rf = RandomForestClassifier(**(self._params))
def fit(self, Xtrain, ytrain):
self._rf.fit(Xtrain, ytrain)
# def predict(self, Xtest, option = None):
# return self._extree.predict(Xtest)
def predict_proba(self, Xtest, option = None):
return self._rf.predict_proba(Xtest)[:, 1]
def predict_proba_multi(self, Xtest, option = None):
return self._rf.predict_proba(Xtest)
def plt_feature_importance(self, fname_list, f_range = list()):
importances = self._rf.feature_importances_
std = np.std([tree.feature_importances_ for tree in self._rf.estimators_], axis=0)
indices = np.argsort(importances)[::-1]
fname_array = np.array(fname_list)
if not f_range:
f_range = range(indices.shape[0])
n_f = len(f_range)
plt.figure()
plt.title("Random Forest Feature importances")
plt.barh(range(n_f), importances[indices[f_range]],
color="b", xerr=std[indices[f_range]], ecolor='k',align="center")
plt.yticks(range(n_f), fname_array[indices[f_range]])
plt.ylim([-1, n_f])
plt.show()
def list_feature_importance(self, fname_list, f_range = list(), return_list = False):
importances = self._rf.feature_importances_
indices = np.argsort(importances)[::-1]
print 'Random forest feature ranking:'
if not f_range :
f_range = range(indices.shape[0])
n_f = len(f_range)
for i in range(n_f):
f = f_range[i]
print '{0:d}. feature[{1:d}] {2:s} ({3:f})'.format(f + 1, indices[f], fname_list[indices[f]], importances[indices[f]])
if return_list:
return [indices[f_range[i]] for i in range(n_f)]
示例5: clfTestProb
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def clfTestProb(data, top, morph, runs):
    """Print the most probable ``top`` and ``morph`` class for held-out rows.

    For each of ``runs`` random train/test splits, one 10-tree forest is
    fitted first on the ``top`` labels and then re-fitted on the ``morph``
    labels. For every test row the winning probability and class of both
    targets are printed on one line. Nothing is returned.
    """
    isOrgan = []

    def describe(prob_rows, class_labels):
        # Format "<max probability> <class label>" for every probability row.
        lines = []
        for row in prob_rows:
            peak = max(row)
            winner = row.tolist().index(peak)  # first position of the maximum
            lines.append('{:.2f}'.format(peak) + " " + str(class_labels[winner]))
        return lines

    for _ in range(0, runs):
        # Take a sample for training and leave the rest for testing.
        data_train, data_test, top_train, top_test, morph_train, morph_test = train_test_split(data,top, morph)

        clf = RandomForestClassifier(n_estimators=10, verbose=2)
        clf = clf.fit(data_train, top_train)
        top_class = clf.classes_
        clf_predict_top = clf.predict_proba(data_test)

        # The same estimator object is re-fitted for the second target.
        clf = clf.fit(data_train, morph_train)
        clf_predict_morph = clf.predict_proba(data_test)
        morph_class = clf.classes_

        strTop = describe(clf_predict_top, top_class)
        strMorph = describe(clf_predict_morph, morph_class)
        for row_number, top_text in enumerate(strTop):
            print(top_text + " " + strMorph[row_number])
示例6: train_predict
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def train_predict(train_file, test_file, predict_valid_file, predict_test_file,
                  n_est, depth, n_fold=5):
    """Cross-validate a random forest, retrain on all data and save predictions.

    Args:
        train_file: Training data, read with ``load_svmlight_file``.
        test_file: Test data, read with ``load_svmlight_file``; its labels
            are ignored.
        predict_valid_file: Output path for the out-of-fold predictions.
        predict_test_file: Output path for the test predictions.
        n_est: ``n_estimators`` passed to the forest.
        depth: ``max_depth`` passed to the forest.
        n_fold: Number of stratified cross-validation folds.

    Both output files receive the second column of ``predict_proba``, one
    value per line.
    """
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        level=logging.DEBUG,
                        filename='rf_{}_{}.log'.format(n_est, depth))

    logging.info('Loading training and test data...')
    X, y = load_svmlight_file(train_file)
    X_tst, _ = load_svmlight_file(test_file)

    clf = RF(n_estimators=n_est, max_depth=depth, random_state=2015)
    cv = StratifiedKFold(y, n_folds=n_fold, shuffle=True, random_state=2015)

    logging.info('Cross validation...')
    # Force a float buffer so probabilities are never truncated to y's dtype.
    p_val = np.zeros_like(y, dtype=float)
    for i_trn, i_val in cv:
        clf.fit(X[i_trn], y[i_trn])
        p_val[i_val] = clf.predict_proba(X[i_val])[:, 1]

    # Score all out-of-fold predictions at once. Summing the per-fold losses,
    # as before, reported a value roughly n_fold times too large.
    logging.info('Log Loss = {:.4f}'.format(log_loss(y, p_val)))

    logging.info('Retraining with 100% data...')
    # NOTE(review): the folds above are fitted on the sparse matrix while the
    # final model uses a dense copy; kept as-is, confirm it is intentional.
    clf.fit(X.todense(), y)
    p_tst = clf.predict_proba(X_tst.todense())[:, 1]

    logging.info('Saving predictions...')
    np.savetxt(predict_valid_file, p_val, fmt='%.6f')
    np.savetxt(predict_test_file, p_tst, fmt='%.6f')
示例7: main
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def main():
train_f = pd.read_csv(train_path, header=0, parse_dates=['Dates'])
print train_f.dtypes
X, Y = get_feature(train_f, "training_set")
### TRAINING
# clf = GradientBoostingClassifier(n_estimators=50)
clf = RandomForestClassifier(n_estimators=200)
# clf = LogisticRegression(n_jobs=4)
X, Y = shuffle_XY(X, Y)
data_len = len(X)
train_len = data_len * 95 / 100
val_len = data_len - train_len
X_train = X[:train_len]
X_val = X[train_len:]
Y_train = Y[:train_len]
Y_val = Y[train_len:]
clf = clf.fit(X_train, Y_train)
print "Training done"
# train_acc = clf.score(X_train, Y_train)
# print "Train acc:", train_acc
val_acc = clf.score(X_val, Y_val)
print "Val acc:", val_acc
val_pred = clf.predict_proba(X_val)
val_log = 0.0
cnt = 0
for y in Y_val:
val_log += math.log(val_pred[cnt, y]+0.0000001)
cnt += 1
val_log = - val_log / len(Y_val)
print "Val log loss:", val_log
# print max(Y_val), min(Y_val)
# print Y_val, Y_val + 1
# print "Val loss:", log_loss(Y_val+1, val_pred) # Note the +1 here!
"""
# scores = cross_val_score(clf, X, Y)
# print "Cross val acc:", scores.mean()
"""
### Testing
test_f = pd.read_csv(test_path, header=0, parse_dates=['Dates'])
# print test_f.dtypes
X_test, _ = get_feature(test_f, "test_set")
Y_test = clf.predict_proba(X_test)
### Write results
# write_results(Y_test)
write_results_prob(Y_test)
示例8: tree_trans
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def tree_trans(trainer, labels, test_w, test_g):
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
#clf = DecisionTreeClassifier(max_depth=3)
clf = RandomForestClassifier(n_estimators=700,max_depth=6, min_samples_split=10, min_samples_leaf=10)
ts = time.time()
clf.fit(trainer, labels)
print 'Trees training: ', (time.time() - ts)
ts = time.time()
prob_w = np.zeros(test_w.shape[0], dtype=[('Forest Output Prob', 'f8')])
prob_g = np.zeros(test_w.shape[0], dtype=[('Forest Output Prob', 'f8')])
print prob_w.shape
print test_w.shape
print clf.predict_proba(test_w).shape
print clf.predict_proba(test_w)[:,0].shape
prob_w['Forest Output Prob'] = clf.predict_proba(test_w)[:,0]
prob_g['Forest Output Prob'] = clf.predict_proba(test_g)[:,0]
print 'Trees transforming: ', (time.time() - ts)
return prob_w, prob_g
h_w, bin_edges = np.histogram(prob_w, 20, (0,1))
h_g, bin_edges = np.histogram(prob_g, 20, (0,1))
bin_centers = (bin_edges[0:-1] + bin_edges[1:])/2
fig = plt.figure()
ebkw = {'linewidth':1,}
ax = fig.add_subplot(111)
ax.errorbar(bin_centers, h_w, np.sqrt(h_w),label=w_label ,color='g', **ebkw)
ax.errorbar(bin_centers, h_g, np.sqrt(h_g),label=g_label ,color='b', **ebkw)
ax.set_xlabel('Decision Tree Ouput Prob', size='x-large')
ax.set_ylabel('Occupancy', size='x-large')
plt.legend()
示例9: RFC_Classifier
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def RFC_Classifier(X_train, X_cv, X_test, Y_train,Y_cv,Y_test, Actual_DS):
    """Fit a 500-tree forest, report CV metrics and return probabilities.

    Args:
        X_train, Y_train: Training features and labels.
        X_cv, Y_cv: Cross-validation features and labels, used for accuracy,
            the confusion table and the log-loss.
        X_test: Test features; only probabilities are produced.
        Y_test: Accepted but not used.
        Actual_DS: Further feature set to produce probabilities for.

    Returns:
        Tuple of three DataFrames with the class probabilities for
        ``X_cv``, ``X_test`` and ``Actual_DS``.
    """
    print("***************Starting Random Forest Classifier***************")
    t0 = time()
    clf = RandomForestClassifier(n_estimators=500,n_jobs=1)
    clf.fit(X_train, Y_train)

    preds = clf.predict(X_cv)
    score = clf.score(X_cv,Y_cv)
    print("Random Forest Classifier - {0:.2f}%".format(100 * score))

    Summary = pd.crosstab(label_enc.inverse_transform(Y_cv), label_enc.inverse_transform(preds),
                          rownames=['actual'], colnames=['preds'])
    # Divide every row by its own total (axis=0 aligns the row sums with the
    # row index). axis=1 aligned them with the column labels instead, so
    # 'pct' was not a per-row percentage.
    row_share = Summary.divide(Summary.sum(axis=1), axis=0)
    Summary['pct'] = row_share.max(axis=1)*100
    print(Summary)

    # Check with log loss function
    epsilon = 1e-15
    preds2 = clf.predict_proba(X_cv)
    ll_output2= log_loss(Y_cv, preds2, eps=epsilon, normalize=True)
    print(ll_output2)
    print("done in %0.3fs" % (time() - t0))

    preds3 = clf.predict_proba(X_test)
    print("x_test done")
    preds4 = clf.predict_proba(Actual_DS)
    print("***************Ending Random Forest Classifier***************")
    return pd.DataFrame(preds2) , pd.DataFrame(preds3),pd.DataFrame(preds4)
示例10: rf_grid_search
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def rf_grid_search():
train_inp,valid_inp,train_target,valid_target = prepare_input()
#set up scorer for grid search. log-loss is error, not score, so set greater_is_better to false,
#and log-loss requires a probability
log_loss_scorer = make_scorer(log_loss,greater_is_better=False,needs_proba=True)
train_inp = train_inp[:100000]
train_target = train_target[:100000]
start = time.time()
random_forest = RandomForestClassifier(random_state=31)
# r_forest_parameters = {'n_estimators' : [120,300,500,800,1200],'max_depth':[5,8,15,25,30,None],'max_features':['log2','sqrt',None],
# 'min_samples_split':[1,2,5,10,15,100],'min_samples_leaf':[1,2,5,10]}
#75.1 minutes to run with these paramters - 72 fits
r_forest_parameters = {'min_samples_split':[2,5,10,20,50,100],'min_samples_leaf':[1,2,5,10,50,100]}
#grid search too slow to not use all cores, and wayyyy too slow to have no output.
r_forest_grid_obj = GridSearchCV(random_forest,r_forest_parameters,log_loss_scorer,verbose=2,n_jobs=-1)
r_forest_grid_obj = r_forest_grid_obj.fit(train_inp,train_target)
random_forest = r_forest_grid_obj.best_estimator_
print "Best params: " + str(r_forest_grid_obj.best_params_)
random_forest_train_error = log_loss(train_target,random_forest.predict_proba(train_inp))
random_forest_validation_error = log_loss(valid_target,random_forest.predict_proba(valid_inp))
print "Best random forest training error: {:02.4f}".format(random_forest_train_error)
print "Best random forest validation error: {:02.4f}".format(random_forest_validation_error)
end = time.time()
print "RF grid search took {:02.4f} seconds".format(end-start)
return random_forest
示例11: RandomForrestClassifierStep
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
class RandomForrestClassifierStep(SklearnStep):
    """Pipeline step whose output is a random forest's class probabilities."""

    def __init__(self,
                 n_estimators=10,
                 criterion='gini',
                 max_features='auto',
                 max_depth=None):
        """Store the forest hyper-parameters; the model is built in fit_transform."""
        super(RandomForrestClassifierStep, self).__init__()
        self._n_estimator = n_estimators
        self._criterion = criterion
        self._max_features = max_features
        self._max_depth = max_depth

    def fit_transform(self):
        """Fit on the data at ``self._input_path`` and save its probabilities.

        The probabilities of the training rows are written to
        ``self._output_path``; nothing is returned.
        """
        self._model = RandomForestClassifier(n_estimators=self._n_estimator,
                                             criterion=self._criterion,
                                             max_depth=self._max_depth,
                                             max_features=self._max_features)
        x, y = load_svmlight(self._input_path)
        self._model.fit(x, y)
        scores = self._model.predict_proba(x)
        save_numpy_txt(scores, self._output_path)

    def transform(self, x=None):
        """Return class probabilities for ``x`` or for the stored test input.

        Args:
            x: Feature matrix. When ``None``, the data at
                ``self._test_input_path`` is loaded and the result is also
                written to ``self._output_path``.

        Returns:
            The class-probability matrix.
        """
        # Compare against None explicitly: truth-testing a multi-element
        # array or a sparse matrix raises ValueError.
        if x is None:
            x, _ = load_svmlight(self._test_input_path)
            transformed_x = self._model.predict_proba(x)
            save_numpy_txt(transformed_x, self._output_path)
            return transformed_x
        return self._model.predict_proba(x)

    def predict(self, features):
        """Return the class-probability matrix for ``features``."""
        return self._model.predict_proba(features)
示例12: rf_fit
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def rf_fit():
train_inp,valid_inp,train_target,valid_target = prepare_input()
rf = RandomForestClassifier(random_state=31,n_jobs=-1,verbose=1,n_estimators=100,min_samples_split=5)
start = time.time()
rf.fit(train_inp,train_target)
end = time.time()
print "fitting took {:0.4} seconds".format(end-start)
training_output = rf.predict_proba(train_inp)
validation_output = rf.predict_proba(valid_inp)
training_error = log_loss(train_target,training_output)
validation_error = log_loss(valid_target,validation_output)
print "Train error: {:02.4f}".format(training_error)
print "Validation error: {:02.4f}".format(validation_error)
joblib.dump(rf,rf_filename)
return rf
示例13: modelSelection
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def modelSelection(x_train, y_train, x_test, y_test, model, n_folds):
    """
    Select a model and return the AUCs of training, validation and test sets
    and the predicted offer acceptance probabilities.

    Args:
        x_train, y_train: Training features and labels; split into
            stratified folds.
        x_test, y_test: Test features and labels, scored once per fold.
        model: One of "Random Forest", "Logistic Regression L1",
            "Logistic Regression L2", "Decision Tree", "Naive Bayes", "KNN".
        n_folds: Number of stratified cross-validation folds.

    Returns:
        Tuple ``(auc_train, auc_validation, auc_test, pred_prob)``: the three
        AUCs averaged over folds, and the test probabilities (second column
        of ``predict_proba``) averaged over folds.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    if model == "Random Forest":
        clf = RandomForestClassifier(n_estimators=150, oob_score=True, random_state=0, min_samples_split=1)
    elif model == "Logistic Regression L1":
        clf = LogisticRegression(penalty='l1', random_state=0, class_weight='auto')
    elif model == "Logistic Regression L2":
        clf = LogisticRegression(penalty='l2', random_state=0, class_weight='auto')
    elif model == "Decision Tree":
        clf = DecisionTreeClassifier(random_state=0)
    elif model == "Naive Bayes":
        clf = GaussianNB()
    elif model == "KNN":
        clf = KNeighborsClassifier(n_neighbors=10)
    else:
        # Fail early with a clear message instead of an UnboundLocalError
        # on ``clf`` inside the cross-validation loop.
        raise ValueError("Unknown model: {0!r}".format(model))

    # Perform cross-validation on training dataset and calculate AUC
    cv = StratifiedKFold(y_train, n_folds=n_folds)
    auc_train = []
    auc_validation = []
    auc_test = []
    pred_prob = []
    for train, validation in cv:
        clf = clf.fit(x_train[train], y_train[train])
        auc_train.append(metrics.roc_auc_score(y_train[train], clf.predict_proba(x_train[train])[:, 1]))
        auc_validation.append(metrics.roc_auc_score(y_train[validation], clf.predict_proba(x_train[validation])[:, 1]))
        # Score the test set once per fold and reuse the result.
        test_prob = clf.predict_proba(x_test)[:, 1]
        auc_test.append(metrics.roc_auc_score(y_test, test_prob))
        pred_prob.append(test_prob)

    return np.mean(auc_train), np.mean(auc_validation), np.mean(auc_test), np.mean(pred_prob, axis=0)
示例14: classifiers_accuracy
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def classifiers_accuracy():
path = os.path.join(root_dir, "generated")
td = np.load(os.path.join(path, "training_data", "training_data.npy"))
# ----------- Data set separation ----------- #
# whole set
rec_len = len(td[:, :][0]) - 1 # in index (starting from 0)
data_x = td[:, :rec_len]
data_y = td[:, rec_len]
# training set
x = data_x[:-TRAINING_SIZE]
y = data_y[:-TRAINING_SIZE]
# testing set
t_x = data_x[-TRAINING_SIZE:]
t_y = data_y[-TRAINING_SIZE:]
# ----------- Fix Class Imbalance ----------- #
OS = OverSampler(random_state=1)
ox, oy = OS.fit_transform(x, y)
smote = SMOTE(random_state=1)
sx, sy = smote.fit_transform(x, y)
bsmote1 = bSMOTE1(random_state=1)
bsx1, bsy1 = bsmote1.fit_transform(x, y)
# ----------- Train and Predict ----------- #
# predict() will just say whether it's a attack,
# but predict_proba() will say the probability (this is important)
# Over Sampler data
clf_1 = RandomForestClassifier(n_estimators=100, n_jobs=2)
clf_1.fit(ox, oy)
p_1 = clf_1.predict_proba(t_x)
# print("p_1 : ", p_1)
# SMOTE data
clf_2 = RandomForestClassifier(n_estimators=100, n_jobs=2)
clf_2.fit(sx, sy)
p_2 = clf_2.predict_proba(t_x)
# print("p_2: ", p_2)
# BSMOTE data
clf_3 = RandomForestClassifier(n_estimators=100, n_jobs=2)
clf_3.fit(bsx1, bsy1)
p_3 = clf_3.predict_proba(t_x)
# print("p_3 : ", p_3)
print "{0} \t{1} \t\t{2} \t{3}".format("actual", "os", "smote", "bsmote")
for i in range(0, TRAINING_SIZE):
a = t_y[i]
o = p_1[i][1]
s = p_2[i][1]
b = p_3[i][1]
if a != 0. or o != 0. or s != 0. or b != 0.:
print "{0} \t{1} \t{2} \t{3}".format(a, o, s, b)
pass
示例15: main
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def main(argv=None):
iris = load_iris()
rf = RandomForestClassifier(max_depth = 4)
idx = range(len(iris.target))
np.random.shuffle(idx)
rf.fit(iris.data[idx][:100], iris.target[idx][:100])
instance = iris.data[idx][100:101]
print rf.predict_proba(instance)