This page collects typical usage examples of the Python method sklearn.ensemble.forest.RandomForestClassifier.predict_proba. If you are wondering what RandomForestClassifier.predict_proba does, or how to call it in practice, the hand-picked code examples below should help. You can also read more about its containing class, sklearn.ensemble.forest.RandomForestClassifier.
Nine code examples of RandomForestClassifier.predict_proba are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
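Before the examples, here is a minimal, self-contained sketch (written for this page, not taken from the examples below) of what predict_proba returns. It uses the public sklearn.ensemble import path, since the internal sklearn.ensemble.forest module is no longer exposed by newer scikit-learn releases:

import numpy as np
from sklearn.ensemble import RandomForestClassifier

# A tiny toy dataset: two features, binary labels.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 0, 1, 1])

clf = RandomForestClassifier(n_estimators=10, random_state=0)
clf.fit(X, y)

# predict_proba returns an array of shape (n_samples, n_classes);
# the column order matches clf.classes_.
proba = clf.predict_proba(X)
print(clf.classes_)   # [0 1]
print(proba.shape)    # (4, 2)
print(proba[:, 1])    # probability of class 1 for each sample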
Example 1: test_RandomForest
# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict_proba [as alias]
def test_RandomForest(self):
    X = [[0, 1], [1, 1]]
    Y = [0, 1]
    regression = RandomForestClassifier(n_estimators=10)
    regression = regression.fit(X, Y)
    regression.predict_proba(X)
Example 2: MyRfClassifier
# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict_proba [as alias]
class MyRfClassifier(BaseClassifier):
    def __init__(self, n_estimators, max_depth, min_samples_leaf):
        self.classifier = RandomForestClassifier(**{'verbose': 1,
                                                    'n_estimators': n_estimators,
                                                    'max_depth': max_depth,
                                                    'min_samples_leaf': min_samples_leaf,
                                                    'n_jobs': 40})
        self.name = "rf_n{n}_md{md}_ms{ms}".format(
            **{"n": n_estimators, "md": max_depth, "ms": min_samples_leaf}
        )

    def get_name(self):
        return self.name

    def fit(self, X, y, X_t, y_t):
        return self.classifier.fit(X, y)

    def predict_proba(self, X):
        return self.classifier.predict_proba(X)

    def get_feature_importances(self, feat_names):
        ipts = dict(zip(feat_names, self.classifier.feature_importances_))
        return ipts
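A hypothetical usage of the wrapper above (my illustration; it assumes the class definition above is in scope together with its unshown BaseClassifier base and the RandomForestClassifier import):

import numpy as np
from sklearn.model_selection import train_test_split

# Synthetic binary data just to exercise the wrapper.
rng = np.random.RandomState(0)
X = rng.rand(200, 3)
y = (X[:, 0] > 0.5).astype(int)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

model = MyRfClassifier(n_estimators=50, max_depth=5, min_samples_leaf=2)
print(model.get_name())            # "rf_n50_md5_ms2"
model.fit(X_tr, y_tr, X_te, y_te)  # the validation split is accepted but not used
proba = model.predict_proba(X_te)  # shape (n_samples, 2): class probabilities
print(model.get_feature_importances(["f0", "f1", "f2"]))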
Example 3: year_train_test_split
# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict_proba [as alias]
X_train, X_test, y_train, y_test = year_train_test_split(
    train_for_loo, 'WnvPresent_DateTrapSpecies', year)
X_train.to_csv("data_per_year/" + str(year) + "X_train.csv", index=False)
X_test.to_csv("data_per_year/" + str(year) + "X_test.csv", index=False)
y_train.to_csv("data_per_year/" + str(year) + "y_train.csv", index=False)
y_test.to_csv("data_per_year/" + str(year) + "y_test.csv", index=False)
clf.fit(X_train, y_train)
# y_pred = clf.predict_proba(X_test) [:, 1] # Random Forest
y_pred = clf.predict_proba(X_test) # For XGB
score = metrics.roc_auc_score(y_test, y_pred)
scores.append(score)
#import operator
#feat_importances = dict(zip(X_train.columns, clf.feature_importances_))
#sorted_feat_importances = sorted(feat_importances.items(), key=operator.itemgetter(1))
#print(sorted_feat_importances)
total_pred = np.concatenate((total_pred, y_pred))
total_test = np.concatenate((total_test, y_test))
print("Global ROC score", metrics.roc_auc_score(total_test, total_pred))
print(scores)
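As the commented-out line above hints, for a scikit-learn random forest the positive-class column of predict_proba is what roc_auc_score expects on a binary problem. A self-contained illustration on synthetic data (the competition features themselves are not reproduced here):

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics

# Toy binary data standing in for one leave-one-year-out split.
rng = np.random.RandomState(0)
X = rng.rand(200, 5)
y = (X[:, 0] + 0.1 * rng.randn(200) > 0.5).astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf.fit(X_train, y_train)

# roc_auc_score wants one score per sample, so take the positive-class column.
y_score = clf.predict_proba(X_test)[:, 1]
print(metrics.roc_auc_score(y_test, y_score))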
Example 4
# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict_proba [as alias]
# Visualize tree
dot_data = StringIO.StringIO()
tree.export_graphviz(clf, out_file=dot_data, feature_names=list(data_tree.columns.values))
graph = pydot.graph_from_dot_data(dot_data.getvalue())
graph.write_pdf('dectree.pdf')
# Repeat on test set
y_test_pred = clf.predict(X_test)
print("Accuracy Test: {0:.3f}".format(metrics.accuracy_score(y_test, y_test_pred)))
print()
print("Classification report:")
print(metrics.classification_report(y_test, y_test_pred))
print()
print("Confusion matrix:")
print(metrics.confusion_matrix(y_test, y_test_pred))
# Measure performance
y_pred = clf.predict_proba(X_train)
# Repeat on test set
y_test_pred = clf.predict_proba(X_test)
tt = g_test.as_matrix()
pred = tt * y_test_pred
ss = np.sum(pred, axis=1)
sss = ss.mean()
print(sss)
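The tail of this example appears to compute the average probability the model assigns to the true class, assuming g_test holds a one-hot encoding of the test labels. A self-contained reconstruction of that idea (my assumption, on synthetic data):

import numpy as np
from sklearn.ensemble import RandomForestClassifier

# Three-class toy data; labels 0..2 so they can index np.eye directly.
rng = np.random.RandomState(0)
X = rng.rand(150, 4)
y = rng.randint(0, 3, size=150)

clf = RandomForestClassifier(n_estimators=50, random_state=0).fit(X, y)
proba = clf.predict_proba(X)              # shape (n_samples, n_classes)

one_hot = np.eye(len(clf.classes_))[y]    # one-hot matrix of the true labels
mean_true_class_prob = (one_hot * proba).sum(axis=1).mean()
print(mean_true_class_prob)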
Example 5: runns
# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict_proba [as alias]
def runns(resp_var, size_of_test_data, dataset, positive_class, predictor_var,
          n_estimators, important_features, dealing_with_nulls):
    dataset = pd.read_csv('raw_data.csv', low_memory=False)  # For testing purposes
    #----DATA PREPROCESSING
    #-------dealing with NULL values in the data
    #----------remove the rows in which the response is null
    dataset = dataset.dropna(subset=[resp_var])
    #----------dealing with nulls
    dataset = deal_with_nulls(dealing_with_nulls, dataset)
    #----FEATURE SELECTION
    #-------get predictors important in predicting the response
    #-----------transform categorical predictors to dummy variables
    predictors = dataset[predictor_var]
    predictors = pd.get_dummies(predictors)
    #-----------balance the classes in the response var
    ros = RandomOverSampler(random_state=0)
    resp = dataset[resp_var]
    prds, resp = ros.fit_sample(predictors, resp)
    #-----------fit the random forest classifier to give us the important predictors
    rf_clf = RandomForestClassifier(n_estimators=n_estimators)
    rf_clf.fit(prds, resp)
    #-------get the important predictors
    feature_imp = pd.Series(rf_clf.feature_importances_,
                            index=list(predictors.iloc[:, 0:])).sort_values(ascending=False)
    #-------names of the important predictors
    important_predictor_names = feature_imp.index[0:important_features]
    #-------subset the data to get only the important predictors and the response
    resp = pd.DataFrame(data=resp, columns=[resp_var])
    predictors = pd.DataFrame(prds, columns=list(predictors))
    dataset = pd.concat([resp, predictors], axis=1)
    #---------------------------------------------------------
    #----MODEL TRAINING
    #--------Remove the response variable from the feature variables - axis 1 refers to the columns
    m_data = dataset.drop(resp_var, axis=1, inplace=False)
    # Response variables are the values we want to predict
    resp_var = np.array(dataset[resp_var])
    dataset = pd.get_dummies(m_data)
    # Saving feature names for later use
    feature_list = list(m_data.columns)
    # Convert to numpy array
    dataset = np.array(dataset)
    # Split the data into training and testing sets
    train_features, test_features, train_labels, test_labels = train_test_split(
        dataset, resp_var, test_size=float(size_of_test_data), random_state=402)
    # Instantiate model with n_estimators decision trees
    clf = RandomForestClassifier(n_jobs=1, n_estimators=n_estimators, random_state=142)
    # Train the model on training data
    clf.fit(train_features, train_labels)
    # evaluation
    predicted = clf.predict(test_features)
    pred_prob = clf.predict_proba(test_features)
    accuracy = accuracy_score(test_labels, predicted)
    # confusion matrix
    cnf = confusion_matrix(test_labels, predicted)
    # precision score
    precision = precision_score(test_labels, predicted, pos_label=positive_class)
    # average precision
    avg_precision = average_precision_score(test_labels, pred_prob[:, [1]])
    # recall score
    rec = recall_score(test_labels, predicted, pos_label=positive_class)
    # f1 score
    fscore = f1_score(test_labels, predicted, pos_label=positive_class)
    # fbeta score
    fbeta = fbeta_score(test_labels, predicted, beta=0.5)
    # hamming loss
    hamming = hamming_loss(test_labels, predicted)
    # jaccard similarity score
    jaccard = jaccard_similarity_score(test_labels, predicted)
    # log loss
    logloss = log_loss(test_labels, predicted)
    # zero-one loss
    zero_one = zero_one_loss(test_labels, predicted)
    # area under the ROC curve
    area_under_roc = roc_auc_score(test_labels, pred_prob[:, [1]])
    # Cohen's kappa
    cohen = cohen_kappa_score(test_labels, predicted)
    # Matthews correlation coefficient
    mathews = matthews_corrcoef(test_labels, predicted)
    # Variable importances from the important features selection stage
    variable_importance_list = list(zip(prds, feature_imp))
    output = {"accuracy": accuracy, "precision": precision, "average precision": avg_precision,
              "recall": rec, "fscore": fscore, "fbeta": fbeta, "hamming": hamming,
              "jaccard": jaccard, "logloss": logloss, "zero_one": zero_one,
              "area_under_roc": area_under_roc, "cohen": cohen, "mathews": mathews}
    output = json.dumps(output)
    return output
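One design note on the metrics above: log_loss is usually computed from predict_proba output rather than from hard predictions, and roc_auc_score / average_precision_score take the positive-class column. A compact, self-contained sketch of that probability-based part (not a drop-in replacement for runns):

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import average_precision_score, log_loss, roc_auc_score
from sklearn.model_selection import train_test_split

# Synthetic binary data.
rng = np.random.RandomState(0)
X = rng.rand(300, 6)
y = (X[:, 0] > 0.5).astype(int)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

clf = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_tr, y_tr)
proba = clf.predict_proba(X_te)

print(roc_auc_score(y_te, proba[:, 1]))            # ranking quality of the positive class
print(average_precision_score(y_te, proba[:, 1]))
print(log_loss(y_te, proba))                       # accepts the full (n_samples, n_classes) matrix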
Example 6: flatten
# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict_proba [as alias]
from util import convert_gray_scale, flatten
Xr,Yr = training_set
Xe,Ye = test_set
Xr = flatten(convert_gray_scale(Xr))
Xe = flatten(convert_gray_scale(Xe))
rf = RandomForestClassifier(n_estimators=100, verbose=3, oob_score=True, compute_importances=True)
rf.fit(Xr, Yr)
Yp = rf.predict(Xe)
print(np.mean(Yp == Ye))
Ypp = rf.predict_proba(Xe).max(axis=1)
plt.figure(1)
plt.clf()
plt.hist(Ypp[Yp == Ye], 50, color='b', normed=True, alpha=0.4,
         label='classified')
plt.hist(Ypp[Yp != Ye], 50, color='r', normed=True, alpha=0.4,
         label='misclassified')
plt.legend(loc='upper left')
plt.draw()
plt.show()
plt.figure(3)
plt.clf()
n = 0.01 * float(len(Yp))
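The histograms above compare the classifier's confidence, taken as the row-wise maximum of predict_proba, for correctly and incorrectly classified samples. A plotting-free sketch of the same idea, with load_digits standing in for the image data (my substitution):

import numpy as np
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = load_digits(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

rf = RandomForestClassifier(n_estimators=100, oob_score=True, random_state=0)
rf.fit(X_tr, y_tr)

y_pred = rf.predict(X_te)
confidence = rf.predict_proba(X_te).max(axis=1)   # row-wise max = model confidence

correct = y_pred == y_te
print("mean confidence, correct:  ", confidence[correct].mean())
print("mean confidence, incorrect:", confidence[~correct].mean())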
Example 7: StratifiedKFold
# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict_proba [as alias]
print(mask.sum())
X = images[mask, ...].reshape(mask.sum(), np.prod(images.shape[1::]))
print(X.shape)
Y = classifications[mask]
acc = []
acc_correct = []
acc_incorrect = []
acc_x_incorrect = []
k_fold = 8
for train_inx, valid_inx in StratifiedKFold(Y, k_fold):
    rf = RandomForestClassifier(n_estimators=100, verbose=0, oob_score=True, compute_importances=True)
    rf.fit(X[train_inx], Y[train_inx])
    Yp = rf.predict(X[valid_inx])
    correct = Yp == Y[valid_inx]
    rf.predict_proba(X[valid_inx])
    p_correct = rf.predict_proba(X[valid_inx]).max(axis=1)
    acc_correct.append(p_correct[correct])
    acc_incorrect.append(p_correct[~correct])
    score = correct.mean()
    print(score)
    acc.append(score)
    acc_x_incorrect.append([images[mask][valid_inx[~correct]],
                            Y[valid_inx[~correct]],
                            Yp[~correct]])
print('score', np.mean(acc))
rf = RandomForestClassifier(n_estimators=100, verbose=0, oob_score=True, compute_importances=True)
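The StratifiedKFold(Y, k_fold) call and the compute_importances argument above come from an old scikit-learn release. A sketch of the same cross-validation loop against the current API, with load_digits as stand-in data (my adaptation, not the original dataset):

import numpy as np
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold

X, Y = load_digits(return_X_y=True)

acc = []
for train_inx, valid_inx in StratifiedKFold(n_splits=8).split(X, Y):
    rf = RandomForestClassifier(n_estimators=100, oob_score=True, random_state=0)
    rf.fit(X[train_inx], Y[train_inx])
    p_correct = rf.predict_proba(X[valid_inx]).max(axis=1)   # per-sample confidence
    score = (rf.predict(X[valid_inx]) == Y[valid_inx]).mean()
    print(score, p_correct.mean())
    acc.append(score)
print('score', np.mean(acc))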
Example 8: enumerate
# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict_proba [as alias]
clf = ensemble.GradientBoostingClassifier(**params)
clf.fit(X_train, y_train)
test_loss = np.zeros((params['n_estimators'],), dtype=np.float64)
train_loss = np.zeros((params['n_estimators'],), dtype=np.float64)
for i, y_pred in enumerate(clf.staged_decision_function(X_test)):
    # clf.loss_ assumes that y_test[i] in {0, 1}
    y_sig = 1.0 / (1.0 + np.exp(0.0 - y_pred))
    test_loss[i] = log_loss(y_test, y_sig)  # clf.loss_(y_test, y_sig)
for i, y_pred in enumerate(clf.staged_decision_function(X_train)):
    # clf.loss_ assumes that y_train[i] in {0, 1}
    y_sig = 1.0 / (1.0 + np.exp(0.0 - y_pred))
    train_loss[i] = log_loss(y_train, y_sig)  # clf.loss_(y_train, y_sig)
plt.figure()
plt.plot(test_loss, 'r', linewidth=2)
plt.plot(train_loss, 'g', linewidth=2)
plt.legend(['test', 'train'])
i = np.argmin(test_loss)
print('min log-loss: ', np.round(test_loss[i],2), ' iteration#: ', i)
rfc = RandomForestClassifier(random_state=241, n_estimators=i)
rfc.fit(X_train, y_train)
y_pred = rfc.predict_proba(X_test)
print('RandomForest log-loss: ', np.round(log_loss(y_test, y_pred),2))
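The manual sigmoid above turns staged_decision_function output into probabilities; GradientBoostingClassifier.staged_predict_proba performs that conversion internally, which keeps the loss-curve code shorter. A self-contained sketch on synthetic data (the original params dict is not reproduced here):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.2, random_state=0)
clf.fit(X_train, y_train)

# One log-loss value per boosting stage, computed from staged probabilities.
test_loss = [log_loss(y_test, proba) for proba in clf.staged_predict_proba(X_test)]
best = int(np.argmin(test_loss))
print('min log-loss:', np.round(test_loss[best], 2), 'iteration#:', best)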
Example 9: print
# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict_proba [as alias]
# 'eval_metric': 'auc',
# 'eta': 0.1,
# 'silent': 1,
# 'max_delta_step': 1})
# 'Normal' 70 / 30 cross-validation
if do_cross_val == 1:
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, train.WnvPresent, test_size=0.3, random_state=0)
    clf.fit(X_train, y_train)
    y_pred = clf.predict_proba(X_test)
    print(metrics.roc_auc_score(y_test, y_pred))
elif do_cross_val == 2:
    # Leave-one-year-out cross-validation
    scores = []
    total_pred = np.array([])
    total_test = np.array([])
    for year in [2007, 2009, 2011, 2013]:
        X_train, X_test, y_train, y_test, y_train_numMosquitos, y_test_numMosquitos = year_train_test_split(
            train_for_loo, 'WnvPresent_DateTrapSpecies', year)