当前位置: 首页>>代码示例>>Python>>正文


Python RandomForestClassifier.predict_proba方法代码示例

本文整理汇总了Python中sklearn.ensemble.forest.RandomForestClassifier.predict_proba方法的典型用法代码示例。如果您正苦于以下问题:Python RandomForestClassifier.predict_proba方法的具体用法?Python RandomForestClassifier.predict_proba怎么用?Python RandomForestClassifier.predict_proba使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.ensemble.forest.RandomForestClassifier的用法示例。


在下文中一共展示了RandomForestClassifier.predict_proba方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_RandomForest

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 注意: predict_proba 是 RandomForestClassifier 实例的方法, 不能单独导入; 请先调用 fit, 再通过实例调用 predict_proba
    def test_RandomForest(self):
        """Smoke test: fit a 10-tree forest on two samples and request probabilities."""
        features = [[0, 1], [1, 1]]
        labels = [0, 1]

        # fit() hands back the estimator, so construction and training chain.
        forest = RandomForestClassifier(n_estimators=10).fit(features, labels)
        forest.predict_proba(features)
开发者ID:lgadawski,项目名称:spdb-driver-telematics,代码行数:9,代码来源:test_driver_functions.py

示例2: MyRfClassifier

# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict_proba [as 别名]
class MyRfClassifier(BaseClassifier):
    """Random-forest model wrapped behind the BaseClassifier-derived interface.

    The wrapped estimator is stored in ``self.classifier`` and a descriptive
    identifier built from the hyper-parameters is stored in ``self.name``.
    """

    def __init__(self, n_estimators, max_depth, min_samples_leaf):
        """Create the underlying forest and derive the model name."""
        self.classifier = RandomForestClassifier(
            verbose=1,
            n_estimators=n_estimators,
            max_depth=max_depth,
            min_samples_leaf=min_samples_leaf,
            n_jobs=40,
        )
        self.name = "rf_n{n}_md{md}_ms{ms}".format(
            n=n_estimators, md=max_depth, ms=min_samples_leaf
        )

    def get_name(self):
        """Return the identifier assembled in the constructor."""
        return self.name

    def fit(self, X, y, X_t, y_t):
        """Train the forest on ``X``/``y``; ``X_t`` and ``y_t`` are accepted but not used."""
        return self.classifier.fit(X, y)

    def predict_proba(self, X):
        """Return the forest's class-probability estimates for ``X``."""
        return self.classifier.predict_proba(X)

    def get_feature_importances(self, feat_names):
        """Map each name in ``feat_names`` to the matching feature importance."""
        return dict(zip(feat_names, self.classifier.feature_importances_))
开发者ID:hongbin0908,项目名称:pytrade,代码行数:22,代码来源:tree.py

示例3: year_train_test_split

# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict_proba [as 别名]
        # Build the train/test partition for the current `year`. The loop that
        # supplies `year` and the definition of year_train_test_split are outside
        # this excerpt; presumably `year` is the held-out fold -- TODO confirm.
        X_train, X_test, y_train, y_test = year_train_test_split(
            train_for_loo,
            'WnvPresent_DateTrapSpecies',
            year)      

        # Persist each part of the split as CSV under data_per_year/, with the
        # year as file-name prefix and without the index column.
        X_train.to_csv("data_per_year/" + str(year) + "X_train.csv", index=False)
        X_test.to_csv("data_per_year/" + str(year) + "X_test.csv", index=False)
        y_train.to_csv("data_per_year/" + str(year) + "y_train.csv", index=False)
        y_test.to_csv("data_per_year/" + str(year) + "y_test.csv", index=False)

        
        clf.fit(X_train, y_train)

        # NOTE(review): the raw predict_proba output is scored below. A
        # scikit-learn binary classifier returns one column per class, in which
        # case the commented-out `[:, 1]` variant is the one to use -- confirm
        # which classifier `clf` is before relying on these scores.
        # y_pred = clf.predict_proba(X_test) [:, 1] # Random Forest
        y_pred = clf.predict_proba(X_test) # For XGB
        
        # ROC AUC for this fold only.
        score = metrics.roc_auc_score(y_test, y_pred)
        scores.append(score)
        
        #import operator
        #feat_importances = dict(zip(X_train.columns, clf.feature_importances_))
        #sorted_feat_importances = sorted(feat_importances.items(), key=operator.itemgetter(1))
        #print(sorted_feat_importances)
        
        # Accumulate predictions and labels over all folds so that one pooled
        # ROC AUC can be computed after the loop.
        total_pred = np.concatenate((total_pred, y_pred))
        total_test = np.concatenate((total_test, y_test))
        
    # Pooled score over every fold, followed by the list of per-fold scores.
    print("Global ROC score", metrics.roc_auc_score(total_test, total_pred))
        
    print(scores)
开发者ID:Pold87,项目名称:pikki-virus,代码行数:32,代码来源:crossvalidate_features_to_select.py

示例4:

# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict_proba [as 别名]
# Export the fitted tree as Graphviz dot text and render it to a PDF file
dot_data = StringIO.StringIO()
tree.export_graphviz(
    clf,
    out_file=dot_data,
    feature_names=list(data_tree.columns.values),
)
graph = pydot.graph_from_dot_data(dot_data.getvalue())
graph.write_pdf('dectree.pdf')


# Hard-label evaluation on the test set: accuracy, per-class report, confusion matrix
y_test_pred = clf.predict(X_test)
print("Accuracy Test: {0:.3f}".format(metrics.accuracy_score(y_test, y_test_pred)))
print("")
print("Classification report:")
print(metrics.classification_report(y_test, y_test_pred))
print("")
print("Confusion matrix:")
print(metrics.confusion_matrix(y_test, y_test_pred))

# Class probabilities for the training set
y_pred = clf.predict_proba(X_train)

# Class probabilities for the test set (replaces the hard labels held in y_test_pred)
y_test_pred = clf.predict_proba(X_test)

# Weight the probabilities by the values of g_test, total each row, then
# average the row totals (presumably g_test holds one column per class -- TODO confirm)
tt = g_test.as_matrix()
pred = tt * y_test_pred

ss = np.sum(pred, axis=1)

sss = ss.mean()

print(sss)
开发者ID:JPLindsen,项目名称:GA_Data_Science_Project,代码行数:33,代码来源:model_fit.py

示例5: runns

# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict_proba [as 别名]
	def runns(resp_var, size_of_test_data,dataset,positive_class,predictor_var, n_estimators,important_features,dealing_with_nulls):
		"""Train a random forest on ``raw_data.csv`` and return its test metrics as JSON.

		Args:
			resp_var: Name of the response column.
			size_of_test_data: Test-set fraction; converted with ``float()`` and
				passed to ``train_test_split`` as ``test_size``.
			dataset: Currently ignored -- see the NOTE(review) at the top of the body.
			positive_class: Label passed as ``pos_label`` to the precision, recall,
				F1 and F-beta scores.
			predictor_var: Column selection used to pick the predictors from the data.
			n_estimators: Number of trees for both forests fitted here.
			important_features: How many top-ranked feature names to select (the
				selection is computed but not applied -- see NOTE(review) below).
			dealing_with_nulls: Forwarded unchanged to ``deal_with_nulls``.

		Returns:
			A JSON string mapping metric names to their values.
		"""
		# NOTE(review): the caller's ``dataset`` is discarded here and replaced by
		# the CSV contents (the author marked this "for testing purposes").
		# Confirm what callers pass before removing this line.
		dataset = pd.read_csv('raw_data.csv', low_memory=False)

		# ---- Data preprocessing
		# Drop the rows whose response is null, then apply the chosen null strategy.
		dataset = dataset.dropna(subset=[resp_var])
		dataset = deal_with_nulls(dealing_with_nulls, dataset)

		# ---- Feature importance
		# Turn categorical predictors into dummy variables.
		predictors = pd.get_dummies(dataset[predictor_var])
		# Balance the response classes by random over-sampling.
		ros = RandomOverSampler(random_state=0)
		prds, resp = ros.fit_sample(predictors, dataset[resp_var])
		# Fit a forest on the balanced data to rank the predictors.
		rf_clf = RandomForestClassifier(n_estimators=n_estimators)
		rf_clf.fit(prds, resp)
		feature_imp = pd.Series(rf_clf.feature_importances_,
						index=list(predictors.columns)).sort_values(ascending=False)
		# NOTE(review): the top-ranked names are selected but never used to subset
		# the data, so the model below is trained on ALL predictors. Confirm the
		# intended behaviour before wiring the selection in.
		important_predictor_names = feature_imp.index[0:important_features]

		# Rebuild one frame from the resampled response and predictors.
		resp = pd.DataFrame(data=resp, columns=[resp_var])
		predictors = pd.DataFrame(prds, columns=list(predictors))
		dataset = pd.concat([resp, predictors], axis=1)

		# ---- Model training
		# Separate the features (every column except the response) from the labels.
		m_data = dataset.drop(resp_var, axis=1, inplace=False)
		labels = np.array(dataset[resp_var])
		features = np.array(pd.get_dummies(m_data))

		# Split the data into training and testing sets.
		train_features, test_features, train_labels, test_labels = train_test_split(
			features, labels, test_size=float(size_of_test_data), random_state=402)

		clf = RandomForestClassifier(n_jobs=1, n_estimators=n_estimators, random_state=142)
		clf.fit(train_features, train_labels)

		# ---- Evaluation
		predicted = clf.predict(test_features)
		pred_prob = clf.predict_proba(test_features)
		# Probability of clf.classes_[1] as a 1-D score vector, which is the shape
		# the ranking metrics expect.
		# NOTE(review): assumes classes_[1] is the positive class -- confirm.
		positive_scores = pred_prob[:, 1]

		output = {
			"accuracy": accuracy_score(test_labels, predicted),
			"precision": precision_score(test_labels, predicted, pos_label=positive_class),
			"average precision": average_precision_score(test_labels, positive_scores),
			"recall": recall_score(test_labels, predicted, pos_label=positive_class),
			"fscore": f1_score(test_labels, predicted, pos_label=positive_class),
			# Same positive label as precision/recall/F1 so the scores are comparable.
			"fbeta": fbeta_score(test_labels, predicted, beta=0.5, pos_label=positive_class),
			"hamming": hamming_loss(test_labels, predicted),
			"jaccard": jaccard_similarity_score(test_labels, predicted),
			# log-loss is defined on predicted probabilities, not on hard labels;
			# ``labels`` ties the probability columns to the classifier's classes.
			"logloss": log_loss(test_labels, pred_prob, labels=clf.classes_),
			"zero_one": zero_one_loss(test_labels, predicted),
			"area_under_roc": roc_auc_score(test_labels, positive_scores),
			"cohen": cohen_kappa_score(test_labels, predicted),
			"mathews": matthews_corrcoef(test_labels, predicted),
		}
		return json.dumps(output)
开发者ID:ghollah,项目名称:ServingMLAPIs,代码行数:90,代码来源:random_forest.py

示例6: flatten

# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict_proba [as 别名]
from util import convert_gray_scale, flatten


# Unpack the (features, labels) pairs of the training and test sets
Xr, Yr = training_set
Xe, Ye = test_set

# Convert both feature sets to gray scale and flatten them
Xr = flatten(convert_gray_scale(Xr))
Xe = flatten(convert_gray_scale(Xe))

rf = RandomForestClassifier(
    n_estimators=100,
    verbose=3,
    oob_score=True,
    compute_importances=True,
)
rf.fit(Xr, Yr)

# Test-set accuracy: share of predictions equal to the true label
Yp = rf.predict(Xe)
print(np.mean(Yp == Ye))

# Confidence of each prediction = largest class probability in its row
Ypp = rf.predict_proba(Xe).max(axis=1)

# Overlay the confidence histograms of correct (blue) and wrong (red) predictions
plt.figure(1)
plt.clf()
for selector, colour, tag in ((Yp == Ye, 'b', 'classified'),
                              (Yp != Ye, 'r', 'misclassified')):
    plt.hist(Ypp[selector], 50, color=colour, normed=True, alpha=0.4,
             label=tag)
plt.legend(loc='upper left')
plt.draw()
plt.show()

plt.figure(3)
plt.clf()

# One percent of the number of predictions, as a float
n = 0.01 * float(len(Yp))
开发者ID:matthagy,项目名称:sc2_label_models,代码行数:33,代码来源:gs_random_forest.py

示例7: StratifiedKFold

# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict_proba [as 别名]
# Number of samples selected by the mask
print(mask.sum())
# Keep the masked images and flatten each one into a single feature row
X = images[mask, ...].reshape(mask.sum(), np.prod(images.shape[1::]))
print(X.shape)
Y = classifications[mask]

acc = []              # accuracy of each fold
acc_correct = []      # confidences of the correctly classified samples, per fold
acc_incorrect = []    # confidences of the misclassified samples, per fold
acc_x_incorrect = []  # [images, true labels, predicted labels] of the misses, per fold
k_fold = 8
for train_inx, valid_inx in StratifiedKFold(Y, k_fold):
    rf = RandomForestClassifier(n_estimators=100, verbose=0, oob_score=True, compute_importances=True)
    rf.fit(X[train_inx], Y[train_inx])
    Yp = rf.predict(X[valid_inx])
    correct = Yp == Y[valid_inx]
    # Confidence of a prediction = largest class probability in its row.
    # predict_proba is called once; an earlier duplicate call whose result
    # was thrown away has been removed.
    p_correct = rf.predict_proba(X[valid_inx]).max(axis=1)
    acc_correct.append(p_correct[correct])
    acc_incorrect.append(p_correct[~correct])

    score = correct.mean()
    print(score)
    acc.append(score)

    acc_x_incorrect.append([images[mask][valid_inx[~correct]],
                            Y[valid_inx[~correct]],
                            Yp[~correct]])

# Mean accuracy over all folds
print('score %s' % np.mean(acc))

rf = RandomForestClassifier(n_estimators=100, verbose=0, oob_score=True, compute_importances=True)
开发者ID:matthagy,项目名称:sc2_timer,代码行数:33,代码来源:rf_digits.py

示例8: enumerate

# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict_proba [as 别名]
# Fit the gradient-boosting model whose staged predictions are scored below
clf = ensemble.GradientBoostingClassifier(**params)
clf.fit(X_train, y_train)

# One log-loss value per boosting stage, for the test and the training set
test_loss = np.zeros((params['n_estimators'],), dtype=np.float64)
train_loss = np.zeros((params['n_estimators'],), dtype=np.float64)

for i, y_pred in enumerate(clf.staged_decision_function(X_test)):
    # Squash the raw decision value with a sigmoid so log_loss gets a probability
    y_sig = (1.0 / (1.0 + np.exp(0.0 - y_pred)))
    test_loss[i] = log_loss(y_test, y_sig)

for i, y_pred in enumerate(clf.staged_decision_function(X_train)):
    # Same sigmoid transform for the training-set curve
    y_sig = (1.0 / (1.0 + np.exp(0.0 - y_pred)))
    train_loss[i] = log_loss(y_train, y_sig)

plt.figure()
plt.plot(test_loss, 'r', linewidth=2)
plt.plot(train_loss, 'g', linewidth=2)
plt.legend(['test', 'train'])

# 0-based index of the stage with the lowest test log-loss
i = np.argmin(test_loss)

print('min log-loss: ', np.round(test_loss[i],2), ' iteration#: ', i)

# Stage index i corresponds to an ensemble of i + 1 trees. Using i directly
# would be off by one, and would request zero trees (an error) whenever the
# very first stage is the best one.
rfc = RandomForestClassifier(random_state=241, n_estimators=int(i) + 1)
rfc.fit(X_train, y_train)
y_pred = rfc.predict_proba(X_test)

print('RandomForest log-loss: ', np.round(log_loss(y_test, y_pred),2))
开发者ID:eugene-batalov,项目名称:coursera_machine_learning,代码行数:32,代码来源:gbm.py

示例9: print

# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict_proba [as 别名]
#                  'eval_metric': 'auc',
#                  'eta': 0.1,
#                  'silent': 1,
#                  'max_delta_step': 1})

# 'Normal' 70 / 30 cross-validation
# Single hold-out evaluation: 30% of the rows become the test set, with a
# fixed random_state so the split is repeatable.
if do_cross_val == 1:
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X,
        train.WnvPresent,
        test_size=0.3,
        random_state=0)

    clf.fit(X_train, y_train)

    # NOTE(review): the raw predict_proba output is passed to roc_auc_score.
    # A scikit-learn binary classifier returns one column per class, in which
    # case the positive-class column must be selected first -- confirm which
    # classifier `clf` is.
    y_pred = clf.predict_proba(X_test)
    print(metrics.roc_auc_score(y_test, y_pred))

elif do_cross_val == 2:

    # Leave-one-year-out cross-validation
    scores = []                 # per-year scores
    total_pred = np.array([])   # predictions pooled over all years
    total_test = np.array([])   # true labels pooled over all years
    
    # One fold per listed year; the rest of the loop body lies beyond this excerpt.
    for year in [2007, 2009, 2011, 2013]:

        # Here year_train_test_split is unpacked into six values: the usual
        # four plus a train/test pair named after the mosquito count.
        X_train,X_test, y_train, y_test, y_train_numMosquitos, y_test_numMosquitos = year_train_test_split(
            train_for_loo,
            'WnvPresent_DateTrapSpecies',
            year)      
开发者ID:Pold87,项目名称:pikki-virus,代码行数:33,代码来源:crossvalidate.py


注:本文中的sklearn.ensemble.forest.RandomForestClassifier.predict_proba方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。