当前位置: 首页>>代码示例>>Python>>正文


Python RandomForestClassifier.predict_proba方法代码示例

本文整理汇总了Python中sklearn.ensemble.RandomForestClassifier.predict_proba方法的典型用法代码示例。如果您正苦于以下问题:Python RandomForestClassifier.predict_proba方法的具体用法?Python RandomForestClassifier.predict_proba怎么用?Python RandomForestClassifier.predict_proba使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.ensemble.RandomForestClassifier的用法示例。


在下文中一共展示了RandomForestClassifier.predict_proba方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: learning_curve

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def learning_curve():
    n = 50000
    nsteps = 10
    full = cu.get_sample_data_frame(n)
    data = full.ix[0 : int(n * 0.6) - 1].reset_index()
    cval = full.ix[int(n * 0.6) : int(n * 0.8) - 1].reset_index()
    test = full.ix[int(n * 0.8) : n - 1].reset_index()
    step = len(data) / nsteps
    ndata = len(data)
    mvec = range(step, ndata + step, step)
    test_features = features.extract_features(test)
    data_error = []
    cval_error = []
    for i in range(len(mvec)):
        m = mvec[i]
        print "running for size", m
        train = data.ix[0 : m - 1].reset_index()
        fea = features.extract_features(train)
        rf = RandomForestClassifier(n_estimators=50, verbose=0, compute_importances=False, n_jobs=5)
        rf.fit(fea, train["OpenStatus"])
        new_priors = cu.load_priors("train.csv")
        old_priors = cu.compute_priors(train.OpenStatus)
        # predict train
        probs = rf.predict_proba(fea)
        # probs = cu.cap_and_update_priors(old_priors, probs, new_priors, 0.001)
        y_true = compute_y_true(train)
        score = multiclass_log_loss(y_true, probs)
        data_error.append(score)
        # predict cval
        probs = rf.predict_proba(test_features)
        # probs = cu.cap_and_update_priors(old_priors, probs, new_priors, 0.001)
        y_true = compute_y_true(test)
        score = multiclass_log_loss(y_true, probs)
        cval_error.append(score)
    return mvec, data_error, cval_error
开发者ID:coreyabshire,项目名称:stacko,代码行数:37,代码来源:lab.py

示例2: rforests

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def rforests(trainx, trainy, test, n_estimators=100, k=5):
	"""Fit a random forest and return its top-k predictions as country names.

	For every row of ``trainx`` and ``test`` the ``k`` highest-probability
	class columns are selected, translated through ``dicts.country`` and
	ordered so that the most probable destination comes first.

	Returns:
		A tuple ``(forest, pred_train, pred_test)``: the fitted classifier and
		the ``np.fliplr`` results for the training and test inputs.
	"""
	forest = RandomForestClassifier(n_estimators)
	forest.fit(trainx, np.ravel(trainy))

	# One column label per rank: country_destination_1 ... country_destination_k.
	col_names = ["country_destination_" + str(rank) for rank in range(1, k + 1)]

	def top_k_countries(samples):
		# argsort is ascending, so the last k columns are the k most probable
		# class indices (least probable of those k first).
		best = np.argsort(forest.predict_proba(samples))[:, -k:]
		frame = pd.DataFrame(best, columns=col_names)
		for name in col_names:
			frame[name] = frame[name].map(dicts.country)
		# Reverse the column order so the most probable entry is leftmost.
		return np.fliplr(frame)

	pred_train = top_k_countries(trainx)
	pred_test = top_k_countries(test)

	return forest, pred_train, pred_test
开发者ID:oew1v07,项目名称:kaggle_playaround,代码行数:37,代码来源:forests.py

示例3: main

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def main(job_id, params):
    print job_id, params
    params = get_params(params)
    print job_id, params

    crimes = np.load(DATA_FILE)

    model = RandomForestClassifier(n_estimators=params['n_estimators'],
                                   criterion=params['criterion'],
                                   max_depth=None if params['max_depth'] < 1 else params['max_depth'],
                                   min_samples_split=params['min_samples_split'],
                                   min_samples_leaf=params['min_samples_leaf'],
                                   max_features=params['max_features'],
                                   min_weight_fraction_leaf=0.0,
                                   max_leaf_nodes=None, bootstrap=True, oob_score=False, n_jobs=4,
                                   random_state=42, verbose=0, warm_start=False, class_weight=None)
    model.fit(crimes['features_train'], crimes['labels_train'])
    loss_train = log_loss(crimes['labels_train'], model.predict_proba(crimes['features_train']))
    loss_val = log_loss(crimes['labels_val'], model.predict_proba(crimes['features_val']))
    loss_all = log_loss(crimes['labels'], model.predict_proba(crimes['features']))
    print 'loss_all: ', loss_all
    print 'loss_train: ', loss_train
    print 'loss_val: ', loss_val

    return loss_val
开发者ID:ManasMahanta,项目名称:misc,代码行数:27,代码来源:crimes_job.py

示例4: MyRandomForest

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
class MyRandomForest(MyClassifier):
    def __init__(self, params=dict()):
        self._params = params
        self._rf = RandomForestClassifier(**(self._params))

    def update_params(self, updates):
        self._params.update(updates)
        self._rf = RandomForestClassifier(**(self._params))

    def fit(self, Xtrain, ytrain):
        self._rf.fit(Xtrain, ytrain)

    # def predict(self, Xtest, option = None):
    #   return self._extree.predict(Xtest)

    def predict_proba(self, Xtest, option = None):
        return self._rf.predict_proba(Xtest)[:, 1]

    def predict_proba_multi(self, Xtest, option = None):
        return self._rf.predict_proba(Xtest)

    def plt_feature_importance(self, fname_list, f_range = list()):
        importances = self._rf.feature_importances_

        std = np.std([tree.feature_importances_ for tree in self._rf.estimators_], axis=0)
        indices = np.argsort(importances)[::-1]

        fname_array = np.array(fname_list)

        if not f_range:
            f_range = range(indices.shape[0])

        n_f = len(f_range)

        plt.figure()
        plt.title("Random Forest Feature importances")
        plt.barh(range(n_f), importances[indices[f_range]],
               color="b", xerr=std[indices[f_range]], ecolor='k',align="center")
        plt.yticks(range(n_f), fname_array[indices[f_range]])
        plt.ylim([-1, n_f])
        plt.show()


    def list_feature_importance(self, fname_list, f_range = list(), return_list = False):
        importances = self._rf.feature_importances_
        indices = np.argsort(importances)[::-1]

        print 'Random forest feature ranking:'

        if not f_range :
            f_range = range(indices.shape[0])

        n_f = len(f_range)

        for i in range(n_f):
            f = f_range[i]
            print '{0:d}. feature[{1:d}]  {2:s}  ({3:f})'.format(f + 1, indices[f], fname_list[indices[f]], importances[indices[f]])

        if return_list:
            return [indices[f_range[i]] for i in range(n_f)]
开发者ID:tonyzhangrt,项目名称:wklearn,代码行数:62,代码来源:learner.py

示例5: clfTestProb

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def clfTestProb(data, top, morph, runs):
    """Print the most probable ``top`` and ``morph`` class per test sample.

    For each of ``runs`` iterations the data is split with
    ``train_test_split`` and a 10-tree forest is fitted first on the ``top``
    labels and then on the ``morph`` labels. One line per test sample is
    printed: probability and label for ``top`` followed by probability and
    label for ``morph``.

    NOTE(review): every iteration overwrites the previous predictions, so only
    the last run is reported, and ``runs == 0`` fails with a NameError --
    confirm whether the reporting was meant to happen inside the loop.
    """
    for _ in range(runs):
        # Take a sample for training, leave the rest for testing.
        split = train_test_split(data, top, morph)
        data_train, data_test, top_train, top_test, morph_train, morph_test = split
        clf = RandomForestClassifier(n_estimators=10, verbose=2)

        clf = clf.fit(data_train, top_train)
        top_class = clf.classes_
        clf_predict_top = clf.predict_proba(data_test)

        clf = clf.fit(data_train, morph_train)
        clf_predict_morph = clf.predict_proba(data_test)
        morph_class = clf.classes_

    def describe(prob, classes):
        # "<probability> <label>" for the first highest-probability class.
        best = max(prob)
        position = prob.tolist().index(best)
        return '{:.2f}'.format(best) + " " + str(classes[position])

    strTop = [describe(prob, top_class) for prob in clf_predict_top]
    strMorph = [describe(prob, morph_class) for prob in clf_predict_morph]

    for top_text, morph_text in zip(strTop, strMorph):
        print(top_text + " " + morph_text)
开发者ID:rn1n3r,项目名称:TP53-ML,代码行数:31,代码来源:learndata.py

示例6: train_predict

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def train_predict(train_file, test_file, predict_valid_file, predict_test_file,
                  n_est, depth, n_fold=5):
    """Cross-validate a random forest, then retrain and predict the test set.

    Args:
        train_file: Training data in svmlight format.
        test_file: Test data in svmlight format (its labels are ignored).
        predict_valid_file: Output path for the out-of-fold predictions.
        predict_test_file: Output path for the test-set predictions.
        n_est: Number of trees.
        depth: Maximum tree depth.
        n_fold: Number of stratified cross-validation folds.

    Both output files contain the second probability column (index 1), one
    value per line. Progress and the mean per-fold log loss are written to
    ``rf_<n_est>_<depth>.log``.
    """
    logging.basicConfig(format='%(asctime)s   %(levelname)s   %(message)s',
                        level=logging.DEBUG, filename='rf_{}_{}.log'.format(
                                                        n_est, depth
                                                       ))

    logging.info('Loading training and test data...')
    X, y = load_svmlight_file(train_file)
    X_tst, _ = load_svmlight_file(test_file)

    clf = RF(n_estimators=n_est, max_depth=depth, random_state=2015)

    cv = StratifiedKFold(y, n_folds=n_fold, shuffle=True, random_state=2015)

    logging.info('Cross validation...')
    # Explicit float buffer: probabilities must not be truncated if the
    # labels happen to be stored with an integer dtype.
    p_val = np.zeros(y.shape, dtype=float)
    fold_losses = []
    for i_trn, i_val in cv:
        clf.fit(X[i_trn], y[i_trn])
        p_val[i_val] = clf.predict_proba(X[i_val])[:, 1]
        fold_losses.append(log_loss(y[i_val], p_val[i_val]))

    # Report the mean over folds; the plain sum grows with n_fold and is not
    # comparable between runs.
    logging.info('Log Loss = {:.4f}'.format(np.mean(fold_losses)))

    logging.info('Retraining with 100% data...')
    clf.fit(X.todense(), y)
    p_tst = clf.predict_proba(X_tst.todense())[:, 1]

    logging.info('Saving predictions...')
    np.savetxt(predict_valid_file, p_val, fmt='%.6f')
    np.savetxt(predict_test_file, p_tst, fmt='%.6f')
开发者ID:drivendata,项目名称:countable-care-3rd-place,代码行数:35,代码来源:train_predict_rf.py

示例7: main

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def main():
    
    train_f = pd.read_csv(train_path, header=0, parse_dates=['Dates'])
    print train_f.dtypes

    X, Y = get_feature(train_f, "training_set")
    

    ### TRAINING
    # clf = GradientBoostingClassifier(n_estimators=50)
    clf = RandomForestClassifier(n_estimators=200)
    # clf = LogisticRegression(n_jobs=4)

    X, Y = shuffle_XY(X, Y)
    data_len = len(X)
    train_len = data_len * 95 / 100 
    val_len = data_len - train_len
    X_train = X[:train_len]
    X_val = X[train_len:]
    Y_train = Y[:train_len]
    Y_val = Y[train_len:]
    
    clf = clf.fit(X_train, Y_train)
    print "Training done"

    # train_acc = clf.score(X_train, Y_train)
    # print "Train acc:", train_acc
    
    val_acc = clf.score(X_val, Y_val)
    print "Val acc:", val_acc

    val_pred = clf.predict_proba(X_val)
    val_log = 0.0
    cnt = 0
    for y in Y_val:
        val_log += math.log(val_pred[cnt, y]+0.0000001)
        cnt += 1
    val_log =  - val_log / len(Y_val)
    print "Val log loss:", val_log

    # print max(Y_val), min(Y_val)
    # print Y_val, Y_val + 1

    # print "Val loss:", log_loss(Y_val+1, val_pred) # Note the +1 here!
    """
    # scores = cross_val_score(clf, X, Y)
    # print "Cross val acc:", scores.mean()
    """

    ### Testing

    test_f = pd.read_csv(test_path, header=0, parse_dates=['Dates'])
    # print test_f.dtypes

    X_test, _ = get_feature(test_f, "test_set")
    Y_test = clf.predict_proba(X_test)

    ### Write results
    # write_results(Y_test)
    write_results_prob(Y_test)
开发者ID:ruoyanwang,项目名称:datasci,代码行数:62,代码来源:random_forest.py

示例8: tree_trans

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def tree_trans(trainer, labels, test_w, test_g):
    from sklearn import tree 
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier

    #clf = DecisionTreeClassifier(max_depth=3)
    clf = RandomForestClassifier(n_estimators=700,max_depth=6, min_samples_split=10, min_samples_leaf=10)
    ts = time.time()
    clf.fit(trainer, labels)
    print 'Trees training: ', (time.time() - ts)

    ts = time.time()
    prob_w = np.zeros(test_w.shape[0], dtype=[('Forest Output Prob', 'f8')])
    prob_g = np.zeros(test_w.shape[0], dtype=[('Forest Output Prob', 'f8')])
    print prob_w.shape
    print test_w.shape
    print clf.predict_proba(test_w).shape
    print clf.predict_proba(test_w)[:,0].shape
    prob_w['Forest Output Prob'] = clf.predict_proba(test_w)[:,0]
    prob_g['Forest Output Prob'] = clf.predict_proba(test_g)[:,0]
    print 'Trees transforming: ', (time.time() - ts)
    return prob_w, prob_g

    h_w, bin_edges = np.histogram(prob_w, 20, (0,1))
    h_g, bin_edges = np.histogram(prob_g, 20, (0,1))
    bin_centers = (bin_edges[0:-1] + bin_edges[1:])/2

    fig = plt.figure()
    ebkw = {'linewidth':1,}
    ax = fig.add_subplot(111)
    ax.errorbar(bin_centers, h_w, np.sqrt(h_w),label=w_label  ,color='g', **ebkw)
    ax.errorbar(bin_centers, h_g, np.sqrt(h_g),label=g_label ,color='b', **ebkw)
    ax.set_xlabel('Decision Tree Ouput Prob', size='x-large')
    ax.set_ylabel('Occupancy', size='x-large')
    plt.legend()
开发者ID:andrewjohnlowe,项目名称:JetImages,代码行数:37,代码来源:pca.py

示例9: RFC_Classifier

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def RFC_Classifier(X_train, X_cv, X_test, Y_train,Y_cv,Y_test, Actual_DS):
    """Fit a 500-tree forest and return class probabilities for three sets.

    The model is trained on ``X_train`` / ``Y_train``. Accuracy, a
    confusion table (labels decoded with ``label_enc``) and the log loss on
    the cross-validation set are printed along with the elapsed time.
    ``Y_test`` is accepted but not used.

    Returns:
        Three DataFrames with the predicted probabilities for ``X_cv``,
        ``X_test`` and ``Actual_DS`` respectively.
    """
    print("***************Starting Random Forest Classifier***************")
    started = time()
    clf = RandomForestClassifier(n_estimators=500,n_jobs=1)
    clf.fit(X_train, Y_train)
    preds = clf.predict(X_cv)
    score = clf.score(X_cv,Y_cv)

    print("Random Forest Classifier - {0:.2f}%".format(100 * score))
    actual_names = label_enc.inverse_transform(Y_cv)
    predicted_names = label_enc.inverse_transform(preds)
    Summary = pd.crosstab(actual_names, predicted_names,
                          rownames=['actual'], colnames=['preds'])
    row_totals = Summary.sum(axis=1)
    Summary['pct'] = Summary.divide(row_totals, axis=1).max(axis=1)*100
    print(Summary)

    # Check with the log loss function.
    cv_proba = clf.predict_proba(X_cv)
    cv_log_loss = log_loss(Y_cv, cv_proba, eps=1e-15, normalize=True)
    print(cv_log_loss)
    print("done in %0.3fs" % (time() - started))

    test_proba = clf.predict_proba(X_test)

    print("x_test done")
    actual_proba = clf.predict_proba(Actual_DS)

    print("***************Ending Random Forest Classifier***************")
    return pd.DataFrame(cv_proba) , pd.DataFrame(test_proba),pd.DataFrame(actual_proba)
开发者ID:roshankr,项目名称:DS_Competition,代码行数:32,代码来源:Otto_Classification.py

示例10: rf_grid_search

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def rf_grid_search():

	train_inp,valid_inp,train_target,valid_target = prepare_input()
	#set up scorer for grid search. log-loss is error, not score, so set greater_is_better to false,
	#and log-loss requires a probability
	log_loss_scorer = make_scorer(log_loss,greater_is_better=False,needs_proba=True)

	train_inp = train_inp[:100000]
	train_target = train_target[:100000]

	start = time.time()
	random_forest = RandomForestClassifier(random_state=31)
	# r_forest_parameters = {'n_estimators' : [120,300,500,800,1200],'max_depth':[5,8,15,25,30,None],'max_features':['log2','sqrt',None],
	# 'min_samples_split':[1,2,5,10,15,100],'min_samples_leaf':[1,2,5,10]}
	
	#75.1 minutes to run with these paramters - 72 fits

	r_forest_parameters = {'min_samples_split':[2,5,10,20,50,100],'min_samples_leaf':[1,2,5,10,50,100]}
	#grid search too slow to not use all cores, and wayyyy too slow to have no output.
	r_forest_grid_obj = GridSearchCV(random_forest,r_forest_parameters,log_loss_scorer,verbose=2,n_jobs=-1)
	r_forest_grid_obj = r_forest_grid_obj.fit(train_inp,train_target)
	random_forest = r_forest_grid_obj.best_estimator_
	print "Best params: " + str(r_forest_grid_obj.best_params_)	
	random_forest_train_error = log_loss(train_target,random_forest.predict_proba(train_inp))
	random_forest_validation_error = log_loss(valid_target,random_forest.predict_proba(valid_inp))
	print "Best random forest training error: {:02.4f}".format(random_forest_train_error)
	print "Best random forest validation error: {:02.4f}".format(random_forest_validation_error)
	end = time.time()
	print "RF grid search took {:02.4f} seconds".format(end-start)

	return random_forest
开发者ID:btaborsky,项目名称:red-hat-kaggle,代码行数:33,代码来源:red_hat.py

示例11: RandomForrestClassifierStep

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
class RandomForrestClassifierStep(SklearnStep):
    """Pipeline step that turns features into random-forest class probabilities."""

    def __init__(self,
                 n_estimators=10,
                 criterion='gini',
                 max_features='auto',
                 max_depth=None):
        """Store the forest hyper-parameters; the model is built in ``fit_transform``."""
        super(RandomForrestClassifierStep, self).__init__()
        self._n_estimator=n_estimators
        self._criterion = criterion
        self._max_features= max_features
        self._max_depth = max_depth

    def fit_transform(self):
        """Fit on the data at ``self._input_path`` and save its probabilities.

        The predicted probabilities for the training data are written to
        ``self._output_path``.
        """
        self._model = RandomForestClassifier(n_estimators=self._n_estimator,
                                             criterion=self._criterion,
                                             max_depth=self._max_depth,
                                             max_features=self._max_features)
        x, y = load_svmlight(self._input_path)
        self._model.fit(x, y)
        scores = self._model.predict_proba(x)
        save_numpy_txt(scores, self._output_path)

    def transform(self, x=None):
        """Predict class probabilities with the fitted model.

        Args:
            x: Feature matrix. When omitted, the data at
                ``self._test_input_path`` is loaded, its probabilities are
                saved to ``self._output_path`` and nothing is returned.

        Returns:
            The probability matrix when ``x`` is given, otherwise ``None``.
        """
        # Compare against None explicitly: truth-testing a multi-element
        # array or a sparse matrix raises, and an empty input is not the
        # same as "no input given".
        if x is None:
            x, _ = load_svmlight(self._test_input_path)
            transformed_x = self._model.predict_proba(x)
            save_numpy_txt(transformed_x, self._output_path)
        else:
            transformed_x = self._model.predict_proba(x)
            return transformed_x

    def predict(self, features):
        """Return the class-probability matrix for ``features``."""
        return self._model.predict_proba(features)
开发者ID:myungchoi,项目名称:client-py,代码行数:35,代码来源:classifier.py

示例12: rf_fit

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def rf_fit():

	train_inp,valid_inp,train_target,valid_target = prepare_input()

	rf = RandomForestClassifier(random_state=31,n_jobs=-1,verbose=1,n_estimators=100,min_samples_split=5)
	start = time.time()

	rf.fit(train_inp,train_target)

	end = time.time()
	print "fitting took {:0.4} seconds".format(end-start)

	training_output = rf.predict_proba(train_inp)
	validation_output = rf.predict_proba(valid_inp)

	training_error = log_loss(train_target,training_output)
	validation_error = log_loss(valid_target,validation_output)

	print "Train error: {:02.4f}".format(training_error)
	print "Validation error: {:02.4f}".format(validation_error)


	joblib.dump(rf,rf_filename)


	return rf
开发者ID:btaborsky,项目名称:red-hat-kaggle,代码行数:28,代码来源:red_hat.py

示例13: modelSelection

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def modelSelection(x_train, y_train, x_test, y_test, model, n_folds):
    """
    Select various models and return the AUCs of training and test sets and predicted offer acceptance probabilities.

    Args:
        x_train, y_train: Training features and labels, split into
            ``n_folds`` stratified folds for cross-validation.
        x_test, y_test: Test features and labels, scored once per fold.
        model: One of "Random Forest", "Logistic Regression L1",
            "Logistic Regression L2", "Decision Tree", "Naive Bayes", "KNN".
        n_folds: Number of cross-validation folds.

    Returns:
        A tuple ``(auc_train, auc_validation, auc_test, pred_prob)``: the
        three AUCs averaged over folds, and the test-set probability of the
        second class column averaged over the per-fold models.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    if model == "Random Forest":
        clf = RandomForestClassifier(n_estimators=150, oob_score=True, random_state=0, min_samples_split=1)
    elif model == "Logistic Regression L1":
        clf = LogisticRegression(penalty='l1', random_state=0, class_weight='auto')
    elif model == "Logistic Regression L2":
        clf = LogisticRegression(penalty='l2', random_state=0, class_weight='auto')
    elif model == "Decision Tree":
        clf = DecisionTreeClassifier(random_state=0)
    elif model == "Naive Bayes":
        clf = GaussianNB()
    elif model == "KNN":
        clf = KNeighborsClassifier(n_neighbors=10)
    else:
        # Fail with a clear message instead of an unbound-variable error below.
        raise ValueError("Unknown model: {0!r}".format(model))

    # Perform cross-validation on training dataset and calculate AUC
    cv = StratifiedKFold(y_train, n_folds=n_folds)
    auc_train = []
    auc_validation = []
    auc_test = []
    pred_prob = []
    for train, validation in cv:
        clf = clf.fit(x_train[train], y_train[train])
        auc_train.append(metrics.roc_auc_score(y_train[train], clf.predict_proba(x_train[train])[:, 1]))
        auc_validation.append(metrics.roc_auc_score(y_train[validation], clf.predict_proba(x_train[validation])[:, 1]))
        # Predict the test set once per fold and reuse it for both outputs.
        test_prob = clf.predict_proba(x_test)[:, 1]
        auc_test.append(metrics.roc_auc_score(y_test, test_prob))
        pred_prob.append(test_prob)
    return np.mean(auc_train), np.mean(auc_validation), np.mean(auc_test), np.mean(pred_prob, axis=0)
开发者ID:katherinez22,项目名称:projects,代码行数:31,代码来源:Part1.py

示例14: classifiers_accuracy

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def classifiers_accuracy():
    path = os.path.join(root_dir, "generated")
    td = np.load(os.path.join(path, "training_data", "training_data.npy"))

    # ----------- Data set separation ----------- #
    # whole set
    rec_len = len(td[:, :][0]) - 1  # in index (starting from 0)
    data_x = td[:, :rec_len]
    data_y = td[:, rec_len]

    # training set
    x = data_x[:-TRAINING_SIZE]
    y = data_y[:-TRAINING_SIZE]

    # testing set
    t_x = data_x[-TRAINING_SIZE:]
    t_y = data_y[-TRAINING_SIZE:]

    # ----------- Fix Class Imbalance ----------- #
    OS = OverSampler(random_state=1)
    ox, oy = OS.fit_transform(x, y)

    smote = SMOTE(random_state=1)
    sx, sy = smote.fit_transform(x, y)

    bsmote1 = bSMOTE1(random_state=1)
    bsx1, bsy1 = bsmote1.fit_transform(x, y)

    # ----------- Train and Predict  ----------- #
    # predict() will just say whether it's a attack,
    # but predict_proba() will say the probability (this is important)

    # Over Sampler data
    clf_1 = RandomForestClassifier(n_estimators=100, n_jobs=2)
    clf_1.fit(ox, oy)
    p_1 = clf_1.predict_proba(t_x)
    # print("p_1 : ", p_1)

    # SMOTE data
    clf_2 = RandomForestClassifier(n_estimators=100, n_jobs=2)
    clf_2.fit(sx, sy)
    p_2 = clf_2.predict_proba(t_x)
    # print("p_2: ", p_2)

    # BSMOTE data
    clf_3 = RandomForestClassifier(n_estimators=100, n_jobs=2)
    clf_3.fit(bsx1, bsy1)
    p_3 = clf_3.predict_proba(t_x)
    # print("p_3 : ", p_3)

    print "{0} \t{1} \t\t{2} \t{3}".format("actual", "os", "smote", "bsmote")
    for i in range(0, TRAINING_SIZE):
        a = t_y[i]
        o = p_1[i][1]
        s = p_2[i][1]
        b = p_3[i][1]
        if a != 0. or o != 0. or s != 0. or b != 0.:
            print "{0} \t{1} \t{2} \t{3}".format(a, o, s, b)
        pass
开发者ID:grainier,项目名称:heimdall,代码行数:61,代码来源:classifiers_accuracy.py

示例15: main

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import predict_proba [as 别名]
def main(argv=None):
    iris = load_iris()
    rf = RandomForestClassifier(max_depth = 4)
    idx = range(len(iris.target))
    np.random.shuffle(idx)
    rf.fit(iris.data[idx][:100], iris.target[idx][:100])
    
    instance = iris.data[idx][100:101]
    print rf.predict_proba(instance)
开发者ID:guo-xuan,项目名称:SiprosScripts,代码行数:11,代码来源:IterativeRandomForest.py


注:本文中的sklearn.ensemble.RandomForestClassifier.predict_proba方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。