本文整理汇总了Python中sklearn.ensemble.ExtraTreesClassifier.predict_proba方法的典型用法代码示例。如果您正苦于以下问题：Python ExtraTreesClassifier.predict_proba方法的具体用法？Python ExtraTreesClassifier.predict_proba怎么用？Python ExtraTreesClassifier.predict_proba使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.ExtraTreesClassifier的用法示例。
在下文中一共展示了ExtraTreesClassifier.predict_proba方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: eval_param
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import predict_proba [as 别名]
def eval_param(params):
    """Evaluate one ExtraTreesClassifier parameter set with 3-fold stratified CV.

    One classifier is fitted per fold, scored on the held-out fold with the
    negative log loss, and used to predict the global ``test`` set.  The
    fold-averaged score and test predictions are then persisted: a row is
    appended to the global ``df_results`` frame, the predictions are written
    under ``hyperopt_preds/`` and the results table is written to
    ``hyperopt_results_sgd.csv``.

    Reads the module-level globals ``train``, ``target`` and ``test`` and
    rebinds ``df_results``.

    Args:
        params: dict of keyword arguments forwarded to ``ExtraTreesClassifier``.

    Returns:
        dict with ``'loss'`` (the fold-averaged log loss) and ``'status'``
        (``STATUS_OK``).
    """
    global df_results, train, target, test
    print("Training with params : ")
    print(params)
    random_state = 42
    avg_score = 0.
    n_folds = 3
    predict = np.zeros(test.shape[0])
    skf = StratifiedKFold(target, n_folds=n_folds, random_state=random_state)
    for train_index, cv_index in skf:
        # train
        x_train, x_cv = train[train_index], train[cv_index]
        y_train, y_cv = target[train_index], target[cv_index]
        clf = ExtraTreesClassifier(**params).fit(x_train, y_train)
        # test / score
        # predict_proba takes the feature matrix only; the original also
        # passed y_cv, which raises a TypeError.
        predict_cv = clf.predict_proba(x_cv)
        avg_score += -log_loss(y_cv, predict_cv)
        # ``predict`` is a 1-D accumulator, so adding the full
        # (n_samples, n_classes) matrix cannot broadcast.  Keep the second
        # column only.
        # NOTE(review): assumes a binary target whose positive class is
        # column 1 -- confirm against the data.
        predict += clf.predict_proba(test)[:, 1]
    predict /= n_folds
    avg_score /= n_folds
    # store
    new_row = pd.DataFrame([np.append([avg_score], list(params.values()))],
                           columns=np.append(['score'], list(params.keys())))
    # pd.concat is the equivalent of DataFrame.append(..., ignore_index=True)
    # and is available in every pandas release.
    df_results = pd.concat([df_results, new_row], ignore_index=True)
    np.savetxt('hyperopt_preds/pred' + str(df_results.index.max()) + '.txt', predict, fmt='%s')
    df_results.to_csv('hyperopt_results_sgd.csv')
    print("\tScore {0}\n\n".format(avg_score))
    return {'loss': - avg_score, 'status': STATUS_OK}
示例2: MyExtraTree
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import predict_proba [as 别名]
class MyExtraTree(MyClassifier):
    """Wrapper around ``ExtraTreesClassifier`` with feature-importance helpers."""

    def __init__(self, params=None):
        """Create the wrapped classifier.

        Args:
            params: optional dict of keyword arguments for
                ``ExtraTreesClassifier``; ``None`` means no arguments.
        """
        # Copy the mapping: update_params() mutates self._params, and the
        # original shared one default dict between every instance (and
        # aliased any dict handed in by the caller).
        self._params = dict(params) if params is not None else {}
        self._extree = ExtraTreesClassifier(**self._params)

    def update_params(self, updates):
        """Merge ``updates`` into the stored parameters and rebuild the classifier."""
        self._params.update(updates)
        self._extree = ExtraTreesClassifier(**self._params)

    def fit(self, Xtrain, ytrain):
        """Fit the wrapped classifier on ``Xtrain`` / ``ytrain``."""
        self._extree.fit(Xtrain, ytrain)

    def predict_proba(self, Xtest, option=None):
        """Return column 1 of the wrapped ``predict_proba`` output.

        ``option`` is accepted but not used.
        """
        return self._extree.predict_proba(Xtest)[:, 1]

    def predict_proba_multi(self, Xtest, option=None):
        """Return the full ``predict_proba`` matrix. ``option`` is not used."""
        return self._extree.predict_proba(Xtest)

    def plt_feature_importance(self, fname_list, f_range=None):
        """Show a horizontal bar chart of feature importances.

        Bars are ordered by decreasing importance; error bars are the standard
        deviation of the per-tree importances.

        Args:
            fname_list: feature names, indexable by feature position.
            f_range: ranks (positions in the sorted order) to plot; an empty
                or missing value plots every feature.
        """
        importances = self._extree.feature_importances_
        std = np.std([tree.feature_importances_ for tree in self._extree.estimators_], axis=0)
        indices = np.argsort(importances)[::-1]
        fname_array = np.array(fname_list)
        if not f_range:
            f_range = list(range(indices.shape[0]))
        n_f = len(f_range)
        selected = indices[f_range]

        plt.figure()
        plt.title("Extra Tree Feature importances")
        plt.barh(range(n_f), importances[selected],
                 color="b", xerr=std[selected], ecolor='k', align="center")
        plt.yticks(range(n_f), fname_array[selected])
        plt.ylim([-1, n_f])
        plt.show()

    def list_feature_importance(self, fname_list, f_range=None, return_list=False):
        """Print the feature ranking by decreasing importance.

        Args:
            fname_list: feature names, indexable by feature position.
            f_range: ranks to print; an empty or missing value prints all.
            return_list: when true, return the feature indices that were listed.

        Returns:
            The list of feature indices when ``return_list`` is true,
            otherwise ``None``.
        """
        importances = self._extree.feature_importances_
        indices = np.argsort(importances)[::-1]
        # print() with a single argument behaves the same on Python 2 and 3.
        print('Extra tree feature ranking:')
        if not f_range:
            f_range = list(range(indices.shape[0]))
        for f in f_range:
            print('{0:d}. feature[{1:d}] {2:s} ({3:f})'.format(
                f + 1, indices[f], fname_list[indices[f]], importances[indices[f]]))
        if return_list:
            return [indices[f] for f in f_range]
        return None
示例3: ERFC_Classifier
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import predict_proba [as 别名]
def ERFC_Classifier(X_train, X_cv, X_test, Y_train, Y_cv, Y_test, Actual_DS):
    """Fit a 100-tree ExtraTreesClassifier and report its CV-split quality.

    Prints the accuracy on the CV split, a cross-tabulation of actual versus
    predicted labels (decoded through the module-level ``label_enc``), the
    log loss of the CV probabilities and the elapsed time.

    ``Y_test`` is accepted but not used.

    Returns:
        Three DataFrames holding the predicted class probabilities for
        ``X_cv``, ``X_test`` and ``Actual_DS`` respectively.
    """
    print("***************Starting Extreme Random Forest Classifier***************")
    started = time()

    model = ExtraTreesClassifier(n_estimators=100, n_jobs=-1)
    model.fit(X_train, Y_train)

    cv_labels = model.predict(X_cv)
    accuracy = model.score(X_cv, Y_cv)
    print("Extreme Random Forest Classifier - {0:.2f}%".format(100 * accuracy))

    confusion = pd.crosstab(
        label_enc.inverse_transform(Y_cv),
        label_enc.inverse_transform(cv_labels),
        rownames=['actual'],
        colnames=['preds'],
    )
    # NOTE(review): dividing by the row totals along axis=1 aligns them with
    # the columns, not the rows -- confirm this is the intended percentage.
    row_totals = confusion.sum(axis=1)
    confusion['pct'] = confusion.divide(row_totals, axis=1).max(axis=1) * 100
    print(confusion)

    # Log loss is computed on probabilities rather than hard labels.
    cv_proba = model.predict_proba(X_cv)
    print(log_loss(Y_cv, cv_proba, eps=1e-15, normalize=True))
    print("done in %0.3fs" % (time() - started))

    test_proba = model.predict_proba(X_test)
    actual_proba = model.predict_proba(Actual_DS)

    print("***************Ending Extreme Random Forest Classifier***************")
    return pd.DataFrame(cv_proba), pd.DataFrame(test_proba), pd.DataFrame(actual_proba)
示例4: et
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import predict_proba [as 别名]
def et(train_data, train_label, val_data, val_label, test_data, name="extratrees_submission.csv"):
    """Train a 10-tree ExtraTreesClassifier, score it and save test predictions.

    The validation probabilities are scored with ``preprocess.evaluation`` and
    the test probabilities are handed to ``preprocess.saveResult``.

    Args:
        train_data, train_label: training features and labels.
        val_data, val_label: validation features and labels.
        test_data: features to predict.
        name: file name passed to ``preprocess.saveResult``.
    """
    # print() with one pre-formatted string emits the same text as the
    # original Python 2 print statements and also parses on Python 3.
    print("start training ExtraTrees...")
    etClf = ExtraTreesClassifier(n_estimators=10)
    etClf.fit(train_data, train_label)

    # evaluate on validation set
    val_pred_label = etClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: %s" % (logloss,))

    print("Start classify test set...")
    test_label = etClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
示例5: et
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import predict_proba [as 别名]
def et(series, n_folds, clfparams, featureparams, aggregateparams, include, exclude,
       save_test_predictions, save_oob_predictions, skip_cross_validation, _run):
    """Cross-validate an ``ET`` classifier and optionally write predictions.

    Unless ``skip_cross_validation`` is set, runs a shuffled stratified K-fold
    loop, stores the per-fold train and validation losses, the feature names
    and the fold-averaged feature importances in ``_run.info``, and
    optionally writes the out-of-fold predictions to ``<series>_<time>.csv``.
    When ``save_test_predictions`` is set, a classifier is refitted through
    ``data.get_train_test_features(**aggregateparams)`` and its predictions
    are written to ``<series>_test_<time>.csv``.

    Returns:
        The log loss over all out-of-fold predictions, or ``999.`` when cross
        validation is skipped.
    """
    data = TelstraData(include=include, exclude=exclude, **featureparams)
    stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")
    pred_cols = list(map('predict_{}'.format, range(3)))

    if not skip_cross_validation:
        y = data.get_y()
        folds = StratifiedKFold(y.values, n_folds=n_folds, shuffle=True)
        pred = pd.DataFrame(0., index=y.index, columns=pred_cols)
        _run.info['loss'] = []
        _run.info['trainloss'] = []
        importance_sum = 0
        for itrain, itest in folds:
            Xtr, ytr, Xte, yte = data.get_train_test_features(itrain, itest, **aggregateparams)
            clf = ET(**clfparams)
            clf.fit(Xtr, ytr)
            pred.iloc[itest, :] = clf.predict_proba(Xte)
            _run.info['trainloss'].append(
                multiclass_log_loss(ytr, clf.predict_proba(Xtr)))
            fold_loss = multiclass_log_loss(yte, pred.iloc[itest].values)
            _run.info['loss'].append(fold_loss)
            # Each fold contributes 1/n_folds of its importances to the mean.
            importance_sum = importance_sum + clf.feature_importances_ / n_folds
        loss = multiclass_log_loss(y, pred.values)

        _run.info['features'] = list(Xtr.columns)
        _run.info['feature_importances'] = list(importance_sum)
        # Optionally save the out-of-fold predictions
        if save_oob_predictions:
            pred.to_csv('{}_{}.csv'.format(series, stamp), index_label='id')
    else:
        loss = 999.

    # Optionally generate test predictions
    if save_test_predictions:
        test_path = '{}_test_{}.csv'.format(series, stamp)
        Xtr, ytr, Xte, yte = data.get_train_test_features(**aggregateparams)
        clf = ET(**clfparams)
        clf.fit(Xtr, ytr)
        test_proba = clf.predict_proba(Xte)
        pd.DataFrame(test_proba, index=yte.index,
                     columns=pred_cols).to_csv(test_path, index_label='id')
    return loss
示例6: kfold_cv
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import predict_proba [as 别名]
def kfold_cv(X_train, y_train, idx, k):
    """Evaluate an ExtraTreesClassifier on the first of ``k`` stratified folds.

    Only the first fold is processed: the loop breaks after one iteration.

    Args:
        X_train: 2-D feature array, indexed as ``X_train[rows, :]``.
        y_train: label array aligned with ``X_train``.
        idx: array of row identifiers aligned with ``X_train``.
        k: number of folds requested from ``StratifiedKFold``.

    Returns:
        Tuple ``(ypred, yreal, idx)``: the second column of the predicted
        probabilities, the true labels and the identifiers of the held-out
        fold.
    """
    kf = StratifiedKFold(y_train, n_folds=k)
    xx = []
    for train_index, test_index in kf:
        X_train_cv, X_test_cv = X_train[train_index, :], X_train[test_index, :]
        gc.collect()
        y_train_cv, y_test_cv = y_train[train_index], y_train[test_index]

        # The original also carried an xgboost bagging loop over range(0),
        # which never executed, plus a zero-filled accumulator that was
        # overwritten below; both were dead code and are removed.
        clf = ExtraTreesClassifier(n_estimators=700, max_features=50, criterion='entropy',
                                   min_samples_split=3, max_depth=60, min_samples_leaf=4,
                                   verbose=1, n_jobs=-1)
        clf.fit(X_train_cv, y_train_cv)
        y_pred = clf.predict_proba(X_test_cv).T[1]
        # Single-argument print() calls work on Python 2 and 3 alike.
        print(y_pred.shape)
        xx.append(llfun(y_test_cv, y_pred))
        ypred = y_pred
        yreal = y_test_cv
        idx = idx[test_index]
        print(xx[-1])
        break

    print("%s average: %s std %s" % (xx, np.mean(xx), np.std(xx)))
    return ypred, yreal, idx
示例7: ef_predictedValue
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import predict_proba [as 别名]
def ef_predictedValue():
    """Fit an ExtraTreesClassifier on ``train_df`` and score ``test_df``.

    Uses the module-level ``train_df``, ``test_df``, ``features``,
    ``NoOfEstimators`` and ``NoJobs``; the target column is
    ``'SeriousDlqin2yrs'``.  Prints the fitted feature importances.

    Returns:
        Column 1 of the predicted probability matrix for ``test_df``.
    """
    # print() with a single argument is valid on both Python 2 and 3.
    print('----------ExtraForest----------')
    ef_clf = ExtraTreesClassifier(n_estimators=NoOfEstimators, n_jobs=NoJobs)
    ef_clf.fit(train_df[features], train_df['SeriousDlqin2yrs'])
    # Local renamed so that it no longer shadows the function's own name.
    probabilities = ef_clf.predict_proba(test_df[features])
    print('Feature Importance = %s' % ef_clf.feature_importances_)
    return probabilities[:, 1]
示例8: main
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import predict_proba [as 别名]
def main():
    """Train on ``train_file``, predict ``test_file`` and write a submission.

    Steps: load the training frame, extract ``feature_names``, fit an
    ExtraTreesClassifier on the ``"OpenStatus"`` column, list the feature
    importances, predict class probabilities for the test file, optionally
    re-weight them with priors from ``full_train_file`` (when
    ``update_posteriors`` is truthy) and write them to ``submission_file``.
    All of these names are module-level settings defined outside this
    function.
    """
    start = time.time()

    print("Reading the data from " + train_file)
    data = cu.get_dataframe(train_file)

    print("Extracting features")
    fea = features.extract_features(feature_names, data)

    print("Training the model")
    # NOTE(review): compute_importances and min_samples_split=1 are only
    # accepted by old scikit-learn releases -- confirm the pinned version.
    clf = ExtraTreesClassifier(n_estimators=trees_count, max_features=len(feature_names),
                               max_depth=None, min_samples_split=1, compute_importances=True,
                               bootstrap=False, random_state=0, n_jobs=-1, verbose=2)
    clf.fit(fea, data["OpenStatus"])

    # The two bare Python 2 print statements are now calls, matching the
    # rest of this function and parsing on Python 3.
    print("Listing feature importances:")
    cu.list_feature_importance(clf, feature_names)

    print("Reading test file and making predictions: " + test_file)
    data = cu.get_dataframe(test_file)
    test_features = features.extract_features(feature_names, data)
    probs = clf.predict_proba(test_features)

    if update_posteriors:
        print("Calculating priors and updating posteriors")
        new_priors = cu.get_priors(full_train_file)
        old_priors = cu.get_priors(train_file)
        probs = cu.cap_and_update_priors(old_priors, probs, new_priors, 0.001)

    print("Saving submission to %s" % submission_file)
    cu.write_submission(submission_file, probs)

    finish = time.time()
    print("completed in %0.4f seconds" % (finish - start))
示例9: train_classifier
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import predict_proba [as 别名]
def train_classifier(prefix='atx', nside=32, ds=4, color_thresh=30, test_size=0.5):
    """Train an ExtraTreesClassifier on labeled images and plot its ROC curve.

    Args:
        prefix: data set prefix; only ``'atx'`` has a color set defined here.
        nside: forwarded to ``load_labeled``.
        ds: forwarded to ``get_features``.
        color_thresh: forwarded to ``get_features`` as ``thresh``.
        test_size: fraction of samples held out by ``train_test_split``.

    Raises:
        ValueError: if ``prefix`` has no associated color set.
    """
    # The original left color_name unbound for any other prefix, which
    # surfaced later as an UnboundLocalError; fail early and explicitly.
    if prefix == 'atx':
        color_name = 'pool'
    else:
        raise ValueError("no color set defined for prefix %r" % (prefix,))

    X_img, y = load_labeled(prefix=prefix, nside=nside, quick=False)
    colors = get_colors(name=color_name, quick=True)
    print('...getting features...')
    X = get_features(X_img, colors, ds=ds, thresh=color_thresh)
    print('...done getting features...')

    from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
    from sklearn.cross_validation import train_test_split
    from sklearn import metrics

    rf = ExtraTreesClassifier(n_estimators=200, n_jobs=6, max_features=0.02)
    # Honor the test_size argument; the original hard-coded 0.5 here, so the
    # parameter was silently ignored.
    X_train, X_test, y_train, y_test, img_train, img_test = train_test_split(
        X, y, X_img, test_size=test_size)
    print('...fitting...')
    rf.fit(X_train, y_train)

    y_proba = rf.predict_proba(X_test)[:, 1]
    fpr, tpr, thresholds = metrics.roc_curve(y_test, y_proba)
    auc = metrics.auc(fpr, tpr)

    pl.clf()
    pl.plot(fpr, tpr, 'b-o')
    pl.plot(fpr, fpr / np.mean(y), 'r--')
    pl.ylim(0, 1)
    pl.xlim(0, 1)
    pl.title('AUC: %0.3f' % auc)

    # One row per ROC threshold: threshold, TPR and the TPR/FPR ratio.
    for i, th in enumerate(thresholds):
        print("%s %s %s" % (th, tpr[i], tpr[i] / fpr[i]))

    prob_thresh = 0.6
    # Positive test samples below / above the probability threshold.
    wh_missed = np.where((y_proba < prob_thresh) & (y_test == 1))[0]
    wh_ok = np.where((y_proba > prob_thresh) & (y_test == 1))[0]
示例10: ExtraTreeModel
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import predict_proba [as 别名]
class ExtraTreeModel(BaseModel):
    """DataFrame-oriented wrapper around ``ExtraTreesClassifier``."""

    def __init__(self, model_params):
        """Create the wrapped classifier from the ``model_params`` dict."""
        # super() must name this class: super(BaseModel, self) starts the
        # lookup after BaseModel and therefore skipped BaseModel.__init__.
        super(ExtraTreeModel, self).__init__()
        # Defined up front so predict() does not hit an AttributeError when
        # it is called before fit().
        self.dep_var_name = None
        self.model = ExtraTreesClassifier(**model_params)

    def fit(self, data, dep_var_name=None):
        """Fit on ``data``, using column ``dep_var_name`` as the label.

        Args:
            data: DataFrame holding the features and the label column.
            dep_var_name: name of the label column; required.

        Exits the interpreter through ``sys.exit`` when ``dep_var_name`` is
        missing.
        """
        if dep_var_name is None:
            sys.exit('dep_var_name is needed for fit function.')
        self.dep_var_name = dep_var_name
        data_label = data[self.dep_var_name].values
        # drop() returns a new frame, so the caller's data is left untouched.
        features = data.drop(self.dep_var_name, axis=1)
        self.model.fit(features, data_label)

    def predict(self, data):
        """Return column 1 of ``predict_proba`` as a Series indexed like ``data``.

        The label column is dropped first when it is present in ``data``.
        """
        if self.dep_var_name in data.columns:
            tmp_data = data.drop(self.dep_var_name, axis=1)
        else:
            tmp_data = data
        scores = self.model.predict_proba(tmp_data)
        # scores is a plain numpy array, so re-attach the frame's index
        return pd.Series(scores[:, 1], index=tmp_data.index)
示例11: test_multioutput
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import predict_proba [as 别名]
def test_multioutput():
    """Check estimators on multi-output problems."""
    X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1],
         [-2, 1], [-1, 1], [-1, 2], [2, -1], [1, -1], [1, -2]]
    y = [[-1, 0], [-1, 0], [-1, 0], [1, 1], [1, 1], [1, 1],
         [-1, 2], [-1, 2], [-1, 2], [1, 3], [1, 3], [1, 3]]
    T = [[-1, -1], [1, 1], [-1, 1], [1, -1]]
    y_true = [[-1, 0], [1, 1], [-1, 2], [1, 3]]

    # errstate restores the previous floating-point error settings even when
    # an assertion fails; the manual seterr()/seterr(**olderr) pair leaked
    # the "ignore" setting into later tests in that case.
    with np.errstate(divide="ignore"):
        # toy classification problem
        clf = ExtraTreesClassifier(random_state=0)
        y_hat = clf.fit(X, y).predict(T)
        assert_array_equal(y_hat, y_true)
        assert_equal(y_hat.shape, (4, 2))

        # One probability array per output: 2 classes for the first output,
        # 4 classes for the second.
        proba = clf.predict_proba(T)
        assert_equal(len(proba), 2)
        assert_equal(proba[0].shape, (4, 2))
        assert_equal(proba[1].shape, (4, 4))

        log_proba = clf.predict_log_proba(T)
        assert_equal(len(log_proba), 2)
        assert_equal(log_proba[0].shape, (4, 2))
        assert_equal(log_proba[1].shape, (4, 4))

        # toy regression problem
        clf = ExtraTreesRegressor(random_state=5)
        y_hat = clf.fit(X, y).predict(T)
        assert_almost_equal(y_hat, y_true)
        assert_equal(y_hat.shape, (4, 2))
示例12: eval_seq_model
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import predict_proba [as 别名]
def eval_seq_model(out_file='eval_model.csv', window_shift=1, retrain=False):
    """Predict gesture probabilities over sliding windows and dump them.

    Args:
        out_file: path handed to ``dump_predictions``.
        window_shift: forwarded to ``aggregated_skeletion_win``.
        retrain: when true, fit a new 500-tree classifier on the training
            files and cache it with joblib; otherwise load the cached model.

    Returns:
        Tuple ``(df_pred, to_dump)``: the per-window probabilities indexed by
        the first element of each ``X_win`` index entry, and the result of
        applying ``postprocess`` to each index group.
    """
    filename = 'cache/joblib/rf_eval_model.joblib.pkl'
    file_names = ['training1', 'training3', 'training4',
                  'validation1_lab', 'validation3_lab']
    if retrain:
        X, y = aggregated_skeletion(file_names=file_names,
                                    agg_functions=['median', 'var', 'min', 'max'])
        X = X.fillna(0)
        y = np.array([gesture_to_id[gest] for gest in y])
        clf = ExtraTreesClassifier(n_estimators=500, random_state=0,
                                   n_jobs=-1)
        clf.fit(X, y)
        joblib.dump(clf, filename, compress=9)
    else:
        # NOTE(review): joblib.load unpickles the file; only load caches
        # produced by this project.
        clf = joblib.load(filename)

    X_win = aggregated_skeletion_win(['validation2_lab', 'training2'],
                                     agg_functions=['median', 'var', 'min', 'max'],
                                     window_shift=window_shift)
    # Zero-fill missing aggregates exactly as is done for the training data;
    # the original passed them through unfilled.
    X_win = X_win.fillna(0)
    y_pred = clf.predict_proba(X_win)
    df_pred = DataFrame(y_pred, index=[s for (s, _) in X_win.index])
    to_dump = df_pred.groupby(level=0).apply(postprocess)
    dump_predictions(to_dump, out_path=out_file)
    return df_pred, to_dump
示例13: movement_interval
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import predict_proba [as 别名]
def movement_interval(train_on=['training1', 'training2', 'training3', 'training4'],
                      predict_on=['validation1_lab', 'validation2_lab', 'validation3_lab']):
    """Flag, per sliding window, whether the predicted class is a movement.

    A 1500-tree ExtraTreesClassifier is trained on the aggregated features of
    ``train_on`` and applied to 40-frame windows (shifted by 5 frames) of
    ``predict_on``.

    Args:
        train_on: file names used for training (the default list is never
            mutated here).
        predict_on: file names to build prediction windows from.

    Returns:
        DataFrame with ``sample_id``, ``frame``, one probability column per
        class and a ``movement`` column that is 1 when the most probable
        class is not class 0.
    """
    window_shift = 5
    window_length = 40
    # Single-argument print() calls behave the same on Python 2 and 3.
    print('aggregated_skeletion_win')
    X_win = aggregated_skeletion_win(predict_on,
                                     agg_functions=['median', 'var', 'min', 'max'],
                                     window_shift=window_shift, window_length=window_length)
    X_win = X_win.fillna(0)

    print('train rf model')
    X, y = aggregated_skeletion(file_names=train_on,
                                agg_functions=['median', 'var', 'min', 'max'])
    X = X.fillna(0)
    y = np.array([gesture_to_id[gest] for gest in y])
    clf = ExtraTreesClassifier(n_estimators=1500, random_state=0,
                               n_jobs=-1)
    clf.fit(X, y)
    # Release the training data before predicting.
    del X
    del y

    print('rf predict')
    y_pred = clf.predict_proba(X_win)
    df_out = pd.concat([DataFrame.from_records(X_win.index.values.tolist(),
                                               columns=['sample_id', 'frame']),
                        DataFrame(y_pred)], axis=1)
    df_out['movement'] = np.array(np.argmax(y_pred, axis=1) != 0,
                                  dtype=int)
    # adjust for sliding window size: the offset was a literal 20, i.e. half
    # of window_length; deriving it keeps the two values in sync.
    df_out.frame = df_out.frame + window_length // 2
    return df_out
示例14: _cascade_layer
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import predict_proba [as 别名]
def _cascade_layer(self, X, y=None, layer=0):
    """Train or apply one cascade layer of paired forests.

    With labels, ``self.n_cascadeRF`` pairs of (RandomForestClassifier,
    ExtraTreesClassifier) are fitted, stored on ``self`` as
    ``_casprf<n_layer>_<i>`` / ``_cascrf<n_layer>_<i>``, and their
    out-of-bag decision functions are summed.  Without labels, the pairs
    stored for ``layer`` are looked up and their ``predict_proba`` outputs
    are summed.

    Args:
        X: feature matrix.
        y: labels; ``None`` selects prediction mode.
        layer: index of the stored layer to apply (prediction mode only).

    Returns:
        List with one summed probability array per forest pair.
    """
    n_cascadeRF = getattr(self, 'n_cascadeRF')
    prf_pred = []
    if y is not None:
        for irf in range(n_cascadeRF):
            # Build fresh estimators for every slot.  The original refitted
            # one shared pair, so every stored attribute pointed at the same
            # two objects and only the last fit survived.
            prf = RandomForestClassifier(
                n_estimators=100, max_features=8,
                bootstrap=True, criterion="entropy", min_samples_split=20,
                max_depth=None, class_weight='balanced', oob_score=True)
            crf = ExtraTreesClassifier(
                n_estimators=100, max_depth=None,
                bootstrap=True, oob_score=True)
            prf.fit(X, y)
            crf.fit(X, y)
            setattr(self, '_casprf{}_{}'.format(self.n_layer, irf), prf)
            setattr(self, '_cascrf{}_{}'.format(self.n_layer, irf), crf)
            # Sum into a new array; ``+=`` would overwrite the stored
            # forest's own oob_decision_function_ in place.
            probas = prf.oob_decision_function_ + crf.oob_decision_function_
            prf_pred.append(probas)
    else:
        for irf in range(n_cascadeRF):
            prf = getattr(self, '_casprf{}_{}'.format(layer, irf))
            crf = getattr(self, '_cascrf{}_{}'.format(layer, irf))
            probas = prf.predict_proba(X) + crf.predict_proba(X)
            prf_pred.append(probas)
    return prf_pred
示例15: eval_gesture_model
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import predict_proba [as 别名]
def eval_gesture_model(retrain=False, window_shift=1, window_length=40,
                       train_on=['training1', 'training3', 'training4',
                                 'validation1_lab', 'validation3_lab'],
                       predict_on=['validation2_lab', 'training2']):
    """Predict gesture probabilities for ``predict_on`` with a cached model.

    Args:
        retrain: when true, fit a 500-tree classifier on ``train_on`` and
            cache it with joblib; otherwise load the cached classifier.
        window_shift: accepted but not used by this function.
        window_length: aggregation window length; also part of the cache
            file name.
        train_on: file names used for training.
        predict_on: file names to evaluate on.

    Returns:
        Tuple ``(y_pred, y_test)``: the predicted class probabilities and the
        true gesture ids.
    """
    cache_path = 'cache/joblib/rf_eval_model' + str(window_length) + '.joblib.pkl'

    def encode(gestures):
        # Map gesture names to their integer ids.
        return np.array([gesture_to_id[name] for name in gestures])

    if not retrain:
        clf = joblib.load(cache_path)
    else:
        train_features, train_gestures = aggregated_skeletion(
            file_names=train_on,
            agg_functions=['median', 'var', 'min', 'max'],
            window_length=window_length)
        train_features = train_features.fillna(0)
        train_ids = encode(train_gestures)
        clf = ExtraTreesClassifier(n_estimators=500, random_state=0, n_jobs=-1)
        clf.fit(train_features, train_ids)
        joblib.dump(clf, cache_path, compress=9)

    X_test, test_gestures = aggregated_skeletion(
        predict_on,
        agg_functions=['median', 'var', 'min', 'max'],
        window_length=window_length)
    X_test = X_test.fillna(0)
    y_test = encode(test_gestures)
    return clf.predict_proba(X_test), y_test