This article collects typical usage examples of the Python method sklearn.ensemble.GradientBoostingClassifier.predict_proba. If you are unsure how to use GradientBoostingClassifier.predict_proba, or want to see it in real code, the curated samples below may help. You can also browse further usage examples of the containing class, sklearn.ensemble.GradientBoostingClassifier.
Below are 15 code examples of GradientBoostingClassifier.predict_proba, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
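For orientation, here is a minimal, self-contained illustration of the method on synthetic data (not drawn from the projects below). predict_proba returns one probability column per class, ordered as in clf.classes_:

import numpy as np
from sklearn.ensemble import GradientBoostingClassifier

rng = np.random.RandomState(0)
X = rng.rand(100, 4)
y = (X[:, 0] + X[:, 1] > 1).astype(int)

clf = GradientBoostingClassifier(n_estimators=50).fit(X, y)
proba = clf.predict_proba(X[:5])   # shape (5, 2); columns follow clf.classes_
print(clf.classes_, proba[:, 1])   # probability of the positive class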
Example 1: ensembleGBM
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def ensembleGBM(derived_data_path, X_train, Y_train, X_test, seed=60):
    random.seed(seed)
    GBM1 = GradientBoostingClassifier(n_estimators=1500, learning_rate=0.008, min_samples_leaf=5, max_features=0.2, max_depth=7)
    GBM2 = GradientBoostingClassifier(n_estimators=1700, learning_rate=0.007, min_samples_leaf=5, max_features=0.2, max_depth=7)
    GBM3 = GradientBoostingClassifier(n_estimators=1600, learning_rate=0.0075, min_samples_leaf=5, max_features=0.2, max_depth=7)
    GBM4 = GradientBoostingClassifier(n_estimators=1650, learning_rate=0.007, min_samples_leaf=5, max_features=0.2, max_depth=8)
    GBM5 = GradientBoostingClassifier(n_estimators=1750, learning_rate=0.00725, min_samples_leaf=6, max_features=0.2, max_depth=7)
    GBM6 = GradientBoostingClassifier(n_estimators=1550, learning_rate=0.00775, min_samples_leaf=4, max_features=0.2, max_depth=7)
    GBM7 = GradientBoostingClassifier(n_estimators=1850, learning_rate=0.00725, min_samples_leaf=5, max_features=0.2, max_depth=6)
    print("Running Model 1")
    GBM1.fit(X_train, Y_train)
    print("Running Model 2")
    GBM2.fit(X_train, Y_train)
    print("Running Model 3")
    GBM3.fit(X_train, Y_train)
    print("Running Model 4")
    GBM4.fit(X_train, Y_train)
    print("Running Model 5")
    GBM5.fit(X_train, Y_train)
    print("Running Model 6")
    GBM6.fit(X_train, Y_train)
    print("Running Model 7")
    GBM7.fit(X_train, Y_train)
    GBMClassifiers = [GBM1, GBM2, GBM3, GBM4, GBM5, GBM6, GBM7]
    saveObject(derived_data_path, 'GBM_classifiers.obj', GBMClassifiers)
    # Average the positive-class probabilities of the seven models
    combine = (GBM1.predict_proba(X_test)[:, 1] + GBM2.predict_proba(X_test)[:, 1] +
               GBM3.predict_proba(X_test)[:, 1] + GBM4.predict_proba(X_test)[:, 1] +
               GBM5.predict_proba(X_test)[:, 1] + GBM6.predict_proba(X_test)[:, 1] +
               GBM7.predict_proba(X_test)[:, 1]) / 7.0
    return combine
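Since the seven models differ only in their hyperparameters, the fit-and-average boilerplate above collapses naturally into a loop. A behavior-equivalent sketch, reusing the names from the example:

import numpy as np

models = [GBM1, GBM2, GBM3, GBM4, GBM5, GBM6, GBM7]
for i, gbm in enumerate(models, start=1):
    print("Running Model %d" % i)
    gbm.fit(X_train, Y_train)
# Mean of the positive-class probability across all seven models
combine = np.mean([gbm.predict_proba(X_test)[:, 1] for gbm in models], axis=0)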
Example 2: gbdt_solver
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def gbdt_solver(train_data, train_label, validation, test, unlabel, dimreduce=decomposition.undo):
    """Train a GBDT classifier on (optionally) dimensionality-reduced features."""
    # train_data = train_data[:100, :]
    # train_label = train_label[:100]
    logging.info("begin to train the gbdt classifier")
    new_train_data, new_val, new_test, new_unlabel = dimreduce(train_data, train_label, validation, test, unlabel)
    logging.info("finished feature extracting")
    # Hyperparameter search, currently disabled:
    # gb = GradientBoostingClassifier()
    # params_gbdt = {"n_estimators": [100, 200, 500, 1000],
    #                "learning_rate": [0.02, 0.03, 0.05, 0.1],
    #                "max_depth": [3, 5, 7, 9],
    #                "random_state": [1000000007]}
    # rand_search_result = GridSearchCV(gb, param_grid=params_gbdt, n_jobs=3, cv=3, scoring='roc_auc')
    # rand_search_result = RandomizedSearchCV(gb, param_distributions=params_gbdt, n_jobs=3, cv=3, n_iter=100, scoring='roc_auc')
    # rand_search_result.fit(new_train_data, train_label)
    # params = tools.report(rand_search_result.grid_scores_)
    params = {
        "n_estimators": 600,
        "learning_rate": 0.03,
        "random_state": 1000000007,
        "max_depth": 2,
        "warm_start": True,
    }
    gb = GradientBoostingClassifier(**params)
    gb.fit(new_train_data, train_label)
    joblib.dump(gb, ROOT + "/result/gbdt.pkl")
    evaluate.get_auc(gb.predict_proba(new_val)[:, 1])
    return gb.predict_proba(new_test)[:, 1]
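The warm_start=True flag above only matters if the estimator is later refit with a larger n_estimators, in which case the already-fitted trees are kept. A minimal sketch of that pattern (X_train/y_train are illustrative names):

from sklearn.ensemble import GradientBoostingClassifier

gb = GradientBoostingClassifier(n_estimators=200, warm_start=True)
gb.fit(X_train, y_train)           # fit the first 200 boosting stages
gb.set_params(n_estimators=400)    # keep the existing trees...
gb.fit(X_train, y_train)           # ...and fit 200 additional stages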
Example 3: ctr_gbdt
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def ctr_gbdt(model='sklearn-clicklog', from_cache=False, train_dataset_length=100000, test_dataset_length=100000):
    TRAIN_FILE, TEST_FILE = create_dataset(model, from_cache, train_dataset_length, test_dataset_length)
    prediction_model = GradientBoostingClassifier(
        loss='deviance',
        learning_rate=0.1,
        n_estimators=30,
        subsample=1.0,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_depth=5,
    )
    x_train, y_train = clean_data(TRAIN_FILE)
    x_test, y_test = clean_data(TEST_FILE)
    with Timer('fit model'):
        prediction_model.fit(x_train, y_train)
    with Timer('evaluate model'):
        # log_loss accepts the full (n_samples, n_classes) probability matrix
        y_prediction_train = prediction_model.predict_proba(x_train)
        y_prediction_test = prediction_model.predict_proba(x_test)
        loss_train = log_loss(y_train, y_prediction_train)
        loss_test = log_loss(y_test, y_prediction_test)
    print('loss_train: %s' % loss_train)
    print('loss_test: %s' % loss_test)
Example 4: predict
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def predict(fea, df, t, t9):
    Un = df.columns == 'Blank'  # all-False initializer for the column mask
    for f in fea:
        '''
        try:
            df[(f+'_y')] = df[(f+'_x')] - df[(f+'_y')]
            print(1)
        except:
            pass
        '''
        Un = Un | (df.columns == f)
        Un = Un | (df.columns == (f + '_x'))
        Un = Un | (df.columns == (f + '_y'))
    Un = Un & (df.columns != 'New_y')
    clf = GradientBoostingClassifier()
    y = df[t].label
    X = df[t].loc[:, Un]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.9, random_state=1)
    clf.fit(X_train, y_train)
    re = 'Testing AUC: \t' + str(roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1]))
    print(re)
    re = 'September AUC: \t' + str(roc_auc_score(df[t9].label, clf.predict_proba(df[t9].loc[:, Un])[:, 1]))
    print(re)
    print(X.columns)
    print(clf.feature_importances_)
    return Un, clf
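The repeated `|` comparisons used to build the column mask can be written more compactly with pandas' Index.isin; a sketch under the same assumptions (fea and df as in the example):

wanted = set()
for f in fea:
    wanted.update([f, f + '_x', f + '_y'])
# Index.isin returns a boolean array; the "== 'Blank'" seed above is just an all-False initializer
Un = df.columns.isin(wanted) & (df.columns != 'New_y')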
Example 5: TestGradientBoostingClassifierConverter
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
class TestGradientBoostingClassifierConverter(TestCase):
    def setUp(self):
        np.random.seed(1)
        self.est = GradientBoostingClassifier(max_depth=2, n_estimators=10)
        self.est.fit([[0, 0], [0, 1], [1, 0], [1, 1]], [0, 1, 1, 1])
        self.ctx = TransformationContext(
            {
                Schema.INPUT: [IntegerNumericFeature("x1"), StringCategoricalFeature("x2", ["zero", "one"])],
                Schema.MODEL: [IntegerNumericFeature("x1"), StringCategoricalFeature("x2", ["zero", "one"])],
                Schema.DERIVED: [],
                Schema.OUTPUT: [IntegerCategoricalFeature("output", [0, 1])],
            }
        )
        self.converter = GradientBoostingConverter(estimator=self.est, context=self.ctx)

    def test_transform(self):
        p = self.converter.pmml()
        mm = p.MiningModel[0]
        assert mm.MiningSchema is not None, "Missing mining schema"
        assert len(mm.MiningSchema.MiningField) == 2, "Wrong number of mining fields"
        assert mm.Segmentation is not None, "Missing segmentation root"

    def test_transform_with_verification(self):
        p = self.converter.pmml(
            [
                {"x1": 0, "x2": "zero", "output": self.est.predict_proba([[0, 0]])[0, 1]},
                {"x1": 0, "x2": "one", "output": self.est.predict_proba([[0, 1]])[0, 1]},
                {"x1": 1, "x2": "zero", "output": self.est.predict_proba([[1, 0]])[0, 1]},
                {"x1": 1, "x2": "one", "output": self.est.predict_proba([[1, 1]])[0, 1]},
            ]
        )
        mm = p.MiningModel[0]
        assert mm.MiningSchema is not None, "Missing mining schema"
        assert len(mm.MiningSchema.MiningField) == 2, "Wrong number of mining fields"
        assert mm.Segmentation is not None, "Missing segmentation root"
Example 6: GB_Classifier
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def GB_Classifier(X_train, X_cv, X_test, Y_train, Y_cv, Y_test, Actual_DS):
    print("***************Starting Gradient Boosting***************")
    t0 = time()
    clf = GradientBoostingClassifier(n_estimators=500, learning_rate=0.01)
    clf.fit(X_train, Y_train)
    preds = clf.predict(X_cv)
    score = clf.score(X_cv, Y_cv)
    print("Gradient Boosting - {0:.2f}%".format(100 * score))
    Summary = pd.crosstab(label_enc.inverse_transform(Y_cv), label_enc.inverse_transform(preds),
                          rownames=['actual'], colnames=['preds'])
    # Row-normalize the confusion table (axis=0 aligns the row sums on the index)
    Summary['pct'] = Summary.divide(Summary.sum(axis=1), axis=0).max(axis=1) * 100
    print(Summary)
    # Check with log loss function
    epsilon = 1e-15
    # ll_output = log_loss_func(Y_cv, preds, epsilon)
    preds2 = clf.predict_proba(X_cv)
    ll_output2 = log_loss(Y_cv, preds2, eps=1e-15, normalize=True)
    print(ll_output2)
    print("done in %0.3fs" % (time() - t0))
    preds3 = clf.predict_proba(X_test)
    # preds4 = clf.predict_proba(Actual_DS.ix[:, 'feat_1':])
    preds4 = clf.predict_proba(Actual_DS)
    print("***************Ending Gradient Boosting***************")
    return pd.DataFrame(preds2), pd.DataFrame(preds3), pd.DataFrame(preds4)
Example 7: ensembleGBMTest
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def ensembleGBMTest(derived_data_path, X_train, Y_train, X_test, Y_test):
    random.seed(60)
    GBM1 = GradientBoostingClassifier(n_estimators=1500, learning_rate=0.008, min_samples_leaf=5, max_features=0.2, max_depth=7)
    GBM2 = GradientBoostingClassifier(n_estimators=1700, learning_rate=0.007, min_samples_leaf=5, max_features=0.2, max_depth=7)
    GBM3 = GradientBoostingClassifier(n_estimators=1600, learning_rate=0.0075, min_samples_leaf=5, max_features=0.2, max_depth=7)
    GBM4 = GradientBoostingClassifier(n_estimators=1650, learning_rate=0.007, min_samples_leaf=5, max_features=0.2, max_depth=8)
    GBM5 = GradientBoostingClassifier(n_estimators=1750, learning_rate=0.00725, min_samples_leaf=6, max_features=0.2, max_depth=7)
    GBM6 = GradientBoostingClassifier(n_estimators=1550, learning_rate=0.00775, min_samples_leaf=4, max_features=0.2, max_depth=7)
    GBM7 = GradientBoostingClassifier(n_estimators=1850, learning_rate=0.00725, min_samples_leaf=5, max_features=0.2, max_depth=6)
    GBM1.fit(X_train, Y_train)
    GBM2.fit(X_train, Y_train)
    GBM3.fit(X_train, Y_train)
    GBM4.fit(X_train, Y_train)
    GBM5.fit(X_train, Y_train)
    GBM6.fit(X_train, Y_train)
    GBM7.fit(X_train, Y_train)
    print("GBM1: %f" % gini(GBM1, X_test, Y_test))
    print("GBM2: %f" % gini(GBM2, X_test, Y_test))
    print("GBM3: %f" % gini(GBM3, X_test, Y_test))
    print("GBM4: %f" % gini(GBM4, X_test, Y_test))
    print("GBM5: %f" % gini(GBM5, X_test, Y_test))
    print("GBM6: %f" % gini(GBM6, X_test, Y_test))
    print("GBM7: %f" % gini(GBM7, X_test, Y_test))
    # now combine!
    combine = (GBM1.predict_proba(X_test)[:, 1] + GBM2.predict_proba(X_test)[:, 1] +
               GBM3.predict_proba(X_test)[:, 1] + GBM4.predict_proba(X_test)[:, 1] +
               GBM5.predict_proba(X_test)[:, 1])
    combine = combine + GBM6.predict_proba(X_test)[:, 1] + GBM7.predict_proba(X_test)[:, 1]
    print("With our powers combined: %f" % giniNoEstimator(Y_test, combine))
    GBMClassifiers = [GBM1, GBM2, GBM3, GBM4, GBM5, GBM6, GBM7]
    saveObject(derived_data_path, 'GBM_classifiers.obj', GBMClassifiers)
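gini and giniNoEstimator are helpers from the surrounding project. For a binary target they are usually defined through the identity Gini = 2·AUC − 1; a plausible reconstruction (an assumption, not the project's actual code):

from sklearn.metrics import roc_auc_score

def giniNoEstimator(y_true, y_score):
    # Normalized Gini coefficient of a score vector: 2 * AUC - 1
    return 2.0 * roc_auc_score(y_true, y_score) - 1.0

def gini(estimator, X, y_true):
    # Gini of a fitted classifier's positive-class probability
    return giniNoEstimator(y_true, estimator.predict_proba(X)[:, 1])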
Example 8: predict
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def predict(fea1, fea2, df, t, t9):
    n = 0
    weight = [0.73, 0.27]
    tave = np.zeros(len(df[t9]))
    y = df[t].label
    X_1 = df[t]
    df9 = df[t9]
    for fea in [fea1, fea2]:
        Un = df.columns == 'Blank'  # all-False initializer for the column mask
        for f in fea:
            Un = Un | (df.columns == f)
            Un = Un | (df.columns == (f + '_x'))
            Un = Un | (df.columns == (f + '_y'))
        Un = Un & (df.columns != 'quarterly_attrition_rate_y')
        clf = GradientBoostingClassifier()
        X = X_1.loc[:, Un]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.9, random_state=1)
        min_max_scaler = preprocessing.MinMaxScaler()
        clf.fit(min_max_scaler.fit_transform(X_train), y_train)
        re = 'Testing AUC: \t' + str(roc_auc_score(y_test, clf.predict_proba(min_max_scaler.transform(X_test))[:, 1]))
        print(re)
        # transform (not fit_transform): reuse the scaler fitted on the training split
        prob9 = clf.predict_proba(min_max_scaler.transform(df9.loc[:, Un]))[:, 1]
        re = 'September AUC: \t' + str(roc_auc_score(df9.label, prob9))
        print(re)
        tave = prob9 * weight[n] + tave
        n += 1
    print('-' * 30)
    print(weight)
    print('Total AUC')
    re = 'September AUC: \t' + str(roc_auc_score(df9.label, tave))
    print(re)
    return Un, clf
Example 9: main
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def main():
    train_f = pd.read_csv(train_path, header=0, parse_dates=['Dates'])
    print(train_f.dtypes)
    X, Y = get_feature(train_f, "training_set")
    ### TRAINING
    clf = GradientBoostingClassifier(n_estimators=50)
    # clf = RandomForestClassifier(n_estimators=2)
    # clf = LogisticRegression(n_jobs=4)
    X, Y = shuffle_XY(X, Y)
    data_len = len(X)
    train_len = data_len * 95 // 100  # integer division: 95% train split
    val_len = data_len - train_len
    X_train = X[:train_len]
    X_val = X[train_len:]
    Y_train = Y[:train_len]
    Y_val = Y[train_len:]
    clf = clf.fit(X_train, Y_train)
    print("Training done")
    val_acc = clf.score(X_val, Y_val)
    print("Val acc:", val_acc)
    val_pred = clf.predict_proba(X_val)
    # print(max(Y_val), min(Y_val))
    # print(Y_val, Y_val + 1)
    val_log = 0.0
    cnt = 0
    for y in Y_val:
        val_log += math.log(val_pred[cnt, y] + 0.0000001)
        cnt += 1
    val_log = -val_log / len(Y_val)
    print("Val log loss:", val_log)
    # print("Val loss:", log_loss(Y_val + 1, val_pred))  # Note the +1 here!
    # scores = cross_val_score(clf, X, Y)
    # print("Cross val acc:", scores.mean())
    ### Testing
    test_f = pd.read_csv(test_path, header=0, parse_dates=['Dates'])
    # print(test_f.dtypes)
    X_test, _ = get_feature(test_f, "test_set")
    Y_test = clf.predict_proba(X_test)
    ### Write results
    # write_results(Y_test)
    write_results_prob(Y_test)
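The manual loop above computes the multiclass log loss by hand, and only works when the labels are exactly the column indices 0..K-1 of predict_proba. sklearn's log_loss handles the label-to-column mapping directly (a sketch using the names from the example):

from sklearn.metrics import log_loss

val_pred = clf.predict_proba(X_val)  # shape (n_samples, n_classes)
val_log = log_loss(Y_val, val_pred, labels=clf.classes_)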
Example 10: MyGradientBoost
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
class MyGradientBoost(MyClassifier):
    def __init__(self, params=None):
        # Avoid a mutable default argument; copy so update_params cannot mutate a shared dict
        self._params = dict(params or {})
        self._gb = GradientBoostingClassifier(**self._params)

    def update_params(self, updates):
        self._params.update(updates)
        self._gb = GradientBoostingClassifier(**self._params)

    def fit(self, Xtrain, ytrain):
        self._gb.fit(Xtrain, ytrain)

    # def predict(self, Xtest, option=None):
    #     return self._gb.predict(Xtest)

    def predict_proba(self, Xtest, option=None):
        return self._gb.predict_proba(Xtest)[:, 1]

    def predict_proba_multi(self, Xtest, option=None):
        return self._gb.predict_proba(Xtest)

    def plt_feature_importance(self, fname_list, f_range=list()):
        importances = self._gb.feature_importances_
        std = np.std([tree[0].feature_importances_ for tree in self._gb.estimators_], axis=0)
        indices = np.argsort(importances)[::-1]
        fname_array = np.array(fname_list)
        if not f_range:
            f_range = range(indices.shape[0])
        n_f = len(f_range)
        plt.figure()
        plt.title("Gradient Boost Feature importances")
        plt.barh(range(n_f), importances[indices[f_range]],
                 color="b", xerr=std[indices[f_range]], ecolor='k', align="center")
        plt.yticks(range(n_f), fname_array[indices[f_range]])
        plt.ylim([-1, n_f])
        plt.show()

    def list_feature_importance(self, fname_list, f_range=list(), return_list=False):
        importances = self._gb.feature_importances_
        indices = np.argsort(importances)[::-1]
        print('Gradient Boost feature ranking:')
        if not f_range:
            f_range = range(indices.shape[0])
        n_f = len(f_range)
        for i in range(n_f):
            f = f_range[i]
            print('{0:d}. feature[{1:d}] {2:s} ({3:f})'.format(f + 1, indices[f], fname_list[indices[f]], importances[indices[f]]))
        if return_list:
            return [indices[f_range[i]] for i in range(n_f)]
Example 11: do_gbdt4
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def do_gbdt4(train_x, train_y, test_x=None, test_y=None, learning_rate=0.03, max_depth=8, max_features=25,
             n_estimators=600, load=False, save=True, outfile=None, search=False, log=False):
    if not search:
        if log:
            mdl_name = 'gbdt_log_train_lr' + str(learning_rate) + '_n' + str(n_estimators) + '_maxdep' + str(max_depth) + '.pkl'
        else:
            mdl_name = 'gbdt_train_lr' + str(learning_rate) + '_n' + str(n_estimators) + '_maxdep' + str(max_depth) + '.pkl'
        if os.path.exists(mdl_name):
            clf_gbdt = joblib.load(mdl_name)
        else:
            # create gradient boosting
            clf_gbdt = GradientBoostingClassifier(learning_rate=learning_rate, max_depth=max_depth,
                                                  max_features=max_features, n_estimators=n_estimators)
            clf_gbdt.fit(train_x, train_y)
            if save:
                try:
                    _ = joblib.dump(clf_gbdt, mdl_name, compress=1)
                except Exception:
                    print("*** Save GBM model to pickle failed!!!")
                    if outfile is not None:
                        outfile.write("*** Save GBM model to pickle failed!!!")
        # 'is not None' avoids ambiguous elementwise comparison on arrays
        if test_x is not None and test_y is not None:
            probas_gbdt = clf_gbdt.predict_proba(test_x)[:, 1]
            score_gbdt = roc_auc_score(test_y, probas_gbdt)
            print("GBDT ROC score", score_gbdt)
        return clf_gbdt
    else:
        max_depth_list = [6, 7, 8, 9, 10]
        n_list = [2000]
        lr_list = [0.005, 0.003]
        max_feat_list = [15, 16, 17, 18, 20]
        info = {}
        for md in max_depth_list:
            for n in n_list:
                for lr in lr_list:
                    for mf in max_feat_list:
                        print('max_depth = ', md)
                        print('n = ', n)
                        print('learning rate = ', lr)
                        print('max feature = ', mf)
                        mdl_name = 'gbdt_n' + str(n) + '_lr' + str(lr) + '_md' + str(md) + 'mf' + str(mf) + '.pkl'
                        if os.path.exists(mdl_name):
                            clf_gbdt = joblib.load(mdl_name)
                        else:
                            # use the loop variables (lr, n), not the function defaults
                            clf_gbdt = GradientBoostingClassifier(learning_rate=lr, max_depth=md, max_features=mf, n_estimators=n)
                            clf_gbdt.fit(train_x, train_y)
                            _ = joblib.dump(clf_gbdt, mdl_name, compress=1)
                        probas_gbdt = clf_gbdt.predict_proba(test_x)[:, 1]
                        score_gbdt = roc_auc_score(test_y, probas_gbdt)
                        info[md, n, lr, mf] = score_gbdt
        for key in info:
            print('GBDT max_depth = %d, n = %d, lr = %.5f, max_feature = %d, ROC score = %.5f' % (
                key[0], key[1], key[2], key[3], info[key]))
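The nested loops in the search branch re-implement an exhaustive grid search. sklearn's GridSearchCV expresses the same sweep, though it scores by cross-validation on the training data rather than on a fixed test split (a sketch over the same grid; train_x/train_y as above):

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

param_grid = {
    'max_depth': [6, 7, 8, 9, 10],
    'n_estimators': [2000],
    'learning_rate': [0.005, 0.003],
    'max_features': [15, 16, 17, 18, 20],
}
search = GridSearchCV(GradientBoostingClassifier(), param_grid,
                      scoring='roc_auc', cv=3, n_jobs=-1)
search.fit(train_x, train_y)
print(search.best_params_, search.best_score_)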
Example 12: gb
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def gb(train_data, train_label, val_data, val_label, test_data, name="GradientBoosting_submission.csv"):
    print("start training GradientBoosting...")
    gbClf = GradientBoostingClassifier()  # params: by default
    gbClf.fit(train_data, train_label)
    # evaluate on validation set
    val_pred_label = gbClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set:", logloss)
    print("Start classify test set...")
    test_label = gbClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
Example 13: classify2
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def classify2(dis_data, numeric_data, t_label):
    fold = 5
    skf = StratifiedKFold(n_splits=fold)  # modern API: pass data and labels to split()
    roc_auc = 0
    f1_score_value = 0
    clf1 = LogisticRegression()
    clf2 = GradientBoostingClassifier()
    # clf3 = tree.DecisionTreeClassifier(max_depth=500, max_leaf_nodes=500, class_weight={1: 12})
    clf3 = GradientBoostingClassifier()
    for train, test in skf.split(dis_data, t_label):
        clf3 = clf3.fit(dis_data.iloc[train], t_label.iloc[train])
        # compute AUC from the positive-class probability (column 1, matching pos_label=1)
        probas_ = clf3.predict_proba(dis_data.iloc[test])
        fpr, tpr, thresholds = roc_curve(t_label.iloc[test], probas_[:, 1])
        roc_auc += auc(fpr, tpr)
        # compute f1_score
        label_pred = clf3.predict(dis_data.iloc[test])
        f1_score_value += f1_score(t_label.iloc[test], label_pred, pos_label=1)
    return roc_auc / fold, f1_score_value / fold
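The fold loop can be compressed with cross_val_score, which handles the splitting and averaging in one call (a sketch; dis_data and t_label as above):

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import cross_val_score

clf = GradientBoostingClassifier()
mean_auc = cross_val_score(clf, dis_data, t_label, cv=5, scoring='roc_auc').mean()
mean_f1 = cross_val_score(clf, dis_data, t_label, cv=5, scoring='f1').mean()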
Example 14: machineLearning
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def machineLearning(X, Y_parameters, predict_value, writer):
    X_parameters = X
    clf1 = LinearSVR()
    clf2 = LinearRegression()
    clf3 = RandomForestClassifier()
    clf4 = LogisticRegression()
    clf5 = DecisionTreeClassifier()
    clf6 = GradientBoostingClassifier()
    # clf1.fit(X_parameters, Y_parameters)
    # clf2.fit(X_parameters, Y_parameters)
    # clf3.fit(X_parameters, Y_parameters)
    clf4.fit(X_parameters, Y_parameters)
    # clf5.fit(X_parameters, Y_parameters)
    clf6.fit(X_parameters, Y_parameters)
    print("finish fitting")
    answer = []
    for line in predict_value:
        line1 = line[1:]
        # predict_proba expects a 2-D array, hence the extra brackets around the single sample
        # predict_outcome1 = clf1.predict([line1])
        # predict_outcome2 = clf2.predict([line1])
        # predict_outcome3 = clf3.predict_proba([line1])
        predict_outcome4 = clf4.predict_proba([line1])
        # predict_outcome5 = clf5.predict_proba([line1])
        predict_outcome6 = clf6.predict_proba([line1])
        # value1 = predict_outcome1[0]
        # value2 = predict_outcome2[0]
        # value3 = predict_outcome3[0][1]
        value4 = predict_outcome4[0][1]
        # value5 = predict_outcome5[0][1]
        value6 = predict_outcome6[0][1]
        # average the two positive-class probabilities
        data = (value4 + value6) / 2
        writer.writerow([line[0], data])
    print("finish learning")
Example 15: train
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import predict_proba [as alias]
def train():
    posi_result = {}
    train_feature, test_feature, train_id_list, test_id_list, train_tar_list = merge_feature(feature_str)
    tmp1 = [m < 32 for m in train_tar_list]
    tmp1 = np.array(tmp1)
    # train_feature = train_feature[tmp1]
    target_list = np.array(train_tar_list)
    target_list = target_list[tmp1]
    # train_id_list = np.array(train_id_list)
    # train_id_list = train_id_list[tmp1]
    c_feature = train_feature.columns[:]
    clf1 = RandomForestClassifier(n_estimators=200, min_samples_split=17)
    clf1.fit(train_feature[c_feature], target_list)
    # rf_preds = clf1.predict(test_feature)
    rf_prob = clf1.predict_proba(test_feature)
    gbdt1 = GradientBoostingClassifier(n_estimators=150, min_samples_split=17)
    gbdt1.fit(train_feature[c_feature], target_list)
    # gbdt_preds = gbdt1.predict(test_feature)
    gbdt_prob = gbdt1.predict_proba(test_feature)
    all_prob = rf_prob + gbdt_prob
    all_preds = []
    print(all_prob.shape)
    for k in range(all_prob.shape[0]):
        prob1 = list(all_prob[k, :])
        ind1 = prob1.index(max(prob1))  # index of the highest summed probability
        all_preds.append(ind1)
    for j in range(len(all_preds)):
        all_pre_name = dl.get_num_position(all_preds[j])
        posi_result[test_id_list[j]] = all_pre_name
    return posi_result
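Summing the two probability matrices and taking the per-row argmax is soft voting; sklearn packages the same idea as VotingClassifier(voting='soft') (a sketch under the same feature/target names):

from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, VotingClassifier

vote = VotingClassifier(
    estimators=[('rf', RandomForestClassifier(n_estimators=200, min_samples_split=17)),
                ('gbdt', GradientBoostingClassifier(n_estimators=150, min_samples_split=17))],
    voting='soft')
vote.fit(train_feature[c_feature], target_list)
all_preds = vote.predict(test_feature)  # argmax of the averaged class probabilities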