This page collects typical usage examples of the Python method sklearn.ensemble.GradientBoostingClassifier.score. If you have been wondering what GradientBoostingClassifier.score does, how to use it, and what real-world usage looks like, the curated code samples below may help. You can also explore further usage examples of the containing class, sklearn.ensemble.GradientBoostingClassifier.
Fifteen code examples of GradientBoostingClassifier.score are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
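Before diving into the collected examples, a minimal sketch of the method itself may be useful: score(X, y) returns the mean accuracy of the classifier's predictions on X with respect to the true labels y. The dataset and hyperparameters below are purely illustrative, and the import path assumes a recent scikit-learn where train_test_split lives in sklearn.model_selection (several of the older snippets below still use the long-deprecated sklearn.cross_validation):

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

# Illustrative synthetic data; any binary classification dataset works here
X, y = make_classification(n_samples=1000, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                    random_state=0)

clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1,
                                 max_depth=3, random_state=0)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))  # mean accuracy on the held-out set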
Example 1: test_classification_synthetic
# Required module: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def test_classification_synthetic():
    # Test GradientBoostingClassifier on the synthetic dataset used by
    # Hastie et al. in ESLII Example 12.7.
    X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)
    X_train, X_test = X[:2000], X[2000:]
    y_train, y_test = y[:2000], y[2000:]

    for loss in ('deviance', 'exponential'):
        gbrt = GradientBoostingClassifier(n_estimators=100, min_samples_split=1,
                                          max_depth=1, loss=loss,
                                          learning_rate=1.0, random_state=0)
        gbrt.fit(X_train, y_train)
        error_rate = (1.0 - gbrt.score(X_test, y_test))
        assert error_rate < 0.09, \
            "GB(loss={}) failed with error {}".format(loss, error_rate)

        gbrt = GradientBoostingClassifier(n_estimators=200, min_samples_split=1,
                                          max_depth=1,
                                          learning_rate=1.0, subsample=0.5,
                                          random_state=0)
        gbrt.fit(X_train, y_train)
        error_rate = (1.0 - gbrt.score(X_test, y_test))
        assert error_rate < 0.08, ("Stochastic GradientBoostingClassifier(loss={}) "
                                   "failed with error {}".format(loss, error_rate))
Example 2: run_gradient_boosting_classifier
# Required module: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def run_gradient_boosting_classifier(data, _max_depth):
    (feature_train, feature_test,
     label_train, label_test) = train_test_split(data[:, 0:-1], data[:, -1].astype(int),
                                                 test_size=0.25)
    # TODO: vary the number of estimators and the learning rate
    gbc = GradientBoostingClassifier(learning_rate=0.1, n_estimators=50,
                                     max_depth=_max_depth, verbose=True)
    gbc.fit(feature_train, label_train)
    training_accuracy = gbc.score(feature_train, label_train)
    #cross_validation_score = cross_val_score(gbc, feature_train, label_train, cv=10)
    testing_accuracy = gbc.score(feature_test, label_test)
    print "Gradient Boosting Results for Max Depth:", _max_depth
    print "Training Accuracy:", training_accuracy
    #print "10-fold Cross Validation Accuracy: %0.2f (+/- %0.2f)" % (cross_validation_score.mean(), cross_validation_score.std() * 2)
    print "Testing Accuracy:", testing_accuracy
    feature_importance = gbc.feature_importances_
    stddev = np.std([tree[0].feature_importances_ for tree in gbc.estimators_], axis=0)
    indices = np.argsort(feature_importance)[::-1]
    # Print the feature ranking
    print("Feature ranking:")
    for f in range(len(feature_importance)):
        print("%d. feature %d (%f)" % (f + 1, indices[f], feature_importance[indices[f]]))
    plot_feature_importance(feature_importance, indices, stddev,
                            "gradient-boosted-classifier-feature-importance-depth-" + str(_max_depth))
Example 3: test_zero_estimator_clf
# Required module: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def test_zero_estimator_clf():
    # Test if ZeroEstimator works for classification.
    X = iris.data
    y = np.array(iris.target)

    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init=ZeroEstimator())
    est.fit(X, y)
    assert_greater(est.score(X, y), 0.96)

    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init='zero')
    est.fit(X, y)
    assert_greater(est.score(X, y), 0.96)

    # binary clf
    mask = y != 0
    y[mask] = 1
    y[~mask] = 0

    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init='zero')
    est.fit(X, y)
    assert_greater(est.score(X, y), 0.96)

    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init='foobar')
    assert_raises(ValueError, est.fit, X, y)
Example 4: plotLearningCurve
# Required module: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def plotLearningCurve(dat, lab, optim):
    '''
    This function plots the learning curve for the classifier.

    Parameters:
    -----------
    dat: numpy array with all records
    lab: numpy array with class labels of all records
    optim: optimal parameters for the classifier
    '''
    clf = GradientBoostingClassifier(learning_rate=optim[0], subsample=optim[1])

    # split training data into train and test (optimal parameters already chosen)
    xTrain, xTest, yTrain, yTest = cross_validation.train_test_split(dat, lab,
                                                                     test_size=0.3)

    # train on various sizes of the training set to generate the learning curve
    szV = range(10, np.shape(xTrain)[0], int(np.shape(xTrain)[0]) / 10)
    szV.append(np.shape(xTrain)[0])

    LCvals = np.zeros((len(szV), 3), dtype=np.float64)  # data points of the learning curve
    for i in xrange(0, len(szV)):
        clf = clf.fit(xTrain[:szV[i], :], yTrain[:szV[i]])
        LCvals[i, 0] = szV[i]
        LCvals[i, 1] = clf.score(xTest, yTest)
        LCvals[i, 2] = clf.score(xTrain[:szV[i], :], yTrain[:szV[i]])
    #print LCvals

    # generate figure
    fig = plt.figure(1, figsize=(10, 10))
    prop = matplotlib.font_manager.FontProperties(size=15.5)
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(LCvals[:, 0] / np.float64(np.shape(xTrain)[0]), 1.0 - LCvals[:, 1],
            label='Test Set')
    ax.plot(LCvals[:, 0] / np.float64(np.shape(xTrain)[0]), 1.0 - LCvals[:, 2],
            label='Training Set')
    ax.set_ylabel(r"Error", fontsize=20)
    ax.set_xlabel(r"% of Training Set Used", fontsize=20)
    ax.axis([0.0, 1.0, -0.1, 0.5])
    plt.legend(loc='upper right', prop=prop)
    plt.savefig('LC_GB.pdf', bbox_inches='tight')
    fig.clear()

    # where is the model failing?
    predProb = clf.predict_proba(xTest)
    tmp = np.zeros((np.shape(predProb)[0], np.shape(predProb)[1] + 2))
    tmp[:, :-2] = predProb
    tmp[:, -2] = clf.predict(xTest)
    tmp[:, -1] = yTest
    mask = tmp[:, -2] != tmp[:, -1]
    print tmp[mask]
    print mask.sum(), len(xTest)
    print tmp[:50, :]
Example 5: TestGradBoost
# Required module: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def TestGradBoost(dat, lab):
    '''
    This function finds the optimal parameters for the classifier.

    Parameters:
    -----------
    dat: numpy array with all records
    lab: numpy array with class labels of all records

    Returns:
    --------
    par: optimal parameters for the classifier
    '''
    # Gradient boosting parameters; choose the pair that does best on the validation set
    # (learning_rate, subsample)
    lr = np.linspace(0.01, 0.2, num=5)
    sub = np.linspace(0.1, 1.0, num=5)
    par = [(e, f) for e in lr for f in sub]

    # try several different ensembles to get an error bar on the score
    num = 10
    seed = np.random.randint(1000000, size=num)
    valScore = np.zeros((num, len(par)))
    testScore = np.zeros((num, len(par)))
    for nv in xrange(0, num):
        print 'Ensemble:', nv + 1
        # split training data into train, validation, test (60/20/20)
        xTrain, xTmp, yTrain, yTmp = cross_validation.train_test_split(dat, lab,
                                                                       test_size=0.4,
                                                                       random_state=seed[nv])
        xVal, xTest, yVal, yTest = cross_validation.train_test_split(xTmp, yTmp,
                                                                     test_size=0.5,
                                                                     random_state=seed[nv])
        # now train a classifier for each parameter combination
        for i in xrange(0, len(par)):
            clf = GradientBoostingClassifier(learning_rate=par[i][0], subsample=par[i][1])
            clf = clf.fit(xTrain, yTrain)
            valScore[nv, i] = clf.score(xVal, yVal)
            testScore[nv, i] = clf.score(xTest, yTest)

    # find the optimal parameters
    tmp = np.argmax(np.mean(valScore, axis=0))
    print
    print 'Optimal parameters (learning rate, subsampling):', par[tmp]
    print ('Mean | Std Score (Validation set):', np.mean(valScore, axis=0)[tmp],
           '|', np.std(valScore, axis=0)[tmp])
    print ('Mean | Std Score (Test set):', np.mean(testScore, axis=0)[tmp],
           '|', np.std(testScore, axis=0)[tmp])

    # return the optimal parameters
    return par[tmp]
Example 6: main
# Required module: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def main():
    # generate synthetic binary classification data
    # (the name refers to Example 10.2 in the ESL textbook; see refs below)
    X, y = make_hastie_10_2()

    # perform train/test split (no need to shuffle)
    split_pt = int(TRAIN_PCT * len(X))
    X_train, X_test = X[:split_pt], X[split_pt:]
    y_train, y_test = y[:split_pt], y[split_pt:]

    # single decision stump
    stump_clf = DecisionTreeClassifier(max_depth=1)
    stump_clf.fit(X_train, y_train)
    stump_score = round(stump_clf.score(X_test, y_test), 3)
    print 'decision stump acc = {}\t(max_depth = 1)'.format(stump_score)

    # single decision tree (max_depth=3)
    tree_clf = DecisionTreeClassifier(max_depth=3)
    tree_clf.fit(X_train, y_train)
    tree_score = round(tree_clf.score(X_test, y_test), 3)
    print 'decision tree acc = {}\t(max_depth = 3)\n'.format(tree_score)

    # gbt: a powerful ensemble technique
    gbt_scores = list()
    for k in (10, 50, 100, 150, 200, 250, 300, 350, 400, 450, 500):
        print 'fitting gbt for n_estimators = {}...'.format(k)
        gbt_clf = GradientBoostingClassifier(
            n_estimators=k,     # number of weak learners for this iteration
            max_depth=1,        # weak learners are decision stumps
            learning_rate=1.0)  # regularization (shrinkage) hyperparam
        gbt_clf.fit(X_train, y_train)
        gbt_scores.append(round(gbt_clf.score(X_test, y_test), 3))
    print '\ngbt accuracy =\n{}\n'.format(gbt_scores)

    # stochastic gbt (using subsampling)
    sgbt_scores = list()
    for k in (10, 50, 100, 150, 200, 250, 300, 350, 400, 450, 500):
        print 'fitting sgbt for n_estimators = {}...'.format(k)
        sgbt_clf = GradientBoostingClassifier(
            n_estimators=k,     # number of weak learners for this iteration
            max_depth=1,        # weak learners are decision stumps
            subsample=0.5,      # fraction of the training set used by each base classifier
            learning_rate=1.0)  # regularization (shrinkage) hyperparam
        sgbt_clf.fit(X_train, y_train)
        sgbt_scores.append(round(sgbt_clf.score(X_test, y_test), 3))
    print '\nsgbt accuracy =\n{}'.format(sgbt_scores)
Example 7: trainAndPredict
# Required module: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def trainAndPredict(num_trees, train_num):
    train_X = X[:train_num]
    train_y = y[:train_num]
    test_X = X[train_num:]
    test_y = y[train_num:]
    #clf = svm.SVC()
    clf = GradientBoostingClassifier(n_estimators=num_trees, learning_rate=0.5,
                                     max_depth=2, random_state=0)
    clf.fit(train_X, train_y)
    return (clf.score(train_X, train_y), clf.score(test_X, test_y))
Example 8: l1_penalty_solver
# Required module: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def l1_penalty_solver(train_data, test_data, n_est, m_d):
    best = 0.0
    best_Output = []
    for j in [10**(x) for x in xrange(-3, -2, 1)]:
        X, y = train_data[:, 1::], train_data[:, 0]
        x1, y1 = test_data[:, 1::], test_data[:, 0]
        # Set regularization parameter
        for C in range(10, 11, 1):
            # turn down tolerance for short training time
            #cls = svm.SVC(kernel='poly', degree=3).fit(X, y)
            cls = GradientBoostingClassifier(n_estimators=n_est, max_depth=m_d).fit(X, y)
            #cls = DecisionTreeClassifier().fit(X, y)
            #cls = LogisticRegression(C=C, penalty='l1', tol=j).fit(X, y)
            #cls = LogisticRegression(C=C, penalty='l2', tol=j).fit(X, y)
            val1 = cls.predict(x1)
            val2 = val1  #cls.predict(x1)
            count = 0.
            for i in range(len(val1)):
                if val1[i] == y1[i]:
                    count += 1.
                else:
                    continue
            result1 = count / len(val1)
            count = 0.
            for i in range(len(val2)):
                if val2[i] == y1[i]:
                    count += 1.
                else:
                    continue
            result2 = count / len(val2)
            if result1 > best:
                best = result1
                best_Output = val1
            if result2 > best:
                best = result2
                best_Output = val2
    pr.print_results(best_Output)
    #return best
    return [cls.score(X, y), cls.score(x1, y1)]
Example 9: gbPredict
# Required module: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def gbPredict(LOSS, N_EST, L_RATE, M_DEPT, SUB_S, W_START, N_FOLD, EX_F, TRAIN_DATA_X, TRAIN_DATA_Y, TEST__DATA_X, isProb):
    # feature extraction
    ### clf = GradientBoostingClassifier(loss=LOSS, n_estimators=N_EST, learning_rate=L_RATE, max_depth=M_DEPT, subsample=SUB_S, warm_start=W_START).fit(TRAIN_DATA_X, TRAIN_DATA_Y)
    ### extA = delFeatMin(clf.feature_importances_, EX_F)
    ### TRAIN_DATA_X = TRAIN_DATA_X[:, extA]

    # k-fold validation
    kf = KFold(TRAIN_DATA_Y.shape[0], n_folds=N_FOLD)
    tesV = 0.0
    for train_index, test_index in kf:
        X_train, X_test = TRAIN_DATA_X[train_index], TRAIN_DATA_X[test_index]
        y_train, y_test = TRAIN_DATA_Y[train_index], TRAIN_DATA_Y[test_index]
        clf = GradientBoostingClassifier(loss=LOSS, n_estimators=N_EST, learning_rate=L_RATE, max_depth=M_DEPT, subsample=SUB_S, warm_start=W_START).fit(X_train, y_train)
        tesK = 1 - clf.score(X_test, y_test)
        tesV += tesK
    eVal = tesV / N_FOLD

    # train on all data
    clf = GradientBoostingClassifier(loss=LOSS, n_estimators=N_EST, learning_rate=L_RATE, max_depth=M_DEPT, subsample=SUB_S, warm_start=W_START).fit(TRAIN_DATA_X, TRAIN_DATA_Y)
    ### TEST__DATA_X = TEST__DATA_X[:, extA]  # depends on extA from the disabled feature-extraction block above, so it is disabled with it
    if isProb:
        data = clf.predict_proba(TEST__DATA_X)
    else:
        data = clf.predict(TEST__DATA_X)
    print "Eval =", eVal, "with n_esti =", N_EST, "l_rate =", L_RATE, "m_dep =", M_DEPT, "sub_s =", SUB_S, "ex_num =", EX_F, "and loss is", LOSS
    return (data, eVal)
Example 10: GB_Classifier
# Required module: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def GB_Classifier(X_train, X_cv, X_test, Y_train, Y_cv, Y_test, Actual_DS):
    print("***************Starting Gradient Boosting***************")
    t0 = time()
    clf = GradientBoostingClassifier(n_estimators=500, learning_rate=0.01)
    clf.fit(X_train, Y_train)
    preds = clf.predict(X_cv)
    score = clf.score(X_cv, Y_cv)
    print("Gradient Boosting - {0:.2f}%".format(100 * score))

    Summary = pd.crosstab(label_enc.inverse_transform(Y_cv), label_enc.inverse_transform(preds),
                          rownames=['actual'], colnames=['preds'])
    Summary['pct'] = (Summary.divide(Summary.sum(axis=1), axis=1)).max(axis=1) * 100
    print(Summary)

    # Check with log loss function
    epsilon = 1e-15
    #ll_output = log_loss_func(Y_cv, preds, epsilon)
    preds2 = clf.predict_proba(X_cv)
    ll_output2 = log_loss(Y_cv, preds2, eps=1e-15, normalize=True)
    print(ll_output2)
    print("done in %0.3fs" % (time() - t0))

    preds3 = clf.predict_proba(X_test)
    #preds4 = clf.predict_proba((Actual_DS.ix[:, 'feat_1':]))
    preds4 = clf.predict_proba(Actual_DS)
    print("***************Ending Gradient Boosting***************")
    return pd.DataFrame(preds2), pd.DataFrame(preds3), pd.DataFrame(preds4)
Example 11: classify
# Required module: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def classify(train, train_sample_ids, test_sample_ids, whichClassifier):
    feature_names = list(train.columns)
    feature_names.remove("click_bool")
    feature_names.remove("booking_bool")
    feature_names.remove("gross_bookings_usd")
    #feature_names.remove("date_time")
    feature_names.remove("position")

    # Create train and test sets
    trainX = train[feature_names][train_sample_ids]
    testX = train[feature_names][test_sample_ids]
    Y_columns = ["click_bool", "booking_bool", "position"]
    trainY = train[Y_columns][train_sample_ids].apply(lambda x: objective(x, whichClassifier), axis=1)
    testY = train[Y_columns][test_sample_ids].apply(lambda x: objective(x, whichClassifier), axis=1)
    print "Train: ", len(trainY)
    print "Test: ", len(testY)

    print("Training the Classifier")
    classifier = GradientBoostingClassifier(n_estimators=1024,
                                            verbose=3,
                                            subsample=0.8,
                                            min_samples_split=10,
                                            max_depth=6,
                                            random_state=1)
    classifier.fit(trainX, trainY)
    print "Score = ", classifier.score(testX, testY)
    return classifier
Example 12: train_gbt
# Required module: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def train_gbt(filename, color, name):
    '''Train a Gradient Boosted Trees classifier.'''
    # Read data
    data2 = pd.read_csv(filename, encoding="utf")
    X = data2.ix[:, 1:-1]
    y = data2.ix[:, -1]

    # Split into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define model
    clf1 = GradientBoostingClassifier(learning_rate=0.05, max_depth=5, random_state=42)

    # Fit model
    t0 = time()
    clf1.fit(X_train, y_train)
    pred_probas = clf1.predict_proba(X_val)
    predictions = clf1.predict(X_val)
    print "Score", clf1.score(X_val, y_val)

    importances = clf1.feature_importances_
    indices = np.argsort(importances)[::-1]

    # Metrics & plotting
    metrics[1, 0] = precision_score(y_val, predictions)
    metrics[1, 1] = recall_score(y_val, predictions)
    metrics[1, 2] = f1_score(y_val, predictions)
    metrics[1, 3] = time() - t0

    fpr_rf, tpr_rf, _ = roc_curve(y_val, predictions)
    plt.plot(fpr_rf, tpr_rf, color=color, label=name)
    return importances, indices
Example 13: main
# Required module: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def main():
    train_f = pd.read_csv(train_path, header=0, parse_dates=['Dates'])
    print train_f.dtypes

    X, Y = get_feature(train_f, "training_set")

    ### TRAINING
    clf = GradientBoostingClassifier(n_estimators=50)
    # clf = RandomForestClassifier(n_estimators=2)
    # clf = LogisticRegression(n_jobs=4)

    X, Y = shuffle_XY(X, Y)
    data_len = len(X)
    train_len = data_len * 95 / 100
    val_len = data_len - train_len
    X_train = X[:train_len]
    X_val = X[train_len:]
    Y_train = Y[:train_len]
    Y_val = Y[train_len:]

    clf = clf.fit(X_train, Y_train)
    print "Training done"

    val_acc = clf.score(X_val, Y_val)
    print "Val acc:", val_acc

    val_pred = clf.predict_proba(X_val)
    # print max(Y_val), min(Y_val)
    # print Y_val, Y_val + 1

    val_log = 0.0
    cnt = 0
    for y in Y_val:
        val_log += math.log(val_pred[cnt, y] + 0.0000001)
        cnt += 1
    val_log = - val_log / len(Y_val)
    print "Val log loss:", val_log
    # print "Val loss:", log_loss(Y_val+1, val_pred)  # Note the +1 here!

    """
    # scores = cross_val_score(clf, X, Y)
    # print "Cross val acc:", scores.mean()
    """

    ### Testing
    test_f = pd.read_csv(test_path, header=0, parse_dates=['Dates'])
    # print test_f.dtypes
    X_test, _ = get_feature(test_f, "test_set")
    Y_test = clf.predict_proba(X_test)

    ### Write results
    # write_results(Y_test)
    write_results_prob(Y_test)
Example 14: rand_forest_train
# Required module: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def rand_forest_train(self):
    # Read the locally stored user-feature data
    users = pd.read_csv('names.csv')

    # Use similarity, platform, reputation and entropy as the features for
    # telling humans and machines apart
    X = users[['similarity', 'platform', 'reputation', 'entropy']]
    y = users['human_or_machine']

    # Split the original data, keeping 25% for testing
    from sklearn.cross_validation import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=33)

    # Turn the categorical features into feature vectors
    from sklearn.feature_extraction import DictVectorizer
    vec = DictVectorizer(sparse=False)
    X_train = vec.fit_transform(X_train.to_dict(orient='record'))
    X_test = vec.transform(X_test.to_dict(orient='record'))

    # Train and predict with a single decision tree
    from sklearn.tree import DecisionTreeClassifier
    dtc = DecisionTreeClassifier()
    dtc.fit(X_train, y_train)
    dtc_y_pred = dtc.predict(X_test)

    # Train and predict with a random forest classifier
    from sklearn.ensemble import RandomForestClassifier
    rfc = RandomForestClassifier()
    rfc.fit(X_train, y_train)
    rfc_y_pred = rfc.predict(X_test)

    # Train and predict with a gradient boosted decision tree
    from sklearn.ensemble import GradientBoostingClassifier
    gbc = GradientBoostingClassifier()
    gbc.fit(X_train, y_train)
    gbc_y_pred = gbc.predict(X_test)

    from sklearn.metrics import classification_report

    # Report the accuracy of the single decision tree on the test set,
    # along with detailed precision, recall and F1 metrics
    print("Single decision tree accuracy:", dtc.score(X_test, y_test))
    print(classification_report(dtc_y_pred, y_test))

    # Report the accuracy of the random forest classifier on the test set,
    # along with detailed precision, recall and F1 metrics
    print("Random forest classifier accuracy:", rfc.score(X_test, y_test))
    print(classification_report(rfc_y_pred, y_test))

    # Report the accuracy of the gradient boosted decision tree on the test set,
    # along with detailed precision, recall and F1 metrics
    print("Gradient boosted decision tree accuracy:", gbc.score(X_test, y_test))
    print(classification_report(gbc_y_pred, y_test))

    users = pd.read_csv('values.csv')

    # Check whether each record is a machine or a human
    X = users[['similarity', 'platform', 'reputation', 'entropy']]
    X = vec.transform(X.to_dict(orient='record'))
    print(rfc.predict(X))

    self.dtc = dtc
    self.rfc = rfc
    self.gbc = gbc
Example 15: gbdt_clf
# Required module: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import score [as alias]
def gbdt_clf(x_train, x_test, y_train, y_test):
    clf = GradientBoostingClassifier(n_estimators=100)
    clf.fit(x_train, y_train)
    y_pred = clf.predict_proba(x_test)[:, 1]
    # Note: score() returns mean accuracy, not an F1 score
    print "gbdt accuracy", clf.score(x_test, y_test)
    scores = roc_auc_score(y_test, y_pred)
    print "gbdt_clf scores: ", scores
    joblib.dump(clf, './output/gbdt_clf.model')