This article collects typical usage examples of the Python method sklearn.ensemble.GradientBoostingClassifier.staged_decision_function. If you have been wondering what exactly GradientBoostingClassifier.staged_decision_function does, how to use it, and what real code that calls it looks like, the curated method examples here may help. You can also explore further usage examples of the containing class, sklearn.ensemble.GradientBoostingClassifier.
Below are 15 code examples of GradientBoostingClassifier.staged_decision_function, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
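Before the examples, here is a minimal self-contained sketch of the pattern almost all of them share: fit the classifier, iterate over staged_decision_function to get the raw score after each boosting stage, squash it through a sigmoid, and track log-loss per stage. The synthetic make_classification data and all parameter values are illustrative assumptions, not taken from any example below.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split

# Illustrative data; the examples below use a real dataset (gbm-data.csv).
X, y = make_classification(n_samples=500, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = GradientBoostingClassifier(n_estimators=50, random_state=0)
clf.fit(X_train, y_train)

losses = []
for scores in clf.staged_decision_function(X_test):
    # staged_decision_function yields one raw-score array per boosting stage;
    # a sigmoid turns the scores into positive-class probabilities.
    probs = 1.0 / (1.0 + np.exp(-np.ravel(scores)))
    losses.append(log_loss(y_test, probs))

best = int(np.argmin(losses))
print('best stage:', best + 1, 'test log-loss:', losses[best])

Note that older scikit-learn releases returned the staged binary scores as a one-column (n_samples, 1) array, which is why several examples below unpack each row with x[0] or row[0]; np.ravel in the sketch handles both the old column shape and the flat shape returned by recent versions.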
Example 1: learn
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_decision_function [as alias]
def learn(learning_rate, X_train, y_train, X_test, y_test):
    model = GradientBoostingClassifier(
        n_estimators=250,
        verbose=True,
        random_state=241,
        learning_rate=learning_rate
    )
    model.fit(X_train, y_train)
    # plot scores
    test_score = list(range(250))
    train_score = list(range(250))
    for i, predictions in enumerate(model.staged_decision_function(X_test)):
        predictions = [x[0] for x in predictions.tolist()]  # unpack the (n_samples, 1) column format
        predictions = [1 / (1 + math.exp(-x)) for x in predictions]
        test_score[i] = log_loss(y_test, predictions)
    for i, predictions in enumerate(model.staged_decision_function(X_train)):
        predictions = [x[0] for x in predictions.tolist()]  # unpack the (n_samples, 1) column format
        predictions = [1 / (1 + math.exp(-x)) for x in predictions]
        train_score[i] = log_loss(y_train, predictions)
    plt.figure()
    plt.plot(test_score, 'r', linewidth=2)
    plt.plot(train_score, 'g', linewidth=2)
    plt.legend(['test', 'train'])
    plt.show()
    return train_score, test_score
Example 2: train_test_split
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_decision_function [as alias]
X = data[:, 1:]
# split into train/test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=241)
# train
clf = GradientBoostingClassifier(n_estimators=250, verbose=True, random_state=241, learning_rate=0.2)
clf.fit(X_train, y_train)
# log loss on the test set from the raw staged scores, sigmoid-transformed
loss_on_test = []
for i, pred1 in enumerate(clf.staged_decision_function(X_test)):
    x = log_loss(y_test, 1.0 / (1.0 + np.exp(-pred1)))
    loss_on_test.append(x)
# the same losses via staged_predict_proba, which yields probabilities directly
grd2 = clf.staged_predict_proba(X_test)
loss_on_test_proba = []
for i, pred2 in enumerate(grd2):
    loss_on_test_proba.append(log_loss(y_test, pred2))
Example 3: train_test_split
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_decision_function [as alias]
plt.style.use('ggplot')
df = pd.read_csv('gbm-data.csv')
val = df.values
X = val[:, 1:]
y = val[:, 0]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.8, random_state=241)
# learning_rates = [1, 0.5, 0.3, 0.2, 0.1]
learning_rates = [0.2]
sigmoid = lambda x: 1 / (1 + np.exp(-x))
log_loss_test = []
for l in learning_rates:
    clf = GradientBoostingClassifier(n_estimators=250, verbose=True,
                                     random_state=241, learning_rate=l)
    print('fitting...')
    clf.fit(X_train, y_train)
    print('building staged_decision_function')
    staged_dec = clf.staged_decision_function(X_test)
    for pred in staged_dec:
        y_pred = sigmoid(pred)
        log_loss_test.append(log_loss(y_test, y_pred))
# index and value of the minimal test log-loss
best_iter = [np.argmin(log_loss_test), log_loss_test[np.argmin(log_loss_test)]]
# clf1 = RandomForestClassifier(n_estimators=37, random_state=241)
# clf1.fit(X_train, y_train)
# prediction = clf1.predict_proba(X_test)
# res = log_loss(y_test, prediction)
Example 4: train_test_split
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_decision_function [as alias]
X_train, X_test, y_train, y_test = train_test_split(features, target,
                                                    test_size=0.8,
                                                    random_state=241)
# for lr in [1, 0.5, 0.3, 0.2, 0.1]:
for lr in [0.2]:
    clf = GradientBoostingClassifier(n_estimators=250,
                                     verbose=True,
                                     random_state=241,
                                     learning_rate=lr)
    clf.fit(X_train, y_train)
    sigmoid_test_arr, sigmoid_train_arr = [], []
    train_pred = clf.staged_decision_function(X_train)
    test_pred = clf.staged_decision_function(X_test)
    test_pred_arr, train_pred_arr = [], []
    for i, val in enumerate(train_pred):
        sigmoid = 1 / (1 + np.exp(-val))
        train_pred_arr.append(log_loss(y_train, sigmoid))
    for i, val in enumerate(test_pred):
        sigmoid = 1 / (1 + np.exp(-val))
        test_pred_arr.append(log_loss(y_test, sigmoid))
    test_tuples, train_tuples = [], []
    i = 0
Example 5: sigmoid
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_decision_function [as alias]
test_deviance = {}

def sigmoid(y_pred):
    return 1 / (1 + math.e ** (-y_pred))

learning_rates = [1, 0.5, 0.3, 0.2, 0.1]
for learning_rate in learning_rates:
    model = GradientBoostingClassifier(n_estimators=250, verbose=True, random_state=241, learning_rate=learning_rate)
    model.fit(X_train, y_train)
    # compute test set deviance
    test_deviance[learning_rate] = np.zeros((250,), dtype=np.float64)
    for i, y_pred in enumerate(model.staged_decision_function(X_test)):
        # log_loss assumes that y_test[i] is in {0, 1}
        test_deviance[learning_rate][i] = log_loss(y_test, sigmoid(y_pred))
    plt.plot((np.arange(test_deviance[learning_rate].shape[0]) + 1)[::5], test_deviance[learning_rate][::5],
             '-', label='learning_rate = {}'.format(learning_rate))
plt.legend(loc='upper left')
plt.xlabel('Boosting Iterations')
plt.ylabel('Test Set Deviance')
plt.show()
# 3. How should the quality curve on the test set be characterized starting
#    from some iteration: overfitting or underfitting?
#    Answer with one of the words: overfitting or underfitting.
print('overfitting')
Example 6: len
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_decision_function [as alias]
import matplotlib.pyplot as plt

data = pandas.read_csv('gbm-data.csv')
X = data[list(range(1, len(data.columns)))]
y = np.ravel(data[[0]])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=241)
for learning_rate in [1, 0.5, 0.3, 0.2, 0.1]:
    clf = GradientBoostingClassifier(n_estimators=250, verbose=True, random_state=241, learning_rate=learning_rate)
    clf.fit(X_train, y_train)
    log_train = []
    log_test = []
    for y_pred in clf.staged_decision_function(X_train):
        log_train.append(log_loss(y_train, 1 / (1 + np.exp(-y_pred))))
    for y_pred in clf.staged_decision_function(X_test):
        log_test.append(log_loss(y_test, 1 / (1 + np.exp(-y_pred))))
    if learning_rate == 0.2:
        # remember the minimal test log-loss and the iteration it occurs at
        mini = min(log_test)
        ind = log_test.index(mini)
    plt.figure()
    plt.plot(log_test, 'r', linewidth=2)
    plt.plot(log_train, 'g', linewidth=2)
    plt.legend(['test', 'train'])
    plt.show()
Example 7: train_test_split
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_decision_function [as alias]
X = df.iloc[:, 1:].values
y = df.iloc[:, 0].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=241)
for l in [1, 0.5, 0.3, 0.2, 0.1]:
    cf = GradientBoostingClassifier(n_estimators=250,
                                    verbose=True, random_state=241, learning_rate=l)
    cf.fit(X_train, y_train)
    train_loss = []
    test_loss = []
    # log loss for train set
    for stage, array in enumerate(cf.staged_decision_function(X_train)):
        # apply sigmoid function
        transformed = []
        for row in array:
            transformed.append(1.0 / (1 + np.exp(-row[0])))
        # calculate metric
        score = log_loss(y_train, transformed)
        train_loss.append(score)
    # log loss for test set
    for stage, array in enumerate(cf.staged_decision_function(X_test)):
        # apply sigmoid function
        transformed = []
        for row in array:
            transformed.append(1.0 / (1 + np.exp(-row[0])))
        # calculate metric
        score = log_loss(y_test, transformed)
        test_loss.append(score)
Example 8: gb
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_decision_function [as alias]
def gb(data):
    X = data[data.columns.values[1:]].values
    y = data[data.columns.values[:1]].values.ravel()
    N = len(y)
    X_train, X_test, y_train, y_test = \
        cv.train_test_split(X, y,
                            test_size=0.8,
                            random_state=241)
    # ------------------------------------------------------
    # Deal with Gradient Boosting
    # ------------------------------------------------------
    # Reserve arrays to store the iteration with min log_loss for each learning rate
    min_iterations_train = []
    min_iterations_test = []
    # Fit Gradient Boosting Classifiers with different learning rates
    learning_rates = [1, 0.5, 0.3, 0.2, 0.1]
    for lr in learning_rates:
        print("GB learning rate = ", lr)
        # Fit the classifier
        gbclf = GradientBoostingClassifier(n_estimators=250,
                                           verbose=True,
                                           random_state=241,
                                           learning_rate=lr)
        gbclf.fit(X_train, y_train)
        # Get log_loss errors after every iteration of the Gradient Boosting
        y_train_pred = gbclf.staged_decision_function(X_train)
        log_loss_train = []
        for y_t_p in y_train_pred:
            log_loss_train.append(log_loss(y_train, 1 / (1 + np.exp(-y_t_p))))
        y_test_pred = gbclf.staged_decision_function(X_test)
        log_loss_test = []
        for y_t_p in y_test_pred:
            log_loss_test.append(log_loss(y_test, 1 / (1 + np.exp(-y_t_p))))
        # Min log-loss and the corresponding (1-based) iteration
        log_loss_train_min_ind = np.argmin(log_loss_train) + 1
        log_loss_test_min_ind = np.argmin(log_loss_test) + 1
        log_loss_train_min = np.min(log_loss_train)
        log_loss_test_min = np.min(log_loss_test)
        min_iterations_train.append((log_loss_train_min, log_loss_train_min_ind))
        min_iterations_test.append((log_loss_test_min, log_loss_test_min_ind))
        # Plot the errors for both TRAIN and TEST sets (with the current learning rate)
        plt.figure('GB learning rate: ' + str(lr))
        plt.plot(log_loss_test, 'r', linewidth=2)
        plt.plot(log_loss_train, 'g', linewidth=2)
        plt.legend(['log_loss_test', 'log_loss_train'])
        plt.draw()
    # Optimal TEST iteration for the learning rate 0.2
    print('Optimal iterations TEST vs. learning rate:')
    for t in zip(min_iterations_test, learning_rates):
        print('min: ', t[0][0], 'min_ind: ', t[0][1], 'learning rate: ', t[1])
    t = [(x[0], x[1]) for x, y in zip(min_iterations_test, learning_rates) if y == 0.2]
    opt_log_loss = t[0][0]
    opt_log_loss_ind = t[0][1]
    writefile('%0.2f %d' % (opt_log_loss, opt_log_loss_ind), 'log-loss-0.2.out')
    # ------------------------------------------------------
    # Deal with Random Forests
    # ------------------------------------------------------
    clf = RandomForestClassifier(n_estimators=opt_log_loss_ind, random_state=241)
    clf.fit(X_train, y_train)
    y_test_pred_rf = clf.predict_proba(X_test)
    log_loss_test_rf = log_loss(y_test, y_test_pred_rf)
    # log-loss over the test set using Random Forests
    writefile('%0.2f' % (log_loss_test_rf), 'log-loss-rf.out')
    return 0
Example 9: RandomForestClassifier
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_decision_function [as alias]
from sklearn.ensemble import RandomForestClassifier

clfR = RandomForestClassifier(n_estimators=250, verbose=True, random_state=241)
clfR.fit(X_train, y_train)
print(log_loss(y_test, clfR.predict_proba(X_test)))
########################################
clf = GradientBoostingClassifier(n_estimators=250, verbose=True, random_state=241, learning_rate=0.2)
clf.fit(X_train, y_train)
# sigmoid-transformed staged scores, one list per boosting stage
sdf = []
for k, y_pred in enumerate(clf.staged_decision_function(X_test)):
    sdf.append([])
    for i in y_pred:
        sdf[k].append(1 / (1 + math.exp(-float(i))))
# print the log-loss at every stage
for k, i in enumerate(sdf):
    print(str(k) + " " + str(log_loss(y_true=y_test, y_pred=i)))
a = []
for i in sdf:
    a.append(log_loss(y_true=y_test, y_pred=i))
print(min(a))
Example 10: GradientBoostingClassifier
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_decision_function [as alias]
learning_rate = [1, 0.5, 0.3, 0.2, 0.1]
for rate in learning_rate:
    # train the classifier
    clf = GradientBoostingClassifier(learning_rate=rate,
                                     n_estimators=250,
                                     verbose=True,
                                     random_state=241)
    clf.fit(X_train, Y_train)
    # prepare the arrays for the loss values
    train_loss = np.zeros(250, dtype=np.float64)
    test_loss = np.zeros(250, dtype=np.float64)
    # compute the loss on the training data
    for i, Y_train_pred in enumerate(clf.staged_decision_function(X_train)):
        Y_train_pred = 1 / (1 + np.exp(-Y_train_pred))
        train_loss[i] = log_loss(Y_train, Y_train_pred)
    # compute the loss on the test data
    for i, Y_test_pred in enumerate(clf.staged_decision_function(X_test)):
        Y_test_pred = 1 / (1 + np.exp(-Y_test_pred))
        test_loss[i] = log_loss(Y_test, Y_test_pred)
    # plot the curves
    plt.figure()
    plt.plot(test_loss, 'r', linewidth=2)
    plt.plot(train_loss, 'g', linewidth=2)
    plt.legend(['test', 'train'])
    plt.title('learning_rate=%f' % rate)
Example 11: range
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_decision_function [as alias]
data = pandas.read_csv('Data/gbm-data.csv')
datanp = data.values
y = datanp[:, 0]
x = datanp[:, 1:1777]
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.8, random_state=241)
test_score = dict()
for learning_rate in [0.2]:
    print("learning rate:", learning_rate)
    cls = GradientBoostingClassifier(n_estimators=250, verbose=True, random_state=241, learning_rate=learning_rate)
    cls.fit(X_train, y_train)
    for i, pred in enumerate(cls.staged_decision_function(X_test)):
        predicted = sigmoid(pred)
        test_score[i] = log_loss(y_test, predicted)
    # train_score = dict()
    # for i, pred in enumerate(cls.staged_decision_function(X_train)):
    #     train_score[i] = cls.loss_(y_train, pred)
    pp.pprint(test_score)
    # iteration with the minimal test log-loss
    res = min(test_score, key=test_score.get)
    print(res)
cls2 = GradientBoostingClassifier(n_estimators=36, verbose=True, random_state=241)
cls2.fit(X_train, y_train)
Example 12: train_test_split
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_decision_function [as alias]
X = data_values[:, 1:]
y = data_values[:, 0]
# Split the sample into training and test sets using train_test_split
# with test_size=0.8 and random_state=241.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=241)
# 2
# Train a GradientBoostingClassifier with n_estimators=250, verbose=True, random_state=241
# and do the following for every learning_rate in [1, 0.5, 0.3, 0.2, 0.1]:
for lr in [1, 0.5, 0.3, 0.2, 0.1]:
    clf = GradientBoostingClassifier(n_estimators=250, verbose=True, random_state=241, learning_rate=lr)
    clf.fit(X_train, y_train)
    # Use staged_decision_function to predict the quality on the training and test sets at every iteration.
    score_prediction_train = clf.staged_decision_function(X_train)
    score_prediction_test = clf.staged_decision_function(X_test)
    # Transform each staged prediction with the sigmoid 1 / (1 + e^{-y_pred}), where y_pred is the predicted score.
    score_prediction_train_mod = [1 / (1 + np.exp(-y_pred)) for y_pred in score_prediction_train]
    score_prediction_test_mod = [1 / (1 + np.exp(-y_pred)) for y_pred in score_prediction_test]
    # Compute and plot the log-loss values (sklearn.metrics.log_loss) on the training and
    # test sets, and find the minimum of the metric and the iteration at which it is reached.
    log_loss_graph_train = log_loss(y_train, clf.predict_proba(X_train)[:, 1])
    log_loss_graph_test = log_loss(y_test, clf.predict_proba(X_test)[:, 1])
    print("%s -> ll[train] = %s -> ll[test] = %s" % (lr, log_loss_graph_train, log_loss_graph_test))
Example 13: train_test_split
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_decision_function [as alias]
import numpy as np
import pandas
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import sklearn.metrics as met
import matplotlib.pyplot as plt

df = pandas.read_csv("gbm-data.csv")
vals = df.values
X_train, X_test, y_train, y_test = train_test_split(vals[:, 1:], vals[:, 0], test_size=0.8, random_state=241)
# for lr in [1, 0.5, 0.3, 0.2, 0.1]:
clf = GradientBoostingClassifier(learning_rate=1, n_estimators=250, verbose=False, random_state=241)
clf.fit(X_train, y_train)
sc_train = enumerate(clf.staged_decision_function(X_train))
sc_test = enumerate(clf.staged_decision_function(X_test))
train_loss = {}
test_loss = {}
for i, y_predicted in sc_train:
    train_loss[i] = met.log_loss(y_train, 1 / (1 + np.exp(-y_predicted)))
for i, y_predicted in sc_test:
    test_loss[i] = met.log_loss(y_test, 1 / (1 + np.exp(-y_predicted)))
plt.figure()
plt.plot(list(test_loss.values()), 'r', linewidth=2)
plt.plot(list(train_loss.values()), 'g', linewidth=2)
plt.legend(['test', 'train'])
plt.show()
Example 14: train_test_split
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_decision_function [as alias]
from sklearn.metrics import log_loss

data = pandas.read_csv('gbm-data.csv')
X = data.drop('Activity', axis=1)
y = data['Activity']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=241)
for learning_rate in [0.2]:
    cls = GradientBoostingClassifier(n_estimators=250, verbose=True, random_state=241, learning_rate=learning_rate)
    cls.fit(X_train, y_train)
    print(cls.learning_rate)
    sigma_func = lambda x: 1 / (1 + math.e ** (-x))
    sdc_train = list(cls.staged_decision_function(X_train))
    sdc_test = list(cls.staged_decision_function(X_test))
    for i in range(250):
        pred_train = list(map(sigma_func, sdc_train[i]))
        pred_test = list(map(sigma_func, sdc_test[i]))
        loss_train = log_loss(y_train, pred_train)
        loss_test = log_loss(y_test, pred_test)
        print(i, loss_train, loss_test)
clf = RandomForestClassifier(n_estimators=36, random_state=241)
clf.fit(X_train, y_train)
pred = clf.predict_proba(X_test)
print(log_loss(y_test, pred))
Example 15: train_test_split
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import staged_decision_function [as alias]
df = pandas.read_csv('gbm-data.csv', index_col=None)  # 1
dfa = df.values
X = dfa[:, 1:]
y = dfa[:, 0]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=241)

def sigma(y_pred):
    return 1 / (1 + np.exp(-y_pred))

# for rate in [1, 0.5, 0.3, 0.2, 0.1]:  # 2
for rate in [0.2]:  # 2
    print(rate)
    clf = GradientBoostingClassifier(n_estimators=250, verbose=True, random_state=241, learning_rate=rate)
    clf.fit(X_train, y_train)
    sigma_y_train = [sigma(y) for y in clf.staged_decision_function(X_train)]
    sigma_y_test = [sigma(y) for y in clf.staged_decision_function(X_test)]
    log_loss_train = [log_loss(y_train, y) for y in sigma_y_train]
    log_loss_test = [log_loss(y_test, y) for y in sigma_y_test]
    min_log_loss_test = min(log_loss_test)
    it_min_log_loss_test = log_loss_test.index(min_log_loss_test)
    print(">>>> it: ", it_min_log_loss_test, " val: ", min_log_loss_test)  # 4
    if rate == 0.2:  # 5
        rf = RandomForestClassifier(random_state=241, n_estimators=it_min_log_loss_test)
        rf.fit(X_train, y_train)
        tree_log_loss_test = log_loss(y_test, rf.predict_proba(X_test)[:, 1])
        print(">>>>>>>> rf log_loss val: ", tree_log_loss_test)
plt.figure()