This article collects typical usage examples of the fit method of Python's sklearn.ensemble.GradientBoostingClassifier. If you are wondering exactly what GradientBoostingClassifier.fit does and how to use it, the curated code examples below should help. You can also read further about the containing class, sklearn.ensemble.GradientBoostingClassifier.
The sections below present 15 code examples of GradientBoostingClassifier.fit, sorted by popularity by default.
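Before diving into the examples, here is a minimal, self-contained sketch of the basic fit/predict workflow. It is not taken from any example below; the dataset is synthetic and the hyperparameter values are illustrative defaults:

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

# Synthetic binary classification problem (illustrative data)
X, y = make_classification(n_samples=1000, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit a boosted ensemble of shallow trees (parameter values are illustrative)
clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3)
clf.fit(X_train, y_train)

print('test accuracy: %.3f' % clf.score(X_test, y_test))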
Example 1: ctr_gbdt
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def ctr_gbdt(model='sklearn-clicklog', from_cache=False, train_dataset_length=100000, test_dataset_length=100000):
    TRAIN_FILE, TEST_FILE = create_dataset(model, from_cache, train_dataset_length, test_dataset_length)

    prediction_model = GradientBoostingClassifier(
        loss='deviance',
        learning_rate=0.1,
        n_estimators=30,
        subsample=1.0,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_depth=5,
    )

    x_train, y_train = clean_data(TRAIN_FILE)
    x_test, y_test = clean_data(TEST_FILE)

    with Timer('fit model'):
        prediction_model.fit(x_train, y_train)

    with Timer('evaluate model'):
        y_prediction_train = prediction_model.predict_proba(x_train)
        y_prediction_test = prediction_model.predict_proba(x_test)

        loss_train = log_loss(y_train, y_prediction_train)
        loss_test = log_loss(y_test, y_prediction_test)

    print('loss_train: %s' % loss_train)
    print('loss_test: %s' % loss_test)
Example 2: test_gradient_boosting_validation_fraction
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def test_gradient_boosting_validation_fraction():
    X, y = make_classification(n_samples=1000, random_state=0)

    gbc = GradientBoostingClassifier(n_estimators=100,
                                     n_iter_no_change=10,
                                     validation_fraction=0.1,
                                     learning_rate=0.1, max_depth=3,
                                     random_state=42)
    gbc2 = clone(gbc).set_params(validation_fraction=0.3)
    gbc3 = clone(gbc).set_params(n_iter_no_change=20)

    gbr = GradientBoostingRegressor(n_estimators=100, n_iter_no_change=10,
                                    learning_rate=0.1, max_depth=3,
                                    validation_fraction=0.1,
                                    random_state=42)
    gbr2 = clone(gbr).set_params(validation_fraction=0.3)
    gbr3 = clone(gbr).set_params(n_iter_no_change=20)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    # Check if validation_fraction has an effect
    gbc.fit(X_train, y_train)
    gbc2.fit(X_train, y_train)
    assert gbc.n_estimators_ != gbc2.n_estimators_

    gbr.fit(X_train, y_train)
    gbr2.fit(X_train, y_train)
    assert gbr.n_estimators_ != gbr2.n_estimators_

    # Check if n_estimators_ increases monotonically with n_iter_no_change
    gbc3.fit(X_train, y_train)
    gbr3.fit(X_train, y_train)
    assert gbr.n_estimators_ < gbr3.n_estimators_
    assert gbc.n_estimators_ < gbc3.n_estimators_
Example 3: test_staged_predict_proba
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def test_staged_predict_proba():
    # Test whether staged predict proba eventually gives
    # the same prediction.
    X, y = datasets.make_hastie_10_2(n_samples=1200,
                                     random_state=1)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]
    clf = GradientBoostingClassifier(n_estimators=20)

    # test raise NotFittedError if not fitted
    assert_raises(NotFittedError, lambda X: np.fromiter(
        clf.staged_predict_proba(X), dtype=np.float64), X_test)

    clf.fit(X_train, y_train)

    # test if prediction for last stage equals ``predict``
    for y_pred in clf.staged_predict(X_test):
        assert_equal(y_test.shape, y_pred.shape)

    assert_array_equal(clf.predict(X_test), y_pred)

    # test if prediction for last stage equals ``predict_proba``
    for staged_proba in clf.staged_predict_proba(X_test):
        assert_equal(y_test.shape[0], staged_proba.shape[0])
        assert_equal(2, staged_proba.shape[1])

    assert_array_almost_equal(clf.predict_proba(X_test), staged_proba)
Example 4: test_partial_dependecy_input
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def test_partial_dependecy_input():
    # Test input validation of partial dependence.
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(X, y)

    assert_raises(ValueError, partial_dependence,
                  clf, [0], grid=None, X=None)

    assert_raises(ValueError, partial_dependence,
                  clf, [0], grid=[0, 1], X=X)

    # first argument must be an instance of BaseGradientBoosting
    assert_raises(ValueError, partial_dependence,
                  {}, [0], X=X)

    # Gradient boosting estimator must be fit
    assert_raises(ValueError, partial_dependence,
                  GradientBoostingClassifier(), [0], X=X)

    assert_raises(ValueError, partial_dependence, clf, [-1], X=X)
    assert_raises(ValueError, partial_dependence, clf, [100], X=X)

    # wrong ndim for grid
    grid = np.random.rand(10, 2, 1)
    assert_raises(ValueError, partial_dependence, clf, [0], grid=grid)
Example 5: test_plot_partial_dependence_multiclass
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def test_plot_partial_dependence_multiclass():
    # Test partial dependence plot function on multi-class input.
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, iris.target)

    grid_resolution = 25
    fig, axs = plot_partial_dependence(clf, iris.data, [0, 1],
                                       label=0,
                                       grid_resolution=grid_resolution)
    assert len(axs) == 2
    assert all(ax.has_data for ax in axs)

    # now with symbol labels
    target = iris.target_names[iris.target]
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(iris.data, target)

    grid_resolution = 25
    fig, axs = plot_partial_dependence(clf, iris.data, [0, 1],
                                       label='setosa',
                                       grid_resolution=grid_resolution)
    assert len(axs) == 2
    assert all(ax.has_data for ax in axs)

    # label not in gbrt.classes_
    assert_raises(ValueError, plot_partial_dependence,
                  clf, iris.data, [0, 1], label='foobar',
                  grid_resolution=grid_resolution)

    # label not provided
    assert_raises(ValueError, plot_partial_dependence,
                  clf, iris.data, [0, 1],
                  grid_resolution=grid_resolution)
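A hedged side note on Examples 4 and 5: both use the old sklearn.ensemble.partial_dependence module, which was removed after the public API moved to sklearn.inspection in scikit-learn 0.21. A rough modern equivalent of the multi-class plot above (the target argument replaces label; exact signatures depend on your version):

from sklearn.inspection import PartialDependenceDisplay

PartialDependenceDisplay.from_estimator(clf, iris.data, [0, 1], target=0,
                                        grid_resolution=25)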
Example 6: PlotFeaturesImportance
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def PlotFeaturesImportance(X, y, featureNames, dataName):
    '''Plot the relative contribution/importance of the features.

    Best to reduce to the top X features first, for interpretability.
    Code example from:
    http://bugra.github.io/work/notes/2014-11-22/an-introduction-to-supervised-learning-scikit-learn/
    '''
    gbc = GradientBoostingClassifier(n_estimators=40)
    gbc.fit(X, y)

    # Get feature importances from the classifier
    feature_importance = gbc.feature_importances_

    # Normalize the importances relative to the strongest feature
    feature_importance = 100 * (feature_importance / feature_importance.max())
    sorted_idx = numpy.argsort(feature_importance)
    pos = numpy.arange(sorted_idx.shape[0]) + 4.5

    plt.figure(figsize=(14, 9), dpi=250)
    plt.barh(pos, feature_importance[sorted_idx], align='center', color='#7A68A6')
    plt.yticks(pos, numpy.asanyarray(featureNames)[sorted_idx])
    plt.xlabel('Relative Importance')
    plt.title('%s: Top Features' % (dataName))
    plt.grid(False)  # the original plt.grid('off') actually enables the grid: a non-empty string is truthy
    plt.ion()
    plt.show()
    plt.savefig(str(dataName) + 'TopFeatures.png', dpi=200)
Example 7: model_train_ensemble
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def model_train_ensemble(X1, Y1, Save=False, modelname=None):
    X1, Y1 = DowmSample(X1, Y1, 9)
    # model = RandomForestClassifier(n_estimators=100, random_state=1)
    model = GradientBoostingClassifier(n_estimators=100, max_leaf_nodes=5,
                                       subsample=0.7, learning_rate=0.1,
                                       random_state=1)
    # model = LogisticRegression('l2')
    model.fit(X1, Y1.ravel())

    # Save the model (pickle requires binary mode, not the original 'w')
    if Save == True:
        f = open(modelname, 'wb')
        pickle.dump(model, f)
        f.close()

    print('\n -------------- Training is over ----------------------')
    return model
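An aside on persistence: for scikit-learn estimators, joblib is commonly preferred over raw pickle. A hedged sketch (the filename is illustrative):

from joblib import dump, load

dump(model, 'model.joblib')
model = load('model.joblib')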
Example 8: Blender
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
class Blender(BaseEstimator, ClassifierMixin):
    def __init__(self, trained_clfs):
        self.clfs = trained_clfs
        self.classifier = GradientBoostingClassifier()

    def fit(self, data, target):
        probs = self.transform_input(data)
        self.classifier.fit(probs, target)

    def predict(self, data):
        predictions = self.transform_input(data)
        return self.classifier.predict(predictions)

    def transform_input(self, data):
        # Stack each base classifier's class probabilities side by side
        probabilities = [clf.predict_proba(data) for clf in self.clfs]
        probabilities = np.array(probabilities)
        n_clfs, samples, features = probabilities.shape
        # Transpose before reshaping so each row keeps one sample's
        # probabilities (the original reshape alone mixed rows across samples)
        probabilities = probabilities.transpose(1, 0, 2).reshape(
            samples, n_clfs * features)
        probabilities[np.isnan(probabilities)] = 0
        return probabilities
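For context, a hedged usage sketch of the Blender class above. The base classifiers and synthetic data are illustrative, and in practice the base models would be trained on a separate fold to avoid leakage:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=500, random_state=0)
base_clfs = [LogisticRegression(max_iter=1000).fit(X, y),
             DecisionTreeClassifier(max_depth=3, random_state=0).fit(X, y)]

blender = Blender(base_clfs)
blender.fit(X, y)
print(blender.predict(X[:5]))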
Example 9: test_oob_improvement
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def test_oob_improvement():
    """Test if oob improvement has correct shape and regression test."""
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1,
                                     subsample=0.5)
    clf.fit(X, y)
    assert clf.oob_improvement_.shape[0] == 100
    # hard-coded regression test - change if modification in OOB computation
    assert_array_almost_equal(clf.oob_improvement_[:5],
                              np.array([0.19, 0.15, 0.12, -0.12, -0.11]),
                              decimal=2)
Example 10: main
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def main():
    print('[INFO, time: %s] Getting Data....' % (time.strftime('%H:%M:%S')))
    # open in binary mode for pickle (the original used the Python 2 file() builtin)
    testing_file = open('test.p', 'rb')
    training_file = open('train.p', 'rb')

    train = pickle.load(training_file)
    test = pickle.load(testing_file)

    testing_file.close()
    training_file.close()

    trainX = train[:, :-1]
    trainy = train[:, -1]
    testX = test[:, :-1]
    testy = test[:, -1]

    print('[INFO, time: %s] Fitting %s ...' % (time.strftime('%H:%M:%S'),
                                               'GradientBoostingClassifier(n_estimators=1000)'))
    clf = GradientBoostingClassifier(n_estimators=1000)
    clf.fit(trainX, trainy)

    print('[INFO, time: %s] Making Predictions...' % (time.strftime('%H:%M:%S')))
    prediction = clf.predict(testX)
    print('[RESULT, time: %s] accuracy = %f' % (time.strftime('%H:%M:%S'),
                                                accuracy_score(testy, prediction)))

    model_save_file = open('gradient_1000.p', 'wb')
    pickle.dump(clf, model_save_file)
    model_save_file.close()
    print('All done')
Example 11: partial_dependence
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def partial_dependence(df, y):
    '''Oversample to balance the binary target, engineer features, fit a
    gradient boosting classifier, and plot partial dependence for the six
    most important features.
    '''
    X_train, X_test, y_train, y_test = oversample_train_test(df, y)

    feature_engineering = Pipeline([
        ('lists', ListSplitter()),
        ('race', RaceDummies()),
        ('crime_sentence', CrimeAndSentence()),
        ('feat_eng', FeatureEngineer()),
        ('columns', ColumnFilter(prejudice=False))
    ])

    X = feature_engineering.fit_transform(X_train.copy(), y_train)
    X_test = feature_engineering.fit_transform(X_test.copy(), y_test)

    gbc = GradientBoostingClassifier(n_estimators=850, learning_rate=.75)
    gbc.fit(X.copy(), y_train)
    most_imp = np.argsort(gbc.feature_importances_)[-6:]

    names = list(X_test.columns)
    feats = list(most_imp)
    fig, axs = plot_partial_dependence(gbc, X_test, feats, feature_names=names,
                                       n_jobs=3, grid_resolution=50)
Example 12: run_gradient_boosting_classifier
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def run_gradient_boosting_classifier(data, _max_depth):
    (feature_train, feature_test,
     label_train, label_test) = train_test_split(data[:, 0:-1],
                                                 data[:, -1].astype(int),
                                                 test_size=0.25)
    # TODO: Vary number of estimators and learning rate
    gbc = GradientBoostingClassifier(learning_rate=0.1, n_estimators=50,
                                     max_depth=_max_depth, verbose=True)
    gbc.fit(feature_train, label_train)

    training_error = gbc.score(feature_train, label_train)
    # cross_validation_score = cross_val_score(gbc, feature_train, label_train, cv=10)
    testing_error = gbc.score(feature_test, label_test)

    # the original print said "Random Forest"; this is a gradient boosting model
    print("Gradient Boosting Results for Max Depth:", _max_depth)
    print("Training Accuracy:", training_error)
    # print("10-fold Cross Validation Accuracy: %0.2f (+/- %0.2f)"
    #       % (cross_validation_score.mean(), cross_validation_score.std() * 2))
    print("Testing Accuracy:", testing_error)

    feature_importance = gbc.feature_importances_
    stddev = np.std([tree[0].feature_importances_ for tree in gbc.estimators_], axis=0)
    indices = np.argsort(feature_importance)[::-1]

    # Print the feature ranking
    print("Feature ranking:")
    for f in range(len(feature_importance)):
        print("%d. feature %d (%f)" % (f + 1, indices[f], feature_importance[indices[f]]))

    plot_feature_importance(feature_importance, indices, stddev,
                            "gradient-boosted-classifier-feature-importance-depth-" + str(_max_depth))
Example 13: predict
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def predict(fea, df, t, t9):
    # Build a boolean mask over df's columns for the selected features
    Un = df.columns == 'Blank'
    for f in fea:  # the original read "Fea"; the parameter is "fea"
        Un = Un | (df.columns == f)
        Un = Un | (df.columns == (f + '_x'))
        Un = Un | (df.columns == (f + '_y'))
    Un = Un & (df.columns != 'New_y')

    clf = GradientBoostingClassifier()
    y = df[t].label
    X = df[t].loc[:, Un]  # .loc replaces the long-deprecated .ix indexer
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.9,
                                                        random_state=1)
    clf.fit(X_train, y_train)

    print('Testing AUC: \t' + str(roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])))
    print('September AUC: \t' + str(roc_auc_score(df[t9].label,
                                                  clf.predict_proba(df[t9].loc[:, Un])[:, 1])))
    print(X.columns)
    print(clf.feature_importances_)
    return Un, clf
Example 14: __init__
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def __init__(self, estimator,
             phase,
             n_jobs, cv_k_fold, parameters,
             X_train, y_train,
             X_test, y_test):
    # estimator : ensemble learner
    # cv : if phase == "train", grid-search for the best parameters
    if phase == "train":
        clf = GradientBoostingClassifier()
        gscv = GridSearchCV(clf, parameters,
                            verbose=10,
                            scoring="f1",  # or scoring="precision" / "recall"
                            n_jobs=n_jobs, cv=cv_k_fold)
        gscv.fit(X_train, y_train)
        self.best_params = gscv.best_params_

        clf.set_params(**gscv.best_params_)
        clf.fit(X_train, y_train)
        train_loss = clf.train_score_
        test_loss = np.empty(len(clf.estimators_))
        # clf.loss_ expects raw decision values, not class labels, so use
        # staged_decision_function rather than the original staged_predict
        for i, pred in enumerate(clf.staged_decision_function(X_test)):
            test_loss[i] = clf.loss_(y_test, pred)

        plt.plot(np.arange(len(clf.estimators_)) + 1, test_loss, label='Test')
        plt.plot(np.arange(len(clf.estimators_)) + 1, train_loss, label='Train')
        plt.xlabel('number of weak learners (boosting iterations)')
        plt.ylabel('Loss')
        plt.legend(loc="best")
        plt.savefig("loss_cv.png")
        plt.close()

        # kept inside the train branch: gscv only exists when phase == "train"
        estimator.set_params(**gscv.best_params_)

    self.estimator = estimator
    self.one_hot_encoding = None
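Note that clf.loss_ is a private attribute that newer scikit-learn releases removed; a version-independent way to trace the test loss is to score staged probability predictions with log_loss. A hedged sketch reusing the names from the example above:

from sklearn.metrics import log_loss

test_loss = [log_loss(y_test, proba)
             for proba in clf.staged_predict_proba(X_test)]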
Example 15: test_max_feature_auto
# Required import: from sklearn.ensemble import GradientBoostingClassifier [as alias]
# Or: from sklearn.ensemble.GradientBoostingClassifier import fit [as alias]
def test_max_feature_auto():
    """Test if max features is set properly for floats and str."""
    X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)
    _, n_features = X.shape

    X_train, X_test = X[:2000], X[2000:]
    y_train, y_test = y[:2000], y[2000:]

    gbrt = GradientBoostingClassifier(n_estimators=1, max_features='auto')
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, int(np.sqrt(n_features)))

    gbrt = GradientBoostingRegressor(n_estimators=1, max_features='auto')
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, n_features)

    gbrt = GradientBoostingRegressor(n_estimators=1, max_features=0.3)
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, int(n_features * 0.3))

    gbrt = GradientBoostingRegressor(n_estimators=1, max_features='sqrt')
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, int(np.sqrt(n_features)))

    gbrt = GradientBoostingRegressor(n_estimators=1, max_features='log2')
    gbrt.fit(X_train, y_train)
    assert_equal(gbrt.max_features_, int(np.log2(n_features)))
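A closing caveat: max_features='auto' was deprecated in scikit-learn 1.1 and removed in 1.3. On current versions the historical behavior shown above is spelled explicitly, 'sqrt' for the classifier and None (all features) for the regressor:

gbrt = GradientBoostingClassifier(n_estimators=1, max_features='sqrt')
gbrt = GradientBoostingRegressor(n_estimators=1, max_features=None)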