本文整理汇总了Python中sklearn.ensemble.GradientBoostingClassifier.predict方法的典型用法代码示例。如果您正苦于以下问题:Python GradientBoostingClassifier.predict方法的具体用法?Python GradientBoostingClassifier.predict怎么用?Python GradientBoostingClassifier.predict使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.GradientBoostingClassifier的用法示例。
在下文中一共展示了GradientBoostingClassifier.predict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: GradBoost
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def GradBoost(X_DS, Y_DS, X_train, X_test, y_train, y_test, Cl_Names = 'None', mask='None',Max_Depth=3):
    """Fit a gradient boosting classifier and report its test-set performance.

    Relies on ``np`` and on the confusion-matrix routine ``CM`` being
    available in the enclosing module.

    Args:
        X_DS: Not read by this function; kept for interface compatibility.
        Y_DS: Labels of the full data set; their unique values are the
            classes handed to ``CM``.
        X_train, y_train: Training features and labels.
        X_test, y_test: Test features and labels.
        Cl_Names: Class names used in the printed report, or the string
            ``'None'`` to generate "0", "1", ... (one per unique label).
        mask: Not read by this function; kept for interface compatibility.
        Max_Depth: ``max_depth`` passed to the classifier.

    Returns:
        Tuple ``(fitted classifier, confusion matrix for the test set)``.
    """
    from sklearn.ensemble import GradientBoostingClassifier as GBC
    from sklearn.metrics import classification_report

    print('Gradient Boosting: Training...')
    Gradient_Boosting_obj = GBC(max_depth=Max_Depth)
    Gradient_Boosting_obj.fit(X_train, y_train)
    Pred_Test = Gradient_Boosting_obj.predict(X_test)
    print('Gradient Boosting: Completed!')

    # Computed once; the original recomputed it for every use.
    classes = np.unique(Y_DS)

    # Compare against the sentinel only when a string was passed: comparing
    # an array of names with != yields an array, which cannot be used in `if`.
    if isinstance(Cl_Names, str) and Cl_Names == 'None':
        target_names = np.arange(len(classes)).astype(str).tolist()
    else:
        target_names = Cl_Names

    Conf_M = CM(y_test, Pred_Test, classes)
    print(classification_report(y_test, Pred_Test, target_names=target_names))
    return Gradient_Boosting_obj, Conf_M
示例2: test_mem_layout
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def test_mem_layout():
    """Predictions must not depend on the memory layout of X or y."""

    def fit_and_check(features, targets):
        # One fit / predict / size check, shared by every layout variant.
        model = GradientBoostingClassifier(n_estimators=100, random_state=1)
        model.fit(features, targets)
        assert_array_equal(model.predict(T), true_result)
        assert_equal(100, len(model.estimators_))

    # Feature matrix in Fortran order first, then in C-contiguous order.
    for to_layout in (np.asfortranarray, np.ascontiguousarray):
        fit_and_check(to_layout(X), y)

    # int32 targets in C-contiguous order first, then in Fortran order.
    for to_layout in (np.ascontiguousarray, np.asfortranarray):
        fit_and_check(X, to_layout(np.asarray(y, dtype=np.int32)))
示例3: __init__
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
class GBClassifier:
    """Small wrapper that configures, trains and scores a GradientBoostingClassifier."""

    def __init__(self):
        """Initialize the wrapper with its default hyper-parameters."""
        self.header = "#gbc"
        # Underlying estimator; stays None until buildModel() is called.
        self.clf = None
        self.learningRate = 0.1
        self.n_estimators = 100
        self.loss = "deviance"
        self.acceptedLossValues = ["deviance", "exponential"]

    def setNumberOfEstimators(self, n_estimators):
        """Set the number of estimators used when the model is built."""
        self.n_estimators = n_estimators

    def setLoss(self, loss):
        """Set the loss used when the model is built.

        A value outside ``acceptedLossValues`` is not applied; a warning is
        logged and the current loss is kept.
        """
        if loss in self.acceptedLossValues:
            self.loss = loss
        else:
            # The original raised a ValueError and caught it on the next line
            # just to reach this log call; the outcome is the same.
            logging.warning("Error: No such loss value:%s", loss)

    def buildModel(self):
        """Create the GradientBoostingClassifier from the current settings."""
        logging.info("Building Model")
        self.clf = GradientBoostingClassifier(loss=self.loss, n_estimators=self.n_estimators,
                                              learning_rate=self.learningRate)
        logging.info("Finished Building Model")

    def trainGBC(self, X, Y):
        """Fit the classifier built by buildModel() on X, Y."""
        self.clf.fit(X, Y)

    def validateGBC(self, X, Y):
        """Print and return the accuracy of the classifier on a validation set."""
        accuracy = accuracy_score(Y, self.clf.predict(X))
        print(accuracy)
        return accuracy

    def testGBC(self, X, Y):
        """Print and return the accuracy of the classifier on a test set."""
        accuracy = accuracy_score(Y, self.clf.predict(X))
        print(accuracy)
        return accuracy
示例4: gbc
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def gbc(train,test,train_target,test_target, lr=.1, n_est=100):
    """Fit a gradient boosting classifier and print train and test reports.

    Args:
        train, train_target: Training features and labels.
        test, test_target: Test features and labels.
        lr: Learning rate passed to the classifier.
        n_est: Number of estimators passed to the classifier.

    Returns:
        The fitted classifier.
    """
    model = GradientBoostingClassifier(loss='deviance', learning_rate=lr, n_estimators=n_est)
    model.fit(train, train_target)

    train_predictions = model.predict(train)
    print('*************************** GBC ****************')
    print(classification_report(train_target, train_predictions))

    test_predictions = model.predict(test)
    print(classification_report(test_target, test_predictions))
    return model
示例5: test_degenerate_targets
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def test_degenerate_targets():
    """Constant targets: the classifier must refuse to fit, the regressor must not."""
    classifier = GradientBoostingClassifier(n_estimators=100, random_state=1)
    # Fitting the classifier on a single class is expected to raise.
    assert_raises(ValueError, classifier.fit, X, np.ones(len(X)))

    regressor = GradientBoostingRegressor(n_estimators=100, random_state=1)
    regressor.fit(X, np.ones(len(X)))
    # Result discarded; the call is kept so rng is advanced exactly as before.
    regressor.predict(rng.rand(2))
    expected = np.ones((1,), dtype=np.float64)
    assert_array_equal(expected, regressor.predict(rng.rand(2)))
示例6: model_color_gboost
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def model_color_gboost(X_train, X_test, y_train, y_test):
    """Train a gradient boosting classifier and print its accuracy and confusion matrix.

    NOTE(review): ``X_train`` and ``X_test`` are never read. The features come
    from the module-level names ``tfidf_train`` / ``tfidf_test`` (objects with
    a ``.toarray()`` method, presumably sparse TF-IDF matrices). Confirm this
    is intended before switching to the parameters.

    Args:
        X_train, X_test: Unused (see note above).
        y_train: Labels for ``tfidf_train``.
        y_test: Labels for ``tfidf_test``.
    """
    # Train the model
    clf = GradientBoostingClassifier(learning_rate=0.1, n_estimators=80, subsample=0.80, max_depth=4)
    clf.fit(tfidf_train, y_train)

    # Check the validity. Single-argument print() gives the same output on
    # Python 2 and 3; the two spaces match the old print statement's
    # "label-with-trailing-space, separator, value" output.
    pred = clf.predict(tfidf_train.toarray())
    print("Accuracy on train set:  %s" % (100 * accuracy_score(pred, y_train)))
    pred = clf.predict(tfidf_test.toarray())
    print("Accuracy on validation:  %s" % (100 * accuracy_score(pred, y_test)))
    print(confusion_matrix(y_test, pred,
                           labels=['press-6', 'press-5', 'press-4', 'press-3', 'press-2', 'press-1']))
示例7: predict_author
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def predict_author(arr, yazar_features, yazar_classes):
    """Fit several classifiers on the same data and collect each one's prediction.

    Args:
        arr: Samples to classify; only the prediction for the first sample
            is kept per classifier.
        yazar_features: Training feature vectors.
        yazar_classes: Training labels.

    Returns:
        List with one predicted label per classifier, in this order: k-NN,
        linear SVC, logistic regression, Gaussian naive Bayes, decision tree,
        gradient boosting.
    """
    candidates = [
        ("\n[DEBUG] K-NN result (neighbors: 10)", KNeighborsClassifier(n_neighbors=10)),
        ("\n[DEBUG] SVC result (linear) (degree=3)", svm.SVC(kernel='linear', degree=3)),
        ("\n[DEBUG] Logistic Regression result ()", linear_model.LogisticRegression()),
        ("\n[DEBUG] Gaussian Naive Bayes", GaussianNB()),
        ("\n[DEBUG] Decision Tree Classifier", tree.DecisionTreeClassifier()),
        ("\n[DEBUG] Gradient Boosting Classification", GradientBoostingClassifier()),
    ]

    results = []
    for banner, model in candidates:
        print(banner)
        model.fit(yazar_features, yazar_classes)
        # Predict once and reuse the result for both the debug print and the
        # returned list (the original called predict twice per model).
        prediction = model.predict(arr)
        print(prediction)
        results.append(prediction[0])
    return results
示例8: gradient_boost
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def gradient_boost(x_train, x_test, y_train,
                   y_test, rands = None):
    """Fit a 300-estimator GradientBoostingClassifier and return its F1 scores.

    ARGS:
        - x_train: training features
        - y_train: training labels
        - x_test: testing features
        - y_test: testing labels
        - rands: optional pair ``(rs, rf)``. ``rf`` is passed to the
          classifier as ``max_features``; ``rs`` is only echoed back in the
          result. When falsy, both are drawn with ``numpy.random.rand()``
          and ``rf`` is redrawn until it is at least 0.1.

    RETURNS:
        :class:`pandas.Series` with entries 'train-f1', 'test-f1', 'rs', 'rf'
    """
    if rands:
        rs, rf = rands[0], rands[1]
    else:
        rs = numpy.random.rand()
        rf = numpy.random.rand()
        # redraw until the feature fraction is at least 0.1
        while rf < 0.1:
            rf = numpy.random.rand()

    booster = GradientBoostingClassifier(n_estimators = 300,
                                         max_features = rf)
    booster.fit(x_train, y_train)

    train_f1 = f1_score(y_train, booster.predict(x_train))
    test_f1 = f1_score(y_test, booster.predict(x_test))

    # same key order as before so the resulting Series is laid out identically
    return pandas.Series({'train-f1': train_f1, 'test-f1': test_f1,
                          'rs': rs, 'rf': rf})
示例9: classify_survivors
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def classify_survivors(Y = labels, orig_test = test_data):
    """Fit a gradient boosting model and write its predictions to a CSV file.

    The features come from ``featurizer()``, which returns the training and
    test feature sets. The result is written to ``predicted_results.csv``
    with the columns 'PassengerId' and 'Survived'.

    Args:
        Y: Training labels (defaults to the module-level ``labels``).
        orig_test: Frame holding at least a 'PassengerId' column (defaults
            to the module-level ``test_data``).
    """
    X, test = featurizer()
    best_model = {'n_estimators': 20, 'learning_rate': 1.0, 'max_depth': 3}
    # The tuned values are taken from best_model instead of being repeated.
    gbt = GradientBoostingClassifier(subsample=0.8, min_samples_leaf=50, min_samples_split=20,
                                     **best_model)

    ID_col = orig_test.loc[:, ['PassengerId']]
    # DataFrame.ix no longer exists in pandas; on a default integer index
    # .ix[0:10] selected rows 0..10, i.e. the first 11 rows.
    print(ID_col.head(11))

    gbt.fit(X, Y)
    predicted_results = pd.DataFrame(gbt.predict(test))
    predicted = pd.concat([ID_col, predicted_results], axis=1)
    predicted = predicted.rename(columns={0: 'Survived'})

    # Print some of the dataframe with predictions to test results
    print("%s \n" % (predicted.head(16),))

    # The original ran `del predicted['']`, which raises KeyError because the
    # frame only has 'PassengerId' and 'Survived'. Presumably the target was
    # the unnamed index column that to_csv writes by default, so the index is
    # suppressed here instead.
    predicted.to_csv('predicted_results.csv', index=False)
示例10: MyGradientBoosting
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
class MyGradientBoosting(MyClassifier):
    """MyClassifier subclass wrapping a GradientBoostingClassifier."""

    def __init__(self):
        # Fitted or loaded model; None until train() or load() has run.
        self.gradient_boosting = None

    def train(self, data_path='data/train.pkl', n_estimators=10, learning_rate=0.1):
        """Fit a new model on the pickled data set at ``data_path``.

        Args:
            data_path: Path handed to ``load_pickled_dataset``, which yields
                ``(labels, instances)``.
            n_estimators: Number of boosting stages.
            learning_rate: Learning rate of the booster.
        """
        labels, instances = load_pickled_dataset(data_path)
        # time.clock() was removed in Python 3.8; time.time() exists on every
        # version and measures the elapsed time the status message reports.
        start_time = time.time()
        self.gradient_boosting = GradientBoostingClassifier(loss='deviance', learning_rate=learning_rate,
                                                            n_estimators=n_estimators, subsample=0.3,
                                                            min_samples_split=2,
                                                            min_samples_leaf=1,
                                                            max_depth=3,
                                                            init=None,
                                                            random_state=None,
                                                            max_features=None,
                                                            verbose=2)
        self.gradient_boosting.fit(instances, labels)
        end_time = time.time()
        print("STATUS: model training done. elapsed time - %d seconds" % (end_time - start_time))
        print("INFO: " + str(self.gradient_boosting))

    def predict(self, data_path='data/test.pkl'):
        """Return the model's predictions for the pickled data set at ``data_path``."""
        # The labels stored in the file are not needed for prediction.
        _, instances = load_pickled_dataset(data_path)
        return self.gradient_boosting.predict(instances)

    def save(self, file_path='model/gbc_model'):
        """Dump the current model to ``file_path`` with joblib."""
        joblib.dump(self.gradient_boosting, file_path)

    def load(self, file_path='model/gbc_model'):
        """Replace the current model with the one stored at ``file_path``."""
        self.gradient_boosting = joblib.load(file_path)

    def write_results(self, predictions):
        """Hand the predictions to the parent's ``write`` with the file name 'gbc_prediction.csv'."""
        super(MyGradientBoosting, self).write(predictions, 'gbc_prediction.csv')
示例11: main
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def main():
    """Train a 1000-estimator gradient boosting model on pickled data and save it.

    Reads the arrays pickled in 'train.p' and 'test.p' (last column is the
    label, the other columns are features), prints the test accuracy and
    pickles the fitted classifier to 'gradient_1000.p'.
    """
    print('[INFO, time: %s] Getting Data....' % (time.strftime('%H:%M:%S')))
    # Pickles are binary data, so the files are opened in binary mode; `with`
    # closes them even if loading fails. The removed file() builtin is
    # replaced by open().
    # NOTE: pickle.load can execute arbitrary code - only load trusted files.
    with open('test.p', 'rb') as testing_file, open('train.p', 'rb') as training_file:
        train = pickle.load(training_file)
        test = pickle.load(testing_file)

    trainX = train[:, :-1]
    trainy = train[:, -1]
    testX = test[:, :-1]
    testy = test[:, -1]

    print('[INFO, time: %s] Fitting %s ...' % (time.strftime('%H:%M:%S'), 'GradientBoostingClassifier(n_estimators=1000)'))
    clf = GradientBoostingClassifier(n_estimators=1000)
    clf.fit(trainX, trainy)

    print('[INFO, time: %s] Making Predictions...' % (time.strftime('%H:%M:%S')))
    prediction = clf.predict(testX)
    print('[RESULT, time: %s] accuracy = %f' % (time.strftime('%H:%M:%S'), accuracy_score(testy, prediction)))

    with open('gradient_1000.p', 'wb') as model_save_file:
        pickle.dump(clf, model_save_file)
    print('All done')
示例12: train_gbt
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def train_gbt(filename, color, name):
    '''Train a Gradient Boosted Trees classifier and plot its ROC curve.

    Reads the CSV at ``filename``. Every column except the first and the
    last is a feature; the last column is the target. Precision, recall, F1
    and elapsed time are written to row 1 of the module-level ``metrics``
    array, and the ROC curve is added to the current matplotlib plot.

    Args:
        filename: Path of the CSV file.
        color: Line colour for the ROC curve.
        name: Legend label for the ROC curve.

    Returns:
        Tuple ``(importances, indices)``: the feature importances and the
        feature indices sorted by decreasing importance.
    '''
    # Read data. DataFrame.ix no longer exists in pandas; these were
    # positional selections, so .iloc is the direct replacement.
    data2 = pd.read_csv(filename, encoding="utf")
    X = data2.iloc[:, 1:-1]
    y = data2.iloc[:, -1]

    # Split into train and validation
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define model
    clf1 = GradientBoostingClassifier(learning_rate=0.05, max_depth=5, random_state=42)

    # Fit model
    t0 = time()
    clf1.fit(X_train, y_train)
    pred_probas = clf1.predict_proba(X_val)
    predictions = clf1.predict(X_val)
    print("Score %s" % clf1.score(X_val, y_val))
    importances = clf1.feature_importances_
    indices = np.argsort(importances)[::-1]

    # Metrics & Plotting
    metrics[1, 0] = precision_score(y_val, predictions)
    metrics[1, 1] = recall_score(y_val, predictions)
    metrics[1, 2] = f1_score(y_val, predictions)
    metrics[1, 3] = time() - t0

    # A ROC curve needs continuous scores; hard 0/1 predictions collapse it
    # to a single operating point. Column 1 holds the probability of the
    # second class - assumes a binary target whose positive class sorts last.
    fpr_rf, tpr_rf, _ = roc_curve(y_val, pred_probas[:, 1])
    plt.plot(fpr_rf, tpr_rf, color=color, label=name)
    return importances, indices
示例13: cv_model
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def cv_model():
    """Run 3-fold cross-validation of a gradient boosting model and print each fold's F1.

    Every line of the data file is split on whitespace. Fields 1, 3 and 2
    (in that order, field 1 converted to int) are fed to a fresh
    ``StatsCollector``, which supplies the feature vector and the target.
    """
    DATA_FILE = './data/train-set-ru-b64-utf-8.txt'
    all_data = []
    target = []
    with open(DATA_FILE) as df:
        for i, line in enumerate(df):
            print(i)
            parts = line.strip().split()
            stats_collector = StatsCollector()
            stats_collector.collect(int(parts[1]), parts[3], parts[2])
            all_data.append(stats_collector.get_features())
            target.append(stats_collector.get_target())

    # np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24; the builtin gives the same float64 arrays.
    data = np.asarray(all_data, dtype=float)
    target = np.asarray(target, dtype=float)

    clf = GradientBoostingClassifier(loss='deviance', learning_rate=0.05, n_estimators=400,
                                     min_samples_split=30, min_samples_leaf=15, max_depth=5)
    # NOTE(review): this is the legacy sklearn.cross_validation.KFold calling
    # convention (sample count + n_folds, iterate the object itself). With
    # sklearn.model_selection.KFold it would have to become
    # KFold(n_splits=3, shuffle=True).split(data) - confirm which is imported.
    kf = KFold(data.shape[0], n_folds=3, shuffle=True)
    for train_index, test_index in kf:
        X_train, X_test = data[train_index], data[test_index]
        y_train, y_test = target[train_index], target[test_index]
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        print(f1_score(y_test, y_pred))
示例14: fit_model
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def fit_model():
    """Fit a gradient boosting model on the whole data file and save it.

    Every line of the data file is split on whitespace. Fields 1, 3 and 2
    (in that order, field 1 converted to int) are fed to a fresh
    ``StatsCollector``, which supplies the feature vector and the target.
    The F1 score on the training data itself is printed and the fitted model
    is dumped to 'model/model.pkl'.
    """
    DATA_FILE = './data/train-set-ru-b64-utf-8.txt'
    data = []
    target = []
    # The `with` block closes the file, so no explicit close() is needed.
    with open(DATA_FILE) as df:
        for i, line in enumerate(df):
            print(i)
            parts = line.strip().split()
            stats_collector = StatsCollector()
            stats_collector.collect(int(parts[1]), parts[3], parts[2])
            data.append(stats_collector.get_features())
            target.append(stats_collector.get_target())

    # np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24; the builtin gives the same float64 arrays.
    data = np.asarray(data, dtype=float)
    target = np.asarray(target, dtype=float)
    print("%s %s" % (data.shape, target.shape))

    clf = GradientBoostingClassifier(loss='deviance', learning_rate=0.07, n_estimators=300, min_samples_split=30,
                                     min_samples_leaf=15, max_depth=4)
    clf.fit(data, target)
    # Scored on the training data, so this is not a generalization estimate.
    y_pred = clf.predict(data)
    print(f1_score(target, y_pred))
    joblib.dump(clf, 'model/model.pkl')
示例15: main
# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def main():
    """Grid-search a gradient boosting classifier, then write test predictions.

    Loads ``X, Y, Xtest`` via ``importdata()``, passes the estimator and the
    parameter grid to ``Gridsearch_impl`` (last argument 5), and writes the
    predictions for ``Xtest`` to 'submit3.csv' through ``output``.
    """
    print("gradient boosting classifier!")
    X, Y, Xtest = importdata()
    print(Y.shape)

    search_space = {
        "n_estimators": [10, 100, 200, 2000, 20000],
        "min_samples_split": [5, 10, 20, 50],
    }
    gb = GradientBoostingClassifier()
    Gridsearch_impl(X, Y, gb, search_space, 5)

    # NOTE(review): gb is never fitted in this function, so the predict call
    # below only works if Gridsearch_impl fits the estimator it is given in
    # place - confirm against that helper.
    output(gb.predict(Xtest), 'submit3.csv')