当前位置: 首页>>代码示例>>Python>>正文


Python GradientBoostingClassifier.predict方法代码示例

本文整理汇总了Python中sklearn.ensemble.GradientBoostingClassifier.predict方法的典型用法代码示例。如果您正苦于以下问题:Python GradientBoostingClassifier.predict方法的具体用法?Python GradientBoostingClassifier.predict怎么用?Python GradientBoostingClassifier.predict使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.ensemble.GradientBoostingClassifier的用法示例。


在下文中一共展示了GradientBoostingClassifier.predict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: GradBoost

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def GradBoost(X_DS, Y_DS, X_train, X_test, y_train, y_test, Cl_Names = 'None', mask='None',Max_Depth=3):
    """Fit a gradient boosting classifier and report its test-set performance.

    Relies on ``np`` (numpy) and the confusion-matrix helper ``CM`` being
    available at module level; neither is imported here.

    Args:
        X_DS: Not used by this function; kept so existing callers keep working.
        Y_DS: Labels of the whole data set. Its unique values are the class
            list handed to ``CM`` and determine how many default names are
            generated.
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.
        Cl_Names: Class names for the printed report, or the string ``'None'``
            (the default) to use ``"0"``, ``"1"``, ... instead.
        mask: Not used by this function; kept so existing callers keep working.
        Max_Depth: Passed to the classifier as ``max_depth``.

    Returns:
        A tuple ``(fitted classifier, CM(y_test, predictions, classes))``.
    """
    from sklearn.ensemble import GradientBoostingClassifier as GBC
    from sklearn.metrics import classification_report

    print('Gradient Boosting: Training...')

    Gradient_Boosting_obj = GBC(max_depth=Max_Depth)
    Gradient_Boosting_obj.fit(X_train, y_train)
    Pred_Test = Gradient_Boosting_obj.predict(X_test)

    print('Gradient Boosting: Completed!')

    class_labels = np.unique(Y_DS)

    # Only the literal string 'None' means "no names supplied". Checking the
    # type first keeps a list/array of names from being compared element-wise
    # against the sentinel, which would make the truth test ambiguous.
    if isinstance(Cl_Names, str) and Cl_Names == 'None':
        target_names = np.arange(len(class_labels)).astype(str).tolist()
    else:
        target_names = Cl_Names

    Conf_M = CM(y_test, Pred_Test, class_labels)

    print(classification_report(y_test, Pred_Test, target_names=target_names))

    return Gradient_Boosting_obj, Conf_M
开发者ID:RiB-,项目名称:CancerStudy,代码行数:31,代码来源:Data_Challenge_Code.py

示例2: test_mem_layout

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def test_mem_layout():
    """Fit with Fortran- and C-ordered copies of X and of int32 y.

    Every combination must reproduce ``true_result`` on ``T`` and keep all
    100 boosting stages.
    """
    def _fit_and_check(features, targets):
        # One fresh, identically seeded model per memory layout.
        model = GradientBoostingClassifier(n_estimators=100, random_state=1)
        model.fit(features, targets)
        assert_array_equal(model.predict(T), true_result)
        assert_equal(100, len(model.estimators_))

    # Vary the layout of X while y stays untouched.
    for to_layout in (np.asfortranarray, np.ascontiguousarray):
        _fit_and_check(to_layout(X), y)

    # Vary the layout of an int32 copy of y while X stays untouched.
    for to_layout in (np.ascontiguousarray, np.asfortranarray):
        _fit_and_check(X, to_layout(np.asarray(y, dtype=np.int32)))
开发者ID:amueller,项目名称:scikit-learn,代码行数:29,代码来源:test_gradient_boosting.py

示例3: __init__

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
class GBClassifier:
    """Small configuration wrapper around ``GradientBoostingClassifier``.

    Typical use: adjust the settings with the setters, call ``buildModel()``
    to create the estimator, then ``trainGBC()`` followed by
    ``validateGBC()`` / ``testGBC()``.
    """

    def __init__(self):
        """Initialise the wrapper with default hyper-parameters and no model."""
        self.header = "#gbc"
        self.clf = None  # created by buildModel()
        self.learningRate = 0.1
        self.n_estimators = 100
        self.loss = "deviance"
        self.acceptedLossValues = ["deviance", "exponential"]

    def setNumberOfEstimators(self, n_estimators):
        """Set the number of estimators used by the next ``buildModel()``."""
        self.n_estimators = n_estimators

    def setLoss(self, loss):
        """Set the loss used by the next ``buildModel()``.

        A value outside ``acceptedLossValues`` is rejected: a warning is
        logged and the current loss is kept.
        """
        if loss in self.acceptedLossValues:
            self.loss = loss
        else:
            logging.warning("Error: No such loss value:%s", loss)

    def buildModel(self):
        """Create the estimator from the currently configured settings."""
        logging.info("Building Model")
        self.clf = GradientBoostingClassifier(loss=self.loss, n_estimators=self.n_estimators,
                                              learning_rate=self.learningRate)
        logging.info("Finished Building Model")

    def trainGBC(self, X, Y):
        """Fit the estimator on ``X`` / ``Y``; ``buildModel()`` must run first."""
        self.clf.fit(X, Y)

    def _printAccuracy(self, X, Y):
        """Predict ``X`` with the fitted estimator and print accuracy against ``Y``."""
        YPred = self.clf.predict(X)
        print(accuracy_score(Y, YPred))

    def validateGBC(self, X, Y):
        """Print the accuracy of the fitted estimator on a validation set."""
        self._printAccuracy(X, Y)

    def testGBC(self, X, Y):
        """Print the accuracy of the fitted estimator on a test set."""
        self._printAccuracy(X, Y)
开发者ID:USCDataScience,项目名称:NN-fileTypeDetection,代码行数:61,代码来源:gradientBoostingClassifier.py

示例4: gbc

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def gbc(train,test,train_target,test_target, lr=.1, n_est=100):
    """Fit a gradient boosting classifier and print train/test reports.

    Args:
        train: Training features.
        test: Test features.
        train_target: Training labels.
        test_target: Test labels.
        lr: Learning rate passed to the classifier.
        n_est: Number of estimators passed to the classifier.

    Returns:
        The fitted classifier.
    """
    clf = GradientBoostingClassifier(loss='deviance', learning_rate=lr, n_estimators=n_est)
    clf.fit(train, train_target)

    # Single-argument print calls produce the same output on Python 2 and 3.
    print('*************************** GBC ****************')
    print(classification_report(train_target, clf.predict(train)))
    print(classification_report(test_target, clf.predict(test)))
    return clf
开发者ID:bemao,项目名称:Kaggle---pizza,代码行数:13,代码来源:pizza.py

示例5: test_degenerate_targets

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def test_degenerate_targets():
    """Check fitting behaviour when every target value is identical."""
    # The classifier is expected to reject an all-equal target vector.
    classifier = GradientBoostingClassifier(n_estimators=100, random_state=1)
    assert_raises(ValueError, classifier.fit, X, np.ones(len(X)))

    # The regressor accepts it and must then predict that constant.
    regressor = GradientBoostingRegressor(n_estimators=100, random_state=1)
    regressor.fit(X, np.ones(len(X)))
    regressor.predict(rng.rand(2))
    expected = np.ones((1,), dtype=np.float64)
    assert_array_equal(expected, regressor.predict(rng.rand(2)))
开发者ID:Anubhav27,项目名称:scikit-learn,代码行数:13,代码来源:test_gradient_boosting.py

示例6: model_color_gboost

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def model_color_gboost(X_train, X_test, y_train, y_test):
    """Fit a gradient boosting classifier and print accuracy and a confusion matrix.

    NOTE(review): ``X_train`` and ``X_test`` are never read. The model is
    fitted and evaluated on the module-level ``tfidf_train`` / ``tfidf_test``
    objects instead. That behaviour is kept as-is; confirm with the caller
    whether the parameters were meant to be used.

    Args:
        X_train: Unused (see note above).
        X_test: Unused (see note above).
        y_train: Labels matching ``tfidf_train``.
        y_test: Labels matching ``tfidf_test``.
    """
    # Train the model
    clf = GradientBoostingClassifier(learning_rate=0.1, n_estimators=80, subsample=0.80, max_depth=4)
    clf.fit(tfidf_train, y_train)

    # Check the validity. Each message is formatted into one string so the
    # output is identical under the Python 2 print statement and Python 3.
    train_pred = clf.predict(tfidf_train.toarray())
    print("Accuracy on train set:  %s" % (100*accuracy_score(train_pred, y_train),))
    test_pred = clf.predict(tfidf_test.toarray())
    print("Accuracy on validation:  %s" % (100*accuracy_score(test_pred, y_test),))
    print(confusion_matrix(y_test, test_pred,
                           labels=['press-6', 'press-5', 'press-4', 'press-3', 'press-2', 'press-1']))
开发者ID:tomhettinger,项目名称:reddit_may2015,代码行数:14,代码来源:thebutton.py

示例7: predict_author

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def predict_author(arr, yazar_features, yazar_classes):
    """Train six classifiers on the author data and collect their predictions.

    Each classifier is fitted on ``yazar_features`` / ``yazar_classes``, its
    full prediction for ``arr`` is printed under a debug banner, and the
    first predicted label is appended to the result.

    Args:
        arr: Samples to classify.
        yazar_features: Training features.
        yazar_classes: Training labels.

    Returns:
        A list with one predicted label per classifier, in this order: k-NN,
        linear SVC, logistic regression, Gaussian naive Bayes, decision tree,
        gradient boosting.
    """
    classifiers = [
        ("\n[DEBUG] K-NN result (neighbors: 10)",
         KNeighborsClassifier(n_neighbors=10)),
        ("\n[DEBUG] SVC result (linear) (degree=3)",
         svm.SVC(kernel='linear', degree=3)),
        ("\n[DEBUG] Logistic Regression result ()",
         linear_model.LogisticRegression()),
        ("\n[DEBUG] Gaussian Naive Bayes",
         GaussianNB()),
        ("\n[DEBUG] Decision Tree Classifier",
         tree.DecisionTreeClassifier()),
        ("\n[DEBUG] Gradient Boosting Classification",
         GradientBoostingClassifier()),
    ]

    results = []
    for banner, model in classifiers:
        print(banner)
        model.fit(yazar_features, yazar_classes)
        # Predict once and reuse the result for both the log and the return value.
        prediction = model.predict(arr)
        print(prediction)
        results.append(prediction[0])

    return results
开发者ID:Searil,项目名称:kimyazmis,代码行数:55,代码来源:predictor.py

示例8: gradient_boost

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def gradient_boost(x_train, x_test, y_train,
                   y_test, rands = None):
    """
    Fit a 300-estimator GradientBoostingClassifier and report its f1-scores.

    ARGS:

        - x_train: :class:`pandas.DataFrame` of the x_training data

        - y_train: :class:`pandas.Series` of the y_training data

        - x_test: :class:`pandas.DataFrame` of the x_testing data

        - y_test: :class:`pandas.Series` of the y_testing data

        - rands: a :class:`tuple` ``(rs, rf)``.  ``rf`` is passed to the
        classifier as ``max_features``.  ``rs`` is only echoed back in the
        result; it is not passed to the classifier.  If `None` (or empty),
        both values are drawn with ``numpy.random.rand()`` and ``rf`` is
        re-drawn until it is at least 0.1.

    RETURNS:

        :class:`pandas.Series` with the keys ``train-f1``, ``test-f1``,
        ``rs`` and ``rf``
    """
    #use the randoms provided if there are any, otherwise generate them
    if not rands:
        rs = numpy.random.rand()
        rf = numpy.random.rand()
        while rf < 0.1:
            rf = numpy.random.rand()
    else:
        rs, rf = rands[0], rands[1]

    #create and fit the gradient boosting classifier
    bc = GradientBoostingClassifier(n_estimators = 300,
                                    max_features = rf)
    bc.fit(x_train, y_train)

    #score both splits and return them together with the random values used
    return pandas.Series({
        'train-f1': f1_score(y_train, bc.predict(x_train)),
        'test-f1': f1_score(y_test, bc.predict(x_test)),
        'rs': rs,
        'rf': rf,
    })
开发者ID:Gehui,项目名称:group01-project03,代码行数:50,代码来源:predict_lemons.py

示例9: classify_survivors

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def classify_survivors(Y = labels, orig_test = test_data):
    """Fit a gradient boosting model and write predictions to a CSV file.

    Features come from ``featurizer()``. The predictions for its test split
    are joined column-wise with the ``PassengerId`` column of ``orig_test``
    and written to ``predicted_results.csv``.

    Args:
        Y: Training labels; defaults to the module-level ``labels`` as bound
            when this function was defined.
        orig_test: Frame holding a ``PassengerId`` column; defaults to the
            module-level ``test_data`` as bound when this function was defined.
    """
    X, test = featurizer()

    gbt = GradientBoostingClassifier(subsample=0.8, min_samples_leaf=50, min_samples_split=20,
                                     n_estimators=20, learning_rate=1.0, max_depth=3)

    ID_col = orig_test.loc[:, ['PassengerId']]
    # `.ix` no longer exists in pandas. `.loc` gives the same label-based,
    # end-inclusive slice -- assumes an integer index; TODO confirm.
    print(ID_col.loc[0:10])
    gbt.fit(X, Y)

    predicted_results = pd.DataFrame(gbt.predict(test))
    predicted = pd.concat([ID_col, predicted_results], axis=1)
    predicted = predicted.rename(columns={0: 'Survived'})
    # Drop the empty-named column only when it exists, so a missing column
    # does not abort the run with a KeyError.
    if '' in predicted.columns:
        del predicted['']

    #Print some of the dataframe with predictions to test results
    print("%s \n" % (predicted.loc[0:15],))

    #Output result dataframe as csv
    predicted.to_csv('predicted_results.csv')
开发者ID:abernkopf,项目名称:Data_Science,代码行数:27,代码来源:Titanic_survival_model.py

示例10: MyGradientBoosting

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
class MyGradientBoosting(MyClassifier):
    """Gradient boosting model with train / predict / save / load helpers."""

    def __init__(self):
        """Start without a model; ``train()`` or ``load()`` provides one."""
        self.gradient_boosting = None

    def train(self, data_path='data/train.pkl', n_estimators=10, learning_rate=0.1):
        """Fit a new classifier on the pickled data set at ``data_path``.

        Args:
            data_path: Path handed to ``load_pickled_dataset``, which returns
                ``(labels, instances)``.
            n_estimators: Number of boosting stages.
            learning_rate: Learning rate of the classifier.
        """
        labels, instances = load_pickled_dataset(data_path)
        # time.clock() was removed in Python 3.8; time.time() exists on both
        # Python 2 and 3 and measures the elapsed time the message reports.
        start_time = time.time()
        self.gradient_boosting = GradientBoostingClassifier(loss='deviance', learning_rate=learning_rate,
                                                            n_estimators=n_estimators, subsample=0.3,
                                                            min_samples_split=2,
                                                            min_samples_leaf=1,
                                                            max_depth=3,
                                                            init=None,
                                                            random_state=None,
                                                            max_features=None,
                                                            verbose=2)
        self.gradient_boosting.fit(instances, labels)
        end_time = time.time()
        print("STATUS: model training done. elapsed time - %d seconds" % (end_time - start_time))
        print("INFO: " + str(self.gradient_boosting))

    def predict(self, data_path='data/test.pkl'):
        """Return predictions for the pickled data set at ``data_path``."""
        _, instances = load_pickled_dataset(data_path)
        return self.gradient_boosting.predict(instances)

    def save(self, file_path='model/gbc_model'):
        """Persist the current model to ``file_path`` with joblib."""
        joblib.dump(self.gradient_boosting, file_path)

    def load(self, file_path='model/gbc_model'):
        """Replace the current model with the one stored at ``file_path``."""
        self.gradient_boosting = joblib.load(file_path)

    def write_results(self, predictions):
        """Write ``predictions`` to ``gbc_prediction.csv`` via the base class."""
        super(MyGradientBoosting, self).write(predictions, 'gbc_prediction.csv')
开发者ID:EDFward,项目名称:10601-playground,代码行数:35,代码来源:gradient_boosting.py

示例11: main

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def main():
    """Train a 1000-estimator gradient boosting model and pickle it.

    Reads ``train.p`` and ``test.p`` from the working directory. In each, the
    last column is treated as the label and all other columns as features.
    Prints test accuracy and writes the fitted model to ``gradient_1000.p``.
    """
    print('[INFO, time: %s] Getting Data....' % (time.strftime('%H:%M:%S')))
    # NOTE: unpickling runs code stored in the file -- only load trusted data.
    # Binary mode is required for pickles on Python 3, and the `with` block
    # closes both files even if loading fails.
    with open('test.p', 'rb') as testing_file, open('train.p', 'rb') as training_file:
        train = pickle.load(training_file)
        test = pickle.load(testing_file)

    trainX = train[:,:-1]
    trainy = train[:,-1]

    testX = test[:,:-1]
    testy = test[:,-1]

    print('[INFO, time: %s] Fitting %s ...' % (time.strftime('%H:%M:%S'), 'GradientBoostingClassifier(n_estimators=1000)'))
    clf = GradientBoostingClassifier(n_estimators=1000)
    clf.fit(trainX, trainy)

    print('[INFO, time: %s] Making Predictions...' % (time.strftime('%H:%M:%S')))
    prediction = clf.predict(testX)
    print('[RESULT, time: %s] accuracy = %f' % (time.strftime('%H:%M:%S'),accuracy_score(testy, prediction)))

    with open('gradient_1000.p', 'wb') as model_save_file:
        pickle.dump(clf, model_save_file)
    print('All done')
开发者ID:sanketrahul,项目名称:cs-412_ml_course_project,代码行数:32,代码来源:gradientboosting_1000estimators.py

示例12: train_gbt

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def train_gbt(filename, color, name):
    '''Train a Gradient Boosted Trees classifier and plot its ROC curve.

    The CSV at `filename` is split 80/20 into train and validation parts.
    The first column is dropped, the last column is the target and the
    columns in between are the features.

    Side effects: fills row 1 of the module-level `metrics` array with
    precision, recall, f1 and elapsed seconds, and draws the ROC curve on the
    current matplotlib axes via `plt`.

    Args:
        filename: Path of the CSV file to read.
        color: Line colour for the ROC curve.
        name: Legend label for the ROC curve.

    Returns:
        (importances, indices): the feature importances and the feature
        indices sorted by decreasing importance.
    '''
    # Read data
    data2 = pd.read_csv(filename, encoding="utf")
    # `.ix` no longer exists in pandas; these are positional column slices
    # (assumes the CSV header gives string column labels -- TODO confirm).
    X = data2.iloc[:, 1:-1]
    y = data2.iloc[:, -1]

    # Split into train and validation
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define model
    clf1 = GradientBoostingClassifier(learning_rate=0.05, max_depth=5, random_state=42)

    # Fit model
    t0 = time()
    clf1.fit(X_train, y_train)
    pred_probas = clf1.predict_proba(X_val)
    predictions = clf1.predict(X_val)

    print("Score %s" % (clf1.score(X_val, y_val),))

    importances = clf1.feature_importances_
    indices = np.argsort(importances)[::-1]

    # Metrics & Plotting
    metrics[1, 0] = precision_score(y_val, predictions)
    metrics[1, 1] = recall_score(y_val, predictions)
    metrics[1, 2] = f1_score(y_val, predictions)
    metrics[1, 3] = time() - t0

    # A ROC curve needs continuous scores; hard 0/1 predictions collapse it to
    # a single operating point. Use the probability of the second class
    # (assumes a binary target whose positive class sorts last -- TODO confirm).
    fpr, tpr, _ = roc_curve(y_val, pred_probas[:, 1])
    plt.plot(fpr, tpr, color=color, label=name)

    return importances, indices
开发者ID:amy12xx,项目名称:TelecomChurnPrediction,代码行数:37,代码来源:explore_models.py

示例13: cv_model

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def cv_model():
    """Run 3-fold cross-validation of the classifier and print each fold's f1.

    Every line of the training file is split on whitespace; fields 1, 3 and 2
    (in that order) are handed to a fresh ``StatsCollector``, which supplies
    the feature vector and the target for that line.
    """
    DATA_FILE  = './data/train-set-ru-b64-utf-8.txt'
    all_data = []
    target = []
    with open(DATA_FILE) as df:
        for i, line in enumerate(df):
            print(i)
            parts = line.strip().split()
            stats_collector = StatsCollector()
            stats_collector.collect(int(parts[1]), parts[3], parts[2])
            all_data.append(stats_collector.get_features())
            target.append(stats_collector.get_target())

    # np.float was only an alias of the builtin float and has been removed
    # from NumPy; the builtin produces the same float64 arrays.
    data = np.asarray(all_data, dtype=float)
    target = np.asarray(target, dtype=float)

    clf = GradientBoostingClassifier(loss='deviance', learning_rate=0.05, n_estimators=400,
                                     min_samples_split=30, min_samples_leaf=15, max_depth=5)

    # NOTE(review): KFold is built from a sample count plus n_folds and then
    # iterated directly -- presumably the old sklearn.cross_validation API;
    # confirm against the file's imports before upgrading scikit-learn.
    kf = KFold(data.shape[0], n_folds = 3, shuffle = True)

    for train_index, test_index in kf:
        X_train, X_test = data[train_index], data[test_index]
        y_train, y_test = target[train_index], target[test_index]
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        print(f1_score(y_test, y_pred))
开发者ID:alex0parhomenko,项目名称:technosfera,代码行数:35,代码来源:antispam_classifier.py

示例14: fit_model

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def fit_model():
    """Fit the classifier on the whole training file and save it.

    Every line of the training file is split on whitespace; fields 1, 3 and 2
    (in that order) are handed to a fresh ``StatsCollector``, which supplies
    the feature vector and the target for that line. The f1-score on the
    training data itself is printed and the fitted model is dumped to
    ``model/model.pkl``.
    """
    DATA_FILE  = './data/train-set-ru-b64-utf-8.txt'
    data = []
    target = []

    # The `with` block closes the file; no explicit close() is needed.
    with open(DATA_FILE) as df:
        for i, line in enumerate(df):
            print(i)
            parts = line.strip().split()
            stats_collector = StatsCollector()
            stats_collector.collect(int(parts[1]), parts[3], parts[2])
            data.append(stats_collector.get_features())
            target.append(stats_collector.get_target())

    # np.float was only an alias of the builtin float and has been removed
    # from NumPy; the builtin produces the same float64 arrays.
    data = np.asarray(data, dtype=float)
    target = np.asarray(target, dtype=float)
    print("%s %s" % (data.shape, target.shape))

    clf = GradientBoostingClassifier(loss='deviance', learning_rate=0.07, n_estimators=300, min_samples_split=30,
                                     min_samples_leaf=15, max_depth=4)

    clf.fit(data, target)
    y_pred = clf.predict(data)
    print(f1_score(target, y_pred))

    joblib.dump(clf, 'model/model.pkl')
开发者ID:alex0parhomenko,项目名称:technosfera,代码行数:33,代码来源:antispam_classifier.py

示例15: main

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import predict [as 别名]
def main():
    """Grid-search a gradient boosting classifier and write test predictions.

    Loads the data with ``importdata()``, hands a fresh classifier and the
    parameter grid to ``Gridsearch_impl`` and writes the predictions for the
    test features to ``submit3.csv`` through ``output``.
    """
    print("gradient boosting  classifier!")

    X, Y, Xtest = importdata()
    print(Y.shape)

    search_space = dict(
        n_estimators=[10, 100, 200, 2000, 20000],
        min_samples_split=[5, 10, 20, 50],
    )

    gb = GradientBoostingClassifier()
    Gridsearch_impl(X, Y, gb, search_space, 5)

    # NOTE(review): `gb` is never fitted in this function, so predict() only
    # works if Gridsearch_impl fits this very instance in place -- confirm.
    output(gb.predict(Xtest), 'submit3.csv')
开发者ID:kbai,项目名称:uss,代码行数:30,代码来源:gradientboost.py


注:本文中的sklearn.ensemble.GradientBoostingClassifier.predict方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。