Python XGBClassifier.predict方法代碼示例

本文整理匯總了Python中xgboost.sklearn.XGBClassifier.predict方法的典型用法代碼示例。如果您正苦於以下問題：Python XGBClassifier.predict方法的具體用法？Python XGBClassifier.predict怎麽用？Python XGBClassifier.predict使用的例子？那麽，這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類xgboost.sklearn.XGBClassifier的用法示例。

在下文中一共展示了XGBClassifier.predict方法的5個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: train_test_split

# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import predict [as 別名]
# Split the frame into the feature matrix (every column except 'state') and
# the 'state' target. ravel() flattens the (n, 1) column selection into a 1-D
# label vector, which avoids scikit-learn's column-vector conversion warning.
X = data.loc[:, data.columns != 'state'].values
y = data.loc[:, data.columns == 'state'].values.ravel()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Under-sample the training split with ClusterCentroids.
cc = ClusterCentroids(random_state=0)
# imbalanced-learn renamed fit_sample() to fit_resample() and later dropped the
# old name; use whichever method this installation provides.
resample = getattr(cc, 'fit_resample', None) or cc.fit_sample
os_X, os_y = resample(X_train, y_train)

# XGBoost, fitted on the resampled data and evaluated (AUC) on both the
# resampled training data and the untouched test split.
clf_XG = XGBClassifier(learning_rate=0.3, min_child_weight=1,
                       max_depth=6, gamma=0, subsample=1, max_delta_step=0, colsample_bytree=1,
                       reg_lambda=1, n_estimators=100, seed=1000, scale_pos_weight=1000)
clf_XG.fit(os_X, os_y, eval_set=[(os_X, os_y), (X_test, y_test)], eval_metric='auc', verbose=False)
evals_result = clf_XG.evals_result()
y_true, y_pred = y_test, clf_XG.predict(X_test)

# F1 score, recall and precision. Each metric is computed once and printed
# with a single-argument print() so the script runs on Python 2 and Python 3.
f1 = metrics.f1_score(y_true, y_pred)
recall = metrics.recall_score(y_true, y_pred)
precision = metrics.precision_score(y_true, y_pred)
print("F1_score : %.4g" % f1)
print("Recall : %.4g" % recall)
print("Precision : %.4g" % precision)

# Confusion matrix: row 0 holds the samples whose true label sorts first
# (assumed to be the negative class -- confirm against the 'state' encoding),
# so specificity = cnf[0, 0] / (cnf[0, 0] + cnf[0, 1]).
cnf_matrix = confusion_matrix(y_test, y_pred)
np.set_printoptions(precision=2)
true_negatives = float(cnf_matrix[0, 0])
negatives = true_negatives + float(cnf_matrix[0, 1])
# Without any negative sample in the test split the ratio is undefined.
specifity = true_negatives / negatives if negatives else float('nan')
# Two spaces after the colon reproduce the output of the former
# comma-separated print statement.
print("Specifity:  %s" % specifity)
# The G score (G-mean) is the geometric mean of recall and specificity,
# i.e. the square root of their product -- not of their quotient.
print("G score:  %s" % math.sqrt(recall * specifity))

#Plot non-normalized confusion matrix

開發者ID:non27，項目名稱:The-final-assignment，代碼行數:32，代碼來源:XGboost+ClusterCentroids.py

示例2: modelfit

# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import predict [as 別名]
#  reg_alpha=0.1,
#  seed=27)
# modelfit(xgb1, df_train, predictors, targetname, early_stopping_rounds=50)


# Booster hyper-parameters, gathered in one mapping and expanded into the
# estimator's constructor.
xgb_params = dict(
    learning_rate=0.01,
    n_estimators=700,
    max_depth=5,
    min_child_weight=8,
    gamma=0.3,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='binary:logistic',
    scale_pos_weight=1,
    seed=27,
)
xgb1 = XGBClassifier(**xgb_params)

# Fit on df_train, then store the predicted class for every row of df_test.
xgb1.fit(df_train[predictors], df_train[targetname])
df_test['target'] = xgb1.predict(df_test[predictors])

# Recode the prediction: 1 becomes 'Y', every other value becomes 'N'.
df_test['target'] = df_test['target'].map(lambda flag: 'Y' if flag == 1 else 'N')

# Two-column submission file: loan id and the recoded prediction.
submission = pd.DataFrame({
    'Loan_ID': df_test['Loan_ID'],
    'Loan_Status': df_test['target'],
})
submission.to_csv('submission_XGB_retunned.csv', index=False)

開發者ID:Paliking，項目名稱:ML_examples，代碼行數:33，代碼來源:LoanPrediction2_XGB.py

示例3: int

# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import predict [as 別名]
from sklearn.preprocessing import LabelEncoder
from xgboost.sklearn import XGBClassifier
import dataGatherer as dg
# First command-line argument selects the mode: 1 loads a split that also
# carries test labels (evaluation), any other integer loads data without them.
isTest = int(sys.argv[1])
if isTest == 1:
    train, test, feature_train, feature_test, label_train, label_test = dg.test_data(.8)
else:
    train, test, feature_train, feature_test, label_train = dg.prod_data()

# Place the extra feature columns next to the base columns.
f_train = pd.concat([train, feature_train], axis=1)
f_test = pd.concat([test, feature_test], axis=1)

# Encode the training labels as integers for the booster.
le = LabelEncoder()
y = le.fit_transform(label_train.values)

xgb = XGBClassifier(max_depth=6, learning_rate=0.1, n_estimators=50, objective='multi:softprob', subsample=1.0, colsample_bytree=1, seed=0)
xgb.fit(f_train.values, y)

# Predict and map the integer classes back to the original labels.
y_pred = le.inverse_transform(xgb.predict(f_test.values))

if isTest == 1:
    # Element-wise comparison of predictions with the known test labels.
    hits = y_pred == label_test.values
    misses = ~hits
    n_hits = int(hits.sum())
    n_misses = int(misses.sum())
    print("misclassified = " + str(n_misses))
    print("currect class = " + str(n_hits))
    print("score = " + str(n_hits / len(hits)))

    # Draw every misclassified sample; the output name carries its running
    # number, its true label and the predicted label.
    wrong_rows = test[misses]
    wrong_truth = label_test[misses]
    wrong_guess = y_pred[misses]
    for i, (row, truth, guess) in enumerate(zip(wrong_rows.values, wrong_truth.values, wrong_guess)):
        di.draw(row, "images/prob_" + str(i) + "_" + str(truth) + "_" + str(guess))
else:
    # 1-based ImageId column plus the predicted Label column.
    index = pd.DataFrame(list(range(1, len(y_pred) + 1)), columns=['ImageId'])
    y_pred = pd.DataFrame(y_pred, columns=['Label'])

開發者ID:uniqueuser，項目名稱:kaggle，代碼行數:33，代碼來源:model_xgb.py

示例4: XGBClassifier

# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import predict [as 別名]
# Columns that are one-hot encoded in addition to action_type.
cols = ["combined_shot_type", "game_event_id", "period", "playoffs",
        "shot_type", "shot_zone_area", "shot_zone_basic", "shot_zone_range",
        "matchup", "opponent", "game_date", "shot_distance", "minutes_remaining", "seconds_remaining",
        "loc_x", "loc_y"]

# Encode every column separately and concatenate once: re-concatenating the
# growing frame inside a loop copies it again for each column.
encoded = [pd.get_dummies(data.action_type, prefix="action_type")]
encoded.extend(pd.get_dummies(data[col], prefix=col) for col in cols)
data_x = pd.concat(encoded, axis=1)

# Rows with a known shot_made_flag are used for training, the others are the
# rows to predict. The mask is built with notnull()/`~`: unary minus on a
# boolean mask is rejected by current NumPy/pandas.
has_label = pd.notnull(data.shot_made_flag)
train_x = data_x[has_label]
test_x = data_x[~has_label]
train_y = data.shot_made_flag[has_label]

clf = XGBClassifier(max_depth=6, learning_rate=0.01, n_estimators=550,
                    subsample=0.5, colsample_bytree=0.5, seed=0)
clf.fit(train_x, train_y)

# Error count on the training data itself (not a held-out estimate).
y_pred = clf.predict(train_x)
print("Number of mislabeled points out of a total %d points : %d"  % (train_x.shape[0],(train_y != y_pred).sum()))

def logloss(act, pred):
    """Print and return the mean binary log loss (cross-entropy).

    Args:
        act: Sequence of true labels, each 0 or 1.
        pred: Sequence of predicted probabilities for label 1, with the
            same length as ``act``.

    Returns:
        The mean negative log-likelihood of ``act`` under ``pred``.

    Raises:
        ValueError: If ``act`` is empty, because the mean is undefined.
    """
    # NumPy is imported locally so the function no longer depends on SciPy's
    # deprecated top-level aliases of NumPy functions (sp.log, sp.maximum, ...).
    import numpy as np

    epsilon = 1e-15
    act = np.asarray(act, dtype=float)
    if act.size == 0:
        raise ValueError("logloss() requires at least one sample")

    # Keep probabilities strictly inside (0, 1) so log() stays finite.
    pred = np.clip(np.asarray(pred, dtype=float), epsilon, 1 - epsilon)
    ll = np.sum(act * np.log(pred) + (1 - act) * np.log(1 - pred))
    ll = ll * -1.0 / len(act)
    print(ll)
    return ll
    
# Log loss on the training data, using column 1 of the probability matrix.
train_proba = clf.predict_proba(train_x)
logloss(train_y, train_proba[:, 1])

# Column-1 probabilities for the rows without a shot_made_flag, together with
# the shot_id of those rows.
test_y = clf.predict_proba(test_x)[:, 1]
unlabeled = pd.isnull(data.shot_made_flag)
test_id = data.loc[unlabeled, "shot_id"]

開發者ID:Helen-n，項目名稱:kaggle，代碼行數:32，代碼來源:xgboost.py

示例5: set_train_path

# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import predict [as 別名]
class TrollClassifier:
    def set_train_path(self, path):
        self.train_path = path

    def pre_process(self, json, istrain):
        """Convert raw article records into a DataFrame indexed by ``pk``.

        For every article the ``text`` field is parsed with ``bs`` using the
        "html.parser" backend, the resulting plain text is tagged with
        ``Mecab.pos`` and each returned pair is joined as ``first_second``.

        Args:
            json: Iterable of article mappings providing ``text`` and ``pk``
                (and ``is_troll`` when ``istrain`` equals True).
            istrain: When equal to True, the ``is_troll`` value of each
                article is copied into an ``istroll`` column.

        Returns:
            DataFrame with the columns ``text``, ``text_pos`` and
            ``text_pos_sentences`` (plus ``istroll`` for training data),
            indexed by ``pk``.
        """
        tagger = Mecab()
        # Equality (not truthiness) is kept on purpose: only values that
        # compare equal to True switch the label column on.
        with_label = istrain == True
        records = []

        for position, article in enumerate(json):
            # Progress output every 10000 articles.
            if position % 10000 == 0:
                print(position)

            plain_text = bs(article["text"], "html.parser").text
            tagged = ["%s_%s" % (first, second) for first, second in tagger.pos(plain_text)]

            # Only the text-based fields are collected; title, author and
            # forum id are not part of the record.
            record = {
                "text": article["text"],
                "text_pos": tagged,
                "text_pos_sentences": " ".join(tagged),
                "pk": article["pk"],
            }
            if with_label:
                record["istroll"] = article["is_troll"]
            records.append(record)

        frame = pd.DataFrame.from_dict(records)
        return frame.set_index('pk')

    def fit(self, json_train, n_estimators = 10, is_xgb = True):
        """Train the classifier on bag-of-words features of the article text.

        The articles are pre-processed, the tagged sentences are vectorized
        with ``BagOfWordsVectorizer`` and the chosen estimator is fitted on
        the resulting features. The fitted vectorizer is stored in
        ``self.text_model`` and the fitted estimator in ``self.model``.

        Args:
            json_train: Labelled article records accepted by ``pre_process``.
            n_estimators: Number of estimators passed to the model.
            is_xgb: When equal to False a ``RandomForestClassifier`` is
                used, otherwise an ``XGBClassifier`` with ``max_depth=10``.
        """
        frame = self.pre_process(json_train, istrain = True)

        vectorizer = BagOfWordsVectorizer()
        # These two models are created but not used below; they are kept so
        # that whatever their constructors do still happens.
        word2vec_model = Word2VecModel()
        tag_counter_model = TagCounterModel()

        # NOTE(review): the meaning of 1000 is defined by BagOfWordsVectorizer
        # (presumably a vocabulary/feature limit) -- confirm there.
        sentences = frame["text_pos_sentences"]
        vectorizer.fit(sentences, 1000)
        text_features = vectorizer.transform(sentences, "text")
        self.text_model = vectorizer.get_vectorizer()

        frame = pd.concat([frame, text_features], axis = 1)

        # Separate the target, then remove it and the raw text columns so
        # only the vectorizer's feature columns remain.
        label = frame['istroll']
        frame = frame.drop(['istroll', 'text', 'text_pos', 'text_pos_sentences'], axis=1)

        print(frame.columns)

        # Rename the feature columns to their position ("0", "1", ...).
        frame.columns = list(map(str, range(frame.shape[1])))

        if is_xgb == False:
            estimator = RandomForestClassifier(n_estimators, n_jobs=-1)
        else:
            estimator = XGBClassifier(n_estimators = n_estimators, max_depth = 10)
        self.model = estimator

        print(frame.shape)
        self.model.fit(frame, label)

    def save_model(self, save_path = "predict_model"):

        if not os.path.exists(save_path):
            os.makedirs(save_path)

        #pickle.dump(self.author_model, open("%s/author_model.p" % save_path, "wb"), protocol = pickle.HIGHEST_PROTOCOL)
        #pickle.dump(self.title_model, open("%s/title_model.p" % save_path, "wb"), protocol = pickle.HIGHEST_PROTOCOL)
        pickle.dump(self.text_model, open("%s/text_model.p" % save_path, "wb"), protocol = pickle.HIGHEST_PROTOCOL)
        #pickle.dump(self.tag_model, open("%s/tag_model.p" % save_path,"wb"), protocol = pickle.HIGHEST_PROTOCOL)
        pickle.dump(self.model, open("%s/predict_model.p" % save_path,"wb"), protocol = pickle.HIGHEST_PROTOCOL)

    def load_model(self, save_path = "predict_model"):
        #self.author_model = pickle.load(open("%s/author_model.p" % save_path, "rb"))
        #self.title_model = pickle.load(open("%s/title_model.p" % save_path, "rb"))
        self.text_model = pickle.load(open("%s/text_model.p" % save_path, "rb"))
        #self.tag_model = pickle.load(open("%s/tag_model.p" % save_path, "rb"))
        self.model = pickle.load(open("%s/predict_model.p" % save_path,"rb"))
#.........這裏部分代碼省略.........

開發者ID:MacLunch，項目名稱:MacLunch，代碼行數:103，代碼來源:Ilwar.py

注：本文中的xgboost.sklearn.XGBClassifier.predict方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。