本文整理匯總了Python中xgboost.sklearn.XGBClassifier.predict方法的典型用法代碼示例。如果您正苦於以下問題:Python XGBClassifier.predict方法的具體用法?Python XGBClassifier.predict怎麼用?Python XGBClassifier.predict使用的例子?那麼, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類xgboost.sklearn.XGBClassifier的用法示例。
在下文中一共展示了XGBClassifier.predict方法的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: train_test_split
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import predict [as 別名]
# Define X (every column except 'state') and y (the 'state' column).
X, y = data.loc[:, data.columns != 'state'].values, data.loc[:, data.columns == 'state'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# ClusterCentroids: resample the training split only; the test split is
# left untouched so the metrics below are computed on the original data.
cc = ClusterCentroids(random_state=0)
os_X, os_y = cc.fit_sample(X_train, y_train)

# XGBoost
clf_XG = XGBClassifier(
    learning_rate=0.3, min_child_weight=1,
    max_depth=6, gamma=0, subsample=1, max_delta_step=0, colsample_bytree=1,
    reg_lambda=1, n_estimators=100, seed=1000, scale_pos_weight=1000)
clf_XG.fit(os_X, os_y, eval_set=[(os_X, os_y), (X_test, y_test)], eval_metric='auc', verbose=False)
evals_result = clf_XG.evals_result()
y_true, y_pred = y_test, clf_XG.predict(X_test)

# F1_score, precision, recall, specifity, G score.
# Every print is a single-argument call so it emits the same text on
# Python 2 and Python 3.
recall = metrics.recall_score(y_true, y_pred)
print("F1_score : %.4g" % metrics.f1_score(y_true, y_pred))
print("Recall : %.4g" % recall)
print("Precision : %.4g" % metrics.precision_score(y_true, y_pred))

# Compute confusion matrix
cnf_matrix = confusion_matrix(y_test, y_pred)
np.set_printoptions(precision=2)

# Row 0 of the confusion matrix is the true-negative class:
# specifity = TN / (TN + FP).
specifity = float(cnf_matrix[0, 0]) / (cnf_matrix[0, 0] + cnf_matrix[0, 1])
print("Specifity:  %s" % specifity)

# The G score is the geometric mean of recall and specifity, i.e. the
# square root of their PRODUCT (the original divided them).
print("G score:  %s" % math.sqrt(recall * specifity))
# Plot non-normalized confusion matrix
示例2: modelfit
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import predict [as 別名]
# reg_alpha=0.1,
# seed=27)
# modelfit(xgb1, df_train, predictors, targetname, early_stopping_rounds=50)
# Train the final classifier on the full training frame.
xgb1 = XGBClassifier(
    learning_rate=0.01, n_estimators=700,
    max_depth=5, min_child_weight=8, gamma=0.3,
    subsample=0.8, colsample_bytree=0.8,
    objective='binary:logistic', scale_pos_weight=1, seed=27)
xgb1.fit(df_train[predictors], df_train[targetname])

# Predict on the test frame and turn the 1/0 predictions into 'Y'/'N'.
df_test['target'] = [
    'Y' if predicted == 1 else 'N'
    for predicted in xgb1.predict(df_test[predictors])
]

# Two-column submission file: Loan_ID plus the predicted Loan_Status.
submission = df_test[['Loan_ID', 'target']].rename(columns={'target': 'Loan_Status'})
submission.to_csv('submission_XGB_retunned.csv', index=False)
示例3: int
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import predict [as 別名]
from sklearn.preprocessing import LabelEncoder
from xgboost.sklearn import XGBClassifier
import dataGatherer as dg
# First command-line argument selects the mode: 1 loads a split that
# includes held-out labels (label_test); any other value loads data
# without test labels.
isTest = int(sys.argv[1])
if isTest == 1:
    train, test, feature_train, feature_test, label_train, label_test = dg.test_data(.8)
else:
    train, test, feature_train, feature_test, label_train = dg.prod_data()

# Put the two train (and test) frames side by side as one design matrix.
f_train = pd.concat([train, feature_train], axis=1)
f_test = pd.concat([test, feature_test], axis=1)

xgb = XGBClassifier(max_depth=6, learning_rate=0.1, n_estimators=50, objective='multi:softprob', subsample=1.0, colsample_bytree=1, seed=0)

# Labels are encoded for fitting and the predictions decoded back again.
le = LabelEncoder()
y = le.fit_transform(label_train.values)
xgb.fit(f_train.values, y)
y_pred = xgb.predict(f_test.values)
y_pred = le.inverse_transform(y_pred)

if isTest == 1:
    # Element-wise hit mask: True where the prediction matches the label.
    y_f = y_pred == label_test.values
    n_total = len(y_f)
    n_correct = int(y_f.sum())
    print("misclassified = " + str(n_total - n_correct))
    print("currect class = " + str(n_correct))
    # float() keeps the ratio from truncating to 0 under Python 2
    # integer division.
    print("score = " + str(float(n_correct) / n_total))

    # Draw every misclassified sample; the file name carries the running
    # index, the true label and the predicted label.
    t = test[~y_f]
    l = label_test[~y_f]
    l_p = y_pred[~y_f]
    for i, (row, actual, predicted) in enumerate(zip(t.values, l.values, l_p)):
        di.draw(row, "images/prob_" + str(i) + "_" + str(actual) + "_" + str(predicted))
else:
    # 1-based ImageId column alongside the predicted Label column.
    index = list(range(1, len(y_pred) + 1))
    index = pd.DataFrame(index, columns=['ImageId'])
    y_pred = pd.DataFrame(y_pred, columns=['Label'])
示例4: XGBClassifier
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import predict [as 別名]
# One-hot encode action_type followed by every column listed in `cols`.
cols = ["combined_shot_type", "game_event_id", "period", "playoffs",
        "shot_type", "shot_zone_area", "shot_zone_basic", "shot_zone_range",
        "matchup", "opponent", "game_date", "shot_distance", "minutes_remaining", "seconds_remaining",
        "loc_x", "loc_y"]
encoded = [pd.get_dummies(data.action_type, prefix="action_type")]
encoded.extend(pd.get_dummies(data[col], prefix=col) for col in cols)
# A single concat keeps the same column order as concatenating one frame
# at a time, without re-copying the growing frame on every iteration.
data_x = pd.concat(encoded, axis=1)

# Rows with a known shot_made_flag are used for training; rows where it
# is null are the ones to predict. The mask is inverted with `~`: unary
# minus on a boolean mask is rejected by current NumPy/pandas.
has_label = pd.notnull(data.shot_made_flag)
train_x = data_x[has_label]
test_x = data_x[~has_label]
train_y = data.shot_made_flag[has_label]

clf = XGBClassifier(max_depth=6, learning_rate=0.01, n_estimators=550,
                    subsample=0.5, colsample_bytree=0.5, seed=0)
clf.fit(train_x, train_y)

# Training-set error count (not a held-out estimate).
y_pred = clf.predict(train_x)
print("Number of mislabeled points out of a total %d points : %d" % (train_x.shape[0], (train_y != y_pred).sum()))
def logloss(act, pred):
    """Print and return the mean binary log loss of *pred* against *act*.

    Args:
        act: Sequence of actual outcomes (0 or 1), same length as *pred*.
        pred: Sequence of predicted probabilities for the positive class.

    Returns:
        ``-mean(act * log(pred) + (1 - act) * log(1 - pred))``, with
        *pred* first clipped to ``[1e-15, 1 - 1e-15]`` so the logarithms
        stay finite.

    Raises:
        ZeroDivisionError: If *act* is empty.
    """
    # Local import: the original reached these functions through the
    # deprecated NumPy aliases in the SciPy root namespace (sp.log, ...).
    import numpy as np

    epsilon = 1e-15
    # Positional arrays, so a pandas Series is not index-aligned by accident.
    act = np.asarray(act, dtype=float)
    pred = np.asarray(pred, dtype=float)
    pred = np.minimum(1 - epsilon, np.maximum(epsilon, pred))
    ll = np.sum(act * np.log(pred) + (1 - act) * np.log(1 - pred))
    ll = ll * -1.0 / len(act)
    print(ll)
    return ll
# Log loss on the training rows, using the probability column at index 1.
train_proba = clf.predict_proba(train_x)
logloss(train_y, train_proba[:, 1])

# Probabilities for the rows whose shot_made_flag is null, together with
# the shot_id of each of those rows.
test_proba = clf.predict_proba(test_x)
test_y = test_proba[:, 1]
unlabeled = pd.isnull(data.shot_made_flag)
test_id = data.loc[unlabeled, "shot_id"]
示例5: set_train_path
# 需要導入模塊: from xgboost.sklearn import XGBClassifier [as 別名]
# 或者: from xgboost.sklearn.XGBClassifier import predict [as 別名]
class TrollClassifier:
    """Classify articles as troll / non-troll from POS-tagged text features.

    ``fit`` turns each article's text into bag-of-words features and trains
    either an ``XGBClassifier`` or a ``RandomForestClassifier``;
    ``save_model`` / ``load_model`` pickle the fitted vectorizer and model.
    """

    def set_train_path(self, path):
        """Store *path* on the instance as ``train_path``."""
        self.train_path = path

    def pre_process(self, json, istrain):
        """Build a DataFrame of tagged text from an iterable of articles.

        Args:
            json: Iterable of article mappings; each must provide ``"text"``
                and ``"pk"``, plus ``"is_troll"`` when *istrain* is True.
            istrain: When equal to True, the ``is_troll`` value of each
                article is copied into an ``istroll`` column.

        Returns:
            A DataFrame indexed by ``pk`` with the columns ``text``,
            ``text_pos``, ``text_pos_sentences`` and (training only)
            ``istroll``.
        """
        mecab = Mecab()
        data = []
        for cnt, article in enumerate(json):
            # Progress trace every 10000 articles.
            if cnt % 10000 == 0:
                print(cnt)

            # `bs` is presumably BeautifulSoup; `.text` is the markup-free text.
            text = bs(article["text"], "html.parser").text
            #title_pos = ["%s_%s" % (word, pos) for word, pos in mecab.pos(article["title"])]
            #author_pos = ["%s_%s" % (word, pos) for word, pos in mecab.pos(article["author"])]
            # Each pair returned by mecab.pos() becomes one "first_second" token.
            text_pos = ["%s_%s" % (first, second) for first, second in mecab.pos(text)]

            row = {
                #"title_pos": title_pos,
                #"title_pos_sentences" : " ".join(title_pos),
                #"author_pos": author_pos,
                #"author_pos_sentences" : " ".join(author_pos),
                "text": article["text"],
                "text_pos": text_pos,
                "text_pos_sentences": " ".join(text_pos),
                #"forumid": article["forumid"],
                "pk": article["pk"],
            }
            # Explicit comparison kept so non-bool flags behave as before.
            if istrain == True:
                row["istroll"] = article["is_troll"]
            data.append(row)

        data = pd.DataFrame.from_dict(data)
        data = data.set_index('pk')
        return data

    def fit(self, json_train, n_estimators = 10, is_xgb = True):
        """Train the classifier on labelled articles.

        Args:
            json_train: Iterable of article mappings accepted by
                ``pre_process`` with ``istrain=True``.
            n_estimators: Number of estimators for the chosen model.
            is_xgb: When equal to False a ``RandomForestClassifier`` is
                trained; otherwise an ``XGBClassifier`` with ``max_depth=10``.

        Side effects:
            Sets ``self.text_model`` (the fitted vectorizer) and
            ``self.model`` (the fitted classifier); prints the feature
            columns and the training matrix shape.
        """
        train = self.pre_process(json_train, istrain = True)

        bow_vectorizer = BagOfWordsVectorizer()
        # NOTE(review): these two are only referenced by the disabled feature
        # code below. They are still instantiated in case the constructors
        # have side effects -- confirm and remove.
        word2vec_model = Word2VecModel()
        tag_counter_model = TagCounterModel()

        # word2vec_model.fit(train["author_pos_sentences"], 500)
        # author_features = word2vec_model.transform(train["author_pos_sentences"], "author")
        # self.author_model = word2vec_model.get_model()
        # bow_vectorizer.fit(train["title_pos_sentences"], 1000)
        # title_features = bow_vectorizer.transform(train["title_pos_sentences"], "title")
        # self.title_model = bow_vectorizer.get_vectorizer()
        bow_vectorizer.fit(train["text_pos_sentences"], 1000)
        text_features = bow_vectorizer.transform(train["text_pos_sentences"], "text")
        self.text_model = bow_vectorizer.get_vectorizer()
        # tag_features = tag_counter_model.fit_transform(train["text"])
        # self.tag_model = tag_counter_model.get_col()

        train = pd.concat([train, text_features], axis = 1)
        #le = preprocessing.LabelEncoder()
        # train["forumid"] = le.fit_transform(train["forumid"])

        # Split off the label and drop the raw text columns, leaving only
        # the generated features.
        label = train['istroll']
        train = train.drop(['istroll', 'text', 'text_pos', 'text_pos_sentences'], axis=1)
        print(train.columns)
        # Replace the feature names with their positional index as strings.
        train.columns = [str(x) for x in range(len(train.columns))]

        # Explicit comparison kept so non-bool flags behave as before.
        if is_xgb == False:
            self.model = RandomForestClassifier(n_estimators, n_jobs=-1)
        else:
            self.model = XGBClassifier(n_estimators = n_estimators, max_depth = 10)
        print(train.shape)
        self.model.fit(train, label)

    def save_model(self, save_path = "predict_model"):
        """Pickle the text vectorizer and the classifier into *save_path*.

        The directory is created when missing. Writes ``text_model.p`` and
        ``predict_model.p``; each file is closed (and therefore flushed)
        before the method returns.
        """
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        #pickle.dump(self.author_model, open("%s/author_model.p" % save_path, "wb"), protocol = pickle.HIGHEST_PROTOCOL)
        #pickle.dump(self.title_model, open("%s/title_model.p" % save_path, "wb"), protocol = pickle.HIGHEST_PROTOCOL)
        with open("%s/text_model.p" % save_path, "wb") as handle:
            pickle.dump(self.text_model, handle, protocol = pickle.HIGHEST_PROTOCOL)
        #pickle.dump(self.tag_model, open("%s/tag_model.p" % save_path,"wb"), protocol = pickle.HIGHEST_PROTOCOL)
        with open("%s/predict_model.p" % save_path, "wb") as handle:
            pickle.dump(self.model, handle, protocol = pickle.HIGHEST_PROTOCOL)

    def load_model(self, save_path = "predict_model"):
        """Restore ``self.text_model`` and ``self.model`` from *save_path*.

        Reads the ``text_model.p`` and ``predict_model.p`` files written by
        ``save_model``.
        """
        # SECURITY NOTE: unpickling executes arbitrary code from the file;
        # only load model directories that come from a trusted source.
        #self.author_model = pickle.load(open("%s/author_model.p" % save_path, "rb"))
        #self.title_model = pickle.load(open("%s/title_model.p" % save_path, "rb"))
        with open("%s/text_model.p" % save_path, "rb") as handle:
            self.text_model = pickle.load(handle)
        #self.tag_model = pickle.load(open("%s/tag_model.p" % save_path, "rb"))
        with open("%s/predict_model.p" % save_path, "rb") as handle:
            self.model = pickle.load(handle)
#.........這裏部分代碼省略.........