当前位置: 首页>>代码示例>>Python>>正文


Python xgb_classifier.xgb_classifier函数代码示例

本文整理汇总了Python中xgb_classifier.xgb_classifier函数的典型用法代码示例。如果您正苦于以下问题:Python xgb_classifier函数的具体用法?Python xgb_classifier怎么用?Python xgb_classifier使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了xgb_classifier函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: kfold_cv

def kfold_cv(X_train, y_train,idx,k):

    kf = StratifiedKFold(y_train,n_folds=k)
    xx=[]
    count=0
    for train_index, test_index in kf:
        count+=1
        X_train_cv, X_test_cv = X_train[train_index,:],X_train[test_index,:]
        gc.collect()
        y_train_cv, y_test_cv = y_train[train_index],y_train[test_index]
        y_pred=np.zeros(X_test_cv.shape[0])
        m=0
         
        for j in range(m):
            clf=xgb_classifier(eta=0.05,min_child_weight=20,col=0.5,subsample=0.7,depth=7,num_round=400,seed=j*77,gamma=0.1)
            y_pred+=clf.train_predict(X_train_cv,(y_train_cv),X_test_cv,y_test=(y_test_cv))
            yqq=y_pred*(1.0/(j+1))

            print j,llfun(y_test_cv,yqq)

        #y_pred/=m;
        clf=XGBClassifier(max_depth=10,colsample_bytree=0.8,learning_rate=0.02,n_estimators=500,nthread=-1)
        #clf=RandomForestClassifier(n_jobs=-1,n_estimators=100,max_depth=100)
        clf.fit(X_train_cv,(y_train_cv),eval_metric="logloss",eval_set=[(X_test_cv, y_test_cv)])
        y_pred=clf.predict_proba(X_test_cv).T[1]
        print y_pred.shape
        xx.append(llfun(y_test_cv,(y_pred)))
        ypred=y_pred
        yreal=y_test_cv
        idx=idx[test_index]
        print xx[-1]#,y_pred.shape
        break

    print xx,'average:',np.mean(xx),'std',np.std(xx)
    return ypred,yreal,idx#np.mean(xx)
开发者ID:daxiongshu,项目名称:bnp,代码行数:35,代码来源:ada7.py

示例2: xgb_meta_predict

def xgb_meta_predict(data_base_dir,data_meta_random_dir,submission_dir):
    """Train three ``xgb_classifier`` configurations on meta features and save each result.

    The training matrix is the horizontal stack of the random-forest, SVC and
    numerical meta features; the test matrix is the matching stack of test
    features.  Each configuration is trained with
    ``train_predict_all_labels(..., predict_y14=True)`` and its predictions
    are written with ``save_predictions``.

    Args:
        data_base_dir: path prefix for ``test_id.p`` and ``X_test_numerical.p``.
        data_meta_random_dir: path prefix for the meta-feature pickles.
        submission_dir: path prefix for the written ``.csv.gz`` files.

    Returns:
        None.  The results are the files written by ``save_predictions``.
    """
    def _load(path):
        # Close the file deterministically instead of leaking the handle.
        # NOTE(review): pickle executes arbitrary code on load; these paths
        # must only ever point at trusted, locally produced files.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    test_id = _load(data_base_dir + "test_id.p")
    y_meta = _load(data_meta_random_dir + "y_meta.p")

    X_numerical_random = _load(data_meta_random_dir + "X_numerical_meta.p")
    X_test_numerical = _load(data_base_dir + "X_test_numerical.p")

    X_random_rf = _load(data_meta_random_dir + "X_meta_random_rf.p")
    X_test_rf = _load(data_meta_random_dir + "X_test_meta_rf.p")

    X_random_svc = _load(data_meta_random_dir + "X_meta_random_svc.p")
    X_test_svc = _load(data_meta_random_dir + "X_test_meta_svc.p")

    # (hyper-parameters, output file name), in the order they are trained.
    runs = (
        # private LB  0.0054101
        (dict(eta=0.2, min_child_weight=1, depth=10, num_round=70),
         'xgb-random-d10-e0.2-min1-tree70.csv.gz'),
        # private LB 0.0053053
        (dict(eta=0.2, min_child_weight=6, depth=12, num_round=80),
         'xgb-random-d12-e0.2-min6-tree80.csv.gz'),
        # private LB  0.0052910
        (dict(eta=0.09, min_child_weight=6, depth=25, num_round=100),
         'xgb-random-d25-svc-e0.09-min6-tree100.csv.gz'),
    )

    for params, out_name in runs:
        xgb_clf = xgb_classifier(threads=16, **params)
        # The stacks are rebuilt per run so that only one pair of temporary
        # matrices is alive at any time.
        X_xgb_predict = xgb_clf.train_predict_all_labels(
            np.hstack([X_random_rf, X_random_svc, X_numerical_random]),
            y_meta,
            np.hstack([X_test_rf, X_test_svc, X_test_numerical]),
            predict_y14=True)
        save_predictions(submission_dir + out_name, test_id, X_xgb_predict)
开发者ID:Kapetis,项目名称:kaggle-tradeshift-winning-solution,代码行数:32,代码来源:xgb_meta_random_split_predict.py

示例3: kfold_cv

def kfold_cv(X_train, y_train,idx,k):

    kf = StratifiedKFold(y_train,n_folds=k)
    xx=[]
    count=0
    for train_index, test_index in kf:
        count+=1
        X_train_cv, X_test_cv = X_train[train_index,:],X_train[test_index,:]
        gc.collect()
        y_train_cv, y_test_cv = y_train[train_index],y_train[test_index]
        y_pred=np.zeros(X_test_cv.shape[0])
        m=0
         
        for j in range(m):
            clf=xgb_classifier(eta=0.1,min_child_weight=20,col=0.5,subsample=0.7,depth=5,num_round=200,seed=j*77,gamma=0.1)
            y_pred+=clf.train_predict(X_train_cv,(y_train_cv),X_test_cv,y_test=(y_test_cv))
        #y_pred/=m;
        clf=ExtraTreesClassifier(n_estimators=700,max_features= 50,criterion= 'entropy',min_samples_split= 3,
                            max_depth= 60, min_samples_leaf= 4,verbose=1,n_jobs=-1)
        #clf=RandomForestClassifier(n_jobs=-1,n_estimators=100,max_depth=100)
        clf.fit(X_train_cv,(y_train_cv))
        y_pred=clf.predict_proba(X_test_cv).T[1]
        print y_pred.shape
        xx.append(llfun(y_test_cv,(y_pred)))
        ypred=y_pred
        yreal=y_test_cv
        idx=idx[test_index]
        print xx[-1]#,y_pred.shape
        break

    print xx,'average:',np.mean(xx),'std',np.std(xx)
    return ypred,yreal,idx#np.mean(xx)
开发者ID:daxiongshu,项目名称:bnp,代码行数:32,代码来源:ex2.py

示例4: kfold_cv

def kfold_cv(X_train, y_train,idx,k):

    kf = StratifiedKFold(y_train,n_folds=k)
    xx=[]
    count=0
    for train_index, test_index in kf:
        count+=1
        X_train_cv, X_test_cv = X_train[train_index,:],X_train[test_index,:]
        gc.collect()
        y_train_cv, y_test_cv = y_train[train_index],y_train[test_index]
        y_pred=np.zeros(X_test_cv.shape[0])
        m=1
         
        for j in range(m):
            clf=xgb_classifier(eta=0.1,min_child_weight=20,col=0.7,subsample=1,depth=10,num_round=50,seed=j*77,gamma=0.1)
            y_pred+=clf.train_predict(X_train_cv,(y_train_cv),X_test_cv,y_test=(y_test_cv))
        y_pred/=m;
        print y_pred.shape
        xx.append(llfun(y_test_cv,(y_pred)))
        #ypred=y_pred
        #yreal=y_test_cv
        #idx=idx[test_index]
        print xx[-1]#,y_pred.shape
        #break

    print xx,'average:',np.mean(xx),'std',np.std(xx)
开发者ID:daxiongshu,项目名称:bnp,代码行数:26,代码来源:rf2.py

示例5: kfold_cv

def kfold_cv(X_train, y_train,idx,k):

    kf = StratifiedKFold(y_train,n_folds=k)
    xx=[]
    count=0
    ypred=np.zeros(X_train.shape[0])
    for train_index, test_index in kf:
        count+=1
        X_train_cv, X_test_cv = X_train[train_index,:],X_train[test_index,:]
        gc.collect()
        y_train_cv, y_test_cv = y_train[train_index],y_train[test_index]
        y_pred=np.zeros(X_test_cv.shape[0])
        m=1
         
        for j in range(m):
            clf=xgb_classifier(eta=0.01,min_child_weight=10,col=0.7,subsample=0.68,depth=5,num_round=500,seed=j*77,gamma=0)

            y_pred+=clf.train_predict(X_train_cv,(y_train_cv),X_test_cv,y_test=(y_test_cv))
            yqq=y_pred/(1+j)
            print j,llfun(y_test_cv,yqq)
        y_pred/=m;
        #clf=RandomForestClassifier(n_jobs=-1,n_estimators=100,max_depth=100)
        #clf.fit(X_train_cv,(y_train_cv))
        #y_pred=clf.predict_proba(X_test_cv).T[1]
        print y_pred.shape
        xx.append(llfun(y_test_cv,(y_pred)))
        ypred[test_index]=y_pred
        print xx[-1]#,y_pred.shape

    print xx,'average:',np.mean(xx),'std',np.std(xx)
    return ypred
开发者ID:daxiongshu,项目名称:bnp,代码行数:31,代码来源:getf.py

示例6: xgb_meta_predict

def xgb_meta_predict(data_base_dir,data_meta_part1_dir,submission_dir):
    """Train seven ``xgb_classifier`` configurations on the part-1 meta features.

    The first half of the rows of ``y.p`` and ``X_numerical.p`` is combined
    with the part-1 meta features of the online, random-forest, SGD and
    (for the first two runs) xgb base models.  Every run calls
    ``train_predict_all_labels(..., predict_y14=True)``.

    NOTE(review): no run persists its predictions -- the corresponding
    ``save_predictions`` calls are disabled, so ``submission_dir`` and
    ``test_id`` are currently unused and the function returns ``None``.

    Args:
        data_base_dir: path prefix for the base pickles.
        data_meta_part1_dir: path prefix for the part-1 meta-feature pickles.
        submission_dir: path prefix intended for the output files.
    """
    def _load(path):
        # Close the file deterministically instead of leaking the handle.
        # NOTE(review): pickle executes arbitrary code on load; these paths
        # must only ever point at trusted, locally produced files.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    test_id = _load(data_base_dir + "test_id.p")
    y_all = _load(data_base_dir + "y.p")
    # Floor division keeps the slice bound an integer on Python 2 and 3.
    y_part1 = y_all[:y_all.shape[0] // 2, :]

    X_numerical = _load(data_base_dir + "X_numerical.p")
    X_numerical_part1 = X_numerical[:X_numerical.shape[0] // 2, :]
    X_test_numerical = _load(data_base_dir + "X_test_numerical.p")

    X_part1_xgb = _load(data_meta_part1_dir + "X_meta_part1_xgb.p")
    X_test_xgb = _load(data_meta_part1_dir + "X_test_meta_xgb_all.p")

    X_part1_rf = _load(data_meta_part1_dir + "X_meta_part1_rf.p")
    X_test_rf = _load(data_meta_part1_dir + "X_test_meta_rf.p")

    X_part1_sgd = _load(data_meta_part1_dir + "X_meta_part1_sgd.p")
    X_test_sgd = _load(data_meta_part1_dir + "X_test_meta_sgd.p")

    X_part1_best_online = _load(data_meta_part1_dir + "X_meta_part1_online.p")
    X_test_best_online = _load(data_meta_part1_dir + "X_test_meta_online.p")
    X_test_online_ensemble = _load(data_meta_part1_dir + "X_test_meta_online_ensemble.p")

    # Column groups; they are stacked per run so that only one pair of
    # stacked matrices is alive at any time.
    train_with_xgb = [X_part1_best_online, X_part1_rf, X_part1_sgd, X_part1_xgb, X_numerical_part1]
    train_plain = [X_part1_best_online, X_part1_rf, X_part1_sgd, X_numerical_part1]
    test_ensemble_with_xgb = [X_test_online_ensemble, X_test_rf, X_test_sgd, X_test_xgb, X_test_numerical]
    test_ensemble = [X_test_online_ensemble, X_test_rf, X_test_sgd, X_test_numerical]
    test_best = [X_test_best_online, X_test_rf, X_test_sgd, X_test_numerical]

    # (hyper-parameters, training columns, test columns), in training order.
    # The file name each run used when saving was enabled is noted alongside.
    runs = (
        # best single model submitted, private LB 0.0044595, X_test_meta
        #   xgb-part1-d18-e0.09-min6-tree120-xgb_base.csv.gz
        (dict(eta=0.09, min_child_weight=6, depth=18, num_round=120),
         train_with_xgb, test_ensemble_with_xgb),
        # best single model (not submitted by itself), private LB 0.0044591
        #   xgb-part1-d20-e0.07-min6-tree150-xgb_base.csv.gz
        (dict(eta=0.07, min_child_weight=6, depth=20, num_round=150),
         train_with_xgb, test_ensemble_with_xgb),
        # private LB 0.0047360 correct! try "boosting from existing predictions"
        #   xgb-part1-d20-e0.07-min6-tree20-extree-150.csv.gz
        (dict(eta=0.07, min_child_weight=6, depth=20, num_round=20,
              exist_prediction=True, exist_num_round=150),
         train_plain, test_best),
        # private LB 0.0047103
        #   xgb-part1-d18-e0.09-min6-tree1-extree-120.csv.gz
        (dict(eta=0.09, min_child_weight=6, depth=18, num_round=1,
              exist_prediction=True, exist_num_round=120),
         train_plain, test_ensemble),
        # private LB 0.0047000, using ensembled online predictions as meta
        # feature for test sets!
        #   xgb-part1-d20-e0.07-min6-tree150.csv.gz
        (dict(eta=0.07, min_child_weight=6, depth=20, num_round=150),
         train_plain, test_ensemble),
        # private LB 0.0047313, correct!
        #   xgb-part1-d19-e0.07-min6-tree150.csv.gz
        (dict(eta=0.07, min_child_weight=6, depth=19, num_round=150),
         train_plain, test_best),
        # private LB 0.0047446, correct!
        #   xgb-part1-d18-e0.09-min6-tree120.csv.gz
        (dict(eta=0.09, min_child_weight=6, depth=18, num_round=120),
         train_plain, test_best),
    )

    for params, train_blocks, test_blocks in runs:
        xgb_clf = xgb_classifier(threads=16, **params)
        X_xgb_predict = xgb_clf.train_predict_all_labels(
            np.hstack(train_blocks),
            y_part1,
            np.hstack(test_blocks),
            predict_y14=True)
        # Saving is disabled; to persist a run, pass its file name (see the
        # table above), test_id and X_xgb_predict to save_predictions here.
开发者ID:sduprey,项目名称:PYTHON_WEB,代码行数:57,代码来源:xgb_meta_part1_predict.py

示例7: xgb_meta_predict

def xgb_meta_predict(data_base_dir,data_meta_part1_dir,submission_dir):
    """Train three additional ``xgb_classifier`` models and save their predictions.

    1. All labels on the raw features (``X_all.p`` / ``X_test_all.p``).
    2. Label 33 only, on the raw features.
    3. Label 33 only, on the first half of the sparse features stacked with
       the part-1 meta features and numerical features.

    Args:
        data_base_dir: path prefix for the base pickles.
        data_meta_part1_dir: path prefix for the part-1 meta-feature pickles.
        submission_dir: path prefix for the written ``.csv.gz`` files.

    Returns:
        None.  The results are the files written by ``save_predictions``.
    """
    def _load(path):
        # Close the file deterministically instead of leaking the handle.
        # NOTE(review): pickle executes arbitrary code on load; these paths
        # must only ever point at trusted, locally produced files.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    test_id = _load(data_base_dir + "test_id.p")
    y_all = _load(data_base_dir + "y.p")
    X_all = _load(data_base_dir + "X_all.p")
    X_test = _load(data_base_dir + "X_test_all.p")
    # Floor division keeps the slice bound an integer on Python 2 and 3.
    y_part1 = y_all[:y_all.shape[0] // 2, :]

    xgb_clf = xgb_classifier(eta=0.07, min_child_weight=6, depth=20, num_round=150, threads=16)
    X_xgb_predict = xgb_clf.train_predict_all_labels(X_all, y_all, X_test, predict_y14=True)
    save_predictions(submission_dir + 'xgb-raw-d20-e0.07-min6-tree150.csv.gz', test_id, X_xgb_predict)

    xgb_clf = xgb_classifier(eta=0.1, min_child_weight=7, depth=100, num_round=150, threads=16)
    X_xgb_predict = xgb_clf.train_predict_label(X_all, y_all, X_test, label=33)  # predict label 33 only
    save_predictions(submission_dir + 'xgb-y33-d100-e0.1-min7-tree150.csv.gz', test_id, X_xgb_predict)

    # The meta features are loaded only after the raw-feature models above
    # have been trained and saved.
    X_part1_best_online = _load(data_meta_part1_dir + "X_meta_part1_online.p")
    X_test_best_online = _load(data_meta_part1_dir + "X_test_meta_online.p")

    X_numerical = _load(data_base_dir + "X_numerical.p")
    X_numerical_part1 = X_numerical[:X_numerical.shape[0] // 2, :]
    X_test_numerical = _load(data_base_dir + "X_test_numerical.p")

    X_part1_xgb = _load(data_meta_part1_dir + "X_meta_part1_xgb.p")
    X_test_xgb = _load(data_meta_part1_dir + "X_test_meta_xgb_all.p")

    X_part1_rf = _load(data_meta_part1_dir + "X_meta_part1_rf.p")
    X_test_rf = _load(data_meta_part1_dir + "X_test_meta_rf.p")

    X_part1_sgd = _load(data_meta_part1_dir + "X_meta_part1_sgd.p")
    X_test_sgd = _load(data_meta_part1_dir + "X_test_meta_sgd.p")

    X_sparse = _load(data_base_dir + "X_sparse.p")
    X_test_sparse = _load(data_base_dir + "X_test_sparse.p")
    X_sparse_part1 = X_sparse[:X_sparse.shape[0] // 2, :]

    # Dense meta/numerical columns are converted to sparse form and appended
    # to the sparse features; the result is stored as CSR.
    dense_train = np.hstack([X_part1_best_online, X_part1_rf, X_part1_sgd, X_part1_xgb, X_numerical_part1]).astype(float)
    X = sparse.csr_matrix(sparse.hstack((X_sparse_part1, sparse.coo_matrix(dense_train))))
    dense_test = np.hstack([X_test_best_online, X_test_rf, X_test_sgd, X_test_xgb, X_test_numerical]).astype(float)
    Xt = sparse.csr_matrix(sparse.hstack((X_test_sparse, sparse.coo_matrix(dense_test))))

    xgb_clf = xgb_classifier(eta=0.1, min_child_weight=6, depth=30, num_round=80, threads=16)
    X_xgb_predict = xgb_clf.train_predict_label(X, y_part1, Xt, label=33)  # predict label 33 only
    save_predictions(submission_dir + 'xgb-y33-d30-e0.1-min6-tree80-all-sparse.csv.gz', test_id, X_xgb_predict)
开发者ID:Kapetis,项目名称:kaggle-tradeshift-winning-solution,代码行数:42,代码来源:other_model.py

示例8: train_predict

def train_predict(X,y,Xt,yt=None,c=1):
    """Fit the model selected by ``c`` on ``(X, y)`` and predict for ``Xt``.

    Args:
        X: training features.
        y: training targets.
        Xt: features to predict for.
        yt: forwarded to ``xgb_classifier.train_predict`` when ``c == 1`` and
            ignored otherwise.  ``None`` (the default) is replaced by a new
            empty list.
        c: model selector -- ``1`` for ``xgb_classifier``, ``2`` for
            ``RandomForestRegressor``, ``3`` for ``RankSVM``.

    Returns:
        The selected model's predictions for ``Xt``, or ``None`` when ``c``
        matches none of the selectors.
    """
    # A fresh list per call avoids sharing one mutable default across calls.
    if yt is None:
        yt = []

    if c == 1:
        clf = xgb_classifier(num_round=60, eta=0.1, min_child_weight=5,
                             depth=7, subsample=1, col=1)
        return clf.train_predict(X, y, Xt, yt)
    if c == 2:
        clf = RandomForestRegressor(n_estimators=200, n_jobs=-1, max_depth=13,
                                    min_samples_split=4, min_samples_leaf=9,
                                    max_leaf_nodes=1100)
        clf.fit(X, y)
        return clf.predict(Xt)
    if c == 3:
        clf = RankSVM()
        clf.fit(X, y)
        return clf.predict(Xt)
开发者ID:daxiongshu,项目名称:Coupon-Purchase-Prediction,代码行数:12,代码来源:per-customer.py

示例9: train_predict

def train_predict(X,y,Xt,yt=None,c=1):
    """Fit the model selected by ``c`` on ``(X, y)`` and predict for ``Xt``.

    Args:
        X: training features.
        y: training labels.
        Xt: features to predict for.
        yt: forwarded to ``xgb_classifier.train_predict`` when ``c == 'xgb'``
            and ignored otherwise.  ``None`` (the default) is replaced by a
            new empty list.
        c: model selector -- ``'xgb'`` for ``xgb_classifier``, ``'rf'`` for a
            constrained ``RandomForestClassifier``, ``'rf1'`` for a
            1000-tree ``RandomForestClassifier`` with default limits.

    Returns:
        The ``xgb_classifier`` predictions, or column 1 of ``predict_proba``
        for the forest models; ``None`` when ``c`` matches no selector.

    NOTE(review): the default ``c=1`` matches none of the string selectors,
    so a call that omits ``c`` returns ``None`` -- confirm whether the
    default was meant to be ``'xgb'``.
    """
    # A fresh list per call avoids sharing one mutable default across calls.
    if yt is None:
        yt = []

    if c == 'xgb':
        clf = xgb_classifier(num_round=200, eta=0.1, min_child_weight=2,
                             depth=20, subsample=1, col=0.6)
        return clf.train_predict(X, y, Xt, yt)
    if c == 'rf':
        clf = RandomForestClassifier(n_estimators=200, n_jobs=-1, max_depth=13,
                                     min_samples_split=4, min_samples_leaf=9,
                                     max_leaf_nodes=1100)
        clf.fit(X, y)
        return clf.predict_proba(Xt).T[1]
    if c == 'rf1':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
        clf.fit(X, y)
        return clf.predict_proba(Xt).T[1]
开发者ID:daxiongshu,项目名称:Dato-Sponsored-Page-Prediction,代码行数:12,代码来源:xgb2x.py

示例10: train_predict

def train_predict(X,y,Xt,yt=None,c=1):
    """Fit the model selected by ``c`` on ``(X, y)`` and predict for ``Xt``.

    Args:
        X: training features.
        y: training labels.
        Xt: features to predict for.
        yt: forwarded to ``xgb_classifier.train_predict`` when ``c == 1`` and
            ignored otherwise.  ``None`` (the default) is replaced by a new
            empty list.
        c: model selector -- ``1`` for ``xgb_classifier``, ``2`` for ``LDA``,
            ``3`` for ``LogisticRegression``.

    Returns:
        The ``xgb_classifier`` predictions, or column 1 of ``predict_proba``
        for the other two models; ``None`` when ``c`` matches no selector.
    """
    # A fresh list per call avoids sharing one mutable default across calls.
    if yt is None:
        yt = []

    if c == 1:
        # Other settings that were tried:
        #   num_round=45, eta=0.1, min_child_weight=5, depth=10, subsample=0.5, col=1
        #   num_round=300, eta=0.01, min_child_weight=20, depth=8, subsample=0.1, col=0.7
        clf = xgb_classifier(num_round=45, eta=0.1, min_child_weight=20,
                             depth=20, subsample=0.1, col=0.7)
        return clf.train_predict(X, y, Xt, yt)
    elif c == 2:
        clf = LDA()
        clf.fit(X, y)
        preds = clf.predict_proba(Xt)[:, 1]
        return preds
    elif c == 3:
        clf = LogisticRegression()
        clf.fit(X, y)
        preds = clf.predict_proba(Xt)[:, 1]
        return preds
开发者ID:daxiongshu,项目名称:Grasp-and-Lift,代码行数:16,代码来源:stack1_3_new_cv.py

示例11: kfold_cv

def kfold_cv(X_train, y_train, k):

    kf = StratifiedKFold(y_train, n_folds=k)

    xx = []
    zz = []
    ypred = np.zeros((y_train.shape[0], 3))
    for train_index, test_index in kf:

        X_train_cv, X_test_cv = X_train[train_index, :], X_train[test_index, :]
        y_train_cv, y_test_cv = y_train[train_index], y_train[test_index]
        clf = xgb_classifier(eta=0.1, col=0.4, min_child_weight=10, depth=6, num_round=50)  # good!
        y_pred = clf.multi(X_train_cv, y_train_cv, X_test_cv, 3, y_test=y_test_cv)
        xx.append(multiclass_log_loss(y_test_cv, y_pred))
        print xx[-1]  # ,y_pred.shape,zz[-1]
        ypred[test_index] = y_pred
    print "average:", np.mean(xx), "std", np.std(xx)
    return ypred, np.mean(xx)
开发者ID:daxiongshu,项目名称:network,代码行数:18,代码来源:mycv_0.616482.py

示例12: train_predict

def train_predict(X,y,Xt,yt=None,c=1):
    """Fit the model selected by ``c`` on ``(X, y)`` and predict for ``Xt``.

    Args:
        X: training features.
        y: training labels.
        Xt: features to predict for.
        yt: forwarded to ``xgb_classifier.train_predict`` when ``c == 1`` and
            ignored otherwise.  ``None`` (the default) is replaced by a new
            empty list.
        c: model selector -- ``1`` for ``xgb_classifier``, ``2`` for ``LDA``,
            ``3`` for ``LogisticRegression``.

    Returns:
        The ``xgb_classifier`` predictions, or column 1 of ``predict_proba``
        for the other two models; ``None`` when ``c`` matches no selector.
    """
    # A fresh list per call avoids sharing one mutable default across calls.
    if yt is None:
        yt = []

    if c == 1:
        # Other settings that were tried:
        #   num_round=45, eta=0.1, min_child_weight=5, depth=10, subsample=0.5, col=1
        #   num_round=55, eta=0.1, min_child_weight=20, depth=20, subsample=0.1, col=0.7
        # Scores noted for the setting used below:
        #   First digit touch - 0.966262479533
        #   BothStartLoadPhase - 0.969428966329
        #   HandStart - 0.930538668081
        clf = xgb_classifier(num_round=500, eta=0.01, min_child_weight=20,
                             depth=10, subsample=0.1, col=0.7)
        return clf.train_predict(X, y, Xt, yt)
    elif c == 2:
        clf = LDA()
        clf.fit(X, y)
        preds = clf.predict_proba(Xt)[:, 1]
        return preds
    elif c == 3:
        clf = LogisticRegression()
        clf.fit(X, y)
        preds = clf.predict_proba(Xt)[:, 1]
        return preds
开发者ID:daxiongshu,项目名称:Grasp-and-Lift,代码行数:18,代码来源:stack1_3_30_new_cv.py

示例13: kfold_cv

def kfold_cv(X_train, y_train,k):


    kf = StratifiedKFold(y_train,n_folds=k)

    xx=[]
    zz=[]
    ypred=np.zeros((y_train.shape[0],3))
    for train_index, test_index in kf:

        X_train_cv, X_test_cv = X_train[train_index,:],X_train[test_index,:]
        y_train_cv, y_test_cv = y_train[train_index],y_train[test_index]
        clf=xgb_classifier(eta=0.1,gamma=1e-3,col=0.35,min_child_weight=0.5,depth=7,num_round=160)
        y_pred=clf.multi(X_train_cv,y_train_cv,X_test_cv,3,y_test=y_test_cv)
        xx.append(multiclass_log_loss(y_test_cv,y_pred))
        print xx[-1]#,y_pred.shape,zz[-1]
        ypred[test_index]=y_pred
    print xx
    print 'average:',np.mean(xx),'std',np.std(xx)
    return ypred,np.mean(xx)
开发者ID:daxiongshu,项目名称:network,代码行数:20,代码来源:mycv_0.509873.py

示例14: kfold_cv

def kfold_cv(X_train, y_train,k):


    kf = StratifiedKFold(y_train,n_folds=k)

    xx=[]
    zz=[]
    ypred=np.zeros((y_train.shape[0],3))
    for train_index, test_index in kf:

        X_train_cv, X_test_cv = X_train[train_index,:],X_train[test_index,:]
        y_train_cv, y_test_cv = y_train[train_index],y_train[test_index]
        #clf=RandomForestClassifier(n_jobs=-1,max_depth=21,max_features=30,n_estimators=100)
        #clf.fit(X_train_cv,y_train_cv)
        #y_pred=clf.predict_proba(X_test_cv)
        clf=xgb_classifier(eta=0.25,col=0.4,min_child_weight=1,depth=6,num_round=70)
        y_pred=clf.multi(X_train_cv,y_train_cv,X_test_cv,3,y_test=y_test_cv)
        xx.append(multiclass_log_loss(y_test_cv,y_pred))
        print xx[-1]#,y_pred.shape,zz[-1]
        ypred[test_index]=y_pred
    print xx
    print 'average:',np.mean(xx),'std',np.std(xx)
    return ypred,np.mean(xx)
开发者ID:daxiongshu,项目名称:network,代码行数:23,代码来源:mycv_0.515721.py

示例15: Memory

from sklearn.externals.joblib import Memory
from sklearn.datasets import load_svmlight_file
# Cache object whose ``cache`` decorator wraps get_data below.
mem = Memory("./mycache")


@mem.cache
def get_data(path):
    """Load an svmlight-format file and return its first two components.

    Args:
        path: passed unchanged to ``load_svmlight_file``.

    Returns:
        A tuple of elements 0 and 1 of the ``load_svmlight_file`` result
        (presumably the feature matrix and the target vector -- confirm
        against the scikit-learn documentation).
    """
    loaded = load_svmlight_file(path)
    features, targets = loaded[0], loaded[1]
    return features, targets
# Column names: the CSV index column and the training target column.
idname = 'ID'
labelname = 'target'

# Training data: split off the target, keep the remaining columns as a matrix.
train_frame = pd.read_csv('train_clean1.csv', index_col=idname)
y = np.array(train_frame[labelname])
y = y.astype(float)
train_frame.drop([labelname], axis=1, inplace=True)
X = train_frame.as_matrix()
del train_frame

# Test data: every column is a feature; the index supplies the row ids.
test_frame = pd.read_csv('test_clean1.csv', index_col=idname)
Xt = test_frame.as_matrix()
idx = np.array(test_frame.index)
del test_frame

# Train on the full training set and predict for the test rows.
clf = xgb_classifier(
    eta=0.1,
    min_child_weight=20,
    col=0.7,
    subsample=1,
    depth=10,
    num_round=50,
    seed=0,
    gamma=0.1,
)
yp = clf.train_predict(X, y, Xt)

# Write the id / prediction pairs without the frame's own index.
s = pd.DataFrame({idname: idx, 'PredictedProb': yp})
s.to_csv('xgb1.csv', index=False)
开发者ID:daxiongshu,项目名称:bnp,代码行数:29,代码来源:xgb1.py


注:本文中的xgb_classifier.xgb_classifier函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。