当前位置: 首页>>代码示例>>Python>>正文


Python GradientBoostingClassifier.transform方法代码示例

本文整理汇总了Python中sklearn.ensemble.GradientBoostingClassifier.transform方法的典型用法代码示例。如果您正苦于以下问题:Python GradientBoostingClassifier.transform方法的具体用法?Python GradientBoostingClassifier.transform怎么用?Python GradientBoostingClassifier.transform使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.ensemble.GradientBoostingClassifier的用法示例。


在下文中一共展示了GradientBoostingClassifier.transform方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: gbdt_feature

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import transform [as 别名]
def gbdt_feature(X,y,Z):
    """Fit a GBDT on (X, y), derive importance-selected features from Z,
    and merge them into the persisted feature table.

    Parameters
    ----------
    X, y : training design matrix and labels.
    Z    : data to push through the fitted model's ``transform``.

    Side effects: reads ``data\\test\\feature_all_15.csv`` via the
    project-level ``read_user`` helper and writes the merged result to
    ``data\\test\\feature_all_16.csv``. Returns nothing.
    """
    from sklearn.ensemble import GradientBoostingClassifier

    booster = GradientBoostingClassifier(n_estimators=200, max_depth=5)
    booster.fit(X, y)  # training — grab a coffee while it runs

    # threshold=None keeps the estimator's default importance cutoff.
    # NOTE(review): GradientBoostingClassifier.transform was deprecated in
    # old scikit-learn; kept as-is because it is what this example shows.
    selected = booster.transform(Z, threshold=None)
    selected_df = pd.DataFrame(selected)
    # 22 generic column names: f0 .. f21.
    selected_df.columns = ['f%d' % i for i in range(22)]

    feature_all = read_user('data\\test\\feature_all_15.csv')
    # Outer join on the row index so no enrollment is dropped.
    merged = pd.merge(feature_all, selected_df, how='outer',
                      left_index=True, right_index=True)
    merged.set_index('enrollment_id').to_csv('data\\test\\feature_all_16.csv')
开发者ID:TythonLee,项目名称:kddcup-2015,代码行数:16,代码来源:main.py

示例2: len

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import transform [as 别名]
    # NOTE(review): truncated excerpt — the enclosing function definition and
    # the objects `plt`, `clf`, `X_test`, `y_test`, `os`, `gzip`, `cPickle`
    # are defined outside this fragment (Python 2 code: `print` statement,
    # `cPickle`).

    # Finish and persist the training-loss plot started above this excerpt.
    plt.ylabel('Loss')
    plt.legend(loc="best")
    plt.savefig("loss.png")
    plt.close()

    # tree ensembles
    #print clf.estimator_.feature_importances_
    #print clf.toarray().shape
    # >(26049, 100)
    # input_features = 26049, weak_learners = 100
    #print len(one_hot.toarray()[:,0]), one_hot.toarray()[:,0]
    #print len(one_hot.toarray()[0,:]), one_hot.toarray()[0,:]

    # get test data from train trees
    # `one_hot_encoding` is the leaf-index encoding of the training split
    # (presumably computed during fit — TODO confirm against the wrapper
    # class); `transform` maps the raw test features through the trees.
    transformated_train_features = clf.one_hot_encoding.toarray()
    transformated_test_features = clf.transform(X_test, y_test)
    #print transformated_train_features.shape, X_train.shape
    #print transformated_test_features.shape, X_test.shape

    # Persist [(train feats, y_train), (test feats, y_test)] as gzipped pickle.
    out_fname = "%s/%s" % (os.environ["HOME"],
                           "data/gbdt/encoding_tree_cv.pkl.gz")
    with gzip.open(out_fname, "wb") as gf:
        cPickle.dump([[transformated_train_features, y_train], [transformated_test_features, y_test]],
                     gf,
                     cPickle.HIGHEST_PROTOCOL)
    print clf.best_params

    # 3. Logistic Regression using transformated features
    # determine C by train data
    parameters = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
                  'penalty' : ["l1","l2"]}
开发者ID:Quasi-quant2010,项目名称:gbdt,代码行数:33,代码来源:feature_transformation_ver3.py

示例3: gbdt_plus_liner_classifier_grid_search

# 需要导入模块: from sklearn.ensemble import GradientBoostingClassifier [as 别名]
# 或者: from sklearn.ensemble.GradientBoostingClassifier import transform [as 别名]

#.........这里部分代码省略.........

        # NOTE(review): truncated excerpt — the enclosing function and the
        # objects `clf`, `X_train`, `X_test`, `ax3`, `graph_fname`,
        # `model_train_fname`, `model_test_fname`, `sns`, `plt` are defined
        # outside this fragment (Python 2 code: `cPickle`, `as_matrix`).

        # tree ensembles
        score_threshold=0.8
        # Map positional column index -> original feature name.
        index2feature = dict(zip(np.arange(len(X_train.columns.values)), X_train.columns.values))
        # Column indices sorted by importance, descending (kept as strings).
        feature_importances_index = [str(j) for j in clf.estimator_.feature_importances_.argsort()[::-1]]
        feature_importances_score = [clf.estimator_.feature_importances_[int(j)] for j in feature_importances_index]
        fis = pd.DataFrame(
            {'name':[index2feature.get(int(key),'Null') for key in feature_importances_index],
             'score':feature_importances_score}
            )
        # Reuse `score_threshold`: now the 0.8-quantile of the strictly
        # positive importances; only features at or above it are plotted.
        score_threshold = fis['score'][fis['score'] > 0.0].quantile(score_threshold)
        # where_str = 'score > %f & score > %f' % (score_threshold, 0.0)
        where_str = 'score >= %f' % (score_threshold)
        fis = fis.query(where_str)
        # Horizontal bar chart of the surviving feature importances.
        sns.barplot(x = 'score', y = 'name',
                    data = fis,
                    ax=ax3,
                    color="blue")
        ax3.set_xlabel("Feature_Importance", fontsize=10)
        plt.tight_layout()
        plt.savefig(graph_fname)
        plt.close()

        #print clf.toarray().shape
        # >(26049, 100)
        #input_features = 26049, weak_learners = 100
        #print len(one_hot.toarray()[:,0]), one_hot.toarray()[:,0]
        #print len(one_hot.toarray()[0,:]), one_hot.toarray()[0,:]

        ## feature transformation : get test data from train trees
        #print transformated_train_features.shape, X_train.shape
        #print transformated_test_features.shape, X_test.shape

        # Leaf-index encoding of the training split (from fit); DataFrames
        # are converted to float32 matrices before `transform`, ndarray
        # inputs are passed through unchanged.
        transformated_train_features = clf.one_hot_encoding
        if type(X_test) == pd.core.frame.DataFrame:
            transformated_test_features = clf.transform(X_test.as_matrix().astype(np.float32), 
                                                        y_test)
        elif type(X_train) == np.ndarray:
            transformated_test_features = clf.transform(X_test, y_test)

        #model_folder = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['folder']
        #model_train_fname = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['train']
        #model_train_fname = os.path.join(Config.get_string('data.path'), 
        #                                 model_folder, 
        #                                 model_train_fname)
        # Persist the transformed training split as a gzipped pickle.
        with gzip.open(model_train_fname, "wb") as gf:
            cPickle.dump([transformated_train_features, y_train], 
                         gf,
                         cPickle.HIGHEST_PROTOCOL)

        #model_folder = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['folder']
        #model_test_fname = stack_setting_['1-Level']['gbdt_linear']['upper']['gbdt']['test']
        #model_test_fname = os.path.join(Config.get_string('data.path'), 
        #                                model_folder, 
        #                                model_test_fname)
        # Persist the transformed test split as a gzipped pickle.
        with gzip.open(model_test_fname, "wb") as gf:
            cPickle.dump([transformated_test_features, y_test],
                         gf,
                         cPickle.HIGHEST_PROTOCOL)


    """
    # 2. lower model
    if lower_param_keys is None:
        lower_param_keys = ['model_type', 'n_neighbors', 'weights',
                            'algorithm', 'leaf_size', 'metric', 'p', 'n_jobs']
开发者ID:Quasi-quant2010,项目名称:Stacking,代码行数:70,代码来源:run_gbdt_plus_liner_classifier_grid_search.20160414.py


注:本文中的sklearn.ensemble.GradientBoostingClassifier.transform方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。