当前位置: 首页>>代码示例>>Python>>正文


Python catboost.CatBoostClassifier类代码示例

本文整理汇总了Python中catboost.CatBoostClassifier的典型用法代码示例。如果您正苦于以下问题：Python CatBoostClassifier类的具体用法？Python CatBoostClassifier怎么用？Python CatBoostClassifier使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了CatBoostClassifier类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: train_preprocessor

def train_preprocessor(path='.', train='train.csv'):
    """Train the trash-comment classifier and persist it with its vectorizer.

    Reads the CSV ``train`` from ``path``, fits a ``CountVectorizer`` on the
    ``text`` column and a ``CatBoostClassifier`` on the resulting counts with
    the ``status`` column as label, then writes ``trash_model`` and
    ``trash_vectorizer`` into ``path``.

    Args:
        path: Directory containing the training CSV; artifacts are written here.
        train: File name of the training CSV inside ``path``.
    """
    print('start train trash preprocessor...')
    df = pd.read_csv(os.path.join(path, train))

    # Everything except the last 100 rows is used for fitting; rows [-100, -50)
    # form the validation set. The final 50 rows are not used by this function.
    train_data = df[:-100]
    validation_data = df[-100:-50]

    # The vocabulary is learned from the training rows only and reused for validation.
    vectorizer = CountVectorizer()
    x_train_counts = vectorizer.fit_transform(train_data.text)
    x_validation_counts = vectorizer.transform(validation_data.text)

    model = CatBoostClassifier(iterations=250,
                               train_dir=path,
                               logging_level='Silent',
                               allow_writing_files=False
                               )

    # The count matrices are converted to dense arrays before being passed to CatBoost.
    model.fit(X=x_train_counts.toarray(),
              y=train_data.status,
              eval_set=(x_validation_counts.toarray(), validation_data.status),
              use_best_model=True,)

    model.save_model(os.path.join(path, 'trash_model'))
    joblib.dump(vectorizer, os.path.join(path, 'trash_vectorizer'))
    # Closing message now names the same ("trash") preprocessor as the opening one.
    print('end train trash preprocessor...')
开发者ID:AnastasiaProk,项目名称:ws2018_forum_analyzer,代码行数:25,代码来源:trash_preprocessing.py

示例2: test_full_history

def test_full_history():
    """Fit with ``approx_on_full_history=True`` and check the saved model.

    Returns whatever ``compare_canonical_models`` yields for the saved model file.
    """
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    eval_pool = Pool(TEST_FILE, column_description=CD_FILE)
    settings = dict(
        od_type='Iter',
        od_wait=20,
        random_seed=42,
        approx_on_full_history=True,
    )
    classifier = CatBoostClassifier(**settings)
    classifier.fit(learn_pool, eval_set=eval_pool)
    classifier.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)
开发者ID:iamnik13,项目名称:catboost,代码行数:7,代码来源:test.py

示例3: test_wrong_feature_count

def test_wrong_feature_count():
    """Predicting with one feature column fewer than was fitted must raise ``CatboostError``."""
    data = np.random.rand(100, 10)
    label = np.random.randint(2, size=100)
    model = CatBoostClassifier()
    model.fit(data, label)
    # Only the predict call sits inside ``pytest.raises`` so that an error raised
    # during setup or ``fit`` fails the test instead of satisfying the expectation.
    with pytest.raises(CatboostError):
        model.predict(data[:, :-1])
开发者ID:iamnik13,项目名称:catboost,代码行数:7,代码来源:test.py

示例4: test_raw_predict_equals_to_model_predict

def test_raw_predict_equals_to_model_predict():
    """Raw-formula predictions on the eval pool must equal ``get_test_eval()``."""
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    eval_pool = Pool(TEST_FILE, column_description=CD_FILE)
    classifier = CatBoostClassifier(iterations=10, random_seed=0)
    classifier.fit(learn_pool, eval_set=eval_pool)
    raw_scores = classifier.predict(eval_pool, prediction_type='RawFormulaVal')
    eval_scores = classifier.get_test_eval()
    assert all(eval_scores == raw_scores)
开发者ID:Xiaodingdangguaiguai,项目名称:catboost,代码行数:7,代码来源:test.py

示例5: test_pool_after_fit

def test_pool_after_fit():
    """A pool's features must still match an untouched twin pool after ``fit``.

    Both pools are loaded from the same file; the comparison via ``_check_data``
    is asserted once before and once after fitting on the second pool.
    """
    reference_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    fitted_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    assert _check_data(reference_pool.get_features(), fitted_pool.get_features())

    classifier = CatBoostClassifier(iterations=5, random_seed=0)
    classifier.fit(fitted_pool)

    assert _check_data(reference_pool.get_features(), fitted_pool.get_features())
开发者ID:iamnik13,项目名称:catboost,代码行数:7,代码来源:test.py

示例6: cleaning_comments

def _comment_csv_row(likes, status, text):
    """Format one ``likes,status,"text"`` CSV line, doubling embedded double quotes."""
    return str(likes) + ',' + status + ',"' + text.replace('"', '""') + '"\n'


def cleaning_comments(raw_comments, path='.') -> str:
    """Split raw comments into cleaned and bad CSV files using the trash model.

    Loads ``trash_model`` and ``trash_vectorizer`` from ``path``, scores the
    ``text`` column of ``raw_comments`` and writes each row either to
    ``bad_comments.csv`` (status ``1``) or ``cleaned_comments.csv``
    (status ``0``) inside ``path``. The input file ``raw_comments`` is deleted
    afterwards.

    Args:
        raw_comments: Path to a CSV file with at least ``likes`` and ``text`` columns.
        path: Directory holding the model artifacts; output files are written here.

    Returns:
        Path of the written ``cleaned_comments.csv``.
    """
    print('start cleaning of comments...')

    raw = pd.read_csv(raw_comments)
    cleaned_comments = os.path.join(path, 'cleaned_comments.csv')
    bad_comments = os.path.join(path, 'bad_comments.csv')
    model = CatBoostClassifier().load_model(os.path.join(path, 'trash_model'))
    vectorizer = joblib.load(os.path.join(path, 'trash_vectorizer'))

    hyp = model.predict_proba(vectorizer.transform(raw.text).toarray())

    # Build both outputs in memory first so the target files are only touched
    # once all rows have been formatted successfully.
    header = 'likes,status,text\n'
    bad_rows = [header]
    cleaned_rows = [header]
    for likes, text, proba in zip(raw.likes, raw.text, hyp):
        # A row is treated as bad when the first predicted probability is below 0.6.
        if proba[0] < 0.6:
            bad_rows.append(_comment_csv_row(likes, '1', text))
        else:
            cleaned_rows.append(_comment_csv_row(likes, '0', text))

    with open(cleaned_comments, 'w') as cleaned, open(bad_comments, 'w') as bad:
        cleaned.write(''.join(cleaned_rows))
        bad.write(''.join(bad_rows))

    # The raw input is consumed: it is removed once both outputs are written.
    os.remove(raw_comments)

    print('end cleaning of comments...')
    return cleaned_comments
开发者ID:AnastasiaProk,项目名称:ws2018_forum_analyzer,代码行数:25,代码来源:trash_preprocessing.py

示例7: test_ntree_limit

def test_ntree_limit():
    """Save ``predict_proba`` output computed with ``ntree_end=10`` on a 100-iteration model.

    Returns the result of ``local_canonical_file`` for the saved predictions.
    """
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    eval_pool = Pool(TEST_FILE, column_description=CD_FILE)
    classifier = CatBoostClassifier(iterations=100, random_seed=0)
    classifier.fit(learn_pool)
    probabilities = classifier.predict_proba(eval_pool, ntree_end=10)
    as_array = np.array(probabilities)
    np.save(PREDS_PATH, as_array)
    return local_canonical_file(PREDS_PATH)
开发者ID:iamnik13,项目名称:catboost,代码行数:8,代码来源:test.py

示例8: test_non_ones_weight

def test_non_ones_weight():
    """Fit on a pool whose row weights are 1, 2, ..., ``num_row()`` and check the saved model.

    Returns the result of ``compare_canonical_models`` for the saved model file.
    """
    weighted_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    row_count = weighted_pool.num_row()
    weighted_pool.set_weight(np.arange(1, row_count + 1))

    classifier = CatBoostClassifier(iterations=2, random_seed=0)
    classifier.fit(weighted_pool)
    classifier.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)
开发者ID:iamnik13,项目名称:catboost,代码行数:8,代码来源:test.py

示例9: test_zero_baseline

def test_zero_baseline():
    """Fit on a pool with an all-zero baseline and check the saved model.

    Returns the result of ``compare_canonical_models`` for the saved model file.
    """
    baseline_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    row_count = baseline_pool.num_row()
    baseline_pool.set_baseline(np.zeros(row_count))

    classifier = CatBoostClassifier(iterations=2, random_seed=0)
    classifier.fit(baseline_pool)
    classifier.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)
开发者ID:iamnik13,项目名称:catboost,代码行数:8,代码来源:test.py

示例10: test_no_cat_in_predict

def test_no_cat_in_predict():
    """Predicting on mapped raw features must match predicting on a ``Pool`` of them.

    The first prediction passes the output of ``map_cat_features`` directly; the
    second wraps the same kind of data in a ``Pool`` with explicit ``cat_features``.
    """
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    eval_pool = Pool(TEST_FILE, column_description=CD_FILE)
    classifier = CatBoostClassifier(iterations=2, random_seed=0)
    classifier.fit(learn_pool)

    # Prediction straight from the mapped feature data.
    mapped_plain = map_cat_features(
        eval_pool.get_features(),
        learn_pool.get_cat_feature_indices(),
    )
    plain_prediction = classifier.predict(mapped_plain)

    # Prediction from a Pool that declares its categorical feature indices.
    mapped_for_pool = map_cat_features(
        eval_pool.get_features(),
        learn_pool.get_cat_feature_indices(),
    )
    cat_indices = learn_pool.get_cat_feature_indices()
    pooled_prediction = classifier.predict(Pool(mapped_for_pool, cat_features=cat_indices))

    assert _check_data(plain_prediction, pooled_prediction)
开发者ID:iamnik13,项目名称:catboost,代码行数:8,代码来源:test.py

示例11: test_predict_class

def test_predict_class():
    """Save predictions made with ``prediction_type="Class"``.

    Returns the result of ``local_canonical_file`` for the saved predictions.
    """
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    eval_pool = Pool(TEST_FILE, column_description=CD_FILE)
    classifier = CatBoostClassifier(iterations=2, random_seed=0)
    classifier.fit(learn_pool)
    class_labels = classifier.predict(eval_pool, prediction_type="Class")
    as_array = np.array(class_labels)
    np.save(PREDS_PATH, as_array)
    return local_canonical_file(PREDS_PATH)
开发者ID:iamnik13,项目名称:catboost,代码行数:8,代码来源:test.py

示例12: test_staged_predict

def test_staged_predict():
    """Collect every item yielded by ``staged_predict`` and save them as one array.

    Returns the result of ``local_canonical_file`` for the saved predictions.
    """
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    eval_pool = Pool(TEST_FILE, column_description=CD_FILE)
    classifier = CatBoostClassifier(iterations=10, random_seed=0)
    classifier.fit(learn_pool)
    stage_outputs = list(classifier.staged_predict(eval_pool))
    np.save(PREDS_PATH, np.array(stage_outputs))
    return local_canonical_file(PREDS_PATH)
开发者ID:iamnik13,项目名称:catboost,代码行数:10,代码来源:test.py

示例13: create_model

    def create_model(self, kfold_X_train, y_train, kfold_X_valid, y_test, test):
        """Fit a ``MultiClassOneVsAll`` CatBoost model and score the validation and test data.

        ``y_test`` is accepted but not used by this method.

        Returns:
            A tuple of the ``predict_proba`` output for ``kfold_X_valid``, the
            ``predict_proba`` output for ``test``, and the fitted classifier.
        """
        hyperparams = {
            'loss_function': 'MultiClassOneVsAll',
            'learning_rate': 0.07940735491731761,
            'depth': 8,
        }
        classifier = CatBoostClassifier(**hyperparams)
        classifier.fit(kfold_X_train, y_train)

        # Predict on the validation fold first, then on the test data.
        valid_proba = classifier.predict_proba(kfold_X_valid)
        test_proba = classifier.predict_proba(test)

        return valid_proba, test_proba, classifier
开发者ID:ansvver,项目名称:SOHU_competition,代码行数:11,代码来源:catboost_model.py

示例14: test_ignored_features

def test_ignored_features():
    """Ignoring features 1-3 must change predictions compared with using all features.

    The model trained with ``ignored_features`` is saved, and the result of
    ``compare_canonical_models`` for that file is returned.
    """
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    eval_pool = Pool(TEST_FILE, column_description=CD_FILE)

    restricted = CatBoostClassifier(iterations=5, random_seed=0, ignored_features=[1, 2, 3])
    unrestricted = CatBoostClassifier(iterations=5, random_seed=0)
    restricted.fit(learn_pool)
    unrestricted.fit(learn_pool)

    restricted_pred = restricted.predict(eval_pool)
    unrestricted_pred = unrestricted.predict(eval_pool)
    same = _check_data(restricted_pred, unrestricted_pred)
    assert not same

    restricted.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)
开发者ID:iamnik13,项目名称:catboost,代码行数:12,代码来源:test.py

示例15: train_catboost_model

def train_catboost_model(df, target, cat_features, params, verbose=True):
    """Fit a ``CatBoostClassifier`` on a DataFrame with named categorical columns.

    Args:
        df: Feature ``DataFrame``.
        target: Labels passed to ``fit`` as the second positional argument.
        cat_features: Column names in ``df`` to treat as categorical; they are
            converted to positional indices via ``df.columns.get_loc``.
        params: Keyword arguments forwarded to the ``CatBoostClassifier`` constructor.
        verbose: Forwarded to ``fit``.

    Returns:
        The fitted ``CatBoostClassifier``.

    Raises:
        TypeError: If ``df`` is not a ``DataFrame``.
    """
    if not isinstance(df, DataFrame):
        raise TypeError('DataFrame object expected, but got ' + repr(df))

    # A single pre-built string prints the same text under Python 2 and Python 3,
    # unlike the Python 2-only ``print`` statement.
    print('features: ' + str(df.columns.tolist()))

    cat_features_index = [df.columns.get_loc(feature) for feature in cat_features]
    print('cat features: ' + str(cat_features_index))
    model = CatBoostClassifier(**params)
    model.fit(df, target, cat_features=cat_features_index, verbose=verbose)
    return model
开发者ID:bamx23,项目名称:ClickHouse,代码行数:12,代码来源:train.py


注：本文中的catboost.CatBoostClassifier类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。