Python catboost.CatBoostClassifier类代码示例

本文整理汇总了Python中catboost.CatBoostClassifier类的典型用法代码示例。如果您正苦于以下问题：Python CatBoostClassifier类的具体用法？Python CatBoostClassifier怎么用？Python CatBoostClassifier使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了CatBoostClassifier类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: train_preprocessor

def train_preprocessor(path='.', train='train.csv'):
    """Train the trash-comment classifier and save it next to its vectorizer.

    Reads the CSV file ``train`` located in ``path``, fits a
    ``CountVectorizer`` on its ``text`` column and a ``CatBoostClassifier``
    on the ``status`` column, then writes two artifacts into ``path``:
    ``trash_model`` (the classifier) and ``trash_vectorizer`` (the fitted
    vectorizer, dumped with joblib).

    Args:
        path: Directory that contains the training CSV and receives the
            saved model and vectorizer.
        train: File name of the training CSV inside ``path``.
    """
    print('start train trash preprocessor...')
    df = pd.read_csv(os.path.join(path, train))

    # The last 100 rows are kept out of training; rows [-100, -50) of the
    # frame form the validation set, the final 50 rows are not used here.
    train_data = df[:-100]
    validation_data = df[-100:-50]

    # Fit the vocabulary on training text only and reuse it for validation.
    vectorizer = CountVectorizer()
    x_train_counts = vectorizer.fit_transform(train_data.text)
    x_validation_counts = vectorizer.transform(validation_data.text)

    model = CatBoostClassifier(
        iterations=250,
        train_dir=path,
        logging_level='Silent',
        allow_writing_files=False,
    )

    # The count matrices are converted to dense arrays before being handed
    # to CatBoost.
    model.fit(
        X=x_train_counts.toarray(),
        y=train_data.status,
        eval_set=(x_validation_counts.toarray(), validation_data.status),
        use_best_model=True,
    )

    model.save_model(os.path.join(path, 'trash_model'))
    joblib.dump(vectorizer, os.path.join(path, 'trash_vectorizer'))
    # The original closing message said "sentiment", which did not match the
    # opening message of this (trash) preprocessor.
    print('end train trash preprocessor...')

开发者ID:AnastasiaProk，项目名称:ws2018_forum_analyzer，代码行数:25，代码来源:trash_preprocessing.py

示例2: test_full_history

def test_full_history():
    """Fit with ``approx_on_full_history=True`` and compare the saved model.

    Returns whatever ``compare_canonical_models`` produces for the model
    file written to ``OUTPUT_MODEL_PATH``.
    """
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    holdout_pool = Pool(TEST_FILE, column_description=CD_FILE)

    classifier_params = {
        'od_type': 'Iter',
        'od_wait': 20,
        'random_seed': 42,
        'approx_on_full_history': True,
    }
    classifier = CatBoostClassifier(**classifier_params)
    classifier.fit(learn_pool, eval_set=holdout_pool)

    classifier.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)

开发者ID:iamnik13，项目名称:catboost，代码行数:7，代码来源:test.py

示例3: test_wrong_feature_count

def test_wrong_feature_count():
    """Predicting with one feature fewer than at fit time must raise.

    The model is fit on 10 feature columns and then asked to predict on the
    same rows with the last column dropped; ``CatboostError`` is expected.
    """
    data = np.random.rand(100, 10)
    label = np.random.randint(2, size=100)
    model = CatBoostClassifier()
    model.fit(data, label)

    # Only the mismatched predict call lives inside the ``raises`` block, so
    # a CatboostError thrown during setup or fit cannot make the test pass
    # for the wrong reason.
    with pytest.raises(CatboostError):
        model.predict(data[:, :-1])

开发者ID:iamnik13，项目名称:catboost，代码行数:7，代码来源:test.py

示例4: test_raw_predict_equals_to_model_predict

def test_raw_predict_equals_to_model_predict():
    """Raw-formula predictions on the eval pool must match ``get_test_eval()``."""
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    eval_pool = Pool(TEST_FILE, column_description=CD_FILE)

    classifier = CatBoostClassifier(iterations=10, random_seed=0)
    classifier.fit(learn_pool, eval_set=eval_pool)

    raw_scores = classifier.predict(eval_pool, prediction_type='RawFormulaVal')
    # Element-wise comparison of the values recorded during fit against the
    # values predicted afterwards on the same pool.
    elementwise_equal = classifier.get_test_eval() == raw_scores
    assert all(elementwise_equal)

开发者ID:Xiaodingdangguaiguai，项目名称:catboost，代码行数:7，代码来源:test.py

示例5: test_pool_after_fit

def test_pool_after_fit():
    """Fitting a model on a pool must leave that pool's features unchanged.

    Two pools are loaded from the same file; one is used for fitting and is
    compared against the untouched one both before and after the fit.
    """
    reference_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    fitted_pool = Pool(TRAIN_FILE, column_description=CD_FILE)

    same_before_fit = _check_data(reference_pool.get_features(), fitted_pool.get_features())
    assert same_before_fit

    classifier = CatBoostClassifier(iterations=5, random_seed=0)
    classifier.fit(fitted_pool)

    same_after_fit = _check_data(reference_pool.get_features(), fitted_pool.get_features())
    assert same_after_fit

开发者ID:iamnik13，项目名称:catboost，代码行数:7，代码来源:test.py

示例6: cleaning_comments

def cleaning_comments(raw_comments, path='.') -> str:
    """Split raw comments into "cleaned" and "bad" CSV files.

    Loads ``trash_model`` and ``trash_vectorizer`` from ``path``, scores the
    ``text`` column of the CSV at ``raw_comments`` and writes every row to
    either ``cleaned_comments.csv`` (status ``0``) or ``bad_comments.csv``
    (status ``1``) inside ``path``. Both files use the header
    ``likes,status,text``. The input file is deleted afterwards.

    Args:
        raw_comments: Path to the CSV with at least ``likes`` and ``text``
            columns. This file is removed once processing succeeds.
        path: Directory holding the saved model/vectorizer and receiving the
            two output files.

    Returns:
        Path of the written ``cleaned_comments.csv`` file.
    """
    print('start cleaning of comments...')

    raw = pd.read_csv(raw_comments)
    cleaned_comments = os.path.join(path, 'cleaned_comments.csv')
    bad_comments = os.path.join(path, 'bad_comments.csv')
    model = CatBoostClassifier().load_model(os.path.join(path, 'trash_model'))
    vectorizer = joblib.load(os.path.join(path, 'trash_vectorizer'))

    hyp = model.predict_proba(vectorizer.transform(raw.text).toarray())

    header = 'likes,status,text\n'
    bad_rows = [header]
    cleaned_rows = [header]
    for probabilities, likes, text in zip(hyp, raw.likes, raw.text):
        # Double any embedded quote so the quoted text field stays valid CSV;
        # the original wrote the text verbatim, which broke rows whose
        # comment contained a '"'.
        quoted_text = '"' + text.replace('"', '""') + '"'
        # A row is "bad" when the first probability column is below 0.6.
        if probabilities[0] < 0.6:
            bad_rows.append(str(likes) + ',1,' + quoted_text + '\n')
        else:
            cleaned_rows.append(str(likes) + ',0,' + quoted_text + '\n')

    # Write as UTF-8 explicitly: the platform default encoding may be unable
    # to encode the comment text, and pandas reads CSV as UTF-8 by default.
    with open(cleaned_comments, 'w', encoding='utf-8') as cleaned, \
            open(bad_comments, 'w', encoding='utf-8') as bad:
        cleaned.write(''.join(cleaned_rows))
        bad.write(''.join(bad_rows))

    os.remove(raw_comments)

    print('end cleaning of comments...')
    return cleaned_comments

开发者ID:AnastasiaProk，项目名称:ws2018_forum_analyzer，代码行数:25，代码来源:trash_preprocessing.py