本文整理汇总了 Python 中 catboost.CatBoostClassifier 类的典型用法代码示例。如果您正苦于以下问题:Python CatBoostClassifier 类的具体用法?Python CatBoostClassifier 怎么用?Python CatBoostClassifier 使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 CatBoostClassifier 类的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: train_preprocessor
def train_preprocessor(path='.', train='train.csv'):
    """Train the "trash" comment classifier and persist it with its vectorizer.

    Reads the CSV ``train`` from ``path``, fits a ``CountVectorizer`` on the
    ``text`` column and a ``CatBoostClassifier`` on the resulting counts with
    the ``status`` column as label, then writes ``trash_model`` and
    ``trash_vectorizer`` into ``path``.

    Args:
        path: Directory that holds the training CSV and receives the outputs.
        train: File name of the training CSV inside ``path``.
    """
    print('start train trash preprocessor...')
    df = pd.read_csv(os.path.join(path, train))

    # Everything except the last 100 rows is used for fitting; rows
    # [-100, -50) form the validation split. The final 50 rows are not
    # touched by this function.
    train_data = df[:-100]
    validation_data = df[-100:-50]

    vectorizer = CountVectorizer()
    x_train_counts = vectorizer.fit_transform(train_data.text)
    x_validation_counts = vectorizer.transform(validation_data.text)

    model = CatBoostClassifier(
        iterations=250,
        train_dir=path,
        logging_level='Silent',
        allow_writing_files=False,
    )
    # The sparse count matrices are densified before being handed to CatBoost.
    model.fit(
        X=x_train_counts.toarray(),
        y=train_data.status,
        eval_set=(x_validation_counts.toarray(), validation_data.status),
        use_best_model=True,
    )

    model.save_model(os.path.join(path, 'trash_model'))
    joblib.dump(vectorizer, os.path.join(path, 'trash_vectorizer'))
    # Fixed: the closing message used to say "sentiment", which did not match
    # the opening message of this function.
    print('end train trash preprocessor...')
示例2: test_full_history
def test_full_history():
    """Fit with ``approx_on_full_history=True`` and check the saved model.

    Returns whatever ``compare_canonical_models`` yields for the model file
    written to ``OUTPUT_MODEL_PATH``.
    """
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    eval_pool = Pool(TEST_FILE, column_description=CD_FILE)

    classifier = CatBoostClassifier(
        od_type='Iter',
        od_wait=20,
        random_seed=42,
        approx_on_full_history=True,
    )
    classifier.fit(learn_pool, eval_set=eval_pool)
    classifier.save_model(OUTPUT_MODEL_PATH)

    return compare_canonical_models(OUTPUT_MODEL_PATH)
示例3: test_wrong_feature_count
def test_wrong_feature_count():
    """Predicting with one feature column fewer than at fit time must raise.

    The model is fitted on a random 100x10 matrix and then asked to predict
    on the same matrix with the last column dropped.
    """
    data = np.random.rand(100, 10)
    label = np.random.randint(2, size=100)
    model = CatBoostClassifier()
    model.fit(data, label)

    # Only the call under test lives inside ``pytest.raises``: a
    # CatboostError raised during setup or ``fit`` must fail the test
    # instead of silently satisfying the expectation.
    with pytest.raises(CatboostError):
        model.predict(data[:, :-1])
示例4: test_raw_predict_equals_to_model_predict
def test_raw_predict_equals_to_model_predict():
    """``RawFormulaVal`` predictions on the eval pool must equal ``get_test_eval()``."""
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    eval_pool = Pool(TEST_FILE, column_description=CD_FILE)

    classifier = CatBoostClassifier(iterations=10, random_seed=0)
    classifier.fit(learn_pool, eval_set=eval_pool)

    raw_scores = classifier.predict(eval_pool, prediction_type='RawFormulaVal')
    # Element-wise comparison of the values recorded during fitting with the
    # freshly computed raw predictions.
    elementwise_equal = classifier.get_test_eval() == raw_scores
    assert all(elementwise_equal)
示例5: test_pool_after_fit
def test_pool_after_fit():
    """Fitting a model on a pool must leave that pool's features unchanged.

    Two pools are loaded from the same file; one is used for fitting and is
    compared against the untouched one before and after the fit.
    """
    reference_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    fitted_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    assert _check_data(reference_pool.get_features(), fitted_pool.get_features())

    classifier = CatBoostClassifier(iterations=5, random_seed=0)
    classifier.fit(fitted_pool)

    assert _check_data(reference_pool.get_features(), fitted_pool.get_features())
示例6: cleaning_comments
def cleaning_comments(raw_comments, path='.') -> str:
    """Split raw comments into "cleaned" and "bad" CSV files using the trash model.

    Loads ``trash_model`` and ``trash_vectorizer`` from ``path``, scores the
    ``text`` column of the ``raw_comments`` CSV and writes each row either to
    ``cleaned_comments.csv`` (status 0) or ``bad_comments.csv`` (status 1)
    inside ``path``. The input file is deleted afterwards.

    Args:
        raw_comments: Path of the CSV to classify; it must provide ``likes``
            and ``text`` columns. The file is removed once processing is done.
        path: Directory containing the saved model/vectorizer and receiving
            the two output files.

    Returns:
        Path of the written ``cleaned_comments.csv``.
    """
    print('start cleaning of comments...')
    raw = pd.read_csv(raw_comments)
    cleaned_comments = os.path.join(path, 'cleaned_comments.csv')
    bad_comments = os.path.join(path, 'bad_comments.csv')

    model = CatBoostClassifier().load_model(os.path.join(path, 'trash_model'))
    vectorizer = joblib.load(os.path.join(path, 'trash_vectorizer'))
    hyp = model.predict_proba(vectorizer.transform(raw.text).toarray())

    header = 'likes,status,text\n'
    with open(cleaned_comments, 'w') as cleaned, open(bad_comments, 'w') as bad:
        cleaned.write(header)
        bad.write(header)
        # Rows are streamed to the files instead of being accumulated in one
        # ever-growing string.
        for likes, text, proba in zip(raw.likes, raw.text, hyp):
            # Double any embedded quote so the quoted text field stays a
            # single, well-formed CSV field.
            quoted_text = '"' + text.replace('"', '""') + '"'
            # A comment goes to the "bad" file when the first predicted
            # probability is below 0.6.
            if proba[0] < 0.6:
                bad.write(str(likes) + ',1,' + quoted_text + '\n')
            else:
                cleaned.write(str(likes) + ',0,' + quoted_text + '\n')

    os.remove(raw_comments)
    print('end cleaning of comments...')
    return cleaned_comments
示例7: test_ntree_limit
def test_ntree_limit():
    """Save class probabilities predicted with ``ntree_end=10`` from a 100-iteration model.

    Returns the result of ``local_canonical_file`` for the saved predictions.
    """
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    eval_pool = Pool(TEST_FILE, column_description=CD_FILE)

    classifier = CatBoostClassifier(iterations=100, random_seed=0)
    classifier.fit(learn_pool)

    probabilities = classifier.predict_proba(eval_pool, ntree_end=10)
    np.save(PREDS_PATH, np.array(probabilities))
    return local_canonical_file(PREDS_PATH)
示例8: test_non_ones_weight
def test_non_ones_weight():
    """Fit on a pool whose row weights are 1, 2, ..., num_row and check the saved model.

    Returns the result of ``compare_canonical_models`` for the saved model.
    """
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    row_count = learn_pool.num_row()
    learn_pool.set_weight(np.arange(1, row_count + 1))

    classifier = CatBoostClassifier(iterations=2, random_seed=0)
    classifier.fit(learn_pool)
    classifier.save_model(OUTPUT_MODEL_PATH)

    return compare_canonical_models(OUTPUT_MODEL_PATH)
示例9: test_zero_baseline
def test_zero_baseline():
    """Fit on a pool with an all-zero baseline and check the saved model.

    Returns the result of ``compare_canonical_models`` for the saved model.
    """
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    row_count = learn_pool.num_row()
    learn_pool.set_baseline(np.zeros(row_count))

    classifier = CatBoostClassifier(iterations=2, random_seed=0)
    classifier.fit(learn_pool)
    classifier.save_model(OUTPUT_MODEL_PATH)

    return compare_canonical_models(OUTPUT_MODEL_PATH)
示例10: test_no_cat_in_predict
def test_no_cat_in_predict():
    """Predictions must match whether or not ``cat_features`` is passed at predict time.

    The mapped test features are predicted once as plain data and once
    wrapped in a ``Pool`` that declares the categorical feature indices.
    """
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    eval_pool = Pool(TEST_FILE, column_description=CD_FILE)

    classifier = CatBoostClassifier(iterations=2, random_seed=0)
    classifier.fit(learn_pool)

    # Plain features, no categorical indices supplied.
    plain_features = map_cat_features(
        eval_pool.get_features(),
        learn_pool.get_cat_feature_indices(),
    )
    plain_predictions = classifier.predict(plain_features)

    # Same features, but wrapped in a Pool with explicit categorical indices.
    pooled_features = Pool(
        map_cat_features(
            eval_pool.get_features(),
            learn_pool.get_cat_feature_indices(),
        ),
        cat_features=learn_pool.get_cat_feature_indices(),
    )
    pooled_predictions = classifier.predict(pooled_features)

    assert _check_data(plain_predictions, pooled_predictions)
示例11: test_predict_class
def test_predict_class():
    """Save ``prediction_type="Class"`` predictions for the test pool.

    Returns the result of ``local_canonical_file`` for the saved predictions.
    """
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    eval_pool = Pool(TEST_FILE, column_description=CD_FILE)

    classifier = CatBoostClassifier(iterations=2, random_seed=0)
    classifier.fit(learn_pool)

    class_labels = classifier.predict(eval_pool, prediction_type="Class")
    np.save(PREDS_PATH, np.array(class_labels))
    return local_canonical_file(PREDS_PATH)
示例12: test_staged_predict
def test_staged_predict():
    """Collect every item yielded by ``staged_predict`` on the test pool and save them.

    Returns the result of ``local_canonical_file`` for the saved predictions.
    """
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    eval_pool = Pool(TEST_FILE, column_description=CD_FILE)

    classifier = CatBoostClassifier(iterations=10, random_seed=0)
    classifier.fit(learn_pool)

    # Materialise the staged predictions in the order they are yielded.
    staged = list(classifier.staged_predict(eval_pool))
    np.save(PREDS_PATH, np.array(staged))
    return local_canonical_file(PREDS_PATH)
示例13: create_model
def create_model(self, kfold_X_train, y_train, kfold_X_valid, y_test, test):
    """Fit a one-vs-all multiclass CatBoost model and score two datasets.

    Args:
        kfold_X_train: Training features for this fold.
        y_train: Training labels for this fold.
        kfold_X_valid: Validation features for this fold.
        y_test: Not used by this method.
        test: Features of the test set to score.

    Returns:
        A tuple ``(pred, results, best)``: class probabilities for
        ``kfold_X_valid``, class probabilities for ``test``, and the fitted
        classifier.
    """
    classifier = CatBoostClassifier(
        loss_function='MultiClassOneVsAll',
        learning_rate=0.07940735491731761,
        depth=8,
    )
    classifier.fit(kfold_X_train, y_train)

    # Predict on the validation set first, then on the test set.
    validation_proba = classifier.predict_proba(kfold_X_valid)
    test_proba = classifier.predict_proba(test)
    return validation_proba, test_proba, classifier
示例14: test_ignored_features
def test_ignored_features():
    """A model with ``ignored_features=[1, 2, 3]`` must predict differently from the default.

    Returns the result of ``compare_canonical_models`` for the saved model
    that was trained with ignored features.
    """
    learn_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    eval_pool = Pool(TEST_FILE, column_description=CD_FILE)

    restricted = CatBoostClassifier(iterations=5, random_seed=0, ignored_features=[1, 2, 3])
    unrestricted = CatBoostClassifier(iterations=5, random_seed=0)

    restricted.fit(learn_pool)
    unrestricted.fit(learn_pool)

    restricted_predictions = restricted.predict(eval_pool)
    unrestricted_predictions = unrestricted.predict(eval_pool)
    predictions_match = _check_data(restricted_predictions, unrestricted_predictions)
    assert not predictions_match

    restricted.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)
示例15: train_catboost_model
def train_catboost_model(df, target, cat_features, params, verbose=True):
    """Fit a ``CatBoostClassifier`` on a DataFrame with named categorical columns.

    Args:
        df: Feature table; must be a ``DataFrame``.
        target: Labels passed to ``fit`` as the second positional argument.
        cat_features: Column names in ``df`` to treat as categorical; they
            are translated to positional indices via ``df.columns.get_loc``.
        params: Keyword arguments forwarded to ``CatBoostClassifier``.
        verbose: Forwarded to ``fit``.

    Returns:
        The fitted classifier.

    Raises:
        TypeError: If ``df`` is not a ``DataFrame``. ``TypeError`` is a
            subclass of ``Exception``, so callers catching the previous
            generic ``Exception`` keep working.
    """
    if not isinstance(df, DataFrame):
        raise TypeError('DataFrame object expected, but got ' + repr(df))

    # Single-argument print() emits the same text on Python 2 and Python 3;
    # the former ``print 'x', y`` statements were a syntax error on Python 3.
    print('features: {}'.format(df.columns.tolist()))
    cat_features_index = [df.columns.get_loc(feature) for feature in cat_features]
    print('cat features: {}'.format(cat_features_index))

    model = CatBoostClassifier(**params)
    model.fit(df, target, cat_features=cat_features_index, verbose=verbose)
    return model