本文整理汇总了 Python 中 catboost.Pool 方法的典型用法代码示例。如果您正苦于以下问题:Python catboost.Pool 方法的具体用法?Python catboost.Pool 怎么用?Python catboost.Pool 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 catboost 的用法示例。
在下文中一共展示了 catboost.Pool 方法的 6 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: get_data
# 需要导入模块: import catboost [as 别名]
# 或者: from catboost import Pool [as 别名]
def get_data(kind='array', n_rows=15, n_cols=49, fnames=None, seed=None):
    """
    Build a test fixture of the requested container type.

    The underlying matrix comes from ``get_random_matrix(n_rows, n_cols)``
    after NumPy's global RNG has been seeded with ``seed``. It is then
    wrapped according to ``kind``:

    * ``'none'``          -> ``None`` (returned before any seeding happens)
    * ``'array'``         -> the matrix itself
    * ``'sparse'``        -> ``scipy.sparse.csr_matrix``
    * ``'frame'``         -> ``pandas.DataFrame`` with ``fnames`` as columns
    * ``'series'``        -> one randomly chosen row of that frame
    * ``'data'``          -> ``DenseData(matrix, names)``
    * ``'catboost.Pool'`` -> ``catboost.Pool(matrix)``
    * anything else       -> ``0``

    When ``fnames`` is empty or ``None``, column names default to
    ``feature_0 .. feature_{k-1}``.
    """
    if kind == 'none':
        return None

    np.random.seed(seed)
    matrix = get_random_matrix(n_rows=n_rows, n_cols=n_cols)

    if kind == 'array':
        return matrix

    if kind == 'sparse':
        return scipy.sparse.csr_matrix(matrix)

    if kind in ('frame', 'series'):
        columns = fnames or [
            'feature_{}'.format(i) for i in range(matrix.shape[-1])
        ]
        if kind == 'frame':
            return pd.DataFrame(data=matrix, columns=columns)
        # Pick the row index before building the frame, as the original did.
        row = np.random.choice(np.arange(matrix.shape[0]))
        return pd.DataFrame(data=matrix, columns=columns).iloc[row, :]

    if kind == 'data':
        group_names = fnames or [
            'feature_{}'.format(i) for i in range(matrix.shape[-1])
        ]
        return DenseData(matrix, group_names)

    if kind == 'catboost.Pool':
        return catboost.Pool(matrix)

    # Unknown kinds fall through to a sentinel value rather than raising.
    return 0
示例2: fit
# 需要导入模块: import catboost [as 别名]
# 或者: from catboost import Pool [as 别名]
def fit(self, X_train, y_train, X_val, y_val, categoricals=None):
    """Train a CatBoost classifier with early stopping and report accuracy.

    Args:
        X_train: 2-D training feature array (indexed as ``X[:, mask]``).
        y_train: Training labels.
        X_val: 2-D validation feature array with the same columns.
        y_val: Validation labels; must provide ``.tolist()``.
        categoricals: Accepted for interface compatibility. NOTE: the value
            passed in is not used; categorical columns are re-detected from
            the first training row below.

    Returns:
        dict with keys ``val_preds``, ``labels``, ``train_acc``,
        ``train_balanced_acc``, ``val_acc`` and ``val_balanced_acc``.

    Side effects:
        Sets ``self.all_nan`` (mask of all-NaN training columns) and
        ``self.model`` (the fitted ``CatBoostClassifier``).
    """
    results = dict()

    # Drop columns that are NaN for every training row; the mask is kept on
    # the instance so the same columns can be dropped elsewhere.
    self.all_nan = np.all(np.isnan(X_train), axis=0)
    X_train = X_train[:, ~self.all_nan]
    X_val = X_val[:, ~self.all_nan]

    # Remaining NaNs are replaced by np.nan_to_num's defaults.
    X_train = np.nan_to_num(X_train)
    X_val = np.nan_to_num(X_val)

    # A column counts as categorical when its value in the first row is a str.
    categoricals = [ind for ind in range(X_train.shape[1]) if isinstance(X_train[0, ind], str)]

    # Fixed patience of 150 above 10k rows; below that, patience scales
    # inversely with the row count (never under 10 rounds).
    early_stopping = 150 if X_train.shape[0] > 10000 else max(round(150 * 10000 / X_train.shape[0]), 10)

    X_train_pooled = Pool(data=X_train, label=y_train, cat_features=categoricals)
    X_val_pooled = Pool(data=X_val, label=y_val, cat_features=categoricals)

    self.model = CatBoostClassifier(**self.config)
    self.model.fit(X_train_pooled, eval_set=X_val_pooled, use_best_model=True, early_stopping_rounds=early_stopping)

    pred_train = self.model.predict_proba(X_train)
    pred_val = self.model.predict_proba(X_val)

    # Stored before the argmax below, so these are the raw predict_proba outputs.
    results["val_preds"] = pred_val.tolist()
    results["labels"] = y_val.tolist()

    try:
        pred_train = np.argmax(pred_train, axis=1)
        pred_val = np.argmax(pred_val, axis=1)
    except (ValueError, IndexError):
        # argmax(axis=1) fails with an AxisError (a ValueError/IndexError
        # subclass) when the predictions are not 2-D. Only that case is
        # tolerated; a bare `except:` would also hide KeyboardInterrupt and
        # unrelated bugs.
        print("==> No probabilities provided in predictions")

    results["train_acc"] = metrics.accuracy_score(y_train, pred_train)
    results["train_balanced_acc"] = metrics.balanced_accuracy_score(y_train, pred_train)
    results["val_acc"] = metrics.accuracy_score(y_val, pred_val)
    results["val_balanced_acc"] = metrics.balanced_accuracy_score(y_val, pred_val)

    return results
示例3: validate_predict
# 需要导入模块: import catboost [as 别名]
# 或者: from catboost import Pool [as 别名]
def validate_predict(model,X,y,X_test,n_splits=10,seed=42,model_type='lgb',verbose=0, sample_weights=sample_weights):
    """Run stratified K-fold validation and average test predictions.

    For every fold the model is refit on the training part, scored on the
    held-out part, and its predictions for ``X_test`` are accumulated.

    Args:
        model: Estimator exposing ``fit`` and ``predict_proba`` or ``predict``.
        X, y: Training features / labels, indexable by integer index arrays.
        X_test: Features to predict with every fold's model.
        n_splits: Number of stratified folds.
        seed: Random state for the fold shuffling.
        model_type: ``'lgb'`` (fit with eval_set and ``verbose``), ``'cb'``
            (fit on ``CbPool`` objects) or anything else (plain ``fit``).
        verbose: Forwarded to ``fit`` for the ``'lgb'`` case only.
        sample_weights: Weights used for the final overall accuracy print.

    Returns:
        ``(preds, preds_test)``: out-of-fold predictions of shape
        ``(len(X), 3)`` and test predictions of shape ``(len(X_test), 3)``
        summed over the folds and divided by ``n_splits``.
    """
    # For non-'cb' models, argmax indices are mapped to these string labels.
    class_names = np.array(['agreed', 'disagreed', 'unrelated'])

    oof_preds = np.zeros((X.shape[0], 3))
    test_preds = np.zeros((X_test.shape[0], 3))
    fold_scores = []

    folds = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    for train_idx, val_idx in folds.split(X, y):
        print("(*) iterator")
        X_train, X_val = X[train_idx, :], X[val_idx, :]
        y_train, y_val = y[train_idx], y[val_idx]

        if model_type == 'lgb':
            model.fit(
                X_train,
                y_train,
                eval_set=[(X_train, y_train), (X_val, y_val)],
                verbose=verbose,
            )
        elif model_type == 'cb':
            fold_train_pool = CbPool(X_train, y_train)
            fold_val_pool = CbPool(X_val, y_val)
            model.fit(fold_train_pool, eval_set=fold_val_pool)
        else:
            model.fit(X_train, y_train)

        # Use probabilities when the model offers them, else plain predictions.
        if hasattr(model, 'predict_proba'):
            predict_fn = model.predict_proba
        else:
            predict_fn = model.predict
        fold_val_pred = predict_fn(X_val)
        test_preds = test_preds + predict_fn(X_test)
        oof_preds[val_idx] = fold_val_pred

        fold_labels = np.argmax(fold_val_pred, axis=1)
        if model_type != 'cb':
            fold_labels = class_names[fold_labels]
        fold_scores.append(accuracy_score(y_val, fold_labels))

    print("local cv", np.mean(fold_scores), np.std(fold_scores))

    overall_labels = np.argmax(oof_preds, axis=1)
    if model_type != 'cb':
        overall_labels = class_names[overall_labels]
    overall_acc = accuracy_score(y, overall_labels, sample_weight=sample_weights)
    print(f"Val accuracy: {overall_acc:.5f}")

    test_preds /= n_splits
    return oof_preds, test_preds
### TRAIN - PREDICT ###
示例4: catboost_predict_class_probabilities
# 需要导入模块: import catboost [as 别名]
# 或者: from catboost import Pool [as 别名]
def catboost_predict_class_probabilities(
    data_path: InputPath('CSV'),
    model_path: InputPath('CatBoostModel'),
    predictions_path: OutputPath(),
    label_column: int = None,
):
    '''Predict class probabilities with a CatBoost model.

    Args:
        data_path: Path for the data in CSV format.
        model_path: Path for the trained model in binary CatBoostModel format.
        label_column: Column containing the label data. Column 0 is valid;
            pass None (the default) when the data has no label column.
        predictions_path: Output path for the predictions.

    Outputs:
        predictions: Predictions in text format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    import os
    import tempfile

    from catboost import CatBoost, Pool
    import numpy

    column_description_path = None
    # Compare against None rather than testing truthiness: a label in
    # column 0 would otherwise be silently ignored.
    if label_column is not None:
        # Write the one-line column description and close the handle before
        # the path is handed to Pool.
        with tempfile.NamedTemporaryFile('w', delete=False) as column_description_file:
            column_description_file.write('{}\t{}\n'.format(label_column, 'Label'))
            column_description_path = column_description_file.name

    try:
        eval_data = Pool(
            data_path,
            column_description=column_description_path,
            has_header=True,
            delimiter=',',
        )
    finally:
        # The description file is only needed while the Pool is built.
        if column_description_path is not None:
            os.remove(column_description_path)

    model = CatBoost()
    model.load_model(model_path)

    predictions = model.predict(eval_data, prediction_type='Probability')
    numpy.savetxt(predictions_path, predictions)
示例5: catboost_predict_values
# 需要导入模块: import catboost [as 别名]
# 或者: from catboost import Pool [as 别名]
def catboost_predict_values(
    data_path: InputPath('CSV'),
    model_path: InputPath('CatBoostModel'),
    predictions_path: OutputPath(),
    label_column: int = None,
):
    '''Predict values with a CatBoost model.

    Args:
        data_path: Path for the data in CSV format.
        model_path: Path for the trained model in binary CatBoostModel format.
        label_column: Column containing the label data. Column 0 is valid;
            pass None (the default) when the data has no label column.
        predictions_path: Output path for the predictions.

    Outputs:
        predictions: Predictions in text format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    import os
    import tempfile

    from catboost import CatBoost, Pool
    import numpy

    column_description_path = None
    # Compare against None rather than testing truthiness: a label in
    # column 0 would otherwise be silently ignored.
    if label_column is not None:
        # Write the one-line column description and close the handle before
        # the path is handed to Pool.
        with tempfile.NamedTemporaryFile('w', delete=False) as column_description_file:
            column_description_file.write('{}\t{}\n'.format(label_column, 'Label'))
            column_description_path = column_description_file.name

    try:
        eval_data = Pool(
            data_path,
            column_description=column_description_path,
            has_header=True,
            delimiter=',',
        )
    finally:
        # The description file is only needed while the Pool is built.
        if column_description_path is not None:
            os.remove(column_description_path)

    model = CatBoost()
    model.load_model(model_path)

    predictions = model.predict(eval_data, prediction_type='RawFormulaVal')
    numpy.savetxt(predictions_path, predictions)
示例6: catboost_predict_classes
# 需要导入模块: import catboost [as 别名]
# 或者: from catboost import Pool [as 别名]
def catboost_predict_classes(
    data_path: InputPath('CSV'),
    model_path: InputPath('CatBoostModel'),
    predictions_path: OutputPath(),
    label_column: int = None,
):
    '''Predict classes using the CatBoost classifier model.

    Args:
        data_path: Path for the data in CSV format.
        model_path: Path for the trained model in binary CatBoostModel format.
        label_column: Column containing the label data. Column 0 is valid;
            pass None (the default) when the data has no label column.
        predictions_path: Output path for the predictions.

    Outputs:
        predictions: Class predictions in text format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    import os
    import tempfile

    from catboost import CatBoostClassifier, Pool
    import numpy

    column_description_path = None
    # Compare against None rather than testing truthiness: a label in
    # column 0 would otherwise be silently ignored.
    if label_column is not None:
        # Write the one-line column description and close the handle before
        # the path is handed to Pool.
        with tempfile.NamedTemporaryFile('w', delete=False) as column_description_file:
            column_description_file.write('{}\t{}\n'.format(label_column, 'Label'))
            column_description_path = column_description_file.name

    try:
        eval_data = Pool(
            data_path,
            column_description=column_description_path,
            has_header=True,
            delimiter=',',
        )
    finally:
        # The description file is only needed while the Pool is built.
        if column_description_path is not None:
            os.remove(column_description_path)

    model = CatBoostClassifier()
    model.load_model(model_path)

    predictions = model.predict(eval_data)
    # fmt='%s' writes each predicted class via its string form.
    numpy.savetxt(predictions_path, predictions, fmt='%s')