Python catboost.Pool方法代码示例

本文整理汇总了Python中catboost.Pool方法的典型用法代码示例。如果您正苦于以下问题：Python catboost.Pool方法的具体用法？Python catboost.Pool怎么用？Python catboost.Pool使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块catboost的用法示例。

在下文中一共展示了catboost.Pool方法的6个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_data

# 需要导入模块: import catboost [as 别名]
# 或者: from catboost import Pool [as 别名]
def get_data(kind='array', n_rows=15, n_cols=49, fnames=None, seed=None):
    """
    Build random test data wrapped in the container type named by `kind`.

    Used for testing the grouping functionality of the wrapper.

    Parameters
    ----------
    kind
        One of 'none', 'array', 'sparse', 'frame', 'series', 'data' or
        'catboost.Pool'. Any other value makes the function return 0.
    n_rows, n_cols
        Forwarded to `get_random_matrix` to size the generated matrix.
    fnames
        Optional column/group names for the 'frame', 'series' and 'data'
        kinds. When empty or None, names 'feature_0', 'feature_1', ... are
        generated, one per column of the matrix.
    seed
        Passed to `np.random.seed` before the matrix is generated.

    Returns
    -------
    None for kind 'none'; otherwise the generated matrix in the requested
    container (the 'series' kind is one randomly chosen row of the frame),
    or 0 for an unrecognised kind.
    """

    # 'none' short-circuits before the global RNG is touched.
    if kind == 'none':
        return None

    np.random.seed(seed)
    matrix = get_random_matrix(n_rows=n_rows, n_cols=n_cols)

    # Containers that need no feature names.
    if kind == 'array':
        return matrix
    if kind == 'sparse':
        return scipy.sparse.csr_matrix(matrix)
    if kind == 'catboost.Pool':
        return catboost.Pool(matrix)

    # Containers that carry feature/group names.
    if kind in ('frame', 'series', 'data'):
        if fnames:
            names = fnames
        else:
            names = ['feature_{}'.format(col) for col in range(matrix.shape[-1])]

        if kind == 'data':
            return DenseData(matrix, names)

        frame = pd.DataFrame(data=matrix, columns=names)
        if kind == 'frame':
            return frame

        # 'series': pick one row of the frame at random.
        row = np.random.choice(np.arange(matrix.shape[0]))
        return frame.iloc[row, :]

    return 0

开发者ID:SeldonIO，项目名称:alibi，代码行数:37，代码来源:test_shap_wrappers.py

示例2: fit

# 需要导入模块: import catboost [as 别名]
# 或者: from catboost import Pool [as 别名]
def fit(self, X_train, y_train, X_val, y_val, categoricals=None):
        """Train a CatBoost classifier and report train/validation accuracy.

        Columns that are entirely NaN in ``X_train`` are dropped from both
        splits (the mask is stored on ``self.all_nan``) and any remaining NaN
        values are replaced via ``np.nan_to_num``. The fitted model is stored
        on ``self.model``.

        Args:
            X_train: Training features; indexed as a 2-D array.
            y_train: Training labels.
            X_val: Validation features; indexed as a 2-D array.
            y_val: Validation labels; must provide ``tolist()``.
            categoricals: Accepted for interface compatibility but ignored:
                the categorical column indices are re-derived from the data
                below.

        Returns:
            dict with keys ``val_preds``, ``labels``, ``train_acc``,
            ``train_balanced_acc``, ``val_acc`` and ``val_balanced_acc``.
        """
        results = {}

        # Remember which columns were all-NaN in the training data and drop
        # them from both splits.
        self.all_nan = np.all(np.isnan(X_train), axis=0)
        X_train = X_train[:, ~self.all_nan]
        X_val = X_val[:, ~self.all_nan]

        X_train = np.nan_to_num(X_train)
        X_val = np.nan_to_num(X_val)

        # NOTE(review): this overwrites the `categoricals` argument; columns
        # are treated as categorical when the first row holds a str.
        categoricals = [ind for ind in range(X_train.shape[1]) if isinstance(X_train[0,ind], str)]

        # Fixed patience above 10000 rows; below that it scales inversely
        # with the row count, never dropping under 10 rounds.
        early_stopping = 150 if X_train.shape[0]>10000 else max(round(150*10000/X_train.shape[0]), 10)

        X_train_pooled = Pool(data=X_train, label=y_train, cat_features=categoricals)
        X_val_pooled = Pool(data=X_val, label=y_val, cat_features=categoricals)

        self.model = CatBoostClassifier(**self.config)
        self.model.fit(X_train_pooled, eval_set=X_val_pooled, use_best_model=True, early_stopping_rounds=early_stopping)

        pred_train = self.model.predict_proba(X_train)
        pred_val = self.model.predict_proba(X_val)

        results["val_preds"] = pred_val.tolist()
        results["labels"] = y_val.tolist()

        # Turn per-class probabilities into class indices. Only ordinary
        # errors are tolerated here; a bare `except:` would also swallow
        # KeyboardInterrupt and SystemExit.
        try:
            pred_train = np.argmax(pred_train, axis=1)
            pred_val = np.argmax(pred_val, axis=1)
        except Exception:
            print("==> No probabilities provided in predictions")

        results["train_acc"] = metrics.accuracy_score(y_train, pred_train)
        results["train_balanced_acc"] = metrics.balanced_accuracy_score(y_train, pred_train)
        results["val_acc"] = metrics.accuracy_score(y_val, pred_val)
        results["val_balanced_acc"] = metrics.balanced_accuracy_score(y_val, pred_val)

        return results

开发者ID:automl，项目名称:Auto-PyTorch，代码行数:40，代码来源:baselines.py

示例3: validate_predict

# 需要导入模块: import catboost [as 别名]
# 或者: from catboost import Pool [as 别名]
def validate_predict(model,X,y,X_test,n_splits=10,seed=42,model_type='lgb',verbose=0, sample_weights=sample_weights):
    """Run stratified K-fold training and collect out-of-fold and test predictions.

    For every fold the model is refit on the training rows, scored on the
    held-out rows, and its test-set predictions are accumulated. The fold
    accuracies and the overall out-of-fold accuracy are printed.

    Args:
        model: Estimator exposing ``fit`` and ``predict_proba`` or ``predict``.
        X: Feature matrix, indexed as ``X[rows, :]``.
        y: Labels, indexed as ``y[rows]``.
        X_test: Test feature matrix.
        n_splits: Number of folds.
        seed: ``random_state`` for the shuffled ``StratifiedKFold``.
        model_type: ``'lgb'`` fits with an ``eval_set`` and ``verbose``;
            ``'cb'`` wraps the data in ``CbPool``; anything else uses a plain
            ``fit(X, y)``. For ``'cb'`` accuracy is computed on argmax
            indices, otherwise on the corresponding label strings.
        verbose: Passed to ``fit`` for ``'lgb'`` only.
        sample_weights: Weights for the overall out-of-fold accuracy. The
            default is the module-level ``sample_weights`` as it exists when
            this function is defined.

    Returns:
        Tuple ``(preds, preds_test)``: the out-of-fold prediction array with
        three columns, and the test predictions averaged over the folds.
    """
    class_names = np.array(['agreed', 'disagreed', 'unrelated'])
    is_catboost = model_type == 'cb'

    def _as_targets(scores):
        # CatBoost models are scored on class indices, others on label names.
        winners = np.argmax(scores, axis=1)
        return winners if is_catboost else class_names[winners]

    oof = np.zeros((X.shape[0], 3))
    test_total = np.zeros((X_test.shape[0], 3))
    fold_accuracies = []
    folds = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)

    for train_rows, val_rows in folds.split(X, y):
        print("(*) iterator")
        X_tr = X[train_rows, :]
        X_va = X[val_rows, :]
        y_tr = y[train_rows]
        y_va = y[val_rows]

        if model_type == 'lgb':
            model.fit(
                X_tr,
                y_tr,
                eval_set=[(X_tr, y_tr), (X_va, y_va)],
                verbose=verbose,
            )
        elif is_catboost:
            model.fit(CbPool(X_tr, y_tr), eval_set=CbPool(X_va, y_va))
        else:
            model.fit(X_tr, y_tr)

        # Prefer probabilities when the model offers them.
        if hasattr(model, 'predict_proba'):
            predictor = model.predict_proba
        else:
            predictor = model.predict

        fold_scores = predictor(X_va)
        test_total = test_total + predictor(X_test)
        oof[val_rows] = fold_scores

        fold_accuracies.append(accuracy_score(y_va, _as_targets(fold_scores)))

    print("local cv", np.mean(fold_accuracies), np.std(fold_accuracies))

    overall = accuracy_score(y, _as_targets(oof), sample_weight=sample_weights)
    print(f"Val accuracy: {overall:.5f}")

    # Average the accumulated test predictions over the folds.
    test_total /= n_splits
    return oof, test_total

### TRAIN - PREDICT ###

开发者ID:lampts，项目名称:wsdm19cup，代码行数:45，代码来源:train_predict_trees_batch1.py

示例4: catboost_predict_class_probabilities

# 需要导入模块: import catboost [as 别名]
# 或者: from catboost import Pool [as 别名]
def catboost_predict_class_probabilities(
    data_path: InputPath('CSV'),
    model_path: InputPath('CatBoostModel'),
    predictions_path: OutputPath(),

    label_column: int = None,
):
    '''Predict class probabilities with a CatBoost model.

    Args:
        data_path: Path for the data in CSV format.
        model_path: Path for the trained model in binary CatBoostModel format.
        label_column: Index of the column containing the label data. Leave
            unset (None) when the data has no label column; 0 is a valid index.
        predictions_path: Output path for the predictions.

    Outputs:
        predictions: Predictions in text format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    import os
    import tempfile

    from catboost import CatBoost, Pool
    import numpy

    column_description_path = None
    try:
        # Compare against None explicitly: a plain truthiness test would
        # silently ignore a label stored in column 0.
        if label_column is not None:
            column_descriptions = {label_column: 'Label'}
            # The context manager closes the handle; delete=False keeps the
            # file on disk so Pool can open it by path.
            with tempfile.NamedTemporaryFile('w', delete=False) as column_description_file:
                column_description_path = column_description_file.name
                for idx, kind in column_descriptions.items():
                    column_description_file.write('{}\t{}\n'.format(idx, kind))

        eval_data = Pool(
            data_path,
            column_description=column_description_path,
            has_header=True,
            delimiter=',',
        )
    finally:
        # The column description is only needed while the Pool is built.
        if column_description_path is not None:
            os.remove(column_description_path)

    model = CatBoost()
    model.load_model(model_path)

    predictions = model.predict(eval_data, prediction_type='Probability')
    numpy.savetxt(predictions_path, predictions)

开发者ID:kubeflow，项目名称:pipelines，代码行数:49，代码来源:component.py

示例5: catboost_predict_values

# 需要导入模块: import catboost [as 别名]
# 或者: from catboost import Pool [as 别名]
def catboost_predict_values(
    data_path: InputPath('CSV'),
    model_path: InputPath('CatBoostModel'),
    predictions_path: OutputPath(),

    label_column: int = None,
):
    '''Predict values with a CatBoost model.

    Args:
        data_path: Path for the data in CSV format.
        model_path: Path for the trained model in binary CatBoostModel format.
        label_column: Index of the column containing the label data. Leave
            unset (None) when the data has no label column; 0 is a valid index.
        predictions_path: Output path for the predictions.

    Outputs:
        predictions: Predictions in text format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    import os
    import tempfile

    from catboost import CatBoost, Pool
    import numpy

    column_description_path = None
    try:
        # Compare against None explicitly: a plain truthiness test would
        # silently ignore a label stored in column 0.
        if label_column is not None:
            column_descriptions = {label_column: 'Label'}
            # The context manager closes the handle; delete=False keeps the
            # file on disk so Pool can open it by path.
            with tempfile.NamedTemporaryFile('w', delete=False) as column_description_file:
                column_description_path = column_description_file.name
                for idx, kind in column_descriptions.items():
                    column_description_file.write('{}\t{}\n'.format(idx, kind))

        eval_data = Pool(
            data_path,
            column_description=column_description_path,
            has_header=True,
            delimiter=',',
        )
    finally:
        # The column description is only needed while the Pool is built.
        if column_description_path is not None:
            os.remove(column_description_path)

    model = CatBoost()
    model.load_model(model_path)

    predictions = model.predict(eval_data, prediction_type='RawFormulaVal')
    numpy.savetxt(predictions_path, predictions)

开发者ID:kubeflow，项目名称:pipelines，代码行数:49，代码来源:component.py

示例6: catboost_predict_classes

# 需要导入模块: import catboost [as 别名]
# 或者: from catboost import Pool [as 别名]
def catboost_predict_classes(
    data_path: InputPath('CSV'),
    model_path: InputPath('CatBoostModel'),
    predictions_path: OutputPath(),

    label_column: int = None,
):
    '''Predict classes using the CatBoost classifier model.

    Args:
        data_path: Path for the data in CSV format.
        model_path: Path for the trained model in binary CatBoostModel format.
        label_column: Index of the column containing the label data. Leave
            unset (None) when the data has no label column; 0 is a valid index.
        predictions_path: Output path for the predictions.

    Outputs:
        predictions: Class predictions in text format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    import os
    import tempfile

    from catboost import CatBoostClassifier, Pool
    import numpy

    column_description_path = None
    try:
        # Compare against None explicitly: a plain truthiness test would
        # silently ignore a label stored in column 0.
        if label_column is not None:
            column_descriptions = {label_column: 'Label'}
            # The context manager closes the handle; delete=False keeps the
            # file on disk so Pool can open it by path.
            with tempfile.NamedTemporaryFile('w', delete=False) as column_description_file:
                column_description_path = column_description_file.name
                for idx, kind in column_descriptions.items():
                    column_description_file.write('{}\t{}\n'.format(idx, kind))

        eval_data = Pool(
            data_path,
            column_description=column_description_path,
            has_header=True,
            delimiter=',',
        )
    finally:
        # The column description is only needed while the Pool is built.
        if column_description_path is not None:
            os.remove(column_description_path)

    model = CatBoostClassifier()
    model.load_model(model_path)

    predictions = model.predict(eval_data)
    # Class labels may be non-numeric, so write them with a string format.
    numpy.savetxt(predictions_path, predictions, fmt='%s')

开发者ID:kubeflow，项目名称:pipelines，代码行数:49，代码来源:component.py

注：本文中的catboost.Pool方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。