当前位置: 首页>>代码示例>>Python>>正文


Python xgboost.DMatrix方法代码示例

本文整理汇总了Python中xgboost.DMatrix方法的典型用法代码示例。如果您正苦于以下问题:Python xgboost.DMatrix方法的具体用法?Python xgboost.DMatrix怎么用?Python xgboost.DMatrix使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在xgboost的用法示例。


在下文中一共展示了xgboost.DMatrix方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: fit

# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def fit(self):
        """
        Train an XGBoost model on the prepared input data and save it.

        The raw input comes from get_input() and is split by prepare_data().
        The booster parameters are the best_params_ reported by grid_search().

        Returns the second frame produced by prepare_data() (the test frame).
        """
        train_frame, test_frame = self.prepare_data(self.get_input())
        features = train_frame.drop(['Value'], axis=1)
        target = train_frame['Value'].values

        # The DMatrix is built before the parameter search runs.
        dtrain = xgb.DMatrix(features, target)
        best_params = self.grid_search(features, target).best_params_
        booster = xgb.train(dtrain=dtrain, params=best_params)

        # Persist the trained booster under the "model" path.
        mlflow.sklearn.save_model(booster, "model")
        return test_frame
开发者ID:produvia,项目名称:ai-platform,代码行数:19,代码来源:runner.py

示例2: fit

# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def fit(self, X, y, X_valid, y_valid):
        """Train a booster on (X, y) while evaluating on the train and validation sets.

        The shapes of all four inputs are logged first. The result of
        xgb.train is stored in ``self.estimator`` and ``self`` is returned.
        """
        shape_report = (
            ('XGBoost, train data shape        {}', X),
            ('XGBoost, validation data shape   {}', X_valid),
            ('XGBoost, train labels shape      {}', y),
            ('XGBoost, validation labels shape {}', y_valid),
        )
        for template, array in shape_report:
            logger.info(template.format(array.shape))

        dtrain = xgb.DMatrix(data=X, label=y, **self.dmatrix_parameters)
        dvalid = xgb.DMatrix(data=X_valid, label=y_valid, **self.dmatrix_parameters)
        watchlist = [(dtrain, 'train'), (dvalid, 'valid')]
        self.estimator = xgb.train(params=self.booster_parameters,
                                   dtrain=dtrain,
                                   evals=watchlist,
                                   **self.training_parameters)
        return self
开发者ID:minerva-ml,项目名称:steppy-toolkit,代码行数:19,代码来源:models.py

示例3: get_libsvm_dmatrix

# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def get_libsvm_dmatrix(files_path, is_pipe=False):
    """Load LIBSVM formatted data into a DMatrix.

    Pipe mode is rejected because it is not supported for LIBSVM input.

    :param files_path: Location of the LIBSVM formatted training data, either a directory or a file
    :param is_pipe: True when the data is being read in pipe mode
    :return: xgb.DMatrix
    :raises exc.UserError: if pipe mode is requested or the data cannot be loaded
    """
    if is_pipe:
        raise exc.UserError("Pipe mode not supported for LibSVM.")

    try:
        return xgb.DMatrix(files_path)
    except Exception as load_error:
        # Any loading failure is reported to the caller as a user error.
        raise exc.UserError("Failed to load libsvm data with exception:\n{}".format(load_error))
开发者ID:aws,项目名称:sagemaker-xgboost-container,代码行数:20,代码来源:data_utils.py

示例4: _get_parquet_dmatrix_file_mode

# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def _get_parquet_dmatrix_file_mode(files_path):
    """Get Data Matrix from parquet data in file mode.

    The first column of the loaded data is used as the label and the
    remaining columns as the features.

    :param files_path: File path where parquet formatted training data resides, either directory or file
    :return: xgb.DMatrix
    :raises exc.UserError: if reading the parquet data or building the DMatrix fails
    """
    try:
        table = pq.read_table(files_path)

        data = table.to_pandas()
        # Release the reference to the table once it has been converted.
        del table

        # isinstance (rather than an exact type comparison) so DataFrame
        # subclasses are also converted before the positional slicing below.
        if isinstance(data, pd.DataFrame):
            # pyarrow.Table.to_pandas may produce NumPy array or pandas DataFrame
            data = data.to_numpy()

        dmatrix = xgb.DMatrix(data[:, 1:], label=data[:, 0])
        del data

        return dmatrix

    except Exception as e:
        raise exc.UserError("Failed to load parquet data with exception:\n{}".format(e))
开发者ID:aws,项目名称:sagemaker-xgboost-container,代码行数:25,代码来源:data_utils.py

示例5: libsvm_to_dmatrix

# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def libsvm_to_dmatrix(string_like):  # type: (bytes) -> xgb.DMatrix
    """Build a DMatrix from a LIBSVM payload.

    The payload is written to a temporary file, which xgb.DMatrix then loads
    by path. The file is removed afterwards whether or not loading succeeded.

    Args:
        string_like (bytes): LIBSVM string.
    Returns:
        (xgb.DMatrix): XGBoost DataMatrix
    """
    tmp_path = None
    try:
        # delete=False keeps the file on disk after the handle is closed so it
        # can be reopened by path.
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            tmp_path = tmp_file.name
            tmp_file.write(string_like)

        return xgb.DMatrix(tmp_path)
    finally:
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)
开发者ID:aws,项目名称:sagemaker-xgboost-container,代码行数:21,代码来源:encoder.py

示例6: execute

# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def execute(cls, ctx, op):
        """Run ``op.model.predict`` on the operand's input data and store the result in ``ctx``.

        Tuple inputs are converted through ToDMatrix.get_xgb_dmatrix; anything
        else is wrapped in a DMatrix directly. Depending on the type of the
        first output chunk, the prediction is wrapped in a DataFrame or a
        Series that reuses the input's index.
        """
        from xgboost import DMatrix

        raw_data = ctx[op.data.key]
        if isinstance(raw_data, tuple):
            dmatrix = ToDMatrix.get_xgb_dmatrix(raw_data)
        else:
            dmatrix = DMatrix(raw_data)
        result = op.model.predict(dmatrix)

        out_chunk = op.outputs[0]
        if isinstance(out_chunk, DATAFRAME_CHUNK_TYPE):
            result = pd.DataFrame(result, index=raw_data.index)
        elif isinstance(out_chunk, SERIES_CHUNK_TYPE):
            result = pd.Series(result, index=raw_data.index, name='predictions')

        ctx[out_chunk.key] = result
开发者ID:mars-project,项目名称:mars,代码行数:18,代码来源:predict.py

示例7: predict

# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def predict(self, df_test):
        """
        Makes prediction for the next 7 days electricity consumption.

        Loads the saved model, predicts on df_test (without its 'Value'
        column), prints and logs the evaluation metrics, plots the result
        and saves the output.
        """
        # Load the model from the "model" path.
        model = mlflow.sklearn.load_model("model")

        # Split the test frame into features and the observed values.
        features = df_test.drop(['Value'], axis=1)
        actual = df_test['Value'].values
        predicted = model.predict(xgb.DMatrix(features))
        prediction = pd.DataFrame({'Prediction': predicted})

        mape, rmse, mae, r2 = ForecastRunner.evaluation_metrics(actual, predicted)
        report = (
            ('MAPE: {}', "MAPE", mape),
            ('RMSE: {}', "RMSE", rmse),
            ('R2: {}', "R2", r2),
            ('MAE: {}', "MAE", mae),
        )
        # Print every metric first, then log them, in the same order.
        for template, _, value in report:
            print(template.format(value))
        for _, metric_name, value in report:
            mlflow.log_metric(metric_name, value)

        ForecastRunner.plot_result(actual, predicted)
        self.save_output(df_test, prediction)
开发者ID:produvia,项目名称:ai-platform,代码行数:24,代码来源:runner.py

示例8: train_xgb

# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def train_xgb(train_features, train_y, valid_features, valid_y, *,
              eta, num_boost_round):
    """Train a ``binary:logistic`` booster, monitoring the validation set.

    Args:
        train_features: Training feature matrix.
        train_y: Training labels.
        valid_features: Validation feature matrix.
        valid_y: Validation labels.
        eta: Value for the ``eta`` booster parameter.
        num_boost_round: Maximum number of boosting rounds.

    Returns:
        The result of ``xgb.train``. Early stopping is requested with
        ``early_stopping_rounds=20`` on the validation set.
    """
    train_data = xgb.DMatrix(train_features, label=train_y)
    valid_data = xgb.DMatrix(valid_features, label=valid_y)
    params = {
        'eta': eta,
        'objective': 'binary:logistic',
        'gamma': 0.01,
        'max_depth': 8,
    }
    print(params)
    eval_list = [(valid_data, 'eval')]
    # Pass everything after dtrain by keyword: `evals` is keyword-only in
    # current xgboost releases, while the keyword form works on all versions.
    return xgb.train(
        params, train_data,
        num_boost_round=num_boost_round,
        evals=eval_list,
        early_stopping_rounds=20,
        verbose_eval=10,
    )
开发者ID:lopuhin,项目名称:kaggle-kuzushiji-2019,代码行数:19,代码来源:level2.py

示例9: test_local_csv_transform

# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def test_local_csv_transform(self):
    """Test transform from local csv files."""

    # Pass the arguments as a list without a shell so that paths containing
    # spaces or shell metacharacters reach transform.py intact.
    cmd = ['python', os.path.join(CODE_PATH, 'transform.py'),
           '--csv=' + self.csv_input_filepath,
           '--analysis=' + self.analysis_dir,
           '--prefix=features',
           '--output=' + self.output_dir]
    print('cmd ', ' '.join(cmd))
    subprocess.check_call(cmd)

    # Verify transformed file.
    libsvm_filepath = os.path.join(self.output_dir, 'features-00000-of-00001.libsvm')
    dtrain = xgb.DMatrix(libsvm_filepath)
    # assertEqual, not assertTrue: assertTrue(2056, x) only checks that 2056
    # is truthy and treats x as the failure message, so it can never fail.
    self.assertEqual(2056, dtrain.num_col())
    self.assertEqual(3, dtrain.num_row())

    # Verify featuremap file.
    featuremap_filepath = os.path.join(self.output_dir, 'featuremap-00000-of-00001.txt')
    df = pd.read_csv(featuremap_filepath, names=['index', 'description'])
    # pd.testing is the public location of the assertion helpers;
    # pd.util.testing is deprecated and absent from pandas 2.x.
    pd.testing.assert_series_equal(pd.Series(range(1, 2056), name='index'), df['index'])
    expected_descriptions = ['cat_col=Sunday', 'cat_col=Monday', 'img_col image feature 1000',
                             'num_col', 'text_col has "blue"']
    self.assertTrue(all(x in df['description'].values for x in expected_descriptions))
开发者ID:googledatalab,项目名称:pydatalab,代码行数:26,代码来源:test_transform.py

示例10: fit

# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def fit(self, X, y, x_val=None, y_val=None):
        """Train a booster on (X, y) and store it in ``self.clf``.

        When ``x_val`` is given, the training and validation sets are both
        watched and early stopping is applied using
        ``self.early_stopping_rounds``. Without validation data the booster
        is trained for the full ``self.num_round`` rounds.

        :param X: training features
        :param y: training labels
        :param x_val: optional validation features
        :param y_val: optional validation labels
        :return: None
        """
        dtrain = xgb.DMatrix(X, label=y)
        if x_val is not None:
            dtest = xgb.DMatrix(x_val, label=y_val)
            watchlist = [(dtrain, 'train'), (dtest, 'validation')]
            self.clf = xgb.train(params=self.params,
                                 dtrain=dtrain,
                                 num_boost_round=self.num_round,
                                 early_stopping_rounds=self.early_stopping_rounds,
                                 evals=watchlist,
                                 verbose_eval=self.verbose)
        else:
            # Early stopping needs at least one evaluation set; requesting it
            # with no `evals` makes xgb.train raise. With no validation data
            # there is nothing to stop on, so it is not requested here.
            self.clf = xgb.train(params=self.params,
                                 dtrain=dtrain,
                                 num_boost_round=self.num_round)
        return
开发者ID:mpearmain,项目名称:gestalt,代码行数:20,代码来源:wrap_xgb.py

示例11: setUpClass

# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def setUpClass(self):
        """Load the load_boston() dataset and prepare the shared fixtures.

        Does nothing when xgboost or scikit-learn is unavailable.
        """
        if not (_HAS_XGBOOST and _HAS_SKLEARN):
            return

        # Load data and build the training matrix
        boston = load_boston()
        names = boston.feature_names
        # self.X is the data round-tripped through dtype "f" and back to "d";
        # the DMatrix below is built from the unconverted data.
        self.X = boston.data.astype("f").astype("d")
        self.dtrain = xgboost.DMatrix(boston.data, label=boston.target, feature_names=names)
        self.feature_names = names
        self.output_name = "target"
开发者ID:apple,项目名称:coremltools,代码行数:18,代码来源:test_boosted_trees_regression_numeric.py

示例12: setUpClass

# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def setUpClass(self):
        """
        Prepare the fixtures: load the dataset and train a one-round model.

        Does nothing when xgboost or scikit-learn is unavailable.
        """
        if not (_HAS_XGBOOST and _HAS_SKLEARN):
            return

        boston = load_boston()
        training_matrix = xgboost.DMatrix(
            boston.data, label=boston.target, feature_names=boston.feature_names
        )

        # Keep the data, the trained model and the feature names for the tests.
        self.scikit_data = boston
        self.xgb_model = xgboost.train({}, training_matrix, 1)
        self.feature_names = self.scikit_data.feature_names
开发者ID:apple,项目名称:coremltools,代码行数:23,代码来源:test_boosted_trees_regression.py

示例13: setUpClass

# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def setUpClass(self):
        """
        Prepare the fixtures: load the dataset, bin the target into classes
        and train a model on the binned labels.
        """
        from sklearn.datasets import load_boston
        import numpy as np

        boston = load_boston()
        values = boston.target
        # Turn the continuous target into class ids: take the bin edges of a
        # default histogram and map every value to its bin index, shifted by one.
        edges = np.histogram(values)[1]
        class_ids = np.digitize(values, edges) - 1
        training_matrix = xgboost.DMatrix(
            boston.data, label=class_ids, feature_names=boston.feature_names
        )
        self.xgb_model = xgboost.train({}, training_matrix)
        self.target = class_ids

        # Keep the data and the number of distinct classes for the tests.
        self.scikit_data = boston
        self.n_classes = len(np.unique(self.target))
开发者ID:apple,项目名称:coremltools,代码行数:21,代码来源:test_boosted_trees_classifier.py

示例14: evaluate

# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def evaluate(features):
    """Train a booster on the given feature subset and return its validation log loss.

    Reads the module-level tr_x, tr_y, va_x and va_y.
    """
    train_set = xgb.DMatrix(tr_x[features], label=tr_y)
    valid_set = xgb.DMatrix(va_x[features], label=va_y)
    booster = xgb.train(
        {'objective': 'binary:logistic', 'silent': 1, 'random_state': 71},
        train_set,
        10,  # in practice many more rounds are needed
        evals=[(train_set, 'train'), (valid_set, 'eval')],
        early_stopping_rounds=3,
        verbose_eval=0,
    )
    return log_loss(va_y, booster.predict(valid_set))


# ---------------------------------
# Greedy Forward Selection
# ---------------------------------- 
开发者ID:ghmagazine,项目名称:kagglebook,代码行数:21,代码来源:ch06-06-wrapper.py

示例15: train

# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def train(self, tr_x, tr_y, va_x=None, va_y=None):
        """Train a booster and store it in ``self.model``.

        'num_round' is always taken out of a copy of ``self.params``.
        'early_stopping_rounds' is taken out only when validation data is
        supplied, in which case both sets are watched and early stopping is
        used. Otherwise only the training set is watched.
        """
        # Set up the data
        has_validation = va_x is not None
        dtrain = xgb.DMatrix(tr_x, label=tr_y)
        dvalid = xgb.DMatrix(va_x, label=va_y) if has_validation else None

        # Set up the hyperparameters (on a copy, so self.params is untouched)
        params = dict(self.params)
        num_round = params.pop('num_round')

        # Training
        if not has_validation:
            self.model = xgb.train(params, dtrain, num_round,
                                   evals=[(dtrain, 'train')])
            return

        early_stopping_rounds = params.pop('early_stopping_rounds')
        self.model = xgb.train(params, dtrain, num_round,
                               evals=[(dtrain, 'train'), (dvalid, 'eval')],
                               early_stopping_rounds=early_stopping_rounds)
开发者ID:ghmagazine,项目名称:kagglebook,代码行数:23,代码来源:model_xgb.py


注:本文中的xgboost.DMatrix方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。