本文整理汇总了 Python 中 xgboost.DMatrix 类的典型用法代码示例。如果您正苦于以下问题:Python xgboost.DMatrix 的具体用法?Python xgboost.DMatrix 怎么用?Python xgboost.DMatrix 使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 xgboost 的用法示例。
在下文中一共展示了 xgboost.DMatrix 类的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: fit
# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def fit(self):
    """Train an XGBoost booster on the prepared training split and save it.

    The input is fetched with ``get_input`` and split into train/test frames
    by ``prepare_data``. The ``'Value'`` column is used as the label and all
    remaining columns as features. ``grid_search`` is run on the training
    data and its ``best_params_`` are passed to ``xgb.train``. The trained
    booster is saved under the path ``"model"``.

    Returns:
        The test frame produced by ``prepare_data``.
    """
    raw = self.get_input()
    df_train, df_test = self.prepare_data(raw)

    features = df_train.drop(['Value'], axis=1)
    labels = df_train['Value'].values
    train_matrix = xgb.DMatrix(features, labels)

    search = self.grid_search(features, labels)
    booster = xgb.train(dtrain=train_matrix, params=search.best_params_)

    # Persist the trained booster.
    mlflow.sklearn.save_model(booster, "model")
    return df_test
示例2: fit
# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def fit(self, X, y, X_valid, y_valid):
    """Train a booster on ``(X, y)`` while evaluating on ``(X_valid, y_valid)``.

    The shapes of all four inputs are logged first. Both DMatrix objects are
    built with ``self.dmatrix_parameters``; training uses
    ``self.booster_parameters`` and ``self.training_parameters``. The result
    of ``xgb.train`` is stored on ``self.estimator``.

    Returns:
        ``self``.
    """
    shape_messages = (
        ('XGBoost, train data shape {}', X),
        ('XGBoost, validation data shape {}', X_valid),
        ('XGBoost, train labels shape {}', y),
        ('XGBoost, validation labels shape {}', y_valid),
    )
    for template, array in shape_messages:
        logger.info(template.format(array.shape))

    dtrain = xgb.DMatrix(data=X, label=y, **self.dmatrix_parameters)
    dvalid = xgb.DMatrix(data=X_valid, label=y_valid, **self.dmatrix_parameters)

    watchlist = [(dtrain, 'train'), (dvalid, 'valid')]
    self.estimator = xgb.train(
        params=self.booster_parameters,
        dtrain=dtrain,
        evals=watchlist,
        **self.training_parameters
    )
    return self
示例3: get_libsvm_dmatrix
# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def get_libsvm_dmatrix(files_path, is_pipe=False):
    """Build a DMatrix from LIBSVM-formatted data on disk.

    Pipe mode is rejected up front because it is not supported for LIBSVM.

    :param files_path: Path (directory or file) holding the LIBSVM training data
    :param is_pipe: Whether the data is being read in pipe mode
    :return: xgb.DMatrix
    :raises exc.UserError: if ``is_pipe`` is true, or if loading the data fails
    """
    if is_pipe:
        raise exc.UserError("Pipe mode not supported for LibSVM.")

    try:
        return xgb.DMatrix(files_path)
    except Exception as e:
        # Any loading failure is reported to the caller as a user error.
        raise exc.UserError("Failed to load libsvm data with exception:\n{}".format(e))
示例4: _get_parquet_dmatrix_file_mode
# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def _get_parquet_dmatrix_file_mode(files_path):
    """Get Data Matrix from parquet data in file mode.

    The parquet data is read into memory, converted to a 2-D array, and split
    so that column 0 becomes the label and the remaining columns become the
    features.

    :param files_path: File path where parquet formatted training data resides, either directory or file
    :return: xgb.DMatrix
    :raises exc.UserError: if reading or converting the data fails
    """
    try:
        table = pq.read_table(files_path)
        data = table.to_pandas()
        # Drop the Arrow table as soon as it has been converted.
        del table

        # isinstance (rather than an exact type check) so DataFrame subclasses
        # are converted too; the positional slicing below needs an ndarray.
        if isinstance(data, pd.DataFrame):
            data = data.to_numpy()

        dmatrix = xgb.DMatrix(data[:, 1:], label=data[:, 0])
        del data
        return dmatrix
    except Exception as e:
        raise exc.UserError("Failed to load parquet data with exception:\n{}".format(e))
示例5: libsvm_to_dmatrix
# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def libsvm_to_dmatrix(string_like):  # type: (bytes) -> xgb.DMatrix
    """Turn LIBSVM-formatted bytes into a DMatrix via a temporary file.

    Args:
        string_like (bytes): LIBSVM payload.

    Returns:
        (xgb.DMatrix): XGBoost DataMatrix loaded from the payload.
    """
    tmp_path = None
    try:
        # delete=False: the file has to survive the `with` block so it can be
        # reopened by name after it has been closed.
        with tempfile.NamedTemporaryFile(delete=False) as handle:
            tmp_path = handle.name
            handle.write(string_like)
        loaded = xgb.DMatrix(tmp_path)
    finally:
        # Always clean up the temporary file, even when loading fails.
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)
    return loaded
示例6: execute
# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def execute(cls, ctx, op):
    """Run ``op.model.predict`` on the chunk stored at ``ctx[op.data.key]``.

    A tuple input is converted with ``ToDMatrix.get_xgb_dmatrix``; anything
    else is wrapped directly in a ``DMatrix``. When the first output chunk is
    a DataFrame or Series chunk type, the prediction is wrapped in the
    matching pandas object, reusing the index of the input data. The result
    is written to ``ctx`` under the first output's key.
    """
    from xgboost import DMatrix

    source = ctx[op.data.key]
    if isinstance(source, tuple):
        dmatrix = ToDMatrix.get_xgb_dmatrix(source)
    else:
        dmatrix = DMatrix(source)
    prediction = op.model.predict(dmatrix)

    out_chunk = op.outputs[0]
    if isinstance(out_chunk, DATAFRAME_CHUNK_TYPE):
        prediction = pd.DataFrame(prediction, index=source.index)
    elif isinstance(out_chunk, SERIES_CHUNK_TYPE):
        prediction = pd.Series(prediction, index=source.index, name='predictions')

    ctx[out_chunk.key] = prediction
示例7: predict
# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def predict(self, df_test):
    """Predict on ``df_test`` with the saved model and report the metrics.

    The model is loaded from the path ``"model"``. The ``'Value'`` column of
    ``df_test`` is treated as the ground truth and the remaining columns as
    features. MAPE, RMSE, R2 and MAE are printed and logged to MLflow, the
    result is plotted, and the predictions are handed to ``save_output``.
    (The original author describes this as a forecast of the next 7 days of
    electricity consumption.)
    """
    # Load the model from file.
    model = mlflow.sklearn.load_model("model")

    # Make predictions for the test data.
    features = df_test.drop(['Value'], axis=1)
    actual = df_test['Value'].values
    p = model.predict(xgb.DMatrix(features))
    prediction = pd.DataFrame({'Prediction': p})

    mape, rmse, mae, r2 = ForecastRunner.evaluation_metrics(actual, p)
    report = (
        ('MAPE: {}', "MAPE", mape),
        ('RMSE: {}', "RMSE", rmse),
        ('R2: {}', "R2", r2),
        ('MAE: {}', "MAE", mae),
    )
    # Print everything first, then log, matching the original output order.
    for template, _, value in report:
        print(template.format(value))
    for _, metric_name, value in report:
        mlflow.log_metric(metric_name, value)

    ForecastRunner.plot_result(actual, p)
    self.save_output(df_test, prediction)
示例8: train_xgb
# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def train_xgb(train_features, train_y, valid_features, valid_y, *,
              eta, num_boost_round):
    """Train a binary-logistic XGBoost booster with early stopping.

    Args:
        train_features: Training feature matrix.
        train_y: Training labels.
        valid_features: Validation feature matrix.
        valid_y: Validation labels.
        eta: Value for the ``eta`` booster parameter (keyword-only).
        num_boost_round: Maximum number of boosting rounds (keyword-only).

    Returns:
        The booster returned by ``xgb.train``.
    """
    train_data = xgb.DMatrix(train_features, label=train_y)
    valid_data = xgb.DMatrix(valid_features, label=valid_y)
    params = {
        'eta': eta,
        'objective': 'binary:logistic',
        'gamma': 0.01,
        'max_depth': 8,
    }
    print(params)
    eval_list = [(valid_data, 'eval')]
    # Pass `evals` by keyword: arguments after `num_boost_round` are
    # keyword-only (or deprecated as positional) in newer XGBoost releases.
    return xgb.train(
        params, train_data, num_boost_round,
        evals=eval_list,
        early_stopping_rounds=20,
        verbose_eval=10,
    )
示例9: test_local_csv_transform
# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def test_local_csv_transform(self):
    """Test transform from local csv files.

    Runs ``transform.py`` through the shell, then checks the shape of the
    emitted LIBSVM file and the contents of the emitted feature map.
    """
    cmd = ['python ' + os.path.join(CODE_PATH, 'transform.py'),
           '--csv=' + self.csv_input_filepath,
           '--analysis=' + self.analysis_dir,
           '--prefix=features',
           '--output=' + self.output_dir]
    print('cmd ', ' '.join(cmd))
    subprocess.check_call(' '.join(cmd), shell=True)

    # Verify transformed file.
    libsvm_filepath = os.path.join(self.output_dir, 'features-00000-of-00001.libsvm')
    dtrain = xgb.DMatrix(libsvm_filepath)
    # assertEqual, not assertTrue: assertTrue(2056, x) only checks that 2056
    # is truthy and treats x as the failure message, so it can never fail.
    self.assertEqual(2056, dtrain.num_col())
    self.assertEqual(3, dtrain.num_row())

    # Verify featuremap file.
    featuremap_filepath = os.path.join(self.output_dir, 'featuremap-00000-of-00001.txt')
    df = pd.read_csv(featuremap_filepath, names=['index', 'description'])
    # pd.testing is the public home of the assertion helpers; pd.util.testing
    # was deprecated and later removed.
    pd.testing.assert_series_equal(pd.Series(range(1, 2056), name='index'), df['index'])
    expected_descriptions = ['cat_col=Sunday', 'cat_col=Monday', 'img_col image feature 1000',
                             'num_col', 'text_col has "blue"']
    self.assertTrue(all(x in df['description'].values for x in expected_descriptions))
示例10: fit
# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def fit(self, X, y, x_val=None, y_val=None):
    """Train a booster on ``(X, y)`` and store it on ``self.clf``.

    Args:
        X: Training features.
        y: Training labels.
        x_val: Optional validation features. When given, training is
            monitored on both the training and validation sets and early
            stopping is enabled with ``self.early_stopping_rounds``.
        y_val: Labels for ``x_val``.

    Returns:
        None.
    """
    dtrain = xgb.DMatrix(X, label=y)
    train_kwargs = dict(params=self.params,
                        dtrain=dtrain,
                        num_boost_round=self.num_round)
    if x_val is not None:
        dtest = xgb.DMatrix(x_val, label=y_val)
        # Early stopping needs at least one evaluation set, so it is only
        # requested here; passing it without `evals` makes xgb.train raise.
        train_kwargs.update(
            early_stopping_rounds=self.early_stopping_rounds,
            evals=[(dtrain, 'train'), (dtest, 'validation')],
            verbose_eval=self.verbose,
        )
    self.clf = xgb.train(**train_kwargs)
    return
示例11: setUpClass
# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def setUpClass(self):
    """Load the Boston dataset and build the shared DMatrix fixture.

    Does nothing when either XGBoost or scikit-learn is unavailable.
    """
    if not _HAS_XGBOOST or not _HAS_SKLEARN:
        return

    # Load data and build the training matrix.
    boston = load_boston()
    # Round-trip through float32 and back to float64.
    self.X = boston.data.astype("f").astype("d")
    self.dtrain = xgboost.DMatrix(
        boston.data,
        label=boston.target,
        feature_names=boston.feature_names,
    )
    self.feature_names = boston.feature_names
    self.output_name = "target"
示例12: setUpClass
# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def setUpClass(self):
    """Load the Boston dataset and train a one-round booster as the fixture.

    Does nothing when either XGBoost or scikit-learn is unavailable.
    """
    if not _HAS_XGBOOST or not _HAS_SKLEARN:
        return

    boston = load_boston()
    train_matrix = xgboost.DMatrix(
        boston.data,
        label=boston.target,
        feature_names=boston.feature_names,
    )
    # Default parameters, a single boosting round.
    booster = xgboost.train({}, train_matrix, 1)

    # Keep the data and the model for the tests.
    self.scikit_data = boston
    self.xgb_model = booster
    self.feature_names = self.scikit_data.feature_names
示例13: setUpClass
# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def setUpClass(self):
    """Load the Boston dataset, bin its target into classes, and train a model.

    The continuous target is mapped to integer class ids with
    ``np.digitize`` over the bin edges of a default ``np.histogram``,
    shifted so the ids start at 0.
    """
    from sklearn.datasets import load_boston
    import numpy as np

    boston = load_boston()
    raw_target = boston.target
    bin_edges = np.histogram(raw_target)[1]
    class_ids = np.digitize(raw_target, bin_edges) - 1

    train_matrix = xgboost.DMatrix(
        boston.data, label=class_ids, feature_names=boston.feature_names
    )
    self.xgb_model = xgboost.train({}, train_matrix)
    self.target = class_ids

    # Keep the data and the number of distinct classes for the tests.
    self.scikit_data = boston
    self.n_classes = len(np.unique(self.target))
示例14: evaluate
# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def evaluate(features):
    """Score a feature subset by validation log loss.

    A booster is trained on the ``features`` columns of the module-level
    ``tr_x``/``tr_y`` and evaluated on ``va_x``/``va_y``.

    Returns:
        The log loss of the validation predictions.
    """
    train_matrix = xgb.DMatrix(tr_x[features], label=tr_y)
    valid_matrix = xgb.DMatrix(va_x[features], label=va_y)

    booster_params = {'objective': 'binary:logistic', 'silent': 1, 'random_state': 71}
    # In practice many more rounds are needed.
    rounds = 10
    patience = 3

    booster = xgb.train(
        booster_params,
        train_matrix,
        rounds,
        evals=[(train_matrix, 'train'), (valid_matrix, 'eval')],
        early_stopping_rounds=patience,
        verbose_eval=0,
    )
    return log_loss(va_y, booster.predict(valid_matrix))
# ---------------------------------
# Greedy Forward Selection
# ----------------------------------
示例15: train
# 需要导入模块: import xgboost [as 别名]
# 或者: from xgboost import DMatrix [as 别名]
def train(self, tr_x, tr_y, va_x=None, va_y=None):
    """Train a booster and store it on ``self.model``.

    ``self.params`` is copied; ``'num_round'`` is popped from the copy and
    used as the number of boosting rounds. When validation data is given,
    ``'early_stopping_rounds'`` is popped as well and early stopping is
    enabled against the validation set.
    """
    # Set up the data.
    has_validation = va_x is not None
    dtrain = xgb.DMatrix(tr_x, label=tr_y)
    dvalid = xgb.DMatrix(va_x, label=va_y) if has_validation else None

    # Set up the hyperparameters (copy so self.params is not mutated).
    booster_params = dict(self.params)
    rounds = booster_params.pop('num_round')

    # Training.
    if not has_validation:
        self.model = xgb.train(booster_params, dtrain, rounds,
                               evals=[(dtrain, 'train')])
        return

    patience = booster_params.pop('early_stopping_rounds')
    self.model = xgb.train(booster_params, dtrain, rounds,
                           evals=[(dtrain, 'train'), (dvalid, 'eval')],
                           early_stopping_rounds=patience)