本文整理汇总了Python中lightgbm.Dataset类的典型用法代码示例。如果您正苦于以下问题:Python lightgbm.Dataset的具体用法?Python lightgbm.Dataset怎么用?Python lightgbm.Dataset使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块lightgbm的用法示例。
在下文中一共展示了lightgbm.Dataset方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: optimize_hyperparam
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import Dataset [as 别名]
def optimize_hyperparam(self, X, y, test_size=.2, n_eval=100):
    """Search ``self.space`` with hyperopt's TPE algorithm.

    The data is split once into a training and a validation part. Every
    trial trains a LightGBM booster on the training part and is scored on
    the validation part.

    Args:
        X: Feature matrix handed to ``train_test_split``.
        y: Targets aligned with ``X``.
        test_size: Share of the rows held out for validation.
        n_eval: Maximum number of hyperopt evaluations.

    Returns:
        A ``(hyperparams, trials)`` tuple: the best point mapped back
        through ``space_eval`` and the ``Trials`` object of the search.
    """
    X_trn, X_val, y_trn, y_val = train_test_split(
        X, y, test_size=test_size, shuffle=self.shuffle)
    dtrain = lgb.Dataset(X_trn, label=y_trn)
    dvalid = lgb.Dataset(X_val, label=y_val)

    def _trial_loss(hyperparams):
        # Trial values win over the base parameters on key collisions.
        merged = dict(self.params)
        merged.update(hyperparams)
        booster = lgb.train(merged, dtrain, self.n_est, dvalid,
                            early_stopping_rounds=self.n_stop, verbose_eval=0)
        # The validation metric is scaled by self.loss_sign before being
        # reported (presumably -1 for metrics to maximise -- confirm).
        valid_score = booster.best_score["valid_0"][self.metric]
        return {'loss': valid_score * self.loss_sign,
                'status': STATUS_OK,
                'model': booster}

    trials = Trials()
    best_point = hyperopt.fmin(
        fn=_trial_loss,
        space=self.space,
        trials=trials,
        algo=tpe.suggest,
        max_evals=n_eval,
        verbose=1,
        rstate=self.random_state,
    )
    return space_eval(self.space, best_point), trials
示例2: run_lgb
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import Dataset [as 别名]
def run_lgb(train_X, train_y, val_X, val_y, test_X):
    """Train an RMSE regression booster with early stopping and predict.

    Args:
        train_X: Training features.
        train_y: Training targets.
        val_X: Validation features used for early stopping.
        val_y: Validation targets.
        test_X: Features to predict on with the trained model.

    Returns:
        A ``(pred_test_y, model, evals_result)`` tuple: predictions for
        ``test_X`` at the best iteration, the trained booster, and the
        per-iteration validation metrics recorded during training.
    """
    params = {
        "objective": "regression",
        "metric": "rmse",
        "num_leaves": 30,
        "learning_rate": 0.1,
        "bagging_fraction": 0.7,
        "feature_fraction": 0.7,
        # The parameter is spelled "bagging_freq"; the previous key
        # "bagging_frequency" is not recognised by LightGBM, which left
        # bagging (and therefore bagging_fraction/bagging_seed) inactive.
        "bagging_freq": 5,
        "bagging_seed": 2018,
        "verbosity": -1
    }
    lgtrain = lgb.Dataset(train_X, label=train_y)
    lgval = lgb.Dataset(val_X, label=val_y)
    evals_result = {}
    # Up to 10000 rounds; stop once the validation metric has not improved
    # for 100 consecutive rounds, logging every 20 rounds.
    model = lgb.train(params, lgtrain, 10000, valid_sets=[lgval],
                      early_stopping_rounds=100, verbose_eval=20,
                      evals_result=evals_result)
    pred_test_y = model.predict(test_X, num_iteration=model.best_iteration)
    return pred_test_y, model, evals_result
# Splitting the data for model training#
示例3: fit
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import Dataset [as 别名]
def fit(self, X, y, X_valid, y_valid):
    """Validate and format the targets, then train a LightGBM booster.

    Both the training and the validation set are passed as evaluation
    sets, named ``data_train`` and ``data_valid``.

    Args:
        X: Training features (must expose ``.shape``).
        y: Training targets.
        X_valid: Validation features (must expose ``.shape``).
        y_valid: Validation targets.

    Returns:
        ``self``, with the trained booster stored in ``self.estimator``.
    """
    self._check_target_shape_and_type(y, 'y')
    self._check_target_shape_and_type(y_valid, 'y_valid')
    y, y_valid = self._format_target(y), self._format_target(y_valid)

    # Log the shapes of everything that goes into training.
    shape_reports = (
        ('LightGBM transformer, train data shape {}', X),
        ('LightGBM transformer, validation data shape {}', X_valid),
        ('LightGBM transformer, train labels shape {}', y),
        ('LightGBM transformer, validation labels shape {}', y_valid),
    )
    for template, array in shape_reports:
        logger.info(template.format(array.shape))

    train_set = lgb.Dataset(data=X, label=y, **self.dataset_parameters)
    valid_set = lgb.Dataset(data=X_valid, label=y_valid, **self.dataset_parameters)

    self.estimator = lgb.train(
        params=self.booster_parameters,
        train_set=train_set,
        valid_sets=[train_set, valid_set],
        valid_names=['data_train', 'data_valid'],
        **self.training_parameters
    )
    return self
示例4: train_lgb
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import Dataset [as 别名]
def train_lgb(train_features, train_y, valid_features, valid_y, *,
              lr, num_boost_round):
    """Train a binary-classification booster with early stopping.

    Args:
        train_features: Training features.
        train_y: Training labels.
        valid_features: Validation features.
        valid_y: Validation labels.
        lr: Learning rate (keyword-only).
        num_boost_round: Maximum number of boosting rounds (keyword-only).

    Returns:
        The booster returned by ``lgb.train``.
    """
    dtrain = lgb.Dataset(train_features, train_y)
    # The validation set references the training set when it is constructed.
    dvalid = lgb.Dataset(valid_features, valid_y, reference=dtrain)

    params = dict(
        objective='binary',
        metric='binary_logloss',
        learning_rate=lr,
        bagging_fraction=0.8,
        bagging_freq=5,
        feature_fraction=0.9,
        min_data_in_leaf=20,
        num_leaves=41,
        scale_pos_weight=1.2,
        lambda_l2=1,
    )
    print(params)

    booster = lgb.train(
        params=params,
        train_set=dtrain,
        num_boost_round=num_boost_round,
        early_stopping_rounds=20,
        valid_sets=[dvalid],
        verbose_eval=10,
    )
    return booster
示例5: test_cpu
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import Dataset [as 别名]
def test_cpu(self):
    """Train for one round from the binary fixtures and check best_iteration."""
    train_set = lgb.Dataset('/input/tests/data/lgb_train.bin')
    eval_set = lgb.Dataset('/input/tests/data/lgb_test.bin', reference=train_set)

    params = dict(
        task='train',
        boosting_type='gbdt',
        objective='regression',
        metric={'l2', 'auc'},
        num_leaves=31,
        learning_rate=0.05,
        feature_fraction=0.9,
        bagging_fraction=0.8,
        bagging_freq=5,
        verbose=0,
    )

    # A single boosting round keeps the test fast.
    booster = lgb.train(params, train_set, num_boost_round=1,
                        valid_sets=eval_set, early_stopping_rounds=1)

    self.assertEqual(1, booster.best_iteration)
示例6: test_gpu
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import Dataset [as 别名]
def test_gpu(self):
    """Same one-round training check as the CPU test, with ``device='gpu'``."""
    train_set = lgb.Dataset('/input/tests/data/lgb_train.bin')
    eval_set = lgb.Dataset('/input/tests/data/lgb_test.bin', reference=train_set)

    params = dict(
        boosting_type='gbdt',
        objective='regression',
        metric='auc',
        num_leaves=31,
        learning_rate=0.05,
        feature_fraction=0.9,
        bagging_fraction=0.8,
        bagging_freq=5,
        verbose=1,
        device='gpu',
    )

    # A single boosting round keeps the test fast.
    train_kwargs = dict(num_boost_round=1, valid_sets=eval_set,
                        early_stopping_rounds=1)
    booster = lgb.train(params, train_set, **train_kwargs)

    self.assertEqual(1, booster.best_iteration)
示例7: train_and_predict
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import Dataset [as 别名]
def train_and_predict(self, train, valid, weight, categorical_features: List[str], target: str, params: dict) \
        -> "Tuple[Booster, dict]":
    """Train a LightGBM booster on ``train`` and evaluate it on ``valid``.

    Every column except ``target`` is used as a predictor. Both frames are
    passed as evaluation sets, named ``train`` and ``valid``.

    Args:
        train: Training frame; a ``pandas.DataFrame`` (subclasses allowed)
            containing the ``target`` column.
        valid: Validation frame with exactly the same column list as ``train``.
        weight: Optional per-row weights for the training set, or ``None``.
        categorical_features: Names of the categorical predictor columns.
        target: Name of the label column.
        params: Dict with ``'model_params'`` (booster parameters) and
            ``'train_params'`` (extra keyword arguments for ``lgb.train``).

    Returns:
        A ``(model, eval_results)`` tuple: the trained booster and the
        metric history recorded through ``evals_result``.

    Raises:
        ValueError: If either input is not a DataFrame, or if the column
            lists of the two frames differ.
    """
    # isinstance (rather than an exact type comparison) also accepts
    # DataFrame subclasses.
    if not (isinstance(train, pd.DataFrame) and isinstance(valid, pd.DataFrame)):
        raise ValueError('Parameter train and valid must be pandas.DataFrame')
    if list(train.columns) != list(valid.columns):
        raise ValueError('Train and valid must have a same column list')

    predictors = train.columns.drop(target)

    if weight is not None:
        print(weight)
    # weight=None is the Dataset default, so one constructor call covers
    # both the weighted and the unweighted case.
    d_train = lgb.Dataset(train[predictors], label=train[target].values, weight=weight)
    d_valid = lgb.Dataset(valid[predictors], label=valid[target].values)

    eval_results = {}
    model: Booster = lgb.train(params['model_params'],
                               d_train,
                               categorical_feature=categorical_features,
                               valid_sets=[d_train, d_valid],
                               valid_names=['train', 'valid'],
                               evals_result=eval_results,
                               **params['train_params'])
    return model, eval_results
示例8: get_dataset
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import Dataset [as 别名]
def get_dataset(self, X, y, free_raw_data=True):
    """
    Wrap features and labels in a ``lightgbm.Dataset``.

    The feature names and the categorical feature specification are taken
    from ``self.feature_name`` and ``self.categorical_feature``.

    Parameters
    ----------
    X: features, in any form accepted as ``data`` by ``lightgbm.Dataset``
    y: labels, in any form accepted as ``label`` by ``lightgbm.Dataset``
    free_raw_data: bool, optional (default=True)
        forwarded unchanged to ``lightgbm.Dataset``

    Return
    ------
    lightgbm dataset
    """
    dataset_kwargs = {
        'data': X,
        'label': y,
        'feature_name': self.feature_name,
        'categorical_feature': self.categorical_feature,
        'free_raw_data': free_raw_data,
    }
    return lightgbm.Dataset(**dataset_kwargs)
示例9: fit
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import Dataset [as 别名]
def fit(self, X, y, X_valid, y_valid, feature_names, categorical_features, **kwargs):
    """Train a booster on ``(X, y)`` while evaluating on both data sets.

    Args:
        X: Training features.
        y: Training labels.
        X_valid: Validation features.
        y_valid: Validation labels.
        feature_names: Feature names given to both datasets.
        categorical_features: Categorical feature spec given to both datasets.
        **kwargs: Accepted but not used by this method.

    Returns:
        ``self``, with the trained booster stored in ``self.estimator``.
    """
    # Both datasets share the same feature metadata.
    feature_meta = dict(feature_name=feature_names,
                        categorical_feature=categorical_features)
    train = lgb.Dataset(X, label=y, **feature_meta)
    valid = lgb.Dataset(X_valid, label=y_valid, **feature_meta)

    # Filled in by lgb.train; not stored or returned by this method.
    evaluation_results = {}
    self.estimator = lgb.train(
        self.model_params,
        train,
        valid_sets=[train, valid],
        valid_names=['train', 'valid'],
        evals_result=evaluation_results,
        num_boost_round=self.training_params.number_boosting_rounds,
        early_stopping_rounds=self.training_params.early_stopping_rounds,
        verbose_eval=10,
        feval=self.evaluation_function,
    )
    return self
示例10: fit
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import Dataset [as 别名]
def fit(self, X, y, X_valid, y_valid, feature_names, categorical_features, **kwargs):
    """Train a booster configured by ``self.model_config``.

    The training and validation sets are both used as evaluation sets,
    named ``train`` and ``valid``. Logging frequency comes from
    ``self.model_config.verbose``.

    Args:
        X: Training features.
        y: Training labels.
        X_valid: Validation features.
        y_valid: Validation labels.
        feature_names: Feature names given to both datasets.
        categorical_features: Categorical feature spec given to both datasets.
        **kwargs: Accepted but not used by this method.

    Returns:
        ``self``, with the trained booster stored in ``self.estimator``.
    """
    def _as_dataset(features, labels):
        # Build a Dataset carrying the shared feature metadata.
        return lgb.Dataset(features, label=labels,
                           feature_name=feature_names,
                           categorical_feature=categorical_features)

    train = _as_dataset(X, y)
    valid = _as_dataset(X_valid, y_valid)

    # Filled in by lgb.train; not stored or returned by this method.
    evaluation_results = {}
    self.estimator = lgb.train(
        self.model_config,
        train,
        valid_sets=[train, valid],
        valid_names=['train', 'valid'],
        evals_result=evaluation_results,
        num_boost_round=self.training_config.number_boosting_rounds,
        early_stopping_rounds=self.training_config.early_stopping_rounds,
        verbose_eval=self.model_config.verbose,
        feval=self.evaluation_function,
    )
    return self
示例11: load_data
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import Dataset [as 别名]
def load_data(train_path='./data/regression.train', test_path='./data/regression.test'):
    """Read the tab-separated train/test files and build LightGBM datasets.

    Column 0 of each file is the label; the remaining columns are features.
    The first 90% of the training rows (in file order) are used for
    training and the rest for evaluation.

    Args:
        train_path: Path of the headerless, tab-separated training file.
        test_path: Path of the headerless, tab-separated test file.

    Returns:
        A ``(lgb_train, lgb_eval, X_test, y_test)`` tuple: the training and
        evaluation datasets plus the raw test features and labels.
    """
    print('Load data...')
    df_train = pd.read_csv(train_path, header=None, sep='\t')
    df_test = pd.read_csv(test_path, header=None, sep='\t')

    # Row index where the training part ends and the evaluation part begins.
    split_num = int(0.9 * len(df_train))

    labels = df_train[0].values
    features = df_train.drop(0, axis=1).values
    y_train, y_eval = labels[:split_num], labels[split_num:]
    X_train, X_eval = features[:split_num, :], features[split_num:, :]

    y_test = df_test[0].values
    X_test = df_test.drop(0, axis=1).values

    # create dataset for lightgbm
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_eval = lgb.Dataset(X_eval, y_eval, reference=lgb_train)
    return lgb_train, lgb_eval, X_test, y_test
示例12: train_breast_cancer
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import Dataset [as 别名]
def train_breast_cancer(config):
    """Train on the breast-cancer data and report accuracy through ``tune``.

    Args:
        config: LightGBM parameters passed directly to ``lgb.train``.

    The data is split 75/25. Accuracy on the held-out quarter is reported
    via ``tune.report`` together with ``done=True``.
    """
    features, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    train_x, test_x, train_y, test_y = train_test_split(
        features, labels, test_size=0.25)

    train_set = lgb.Dataset(train_x, label=train_y)
    test_set = lgb.Dataset(test_x, label=test_y)
    booster = lgb.train(config, train_set, valid_sets=[test_set],
                        verbose_eval=False, callbacks=[LightGBMCallback])

    # Round the raw predictions to the nearest integer to get class labels.
    pred_labels = np.rint(booster.predict(test_x))
    accuracy = sklearn.metrics.accuracy_score(test_y, pred_labels)
    tune.report(mean_accuracy=accuracy, done=True)
示例13: test_lgb_autolog_logs_metrics_with_multi_validation_data
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import Dataset [as 别名]
def test_lgb_autolog_logs_metrics_with_multi_validation_data(bst_params, train_set):
    """Autologging records ``multi_logloss`` for every validation set.

    After 10 boosting rounds, each validation name must have a 10-entry
    metric history in MLflow that equals what ``evals_result`` collected.
    """
    mlflow.lightgbm.autolog()
    evals_result = {}
    valid_names = ['train', 'valid']
    # If we use [train_set, train_set] here, LightGBM ignores the first dataset.
    # To avoid that, create a new Dataset object.
    valid_sets = [train_set, lgb.Dataset(train_set.data)]
    lgb.train(bst_params, train_set, num_boost_round=10,
              valid_sets=valid_sets, valid_names=valid_names,
              evals_result=evals_result)

    run = get_latest_run()
    logged = run.data
    client = mlflow.tracking.MlflowClient()

    for valid_name in valid_names:
        metric_key = '{}-multi_logloss'.format(valid_name)
        history = client.get_metric_history(run.info.run_id, metric_key)
        metric_history = [entry.value for entry in history]
        assert metric_key in logged.metrics
        assert len(metric_history) == 10
        assert metric_history == evals_result[valid_name]['multi_logloss']
示例14: test_lgb_autolog_logs_metrics_with_multi_validation_data_and_metrics
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import Dataset [as 别名]
def test_lgb_autolog_logs_metrics_with_multi_validation_data_and_metrics(bst_params, train_set):
    """Autologging records every metric for every validation set.

    Each (validation name, metric) pair must have a 10-entry history in
    MLflow that equals what ``evals_result`` collected during training.
    """
    mlflow.lightgbm.autolog()
    evals_result = {}
    params = {'metric': ['multi_error', 'multi_logloss']}
    # Entries from the bst_params fixture override the defaults above.
    params.update(bst_params)

    valid_names = ['train', 'valid']
    valid_sets = [train_set, lgb.Dataset(train_set.data)]
    lgb.train(params, train_set, num_boost_round=10,
              valid_sets=valid_sets, valid_names=valid_names,
              evals_result=evals_result)

    run = get_latest_run()
    logged = run.data
    client = mlflow.tracking.MlflowClient()

    pairs = [(valid_name, metric_name)
             for valid_name in valid_names
             for metric_name in params['metric']]
    for valid_name, metric_name in pairs:
        metric_key = '{}-{}'.format(valid_name, metric_name)
        history = client.get_metric_history(run.info.run_id, metric_key)
        metric_history = [entry.value for entry in history]
        assert metric_key in logged.metrics
        assert len(metric_history) == 10
        assert metric_history == evals_result[valid_name][metric_name]
示例15: objective
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import Dataset [as 别名]
def objective(trial):
    """Score one trial: train LightGBM with suggested parameters.

    Args:
        trial: Object providing ``suggest_loguniform``, ``suggest_uniform``
            and ``suggest_int``, used to sample the hyperparameters.

    Returns:
        Accuracy of the rounded predictions on the held-out 25% of the
        breast-cancer data.
    """
    features, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    train_x, valid_x, train_y, valid_y = train_test_split(
        features, labels, test_size=0.25)
    dtrain = lgb.Dataset(train_x, label=train_y)

    # Fixed settings first, then the per-trial search space.
    param = {
        "objective": "binary",
        "metric": "binary_logloss",
        "verbosity": -1,
        "boosting_type": "gbdt",
    }
    param.update({
        "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 10.0),
        "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 10.0),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    })

    booster = lgb.train(param, dtrain)
    # Round the raw predictions to the nearest integer to get class labels.
    pred_labels = np.rint(booster.predict(valid_x))
    return sklearn.metrics.accuracy_score(valid_y, pred_labels)