本文整理汇总了Python中lightgbm.LGBMClassifier方法的典型用法代码示例。如果您正苦于以下问题:Python lightgbm.LGBMClassifier方法的具体用法?Python lightgbm.LGBMClassifier怎么用?Python lightgbm.LGBMClassifier使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块lightgbm的用法示例。
在下文中一共展示了lightgbm.LGBMClassifier方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: Train
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import LGBMClassifier [as 别名]
def Train(data, modelcount, censhu, yanzhgdata):
    """Fit a binary LightGBM classifier and score it on train and validation data.

    In both ``data`` and ``yanzhgdata`` the last column is used as the label
    and every preceding column as a feature.

    Args:
        data: 2-D array of training rows.
        modelcount: Forwarded to LightGBM as ``n_estimators``.
        censhu: Forwarded to LightGBM as ``max_depth``.
        yanzhgdata: 2-D array of validation rows, same layout as ``data``.

    Returns:
        A ``(train_score, validation_score)`` tuple, where each score is the
        first element of what ``fmse`` returns for the respective predictions.
    """
    # NOTE(review): LightGBM documents that bagging only takes effect when
    # bagging_freq is non-zero; it is not set here, so bagging_fraction may be
    # inert -- confirm against the LightGBM parameter docs.
    classifier = lgbm.LGBMClassifier(
        boosting_type='gbdt',
        objective='binary',
        num_leaves=50,
        learning_rate=0.1,
        n_estimators=modelcount,
        max_depth=censhu,
        bagging_fraction=0.9,
        feature_fraction=0.9,
        reg_lambda=0.2,
    )

    train_x, train_y = data[:, :-1], data[:, -1]
    classifier.fit(train_x, train_y)

    # Score the predictions on the training data. ``fmse`` is defined
    # elsewhere; the original comment describes it as the F1 measure.
    train_mse = fmse(train_y, classifier.predict(train_x))[0]

    # Score the predictions on the validation data the same way.
    valid_x, valid_y = yanzhgdata[:, :-1], yanzhgdata[:, -1]
    add_mse = fmse(valid_y, classifier.predict(valid_x))[0]

    print(train_mse, add_mse)
    return train_mse, add_mse
# 最终确定组合的函数
示例2: recspre
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import LGBMClassifier [as 别名]
def recspre(estrs, predata, datadict, zhe):
    """Train on one fold's training split and evaluate on ``predata``.

    Args:
        estrs: String of the form ``"<n_estimators>-<max_depth>"``; both parts
            are converted with ``int``.
        predata: 2-D array to evaluate on; the last column is the label.
        datadict: Mapping indexed by ``zhe``; ``datadict[zhe]['train']`` is the
            2-D training array (last column is the label).
        zhe: Key selecting which entry of ``datadict`` to train on.

    Returns:
        Whatever ``fmse`` returns for the true labels and the predictions.
    """
    tree_count, depth = estrs.split('-')
    classifier = lgbm.LGBMClassifier(
        boosting_type='gbdt',
        objective='binary',
        num_leaves=50,
        learning_rate=0.1,
        n_estimators=int(tree_count),
        max_depth=int(depth),
        bagging_fraction=0.9,
        feature_fraction=0.9,
        reg_lambda=0.2,
    )

    train_rows = datadict[zhe]['train']
    classifier.fit(train_rows[:, :-1], train_rows[:, -1])

    # Predict on the evaluation set.
    truth = predata[:, -1]
    yucede = classifier.predict(predata[:, :-1])

    # Print the confusion matrix before returning the score.
    print(ConfuseMatrix(truth, yucede))
    return fmse(truth, yucede)
# 主函数
示例3: test_multi_class
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import LGBMClassifier [as 别名]
def test_multi_class():
    """Check the expression assembled for a one-tree, three-class LightGBM model."""
    samples = np.array([[1], [2], [3]])
    labels = np.array([1, 2, 3])

    estimator = lightgbm.LGBMClassifier(
        n_estimators=1, random_state=1, max_depth=1)
    estimator.fit(samples, labels)

    actual = assemblers.LightGBMModelAssembler(estimator).assemble()

    # The constant equals ln(1/3); presumably the raw score of each of the
    # three equally frequent classes -- confirm against the assembler.
    exponent = ast.ExpExpr(ast.NumVal(-1.0986122886681098), to_reuse=True)

    # exponent + exponent + exponent, built as a left-nested sum.
    first_two = ast.BinNumExpr(exponent, exponent, ast.BinNumOpType.ADD)
    exponent_sum = ast.BinNumExpr(
        first_two, exponent, ast.BinNumOpType.ADD, to_reuse=True)

    softmax = ast.BinNumExpr(exponent, exponent_sum, ast.BinNumOpType.DIV)
    expected = ast.VectorVal([softmax] * 3)

    assert utils.cmp_exprs(actual, expected)
示例4: test_cv_lgbm
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import LGBMClassifier [as 别名]
def test_cv_lgbm():
    """Run 5-fold ``cross_validate`` with LightGBM on array inputs and check its outputs."""
    n_folds = 5
    n_features = 20

    X, y = make_classification(
        n_samples=1024, n_features=n_features, class_sep=0.98, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=0)

    # One estimator instance per fold.
    models = [LGBMClassifier(n_estimators=300) for _ in range(n_folds)]

    cv_result = cross_validate(
        models, X_train, y_train, X_test,
        cv=n_folds,
        eval_func=roc_auc_score,
        fit_params={'early_stopping_rounds': 200},
    )
    pred_oof, pred_test, scores, importance = cv_result
    print(scores)

    # One score per fold plus a trailing overall score.
    assert len(scores) == n_folds + 1
    assert scores[-1] >= 0.85  # overall roc_auc
    assert roc_auc_score(y_train, pred_oof) == scores[-1]
    assert roc_auc_score(y_test, pred_test) >= 0.85  # test roc_auc

    # make sure models are trained
    first_model_proba = models[0].predict_proba(X)[:, 1]
    assert roc_auc_score(y, first_model_proba) >= 0.85

    # One importance table per fold, one row per feature.
    assert len(importance) == n_folds
    assert list(importance[0].columns) == ['feature', 'importance']
    assert len(importance[0]) == n_features
示例5: test_cv_lgbm_df
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import LGBMClassifier [as 别名]
def test_cv_lgbm_df():
    """Run 5-fold ``cross_validate`` with LightGBM on DataFrame inputs and check its outputs."""
    n_folds = 5
    n_numeric = 20
    n_categorical = 1

    X, y = make_classification_df(
        n_samples=1024,
        n_num_features=n_numeric,
        n_cat_features=n_categorical,
        class_sep=0.98,
        random_state=0,
    )
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=0)

    # One estimator instance per fold.
    models = [LGBMClassifier(n_estimators=300) for _ in range(n_folds)]

    cv_result = cross_validate(
        models, X_train, y_train, X_test, cv=n_folds, eval_func=roc_auc_score)
    pred_oof, pred_test, scores, importance = cv_result
    print(scores)

    # One score per fold plus a trailing overall score.
    assert len(scores) == n_folds + 1
    assert scores[-1] >= 0.85  # overall roc_auc
    assert roc_auc_score(y_train, pred_oof) == scores[-1]
    assert roc_auc_score(y_test, pred_test) >= 0.85  # test roc_auc

    # make sure models are trained
    first_model_proba = models[0].predict_proba(X_test)[:, 1]
    assert roc_auc_score(y_test, first_model_proba) >= 0.85

    # One importance table per fold; numeric plus categorical feature rows.
    assert len(importance) == n_folds
    assert list(importance[0].columns) == ['feature', 'importance']
    assert len(importance[0]) == n_numeric + n_categorical

    # making sure early stopping worked
    # NOTE(review): no early-stopping fit_params are passed in this test;
    # presumably cross_validate enables it by default -- confirm.
    assert models[0].booster_.num_trees() < 300
示例6: test_fit_params_callback
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import LGBMClassifier [as 别名]
def test_fit_params_callback():
    """Check that a callable ``fit_params`` supplying sample weights changes the CV score."""
    X, y = make_classification(
        n_samples=1024, n_features=20, class_sep=0.98, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=0)

    models = [LGBMClassifier(n_estimators=300) for _ in range(5)]

    # Random integer weights in [1, 10), normalised to sum to 1.
    # NOTE(review): np.random is not seeded here, so the weights differ
    # between runs -- confirm this is intended.
    raw_weights = np.random.randint(1, 10, size=len(X_train))
    sample_weights = raw_weights / raw_weights.sum()

    def fit_params(n: int, train_index: List[int], valid_index: List[int]):
        """Build the per-fold fit kwargs from the fold's row indices."""
        train_weights = list(sample_weights[train_index])
        valid_weights = list(sample_weights[valid_index])
        return {
            'early_stopping_rounds': 100,
            'sample_weight': train_weights,
            'eval_sample_weight': [valid_weights],
        }

    shared_kwargs = dict(cv=5, eval_func=roc_auc_score)

    result_w_weight = cross_validate(
        models, X_train, y_train, X_test,
        fit_params=fit_params, **shared_kwargs)
    result_wo_weight = cross_validate(
        models, X_train, y_train, X_test,
        fit_params={'early_stopping_rounds': 50}, **shared_kwargs)

    # The overall (last) score must differ between the two runs.
    weighted_overall = result_w_weight.scores[-1]
    unweighted_overall = result_wo_weight.scores[-1]
    assert weighted_overall != unweighted_overall
示例7: __init__
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import LGBMClassifier [as 别名]
def __init__(self):
    """Build a name -> classifier-class registry from whichever libraries import.

    Each backend is optional: when its package cannot be imported the entry is
    simply left out of ``self._models``.
    """
    registry = {}
    self._models = registry

    # scikit-learn random forest
    try:
        import sklearn.ensemble as sk_ensemble
        registry['RandomForestClassifier'] = sk_ensemble.RandomForestClassifier
    except ImportError:
        pass

    # XGBoost
    try:
        import xgboost as xgb_module
        registry['XGBClassifier'] = xgb_module.XGBClassifier
    except ImportError:
        pass

    # LightGBM
    try:
        import lightgbm as lgbm_module
        registry['LGBMClassifier'] = lgbm_module.LGBMClassifier
    except ImportError:
        pass

    # CatBoost
    try:
        import catboost as catboost_module
        registry['CatBoostClassifier'] = catboost_module.CatBoostClassifier
    except ImportError:
        pass
示例8: __call__
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import LGBMClassifier [as 别名]
def __call__(self, estimator):
    """Fit ``estimator`` on the stored training data and score the stored test data.

    The scoring method depends on the estimator's type: ``decision_function``
    for the linear/SVM/LightBase group, ``predict_proba`` for tree, forest,
    XGBoost and LightGBM classifiers (with a float32 cast of the input for
    ``DecisionTreeClassifier``), and plain ``predict`` for anything else.

    Args:
        estimator: Object exposing ``fit`` and the relevant scoring method.

    Returns:
        A ``(X_test, y_pred, fitted_estimator)`` tuple, where
        ``fitted_estimator`` is the return value of ``estimator.fit``.
    """
    fitted_estimator = estimator.fit(self.X_train, self.y_train)

    if isinstance(estimator, (LinearClassifierMixin, SVC, NuSVC,
                              LightBaseClassifier)):
        score_fn = estimator.decision_function
        inputs = self.X_test
    elif isinstance(estimator, DecisionTreeClassifier):
        score_fn = estimator.predict_proba
        inputs = self.X_test.astype(np.float32)
    elif isinstance(estimator,
                    (ForestClassifier, XGBClassifier, LGBMClassifier)):
        score_fn = estimator.predict_proba
        inputs = self.X_test
    else:
        score_fn = estimator.predict
        inputs = self.X_test

    y_pred = score_fn(inputs)
    return self.X_test, y_pred, fitted_estimator
示例9: get_feature_importances
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import LGBMClassifier [as 别名]
def get_feature_importances(data, shuffle, cats=None, seed=None):
    """Fit a LightGBM model on ``data`` and return its feature importances.

    Relies on names defined outside this function: ``target``,
    ``cols2ignore``, ``is_regression``, ``importance`` and ``lgbm_params``.

    Args:
        data: DataFrame holding the feature columns and the ``target`` column.
        shuffle: When true, the target column is randomly permuted before
            fitting.
        cats: Columns passed to LightGBM as ``categorical_feature``. ``None``
            (the default) is treated as an empty list.
        seed: Random state for the model; ``seed + 4`` seeds the target
            shuffle. ``None`` leaves both unseeded.

    Returns:
        DataFrame with a ``feature`` column and an ``importance`` column, one
        row per training feature.
    """
    # Avoid a shared mutable default; an empty list preserves the old default.
    if cats is None:
        cats = []

    # Gather real features
    excluded = [target] + cols2ignore
    train_features = [f for f in data if f not in excluded]

    # Shuffle target if required
    y = data[target].copy()
    if shuffle:
        # `seed + 4` used to raise TypeError when seed was left as None.
        shuffle_seed = None if seed is None else seed + 4
        y = y.sample(frac=1.0, random_state=shuffle_seed)

    # The imported names are unused here; presumably this import is needed
    # for its side effects before lightgbm is loaded -- TODO confirm.
    from h2oaicore.lightgbm_dynamic import got_cpu_lgb, got_gpu_lgb
    import lightgbm as lgbm

    if is_regression:
        model = lgbm.LGBMRegressor(
            random_state=seed, importance_type=importance, **lgbm_params)
    else:
        model = lgbm.LGBMClassifier(
            random_state=seed, importance_type=importance, **lgbm_params)
        y = LabelEncoder().fit_transform(y)

    # The original comment says LightGBM is fitted in RF mode because it is
    # quicker than sklearn's RandomForest; the mode itself comes from
    # lgbm_params, which is defined outside this function.
    model.fit(data[train_features], y, categorical_feature=cats)

    # Get feature importances
    imp_df = pd.DataFrame()
    imp_df["feature"] = list(train_features)
    imp_df["importance"] = model.feature_importances_
    return imp_df
示例10: test_01_lgbm_classifier
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import LGBMClassifier [as 别名]
def test_01_lgbm_classifier(self):
    """Export a MinMaxScaler + LGBMClassifier pipeline to PMML and compare server scores."""
    print("\ntest 01 (lgbm classifier with preprocessing) [binary-class]\n")

    classifier = LGBMClassifier()
    steps = [
        ('scaler', MinMaxScaler()),
        ("model", classifier),
    ]
    pipeline_obj = Pipeline(steps)
    pipeline_obj.fit(self.X, self.Y_bin)

    # Write the fitted pipeline out as PMML and score it remotely.
    pmml_path = "test01lgbm.pmml"
    lgb_to_pmml(pipeline_obj, self.features, 'Species', pmml_path)
    server = self.adapa_utility
    model_name = server.upload_to_zserver(pmml_path)
    predictions, probabilities = server.score_in_zserver(
        model_name, self.test_file)

    # Local reference outputs from the same pipeline.
    model_pred = pipeline_obj.predict(self.X)
    model_prob = pipeline_obj.predict_proba(self.X)

    self.assertEqual(
        self.adapa_utility.compare_predictions(predictions, model_pred), True)
    self.assertEqual(
        self.adapa_utility.compare_probability(probabilities, model_prob), True)
示例11: test_02_lgbm_classifier
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import LGBMClassifier [as 别名]
def test_02_lgbm_classifier(self):
    """Export a MaxAbsScaler + LGBMClassifier pipeline to PMML and compare server scores."""
    print("\ntest 02 (lgbm classifier with preprocessing) [multi-class]\n")

    classifier = LGBMClassifier()
    steps = [
        ('scaler', MaxAbsScaler()),
        ("model", classifier),
    ]
    pipeline_obj = Pipeline(steps)
    pipeline_obj.fit(self.X, self.Y)

    # Write the fitted pipeline out as PMML and score it remotely.
    pmml_path = "test02lgbm.pmml"
    lgb_to_pmml(pipeline_obj, self.features, 'Species', pmml_path)
    server = self.adapa_utility
    model_name = server.upload_to_zserver(pmml_path)
    predictions, probabilities = server.score_in_zserver(
        model_name, self.test_file)

    # Local reference outputs from the same pipeline.
    model_pred = pipeline_obj.predict(self.X)
    model_prob = pipeline_obj.predict_proba(self.X)

    self.assertEqual(
        self.adapa_utility.compare_predictions(predictions, model_pred), True)
    self.assertEqual(
        self.adapa_utility.compare_probability(probabilities, model_prob), True)
示例12: test_flofo_importance
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import LGBMClassifier [as 别名]
def test_flofo_importance():
    """Check FLOFO importance in serial and parallel mode on a pre-trained LightGBM model."""
    target_col = 'binary_target'
    features = ["A", "B", "C", "D"]

    df = generate_test_data(100000)
    # Blank out every "A" value below the column median.
    below_median = df["A"] < df["A"].median()
    df.loc[below_median, "A"] = None

    train_df, val_df = train_test_split(df, test_size=0.2, random_state=0)
    val_df_checkpoint = val_df.copy()

    classifier = LGBMClassifier(random_state=0, n_jobs=1)
    classifier.fit(train_df[features], train_df["binary_target"])

    # NOTE(review): the full ``df`` (not ``val_df``) is handed to
    # FLOFOImportance, while the mutation check below inspects ``val_df`` --
    # confirm this is intended.
    flofo = FLOFOImportance(
        classifier, df, features, target_col, scoring='roc_auc')
    flofo_parallel = FLOFOImportance(
        classifier, df, features, target_col, scoring='roc_auc', n_jobs=3)

    importance_df = flofo.get_importance()
    importance_df_parallel = flofo_parallel.get_importance()

    serial_order = importance_df["feature"].values
    parallel_order = importance_df_parallel["feature"].values
    is_feature_order_same = serial_order == parallel_order

    plot_importance(importance_df)

    assert is_feature_order_same.sum() == len(features), "Parallel FLOFO returned different result!"
    assert val_df.equals(val_df_checkpoint), "LOFOImportance mutated the dataframe!"
    assert len(features) == importance_df.shape[0], "Missing importance value for some features!"
    assert importance_df["feature"].values[0] == "B", "Most important feature is different than B!"
示例13: test_feature_groups
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import LGBMClassifier [as 别名]
def test_feature_groups():
    """Check LOFO importance when extra feature groups accompany the plain features."""
    df = generate_test_data(1000, text=True)
    features = ["A", "B", "C", "D"]

    # Character 3-gram counts of the text column form the "names" group.
    vectorizer = CountVectorizer(ngram_range=(3, 3), analyzer="char")
    feature_groups = {
        "names": vectorizer.fit_transform(df["T"]),
        # Element-wise products A*C and B*D form the "interactions" group.
        "interactions": df[["A", "B"]].values * df[["C", "D"]].values,
    }

    dataset = Dataset(
        df=df,
        target="binary_target",
        features=features,
        feature_groups=feature_groups,
    )

    classifier = LGBMClassifier(random_state=0, n_jobs=4)
    lofo = LOFOImportance(dataset, model=classifier, cv=4, scoring='roc_auc')
    importance_df = lofo.get_importance()

    expected_rows = len(features) + len(feature_groups)
    assert expected_rows == importance_df.shape[0], "Missing importance value for some features!"
    assert importance_df["feature"].values[0] == "names", "Most important feature is different than 'names'!"
示例14: test_autoai_libs_tam_2
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import LGBMClassifier [as 别名]
def test_autoai_libs_tam_2(self):
    """Round-trip the pretty-printed form of a TAM(PCA) >> LGBMClassifier pipeline."""
    import numpy as np
    from lightgbm import LGBMClassifier
    from sklearn.decomposition import PCA
    from lale.lib.autoai_libs import TAM
    from lale.operators import make_pipeline

    float32 = np.dtype('float32')
    tam = TAM(
        tans_class=PCA(copy=False),
        name='pca',
        col_names=['a', 'b', 'c'],
        col_dtypes=[float32, float32, float32],
    )
    lgbm_classifier = LGBMClassifier(class_weight='balanced', learning_rate=0.18)
    pipeline = make_pipeline(tam, lgbm_classifier)

    # The expected source text is kept at column 0 so the literal carries no
    # leading whitespace.
    expected = """from lale.lib.autoai_libs import TAM
import sklearn.decomposition.pca
import numpy as np
from lightgbm import LGBMClassifier
from lale.operators import make_pipeline
tam = TAM(tans_class=sklearn.decomposition.pca.PCA(copy=False, iterated_power='auto', n_components=None, random_state=None, svd_solver='auto', tol=0.0, whiten=False), name='pca', col_names=['a', 'b', 'c'], col_dtypes=[np.dtype('float32'), np.dtype('float32'), np.dtype('float32')])
lgbm_classifier = LGBMClassifier(class_weight='balanced', learning_rate=0.18)
pipeline = make_pipeline(tam, lgbm_classifier)"""

    printed = lale.pretty_print.to_string(pipeline, combinators=False)
    self._roundtrip(expected, printed)
示例15: test_autoai_libs_t_no_op
# 需要导入模块: import lightgbm [as 别名]
# 或者: from lightgbm import LGBMClassifier [as 别名]
def test_autoai_libs_t_no_op(self):
    """Round-trip the pretty-printed form of a TNoOp >> LGBMClassifier pipeline."""
    from lightgbm import LGBMClassifier
    from lale.lib.autoai_libs import TNoOp
    from lale.operators import make_pipeline

    t_no_op = TNoOp(
        name='no_action',
        datatypes='x',
        feat_constraints=[],
    )
    lgbm_classifier = LGBMClassifier(class_weight='balanced', learning_rate=0.18)
    pipeline = make_pipeline(t_no_op, lgbm_classifier)

    # The expected source text is kept at column 0 so the literal carries no
    # leading whitespace.
    expected = """from lale.lib.autoai_libs import TNoOp
from lightgbm import LGBMClassifier
from lale.operators import make_pipeline
t_no_op = TNoOp(name='no_action', datatypes='x', feat_constraints=[])
lgbm_classifier = LGBMClassifier(class_weight='balanced', learning_rate=0.18)
pipeline = make_pipeline(t_no_op, lgbm_classifier)"""

    printed = lale.pretty_print.to_string(pipeline, combinators=False)
    self._roundtrip(expected, printed)