本文整理汇总了Python中sklearn.ensemble.ExtraTreesClassifier.set_params方法的典型用法代码示例。如果您正苦于以下问题:Python ExtraTreesClassifier.set_params方法的具体用法?Python ExtraTreesClassifier.set_params怎么用?Python ExtraTreesClassifier.set_params使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.ExtraTreesClassifier
的用法示例。
在下文中一共展示了ExtraTreesClassifier.set_params方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: random_forest_cross_validate
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import set_params [as 别名]
def random_forest_cross_validate(targets, features, nprocesses=-1):
    """Run 5-fold cross-validation with an ExtraTreesClassifier.

    For each fold, fit a fresh classifier on the training split, score it on
    the held-out split, collect the ``get_metric`` difference, and dump the
    fold's non-zero feature importances to ``important_features.p``.

    Args:
        targets: label array, indexable by the boolean fold masks.
        features: feature matrix; assumed to be a pandas DataFrame
            (``.columns`` is read below) -- TODO confirm against callers.
        nprocesses: ``n_jobs`` used while fitting; -1 means all cores.

    Returns:
        list of per-fold ``get_metric`` results.
    """
    # NOTE(review): this uses the legacy sklearn API
    # (cross_validation.KFold(k=..., indices=False), compute_importances=True);
    # confirm the pinned sklearn version still supports it.
    cv = cross_validation.KFold(len(features), k=5, indices=False)
    # Iterate through the train/test segments and aggregate results.
    results = []
    for i, (traincv, testcv) in enumerate(cv):
        # A fresh classifier per fold so no fitted state leaks between folds.
        cfr = ExtraTreesClassifier(
            n_estimators=100,
            max_features=None,
            verbose=2,
            compute_importances=True,
            n_jobs=nprocesses,
            random_state=0,
        )
        print("Fitting cross validation #{0}".format(i))
        cfr.fit(features[traincv], targets[traincv])
        print("Scoring cross validation #{0}".format(i))
        # Scoring a single fold is cheap; parallel workers are not worth
        # the fork overhead here.
        cfr.set_params(n_jobs=1)
        score = cfr.score(features[testcv], targets[testcv])
        print("Score for cross validation #{0}, score: {1}".format(i, score))
        mean_diff = get_metric(cfr, features[testcv], targets[testcv])
        print("Mean difference: {0}".format(mean_diff))
        results.append(mean_diff)
        # Report and persist the fold's non-zero feature importances
        # (largest first). The pickle is overwritten each fold, so the file
        # ends up holding the last fold's ranking.
        print("Features importance")
        features_list = []
        for j, importance in enumerate(cfr.feature_importances_):
            if importance > 0.0:
                features_list.append((features.columns[j], importance))
        features_list.sort(key=lambda x: x[1], reverse=True)
        for j, tup in enumerate(features_list):
            print(j, tup)
        # BUG FIX: the original leaked the file handle opened inline for
        # pickle.dump; use a context manager.
        with open("important_features.p", "wb") as fh:
            pickle.dump(features_list, fh)
    # BUG FIX: the original duplicated the "Mean difference" print/append
    # after the loop, double-counting the last fold. Print a summary instead.
    if results:
        print("Mean difference over {0} folds: {1}".format(
            len(results), sum(results) / float(len(results))))
    return results
示例2: get_clf
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import set_params [as 别名]
def get_clf(X_train, Y_train, feat_indices=None, clf_used='rf', grid_search=False):
    """Configure, optionally grid-search, and fit the selected estimator.

    Merges per-estimator fixed settings into the module-level ``params``
    table, instantiates the estimator, and either grid-searches it or fits
    it directly, optionally printing a feature-importance ranking.

    Args:
        X_train, Y_train: training features and labels.
        feat_indices: index -> feature-name mapping used only for the
            importance report; may be None when importances are disabled.
        clf_used: key selecting the estimator config ('rf', 'gbm', 'lasso',
            'SGD', 'elastic', 'SVR').
        grid_search: when True, delegate to ``grid(...)`` and return its
            result instead of a fitted estimator.

    Returns:
        the fitted estimator, or whatever ``grid(...)`` returns.
    """
    # Fixed (non-searched) parameters per estimator key. These are merged
    # into the module-level `params` dict below -- NOTE(review): this
    # mutates shared module state on every call; confirm that is intended.
    params_fixed = {
        'rf': {
            'random_state': 100,
            'verbose': 1,
            # 'verbose': 0,
            'compute_importances': SETTINGS['IMPORTANCES']
        },
        'gbm': {
            'random_state': 101,
            'min_samples_split': 1,
            'min_samples_leaf': 2,
            'subsample': 0.5,
            'verbose': 0
        },
        'lasso': {
            # 'verbose': 1
        },
        'SGD': {
            'verbose': 1
        },
        'elastic': {
        },
        'SVR': {
            'verbose': True
        }
    }
    # Python 2 dict API (`iteritems`); `params` is a module-level table of
    # search/default parameters keyed like `params_fixed`.
    for k, v in params_fixed.iteritems():
        params[k].update(v)
    # NOTE(review): `ESTIMATOR` is a module-level callable; presumably it is
    # already bound to the class matching `clf_used` -- confirm at call site.
    clf = ESTIMATOR()
    clf.set_params(**params[clf_used])
    if grid_search:
        # Delegate hyper-parameter search (3 presumably = CV folds -- TODO
        # confirm `grid`'s signature).
        return grid(clf, params_grid[clf_used], X_train, Y_train, 3)
    else:
        print_err("training start")
        clf.fit(X_train, Y_train)
        if SETTINGS['IMPORTANCES']:
            if clf_used in ['rf', 'lasso']:
                # Forests expose feature_importances_; lasso exposes coef_.
                importances = clf.feature_importances_ if clf_used == 'rf' else clf.coef_
                # Rank features by importance, descending.
                indices = np.argsort(importances)[::-1]
                print_err("Feature ranking:")
                for f, indf in enumerate(indices):
                    print_err("{0}. feature {1}: {2} ({3})".format(f + 1, indf, feat_indices[indf].encode("utf-8"), importances[indf]))
            else:
                # No importances available: just list the feature names.
                for i, fk in enumerate(feat_indices):
                    print_err("{0}.".format(i+1), fk)
        print_err("trained!")
        return clf
示例3: getExtraTressClf
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import set_params [as 别名]
def getExtraTressClf(self, X, Y, param_list=-1):
    """Build and fit an ExtraTreesClassifier, optionally via random search.

    When ``self._gridSearchFlag`` is set, a randomized hyper-parameter
    search is delegated to ``self.doRandomSearch``; otherwise the classifier
    is fit directly, with ``param_list`` (a dict of estimator parameters)
    applied first when provided.

    Args:
        X: training features; assumed to be a pandas DataFrame
            (``X.columns`` is read below) -- TODO confirm.
        Y: training labels.
        param_list: dict of ExtraTreesClassifier parameters, or -1 (the
            sentinel default) to use the defaults below.

    Returns:
        the fitted (or search-selected) classifier.
    """
    clfName = "Extra_Trees"
    ## http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html
    # Explicit defaults, spelled out for easy tweaking.
    clf = ExtraTreesClassifier(
        n_estimators=10,
        criterion='gini',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features='auto',
        max_leaf_nodes=None,
        bootstrap=False,
        oob_score=False,
        n_jobs=1,
        random_state=None,
        verbose=0,
        warm_start=False,
        class_weight=None)
    if self._gridSearchFlag:
        log(clfName + " start searching param...")
        # Search tree depth between 70% and 100% of the feature count.
        tmpLowDepth = int(len(X.columns) * 0.7)
        tmpHighDepth = int(len(X.columns))
        param_dist = {
            "max_depth": sp_randint(tmpLowDepth, tmpHighDepth),
            "max_features": sp_randf(0, 1),
            "min_samples_split": sp_randint(1, 11),
            "min_samples_leaf": sp_randint(1, 11),
            # Single-valued choice lists: bootstrap/oob_score are forced
            # True during the search (oob_score requires bootstrap=True).
            "bootstrap": [True],
            "criterion": ["gini", "entropy"],
            "oob_score": [True],
            "n_estimators": sp_randint(800, 1200),
        }
        clf = self.doRandomSearch(clfName, clf, param_dist, X, Y)
    else:
        if param_list != -1:
            # BUG FIX: the original did ExtraTreesClassifier(param_list),
            # passing the dict positionally as n_estimators. Build a default
            # instance and apply the dict via set_params instead.
            clf = ExtraTreesClassifier()
            clf.set_params(**param_list)
        clf.fit(X, Y)
    return clf
示例4: len
# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import set_params [as 别名]
# NOTE(review): this is a mid-script fragment. The if/else below references
# loop variables (a, b, a_vals, b_vals) whose defining loop is not visible
# here -- presumably a per-column loop pairing train column `a` with test
# column `b`; confirm against the surrounding file.
if a_vals.dtype == "O":
    # Object (string) column: factorize train values to integer codes and
    # map the test column through the same index (unseen values become -1).
    train[a], tmp_indexer = pd.factorize(train[a])
    test[b] = tmp_indexer.get_indexer(test[b])
else:
    # For numeric columns, replace missing values with -999
    tmp_len = len(train[a_vals.isnull()])
    if tmp_len > 0:
        train.loc[a_vals.isnull(), a] = -999
    tmp_len = len(test[b_vals.isnull()])
    if tmp_len > 0:
        test.loc[b_vals.isnull(), b] = -999
# Training: configure from the cfg section `s`, hold out 40% for evaluation.
t0 = time.time()
clf = ExtraTreesClassifier()
clf.set_params(**cfg[s]["estimator_params_etc"])
X, X_eval, y, y_eval = cv.train_test_split(train, target, test_size=0.4)
if cfg[s]["find_best"] == True:
    # Hyper-parameter search via the project's utils helper.
    model = utils.find_best_estimator(clf, X, y, cfg, section=s,
        grid_search_params_key="gs_params_etc",
        scoring="log_loss", verbosity=2)
    logger.info(model)
else:
    model = clf.fit(X, y)
logger.info("%.2f seconds to train %s" % ((time.time() - t0), model))
# Evaluate with log loss on the positive-class probabilities.
# NOTE(review): `log_loss` shadows the metrics function name of the same
# purpose; harmless here but confusing.
preds = model.predict_proba(X_eval)[:, 1]
log_loss = metrics.log_loss(y_eval, preds)
logger.info("Log loss : %.6f" % log_loss)