本文整理汇总了Python中sklearn.ensemble.RandomForestClassifier.set_params方法的典型用法代码示例。如果您正苦于以下问题：Python RandomForestClassifier.set_params方法的具体用法？Python RandomForestClassifier.set_params怎么用？Python RandomForestClassifier.set_params使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.RandomForestClassifier的用法示例。
在下文中一共展示了RandomForestClassifier.set_params方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: exercise_2
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def exercise_2():
    """Plot 10-fold cross-validated zero-one loss against forest size.

    Downloads OpenML dataset 44, evaluates a random forest with
    1, 2, 4, ..., 128 trees on each fold, averages the per-fold zero-one
    loss for every forest size and shows the resulting curve.
    """
    # Connect to the OpenML API.
    # NOTE(review): the API key is hard-coded in source.
    connector = APIConnector(apikey='ca2397ea8a2cdd9707ef39d76576e786')
    dataset = connector.download_dataset(44)
    X, y, attribute_names = dataset.get_dataset(
        target=dataset.default_target_attribute,
        return_attribute_names=True)
    folds = cross_validation.KFold(len(X), n_folds=10, shuffle=False, random_state=0)
    tree_counts = [int(math.pow(2, exponent)) for exponent in range(0, 8)]
    clf = RandomForestClassifier(oob_score=True,
                                 max_features="auto",
                                 random_state=0)

    mean_losses = []
    for n_trees in tree_counts:
        fold_losses = []
        for train_index, test_index in folds:
            clf.set_params(n_estimators=n_trees)
            clf.fit(X[train_index], y[train_index])
            fold_losses.append(
                zero_one_loss(y[test_index], clf.predict(X[test_index])))
        mean_losses.append(np.array(fold_losses).mean())

    # Plot mean loss per forest size.
    plt.style.use('ggplot')
    plt.plot(tree_counts, mean_losses, '#009999', marker='o')
    plt.xticks(tree_counts)
    plt.show()
示例2: train_random_forest_with_params
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def train_random_forest_with_params(X, y, params):
    """Fit a random forest configured from ``params`` and print its training score.

    Args:
        X: Training features, passed to ``fit`` and ``score``.
        y: Training targets.
        params: Mapping of ``RandomForestClassifier`` parameter names to values.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    model = RandomForestClassifier()
    # set_params accepts keyword arguments only, so the mapping must be
    # unpacked; passing the dict positionally raises TypeError.
    model.set_params(**params)
    model.fit(X, y)
    # The score is computed on the same data the model was fitted on.
    score = model.score(X, y)
    print("Model Trainning Score: %s" % score)
    return model
示例3: gridtrainfraction
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def gridtrainfraction(trainiter, rfparams):
    """Grid-search on a small sample, then fit the best settings on a larger one.

    The data are read once (fraction 0.002) for the grid search, released,
    and read again (fraction 0.01) for the final model fit.

    Args:
        trainiter: Data source handed to ``fractionate``.
        rfparams: Keyword arguments for ``RandomForestClassifier``.

    Returns:
        A classifier built from ``rfparams``, updated with the best grid
        parameters and fitted on the larger sample.
    """
    def _encode(sample):
        # Dummy-encode every predictor (cast to str first); the target
        # column 'hotel_cluster' is returned separately.
        predictors = sample.drop('hotel_cluster', axis=1)
        design = sparsify(pd.get_dummies(predictors.astype(str)))
        return design, sample['hotel_cluster']

    sample = fractionate(trainiter, fraction=0.002)
    base = RandomForestClassifier(**rfparams)
    search = GridSearchCV(base, param_grid=gridparams, scoring='log_loss', n_jobs=1)
    X, y = _encode(sample)
    search.fit(X, y)
    print(search.best_params_)
    print(search.grid_scores_)

    # Drop the search-phase data before loading the larger sample.
    del sample, X, y, base

    sample = fractionate(trainiter, fraction=0.01)
    X, y = _encode(sample)
    final_clf = RandomForestClassifier(**rfparams)
    final_clf.set_params(**search.best_params_)
    final_clf.fit(X, y)
    return final_clf
示例4: random_forest_cross_validate
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def random_forest_cross_validate(targets, features, nprocesses=-1):
    """Cross-validate a 100-tree random forest over 5 folds and collect predictions.

    Each fold fits a fresh classifier, prints per-sample predictions and
    fold scores, and records the fold's test rows with two extra columns,
    ``prediction`` and ``prob``. After the folds, the non-zero feature
    importances of the last fitted classifier are printed and pickled to
    ``important_features.p``.

    Args:
        targets: Labels; indexed with the fold masks produced by ``KFold``.
        features: Feature table; indexed with the fold masks and expected to
            expose ``columns``.
        nprocesses: ``n_jobs`` used while fitting (prediction uses 1 job).

    Returns:
        The test rows of all folds concatenated into one DataFrame, or
        ``None`` if no fold was produced.
    """
    num_cv = 5
    cv = cross_validation.KFold(len(features), k=num_cv, indices=False)
    # Iterate through the training and test cross validation segments and
    # run the classifier on each one, aggregating the results into a list.
    score_sum = 0.0
    fold_frames = []
    for i, (traincv, testcv) in enumerate(cv):
        cfr = RandomForestClassifier(
            n_estimators=100,
            max_features=None,
            verbose=0,
            compute_importances=True,
            n_jobs=nprocesses,
            random_state=0,
        )
        print("Fitting cross validation #{0}".format(i))
        cfr.fit(features[traincv], targets[traincv])
        print("Scoring cross validation #{0}".format(i))
        cfr.set_params(n_jobs=1)
        # Select the fold once; the fold mask itself is never rebound, so
        # it stays valid for indexing `targets` below.
        test_features = features[testcv]
        test_targets = targets[testcv]
        predicted = cfr.predict(test_features)
        p = cfr.predict_proba(test_features)
        score = cfr.score(test_features, test_targets)
        score_sum += score
        # Positional view of the true labels for pairing with predictions.
        actual = np.asarray(test_targets)
        summer = 0.0
        for pred, truth in zip(predicted, actual):
            print("%s %s" % (pred, truth))
            summer += np.power((pred - truth), 2)
        print("score error: {0}".format(np.sqrt(summer)/len(testcv)))
        # Add stuff to the dataframe so we can plot things.
        fold_frame = pd.DataFrame(test_features)
        for pred, truth in zip(predicted, actual):
            print("%s %s" % (pred, truth))
        fold_frame['prediction'] = predicted
        # NOTE(review): column 0 is used when the prediction equals 1 and
        # column 1 otherwise, as in the original; confirm against classes_.
        fold_frame['prob'] = np.where(predicted == 1, p[:, 0], p[:, 1])
        # Keep every fold; DataFrame.append returns a new frame, so calling
        # it without using the result silently dropped all later folds.
        fold_frames.append(fold_frame)
        print("Score for cross validation #{0}, score: {1}".format(i, score))
    # NOTE(review): reported for the classifier of the last fold; the source
    # indentation was lost, so this placement after the loop is assumed.
    print("Features importance")
    features_list = [
        (features.columns[j], importance)
        for j, importance in enumerate(cfr.feature_importances_)
        if importance > 0.0
    ]
    features_list.sort(key=lambda item: item[1], reverse=True)
    for j, tup in enumerate(features_list):
        print("%s %s" % (j, tup))
    # Context manager closes the file even if pickling fails.
    with open("important_features.p", 'wb') as handle:
        pickle.dump(features_list, handle)
    print("Average Accuracy: {0}".format(float(score_sum)/float(num_cv)))
    return pd.concat(fold_frames) if fold_frames else None
示例5: test_parallel
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def test_parallel():
    """Check parallel computations."""
    def _predict_serial_and_parallel(estimator_cls, data, target):
        # Fit with 3 jobs, then predict with 1 job and with 2 jobs.
        model = estimator_cls(n_estimators=10, n_jobs=3, random_state=0)
        model.fit(data, target)
        assert_true(10 == len(model))
        outputs = []
        for jobs in (1, 2):
            model.set_params(n_jobs=jobs)
            outputs.append(model.predict(data))
        return outputs

    # Classification: predictions must match exactly.
    serial, parallel = _predict_serial_and_parallel(
        RandomForestClassifier, iris.data, iris.target)
    assert_array_equal(serial, parallel)

    # Regression: predictions must match to 3 decimals.
    serial, parallel = _predict_serial_and_parallel(
        RandomForestRegressor, boston.data, boston.target)
    assert_array_almost_equal(serial, parallel, 3)

    # Use all cores on the classification dataset.
    all_cores = RandomForestClassifier(n_jobs=-1)
    all_cores.fit(iris.data, iris.target)
示例6: cross_val_warm
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def cross_val_warm(clf_name, X, y, n_estimators_grid=range(10, 500, 50), *params, **kwargs):
    """Refit an ensemble at increasing ``n_estimators`` and print its OOB score.

    Args:
        clf_name: ``"random"``, ``"bag"`` or ``"extra"`` to build the matching
            ensemble, or an object whose type name contains ``sklearn``,
            which is used as given.
        X: Training features.
        y: Training targets.
        n_estimators_grid: Values of ``n_estimators`` to try; visited in
            ascending order.
        *params: Extra positional arguments forwarded to the constructors.
        **kwargs: Extra keyword arguments forwarded to the constructors.

    Returns:
        The estimator after the last fit.

    Raises:
        ValueError: If ``clf_name`` is neither a known name nor an
            scikit-learn object (previously an ``UnboundLocalError``).
    """
    if "sklearn" in str(type(clf_name)):
        c = clf_name
    elif clf_name == "random":
        c = RandomForestClassifier(*params, warm_start=True, oob_score=True,
                                   n_estimators=600, n_jobs=-1, **kwargs)
    elif clf_name == "bag":
        c = BaggingClassifier(*params,
                              base_estimator=MultinomialNB(*params, alpha=0.5, **kwargs),
                              n_estimators=100, n_jobs=-1, **kwargs)
    elif clf_name == "extra":
        c = ExtraTreesClassifier(*params, warm_start=True, **kwargs)
    else:
        raise ValueError("unknown classifier name: %r" % (clf_name,))

    for n_est in np.sort(n_estimators_grid):
        c.set_params(n_estimators=n_est)
        c.fit(X, y)
        # Estimators built without oob_score=True have no oob_score_;
        # report None for them instead of raising AttributeError.
        oob = getattr(c, "oob_score_", None)
        print(str(n_est)+"\t"+str(oob))
    return c
示例7: load_and_test
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def load_and_test(token, test_data, num_classes, result):
    """Load and test

    Args:
        token (:obj:`str`): token representing this run
        test_data (:obj:`tuple` of :obj:`numpy.array`): Tuple of testing feature and label
        num_classes (:obj:`int`): Number of classes (not used by this function)
        result (:obj:`pyActLearn.performance.record.LearningResult`): LearningResult object to hold learning result

    Returns:
        Tuple ``(predicted_y, predicted_proba)`` for ``test_data[0]``.
    """
    model = RandomForestClassifier(n_estimators=20, criterion="entropy")
    # set_params accepts keyword arguments only, so the stored mapping must
    # be unpacked; passing it positionally raises TypeError.
    # NOTE(review): set_params restores hyper-parameters, not fitted trees;
    # confirm the record holds a parameter dict and that predicting without
    # a prior fit is really intended here.
    model.set_params(**result.get_record_by_key(token)['model'])
    # Test
    features = test_data[0]
    predicted_y = model.predict(features)
    predicted_proba = model.predict_proba(features)
    return predicted_y, predicted_proba
示例8: submission
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def submission(test_values, train_values, train_labels):
    """Fit a tuned random forest and write test predictions to a CSV file.

    Every column except ``id`` is used as a feature; the target is the
    ``status_group`` column of ``train_labels``. Predictions are written to
    ``../data/submission.csv`` indexed by the test ``id`` column.
    """
    train_columns = train_values.columns.difference(['id'])
    X_train = train_values[train_columns]
    y_train = train_labels["status_group"]

    rf = RandomForestClassifier()
    tuned = getBestParams(X_train, y_train)
    rf.set_params(**tuned)
    rf.fit(X_train, y_train)

    test_columns = test_values.columns.difference(['id'])
    predictions = rf.predict(test_values[test_columns])

    output = pd.DataFrame(
        data=predictions,             # values
        index=test_values["id"],      # 1st column as index
        columns=["status_group"],     # 1st row as the column names
    )
    output.to_csv("../data/submission.csv")
示例9: find_best_value_for_parameter
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def find_best_value_for_parameter(X, y, other_parameter_values,
                                  parameter_name,
                                  first_level_values,
                                  second_level_values):
    """Tune one parameter with a coarse grid search followed by a finer one.

    The first search runs over ``first_level_values``. ``find_index`` maps
    the winner to a position in that list; if it returns -1 the coarse
    winner is returned, otherwise a second search runs over
    ``second_level_values[ind]`` and its winner is returned.
    """
    clf = RandomForestClassifier()
    clf.set_params(**other_parameter_values)

    def _best_of(candidates):
        # 5-fold ROC-AUC grid search over a single parameter.
        search = GridSearchCV(estimator=clf,
                              param_grid={parameter_name: candidates},
                              scoring='roc_auc', cv=5, verbose=100)
        search.fit(X, y)
        return search.best_params_[parameter_name]

    coarse_best = _best_of(first_level_values)
    ind = find_index(coarse_best, first_level_values)
    if ind == -1:
        return coarse_best
    return _best_of(second_level_values[ind])
示例10: rfcTuning
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def rfcTuning(self, pX, change = 3):
    """Random-search two random forest settings by cross-validated ROC AUC.

    Each of the ``change`` rounds draws ``n_estimators`` from [50, 150) and
    ``min_samples_split`` from [800, 3300), scores the configuration with
    ``cross_val_score`` against ``self.y`` and keeps the best one.

    Args:
        pX: Feature matrix passed to ``cross_val_score``.
        change: Number of random configurations to try.

    Returns:
        Tuple ``(best_auc, classifier)`` where ``classifier`` is a new,
        unfitted ``RandomForestClassifier`` built from the best parameters
        (default parameters if no configuration scored above 0).
    """
    rfc = RandomForestClassifier()
    best_auc = 0
    best_param = None
    for _ in range(change):
        randp = np.random.random_sample(2)
        param = {
            'n_estimators': 50+int(100 * randp[0]),
            'min_samples_split': 800+int(2500 * randp[1]),
            'random_state': 2016,
            'class_weight':'balanced'
        }
        rfc.set_params(**param)
        auc = cross_val_score(rfc, pX, self.y, scoring='roc_auc').mean()
        if auc > best_auc:
            best_auc = auc
            best_param = param
    print('random forest ' + str(best_auc))
    # best_param stays None when no round beats the initial 0 (change == 0,
    # or every score is 0/NaN); fall back to defaults instead of raising
    # TypeError on ``**None``.
    return best_auc, RandomForestClassifier(**(best_param or {}))
示例11: variance_exercise3
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def variance_exercise3():
    """Plot the variance of test predictions against forest size.

    Downloads OpenML dataset 44 and, for forests of 1, 2, 4, ..., 128 trees,
    collects the predictions made on each of 10 shuffled 90/10 splits. The
    variance over all collected predictions is plotted per forest size.
    """
    # NOTE(review): the API key is hard-coded in source.
    connector = APIConnector(apikey='ca2397ea8a2cdd9707ef39d76576e786')
    dataset = connector.download_dataset(44)
    X, y, attribute_names = dataset.get_dataset(
        target=dataset.default_target_attribute,
        return_attribute_names=True)
    splits = cross_validation.ShuffleSplit(len(X), n_iter=10, test_size=0.1,
                                           train_size=0.9, random_state=0)
    tree_counts = [int(math.pow(2, exponent)) for exponent in range(0, 8)]
    clf = RandomForestClassifier(oob_score=True,
                                 max_features="auto",
                                 random_state=0)

    variances = []
    for n_trees in tree_counts:
        clf.set_params(n_estimators=n_trees)
        split_predictions = []
        for train_index, test_index in splits:
            clf.fit(X[train_index], y[train_index])
            split_predictions.append(clf.predict(X[test_index]))
        variances.append(np.array(split_predictions).var())

    plt.style.use('ggplot')
    plt.plot(tree_counts, variances, '#009999', marker='o')
    plt.xticks(tree_counts)
    plt.margins(0.02)
    plt.xlabel('number of trees')
    plt.ylabel('Variance')
    plt.show()
示例12: train_rf
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def train_rf(train_X, train_y, dev_X, dev_y):
    """Pick the forest size with the best dev-set micro F1, then refit on all data.

    Args:
        train_X: Training features.
        train_y: Training labels.
        dev_X: Development features used for model selection.
        dev_y: Development labels.

    Returns:
        Tuple ``(clsf, scaler)``: the classifier refitted on train + dev
        with the selected ``n_estimators``, and the ``StandardScaler``
        (fitted only when ``conf['normalize']`` is truthy).
    """
    conf = get_config()
    # Normalize data
    scaler = StandardScaler()
    if conf['normalize']:
        train_X = scaler.fit_transform(train_X)
        dev_X = scaler.transform(dev_X)
    # Explore param classifier
    clsf = RandomForestClassifier(random_state=0, n_jobs=8)
    # SECURITY(review): eval executes arbitrary code from the configuration;
    # if the value is a plain literal, ast.literal_eval would be safer.
    n_trees_opts = eval(conf['n_trees_opts'])
    # Explicit float buffer: zeros_like on a list of ints yields an integer
    # array, which truncated every F1 score below 1.0 to 0 and made the
    # argmax below meaningless.
    scores = np.zeros(len(n_trees_opts), dtype=float)
    for i, n_trees in enumerate(n_trees_opts):
        clsf.set_params(n_estimators=n_trees).fit(train_X, train_y)
        pred_y = clsf.predict(dev_X)
        _, _, f1, _ = precision_recall_fscore_support(
            dev_y, pred_y, average='micro')
        scores[i] = f1
    best_n_trees = n_trees_opts[scores.argmax()]
    # Final model uses both train and dev data.
    clsf.set_params(n_estimators=best_n_trees).fit(
        np.vstack((train_X, dev_X)), np.hstack((train_y, dev_y)))
    return clsf, scaler
示例13: exercise_1
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def exercise_1():
    """Scatter-plot the out-of-bag error of a random forest against its size.

    Downloads OpenML dataset 44, fits the forest on the full data with
    1, 2, 4, ..., 128 trees and plots ``1 - oob_score_`` for each size.
    """
    # Connect to the OpenML API.
    # NOTE(review): the API key is hard-coded in source.
    connector = APIConnector(apikey='ca2397ea8a2cdd9707ef39d76576e786')
    dataset = connector.download_dataset(44)
    X, y, attribute_names = dataset.get_dataset(
        target=dataset.default_target_attribute,
        return_attribute_names=True)
    tree_counts = [int(math.pow(2, exponent)) for exponent in range(0, 8)]
    clf = RandomForestClassifier(oob_score=True,
                                 max_features="auto",
                                 random_state=0)

    # Refit once per forest size and record the OOB error.
    oob_errors = []
    for n_trees in tree_counts:
        clf.set_params(n_estimators=n_trees)
        clf.fit(X, y)
        oob_errors.append(1 - clf.oob_score_)

    plt.style.use('ggplot')
    plt.scatter(tree_counts, oob_errors)
    plt.xticks(tree_counts)
    plt.show()
示例14: bias_exercise3
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def bias_exercise3():
    """Plot the mean squared prediction error against forest size.

    Downloads OpenML dataset 44 and, for forests of 1, 2, 4, ..., 128 trees,
    accumulates the squared difference between true and predicted labels
    over 10 shuffled 90/10 splits. The mean of those squared differences is
    plotted per forest size under the label 'Bias Squared'.
    """
    # Connect to the OpenML API.
    # NOTE(review): the API key is hard-coded in source.
    connector = APIConnector(apikey='ca2397ea8a2cdd9707ef39d76576e786')
    dataset = connector.download_dataset(44)
    X, y, attribute_names = dataset.get_dataset(
        target=dataset.default_target_attribute,
        return_attribute_names=True)
    splits = cross_validation.ShuffleSplit(len(X), n_iter=10, test_size=0.1,
                                           train_size=0.9, random_state=0)
    tree_counts = [int(math.pow(2, exponent)) for exponent in range(0, 8)]
    clf = RandomForestClassifier(oob_score=True,
                                 max_features="auto",
                                 random_state=0)

    mean_sq_errors = []
    for n_trees in tree_counts:
        clf.set_params(n_estimators=n_trees)
        squared_diffs = []
        for train_index, test_index in splits:
            y_test = y[test_index]
            clf.fit(X[train_index], y[train_index])
            predicted = clf.predict(X[test_index])
            for truth, guess in zip(y_test, predicted):
                squared_diffs.append((truth - guess) * (truth - guess))
        mean_sq_errors.append(np.array(squared_diffs).mean())

    plt.style.use('ggplot')
    plt.plot(tree_counts, mean_sq_errors, '#009999', marker='o')
    plt.xticks(tree_counts)
    plt.margins(0.02)
    plt.xlabel('number of trees')
    plt.ylabel('Bias Squared')
    plt.show()
示例15: __init__
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
class ForestParallel:
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
estimators = []
def __init__(self, n_cores=1, n_estimators=10, total_estimators=10,
             criterion='gini', min_samples_split=2):
    """Initialize the parallel random forest.

    Args:
        n_cores: Stored on the instance as ``n_cores``.
        n_estimators: Number of trees to calculate at one time.
        total_estimators: Used for master-slave load-balancing; should be
            divisible by ``n_estimators``.
        criterion: Split criterion forwarded to the local forest.
        min_samples_split: Forwarded to the local forest.
    """
    # Local forest that this rank trains.
    self.forest = Forest(n_estimators=n_estimators,
                         criterion=criterion,
                         min_samples_split=min_samples_split)
    self.criterion = criterion
    self.total_estimators = total_estimators
    self.n_estimators = n_estimators
    self.n_cores = n_cores
def fit(self, X, y):
    """Train the random forest in parallel.

    The local forest is fitted on ``(X, y)``, then the trees of all ranks
    are gathered to every rank and installed as the forest's estimators.

    Returns:
        ``self``.
    """
    self.forest.fit(X, y)
    # TODO: decide between gather and allgather
    trees_per_rank = self.comm.allgather(self.forest.estimators_)
    # Flatten the per-rank lists into a single list of trees.
    merged = []
    for rank_trees in trees_per_rank:
        merged.extend(rank_trees)
    self.estimators = merged
    self.forest.estimators_ = merged
    return self
def fitBalanced(self, X, y):
    """Train the random forest in parallel using load-balancing.

    Rank 0 runs ``master``; every other rank runs ``slave``.

    Returns:
        ``self``.
    """
    # Master-slave paradigm: pick this rank's role, then run it.
    role = self.master if self.rank == 0 else self.slave
    role(X, y)
    return self
def master(self, X, y):
    """Dynamically assign work to other cores for training random forest.

    Every other rank receives an initial task. Each returned batch of trees
    is collected, and the sending rank is given either another task or the
    stop signal ``-1``. The loop ends only when no task is outstanding, so
    no rank is left blocked trying to send back a result.

    Returns:
        ``self``, with the collected trees stored in ``self.estimators``
        and ``self.forest.estimators_``.
    """
    status = MPI.Status()
    estimators = []
    # Send out initial tasks to slaves.
    for worker in range(1, self.size):
        self.comm.send(1, dest=worker)
    outstanding = self.size - 1  # tasks sent but not yet answered
    # Train a single tree to set the forest parameters for convenience.
    self.forest.set_params(n_estimators=1)
    self.forest.fit(X, y)
    self.forest.set_params(n_estimators=self.total_estimators)
    while outstanding > 0:
        batch = self.comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG,
                               status=status)
        outstanding -= 1
        estimators.extend(batch)  # add estimators to total list
        # Assumes each task returns n_estimators trees ("num trees to
        # calculate at one time") - TODO confirm for all ranks.
        expected = len(estimators) + outstanding * self.n_estimators
        if expected < self.total_estimators:
            self.comm.send(1, dest=status.source)  # send next task
            outstanding += 1
        else:
            # Enough trees are collected or in flight: close this slave now
            # instead of handing it work whose result is never received.
            self.comm.send(-1, dest=status.source)
    self.estimators = estimators
    self.forest.estimators_ = self.estimators
    return self
def slave(self, X, y):
    """Train a subset of the random forest.

    Waits for messages from rank 0. Every message other than ``-1`` triggers
    a fit of the local forest, whose trees are sent back tagged with the
    received value. ``-1`` ends the loop.

    Returns:
        ``self``.
    """
    task = self.comm.recv(source=0)
    while task != -1:
        self.forest.fit(X, y)
        self.comm.send(self.forest.estimators_, dest=0, tag=task)
        task = self.comm.recv(source=0)
    return self
def predict(self, X):
    """Make predictions on just one core.

    Returns:
        The forest's predictions for ``X`` on rank 0, ``None`` on every
        other rank.
    """
    if self.rank != 0:
        return None
    return self.forest.predict(X)
def predictPar(self, X):
#predictions using all the cores
#TODO: Finish
if self.rank==0:
estimators = self.comm.scatter(self.forest.estimators_)
self.forest.estimators_ = estimators
predictions = self.forest.predict(X)
if self.rank==0:
predictions = self.comm.gather(predictions)
#.........这里部分代码省略.........