当前位置: 首页>>代码示例>>Python>>正文


Python RandomForestClassifier.set_params方法代码示例

本文整理汇总了Python中sklearn.ensemble.RandomForestClassifier.set_params方法的典型用法代码示例。如果您正苦于以下问题:Python RandomForestClassifier.set_params方法的具体用法?Python RandomForestClassifier.set_params怎么用?Python RandomForestClassifier.set_params使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.ensemble.RandomForestClassifier的用法示例。


在下文中一共展示了RandomForestClassifier.set_params方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: exercise_2

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def exercise_2():
    """Plot cross-validated zero-one loss of a random forest by tree count.

    Downloads dataset 44 through the OpenML connector, evaluates forests of
    1, 2, 4, ..., 128 trees with 10-fold cross-validation, and plots the mean
    zero-one loss obtained for each forest size.
    """
    # connect to openml api
    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration or the environment instead.
    apikey = 'ca2397ea8a2cdd9707ef39d76576e786'
    connector = APIConnector(apikey=apikey)
    dataset = connector.download_dataset(44)
    X, y, attribute_names = dataset.get_dataset(
        target=dataset.default_target_attribute, return_attribute_names=True)

    kf = cross_validation.KFold(len(X), n_folds=10, shuffle=False, random_state=0)
    error = []
    lst = [int(math.pow(2, i)) for i in range(0, 8)]
    clf = RandomForestClassifier(oob_score=True,
                                 max_features="auto",
                                 random_state=0)
    for i in lst:
        # The tree count is the same for every fold, so set it once per size
        # rather than once per fold.
        clf.set_params(n_estimators=i)
        error_mean = []
        for train_index, test_index in kf:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            clf.fit(X_train, y_train)
            error_mean.append(zero_one_loss(y_test, clf.predict(X_test)))
        error.append(np.array(error_mean).mean())

    # plot
    plt.style.use('ggplot')
    plt.plot(lst, error, '#009999', marker='o')
    plt.xticks(lst)
    plt.show()
开发者ID:rodmendozam,项目名称:Ass3_foundations,代码行数:30,代码来源:random_forest.py

示例2: train_random_forest_with_params

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def train_random_forest_with_params(X, y, params):
    """Fit a random forest configured from a mapping of parameters.

    Args:
        X: training features.
        y: training labels.
        params: mapping of estimator parameter names to values; ``None`` or
            an empty mapping keeps the estimator defaults.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    model = RandomForestClassifier()
    # set_params accepts keyword arguments only, so the mapping has to be
    # unpacked; passing it positionally raises TypeError.
    model.set_params(**(params or {}))
    model = model.fit(X, y)
    # Score on the training data itself (not a held-out estimate).
    score = model.score(X, y)
    print("Model Trainning Score: %s" % score)
    return model
开发者ID:21zhouyun,项目名称:KaggleOCR,代码行数:9,代码来源:random_forest_util.py

示例3: gridtrainfraction

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def gridtrainfraction(trainiter, rfparams):
    """Grid-search on one data sample, then fit the best forest on another.

    The data is read once with ``fraction=0.002`` for the grid search; those
    arrays are released and the data is read again with ``fraction=0.01`` for
    the final model fit.
    """
    def _encode_predictors(sample):
        # Every column except the target is cast to str and one-hot encoded.
        predictors = sample.drop('hotel_cluster', axis=1)
        return sparsify(pd.get_dummies(predictors.astype(str)))

    sample = fractionate(trainiter, fraction=0.002)
    estimator = RandomForestClassifier(**rfparams)
    search = GridSearchCV(estimator, param_grid=gridparams, scoring='log_loss', n_jobs=1)
    features = _encode_predictors(sample)
    labels = sample['hotel_cluster']
    search.fit(features, labels)

    print(search.best_params_)
    print(search.grid_scores_)

    # Drop the references to the search-phase data before reading more.
    sample = features = labels = estimator = None

    sample = fractionate(trainiter, fraction=0.01)
    features = _encode_predictors(sample)
    labels = sample['hotel_cluster']
    winning_params = search.best_params_
    estimator = RandomForestClassifier(**rfparams)
    estimator.set_params(**winning_params)
    estimator.fit(features, labels)
    return estimator
开发者ID:dbricare,项目名称:kaggle,代码行数:30,代码来源:clf.py

示例4: random_forest_cross_validate

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def random_forest_cross_validate(targets, features, nprocesses=-1):
    """Cross-validate a random forest and collect held-out predictions.

    For each of the 5 folds a 100-tree forest is fitted on the training part
    and scored on the held-out part. The fold's non-zero feature importances
    are pickled to ``important_features.p`` (the file is rewritten on every
    fold, so it ends up holding the last fold's list).

    Args:
        targets: labels, indexable by the fold masks yielded by ``KFold``.
        features: feature table; must support mask indexing and ``.columns``.
        nprocesses: ``n_jobs`` used while fitting; prediction uses 1 job.

    Returns:
        A DataFrame with the held-out rows of all folds plus ``prediction``
        and ``prob`` columns, or ``None`` if ``KFold`` yielded no fold.
    """
    num_cv = 5
    cv = cross_validation.KFold(len(features), k=num_cv, indices=False)
    # iterate through the training and test cross validation segments and
    # run the classifier on each one, aggregating the results
    score_sum = 0.0
    testcvs = None
    for i, (traincv, testcv) in enumerate(cv):
        cfr = RandomForestClassifier(
            n_estimators=100,
            max_features=None,
            verbose=0,
            compute_importances=True,
            n_jobs=nprocesses,
            random_state=0,
        )
        print("Fitting cross validation #{0}".format(i))
        cfr.fit(features[traincv], targets[traincv])
        print("Scoring cross validation #{0}".format(i))
        cfr.set_params(n_jobs=1)
        fold_targets = targets[testcv]
        predicted = cfr.predict(features[testcv])
        p = cfr.predict_proba(features[testcv])
        score = cfr.score(features[testcv], fold_targets)
        score_sum += score

        summer = 0.0
        for j, pred in enumerate(predicted):
            print("%s %s" % (pred, fold_targets[j]))
            summer += np.power((pred - fold_targets[j]), 2)
        print("score error: {0}".format(np.sqrt(summer)/len(testcv)))

        # add stuff to a dataframe so we can plot things. The frame gets its
        # own name: rebinding ``testcv`` would clobber the fold mask that is
        # still needed to index ``targets``.
        fold_frame = pd.DataFrame(features[testcv])
        fold_frame['prediction'] = np.nan
        fold_frame['prob'] = np.nan
        for j, ix in enumerate(fold_frame.index):
            print("%s %s" % (predicted[j], fold_targets[j]))
            # Single .loc assignment instead of chained indexing, which may
            # write to a temporary copy.
            fold_frame.loc[ix, 'prediction'] = predicted[j]
            # NOTE(review): the column picked here depends on the order of
            # cfr.classes_; confirm that column 0 is the intended one for a
            # prediction of 1.
            if predicted[j] == 1:
                fold_frame.loc[ix, 'prob'] = p[j][0]
            else:
                fold_frame.loc[ix, 'prob'] = p[j][1]
        if testcvs is None:
            testcvs = fold_frame
        else:
            # Concatenation returns a new frame; the result must be kept or
            # every fold after the first is silently dropped.
            testcvs = pd.concat([testcvs, fold_frame])

        print("Score for cross validation #{0}, score: {1}".format(i, score))
        print("Features importance")
        features_list = sorted(
            ((features.columns[j], importance)
             for j, importance in enumerate(cfr.feature_importances_)
             if importance > 0.0),
            key=lambda x: x[1], reverse=True)
        for j, tup in enumerate(features_list):
            print("%s %s" % (j, tup))
        # Close the file deterministically instead of leaking the handle.
        with open("important_features.p", 'wb') as handle:
            pickle.dump(features_list, handle)
    print("Average Accuracy: {0}".format(float(score_sum)/float(num_cv)))
    return testcvs
开发者ID:iwonasado,项目名称:kaggle,代码行数:59,代码来源:process_ncaa_tourney.py

示例5: test_parallel

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def test_parallel():
    """Check that predictions do not depend on the number of jobs."""
    def _predict_with_jobs(model, data, jobs):
        # Re-configure the already fitted model and predict.
        model.set_params(n_jobs=jobs)
        return model.predict(data)

    # Classification: fit with 3 jobs, predictions must match exactly.
    classifier = RandomForestClassifier(n_estimators=10, n_jobs=3, random_state=0)
    classifier.fit(iris.data, iris.target)
    assert_true(10 == len(classifier))

    one_job = _predict_with_jobs(classifier, iris.data, 1)
    two_jobs = _predict_with_jobs(classifier, iris.data, 2)
    assert_array_equal(one_job, two_jobs)

    # Regression: predictions must agree to 3 decimals.
    regressor = RandomForestRegressor(n_estimators=10, n_jobs=3, random_state=0)
    regressor.fit(boston.data, boston.target)
    assert_true(10 == len(regressor))

    one_job = _predict_with_jobs(regressor, boston.data, 1)
    two_jobs = _predict_with_jobs(regressor, boston.data, 2)
    assert_array_almost_equal(one_job, two_jobs, 3)

    # Use all cores on the classification dataset
    all_cores = RandomForestClassifier(n_jobs=-1)
    all_cores.fit(iris.data, iris.target)
开发者ID:DaveYuan,项目名称:recommendersystem,代码行数:31,代码来源:test_forest.py

示例6: cross_val_warm

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def cross_val_warm(clf_name, X, y, n_estimators_grid=range(10, 500, 50), *params, **kwargs):
    """Refit an ensemble for increasing sizes and print its OOB score.

    Args:
        clf_name: ``"random"``, ``"bag"`` or ``"extra"`` to build the matching
            ensemble, or an estimator object whose type name contains
            ``"sklearn"`` to use as given.
        X: training features.
        y: training labels.
        n_estimators_grid: ensemble sizes to try; visited in ascending order.
        *params, **kwargs: forwarded to the constructed estimator(s).

    Returns:
        The estimator after the last fit.

    Raises:
        ValueError: if ``clf_name`` is none of the supported values.
    """
    if "sklearn" in str(type(clf_name)):
        c = clf_name
    elif clf_name == "random":
        c = RandomForestClassifier(warm_start=True, oob_score=True, n_estimators=600, n_jobs=-1, *params, **kwargs)
    elif clf_name == "bag":
        c = BaggingClassifier(base_estimator=MultinomialNB(alpha=0.5, *params, **kwargs), n_estimators=100, n_jobs=-1, *params, **kwargs)
    elif clf_name == "extra":
        # NOTE(review): oob_score is not enabled here, so the caller
        # presumably has to pass it (and bootstrap) through kwargs for
        # ``oob_score_`` below to exist - confirm.
        c = ExtraTreesClassifier(*params, warm_start=True, **kwargs)
    else:
        # Fail with a clear message instead of hitting an unbound local
        # variable further down.
        raise ValueError(
            "Unknown classifier %r; expected 'random', 'bag', 'extra' "
            "or a scikit-learn estimator" % (clf_name,))
    for n_est in np.sort(n_estimators_grid):
        c.set_params(n_estimators=n_est)
        c.fit(X, y)
        print(str(n_est)+"\t"+str(c.oob_score_))
    return c
开发者ID:MartinDelzant,项目名称:AdvBigData,代码行数:16,代码来源:cross_val.py

示例7: load_and_test

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def load_and_test(token, test_data, num_classes, result):
    """Load and test

    Args:
        token (:obj:`str`): token representing this run
        test_data (:obj:`tuple` of :obj:`numpy.array`): Tuple of testing feature and label
        num_classes (:obj:`int`): Number of classes
        result (:obj:`pyActLearn.performance.record.LearningResult`): LearningResult object to hold learning result

    Returns:
        :obj:`tuple`: predicted labels and predicted class probabilities for ``test_data[0]``
    """
    model = RandomForestClassifier(n_estimators=20, criterion="entropy")
    # set_params accepts keyword arguments only, so the stored parameters are
    # unpacked; passing them positionally raises TypeError.
    # NOTE(review): assumes the 'model' entry of the record is a dict of
    # estimator parameters (e.g. the output of get_params()) - confirm
    # against the code that writes the record.
    model.set_params(**result.get_record_by_key(token)['model'])
    # NOTE(review): set_params only restores hyper-parameters; nothing in
    # this function fits the model before predicting - confirm the intent.
    # Test
    predicted_y = model.predict(test_data[0])
    predicted_proba = model.predict_proba(test_data[0])
    return predicted_y, predicted_proba
开发者ID:TinghuiWang,项目名称:pyActLearn,代码行数:17,代码来源:b1_randomforest.py

示例8: submission

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def submission(test_values, train_values, train_labels):
    """Fit a random forest with the best found parameters and write a CSV.

    Predictions for ``test_values`` are written to ``../data/submission.csv``
    with the test ``id`` column as index and a ``status_group`` column.
    """
    def _drop_id(frame):
        # Keep every column except the identifier.
        return frame[frame.columns.difference(['id'])]

    train_features = _drop_id(train_values)
    train_target = train_labels["status_group"]

    forest = RandomForestClassifier()
    forest.set_params(**getBestParams(train_features, train_target))
    forest.fit(train_features, train_target)

    predictions = forest.predict(_drop_id(test_values))

    output = pd.DataFrame(
        data=predictions,             # values
        index=test_values["id"],      # 1st column as index
        columns=["status_group"],     # 1st row as the column names
    )
    output.to_csv("../data/submission.csv")
开发者ID:nmoraesmunter,项目名称:pumpitup,代码行数:18,代码来源:pumpitup.py

示例9: find_best_value_for_parameter

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def find_best_value_for_parameter(X, y, other_parameter_values,
                                  parameter_name,
                                  first_level_values,
                                  second_level_values):
    """Tune one random forest parameter with a two-level grid search.

    A first search runs over ``first_level_values``. ``find_index`` then
    looks the winner up in that list: when it returns -1 the winner is
    returned as is, otherwise a second search runs over
    ``second_level_values[index]`` and its winner is returned.
    """
    forest = RandomForestClassifier()
    forest.set_params(**other_parameter_values)

    def _search(candidates):
        # 5-fold ROC-AUC grid search over the given values of the parameter.
        searcher = GridSearchCV(estimator=forest,
                                param_grid={parameter_name: candidates},
                                scoring='roc_auc', cv=5, verbose=100)
        searcher.fit(X, y)
        return searcher.best_params_[parameter_name]

    coarse_best = _search(first_level_values)
    position = find_index(coarse_best, first_level_values)
    if position == -1:
        return coarse_best
    return _search(second_level_values[position])
开发者ID:abhishekcs,项目名称:LML_HW3,代码行数:19,代码来源:hw3p3_rf.py

示例10: rfcTuning

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
	def rfcTuning(self, pX, change = 3):
		"""Random-search random forest parameters by cross-validated ROC AUC.

		Each draw picks ``n_estimators`` in [50, 149] and
		``min_samples_split`` in [800, 3299] and is scored with
		``cross_val_score`` against ``self.y``.

		Args:
			pX: feature matrix passed to ``cross_val_score``.
			change: number of random parameter draws to evaluate.

		Returns:
			Tuple ``(best_auc, estimator)``; ``estimator`` is an unfitted
			``RandomForestClassifier`` built from the best draw, or one with
			default parameters when no draw scored above 0.
		"""
		rfc = RandomForestClassifier()
		best_auc = 0
		# An empty dict (rather than None) keeps the final constructor call
		# valid when no draw beats the initial score, e.g. change <= 0.
		best_param = {}
		for _ in range(change):
			randp = np.random.random_sample(2)
			param = {
				'n_estimators':  50+int(100 * randp[0]),
				'min_samples_split': 800+int(2500 * randp[1]),
				'random_state': 2016,
				'class_weight':'balanced'
			}
			rfc.set_params(**param)
			auc = cross_val_score(rfc, pX, self.y, scoring='roc_auc').mean()
			if auc > best_auc:
				best_auc = auc
				best_param = param
		print('random forest ' + str(best_auc))
		return best_auc, RandomForestClassifier(**best_param)
开发者ID:Gnostikoi,项目名称:orange,代码行数:22,代码来源:model_generator.py

示例11: variance_exercise3

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def variance_exercise3():
    """Plot the variance of held-out predictions against the tree count.

    For each forest size in 1, 2, 4, ..., 128 the forest is refitted on 10
    shuffled 90/10 splits; the variance taken over all collected test
    predictions is plotted per size.
    """
    apikey = 'ca2397ea8a2cdd9707ef39d76576e786'
    connector = APIConnector(apikey=apikey)
    dataset = connector.download_dataset(44)
    X, y, attribute_names = dataset.get_dataset(
        target=dataset.default_target_attribute, return_attribute_names=True)

    splitter = cross_validation.ShuffleSplit(len(X),n_iter=10, test_size=0.1, train_size=0.9, random_state=0)
    tree_counts = [int(math.pow(2, power)) for power in range(0, 8)]

    forest = RandomForestClassifier(oob_score=True,
                                    max_features="auto",
                                    random_state=0)

    total_variance = []
    for n_trees in tree_counts:
        forest.set_params(n_estimators=n_trees)
        # fit() returns the estimator, so fit and predict chain per split.
        split_predictions = [
            forest.fit(X[train_index], y[train_index]).predict(X[test_index])
            for train_index, test_index in splitter
        ]
        total_variance.append(np.array(split_predictions).var())

    plt.style.use('ggplot')
    plt.plot(tree_counts, total_variance, '#009999', marker='o')
    plt.xticks(tree_counts)
    plt.margins(0.02)
    plt.xlabel('number of trees')
    plt.ylabel('Variance')
    plt.show()
开发者ID:rodmendozam,项目名称:Ass3_foundations,代码行数:42,代码来源:random_forest.py

示例12: train_rf

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def train_rf(train_X, train_y, dev_X, dev_y):
    """Pick the forest size on the dev set, then refit on train + dev.

    Args:
        train_X: training features.
        train_y: training labels.
        dev_X: development features used for model selection.
        dev_y: development labels used for model selection.

    Returns:
        Tuple ``(classifier, scaler)``: the forest refitted on the stacked
        train and dev data with the best tree count, and the
        ``StandardScaler`` (fitted only when ``conf['normalize']`` is truthy).
    """
    conf = get_config()
    # Normalize data
    scaler = StandardScaler()
    if conf['normalize']:
        train_X = scaler.fit_transform(train_X)
        dev_X = scaler.transform(dev_X)

    # Explore param classifier
    clsf = RandomForestClassifier(random_state=0, n_jobs=8)
    # SECURITY NOTE(review): eval() executes whatever expression the config
    # holds; if the config can come from an untrusted source, switch to a
    # safe parser such as ast.literal_eval.
    n_trees_opts = eval(conf['n_trees_opts'])
    # The scores are fractional F1 values. An array created with zeros_like
    # from a list of ints would be integer-typed and truncate them on
    # assignment, making the argmax below meaningless.
    scores = np.zeros(len(n_trees_opts), dtype=float)
    for i, n_trees in enumerate(n_trees_opts):
        clsf.set_params(n_estimators=n_trees).fit(train_X, train_y)
        pred_y = clsf.predict(dev_X)
        _, _, f1, _ = precision_recall_fscore_support(
            dev_y, pred_y, average='micro')
        scores[i] = f1

    best_n_trees = n_trees_opts[scores.argmax()]
    clsf.set_params(n_estimators=best_n_trees).fit(
        np.vstack((train_X, dev_X)), np.hstack((train_y, dev_y)))
    return clsf, scaler
开发者ID:ejake,项目名称:tensor-factorization,代码行数:25,代码来源:misc.py

示例13: exercise_1

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def exercise_1():
    """Scatter-plot the out-of-bag error of a random forest by tree count.

    The forest is refitted on the whole dataset for 1, 2, 4, ..., 128 trees
    and ``1 - oob_score_`` is plotted for each size.
    """
    # connect to openml api
    apikey = 'ca2397ea8a2cdd9707ef39d76576e786'
    connector = APIConnector(apikey=apikey)
    dataset = connector.download_dataset(44)
    X, y, attribute_names = dataset.get_dataset(
        target=dataset.default_target_attribute, return_attribute_names=True)

    tree_counts = [2 ** power for power in range(8)]
    forest = RandomForestClassifier(oob_score=True,
                                    max_features="auto",
                                    random_state=0)

    def _oob_error(n_trees):
        # Refit with the requested size and report the out-of-bag error.
        forest.set_params(n_estimators=n_trees)
        forest.fit(X, y)
        return 1 - forest.oob_score_

    oob_errors = [_oob_error(n_trees) for n_trees in tree_counts]

    # plot
    plt.style.use('ggplot')
    plt.scatter(tree_counts, oob_errors)
    plt.xticks(tree_counts)
    plt.show()
开发者ID:rodmendozam,项目名称:Ass3_foundations,代码行数:26,代码来源:random_forest.py

示例14: bias_exercise3

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
def bias_exercise3():
    """Plot the mean squared prediction error against the tree count.

    For each forest size in 1, 2, 4, ..., 128 the forest is refitted on 10
    shuffled 90/10 splits; the squared differences between test labels and
    predictions are pooled over the splits and their mean is plotted.
    """
    # connect to openml api
    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration or the environment instead.
    apikey = 'ca2397ea8a2cdd9707ef39d76576e786'
    connector = APIConnector(apikey=apikey)
    dataset = connector.download_dataset(44)
    X, y, attribute_names = dataset.get_dataset(
        target=dataset.default_target_attribute, return_attribute_names=True)

    kf = cross_validation.ShuffleSplit(len(X),n_iter=10, test_size=0.1, train_size=0.9, random_state=0)
    error = []
    lst = [int(math.pow(2, i)) for i in range(0, 8)]

    clf = RandomForestClassifier(oob_score=True,
                                 max_features="auto",
                                 random_state=0)
    for n_trees in lst:
        error_mean = []
        clf.set_params(n_estimators=n_trees)
        for train_index, test_index in kf:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            clf.fit(X_train, y_train)
            predicted_elements = clf.predict(X_test)

            # Pair labels with predictions directly; the former index loop
            # reused the name of the outer tree-count variable.
            for actual, predicted in zip(y_test, predicted_elements):
                difference = actual - predicted
                error_mean.append(difference * difference)
        error.append(np.array(error_mean).mean())

    plt.style.use('ggplot')
    plt.plot(lst, error, '#009999', marker='o')
    plt.xticks(lst)
    plt.margins(0.02)
    plt.xlabel('number of trees')
    plt.ylabel('Bias Squared')
    plt.show()
开发者ID:rodmendozam,项目名称:Ass3_foundations,代码行数:38,代码来源:random_forest.py

示例15: __init__

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import set_params [as 别名]
class ForestParallel:
  # MPI handles are class attributes: they are looked up once, when the
  # class body is executed, and shared by all instances.
  comm = MPI.COMM_WORLD  # communicator used for every send/recv/gather
  rank = comm.Get_rank()  # rank of this process in comm
  size = comm.Get_size()  # number of processes in comm
  # Class-level default; fit() and master() rebind it on the instance.
  estimators = []
  
  def __init__(self, n_cores=1, n_estimators=10, total_estimators=10,
      criterion='gini', min_samples_split=2):
    """Create the local forest and store the parallel-training settings."""
    # Forest trained by this process.
    self.forest = Forest(n_estimators=n_estimators, criterion=criterion,
      min_samples_split=min_samples_split)
    self.criterion = criterion
    self.n_cores = n_cores
    # Number of trees to calculate at one time.
    self.n_estimators = n_estimators
    # Used for master-slave load-balancing; should be divisible by
    # n_estimators.
    self.total_estimators = total_estimators
  
  def fit(self, X, y):
    """Fit the local forest, then share all trees among all processes."""
    self.forest.fit(X, y)
    #TODO: decide between gather and allgather
    per_process = self.comm.allgather(self.forest.estimators_)
    # Merge the per-process tree lists into one flat list.
    merged = []
    for trees in per_process:
      merged.extend(trees)
    self.estimators = merged
    self.forest.estimators_ = merged
    return self
  
  def fitBalanced(self, X, y):
    """Train the random forest in parallel using load-balancing."""
    # Master-slave paradigm: rank 0 hands out the work, every other rank
    # trains on request.
    role = self.master if self.rank == 0 else self.slave
    role(X, y)
    return self
  
  def master(self, X, y):
    """Dynamically assign work to other cores for training random forest.

    Every other rank receives an initial task. Each returned batch of trees
    is collected, and the sender is either given another task or told to
    stop (-1). A rank is only stopped once its last result has been
    received, so no worker is left blocked sending a result nobody reads.

    Returns:
      self, with ``self.estimators`` and ``self.forest.estimators_`` set to
      the collected trees.
    """
    status = MPI.Status()
    estimators = []

    #send out initial tasks to slaves
    for worker in range(1, self.size):
      self.comm.send(1, dest=worker)
    pending = self.size - 1  #tasks handed out whose result is still due

    #train a single tree to set the forest parameters for convenience
    self.forest.set_params(n_estimators=1)
    self.forest.fit(X, y)
    self.forest.set_params(n_estimators=self.total_estimators)

    while pending > 0:
      batch = self.comm.recv(None, MPI.ANY_SOURCE, MPI.ANY_TAG, status)
      pending -= 1
      estimators.extend(batch) #add estimators to total list

      # NOTE(review): assumes every task returns self.n_estimators trees,
      # i.e. all ranks were constructed with the same arguments - confirm.
      expected = len(estimators) + pending * self.n_estimators
      if expected < self.total_estimators:
        self.comm.send(1, dest=status.source) #send next task
        pending += 1
      else:
        #enough trees are collected or in flight: close this slave
        self.comm.send(-1, dest=status.source)

    self.estimators = estimators
    self.forest.estimators_ = self.estimators

    return self
  
  def slave(self, X, y):
    """Fit the local forest for each task from rank 0 until told to stop."""
    task = self.comm.recv(source=0)
    # -1 is the stop signal.
    while task != -1:
      self.forest.fit(X, y)
      # The task value is reused as the message tag of the reply.
      self.comm.send(self.forest.estimators_,dest=0,tag=task)
      task = self.comm.recv(source=0)
    return self
  
  def predict(self, X):
    """Predict with the local forest on rank 0; other ranks return None."""
    if self.rank != 0:
      return None
    return self.forest.predict(X)
  
  def predictPar(self, X):
    #predictions using all the cores
    #TODO: Finish
    if self.rank==0:
      estimators = self.comm.scatter(self.forest.estimators_)
    self.forest.estimators_ = estimators
    predictions = self.forest.predict(X)
    if self.rank==0:
      predictions = self.comm.gather(predictions)
#.........这里部分代码省略.........
开发者ID:samuelkim314,项目名称:CS205-Final-Project,代码行数:103,代码来源:ForestParallel.py


注:本文中的sklearn.ensemble.RandomForestClassifier.set_params方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。