当前位置: 首页>>代码示例>>Python>>正文


Python RandomForestClassifier.fit方法代码示例

本文整理汇总了Python中sklearn.ensemble.RandomForestClassifier.fit方法的典型用法代码示例。如果您正苦于以下问题:Python RandomForestClassifier.fit方法的具体用法?Python RandomForestClassifier.fit怎么用?Python RandomForestClassifier.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.ensemble.RandomForestClassifier的用法示例。


在下文中一共展示了RandomForestClassifier.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: randomforest

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit [as 别名]
def randomforest(df1, df2):
    """Train a 200-tree random forest on ``df1`` and evaluate it on ``df2``.

    Both frames must contain a label column named ``'L'``; all remaining
    columns are used as features.  Unlike the previous implementation the
    input frames are left untouched (the label column is no longer deleted
    from the caller's objects), so the function can be called repeatedly
    with the same frames.

    Args:
        df1: training DataFrame, including the ``'L'`` label column.
        df2: test DataFrame, including the ``'L'`` label column.

    Returns:
        A 5-tuple ``(roc_auc, precision, recall, precision_negatives,
        recall_negatives)`` where the "negatives" variants are computed
        with ``pos_label=0``.
    """
    label_column = 'L'

    # Split labels from features on copies; ``.values`` replaces the
    # ``as_matrix`` call that no longer exists in current pandas.
    y_train = df1[label_column].values
    X_train = df1.drop(label_column, axis=1).values
    y_test = df2[label_column].values
    X_test = df2.drop(label_column, axis=1).values

    clf = RandomForestClassifier(n_estimators=200)
    clf.fit(X_train, y_train)

    # Column 1 of predict_proba is used as the score for ROC AUC
    # (assumes binary labels with the positive class second -- TODO confirm).
    positive_proba = clf.predict_proba(X_test)[:, 1]
    y_pred = clf.predict(X_test)

    # Compute every metric exactly once and reuse it for printing/returning.
    roc = roc_auc_score(y_test, positive_proba)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    precision_neg = precision_score(y_test, y_pred, pos_label=0)
    recall_neg = recall_score(y_test, y_pred, pos_label=0)

    # Single-argument print calls behave the same on Python 2 and 3; the
    # spacing mirrors what the old print statements emitted.
    print('roc:  %s' % (roc,))
    print('precision:  %s' % (precision,))
    print('recall: %s' % (recall,))
    print('precision Negatives:  %s' % (precision_neg,))
    print('recall Negatives:  %s' % (recall_neg,))

    return roc, precision, recall, precision_neg, recall_neg
开发者ID:omedranoc,项目名称:ThesisPreprocessing,代码行数:36,代码来源:pruebalgorithm.py

示例2: TrainRandomForestVariance

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit [as 别名]
def TrainRandomForestVariance(p_subject, p_save):
    """Train two 500-tree random forests on a subject's variance features.

    The subject's pickled table is loaded from ``input_data_paths[p_subject]``
    and reduced to the rows whose index label contains ``"variance"`` or
    ``"classification"``.  Two binary models are then fitted on the
    transposed table:

    * ``"seizure"``: target is 1 where ``classification > 0``.
    * ``"early"``: target is 1 where ``classification == 2``.

    Args:
        p_subject: key into ``input_data_paths``; also used in the file
            names of the saved models.
        p_save: when truthy, both models are dumped with ``joblib`` and the
            resulting files are moved into the models directory.

    Returns:
        dict with keys ``"seizure"`` and ``"early"`` mapping to the fitted
        classifiers.
    """
    import shutil  # local import: the rest of the file is not visible here

    models_dir = "/Users/dryu/Documents/DataScience/Seizures/data/models"

    print("Welcome to TrainRandomForestVariance(" + p_subject + ", " + str(p_save) + ")")
    training_data_raw = pd.read_pickle(input_data_paths[p_subject])
    training_data = training_data_raw[["variance" in x or "classification" in x for x in training_data_raw.index]]

    # All rows but the last are used as features
    # (presumably the last row is "classification" -- TODO confirm).
    features = training_data[:-1].T
    classification = training_data.T["classification"]

    def new_forest():
        # min_samples_split=2 is the smallest value scikit-learn accepts and
        # is equivalent to the former value of 1 (a single sample can never
        # be split), which newer releases reject.
        return RandomForestClassifier(n_estimators=500, n_jobs=1, max_features="sqrt",
                                      max_depth=None, min_samples_split=2)

    # Ictal vs interictal
    forest_seizure = new_forest()
    y_seizure = [1 * (x > 0) for x in classification]
    forest_seizure.fit(features, y_seizure)

    # IctalA vs IctalB
    forest_early = new_forest()
    y_early = [1 * (x == 2) for x in classification]
    forest_early.fit(features, y_early)

    # Save models
    if p_save:
        for forest, suffix in ((forest_seizure, "_seizure.pkl"), (forest_early, "_early.pkl")):
            saved_files = joblib.dump(forest, "RFV_" + p_subject + suffix)
            for saved_file in saved_files:
                # shutil.move avoids building a shell command from
                # p_subject (breaks on spaces / shell metacharacters).
                shutil.move(saved_file, models_dir)

    return {"seizure": forest_seizure, "early": forest_early}
开发者ID:DryRun,项目名称:seizures,代码行数:27,代码来源:main.py

示例3: __init__

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit [as 别名]
 def __init__(self, data, classes, tree_features, n_trees=100):
     """Fit ``n_trees`` single-tree forests, each on a random column subset.

     Columns that are NaN in every row and rows that are NaN in every
     remaining column are discarded first.  For each tree a sorted random
     sample of ``tree_features`` column indices is drawn, rows containing
     a NaN in those columns are dropped, and a one-estimator
     ``RandomForestClassifier`` is fitted on what is left.

     Args:
         data: 2-D float array (rows are samples, columns are features).
         classes: labels, one per row of ``data``.
         tree_features: number of columns sampled per tree; capped at the
             number of usable columns.
         n_trees: number of single-tree models to fit.

     Sets ``n_features``, ``n_rows``, ``n_trees``, ``col_list`` (the column
     indices used by each tree) and ``bags`` (the fitted models).
     """
     n_rows = np.shape(data)[0]

     # Remove columns that contain no data at all.
     n_nans = np.sum(np.isnan(data), 0)
     data = data[:, n_nans < n_rows]
     self.n_features = np.shape(data)[1]

     # Remove rows that contain no data at all.  The labels must be
     # filtered with the same mask, otherwise the per-tree row masks
     # computed below no longer line up with ``classes``.
     n_nans = np.sum(np.isnan(data), 1)
     keep_rows = n_nans < self.n_features
     data = data[keep_rows, :]
     classes = np.asarray(classes)[keep_rows]
     self.n_rows = np.shape(data)[0]

     if tree_features > self.n_features:
         tree_features = self.n_features

     self.col_list = np.zeros((n_trees, tree_features), dtype='int')
     self.n_trees = n_trees
     self.bags = []
     for i in range(n_trees):
         cols = sorted(sample(range(self.n_features), tree_features))
         self.col_list[i, :] = cols

         # Keep only the rows that are complete for this column subset.
         data_temp = data[:, cols]
         complete_rows = np.sum(np.isnan(data_temp), 1) == 0
         data_temp = data_temp[complete_rows, :]
         classes_temp = classes[complete_rows]

         bag = RandomForestClassifier(n_estimators=1, max_features=tree_features)
         bag.fit(data_temp, classes_temp)
         self.bags.append(bag)
         print(np.shape(data_temp))
开发者ID:Niederb,项目名称:python_machine_learning,代码行数:32,代码来源:half_random_forest.py

示例4: buildTreeClassifier

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit [as 别名]
def buildTreeClassifier(predictorColumns, structurestable = 'structures.csv',  targetcolumn = 'pointGroup', md = None):
    """
    Build a random-forest classifier that predicts a structure feature from
    compositional data.

    Rows with missing values are dropped, and if a ``fracNobleGas`` column
    exists only rows with ``fracNobleGas <= 0`` are kept.  Predictors are
    standardised and the target is label-encoded before training.

    Args:
        predictorColumns: column names used as predictors.
        structurestable: path of the CSV file to read.
        targetcolumn: name of the column to predict.
        md: ``max_depth`` passed to ``RandomForestClassifier``.

    Returns:
        Tuple ``(rfc, cm, acc, le)``:
        the model refitted on all data; a confusion-matrix DataFrame
        (computed on a single train/test split, indexed by the original
        class labels); the mean cross-validation score rounded to two
        decimals; and the fitted ``LabelEncoder``.
    """
    df = pd.read_csv(structurestable)
    df = df.dropna()
    if 'fracNobleGas' in df.columns:
        df = df[df['fracNobleGas'] <= 0]

    # NOTE: the fitted scaler is not returned, so callers cannot reproduce
    # the exact standardisation on new data from the return value alone.
    s = StandardScaler()
    le = LabelEncoder()

    X = s.fit_transform(df[predictorColumns].astype('float64'))
    y = le.fit_transform(df[targetcolumn].values)

    rfc = RandomForestClassifier(max_depth = md)
    acc = mean(cross_val_score(rfc, X, y))

    X_train, X_test, y_train, y_test = train_test_split(X, y)
    rfc.fit(X_train, y_train)
    y_predict = rfc.predict(X_test)

    # Pass the full list of encoded labels so the matrix always has one
    # row/column per class, even when a rare class is missing from the
    # random test split; otherwise its shape would not match le.classes_.
    all_labels = list(range(len(le.classes_)))
    cm = confusion_matrix(y_test, y_predict, labels=all_labels)
    cm = pd.DataFrame(cm, columns=le.classes_, index=le.classes_)

    # Final model uses every available sample.
    rfc.fit(X, y)

    return rfc, cm, round(acc, 2), le
开发者ID:rhsimplex,项目名称:matprojgeom,代码行数:30,代码来源:modelbuilder.py

示例5: random_forest_classify

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit [as 别名]
def random_forest_classify(train_data,train_label,test_data):
    """Fit a 100-tree random forest and predict labels for ``test_data``.

    The training labels are flattened with ``ravel`` before fitting.  The
    predictions are handed to ``save_result`` together with the file name
    'sklearn_random_forest_classify_Result.csv' and are also returned.
    """
    flat_labels = ravel(train_label)
    forest = RandomForestClassifier(n_estimators=100)
    forest.fit(train_data, flat_labels)

    predictions = forest.predict(test_data)
    save_result(predictions, 'sklearn_random_forest_classify_Result.csv')
    return predictions
开发者ID:fzhurd,项目名称:fzwork,代码行数:9,代码来源:digit_recognizer_main_v4h.py

示例6: run

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit [as 别名]
def run():
    """Cross-validate a random forest and log per-fold and mean metrics.

    ``gen_cv()`` supplies the fold iterator, the feature matrix and the
    labels.  For every fold a classifier is fitted, accuracy and log loss
    are logged, and finally the means over ``skf.n_folds`` are logged.
    """
    skf, X_all, labels = gen_cv()

    acc_total = 0.0
    logloss_total = 0.0
    fold = 0
    # NOTE(review): each fold tuple is unpacked as (test, train).  If the
    # iterator yields (train, test), as scikit-learn's splitters do, the
    # model is trained on the smaller part -- confirm this is intended.
    for test_index, train_index in skf:
        fold += 1
        logger.info('at fold: {0}'.format(fold))
        logger.info('train samples: {0}, test samples: {1}'.format(len(train_index), len(test_index)))

        X_train = X_all[train_index]
        X_test = X_all[test_index]
        y_train = labels[train_index]
        y_test = labels[test_index]

        forest = RandomForestClassifier(n_jobs=10, random_state=919)
        forest.fit(X_train, y_train)
        predicted_labels = forest.predict(X_test)
        predicted_proba = forest.predict_proba(X_test)

        fold_acc = accuracy_score(y_test, predicted_labels)
        logger.info('test data predicted accuracy: {0}'.format(fold_acc))

        # Log loss is computed from the class probabilities, not the labels.
        fold_logloss = log_loss(y_test, predicted_proba)
        logger.info('log loss at test data: {0}'.format(fold_logloss))

        acc_total += fold_acc
        logloss_total += fold_logloss

    n_folds = skf.n_folds
    logger.info('mean acc: {0}'.format(acc_total / n_folds))
    logger.info('mean log loss: {0}'.format(logloss_total / n_folds))
开发者ID:junfenglx,项目名称:skip-thoughts,代码行数:29,代码来源:eval_snli_dataset.py

示例7: cls_create

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit [as 别名]
 def cls_create(xs, ys):
     """Fit the classifier selected by ``algo`` and calibrate a threshold.

     ``algo`` and ``self`` are taken from the enclosing scope.  ``"SVM"``
     builds an ``svm.SVC`` with ``C=self.parm`` and probability estimates
     enabled; ``"RF"`` builds an entropy-criterion random forest with
     ``int(self.parm)`` trees.

     After fitting, class probabilities for the training data are passed
     to ``best_threshold_for_f1`` and the result is stored in
     ``self.threshold``, ``self.positive`` and ``self.negative``.

     Args:
         xs: training feature vectors.
         ys: training labels.

     Returns:
         The fitted classifier.

     Raises:
         ValueError: if ``algo`` is neither ``"SVM"`` nor ``"RF"``
             (previously this surfaced as an UnboundLocalError).
     """
     if algo == "SVM":
         classifier = svm.SVC(C = self.parm, probability=True)
     elif algo == "RF":
         classifier = RandomForestClassifier(n_estimators = int(self.parm), criterion='entropy',  n_jobs = 1)
     else:
         raise ValueError("Unsupported algo: %r (expected 'SVM' or 'RF')" % (algo,))

     classifier.fit(xs, ys)
     # Probabilities are computed on the training data itself.
     probs = classifier.predict_proba(xs)

     self.threshold, self.positive, self.negative = best_threshold_for_f1(probs, 20, ys)
     return classifier
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:33,代码来源:Codes_ClassifyUsingVectorComposition_WordSpace.py

示例8: main

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit [as 别名]
def main():
    """Plot a validation curve of random-forest score vs. number of trees.

    Loads the training data via ``load_data``/``extract_xy``, prints the
    feature importances of a single-tree forest together with the index
    and value of the most important feature, then runs
    ``validation_curve`` over ``n_estimators`` and plots the mean train
    and test scores with a +/- one standard deviation band.
    """
    S, col_names_S = load_data(config.paths.training_data,
                               config.paths.cache_folder)
    Xs, Ys, col_names_S = extract_xy(S, col_names_S)

    # Ys is densified once and reused (it exposes ``toarray``, so it is
    # presumably a scipy sparse matrix -- TODO confirm).
    labels = Ys.toarray().ravel()

    a = RandomForestClassifier(n_estimators=1)
    a.fit(Xs.toarray(), labels)
    best_features = a.feature_importances_
    max_ind, max_val = max(enumerate(best_features), key=operator.itemgetter(1))
    # Single-argument print calls work on both Python 2 and Python 3.
    print(best_features)
    print("%s %s" % (max_ind, max_val))

    print(Xs.shape)
    print(Ys.shape)
    param_range = [1, 3, 5, 7, 10, 15, 20, 30, 60, 80]
    # param_name / param_range are passed by keyword: recent scikit-learn
    # releases no longer accept them positionally.
    train_scores, test_scores = validation_curve(RandomForestClassifier(criterion='entropy'), Xs, labels,
                                                 param_name='n_estimators', param_range=param_range)

    print(train_scores)
    print(test_scores)
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)

    plt.title("Validation Curve for Random Forest")
    plt.xlabel("Number of Trees")
    plt.ylabel("Score")
    plt.plot(param_range, train_mean, label="Training Score", color='r')
    plt.fill_between(param_range, train_mean - train_std, train_mean + train_std, alpha=0.2, color='r')
    plt.plot(param_range, test_mean, label="Test Score", color='b')
    plt.fill_between(param_range, test_mean - test_std, test_mean + test_std, alpha=0.2, color='b')
    plt.legend(loc="best")
    plt.show()
开发者ID:rmunoz12,项目名称:ml-kaggle-2016,代码行数:37,代码来源:plot_rf_cf.py

示例9: Random_Forest_classifier

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit [as 别名]
def Random_Forest_classifier(train_input_data,train_output_data,test_input_data,test_output_data):
    """Plot accuracy against forest size for 10, 20, ..., 190 trees.

    For each forest size a classifier is fitted and used to predict the
    test inputs.  A prediction counts as correct when the true label
    appears among the values (after the first one) of the rows of the
    module-level ``similar_univs`` table whose ``univName`` equals the
    prediction.  The accuracy curve is saved to "rf1.png" and shown.

    Returns:
        The predictions (as a list) of the last forest that was trained.
    """
    tree_counts = []
    accuracies = []
    for n_trees in range(10, 200, 10):
        forest = RandomForestClassifier(n_trees)
        forest.fit(train_input_data, train_output_data)
        predicted_output = forest.predict(test_input_data)

        # Work on plain lists so that elements can be compared directly.
        if not isinstance(predicted_output, list):
            predicted_output = predicted_output.tolist()
        if not isinstance(test_output_data, list):
            test_output_data = test_output_data.tolist()

        misses = []
        for idx, actual in enumerate(test_output_data):
            matching_rows = similar_univs[similar_univs['univName'] == predicted_output[idx]]
            flattened = [value for row in matching_rows.values.tolist() for value in row]
            misses.append(0 if actual in flattened[1:] else 1)

        tree_counts.append(n_trees)
        miss_fraction = sum(misses) / float(len(misses))
        accuracies.append(100 - (miss_fraction * 100))

    tree_counts = np.array(tree_counts)
    accuracies = np.array(accuracies)
    plt.plot(tree_counts, accuracies)
    plt.xlabel('Number of trees')
    plt.ylabel('Percent of accuracy')
    plt.title('Varation of accuracy with trees')
    plt.grid(True)
    plt.savefig("rf1.png")
    plt.show()
    return predicted_output
开发者ID:aditya-sureshkumar,项目名称:University-Recommendation-System,代码行数:34,代码来源:model_building.py

示例10: rand_forest

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit [as 别名]
def rand_forest(train_bow,train_labels,test_bow,test_labels,bow_indexes):
    """Fit a default random forest and pass it to ``test`` for evaluation.

    The classifier is trained on ``train_bow``/``train_labels`` and then
    handed to ``test`` under the name "rf" together with the test data and
    ``bow_indexes``.  Nothing is returned.
    """
    print("Training rndForest")
    forest = RandomForestClassifier()
    forest.fit(train_bow, train_labels)

    print("Testing rndForest")
    test(forest, "rf", test_bow, test_labels, bow_indexes)
开发者ID:wangk1,项目名称:research,代码行数:9,代码来源:classifiers_func.py

示例11: randomForest_eval_func

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit [as 别名]
 def randomForest_eval_func(self, chromosome):
     """Score a chromosome by cross-validated random-forest AUC.

     The chromosome is decoded into ``(n_estimators, max_features,
     window_size)``.  If that combination is already logged, the first
     logged mean is returned.  Otherwise a forest is trained and evaluated
     per fold, and the resulting AUC, decision value and MCC are written
     to the log via ``write_log`` and ``add_log``.

     When a fold's test and training labels together exceed 1000 samples,
     evaluation stops after that fold and the accumulated sums are logged
     without dividing by ``self.fold``.

     Args:
         chromosome: encoded hyper-parameters understood by
             ``self.decode_chromosome``.

     Returns:
         The (mean) AUC for the decoded parameters.
     """
     n_estimators, max_features, window_size = self.decode_chromosome(chromosome)
     if self.check_log(n_estimators, max_features, window_size):
         return self.get_means_from_log(n_estimators, max_features, window_size)[0]

     folded_dataset = self.create_folded_dataset(window_size)
     mean_AUC = 0
     mean_decision_value = 0
     mean_mcc = 0
     sample_size_over_thousand_flag = False
     # range/list comprehension instead of xrange/map: identical results on
     # Python 2 and also valid on Python 3 (where map would be lazy).
     for test_fold in range(self.fold):
         test_labels, test_dataset, train_labels, train_dataset = folded_dataset.get_test_and_training_dataset(test_fold)
         if len(test_labels) + len(train_labels) > 1000:
             sample_size_over_thousand_flag = True
         clf = RandomForestClassifier(n_estimators=n_estimators, max_features=max_features)
         clf.fit(train_dataset, train_labels)
         probas = clf.predict_proba(test_dataset)
         decision_values = [proba[1] for proba in probas]  # Probability of being binding residue
         AUC, decision_value_and_max_mcc = validate_performance.calculate_AUC(decision_values, test_labels)
         mean_AUC += AUC
         mean_decision_value += decision_value_and_max_mcc[0]
         mean_mcc += decision_value_and_max_mcc[1]
         if sample_size_over_thousand_flag:
             # Large data set: a single fold is considered enough.
             break
     if not sample_size_over_thousand_flag:
         mean_AUC /= self.fold
         mean_decision_value /= self.fold
         mean_mcc /= self.fold
     self.write_log(n_estimators, max_features, window_size, mean_AUC, mean_decision_value, mean_mcc)
     self.add_log(n_estimators, max_features, window_size, mean_AUC, mean_decision_value, mean_mcc)
     return mean_AUC
开发者ID:clclcocoro,项目名称:MLwithGA,代码行数:33,代码来源:cross_validation.py

示例12: Model.__init__

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit [as 别名]
class Model:
    """Versioned wrapper around a random-forest classifier.

    The model is trained on construction (version 0); every later call to
    ``fit`` trains a fresh forest and increments ``version``.
    """

    def __init__(self, X, y, ntrees=500):
        """Train the initial forest with ``ntrees`` trees on ``X``, ``y``."""
        forest = RandomForestClassifier(n_estimators=ntrees)
        self.clf = forest
        self.ntrees = ntrees
        self.clf = forest.fit(X, y)
        self.version = 0

    def fit(self, X, y):
        """Replace the current forest with one trained on ``X``, ``y``.

        Prints the version transition, bumps ``version`` and returns
        ``self``.
        """
        self.clf = RandomForestClassifier(n_estimators=self.ntrees)
        self.clf = self.clf.fit(X, y)
        old_version = self.version
        new_version = old_version + 1
        print("updating model from " + str(old_version) + " to " + str(new_version) + ".")
        self.version += 1
        return self

    def predict(self, X):
        """Return the current forest's predictions for ``X``.

        Also prints which model version produced them.
        """
        labels = self.clf.predict(X)
        print("using version " + str(self.version))
        return labels

    def __repr__(self):
        """Return a short representation that includes the version."""
        return "<Model(version='%s')>" % (self.version)
开发者ID:talkdatatome,项目名称:gibberish,代码行数:27,代码来源:model.py

示例13: fit

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit [as 别名]
    def fit(self, x, y):
        """Fit per-channel and per-band models plus an integrating forest.

        ``x`` is sliced in two ways, using ``self.n_channels`` and
        ``self.n_features``:

        * channel ``i`` uses the contiguous columns
          ``[i * n_features, (i + 1) * n_features)``;
        * band ``i`` uses every ``n_features``-th column starting at ``i``,
          up to ``n_channels * n_features``.

        A ``LogisticRegression`` is fitted on each slice and its
        predictions on the same training data become one input column of
        a ``RandomForestClassifier`` fitted against ``y``.

        The sub-models are stored in ``self.models`` (channels first, then
        bands) and the forest in ``self.c``.  Nothing is returned.
        """
        n_channels = self.n_channels
        n_features = self.n_features
        models = []
        preds = np.zeros((len(x), n_channels + n_features))

        # create channel based models
        # (range instead of xrange: xrange does not exist on Python 3)
        for i in range(n_channels):
            print('training channel model {}'.format(i))
            model = LogisticRegression()
            feats = x[:, (i * n_features):((i + 1) * n_features)]
            model.fit(feats, y)
            models.append(model)
            preds[:, i] = model.predict(feats)

        # create band based models
        for i in range(n_features):
            print('training band model {}'.format(i))
            model = LogisticRegression()
            feats = x[:, i:(n_channels * n_features):n_features]
            model.fit(feats, y)
            models.append(model)
            preds[:, n_channels + i] = model.predict(feats)

        # create integrating forest
        top_classifier = RandomForestClassifier()
        top_classifier.fit(preds, y)

        self.models = models
        self.c = top_classifier
开发者ID:yueranyuan,项目名称:vector_edu,代码行数:30,代码来源:ensemble.py

示例14: onescore

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit [as 别名]
def onescore(X, Y, Xtest):
    """Fit a 1000-tree forest, report its OOB score and write predictions.

    The forest is trained on ``X``/``Y`` with out-of-bag scoring enabled,
    ``max_features=300`` and a fixed random state.  The OOB score and the
    estimator parameters are printed, then the predictions for ``Xtest``
    are passed to ``output`` with the file name "try_004.csv".
    """
    clf = RandomForestClassifier(oob_score=True, n_jobs=-1, n_estimators=1000, max_features=300, random_state=0)
    clf.fit(X, Y)
    # Single-argument print calls run on both Python 2 and 3; the spacing
    # matches what the former print statement produced.
    print("oob_score =  %s" % (clf.oob_score_,))
    print(clf.get_params())
    ytest = clf.predict(Xtest)
    output(ytest, "try_004.csv")
开发者ID:kbai,项目名称:us,代码行数:9,代码来源:try_004.py

示例15: buildModel

# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import fit [as 别名]
def buildModel(df):
    """Train a 50-tree random forest that predicts ``arr_del15``.

    The first ``train_len`` rows of ``df`` are used for training, with the
    module-level ``cols`` as features.  The three categorical columns are
    integer-coded with ``pd.factorize`` and the result is transformed by
    the module-level encoder ``enc`` before fitting.

    Args:
        df: DataFrame containing ``arr_del15`` and every column in ``cols``.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    train_y = df['arr_del15'][:train_len]
    # Copy the slice so the column assignments below neither trigger
    # SettingWithCopyWarning nor write through to the caller's frame.
    train_x = df[cols][:train_len].copy()

    # transform categorical features
    for column in ('unique_carrier', 'dep_conditions', 'arr_conditions'):
        train_x[column] = pd.factorize(train_x[column])[0]

    # NOTE: this changes a process-wide pandas display option.
    pd.set_option('display.max_rows', 500)
    print(train_x)

    train_x = enc.fit_transform(train_x)
    print(train_x.shape)

    # Create Random Forest classifier with 50 trees
    clf_rf = RandomForestClassifier(n_estimators=50, n_jobs=-1)
    # The encoded matrix is densified for fitting.
    clf_rf.fit(train_x.toarray(), train_y)

    print("Model built")
    return clf_rf
开发者ID:nora-lu,项目名称:Predict-Airline-Delay,代码行数:27,代码来源:modeling.py


注:本文中的sklearn.ensemble.RandomForestClassifier.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。