This article collects typical usage examples of the Python method sklearn.ensemble.RandomForestClassifier.fit. If you are wondering what RandomForestClassifier.fit does or how to use it, the curated code samples below should help. You can also look at further usage examples of the class this method belongs to, sklearn.ensemble.RandomForestClassifier.
Fifteen code examples of RandomForestClassifier.fit are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help surface better Python code examples.
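Before the collected examples, here is a minimal, self-contained sketch of the basic fit/predict workflow. The iris dataset, the 100-tree setting, and the train/test split are illustrative choices only, not taken from any example below:

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Illustrative data; any numeric feature matrix X with label vector y works the same way.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf.fit(X_train, y_train)            # the method documented on this page
y_pred = clf.predict(X_test)         # hard class labels
y_proba = clf.predict_proba(X_test)  # per-class probabilities
print("accuracy:", accuracy_score(y_test, y_pred))

The examples that follow embed the same fit call in larger workflows: cross-validation loops, hyperparameter sweeps, model persistence, and stacking.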
Example 1: randomforest
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.RandomForestClassifier import fit [as alias]
# Also requires: from sklearn.metrics import precision_score, recall_score, roc_auc_score
def randomforest(df1, df2):
    # The 'L' column holds the class labels; split it out before building the feature matrix.
    newsT = df1.L
    L = ['L']
    for x in L:
        del df1[x]
    news = df1
    TRAINING = df1.as_matrix(columns=None)  # training features
    TEST = newsT.as_matrix(columns=None)    # training labels (despite the name)
    newsT = df2['L']
    L = ['L']
    for x in L:
        del df2[x]
    X_test = df2.as_matrix(columns=None)
    y_test = newsT.as_matrix(columns=None)
    clf = RandomForestClassifier(n_estimators=200)
    clf.fit(TRAINING, TEST)
    y_pred1 = clf.predict_proba(X_test)[:, 1]  # probability of the positive class
    y_pred = clf.predict(X_test)
    print 'roc: ', roc_auc_score(y_test, y_pred1)
    print 'precision: ', precision_score(y_test, y_pred)
    print 'recall:', recall_score(y_test, y_pred)
    print 'precision Negatives: ', precision_score(y_test, y_pred, pos_label=0)
    print 'recall Negatives: ', recall_score(y_test, y_pred, pos_label=0)
    return (roc_auc_score(y_test, y_pred1), precision_score(y_test, y_pred),
            recall_score(y_test, y_pred), precision_score(y_test, y_pred, pos_label=0),
            recall_score(y_test, y_pred, pos_label=0))
Example 2: TrainRandomForestVariance
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.RandomForestClassifier import fit [as alias]
def TrainRandomForestVariance(p_subject, p_save):
    print "Welcome to TrainRandomForestVariance(" + p_subject + ", " + str(p_save) + ")"
    training_data_raw = pd.read_pickle(input_data_paths[p_subject])
    # Keep only the variance features plus the classification row (features are stored as rows)
    training_data = training_data_raw[["variance" in x or "classification" in x for x in training_data_raw.index]]
    # Ictal vs interictal
    forest_seizure = RandomForestClassifier(n_estimators=500, n_jobs=1, max_features="sqrt", max_depth=None, min_samples_split=1)
    y_seizure = [1 * (x > 0) for x in training_data.T["classification"]]
    forest_seizure.fit(training_data[:-1].T, y_seizure)
    # IctalA vs IctalB
    forest_early = RandomForestClassifier(n_estimators=500, n_jobs=1, max_features="sqrt", max_depth=None, min_samples_split=1)
    y_early = [1 * (x == 2) for x in training_data.T["classification"]]
    forest_early.fit(training_data[:-1].T, y_early)
    # Save models
    if p_save:
        saved_files = joblib.dump(forest_seizure, "RFV_" + p_subject + "_seizure.pkl")
        for saved_file in saved_files:
            os.system("mv " + saved_file + " /Users/dryu/Documents/DataScience/Seizures/data/models")
        saved_files = joblib.dump(forest_early, "RFV_" + p_subject + "_early.pkl")
        for saved_file in saved_files:
            os.system("mv " + saved_file + " /Users/dryu/Documents/DataScience/Seizures/data/models")
    return {"seizure": forest_seizure, "early": forest_early}
Example 3: __init__
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.RandomForestClassifier import fit [as alias]
def __init__(self, data, classes, tree_features, n_trees=100):
    self.n_features = np.shape(data)[1]
    n_rows = np.shape(data)[0]
    # Drop columns that are all NaN
    n_nans = np.sum(np.isnan(data), 0)
    data = data[:, n_nans < n_rows]
    self.n_features = np.shape(data)[1]
    # Drop rows that are all NaN
    n_nans = np.sum(np.isnan(data), 1)
    data = data[n_nans < self.n_features, :]
    self.n_rows = np.shape(data)[0]
    if tree_features > self.n_features:
        tree_features = self.n_features
    self.col_list = np.zeros((n_trees, tree_features), dtype='int')
    self.n_trees = n_trees
    self.bags = []
    for i in range(n_trees):
        # Sample a random subset of columns for this tree
        cols = sample(range(self.n_features), tree_features)
        cols.sort()
        self.col_list[i, :] = cols
        data_temp = data[:, cols]
        # Keep only the rows with no NaNs in the selected columns
        n_nans = np.sum(np.isnan(data_temp), 1)
        data_temp = data_temp[n_nans == 0, :]
        classes_temp = classes[n_nans == 0]
        #bag = BaggingClassifier(n_estimators=1, max_features=tree_features)
        bag = RandomForestClassifier(n_estimators=1, max_features=tree_features)
        bag.fit(data_temp, classes_temp)
        self.bags.append(bag)
        print(np.shape(data_temp))
Example 4: buildTreeClassifier
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.RandomForestClassifier import fit [as alias]
def buildTreeClassifier(predictorColumns, structurestable='structures.csv', targetcolumn='pointGroup', md=None):
    """
    Build a random-forest classifier to predict a structural feature from compositional data.
    Returns the model retrained on all data, a confusion matrix computed on a held-out split,
    the mean cross-validated accuracy (rounded to two decimals), and the fitted label encoder.
    """
    df = pd.read_csv(structurestable)
    df = df.dropna()
    if 'fracNobleGas' in df.columns:
        df = df[df['fracNobleGas'] <= 0]
    s = StandardScaler()
    le = LabelEncoder()
    X = s.fit_transform(df[predictorColumns].astype('float64'))
    y = le.fit_transform(df[targetcolumn].values)
    rfc = RandomForestClassifier(max_depth=md)
    acc = mean(cross_val_score(rfc, X, y))
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    rfc.fit(X_train, y_train)
    y_predict = rfc.predict(X_test)
    cm = confusion_matrix(y_test, y_predict)
    cm = pd.DataFrame(cm, columns=le.classes_, index=le.classes_)
    rfc.fit(X, y)
    return rfc, cm, round(acc, 2), le
Example 5: random_forest_classify
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.RandomForestClassifier import fit [as alias]
def random_forest_classify(train_data, train_label, test_data):
    rf = RandomForestClassifier(n_estimators=100)
    rf.fit(train_data, ravel(train_label))
    test_label = rf.predict(test_data)
    save_result(test_label, 'sklearn_random_forest_classify_Result.csv')
    return test_label
Example 6: run
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.RandomForestClassifier import fit [as alias]
def run():
    mean_acc = 0.0
    mean_logloss = 0.0
    skf, X_all, labels = gen_cv()
    for fold, (test_index, train_index) in enumerate(skf, start=1):
        logger.info('at fold: {0}'.format(fold))
        logger.info('train samples: {0}, test samples: {1}'.format(len(train_index), len(test_index)))
        X_train, X_test = X_all[train_index], X_all[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        rfc = RandomForestClassifier(n_jobs=10, random_state=919)
        rfc.fit(X_train, y_train)
        y_test_predicted = rfc.predict(X_test)
        y_test_proba = rfc.predict_proba(X_test)
        # equals = y_test == y_test_predicted
        # acc = np.sum(equals) / float(len(equals))
        acc = accuracy_score(y_test, y_test_predicted)
        logger.info('test data predicted accuracy: {0}'.format(acc))
        # log loss: -log P(yt|yp) = -(yt log(yp) + (1 - yt) log(1 - yp))
        logloss = log_loss(y_test, y_test_proba)
        logger.info('log loss at test data: {0}'.format(logloss))
        # logger.info('log loss at test data using label: {0}'.format(log_loss(y_test, y_test_predicted)))
        mean_acc += acc
        mean_logloss += logloss
    n_folds = skf.n_folds
    logger.info('mean acc: {0}'.format(mean_acc / n_folds))
    logger.info('mean log loss: {0}'.format(mean_logloss / n_folds))
Example 7: cls_create
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.RandomForestClassifier import fit [as alias]
def cls_create(xs, ys):
    if algo == "SVM":
        classifier = svm.SVC(C=self.parm, probability=True)
    elif algo == "RF":
        classifier = RandomForestClassifier(n_estimators=int(self.parm), criterion='entropy', n_jobs=1)
    #
    #classifier = LDA()
    new_xs = xs
    """
    positive_count = len([y for y in ys if y > 0])
    if positive_count >= 20:
        #self.selector = svm.LinearSVC(C = 1, dual = False, penalty="l1")
        self.selector = LDA()
        new_xs = self.selector.fit_transform(xs, ys)
    else:
        self.selector = None
    """
    classifier.fit(new_xs, ys)
    probs = classifier.predict_proba(new_xs)
    #self.pclassifier = svm.SVC(parm_val = 1.0)
    #self.pclassifier.fit(probs, ys)
    # Pick the probability threshold that maximizes F1 on the training predictions
    self.threshold, self.positive, self.negative = best_threshold_for_f1(probs, 20, ys)
    return classifier
Author: simonhughes22 | Project: PythonNlpResearch | Lines: 33 | Source: Codes_ClassifyUsingVectorComposition_WordSpace.py
Example 8: main
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.RandomForestClassifier import fit [as alias]
def main():
    S, col_names_S = load_data(config.paths.training_data,
                               config.paths.cache_folder)
    Xs, Ys, col_names_S = extract_xy(S, col_names_S)
    # Fit a single-tree forest just to inspect feature importances
    a = RandomForestClassifier(n_estimators=1)
    a.fit(Xs.toarray(), Ys.toarray().ravel())
    best_features = a.feature_importances_
    max_ind, max_val = max(enumerate(best_features), key=operator.itemgetter(1))
    print best_features
    print max_ind, max_val
    print Xs.shape
    print Ys.shape
    # Validation curve over the number of trees
    param_range = [1, 3, 5, 7, 10, 15, 20, 30, 60, 80]
    train_scores, test_scores = validation_curve(RandomForestClassifier(criterion='entropy'), Xs, Ys.toarray().ravel(),
                                                 'n_estimators', param_range)
    print train_scores
    print test_scores
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)
    plt.title("Validation Curve for Random Forest")
    plt.xlabel("Number of Trees")
    plt.ylabel("Score")
    plt.plot(param_range, train_mean, label="Training Score", color='r')
    plt.fill_between(param_range, train_mean - train_std, train_mean + train_std, alpha=0.2, color='r')
    plt.plot(param_range, test_mean, label="Test Score", color='b')
    plt.fill_between(param_range, test_mean - test_std, test_mean + test_std, alpha=0.2, color='b')
    plt.legend(loc="best")
    plt.show()
Example 9: Random_Forest_classifier
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.RandomForestClassifier import fit [as alias]
def Random_Forest_classifier(train_input_data, train_output_data, test_input_data, test_output_data):
    tree_list = []
    accuracy_percent = []
    # Sweep the number of trees and record the accuracy for each setting
    for trees in range(10, 200, 10):
        clf = RandomForestClassifier(trees)
        clf.fit(train_input_data, train_output_data)
        predicted_output = clf.predict(test_input_data)
        error_list = []
        if isinstance(predicted_output, list) == False:
            predicted_output = predicted_output.tolist()
        if isinstance(test_output_data, list) == False:
            test_output_data = test_output_data.tolist()
        for i in range(len(test_output_data)):
            # A prediction counts as correct if the true university appears among
            # the universities similar to the predicted one
            cur_univ_similarities = similar_univs[similar_univs['univName'] == predicted_output[i]]
            cur_univ_similarity_list = cur_univ_similarities.values.tolist()
            cur_univ_similarity_list = [item for sublist in cur_univ_similarity_list for item in sublist]
            if test_output_data[i] in cur_univ_similarity_list[1:]:
                error_list.append(0)
            else:
                error_list.append(1)
        tree_list.append(trees)
        accuracy_percent.append(100 - ((sum(error_list) / float(len(error_list))) * 100))
    tree_list = np.array(tree_list)
    accuracy_percent = np.array(accuracy_percent)
    plt.plot(tree_list, accuracy_percent)
    plt.xlabel('Number of trees')
    plt.ylabel('Percent of accuracy')
    plt.title('Variation of accuracy with trees')
    plt.grid(True)
    plt.savefig("rf1.png")
    plt.show()
    return predicted_output
Example 10: rand_forest
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.RandomForestClassifier import fit [as alias]
def rand_forest(train_bow, train_labels, test_bow, test_labels, bow_indexes):
    print("Training rndForest")
    rf_classifier = RandomForestClassifier()
    rf_classifier.fit(train_bow, train_labels)
    print("Testing rndForest")
    test(rf_classifier, "rf", test_bow, test_labels, bow_indexes)
Example 11: randomForest_eval_func
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.RandomForestClassifier import fit [as alias]
def randomForest_eval_func(self, chromosome):
    n_estimators, max_features, window_size = self.decode_chromosome(chromosome)
    # Reuse a cached result if this parameter combination was evaluated before
    if self.check_log(n_estimators, max_features, window_size):
        return self.get_means_from_log(n_estimators, max_features, window_size)[0]
    folded_dataset = self.create_folded_dataset(window_size)
    indim = 21 * (2 * window_size + 1)
    mean_AUC = 0
    mean_decision_value = 0
    mean_mcc = 0
    sample_size_over_thousand_flag = False
    for test_fold in xrange(self.fold):
        test_labels, test_dataset, train_labels, train_dataset = folded_dataset.get_test_and_training_dataset(test_fold)
        if len(test_labels) + len(train_labels) > 1000:
            sample_size_over_thousand_flag = True
        clf = RandomForestClassifier(n_estimators=n_estimators, max_features=max_features)
        clf.fit(train_dataset, train_labels)
        probas = clf.predict_proba(test_dataset)
        decision_values = map(lambda x: x[1], probas)  # probability of being a binding residue
        AUC, decision_value_and_max_mcc = validate_performance.calculate_AUC(decision_values, test_labels)
        mean_AUC += AUC
        mean_decision_value += decision_value_and_max_mcc[0]
        mean_mcc += decision_value_and_max_mcc[1]
        if sample_size_over_thousand_flag:
            break
    if not sample_size_over_thousand_flag:
        mean_AUC /= self.fold
        mean_decision_value /= self.fold
        mean_mcc /= self.fold
    self.write_log(n_estimators, max_features, window_size, mean_AUC, mean_decision_value, mean_mcc)
    self.add_log(n_estimators, max_features, window_size, mean_AUC, mean_decision_value, mean_mcc)
    return mean_AUC
Example 12: __init__
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.RandomForestClassifier import fit [as alias]
class Model:
    """Abstraction for a gibberish model. Two methods: fit and predict."""

    def __init__(self, X, y, ntrees=500):
        """Get data and fit the model."""
        self.clf = RandomForestClassifier(n_estimators=ntrees)
        self.ntrees = ntrees
        self.clf = self.clf.fit(X, y)
        self.version = 0

    def fit(self, X, y):
        """Refit the model on data X, y and bump the version."""
        self.clf = RandomForestClassifier(n_estimators=self.ntrees)
        self.clf = self.clf.fit(X, y)
        print("updating model from " + str(self.version) + " to " + str(self.version + 1) + ".")
        self.version += 1
        return(self)

    def predict(self, X):
        """Predict classification for X."""
        prediction = self.clf.predict(X)
        print("using version " + str(self.version))
        return(prediction)

    def __repr__(self):
        return("<Model(version='%s')>" % (self.version))
Example 13: fit
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.RandomForestClassifier import fit [as alias]
def fit(self, x, y):
    models = []
    preds = np.zeros((len(x), self.n_channels + self.n_features))
    # create channel-based models
    for i in xrange(self.n_channels):
        print('training channel model {}'.format(i))
        model = LogisticRegression()
        feats = x[:, (i * self.n_features):((i + 1) * self.n_features)]
        model.fit(feats, y)
        models.append(model)
        preds[:, i] = model.predict(feats)
    # create band-based models
    for i in xrange(self.n_features):
        print('training band model {}'.format(i))
        model = LogisticRegression()
        feats = x[:, i:(self.n_channels * self.n_features):self.n_features]
        model.fit(feats, y)
        models.append(model)
        preds[:, self.n_channels + i] = model.predict(feats)
    # create the integrating forest on top of the per-model predictions
    top_classifier = RandomForestClassifier()
    top_classifier.fit(preds, y)
    self.models = models
    self.c = top_classifier
Example 14: onescore
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.RandomForestClassifier import fit [as alias]
def onescore(X, Y, Xtest):
    clf = RandomForestClassifier(oob_score=True, n_jobs=-1, n_estimators=1000, max_features=300, random_state=0)
    clf.fit(X, Y)
    print "oob_score = ", clf.oob_score_
    print clf.get_params()
    ytest = clf.predict(Xtest)
    output(ytest, "try_004.csv")
Example 15: buildModel
# Required import: from sklearn.ensemble import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.RandomForestClassifier import fit [as alias]
def buildModel(df):
    train_y = df['arr_del15'][:train_len]
    train_x = df[cols][:train_len]
    # transform categorical features
    train_x['unique_carrier'] = pd.factorize(train_x['unique_carrier'])[0]
    train_x['dep_conditions'] = pd.factorize(train_x['dep_conditions'])[0]
    train_x['arr_conditions'] = pd.factorize(train_x['arr_conditions'])[0]
    pd.set_option('display.max_rows', 500)
    print(train_x)
    # train_x['origin'] = pd.factorize(train_x['origin'])[0]
    # train_x['dest'] = pd.factorize(train_x['dest'])[0]
    # print(train_x)
    train_x = enc.fit_transform(train_x)
    print(train_x.shape)
    # Create a Random Forest classifier with 50 trees
    clf_rf = RandomForestClassifier(n_estimators=50, n_jobs=-1)
    clf_rf.fit(train_x.toarray(), train_y)
    del train_x, train_y
    print("Model built")
    return clf_rf