本文整理汇总了 Python 中 sklearn.ensemble.RandomForestClassifier.score 方法的典型用法代码示例。如果您正苦于以下问题：Python RandomForestClassifier.score 方法的具体用法？Python RandomForestClassifier.score 怎么用？Python RandomForestClassifier.score 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.ensemble.RandomForestClassifier 的用法示例。
在下文中一共展示了RandomForestClassifier.score方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: RF
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import score [as 别名]
class RF(object):
    """Random forest wrapper for tables whose first column is the label.

    The constructor derives the feature and label column names from
    ``train`` and fits the forest immediately.
    """

    def __init__(self, train, test, trees):
        """Store the data sets and fit a forest with ``trees`` estimators.

        ``train`` and ``test`` must expose ``.columns`` and support column
        selection via ``frame[names]`` (presumably pandas DataFrames --
        confirm against the caller).
        """
        self.trees = trees
        self.train = train
        self.test = test
        self.features, self.label = self.find_features()
        self.forest = self.train_forest()

    def find_features(self):
        """Return ``(feature_names, label_name)`` taken from ``self.train``.

        The first column is treated as the label; every other column is a
        feature.
        """
        columns = self.train.columns.values.tolist()
        label = columns.pop(0)
        return columns, label

    def train_forest(self):
        """Fit a new forest on the training table, store it and return it."""
        self.forest = RandomForestClassifier(n_estimators=self.trees,
                                             bootstrap=False,
                                             max_features='sqrt')
        self.forest.fit(self.train[self.features], self.train[self.label])
        return self.forest

    def score_forest(self):
        """Print the forest's accuracy on the test table and return it.

        Returning the value (in addition to printing it) lets callers use
        the score programmatically; callers that ignored the previous
        ``None`` result are unaffected.
        """
        accuracy = self.forest.score(self.test[self.features],
                                     self.test[self.label])
        # Single-argument call form prints identically on Python 2 and 3.
        print(accuracy)
        return accuracy

    def predict_test(self):
        """Return the forest's predictions for the test table's features."""
        return self.forest.predict(self.test[self.features])
示例2: test_iris
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import score [as 别名]
def test_iris():
    """Check that forests reach the expected training accuracy on iris.

    For each split criterion, a random forest and an extra-trees ensemble
    are fitted twice on the module-level ``iris`` data set (once with the
    default ``max_features`` and once with ``max_features=2``) and scored
    on the same data.
    """
    # (estimator class, extra constructor arguments, minimum accepted score)
    cases = (
        (RandomForestClassifier, {}, 0.9),
        (RandomForestClassifier, {"max_features": 2}, 0.5),
        (ExtraTreesClassifier, {}, 0.9),
        (ExtraTreesClassifier, {"max_features": 2}, 0.9),
    )
    for criterion in ("gini", "entropy"):
        for factory, extra, floor in cases:
            model = factory(n_estimators=10, criterion=criterion,
                            random_state=1, **extra)
            model.fit(iris.data, iris.target)
            accuracy = model.score(iris.data, iris.target)
            assert accuracy > floor, (
                "Failed with criterion %s and score = %f" % (criterion,
                                                             accuracy))
示例3: predictTitanic
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import score [as 别名]
def predictTitanic(train,test,predictors):
    """Fit three classifiers on the Titanic data and save RF predictions.

    Logistic regression, an SVM and a 100-tree random forest are each fitted
    on ``train[predictors]`` against ``train["Survived"]``. Each model's
    accuracy *on the training data itself* is printed (so the figures are
    optimistic). The random forest's predictions for ``test[predictors]``
    are written to ``submission_RFM.csv`` together with
    ``test["PassengerId"]``.

    Parameters:
        train: table with the predictor columns and a "Survived" column.
        test: table with the predictor columns and a "PassengerId" column.
        predictors: column names to use as features.
    """
    features = train[predictors]
    target = train["Survived"]

    # Predictions using logistic regression
    logistic = linear_model.LogisticRegression()
    logistic.fit(features, target)
    print(" THe score for logistic regression is")
    print(logistic.score(features, target))

    # Predictions using scikit learn svm. The SVM's test-set predictions
    # were previously computed and never used, so that call is dropped.
    clf = svm.SVC()
    clf.fit(features, target)
    print("THe score for SVM is")
    print(clf.score(features, target))

    # Predictions using random forest models. n_estimators is passed by
    # keyword: current scikit-learn releases reject positional parameters.
    numEstimators = 100
    model = RandomForestClassifier(n_estimators=numEstimators)
    model.fit(features, target)
    print("THe score for RF is")
    print(model.score(features, target))
    predictions_RFM = model.predict(test[predictors])

    submission = pd.DataFrame({
        "PassengerId": test["PassengerId"],
        "Survived": predictions_RFM
    })
    submission.to_csv('submission_RFM.csv', index=False)
示例4: run_random_forest
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import score [as 别名]
def run_random_forest(data, _max_depth):
    """Fit a 500-tree random forest on ``data`` and report its accuracy.

    ``data`` is sliced as ``data[:, 0:-1]`` (features) and ``data[:, -1]``
    (labels, cast to int), so it must be a 2-D array whose last column is
    the class label. A random 25% of the rows is held out for testing.

    Prints training, testing and out-of-bag accuracy and the feature
    ranking, then passes the importances to ``plot_feature_importance``.

    Parameters:
        data: 2-D array, last column is the label.
        _max_depth: ``max_depth`` for the forest (``None`` for unlimited).
    """
    features, labels = data[:, 0:-1], data[:, -1].astype(int)
    feature_train, feature_test, label_train, label_test = train_test_split(
        features, labels, test_size=0.25)

    # TODO: Vary Number of Estimators
    # max_features='sqrt' is what 'auto' meant for classifiers; 'auto' has
    # been removed from current scikit-learn releases.
    rfc = RandomForestClassifier(n_estimators=500, criterion='gini',
                                 max_depth=_max_depth, max_features='sqrt',
                                 bootstrap=True, oob_score=True, n_jobs=4,
                                 verbose=1)
    rfc.fit(feature_train, label_train)

    # score() and oob_score_ are accuracies (higher is better), not errors.
    training_accuracy = rfc.score(feature_train, label_train)
    testing_accuracy = rfc.score(feature_test, label_test)
    out_of_bag_accuracy = rfc.oob_score_

    print("Random Forest Results for Max Depth: %s" % (_max_depth,))
    print("Training Accuracy: %s" % (training_accuracy,))
    print("Testing Accuracy: %s" % (testing_accuracy,))
    print("Out of Bag Accuracy: %s" % (out_of_bag_accuracy,))

    feature_importance = rfc.feature_importances_
    # Spread of each feature's importance across the individual trees.
    stddev = np.std([tree.feature_importances_ for tree in rfc.estimators_],
                    axis=0)
    # Feature indices ordered from most to least important.
    indices = np.argsort(feature_importance)[::-1]

    # Print the feature ranking
    print("Feature ranking:")
    for rank, feature_index in enumerate(indices, 1):
        print("%d. feature %d (%f)" % (rank, feature_index,
                                       feature_importance[feature_index]))

    plot_feature_importance(
        feature_importance, indices, stddev,
        "random-forest-feature-importance-depth-" + str(_max_depth))
示例5: check_accuracy
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import score [as 别名]
def check_accuracy(X, y):
    """Print the held-out accuracy of several ensemble classifiers.

    40% of ``(X, y)`` is held out (``random_state=0``). A random forest,
    an extra-trees ensemble, AdaBoost and a hard-voting combination of
    extra-trees / random forest / AdaBoost / Gaussian naive Bayes are each
    fitted on the remaining 60% and scored on the held-out part.

    NOTE(review): ``cross_validation`` is resolved from the file's own
    imports; ``sklearn.cross_validation`` no longer exists in current
    scikit-learn releases -- confirm which version this file targets.
    """
    X_train, X_test, y_train, y_test = \
        cross_validation.train_test_split(X, y, test_size=0.40, random_state=0)

    def report(template, clf):
        # Fit on the training part and print the held-out accuracy.
        clf.fit(X_train, y_train)
        print(template % (clf.score(X_test, y_test),))

    # The templates reproduce the spacing of the original
    # ``print "label", value`` statements (label, one space, value).
    report("Score on test set1:  %s",
           RandomForestClassifier(n_estimators=100, max_depth=None))

    # min_samples_split must be an int >= 2; the former value 1 is rejected
    # by scikit-learn >= 0.18 and was equivalent to 2 before that, because
    # a node holding a single sample cannot be split.
    report("Score on test set2:  %s",
           ExtraTreesClassifier(n_estimators=100, max_depth=None,
                                min_samples_split=2, random_state=0))

    # clf = MLPClassifier(algorithm='l-bfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
    # clf.fit(X_train, y_train)
    # print "Score on test set: ", clf.score(X_test, y_test)

    report("Score on test set3 :  %s", AdaBoostClassifier(n_estimators=100))

    clf1 = ExtraTreesClassifier(n_estimators=50, max_depth=None,
                                min_samples_split=2, random_state=0)
    clf2 = RandomForestClassifier(n_estimators=50, random_state=0)
    clf3 = AdaBoostClassifier(n_estimators=50)
    clf4 = GaussianNB()
    report("Score on test set 4:  %s",
           VotingClassifier(estimators=[('et', clf1), ('rf', clf2),
                                        ('abc', clf3), ('gn', clf4)],
                            voting='hard'))
示例6: randomforest_info
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import score [as 别名]
def randomforest_info(self, max_trees = 1000, step = 40, k_folds = 5):
    """Characterise random-forest accuracy as a function of tree count.

    For each tree count in ``1, step, 2*step, ...`` (below ``max_trees``)
    and for each of the three feature sets held on ``self`` (raw ``iss``,
    ``lda`` and ``pca``), a forest is cross-validated with ``k_folds``
    folds on the training features and a second forest is fitted on all
    training rows and scored on the matching validation features.

    Results are stored in ``self.treedata``, one row per tree count:

        column 0     number of trees
        columns 1-3  iss: CV mean, CV std, validation score
        columns 4-6  lda: CV mean, CV std, validation score
        columns 7-9  pca: CV mean, CV std, validation score

    Progress (the tree count and the LDA validation score) is written to
    stdout. Nothing is returned.
    """
    import sys  # local import: only needed for the progress output

    def new_forest(n_trees):
        # min_samples_split must be an int >= 2; the former value 1 is
        # rejected by scikit-learn >= 0.18 and was equivalent to 2 before.
        return RandomForestClassifier(n_estimators=n_trees, n_jobs=5,
                                      max_depth=None, min_samples_split=2,
                                      random_state=0)

    # (training features, validation features, first result column)
    feature_sets = (
        (self.iss_features, self.iss_validation_features, 1),
        (self.lda_iss_features, self.lda_iss_validation_features, 4),
        (self.pca_iss_features, self.pca_iss_validation_features, 7),
    )

    # A forest needs at least one tree, so the leading 0 becomes 1.
    tree_counts = np.maximum(np.arange(0, max_trees, step), 1)
    # Size the table from the actual number of tree counts: max_trees/step
    # is a float on Python 3 and is one row short whenever max_trees is not
    # a multiple of step.
    self.treedata = np.zeros((len(tree_counts), 10))

    # sys.stdout.write reproduces the output of the original trailing-comma
    # print statements on both Python 2 and Python 3.
    sys.stdout.write('Characterising R_forest. Looping through trees:  ')
    for i, n_trees in enumerate(tree_counts):
        sys.stdout.write('%s' % (n_trees,))
        self.treedata[i, 0] = n_trees
        for train_features, validation_features, col in feature_sets:
            scores = cross_validation.cross_val_score(
                new_forest(n_trees), train_features, self.labels,
                cv=k_folds, n_jobs=5)
            full_forest = new_forest(n_trees)
            full_forest.fit(train_features, self.labels)
            self.treedata[i, col] = scores.mean()
            self.treedata[i, col + 1] = scores.std()
            # Score of the fully trained forest on the validation set.
            self.treedata[i, col + 2] = full_forest.score(
                validation_features, self.validation_labels)
        sys.stdout.write(' %s\n' % (self.treedata[i, 6],))
示例7: RF_results
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import score [as 别名]
def RF_results():
    """Print random-forest test accuracy for a sweep of ``n_estimators``.

    The sweep is run twice: on the module-level HOG features
    (``X_train_hog`` / ``X_test_hog``) and on the raw features
    (``X_train`` / ``X_test``), both against ``y_train`` / ``y_test``.
    For each run a table of ``n_estimators`` versus accuracy is printed,
    followed by the list of ``"(n, accuracy)"`` strings.
    """
    print("--------------RandomForestClassifier-----------------")
    rang = [None, 10, 20, 50, 100, 200, 400]

    def sweep(train_x, test_x):
        ans = []
        print("n_estimators Accuracy")
        for i in rang:
            # n_estimators=None is not a valid setting and makes fit()
            # raise; treat None as "use the library default" instead.
            if i is None:
                clf = RandomForestClassifier()
            else:
                clf = RandomForestClassifier(n_estimators=i)
            clf.fit(train_x, y_train)
            mean_accuracy = clf.score(test_x, y_test)
            print("%s   %s" % (i, mean_accuracy))
            ans.append('(' + str(i) + ", " + str(mean_accuracy) + ')')
        print(ans)

    print("--------------With HOG-----------------")
    sweep(X_train_hog, X_test_hog)

    print("\n--------------Without HOG-----------------")
    sweep(X_train, X_test)
示例8: cross_validation
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import score [as 别名]
def cross_validation(X, y):
    """Grid-search ``max_depth`` / ``min_samples_split`` for a random forest.

    The data is split once (``random_state=42``); every combination of the
    depth and split grids is fitted on the training part with a 500-tree
    entropy forest, and both training and held-out accuracy are printed.

    Parameters:
        X: feature rows.
        y: labels; must have the same length as ``X``.

    Returns:
        The ``get_params()`` dict of the model with the highest held-out
        accuracy, or ``None`` if no model scored above 0.

    Raises:
        AssertionError: if ``X`` and ``y`` differ in length.
    """
    assert(len(y) == len(X))
    # test_size=0.8 trains on 20% of the rows and holds out 80% for scoring
    # (the split is NOT into two equal parts).
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.8, random_state=42)
    depth = [8, 16, 32, 64]
    # min_samples_split must be an int >= 2. The former grid started at 1,
    # which scikit-learn >= 0.18 rejects and which behaved like 2 before.
    split = [2, 4, 8, 16, 32, 64]
    best_score = 0
    best_train_score = 0
    best_param = None
    for d in depth:
        for s in split:
            model = RandomForestClassifier(n_estimators=500,
                                           criterion="entropy",
                                           max_features="sqrt",
                                           max_depth=d,
                                           min_samples_split=s,
                                           n_jobs=-1)
            model = model.fit(X_train, y_train)
            print("Depth: %d split: %d" % (d, s))
            print("Model trainning score:")
            score_train = model.score(X_train, y_train)
            print(score_train)
            print("Model test score:")
            score_test = model.score(X_test, y_test)
            print(score_test)
            # Keep the first model that achieves the best held-out accuracy.
            if score_test > best_score:
                best_score = score_test
                best_train_score = score_train
                best_param = model.get_params()
    print("==================")
    print(best_train_score)
    print(best_score)
    print(best_param)
    return best_param
示例9: randomforest
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import score [as 别名]
def randomforest():
    """Train a random forest on ``train.csv`` and write ``result.csv``.

    Steps:
      1. Read ``train.csv``, pass it through ``cleanfloat`` and fit a
         100-tree forest on all rows; print its accuracy on those rows.
      2. Fit a second forest on a 70/30 split (``random_state=0``) and
         print its accuracy on the held-out 30%.
      3. Read ``test.csv``, pass it through ``cleanfloat`` and write the
         first forest's integer predictions with ``PassengerId`` to
         ``result.csv``.
    """
    feature_columns_to_use = ['Pclass','Sex','Age','SibSp','Fare','Parch','Embarked']

    train_frame = pd.read_csv("train.csv")
    # cleanfloat's return value is ignored, so it presumably edits the
    # frame in place -- confirm against its definition.
    cleanfloat(train_frame)
    X = train_frame[feature_columns_to_use]
    y = train_frame['Survived']

    # Model fitted on every training row; its score below is therefore
    # measured on the data it was trained on.
    forest = RandomForestClassifier(n_estimators=100).fit(X, y)
    print(forest.score(X, y))

    # Second model, used only to print a held-out accuracy.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=0)
    holdout_model = RandomForestClassifier(n_estimators=100)
    holdout_model.fit(X_train, y_train)
    holdout_predictions = holdout_model.predict(X_test)
    print(metrics.accuracy_score(y_test, holdout_predictions))

    test_frame = pd.read_csv("test.csv")
    cleanfloat(test_frame)
    survived = forest.predict(test_frame[feature_columns_to_use]).astype(int)
    submission = pd.DataFrame({'PassengerId': test_frame.PassengerId,
                               'Survived': survived})
    submission.to_csv("result.csv", index=False)
示例10: Model_RF
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import score [as 别名]
class Model_RF(object):
    """Random-forest model driven by a ``parameter`` mapping.

    ``model`` must expose ``train`` and ``test``; the last column of
    ``model.train`` is used as the label. ``parameter`` maps
    "n_estimators", "max_depth", "max_features" and "CV_size" either to
    objects with a ``get()`` method (presumably Tk variables, given the
    ``tkFileDialog`` use below -- confirm) or to plain values.
    """

    # Fallback settings used when the caller passes no ``parameter``.
    _DEFAULT_PARAMETER = {"n_estimators": 10, "max_depth": 5,
                          "max_features": 10, "CV_size": 0}

    @staticmethod
    def _value(setting):
        """Return ``setting.get()`` if it has a callable ``get``, else ``setting``."""
        getter = getattr(setting, "get", None)
        return getter() if callable(getter) else setting

    def __init__(self, model, parameter=None):
        """Split the training data and, when ``CV_size`` is 0, build the forest.

        The former default was a shared dict of plain ints on which
        ``.get()`` was called, so omitting ``parameter`` always raised
        AttributeError. The default is now ``None`` (mapped to the same
        values) and plain values are accepted alongside ``get()``-style
        variables.
        """
        if parameter is None:
            parameter = dict(self._DEFAULT_PARAMETER)
        read = self._value

        self.train = model.train
        self.test = model.test
        self.CVsize = float(read(parameter["CV_size"]))
        train = np.array(self.train)
        features = train[:, :-1]
        labels = train[:, -1]
        if self.CVsize == 0:
            # Current scikit-learn rejects test_size=0.0, so keep every row
            # for training and leave the cross-validation set empty.
            self.X_train, self.y_train = features, labels
            self.X_CV, self.y_CV = features[:0], labels[:0]
            self.clf = RandomForestClassifier(
                n_estimators=int(read(parameter["n_estimators"])),
                max_features=read(parameter["max_features"]),
                max_depth=int(read(parameter["max_depth"])))
        else:
            # NOTE(review): no classifier is created on this path; fit() and
            # score() only work after crossValidation() has assigned one.
            self.X_train, self.X_CV, self.y_train, self.y_CV = train_test_split(
                features, labels, test_size=self.CVsize)
        self.model = model

    def fit(self):
        """Fit the current classifier on the training split."""
        self.clf.fit(self.X_train, self.y_train)

    def score(self):
        """Print accuracy, per-class F1 and AUC measured on the training split."""
        pre = self.clf.predict(self.X_train)
        truth = self.y_train
        print("score: " + str(self.clf.score(self.X_train, truth)))
        print("f1: " + str(f1_score(truth, pre, average=None)))
        print("AUC score: " + str(roc_auc_score(truth, pre)))

    def save_results(self):
        """Ask for a file name and save test-set predictions there as CSV."""
        # NOTE(review): this predicts with ``self.model.clf`` on
        # ``self.model.test`` rather than with ``self.clf`` / ``self.test``;
        # confirm that is intended.
        pre = self.model.clf.predict(self.model.test)
        df = pd.DataFrame({"predict": pre})
        fileName = tkFileDialog.asksaveasfilename()
        df.to_csv(fileName)

    def crossValidation(self):
        """Grid-search ``n_estimators`` x ``max_features`` on the CV split.

        Prints the best accuracy and the best per-class F1 scores, each with
        the settings that produced it. ``self.clf`` is left holding the LAST
        model trained, not the best one.
        """
        estimatorList = [10, 50, 100, 200, 500]
        maxFeatList = ["sqrt", "log2", None]
        # Each record is [best value, n_estimators, max_features].
        bestScore = [0, 0, None]
        bestF1ScoreNeg = [0, 0, None]
        bestF1ScorePos = [0, 0, None]
        for e in estimatorList:
            for maxFeat in maxFeatList:
                self.clf = RandomForestClassifier(n_estimators=e,
                                                  max_features=maxFeat)
                self.clf.fit(self.X_train, self.y_train)
                pre = self.clf.predict(self.X_CV)
                truth = self.y_CV
                score = self.clf.score(self.X_CV, truth)
                if score > bestScore[0]:
                    bestScore[0] = score
                    bestScore[1] = e
                    bestScore[2] = maxFeat
                # Per-class F1, computed once: index 0 is reported as the
                # negative class and index 1 as the positive class.
                f1PerClass = f1_score(truth, pre, average=None)
                f1pos = f1PerClass[1]
                if f1pos > bestF1ScorePos[0]:
                    bestF1ScorePos[0] = f1pos
                    bestF1ScorePos[1] = e
                    bestF1ScorePos[2] = maxFeat
                f1neg = f1PerClass[0]
                if f1neg > bestF1ScoreNeg[0]:
                    bestF1ScoreNeg[0] = f1neg
                    bestF1ScoreNeg[1] = e
                    bestF1ScoreNeg[2] = maxFeat
        print("Best [score,n_estimators,max_features] on Cross Validation set: " + str(bestScore))
        print("Best [f1(pos),n_estimators,max_features] on Cross Validation set: " + str(bestF1ScorePos))
        print("Best [f1(neg),n_estimators,max_features] on Cross Validation set" + str(bestF1ScoreNeg))
示例11: randforpat
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import score [as 别名]
def randforpat():
    """Train a random forest on patsy design matrices and write ``result.csv``.

    Steps:
      1. Read ``train.csv``, pass it through ``cleanpatsy`` and build the
         label / design matrices from the formula below; fit a 100-tree
         forest on all rows and print its accuracy on those rows.
      2. Fit a second forest on a 70/30 split (``random_state=0``) and
         print its accuracy on the held-out 30%.
      3. Read ``test.csv``, build its design matrix and write the first
         forest's integer predictions with ``PassengerId`` to
         ``result.csv``.
    """
    train_frame = pd.read_csv("train.csv")
    # cleanpatsy's return value is ignored, so it presumably edits the
    # frame in place -- confirm against its definition.
    cleanpatsy(train_frame)
    y, X = dmatrices(
        'Survived ~ Pclass + Sex + Age + SibSp + Parch + Fare + Cabin + Embarked',
        train_frame, return_type="dataframe")
    # Flatten the label matrix into the 1-D vector expected by fit().
    y = np.ravel(y)

    forest = RandomForestClassifier(n_estimators=100).fit(X, y)
    print(forest.score(X, y))

    # Evaluate a second model by splitting into train and test sets.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=0)
    holdout_model = RandomForestClassifier(n_estimators=100)
    holdout_model.fit(X_train, y_train)
    holdout_predictions = holdout_model.predict(X_test)
    print(metrics.accuracy_score(y_test, holdout_predictions))

    test_frame = pd.read_csv("test.csv")
    cleanpatsy(test_frame)
    # NOTE(review): this design matrix is built independently of the
    # training one; confirm both always yield the same columns.
    test_design = dmatrix(
        'Pclass + Sex + Age + SibSp + Parch + Fare + Cabin + Embarked',
        test_frame, return_type="dataframe")
    survived = forest.predict(test_design).astype(int)
    submission = pd.DataFrame({'PassengerId': test_frame.PassengerId,
                               'Survived': survived})
    submission.to_csv("result.csv", index=False)
示例12: main
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import score [as 别名]
def main():
    """Fit a random forest on ``Data/train.csv`` and print its training accuracy.

    The first field of every row is the target; four fields that cannot be
    converted are dropped before the remaining fields are preprocessed.
    """
    rows = csv_io.read_data("{}/Data/train.csv".format(os.getcwd()), True)

    # Separate the target (first field) from the features (the rest).
    target = [float(row[0]) for row in rows]
    train = [row[1:] for row in rows]

    # Remove the categoricals that can't be converted. Indices are applied
    # one after another, so each refers to the row as already shortened.
    for features in train:
        for position in (1, 1, 5, 6):
            del features[position]

    # The returned category mapping is not used here; the call is kept
    # because it is presumably what converts ``train`` in place (confirm).
    preprocess.enum_categ_data(train, "f", 10)
    preprocess.strf_to_floats(train, missing="average")

    # Test-set handling is disabled: enum_categ_data can't simply be run on
    # the test data, because each category must map to the same index as in
    # training.
    # test = csv_io.read_data("{}/Data/test.csv".format(os.getcwd()), True)
    # for x in test:
    #     del x[1]
    #     del x[1]
    #     del x[5]
    #     del x[6]
    # cats = preprocess.enum_categ_data(test, 'f', 10)
    # preprocess.strf_to_floats(test, missing='average')

    rf = RandomForestClassifier(n_estimators=100, min_samples_split=2)
    rf.fit(train, target)
    print(rf.score(train, target))
示例13: RFclassifier
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import score [as 别名]
def RFclassifier(n,hog_train_x,train_y,hog_test_x,test_y):
    """Fit an ``n``-tree random forest and report its test accuracy.

    Parameters:
        n: number of trees (``n_estimators``).
        hog_train_x, train_y: training features and labels.
        hog_test_x, test_y: test features and labels.

    Returns:
        The accuracy on the test set. It is also printed, as before;
        callers that ignored the previous ``None`` result are unaffected.
    """
    clf = RandomForestClassifier(n_estimators=n, n_jobs=-1)
    clf.fit(hog_train_x, train_y)
    # Single-string form prints identically on Python 2 and Python 3.
    print('Accuracy for Random Forest classifier on test set with %s trees:' % (n,))
    accuracy = clf.score(hog_test_x, test_y)
    print(accuracy)
    return accuracy
示例14: main
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import score [as 别名]
def main():
    """Train default random forests on the corpus and print two accuracies.

    The first figure is measured on the data the model was trained on; the
    second comes from a fresh model trained on one half of the data and
    scored on the other half.
    """
    # Load the dictionary and the articles.
    dictionary = corpus.get_dictionary(create_flg=False)
    contents = corpus.get_contents()

    # Feature extraction: one (vector, class id) pair per article, built in
    # a single pass so both helpers are called per article in turn.
    samples = [
        (corpus.get_vector(dictionary, content), corpus.get_class_id(file_name))
        for file_name, content in contents.items()
    ]
    data_train = [vector for vector, _ in samples]
    label_train = [class_id for _, class_id in samples]

    # Train a classifier, then score it on the very data it was trained on.
    # This is "cheating", so the accuracy should be high.
    estimator = RandomForestClassifier()
    estimator.fit(data_train, label_train)
    print("==== 学習データと予測データが一緒の場合")
    print(estimator.score(data_train, label_train))

    # Split into training and test halves, and train a second classifier
    # that is scored only on data it has not seen.
    data_train_s, data_test_s, label_train_s, label_test_s = train_test_split(
        data_train, label_train, test_size=0.5)
    estimator2 = RandomForestClassifier()
    estimator2.fit(data_train_s, label_train_s)
    print("==== 学習データと予測データが違う場合")
    print(estimator2.score(data_test_s, label_test_s))
示例15: TestRandForest
# 需要导入模块: from sklearn.ensemble import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.RandomForestClassifier import score [as 别名]
def TestRandForest(dat, lab):
    """
    Find the optimal parameters for the classifier.

    Every (n_estimators, max_features) combination is trained on 10 random
    60/20/20 train/validation/test splits; the combination with the highest
    mean validation accuracy wins.

    Parameters:
    ----------
    dat: numpy array with all records
    lab: numpy array with class labels of all records

    Returns:
    --------
    par: optimal parameters for the classifier, as the tuple
         (n_estimators, max_features)
    """
    # RF parameters. Will choose one based on which does best on the
    # validation set: n_estimators, max_features
    est = range(15, 41, 5)
    feat = range(2, 8, 1)
    par = [(e, f) for e in est for f in feat]

    # want to try different ensembles to get error bar on score
    num = 10
    seed = np.random.randint(1000000, size=num)
    valScore = np.zeros((num, len(par)))
    testScore = np.zeros((num, len(par)))

    for nv in range(num):
        print('Ensemble: %d' % (nv + 1))
        # split training data into train, validation, test (60, 20, 20)
        xTrain, xTmp, yTrain, yTmp = cross_validation.train_test_split(
            dat, lab, test_size=0.4, random_state=seed[nv])
        xVal, xTest, yVal, yTest = cross_validation.train_test_split(
            xTmp, yTmp, test_size=0.5, random_state=seed[nv])

        # now train RF for each parameter combination
        for i, (nEst, maxFeat) in enumerate(par):
            # min_samples_split must be an int >= 2; the former value 1 is
            # rejected by scikit-learn >= 0.18 and was equivalent to 2.
            clf = RandomForestClassifier(n_estimators=nEst,
                                         max_features=maxFeat,
                                         min_samples_split=2)
            clf = clf.fit(xTrain, yTrain)
            valScore[nv, i] = clf.score(xVal, yVal)
            testScore[nv, i] = clf.score(xTest, yTest)

    # Find optimal parameters (statistics computed once, across ensembles)
    valMean = np.mean(valScore, axis=0)
    valStd = np.std(valScore, axis=0)
    testMean = np.mean(testScore, axis=0)
    testStd = np.std(testScore, axis=0)
    tmp = np.argmax(valMean)

    # Single-string prints: the former parenthesised multi-argument form
    # printed a tuple repr on Python 2.
    print('')
    print('Optimal parameters (num_estimators, max_features): %s' % (par[tmp],))
    print('Mean | Std Score (Validation set): %s | %s' % (valMean[tmp], valStd[tmp]))
    print('Mean | Std Score (Test set): %s | %s' % (testMean[tmp], testStd[tmp]))

    # Return optimal parameters
    return par[tmp]