本文整理汇总了 Python 中 sklearn.ensemble.forest.RandomForestClassifier.predict 方法的典型用法代码示例。如果您正苦于以下问题:Python RandomForestClassifier.predict 方法的具体用法是什么?Python RandomForestClassifier.predict 怎么用?有哪些 Python RandomForestClassifier.predict 的使用例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.ensemble.forest.RandomForestClassifier 的用法示例。
在下文中一共展示了RandomForestClassifier.predict方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: model_pred
# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict [as 别名]
def model_pred(trainX,trainY,testX,model_type):
    """Train the model selected by ``model_type`` and predict ``testX``.

    Parameters
    ----------
    trainX, trainY
        Training features and labels, handed straight to ``fit``.
    testX
        Features to predict; must support ``len``.
    model_type : str
        ``"rf"``     -- a single 500-tree random forest.
        ``"gbdt"``   -- a single 6-stage gradient-boosting classifier.
        ``"fusion"`` -- majority vote over five random forests with
        100, 200, 300, 400 and 500 trees.

    Returns
    -------
    The output of ``clf.predict`` for ``"rf"`` / ``"gbdt"``, or a list of
    booleans (one per test row) for ``"fusion"``.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names.  The previous
        version fell through to ``return pred`` and failed with an
        ``UnboundLocalError`` instead.
    """
    if model_type == "rf":
        clf = RandomForestClassifier(n_estimators=500, n_jobs=20)
        clf.fit(trainX, trainY)
        pred = clf.predict(testX)
    elif model_type == "gbdt":
        clf = GradientBoostingClassifier(n_estimators=6, learning_rate=0.9, random_state=0)
        clf.fit(trainX, trainY)
        pred = clf.predict(testX)
    elif model_type == "fusion":
        forest_sizes = [100, 200, 300, 400, 500]
        votes = np.zeros(len(testX))
        for n_trees in forest_sizes:
            clf = RandomForestClassifier(n_estimators=n_trees, n_jobs=20, bootstrap=True)
            clf.fit(trainX, trainY)
            # Summing predictions counts "positive" votes.
            # assumes labels are encoded as 0/1 -- TODO confirm with caller
            votes += clf.predict(testX)
        # Strict majority of the ensemble (3 of the 5 forests), derived from
        # the list length so it stays correct if forest_sizes changes.
        majority = len(forest_sizes) // 2 + 1
        pred = list(votes >= majority)
    else:
        raise ValueError("unknown model_type: %r (expected 'rf', 'gbdt' or 'fusion')"
                         % (model_type,))
    # Single-argument print() produces the same text on Python 2 and 3.
    print("the pos rate is: %s" % (float(sum(pred)) / len(pred)))
    return pred
示例2: RandomForestClassifer
# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict [as 别名]
def RandomForestClassifer(self):
    """Fit a 100-tree random forest on the training frame and predict the test frame.

    Column 0 of ``self.titanic_train_frame`` is used as the label and all
    remaining columns as features; every column of
    ``self.titanic_test_frame`` is fed to ``predict``.

    Side effects: sets ``self.test_Array`` (test frame values),
    ``self.predicted_probability`` (the output of ``predict``, despite the
    attribute name) and ``self.predicted_probability_list`` (the same
    values as a plain list).
    """
    train_values = self.titanic_train_frame.values
    self.test_Array = self.titanic_test_frame.values

    labels = train_values[:, 0]
    features = train_values[:, 1:]

    forest = RandomForestClassifier(n_estimators=100, n_jobs=-1)
    forest.fit(features, labels)

    self.predicted_probability = forest.predict(self.test_Array[:, :])
    self.predicted_probability_list = self.predicted_probability.tolist()
开发者ID:malaikannan,项目名称:Kaggle_TitanicPredictionChallenge,代码行数:13,代码来源:TitanicPrediction_LogisticRegression.py
示例3: just_pred
# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict [as 别名]
def just_pred(x, y):
    """Fit a default random forest on a random 70 % of the rows and predict the rest.

    Parameters
    ----------
    x : pandas.DataFrame
        Feature rows.
    y : pandas.Series
        Labels, positionally aligned with ``x``.

    Returns
    -------
    (ytest, ypred)
        The held-out labels and the forest's predictions for the held-out
        rows.

    Notes
    -----
    The previous version shuffled an index list but never used it, so the
    split was always "first 70 % / last 30 %".  It also sliced with
    ``.ix[:trainlen]``, which is label-inclusive and therefore put row
    ``trainlen`` into both the training and the test set.  Rows are now
    selected positionally through a random permutation, which yields
    disjoint sets.
    """
    n_rows = len(x)
    trainpct = 0.7
    trainlen = int(trainpct * n_rows)

    # Random, non-overlapping positional indices for the two subsets.
    order = np.random.permutation(n_rows)
    train_idx = order[:trainlen]
    test_idx = order[trainlen:]

    xtrain = x.iloc[train_idx, :]
    ytrain = y.iloc[train_idx]
    xtest = x.iloc[test_idx, :]
    ytest = y.iloc[test_idx]

    rf = RandomForestClassifier()
    rf.fit(xtrain, ytrain)
    ypred = rf.predict(xtest)
    return ytest, ypred
示例4: crossval
# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict [as 别名]
def crossval(x, y, k=5):
    """Run ``k`` random 70/30 hold-out rounds and print each round's predictions.

    Despite the name this is repeated random sub-sampling, not k-fold
    cross-validation: every round draws a fresh permutation of the rows,
    trains a default random forest on the first 70 % and prints the
    predictions for the remaining 30 %.

    Parameters
    ----------
    x : pandas.DataFrame
        Feature rows.
    y : pandas.Series
        Labels, positionally aligned with ``x``.
    k : int
        Number of rounds.

    Returns
    -------
    None

    Notes
    -----
    Fixes over the previous version: ``len(X)`` referenced an undefined
    name, the loop variable was overwritten by the index list, the
    shuffled index was never applied, and the label-inclusive ``.ix``
    slices made the train and test sets overlap by one row.
    """
    n_rows = len(x)
    trainpct = 0.7
    trainlen = int(trainpct * n_rows)

    for _ in range(k):
        # New random, disjoint positional split for every round.
        order = np.random.permutation(n_rows)
        train_idx = order[:trainlen]
        test_idx = order[trainlen:]

        rf = RandomForestClassifier()
        rf.fit(x.iloc[train_idx, :], y.iloc[train_idx])
        ypred = rf.predict(x.iloc[test_idx, :])
        # Single-argument print() behaves the same on Python 2 and 3.
        print(ypred)
示例5:
# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict [as 别名]
# Script fragment (Python 2 print statements). It relies on names defined
# earlier in the original script: metrics, dat_clean, predicted, clf, tree,
# StringIO, pydot, X_train, X_test, y_test and g_test.

# Confusion matrix of the earlier predictions against the genre column.
print "Confusion matrix:"
print metrics.confusion_matrix(dat_clean.genre, predicted)
#####################
# Select eleven columns of dat_clean by position and refit clf on them,
# using the genre column as the target.
data_tree = dat_clean.iloc[:,[3,4,5,6,7,8,9,10,13,14,15]]
clf = clf.fit(data_tree, dat_clean.genre)
# Visualize tree: export the fitted estimator as Graphviz DOT text into an
# in-memory buffer, then render that text to 'dectree.pdf' through pydot.
dot_data = StringIO.StringIO()
tree.export_graphviz(clf, out_file=dot_data, feature_names=list(data_tree.columns.values))
graph = pydot.graph_from_dot_data(dot_data.getvalue())
graph.write_pdf('dectree.pdf')
# Repeat on test set
# NOTE(review): clf was just refitted on the data_tree columns; X_test is
# presumably built from the same columns -- confirm against the code that
# creates X_test.
y_test_pred = clf.predict(X_test)
print "Accuracy Test: {0:.3f}".format(metrics.accuracy_score(y_test, y_test_pred))
# A bare print statement emits an empty line (Python 2).
print
print "Classification report:"
print metrics.classification_report(y_test, y_test_pred)
print
print "Confusion matrix:"
print metrics.confusion_matrix(y_test, y_test_pred)
# Measure performance: class-probability estimates for the training rows.
y_pred = clf.predict_proba(X_train)
# Repeat on test set -- this rebinds y_test_pred, replacing the hard labels
# computed above with class-probability estimates.
y_test_pred = clf.predict_proba(X_test)
# Convert g_test to a plain array via as_matrix().
tt = g_test.as_matrix()
示例6: runns
# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict [as 别名]
def runns(resp_var, size_of_test_data,dataset,positive_class,predictor_var, n_estimators,important_features,dealing_with_nulls):
    """Select the most important predictors, train a random forest and report metrics.

    Pipeline
    --------
    1. Drop rows whose response is null, then treat the remaining nulls via
       ``deal_with_nulls(dealing_with_nulls, dataset)``.
    2. One-hot encode the predictors and balance the classes with
       ``RandomOverSampler``.
    3. Fit a random forest on the balanced data, rank the predictors by
       ``feature_importances_`` and keep the top ``important_features``.
    4. Split the balanced data, train a second random forest on the
       selected predictors and evaluate it on the held-out part.

    Parameters
    ----------
    resp_var : str
        Name of the response column.
    size_of_test_data
        Test fraction; converted with ``float`` and passed to
        ``train_test_split``.
    dataset : pandas.DataFrame or None
        Input data.  ``None`` falls back to reading ``'raw_data.csv'``
        (the previous version *always* read that file and silently ignored
        the argument).
    positive_class
        Label treated as the positive class by the binary metrics.
    predictor_var
        Column selector for the predictors (``dataset[predictor_var]``).
    n_estimators : int
        Number of trees for both forests.
    important_features : int or None
        How many top-ranked predictors to keep; ``None`` keeps all of them.
        (The previous version computed the ranking but never applied it.)
    dealing_with_nulls
        Strategy forwarded to ``deal_with_nulls``.

    Returns
    -------
    str
        JSON object with the keys ``accuracy``, ``precision``,
        ``average precision``, ``recall``, ``fscore``, ``fbeta``,
        ``hamming``, ``jaccard``, ``logloss``, ``zero_one``,
        ``area_under_roc``, ``cohen`` and ``mathews``.

    Raises
    ------
    ValueError
        If ``positive_class`` is not among the classes seen in training.
    """
    if dataset is None:
        dataset = pd.read_csv('raw_data.csv', low_memory=False)

    #----DATA PREPROCESSING
    # Rows without a response cannot be used for training or evaluation.
    dataset = dataset.dropna(subset=[resp_var])
    dataset = deal_with_nulls(dealing_with_nulls, dataset)

    #----FEATURE SELECTION
    # Categorical predictors become dummy variables.
    predictors = pd.get_dummies(dataset[predictor_var])
    feature_names = list(predictors)
    # Balance the classes of the response by random over-sampling.
    ros = RandomOverSampler(random_state=0)
    prds, resp = ros.fit_sample(predictors, dataset[resp_var])
    # A first forest is used only to rank the predictors.
    rf_clf = RandomForestClassifier(n_estimators=n_estimators)
    rf_clf.fit(prds, resp)
    feature_imp = pd.Series(rf_clf.feature_importances_,
                            index=feature_names).sort_values(ascending=False)
    important_predictor_names = list(feature_imp.index[0:important_features])
    # Keep only the selected predictors for the final model.
    features = pd.DataFrame(prds, columns=feature_names)[important_predictor_names]
    labels = np.array(resp).ravel()

    #----MODEL TRAINING
    train_features, test_features, train_labels, test_labels = train_test_split(
        np.array(features), labels,
        test_size=float(size_of_test_data), random_state=402)
    clf = RandomForestClassifier(n_jobs=1, n_estimators=n_estimators, random_state=142)
    clf.fit(train_features, train_labels)

    #----EVALUATION
    predicted = clf.predict(test_features)
    pred_prob = clf.predict_proba(test_features)
    # Locate the probability column of the positive class instead of
    # assuming it is column 1; raises ValueError for an unknown class.
    positive_column = list(clf.classes_).index(positive_class)
    positive_scores = pred_prob[:, positive_column]
    is_positive = (test_labels == positive_class)

    accuracy = accuracy_score(test_labels, predicted)
    precision = precision_score(test_labels, predicted, pos_label=positive_class)
    avg_precision = average_precision_score(is_positive, positive_scores)
    rec = recall_score(test_labels, predicted, pos_label=positive_class)
    fscore = f1_score(test_labels, predicted, pos_label=positive_class)
    # Same positive class as precision/recall/f1 (was left at the default).
    fbeta = fbeta_score(test_labels, predicted, beta=0.5, pos_label=positive_class)
    hamming = hamming_loss(test_labels, predicted)
    jaccard = jaccard_similarity_score(test_labels, predicted)
    # Log loss is defined on probabilities, not on hard labels.
    logloss = log_loss(test_labels, pred_prob, labels=clf.classes_)
    zero_one = zero_one_loss(test_labels, predicted)
    area_under_roc = roc_auc_score(is_positive, positive_scores)
    cohen = cohen_kappa_score(test_labels, predicted)
    mathews = matthews_corrcoef(test_labels, predicted)

    output={"accuracy":accuracy,"precision":precision,"average precision":avg_precision,"recall":rec,"fscore":fscore,"fbeta":fbeta,"hamming":hamming,"jaccard":jaccard,"logloss":logloss,"zero_one":zero_one,"area_under_roc":area_under_roc,"cohen":cohen,"mathews":mathews}
    output=json.dumps(output)
    return output
示例7: flatten
# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict [as 别名]
import autopath
from datasets import training_set, test_set
from util import convert_gray_scale, flatten
# Script fragment (Python 2). np, plt and RandomForestClassifier are used
# here but imported outside the visible lines.

# Unpack the (X, Y) pairs provided by the datasets module.
Xr,Yr = training_set
Xe,Ye = test_set
# Pass both feature sets through convert_gray_scale and then flatten.
Xr = flatten(convert_gray_scale(Xr))
Xe = flatten(convert_gray_scale(Xe))
# NOTE(review): compute_importances is not accepted by recent scikit-learn
# releases -- confirm the version this script targets.
rf = RandomForestClassifier(n_estimators=100, verbose=3, oob_score=True, compute_importances=True)
rf.fit(Xr, Yr)
Yp = rf.predict(Xe)
# Fraction of test samples whose predicted label equals the true label.
print np.mean(Yp == Ye)
# Highest class probability per test sample.
Ypp = rf.predict_proba(Xe).max(axis=1)
plt.figure(1)
plt.clf()
# Two overlaid 50-bin histograms of that top probability: blue for samples
# predicted correctly, red for samples predicted incorrectly.
# NOTE(review): normed= is not accepted by recent matplotlib releases
# (density= replaces it) -- confirm the version this script targets.
plt.hist(Ypp[Yp == Ye], 50, color='b', normed=True, alpha=0.4,
label='classified')
plt.hist(Ypp[Yp != Ye], 50, color='r', normed=True, alpha=0.4,
label='misclassified')
plt.legend(loc='upper left')
plt.draw()
plt.show()
# Selects figure 3; the code that draws on it is outside this fragment.
plt.figure(3)
示例8: StratifiedKFold
# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict [as 别名]
# Stratified k-fold evaluation of a random forest on the labelled images.
# Relies on `images`, `classifications`, `np`, `StratifiedKFold` and
# `RandomForestClassifier` from the surrounding script.

# Samples with classification -1 are excluded.
mask = classifications != -1
print(mask.sum())
# Select the labelled images once; the per-fold code below reuses this
# instead of re-evaluating images[mask] in every iteration.
labelled_images = images[mask, ...]
# One flat feature row per labelled image.
X = labelled_images.reshape(mask.sum(), np.prod(images.shape[1::]))
print(X.shape)
Y = classifications[mask]

acc = []              # accuracy of each fold
acc_correct = []      # top class probability of correctly predicted samples, per fold
acc_incorrect = []    # top class probability of wrongly predicted samples, per fold
acc_x_incorrect = []  # [images, true labels, predicted labels] of the errors, per fold
k_fold = 8
# NOTE(review): StratifiedKFold(Y, k_fold) and compute_importances=True follow
# an old scikit-learn API -- confirm the installed version before running.
for train_inx, valid_inx in StratifiedKFold(Y, k_fold):
    rf = RandomForestClassifier(n_estimators=100, verbose=0, oob_score=True, compute_importances=True)
    rf.fit(X[train_inx], Y[train_inx])
    Yp = rf.predict(X[valid_inx])
    correct = Yp == Y[valid_inx]
    # predict_proba is evaluated once; the original called it twice and
    # threw the first result away.
    p_correct = rf.predict_proba(X[valid_inx]).max(axis=1)
    acc_correct.append(p_correct[correct])
    acc_incorrect.append(p_correct[~correct])
    score = correct.mean()
    print(score)
    acc.append(score)
    wrong_inx = valid_inx[~correct]
    acc_x_incorrect.append([labelled_images[wrong_inx],
                            Y[wrong_inx],
                            Yp[~correct]])
# Formatted through %s so the text is identical on Python 2 and 3.
print('score %s' % np.mean(acc))
示例9: pressure
# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict [as 别名]
#download the file
# Train and score a random forest on the CSV behind `url`.
# Relies on `url`, `urllib`, `pd`, `np`, `preprocessing`, `train_test_split`
# and `RandomForestClassifier` from the surrounding script.

#download the file
raw_data=urllib.urlopen(url)
#get data, add column names and index
feature_names=["times pregnant", "plasma glucose conc.", "distolic blood pressure (mm Hg)", "triceps skin fold thickness (mm)", "2-hour serum insulin (mu U/ml)", "body mass index (kg/m^2)", "diabetes pedigree function", "age (years)", "target"]
feature_cols=feature_names[:-1]
label_col=feature_names[-1]
# read_csv(index_col=0, parse_dates=True) is the documented replacement for
# the removed DataFrame.from_csv and parses the file the same way.
# NOTE(review): like from_csv, this consumes the first line as a header --
# if the file has no header row the first record is lost; confirm.
dataset=pd.read_csv(raw_data, index_col=0, parse_dates=True)
dataset=dataset.reset_index()
dataset.columns=feature_names
#split into train and test set
train, test=train_test_split(dataset, test_size=0.3)
#normalize data
# Only the feature columns are standardised, and the test set is transformed
# with the statistics learned from the training set.  The original scaled the
# label column too (then truncated it with astype(int)) and scaled the test
# set with its own mean and variance.
scaler=preprocessing.StandardScaler().fit(train[feature_cols])
df_scaled_train=pd.DataFrame(scaler.transform(train[feature_cols]), columns=feature_cols)
df_scaled_train[label_col]=train[label_col].values.astype(int)
df_scaled_test=pd.DataFrame(scaler.transform(test[feature_cols]), columns=feature_cols)
df_scaled_test[label_col]=test[label_col].values.astype(int)
model=RandomForestClassifier(n_estimators = 100, oob_score = True, random_state =10, max_features = "auto", min_samples_leaf = 20)
#train model
# The labels are passed as a 1-d integer array: a single-column matrix
# triggers scikit-learn's DataConversionWarning, and float labels raise
# "ValueError: Unknown label type".
model.fit(df_scaled_train[feature_cols], np.asarray(df_scaled_train[label_col]))
print("Accuracy: %s" % model.score(df_scaled_test[feature_cols], np.asarray(df_scaled_test[label_col])))
#predict output
predicted=model.predict(df_scaled_test[feature_cols])
print(predicted)
示例10: read
# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict [as 别名]
from sklearn.ensemble.forest import RandomForestClassifier
def read(fname):
    """Parse a whitespace-separated data file into labels and feature rows.

    Each non-blank line contributes one sample: the last field is the
    integer label and every field except the last two is a float feature.

    Parameters
    ----------
    fname : str
        Path of the file to read.

    Returns
    -------
    (labels, data)
        ``labels`` is a list of ints and ``data`` a list of lists of
        floats, in file order.

    Raises
    ------
    ValueError
        If a label or feature field is not numeric.
    """
    labels, data = [], []
    with open(fname) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) used to raise IndexError.
                continue
            labels.append(int(fields[-1]))
            # A real list rather than map(): on Python 3 map() returns a
            # one-shot iterator, which is useless as a feature row.
            # NOTE(review): [:-2] also drops the column just before the
            # label -- kept as in the original; confirm that column is not
            # meant to be a feature.
            data.append([float(value) for value in fields[:-2]])
    return labels, data
# Load both files; read() returns a (labels, data) pair for each.
trainset = read('./trainset')
testset = read('./testset')
train_labels, train_rows = trainset
test_labels, test_rows = testset

# Fit a 10-tree forest on the training rows, then show the predictions for
# the test rows followed by the true test labels.
clf = RandomForestClassifier(n_estimators=10)
clf.fit(train_rows, train_labels)
print(clf.predict(test_rows))
print(test_labels)
示例11: train_test_split
# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict [as 别名]
# Compare three tree ensembles on the same 60/40 split.
# confusion_matrix takes (y_true, y_pred); the original passed the
# predictions first, which transposes every matrix.  Each model now predicts
# once and the result is reused.
x_train, x_test, y_train, y_test = train_test_split(authorship[features], authorship.Author_num.values, test_size=0.4, random_state=123)
# Fit Model
etclf = ExtraTreesClassifier(n_estimators=20, max_depth=10, verbose=1)
etclf.fit(x_train, y_train)
et_pred = etclf.predict(x_test)
# Confusion matrix (a bare expression: it is only displayed in an
# interactive session, as in the original).
metrics.confusion_matrix(y_test, et_pred)
from sklearn.ensemble.forest import RandomForestClassifier
rdclf = RandomForestClassifier(n_estimators=20, max_depth=10)
rdclf.fit(x_train, y_train)
rd_pred = rdclf.predict(x_test)
metrics.confusion_matrix(y_test, rd_pred)
from sklearn.ensemble.weight_boosting import AdaBoostClassifier
adaclf = AdaBoostClassifier(n_estimators=20)
adaclf.fit(x_train, y_train)
ada_pred = adaclf.predict(x_test)
metrics.confusion_matrix(y_test, ada_pred)
# Side-by-side recap of the three matrices.
metrics.confusion_matrix(y_test, et_pred)
metrics.confusion_matrix(y_test, rd_pred)
metrics.confusion_matrix(y_test, ada_pred)
示例12: roc_curve
# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict [as 别名]
y_predict = m.predict(X_test)
fpr, tpr, thresh = roc_curve(y_test, y_predict, pos_label=1)
auc = roc_auc_score(y_test, y_predict)
print 'AUC: ', auc
print 'Percentage of players that will have TJ in 2014: ',np.mean(y_predict)
return fpr, tpr, auc
# Evaluate both model classes through evaluate_model().
rf_fpr, rf_tpr, rf_auc = evaluate_model(RandomForestClassifier)
svc_fpr, svc_tpr, svc_auc = evaluate_model(SVC)

# Fit a 10-tree forest on X / y, store its prediction for every row of
# predict_players and write the frame to disk.
RFC2 = RandomForestClassifier(n_estimators = 10)
RFC2.fit(X, y)
predict_players['predictions'] = RFC2.predict(predict_players[X_cols])
predict_players.to_csv('testing.csv')

# Names whose prediction is 1.
the_doomed = predict_players['m1_name'][predict_players['predictions'] == 1]
print('%s %s' % ('Players that RF thinks will have TJ in 2014', the_doomed))

# Check every predicted name against the values of the 2014 injuries table.
injuries2014 = pd.read_csv('.\\intermediate data\\injuries2014.csv')
injury_values = injuries2014.values
for each_doomed_person in the_doomed.values:
    if each_doomed_person in injury_values:
        outcome = 'has in fact undergone TJ in 2014!'
    else:
        outcome = "did not end up having TJ in 2014..."
    print('%s %s' % (each_doomed_person, outcome))
for each_injured_person in injuries2014[injuries2014.columns[1]].values:
示例13:
# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict [as 别名]
# Cross-validated precision / recall / F1 for the random forest `rf`.
# Relies on `kf`, `features`, `rf`, `start`, `time`, the metric functions and
# the rfAvg* accumulators defined earlier in the script.

# Layout of one `features` row: columns 0-5 are inputs, column 6 is the label.
N_INPUT_COLUMNS = 6
LABEL_COLUMN = 6

n_folds = 0
for train, test in kf:
    n_folds += 1
    x_train = [[features[i][col] for col in range(N_INPUT_COLUMNS)] for i in train]
    y_train = [features[i][LABEL_COLUMN] for i in train]
    x_test = [[features[i][col] for col in range(N_INPUT_COLUMNS)] for i in test]
    y_test = [features[i][LABEL_COLUMN] for i in test]

    rf.fit(x_train, y_train)
    rfPredTest = rf.predict(x_test)
    rfPrecisionTest = precision_score(y_test, rfPredTest)
    rfRecallTest = recall_score(y_test, rfPredTest)
    rfF1Test = f1_score(y_test, rfPredTest)
    rfAvgPrecision += rfPrecisionTest
    rfAvgRecall += rfRecallTest
    rfAvgF1 += rfF1Test

# Formatted through %s so the text (including the doubled spaces the
# Python 2 print statement produced) is identical on Python 2 and 3.
print("RF completed in  %s  s" % (time.time() - start))
# Average over the folds that actually ran instead of a hard-coded 5;
# max() avoids a division by zero when kf yields nothing.
fold_count = max(n_folds, 1)
print("rf:\n Precision {}\n Recall {}\n F1 {}\n".format(rfAvgPrecision / fold_count, rfAvgRecall / fold_count, rfAvgF1 / fold_count))
示例14: print
# 需要导入模块: from sklearn.ensemble.forest import RandomForestClassifier [as 别名]
# 或者: from sklearn.ensemble.forest.RandomForestClassifier import predict [as 别名]
from sklearn.ensemble.forest import RandomForestClassifier
from sklearn.metrics.classification import classification_report
import pandas as pd
__author__ = 'semyon'
# Grid search over max_depth / min_samples_leaf for a 50-tree random forest,
# trained on the first TRAIN_ROWS rows of data/train.csv and evaluated on
# the remaining rows.
print("reading")
csv = pd.read_csv("data/train.csv")
print("slicing")
# Columns 'x23' through 'x61' (label-based, inclusive) are the features and
# missing values become 0.  .loc / .values replace .ix / .as_matrix(), which
# current pandas no longer provides.
train_features = csv.loc[:, 'x23':'x61'].fillna(0).values
train_true = csv['y'].tolist()
# Rows before this index are used for fitting, the rest for evaluation.
TRAIN_ROWS = 35000
trtrfe = train_features[:TRAIN_ROWS, :]
trtrtrue = train_true[:TRAIN_ROWS]
trtefe = train_features[TRAIN_ROWS:, :]
trtetrue = train_true[TRAIN_ROWS:]
print("learning")
for depth in [7, 10, 12, 15, 20, 30, 50, 70]:
    for leaf_samples in [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 20, 40, 60, 150]:
        model = RandomForestClassifier(n_estimators=50, max_depth=depth, min_samples_leaf=leaf_samples, verbose=0,
                                       n_jobs=4)
        model.fit(trtrfe, trtrtrue)
        predicted = model.predict(trtefe)
        # Label every report with the setting that produced it; without
        # this line the 112 reports cannot be told apart.
        print("max_depth=%d min_samples_leaf=%d" % (depth, leaf_samples))
        print(classification_report(trtetrue, predicted))