

Python RandomForestClassifier.predict Method Code Examples

This article collects typical usage examples of the Python method sklearn.ensemble.forest.RandomForestClassifier.predict. If you are unsure what RandomForestClassifier.predict does, how to call it, or where to find working examples, the curated code samples below may help. You can also explore further usage examples of the containing class, sklearn.ensemble.forest.RandomForestClassifier.


The following presents 14 code examples of RandomForestClassifier.predict, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
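Before the individual examples, here is a minimal, self-contained sketch of the fit/predict workflow they all share. It is an illustration written for this article rather than code from any of the projects below; note also that recent scikit-learn releases expose the class as sklearn.ensemble.RandomForestClassifier, the private sklearn.ensemble.forest path used in these older snippets having since been removed.

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# toy binary-classification data
X, y = make_classification(n_samples=200, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf.fit(X_train, y_train)          # train the forest
y_pred = clf.predict(X_test)       # predicted class labels, one per test row
print(clf.score(X_test, y_test))   # mean accuracy on the held-out set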

Example 1: model_pred

# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict [as alias]
def model_pred(trainX,trainY,testX,model_type):
    if model_type == "rf":
        clf = RandomForestClassifier(n_estimators = 500,n_jobs = 20)
        clf.fit(trainX,trainY)
        pred = clf.predict(testX)
    if model_type == "gbdt":
        clf = GradientBoostingClassifier(n_estimators=6,learning_rate=0.9,random_state=0)
        clf.fit(trainX,trainY)
        pred = clf.predict(testX)
    if model_type == "fusion":
        prob = np.zeros(len(testX))
        params = [100,200,300,400,500]
        for param in params:
            clf = RandomForestClassifier(n_estimators = param,n_jobs = 20,bootstrap=True)
            clf.fit(trainX,trainY)
            prob += clf.predict(testX)
        '''
        params = [1,2,3,4,5,6,7,8,9,10]
        for param in params:
            clf = GradientBoostingClassifier(n_estimators=param,learning_rate=0.9,random_state=0)
            clf.fit(trainX,trainY)
            prob += clf.predict(testX)
        '''
        pred = list(prob >= 3)
    print "the pos rate is:",float(sum(pred))/len(pred)
    return pred
Developer: tearf001, Project: ucloud, Lines of code: 28, Source file: model2.py
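A note on the fusion branch in Example 1: it sums the hard 0/1 predictions of five forests and thresholds with prob >= 3, i.e. a simple majority vote. A softer variant would average predict_proba outputs and threshold at 0.5; the sketch below is a hypothetical alternative assuming binary labels and the same trainX/trainY/testX arrays, not code from the original project.

import numpy as np
from sklearn.ensemble import RandomForestClassifier

def fusion_pred_soft(trainX, trainY, testX, n_estimators_list=(100, 200, 300, 400, 500)):
    # average the predicted probability of the positive class across several forests
    prob = np.zeros(len(testX))
    for n in n_estimators_list:
        clf = RandomForestClassifier(n_estimators=n, n_jobs=-1)
        clf.fit(trainX, trainY)
        prob += clf.predict_proba(testX)[:, 1]
    prob /= len(n_estimators_list)
    return prob >= 0.5  # boolean array: True where the averaged probability favors the positive class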

Example 2: RandomForestClassifer

# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict [as alias]
 def RandomForestClassifer(self):
     
     '''
     Function to run the RandomForest classifier.
     '''
     train_Array = self.titanic_train_frame.values
     self.test_Array = self.titanic_test_frame.values
     randomForest = RandomForestClassifier(n_estimators = 100, n_jobs = -1)
     randomForest.fit(train_Array[0::,1::],train_Array[0::,0])
     self.predicted_probability = randomForest.predict(self.test_Array[0::,0::])
     self.predicted_probability_list = self.predicted_probability.tolist()
Developer: malaikannan, Project: Kaggle_TitanicPredictionChallenge, Lines of code: 13, Source file: TitanicPrediction_LogisticRegression.py

Example 3: just_pred

# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict [as alias]
def just_pred(x, y):
    xlen = len(x)
    i = range(xlen)
    np.random.shuffle(i)
    trainpct = 0.7
    trainlen = int(trainpct * xlen)
    testlen = xlen - trainlen
    xtrain = x.ix[:trainlen,:]
    ytrain = y.ix[:trainlen]
    xtest = x.ix[trainlen:,:]
    ytest = y.ix[trainlen:]
    rf = RandomForestClassifier()
    rf.fit(xtrain, ytrain)
    ypred = rf.predict(xtest)
    return ytest, ypred
Developer: coreyabshire, Project: color-names, Lines of code: 17, Source file: pscvread.py

Example 4: crossval

# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict [as alias]
def crossval(x, y, k=5):
    for i in range(k):
        i = range(len(x))
        np.random.shuffle(i)
        xlen = len(x)
        trainpct = 0.7
        trainlen = int(trainpct * xlen)
        testlen = xlen - trainlen
        xtrain = x.ix[:trainlen,:]
        ytrain = y.ix[:trainlen]
        xtest = x.ix[trainlen:,:]
        ytest = y.ix[trainlen:]
        rf = RandomForestClassifier()
        rf.fit(xtrain, ytrain)
        ypred = rf.predict(xtest)
        print ypred
Developer: coreyabshire, Project: color-names, Lines of code: 18, Source file: pscvread.py

Example 5

# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict [as alias]
print "Confusion matrix:"
print metrics.confusion_matrix(dat_clean.genre, predicted)

#####################
data_tree = dat_clean.iloc[:,[3,4,5,6,7,8,9,10,13,14,15]]
clf = clf.fit(data_tree, dat_clean.genre)

# Visualize tree
dot_data = StringIO.StringIO()
tree.export_graphviz(clf, out_file=dot_data, feature_names=list(data_tree.columns.values))
graph = pydot.graph_from_dot_data(dot_data.getvalue())
graph.write_pdf('dectree.pdf')


# Repeat on test set
y_test_pred = clf.predict(X_test)
print "Accuracy Test: {0:.3f}".format(metrics.accuracy_score(y_test, y_test_pred))
print
print "Classification report:"
print metrics.classification_report(y_test, y_test_pred)
print 
print "Confusion matrix:"
print metrics.confusion_matrix(y_test, y_test_pred)

# Measure performance
y_pred = clf.predict_proba(X_train)

# Repeat on test set
y_test_pred = clf.predict_proba(X_test)

tt = g_test.as_matrix()
Developer: JPLindsen, Project: GA_Data_Science_Project, Lines of code: 33, Source file: model_fit.py

Example 6: runns

# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict [as alias]
	def runns(resp_var, size_of_test_data,dataset,positive_class,predictor_var, n_estimators,important_features,dealing_with_nulls):
		dataset = pd.read_csv('raw_data.csv', low_memory=False) # For testing purposes
		#----DATA PREPROCESSING
		#-------dealing with NULL values in the data
		#----------remove the rows in which the response is null

		dataset=dataset.dropna(subset=[resp_var])
		#----------dealing with nulls
		dataset=deal_with_nulls(dealing_with_nulls,dataset)
		#----FEATURE SELECTION
		#-------get predictors important in predicting the response
		#-----------transform categorical predictors to dummy variables
		predictors=dataset[predictor_var]
		predictors=pd.get_dummies(predictors)
		#-----------balance the classes in the response var
		ros = RandomOverSampler(random_state=0)
		resp=dataset[resp_var]
		prds, resp = ros.fit_sample(predictors, resp)
		#-----------fit the random forest classifier to give us the important predictors
		rf_clf = RandomForestClassifier(n_estimators=n_estimators)
		rf_clf.fit(prds,resp)
		#-------get the important predictors
		feature_imp = pd.Series(rf_clf.feature_importances_,
						index=list(predictors.iloc[:,0:])).sort_values(ascending=False)
		#-------names of the important predictors
		important_predictor_names = feature_imp.index[0:important_features]
		#-------subset the data to get only the important predictors and the response
		resp=pd.DataFrame(data=resp,columns=[resp_var])
		predictors=pd.DataFrame(prds,columns=list(predictors))
		dataset=pd.concat([resp,predictors],axis=1)
		#---------------------------------------------------------
		#----MODEL TRAINING
		#--------Remove the response variables from the features variables - axis 1 refers to the columns
		m_data= dataset.drop(resp_var, axis = 1,inplace=False) 
		# Response variables are the values we want to predict
		resp_var = np.array(dataset[resp_var])

		dataset = pd.get_dummies(m_data)
		
		# Saving feature names for later use
		feature_list = list(m_data.columns)
		# Convert to numpy array
		dataset = np.array(dataset)

		# Split the data into training and testing sets
		train_features, test_features, train_labels, test_labels = train_test_split(dataset, resp_var, test_size = float(size_of_test_data), random_state = 402)

		# Instantiate model with n_estimators decision trees
		clf = RandomForestClassifier(n_jobs = 1,n_estimators = n_estimators, random_state = 142)

		# Train the model on training data
		clf.fit(train_features, train_labels)
		# evaluation
		predicted = clf.predict(test_features)
		pred_prob = clf.predict_proba(test_features)
		
		accuracy = accuracy_score(test_labels, predicted)
		#confusion matrix
		cnf = (confusion_matrix(test_labels,predicted))
		#precision score
		precision = precision_score(test_labels,predicted,pos_label=positive_class)
		#avg pres
		avg_precision = average_precision_score(test_labels,pred_prob[:,[1]])
		#recall score
		rec = recall_score(test_labels,predicted,pos_label=positive_class)
		#f1 score
		fscore = f1_score(test_labels,predicted,pos_label=positive_class)
		#fbeta score
		fbeta = fbeta_score(test_labels,predicted,beta=0.5)
		#hamming_loss
		hamming = hamming_loss(test_labels,predicted)
		#jaccard similarity score
		jaccard = jaccard_similarity_score(test_labels,predicted)
		#logloss
		logloss = log_loss(test_labels,predicted)
		#zero-oneloss
		zero_one = zero_one_loss(test_labels,predicted)
		#auc roc 
		area_under_roc = roc_auc_score(test_labels,pred_prob[:,[1]])
		#cohen_score
		cohen = cohen_kappa_score(test_labels,predicted)
		#mathews corr
		mathews = matthews_corrcoef(test_labels,predicted)
		# Variable importances from the important features selection stage
		variable_importance_list = list(zip(prds, feature_imp))
		output={"accuracy":accuracy,"precision":precision,"average precision":avg_precision,"recall":rec,"fscore":fscore,"fbeta":fbeta,"hamming":hamming,"jaccard":jaccard,"logloss":logloss,"zero_one":zero_one,"area_under_roc":area_under_roc,"cohen":cohen,"mathews":mathews}
		output=json.dumps(output)
		return output
Developer: ghollah, Project: ServingMLAPIs, Lines of code: 90, Source file: random_forest.py

Example 7: flatten

# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict [as alias]
import autopath
from datasets import training_set, test_set
from util import convert_gray_scale, flatten


Xr,Yr = training_set
Xe,Ye = test_set

Xr = flatten(convert_gray_scale(Xr))
Xe = flatten(convert_gray_scale(Xe))

rf = RandomForestClassifier(n_estimators=100, verbose=3, oob_score=True, compute_importances=True)
rf.fit(Xr, Yr)

Yp = rf.predict(Xe)
print np.mean(Yp == Ye)

Ypp = rf.predict_proba(Xe).max(axis=1)

plt.figure(1)
plt.clf()
plt.hist(Ypp[Yp == Ye], 50, color='b', normed=True, alpha=0.4,
         label='classified')
plt.hist(Ypp[Yp != Ye], 50, color='r', normed=True, alpha=0.4,
         label='misclassified')
plt.legend(loc='upper left')
plt.draw()
plt.show()

plt.figure(3)
Developer: matthagy, Project: sc2_label_models, Lines of code: 32, Source file: gs_random_forest.py

Example 8: StratifiedKFold

# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict [as alias]
mask = classifications != -1
print mask.sum()
X = images[mask, ...].reshape(mask.sum(), np.prod(images.shape[1::]))
print X.shape
Y = classifications[mask]

acc = []
acc_correct = []
acc_incorrect = []
acc_x_incorrect = []
k_fold = 8
for train_inx, valid_inx in StratifiedKFold(Y, k_fold):
    rf = RandomForestClassifier(n_estimators=100, verbose=0, oob_score=True, compute_importances=True)
    rf.fit(X[train_inx], Y[train_inx])
    Yp = rf.predict(X[valid_inx])
    correct = Yp == Y[valid_inx]
    rf.predict_proba(X[valid_inx])
    p_correct = rf.predict_proba(X[valid_inx]).max(axis=1)
    acc_correct.append(p_correct[correct])
    acc_incorrect.append(p_correct[~correct])

    score = correct.mean()
    print score
    acc.append(score)

    acc_x_incorrect.append([images[mask][valid_inx[~correct]],
                            Y[valid_inx[~correct]],
                            Yp[~correct]])

print 'score', np.mean(acc)
Developer: matthagy, Project: sc2_timer, Lines of code: 32, Source file: rf_digits.py

Example 9: pressure

# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict [as alias]
#download the file
raw_data=urllib.urlopen(url)

#get data, add column names and index
feature_names=["times pregnant", "plasma glucose conc.", "diastolic blood pressure (mm Hg)", "triceps skin fold thickness (mm)", "2-hour serum insulin (mu U/ml)", "body mass index (kg/m^2)", "diabetes pedigree function", "age (years)", "target"]
dataset=pd.DataFrame.from_csv(raw_data)
dataset=dataset.reset_index()
dataset.columns=feature_names

#split into train and test set
train, test=train_test_split(dataset, test_size=0.3)

#normalize data
df_scaled_train=pd.DataFrame(preprocessing.scale(train), columns=feature_names)
df_scaled_test=pd.DataFrame(preprocessing.scale(test), columns=feature_names)

model=RandomForestClassifier(n_estimators = 100, oob_score = True, random_state =10, max_features = "auto", min_samples_leaf = 20)

#train model
#note: passing a single-column matrix as y when a 1d array is expected raises
#DataConversionWarning: "A column-vector y was passed when a 1d array was
#expected. Please change the shape of y to (n_samples,), for example using ravel()."
#Also, labels must be int or str -- float labels raise
#ValueError: Unknown label type
model.fit(df_scaled_train.ix[:,'times pregnant':'age (years)'], np.asarray(df_scaled_train.ix[:,'target'].astype(int)))
print "Accuracy:", model.score(df_scaled_test.ix[:,'times pregnant':'age (years)'], np.asarray(df_scaled_test.ix[:,'target'].astype(int)))

#predict output
predicted=model.predict(df_scaled_test.ix[:,'times pregnant':'age (years)'])
print predicted
Developer: anooshac, Project: machine-learning-projects, Lines of code: 32, Source file: PimaIndianClassification.py
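The comments in Example 9 mention the DataConversionWarning raised when y is passed as a single-column frame and the suggested ravel() fix. A minimal sketch of that fix, using a hypothetical single-column target frame rather than the Pima data above:

import pandas as pd
from sklearn.ensemble import RandomForestClassifier

X = pd.DataFrame({"a": [0.1, 0.4, 0.5, 0.9], "b": [1.0, 0.2, 0.8, 0.3]})
y_df = pd.DataFrame({"target": [0, 0, 1, 1]})  # single-column target frame

clf = RandomForestClassifier(n_estimators=10, random_state=0)
# fitting with y_df directly triggers DataConversionWarning; ravel() gives the expected 1d array
clf.fit(X, y_df.values.ravel())
print(clf.predict(X))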

Example 10: read

# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict [as alias]
from sklearn.ensemble.forest import RandomForestClassifier

def read(fname):
    labels, data = [],[]
    with open(fname) as f:
        for s in f:
            ss = s.split()
            labels.append(int(ss[-1]))
            data.append(map(float, ss[:-2]))
    return labels, data

trainset = read('./trainset')
testset  = read('./testset')

clf = RandomForestClassifier(n_estimators=10)
clf.fit(trainset[1], trainset[0])
print clf.predict(testset[1])
print testset[0]
Developer: Daiver, Project: HRandomForest, Lines of code: 20, Source file: tmp.py

Example 11: train_test_split

# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict [as alias]
x_train, x_test, y_train, y_test = train_test_split(authorship[features], authorship.Author_num.values, test_size=0.4, random_state=123)


# Fit Model
etclf = ExtraTreesClassifier(n_estimators=20, max_depth=10, verbose=1)
etclf.fit(x_train, y_train)

# Print Confusion Matrix
metrics.confusion_matrix(etclf.predict(x_test), y_test)


from sklearn.ensemble.forest import RandomForestClassifier

rdclf = RandomForestClassifier(n_estimators=20, max_depth=10)
rdclf.fit(x_train, y_train)
metrics.confusion_matrix(rdclf.predict(x_test), y_test)


from sklearn.ensemble.weight_boosting import AdaBoostClassifier

adaclf = AdaBoostClassifier(n_estimators=20)
adaclf.fit(x_train, y_train)
metrics.confusion_matrix(adaclf.predict(x_test), y_test)




metrics.confusion_matrix(etclf.predict(x_test), y_test)
metrics.confusion_matrix(rdclf.predict(x_test), y_test)
metrics.confusion_matrix(adaclf.predict(x_test), y_test)
Developer: AkiraKane, Project: GA_Data_Science, Lines of code: 32, Source file: Class+13+Classwork.py

Example 12: roc_curve

# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict [as alias]
  y_predict = m.predict(X_test)
  fpr, tpr, thresh = roc_curve(y_test, y_predict, pos_label=1)
  auc = roc_auc_score(y_test, y_predict)

  print 'AUC: ', auc
  print 'Percentage of players that will have TJ in 2014: ',np.mean(y_predict)
  return fpr, tpr, auc


rf_fpr, rf_tpr, rf_auc = evaluate_model(RandomForestClassifier)
svc_fpr, svc_tpr, svc_auc = evaluate_model(SVC)


RFC2 = RandomForestClassifier(n_estimators = 10)
RFC2.fit(X, y)
predict_players['predictions']=RFC2.predict(predict_players[X_cols])
predict_players.to_csv('testing.csv')

print 'Players that RF thinks will have TJ in 2014', predict_players['m1_name'][predict_players['predictions']==1]

the_doomed = predict_players['m1_name'][predict_players['predictions']==1]
injuries2014 = pd.read_csv('.\\intermediate data\\injuries2014.csv')

for each_doomed_person in the_doomed.values:
  if each_doomed_person in injuries2014.values:
    print each_doomed_person, 'has in fact undergone TJ in 2014!'
  else:
    print each_doomed_person, "did not end up having TJ in 2014..."


for each_injured_person in injuries2014[injuries2014.columns[1]].values:
Developer: julbright, Project: tj_research, Lines of code: 33, Source file: TJ_2.py

Example 13

# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict [as alias]
for train, test in kf:
    y_train = []
    x_train = []
    for i in train:
        y_train.append(features[i][6])
        tmp = [features[i][0], features[i][1], features[i][2], features[i][3], features[i][4], features[i][5]]
        x_train.append(tmp)
        
    y_test = []
    x_test = []  
    for i in test:
        y_test.append(features[i][6])
        tmp = [features[i][0], features[i][1], features[i][2], features[i][3], features[i][4], features[i][5]]
        x_test.append(tmp)
       
    rf.fit(x_train, y_train)
    rfPredTest = rf.predict(x_test)
    rfPrecisionTest = precision_score(y_test, rfPredTest)
    rfRecallTest = recall_score(y_test, rfPredTest)
    rfF1Test = f1_score(y_test, rfPredTest)
    rfAvgPrecision += rfPrecisionTest
    rfAvgRecall += rfRecallTest
    rfAvgF1 += rfF1Test

print "RF completed in ", time.time() - start, " s"
print "rf:\n Precision {}\n Recall {}\n F1 {}\n".format(rfAvgPrecision / 5, rfAvgRecall / 5, rfAvgF1 / 5)

Developer: korvin14, Project: twitterFollowBack, Lines of code: 28, Source file: dbscan.py

Example 14: print

# Required import: from sklearn.ensemble.forest import RandomForestClassifier [as alias]
# Or: from sklearn.ensemble.forest.RandomForestClassifier import predict [as alias]
from sklearn.ensemble.forest import RandomForestClassifier
from sklearn.metrics.classification import classification_report
import pandas as pd
__author__ = 'semyon'


print("reading")
csv = pd.read_csv("data/train.csv")

print("slicing")
train_features = csv.ix[:, 'x23':'x61'].fillna(0).as_matrix()
train_true = csv['y'].tolist()

trtrfe = train_features[:35000, :]
trtrtrue = train_true[:35000]

trtefe = train_features[35000:, :]
trtetrue = train_true[35000:]

print("learning")

for depth in [7, 10, 12, 15, 20, 30, 50, 70]:
    for leaf_samples in [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 20, 40, 60, 150]:
        # model = GradientBoostingClassifier(n_estimators=10, max_depth=depth, min_samples_leaf=leaf_samples, verbose=1)
        model = RandomForestClassifier(n_estimators=50, max_depth=depth, min_samples_leaf=leaf_samples, verbose=0,
                                       n_jobs=4)
        model.fit(trtrfe, trtrtrue)
        # mean accuracy on the given test data and labels
        # print depth, '\t', leaf_samples, '\t', model.score(trtefe, trtetrue)
        predicted = model.predict(trtefe)
        print(classification_report(trtetrue, predicted))
Developer: SammyVimes, Project: storage_systems__classifier, Lines of code: 33, Source file: test.py


Note: The sklearn.ensemble.forest.RandomForestClassifier.predict examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and any redistribution or use should follow the corresponding project licenses. Do not reproduce without permission.