This article collects typical usage examples of the Python method sklearn.ensemble.ExtraTreesClassifier.fit. If you are wondering what ExtraTreesClassifier.fit does, how to call it, or what it looks like in practice, the curated examples below should help. You may also want to read further about the containing class, sklearn.ensemble.ExtraTreesClassifier.
The following shows 15 code examples of ExtraTreesClassifier.fit, ordered by popularity. The snippets are drawn from different real projects, so helper names that are not shown here belong to each example's original module.
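Before the collected examples, here is a minimal self-contained sketch of the call itself (synthetic data; the parameter values are illustrative only, not taken from any example below):

import numpy as np
from sklearn.ensemble import ExtraTreesClassifier

# Toy data: 100 samples, 4 features, binary labels derived from the first two.
rng = np.random.RandomState(0)
X = rng.rand(100, 4)
y = (X[:, 0] + X[:, 1] > 1.0).astype(int)

clf = ExtraTreesClassifier(n_estimators=50, random_state=0)
clf.fit(X, y)                    # the method this page is about
print(clf.feature_importances_)  # populated by fit
print(clf.predict(X[:5]))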
Example 1: plotFeatureImportances
# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Or: from sklearn.ensemble.ExtraTreesClassifier import fit [as alias]
import numpy as np
import pylab
from sklearn.ensemble import ExtraTreesClassifier

def plotFeatureImportances(x, y, fieldNames, numTrees=100):
    print(fieldNames)
    # fit; the compute_importances flag was removed from scikit-learn,
    # importances are always computed now
    forest = ExtraTreesClassifier(n_estimators=numTrees, random_state=0)
    forest.fit(x, y)
    # get importances
    importances = forest.feature_importances_
    print(sum(importances))  # importances sum to 1.0
    std = np.std([tree.feature_importances_ for tree in forest.estimators_], axis=0)
    indices = np.argsort(importances)[::-1]
    # present
    numFeatures = len(importances)
    print('feature ranking:')
    for i in range(numFeatures):
        print('%d. feature %d (%s) has importance %f'
              % (i + 1, indices[i], fieldNames[indices[i]], importances[indices[i]]))
    xtickLabels = [fieldNames[i] for i in indices]
    pylab.figure()
    pylab.title('Feature Importances From an Extra-Trees Forest with %s trees' % numTrees)
    pylab.bar(range(numFeatures), importances[indices], color='r', yerr=std[indices], align='center')
    pylab.xticks(range(numFeatures), xtickLabels)
    pylab.xlim([-1, numFeatures])
    pylab.show()
Example 2: train_tree
# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Or: from sklearn.ensemble.ExtraTreesClassifier import fit [as alias]
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier

def train_tree():
    # knn, training, adapt1-adapt3, vector_length and ConfidenceDecorator are
    # defined elsewhere in the source module.
    # Note: min_samples_split must be >= 2 in current scikit-learn releases.
    word_vector_hash = knn.word_vectors(training, vector_length, False)
    sku_vectors, class_labels, _, sku_hash = knn.data(adapt1, vector_length, 'all', word_vector_hash)
    xtrees = ExtraTreesClassifier(n_estimators=1, max_depth=None, min_samples_split=2, random_state=0)
    model2 = xtrees.fit(sku_vectors, class_labels)
    sku_vectors, class_labels, _, sku_hash = knn.data(adapt2, vector_length, 'all', word_vector_hash)
    xtrees = ExtraTreesClassifier(n_estimators=1, max_depth=None, min_samples_split=2, random_state=0)
    model3 = xtrees.fit(sku_vectors, class_labels)
    sku_vectors, class_labels, _, sku_hash = knn.data(adapt3, vector_length, 'all', word_vector_hash)
    xtrees = ExtraTreesClassifier(n_estimators=1, max_depth=None, min_samples_split=2, random_state=0)
    model4 = xtrees.fit(sku_vectors, class_labels)
    # Non-adaptive data
    sku_vectors, class_labels, _, sku_hash = knn.data(training, vector_length, False, word_vector_hash)
    model2 = ConfidenceDecorator(model2, sku_vectors, class_labels)
    model3 = ConfidenceDecorator(model3, sku_vectors, class_labels)
    model4 = ConfidenceDecorator(model4, sku_vectors, class_labels)
    xtrees = ExtraTreesClassifier(n_estimators=1, max_depth=None, min_samples_split=2, random_state=0)
    model1 = xtrees.fit(sku_vectors, class_labels)
    model1 = ConfidenceDecorator(model1, sku_vectors, class_labels)
    forest = RandomForestClassifier(n_estimators=3, max_depth=None, min_samples_split=2, random_state=0)
    model5 = forest.fit(sku_vectors, class_labels)
    model5 = ConfidenceDecorator(model5, sku_vectors, class_labels)
    # neigh = neighbors.KNeighborsClassifier(n_neighbors=10, weights="distance")
    # model6 = neigh.fit(sku_vectors, class_labels)
    # model6 = ConfidenceDecorator(model6, sku_vectors, class_labels)
    models = [model1, model2, model3, model4, model5]  # , model6]
    return models, word_vector_hash
Example 3: main
# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Or: from sklearn.ensemble.ExtraTreesClassifier import fit [as alias]
import json
import os

import numpy as np
from sklearn.ensemble import ExtraTreesClassifier

def main():
    # TRAINING_DIR, load_training_vector and evaluate_clf are defined
    # elsewhere in the source module.
    # Define the known data points or "training" data
    explanatory_fields = "d100 dd0 dd5 fday ffp gsdd5 gsp map mat_tenths mmax_tenths mmindd0 mmin_tenths mtcm_tenths mtwm_tenths sday".split()
    explanatory_rasters = [os.path.join(TRAINING_DIR, "current_" + r + ".img") for r in explanatory_fields]
    response_shapes = os.path.join(TRAINING_DIR, "DF.shp")
    # Load the training rasters, using a cached subset when available
    try:
        cached = json.load(open("_cached_training.json"))
        train_xs = np.array(cached['train_xs'])
        train_y = np.array(cached['train_y'])
    except IOError:
        train_xs, train_y = load_training_vector(response_shapes,
                                                 explanatory_rasters, response_field='GRIDCODE')
        cache = {'train_xs': train_xs.tolist(), 'train_y': train_y.tolist()}
        with open("_cached_training.json", 'w') as fh:
            fh.write(json.dumps(cache))
    print(train_xs.shape, train_y.shape)
    # Train the classifier
    clf = ExtraTreesClassifier(n_estimators=120, n_jobs=3)
    clf.fit(train_xs, train_y)
    print(clf)
    evaluate_clf(clf, train_xs, train_y, feature_names=explanatory_fields)
Example 4: top_importances
# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Or: from sklearn.ensemble.ExtraTreesClassifier import fit [as alias]
import pandas as pd
from sklearn.ensemble import ExtraTreesClassifier

def top_importances(features_df=None, labels_df=None, top_N=10):
    '''Finds the top N importances using the ExtraTreesClassifier.

    Finds the top N importances of a dataframe of features and a dataframe
    of labels using the ExtraTreesClassifier.

    Args:
        features_df: Pandas dataframe of features used to predict.
        labels_df: Pandas dataframe of labels to be predicted.
        top_N: integer value of the top N most important features to return.

    Returns:
        Pandas dataframe containing the top N importances and their
        importance scores.
    '''
    reducer = ExtraTreesClassifier(n_estimators=2000, bootstrap=False,
                                   oob_score=False, max_features=.10,
                                   min_samples_split=10, min_samples_leaf=2,
                                   criterion='gini')
    reducer.fit(features_df, labels_df)
    scores = pd.DataFrame(reducer.feature_importances_,
                          index=features_df.columns)
    scores.columns = ['Importances']
    # DataFrame.sort was removed from pandas; use sort_values instead
    scores = scores.sort_values(['Importances'], ascending=False)
    return scores[0:top_N]
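As a quick smoke test, a hypothetical call might look like this (synthetic data; assumes the top_importances function above is in scope):

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
features = pd.DataFrame(rng.rand(200, 6), columns=list('abcdef'))
labels = pd.Series(rng.randint(0, 2, size=200))

# Returns a one-column DataFrame of the 3 largest importance scores.
print(top_importances(features, labels, top_N=3))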
Example 5: train_random_forest
# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Or: from sklearn.ensemble.ExtraTreesClassifier import fit [as alias]
def train_random_forest(X_train, y_train, **kwargs):
    # Despite the name, this trains an extremely randomized trees ensemble.
    from sklearn.ensemble import ExtraTreesClassifier
    n_estimators = kwargs.pop('n_estimators', 300)
    max_features = kwargs.pop('max_features', 'sqrt')  # 'auto' was removed from scikit-learn
    n_jobs = kwargs.pop('n_jobs', -1)
    verbose = kwargs.pop('verbose', 0)
    tuned_params = kwargs.pop('tuned_params', None)
    # initialize baseline classifier
    clf = ExtraTreesClassifier(n_estimators=n_estimators, random_state=42,
                               n_jobs=n_jobs, verbose=verbose, criterion='gini',
                               max_features=max_features, oob_score=True,
                               bootstrap=True)
    if tuned_params is not None:  # optimize if desired
        # sklearn.grid_search was removed; GridSearchCV now lives in model_selection
        from sklearn.model_selection import GridSearchCV
        cv = GridSearchCV(clf, tuned_params, cv=5, scoring='roc_auc',
                          n_jobs=n_jobs, verbose=verbose, refit=True)
        cv.fit(X_train, y_train)
        clf = cv.best_estimator_
    else:  # otherwise train with the specified parameters (no tuning)
        clf.fit(X_train, y_train)
    return clf
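A hedged usage sketch (the data and the one-parameter grid below are assumptions, not part of the original):

import numpy as np

rng = np.random.RandomState(0)
X_train = rng.rand(300, 10)
y_train = rng.randint(0, 2, size=300)  # binary labels, as scoring='roc_auc' expects

# Without tuned_params the baseline classifier is fit directly;
# with a grid, GridSearchCV refits and returns the best estimator.
clf = train_random_forest(X_train, y_train,
                          tuned_params={'n_estimators': [100, 300]})
print(clf)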
Example 6: kfold_cv
# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Or: from sklearn.ensemble.ExtraTreesClassifier import fit [as alias]
import gc

import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import StratifiedKFold

def kfold_cv(X_train, y_train, idx, k):
    # xgb_classifier and llfun (log-loss) are defined elsewhere in the source
    # module. StratifiedKFold(y, n_folds=k) is the old API; current releases
    # take n_splits and split via .split(X, y).
    kf = StratifiedKFold(n_splits=k)
    xx = []
    count = 0
    for train_index, test_index in kf.split(X_train, y_train):
        count += 1
        X_train_cv, X_test_cv = X_train[train_index, :], X_train[test_index, :]
        gc.collect()
        y_train_cv, y_test_cv = y_train[train_index], y_train[test_index]
        y_pred = np.zeros(X_test_cv.shape[0])
        m = 0  # m == 0, so the xgboost averaging below is effectively disabled
        for j in range(m):
            clf = xgb_classifier(eta=0.1, min_child_weight=20, col=0.5, subsample=0.7,
                                 depth=5, num_round=200, seed=j * 77, gamma=0.1)
            y_pred += clf.train_predict(X_train_cv, y_train_cv, X_test_cv, y_test=y_test_cv)
        # y_pred /= m
        clf = ExtraTreesClassifier(n_estimators=700, max_features=50, criterion='entropy',
                                   min_samples_split=3, max_depth=60, min_samples_leaf=4,
                                   verbose=1, n_jobs=-1)
        # clf = RandomForestClassifier(n_jobs=-1, n_estimators=100, max_depth=100)
        clf.fit(X_train_cv, y_train_cv)
        y_pred = clf.predict_proba(X_test_cv).T[1]
        print(y_pred.shape)
        xx.append(llfun(y_test_cv, y_pred))
        ypred = y_pred
        yreal = y_test_cv
        idx = idx[test_index]
        print(xx[-1])  # , y_pred.shape
        break  # only the first fold is evaluated
    print(xx, 'average:', np.mean(xx), 'std', np.std(xx))
    return ypred, yreal, idx  # np.mean(xx)
Example 7: FeaturesSelectionRandomForests
# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Or: from sklearn.ensemble.ExtraTreesClassifier import fit [as alias]
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesClassifier

class FeaturesSelectionRandomForests(object):

    def __init__(self, n_estimators=100, feature_importance_th=0.005):
        self.n_estimators = n_estimators
        self.feature_importance_th = feature_importance_th

    def fit(self, X, y, n_estimators=None, feature_importance_th=None):
        if n_estimators is not None:
            assert isinstance(n_estimators, (int, float))
            self.n_estimators = n_estimators
        if feature_importance_th is not None:
            assert isinstance(feature_importance_th, (int, float))
            self.feature_importance_th = feature_importance_th
        # filter features by forest model; the original hard-coded
        # n_estimators=100 and a 0.005 threshold here, ignoring the configured
        # attributes, and passed the long-removed compute_importances flag
        self.trees = ExtraTreesClassifier(n_estimators=self.n_estimators)
        self.trees.fit(X, y)
        self.features_mask = np.where(
            self.trees.feature_importances_ > self.feature_importance_th)[0]

    def plot_features_importance(self):
        pd.DataFrame(self.trees.feature_importances_).plot(kind='bar')
        plt.show()

    def transform(self, X):
        assert hasattr(self, "features_mask")
        return X[:, self.features_mask]
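A minimal usage sketch of the selector above (synthetic data; the default 0.005 threshold is kept):

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(500, 30)
y = (X[:, 0] > 0.5).astype(int)  # only the first column is informative

selector = FeaturesSelectionRandomForests(n_estimators=100)
selector.fit(X, y)
X_reduced = selector.transform(X)
print(X.shape, '->', X_reduced.shape)  # low-importance columns may be dropped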
Example 8: extratreeclassifier
# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Or: from sklearn.ensemble.ExtraTreesClassifier import fit [as alias]
import numpy as np
from sklearn import metrics
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import train_test_split

def extratreeclassifier(input_file, Output, test_size):
    # lvltrace, tools and plot_confusion_matrix are project-specific helpers.
    lvltrace.lvltrace("LVLEntree dans extratreeclassifier split_test")
    ncol = tools.file_col_coma(input_file)
    data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol - 1))
    X = data[:, 1:]
    y = data[:, 0]
    n_samples, n_features = X.shape
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
    print(X_train.shape, X_test.shape)
    clf = ExtraTreesClassifier(n_estimators=10)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print("Extremely Randomized Trees")
    print("classification accuracy:", metrics.accuracy_score(y_test, y_pred))
    print("precision:", metrics.precision_score(y_test, y_pred))
    print("recall:", metrics.recall_score(y_test, y_pred))
    print("f1 score:", metrics.f1_score(y_test, y_pred))
    print("\n")
    results = Output + "_Extremely_Random_Forest_metrics_test.txt"
    with open(results, "w") as out:  # avoid shadowing the 'file' builtin
        out.write("Extremely Random Forest Classifier estimator accuracy\n")
        out.write("Classification Accuracy Score: %f\n" % metrics.accuracy_score(y_test, y_pred))
        out.write("Precision Score: %f\n" % metrics.precision_score(y_test, y_pred))
        out.write("Recall Score: %f\n" % metrics.recall_score(y_test, y_pred))
        out.write("F1 Score: %f\n" % metrics.f1_score(y_test, y_pred))
        out.write("\n")
        out.write("True Value, Predicted Value, Iteration\n")
        for n in range(len(y_test)):
            out.write("%f,%f,%i\n" % (y_test[n], y_pred[n], n + 1))
    title = "Extremely Randomized Trees %f" % test_size
    save = Output + "Extremely_Randomized_Trees_confusion_matrix" + "_%s.png" % test_size
    plot_confusion_matrix(y_test, y_pred, title, save)
    lvltrace.lvltrace("LVLSortie dans extratreeclassifier split_test")
Example 9: reduceRF
# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Or: from sklearn.ensemble.ExtraTreesClassifier import fit [as alias]
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier

def reduceRF(label):
    # x_data, y_data, RFThreshold and importantFeatureLocs are module-level globals.
    global x_data_rf_reduced, importantFeatureLocs
    model = ExtraTreesClassifier()
    model.fit(x_data, y_data[:, label])
    # the relative importance of each attribute
    importance = model.feature_importances_
    weight = float(0)
    del importantFeatureLocs[:]  # reset
    # print(importance)
    for ele in np.sort(importance)[::-1]:
        weight += float(ele)
        featureIndex = np.where(importance == ele)
        for loc in featureIndex[0]:
            importantFeatureLocs.append(loc)
        if weight > RFThreshold:
            break
    # remove duplicates
    importantFeatureLocs = list(set(importantFeatureLocs))
    # extract the relevant columns from the input data. Note that
    # importantFeatureLocs may be unsorted (a Python 'set' is unordered),
    # so features are extracted in unordered fashion. This info is stored
    # in the softmax model class.
    x_data_rf_reduced = x_data[:, importantFeatureLocs]
Example 10: MyExtraTree
# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Or: from sklearn.ensemble.ExtraTreesClassifier import fit [as alias]
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier

class MyExtraTree(MyClassifier):  # MyClassifier is defined elsewhere in the source module

    def __init__(self, params=dict()):
        self._params = params
        self._extree = ExtraTreesClassifier(**(self._params))

    def update_params(self, updates):
        self._params.update(updates)
        self._extree = ExtraTreesClassifier(**(self._params))

    def fit(self, Xtrain, ytrain):
        self._extree.fit(Xtrain, ytrain)

    # def predict(self, Xtest, option=None):
    #     return self._extree.predict(Xtest)

    def predict_proba(self, Xtest, option=None):
        return self._extree.predict_proba(Xtest)[:, 1]

    def predict_proba_multi(self, Xtest, option=None):
        return self._extree.predict_proba(Xtest)

    def plt_feature_importance(self, fname_list, f_range=list()):
        importances = self._extree.feature_importances_
        std = np.std([tree.feature_importances_ for tree in self._extree.estimators_], axis=0)
        indices = np.argsort(importances)[::-1]
        fname_array = np.array(fname_list)
        if not f_range:
            f_range = range(indices.shape[0])
        n_f = len(f_range)
        plt.figure()
        plt.title("Extra Trees feature importances")
        plt.barh(range(n_f), importances[indices[f_range]],
                 color="b", xerr=std[indices[f_range]], ecolor='k', align="center")
        plt.yticks(range(n_f), fname_array[indices[f_range]])
        plt.ylim([-1, n_f])
        plt.show()

    def list_feature_importance(self, fname_list, f_range=list(), return_list=False):
        importances = self._extree.feature_importances_
        indices = np.argsort(importances)[::-1]
        print('Extra tree feature ranking:')
        if not f_range:
            f_range = range(indices.shape[0])
        n_f = len(f_range)
        for i in range(n_f):
            f = f_range[i]
            print('{0:d}. feature[{1:d}] {2:s} ({3:f})'.format(
                f + 1, indices[f], fname_list[indices[f]], importances[indices[f]]))
        if return_list:
            return [indices[f_range[i]] for i in range(n_f)]
Example 11: fit
# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Or: from sklearn.ensemble.ExtraTreesClassifier import fit [as alias]
import numpy as np

def fit(self, X, Y, sample_weight=None):
    # A preprocessor component's method: the hyperparameters referenced via
    # self are set in the component's __init__ (not shown in this excerpt).
    from sklearn.ensemble import ExtraTreesClassifier
    from sklearn.feature_selection import SelectFromModel
    num_features = X.shape[1]
    max_features = int(float(self.max_features) * (np.log(num_features) + 1))
    # Use at most half of the features
    max_features = max(1, min(int(X.shape[1] / 2), max_features))
    estimator = ExtraTreesClassifier(
        n_estimators=self.n_estimators,
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        bootstrap=self.bootstrap,
        max_features=max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        oob_score=self.oob_score,
        n_jobs=self.n_jobs,
        verbose=self.verbose,
        random_state=self.random_state,
        class_weight=self.class_weight,
    )
    estimator.fit(X, Y, sample_weight=sample_weight)
    self.preprocessor = SelectFromModel(estimator=estimator, threshold="mean", prefit=True)
    return self
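The same fit-then-select pattern in standalone form, as a sketch (not the original class; data sizes are illustrative):

import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

rng = np.random.RandomState(0)
X = rng.rand(200, 40)
y = rng.randint(0, 3, size=200)

estimator = ExtraTreesClassifier(n_estimators=100, random_state=0).fit(X, y)
selector = SelectFromModel(estimator=estimator, threshold="mean", prefit=True)
print(selector.transform(X).shape)  # keeps features with above-mean importance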
Example 12: _cascade_layer
# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Or: from sklearn.ensemble.ExtraTreesClassifier import fit [as alias]
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier

def _cascade_layer(self, X, y=None, layer=0):
    # One layer of a deep-forest cascade: paired random-forest and extra-trees
    # models, with OOB estimates used as the layer output at training time.
    n_tree = getattr(self, 'n_cascadeRFtree')
    n_cascadeRF = getattr(self, 'n_cascadeRF')
    min_samples = getattr(self, 'min_samples_cascade')
    prf = RandomForestClassifier(
        n_estimators=100, max_features=8,
        bootstrap=True, criterion="entropy", min_samples_split=20,
        max_depth=None, class_weight='balanced', oob_score=True)
    crf = ExtraTreesClassifier(
        n_estimators=100, max_depth=None,
        bootstrap=True, oob_score=True)
    prf_pred = []
    if y is not None:
        # print('Adding/Training Layer, n_layer={}'.format(self.n_layer))
        for irf in range(n_cascadeRF):
            prf.fit(X, y)
            crf.fit(X, y)
            setattr(self, '_casprf{}_{}'.format(self.n_layer, irf), prf)
            setattr(self, '_cascrf{}_{}'.format(self.n_layer, irf), crf)
            probas = prf.oob_decision_function_
            probas += crf.oob_decision_function_
            prf_pred.append(probas)
    elif y is None:
        for irf in range(n_cascadeRF):
            prf = getattr(self, '_casprf{}_{}'.format(layer, irf))
            crf = getattr(self, '_cascrf{}_{}'.format(layer, irf))
            probas = prf.predict_proba(X)
            probas += crf.predict_proba(X)
            prf_pred.append(probas)
    return prf_pred
Example 13: calc_prob
# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Or: from sklearn.ensemble.ExtraTreesClassifier import fit [as alias]
import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesClassifier

def calc_prob(df_features_driver, df_features_other):
    # create_first_column is defined elsewhere in the source module.
    # DataFrame.append was removed from pandas; use pd.concat instead.
    df_train = pd.concat([df_features_driver, df_features_other])
    df_train.reset_index(inplace=True)
    df_train.Driver = df_train.Driver.astype(int)
    # So far, the best result was achieved by using a RandomForestClassifier with Bagging
    # model = BaggingClassifier(base_estimator=ExtraTreesClassifier())
    # model = BaggingClassifier(base_estimator=svm.SVC(gamma=2, C=1))
    # model = BaggingClassifier(base_estimator=linear_model.LogisticRegression())
    # model = BaggingClassifier(base_estimator=AdaBoostClassifier())
    # model = RandomForestClassifier(200)
    # model = EnsembleClassifier([BaggingClassifier(base_estimator=RandomForestClassifier()),
    #                             GradientBoostingClassifier])
    # model = GradientBoostingClassifier(n_estimators=10000)
    model = ExtraTreesClassifier(n_estimators=100, max_features='sqrt',  # 'auto' was removed
                                 random_state=0, n_jobs=2, criterion='entropy', bootstrap=True)
    # model = ExtraTreesClassifier(500, criterion='entropy')
    feature_columns = df_train.iloc[:, 4:]
    # Train the classifier
    model.fit(feature_columns, df_train.Driver)
    df_submission = pd.DataFrame()
    df_submission['driver_trip'] = create_first_column(df_features_driver)
    # Array with the predicted probability for every driver
    probs_array = model.predict_proba(feature_columns[:200])
    probs_df = pd.DataFrame(probs_array)
    df_submission['prob'] = np.array(probs_df.iloc[:, 1])
    return df_submission
Example 14: tree
# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Or: from sklearn.ensemble.ExtraTreesClassifier import fit [as alias]
import numpy as np
import pylab as pl
from sklearn.ensemble import ExtraTreesClassifier

def tree(train_data, train_labels, all_bigrams, task):
    forest = ExtraTreesClassifier(n_estimators=100, random_state=0)
    forest.fit(train_data, train_labels)
    importances = forest.feature_importances_
    indices = np.argsort(importances)[::-1]
    # Print the feature ranking
    print("-" * 45)
    print(task)
    for f in range(20):
        print("%d. feature, name: %s, importance: %f"
              % (f + 1, all_bigrams[indices[f]], importances[indices[f]]))
    # Plot the feature importances of the forest, capped at the top 2000 features
    pl.figure()
    n = min(train_data.shape[1], 2000)
    pl.title("Sorted feature importance for %s" % task)
    pl.bar(range(n), importances[indices][:n], color="black", align="center")
    pl.xlim([0, n])
    pl.xticks([num for num in range(0, n + 1, 250)])
    pl.savefig(task + '.pdf', bbox_inches='tight')
    print("plot saved")
    return indices
Example 15: train_classifiers
# Required import: from sklearn.ensemble import ExtraTreesClassifier [as alias]
# Or: from sklearn.ensemble.ExtraTreesClassifier import fit [as alias]
from sklearn import svm
from sklearn.ensemble import (AdaBoostClassifier, ExtraTreesClassifier,
                              GradientBoostingClassifier, RandomForestClassifier)
from sklearn.naive_bayes import MultinomialNB

def train_classifiers(X_data, y_data):
    # The accuracies in the comments were reported for the author's dataset.
    ############ Linear SVM: 0.908 #############
    clf_LSVM = svm.SVC(kernel='linear')
    clf_LSVM.fit(X_data, y_data)
    ############ MultinomialNB: 0.875 #############
    clf_MNB = MultinomialNB()
    clf_MNB.fit(X_data, y_data)
    ############ Random Forest: 0.910 #############
    clf_RF = RandomForestClassifier(n_estimators=200, criterion='entropy')
    clf_RF.fit(X_data, y_data)
    ############ Extra Trees: 0.915 ##################
    # min_samples_split must be >= 2 in current scikit-learn releases
    clf_ETC = ExtraTreesClassifier(n_estimators=500, max_depth=None, min_samples_split=2, random_state=0)
    clf_ETC.fit(X_data, y_data)
    ############ AdaBoost: 0.88 ##################
    clf_Ada = AdaBoostClassifier()
    clf_Ada.fit(X_data, y_data)
    ############ RBF SVM: 0.895 #############
    clf_rbf = svm.SVC(C=200, gamma=0.06, kernel='rbf')
    clf_rbf.fit(X_data, y_data)
    ############ GradientBoosting: 0.88 #############
    clf_GBC = GradientBoostingClassifier()
    clf_GBC.fit(X_data, y_data)
    return clf_LSVM, clf_MNB, clf_RF, clf_ETC, clf_Ada, clf_rbf, clf_GBC
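A hypothetical call to the function above (synthetic non-negative features, since MultinomialNB requires them):

import numpy as np

rng = np.random.RandomState(0)
X_data = rng.rand(100, 5)             # non-negative, suitable for MultinomialNB
y_data = rng.randint(0, 2, size=100)

models = train_classifiers(X_data, y_data)
print([type(m).__name__ for m in models])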