当前位置: 首页>>代码示例>>Python>>正文


Python ExtraTreesClassifier.fit方法代码示例

本文整理汇总了Python中sklearn.ensemble.ExtraTreesClassifier.fit方法的典型用法代码示例。如果您正苦于以下问题:Python ExtraTreesClassifier.fit方法的具体用法?Python ExtraTreesClassifier.fit怎么用?Python ExtraTreesClassifier.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.ensemble.ExtraTreesClassifier的用法示例。


在下文中一共展示了ExtraTreesClassifier.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: plotFeatureImportances

# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import fit [as 别名]
def plotFeatureImportances(x, y, fieldNames, numTrees = 100):
    print fieldNames
    # fit
    forest = ExtraTreesClassifier(n_estimators=numTrees, compute_importances=True, random_state=0)
    forest.fit(x, y)

    # get importances
    importances = forest.feature_importances_
    print sum(importances)
    std = np.std([tree.feature_importances_ for tree in forest.estimators_], axis=0)
    indices = np.argsort(importances)[::-1]

    # present
    numFeatures = len(importances)
    print 'feature ranking:'
    for i in xrange(numFeatures):
        print '%d. feature %d (%s) has importance %f' % (i+1, indices[i], fieldNames[indices[i]], importances[indices[i]])

    xtickLabels = [fieldNames[i] for i in indices]
    pylab.figure()
    pylab.title('Feature Importances From A Random Forest with %s trees' % numTrees)
    pylab.bar(xrange(numFeatures), importances[indices], color='r', yerr=std[indices], align='center')
    pylab.xticks(xrange(numFeatures), xtickLabels)
    pylab.xlim([-1, numFeatures])
    pylab.show()
开发者ID:jennyyuejin,项目名称:Kaggle,代码行数:27,代码来源:visualization.py

示例2: train_tree

# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import fit [as 别名]
def train_tree():
	word_vector_hash = knn.word_vectors(training, vector_length, False)

	sku_vectors, class_labels, _, sku_hash = knn.data(adapt1, vector_length, 'all', word_vector_hash)
	xtrees = ExtraTreesClassifier(n_estimators=1, max_depth=None, min_samples_split=1, random_state=0)
	model2 = xtrees.fit(sku_vectors, class_labels)

	sku_vectors, class_labels, _, sku_hash = knn.data(adapt2, vector_length, 'all', word_vector_hash)
	xtrees = ExtraTreesClassifier(n_estimators=1, max_depth=None, min_samples_split=1, random_state=0)
	model3 = xtrees.fit(sku_vectors, class_labels)

	sku_vectors, class_labels, _, sku_hash = knn.data(adapt3, vector_length, 'all', word_vector_hash)
	xtrees = ExtraTreesClassifier(n_estimators=1, max_depth=None, min_samples_split=1, random_state=0)
	model4 = xtrees.fit(sku_vectors, class_labels)

	# Non-adaptive data
	sku_vectors, class_labels, _, sku_hash = knn.data(training, vector_length, False, word_vector_hash)
	model2 = ConfidenceDecorator(model2, sku_vectors, class_labels)
	model3 = ConfidenceDecorator(model3, sku_vectors, class_labels)
	model4 = ConfidenceDecorator(model4, sku_vectors, class_labels)

	xtrees = ExtraTreesClassifier(n_estimators=1, max_depth=None, min_samples_split=1, random_state=0)
	model1 = xtrees.fit(sku_vectors, class_labels)
	model1 = ConfidenceDecorator(model1, sku_vectors, class_labels)

	forest = RandomForestClassifier(n_estimators=3, max_depth=None, min_samples_split=1, random_state=0)
	model5 = forest.fit(sku_vectors, class_labels)
	model5 = ConfidenceDecorator(model5, sku_vectors, class_labels)

	#neigh = neighbors.KNeighborsClassifier(n_neighbors=10, warn_on_equidistant=False, weights="distance")
	#model6 = neigh.fit(sku_vectors, class_labels)
	#model6 = ConfidenceDecorator(model6, sku_vectors, class_labels)

	models = [model1, model2, model3, model4, model5]# model6]
	return models, word_vector_hash
开发者ID:ageek,项目名称:kaggle-1,代码行数:37,代码来源:decision_tree.py

示例3: main

# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import fit [as 别名]
def main():

    # Define the known data points or "training" data
    explanatory_fields = "d100 dd0 dd5 fday ffp gsdd5 gsp map mat_tenths mmax_tenths mmindd0 mmin_tenths mtcm_tenths mtwm_tenths sday".split()
    explanatory_rasters = [os.path.join(TRAINING_DIR, "current_" + r + ".img") for r in explanatory_fields]
    response_shapes = os.path.join(TRAINING_DIR, "DF.shp")

    # Load the training rasters using the sampled subset
    try:
        cached = json.load(open("_cached_training.json"))
        train_xs = np.array(cached['train_xs'])
        train_y = np.array(cached['train_y'])
    except IOError:
        train_xs, train_y = load_training_vector(response_shapes, 
            explanatory_rasters, response_field='GRIDCODE')
        cache = {'train_xs': train_xs.tolist(), 'train_y': train_y.tolist()}
        with open("_cached_training.json", 'w') as fh:
            fh.write(json.dumps(cache))

    print(train_xs.shape, train_y.shape)

    # Train the classifier
    clf = ExtraTreesClassifier(n_estimators=120, n_jobs=3)
    clf.fit(train_xs, train_y)
    print(clf)

    evaluate_clf(clf, train_xs, train_y, feature_names=explanatory_fields)
开发者ID:bendv,项目名称:pyimpute,代码行数:29,代码来源:example_eval.py

示例4: top_importances

# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import fit [as 别名]
def top_importances(features_df=None, labels_df=None, top_N=10):
    ''' Finds the top N importances using the ExtraTreesClassifier.
        
    Finds the top N importances of a dataframe of features and a dataframe
        of labels using the ExtraTreesClassifier.
    
    Args:
        features_df: Pandas dataframe of features used to predict.
        labels_df: Pandas dataframe of labels to be predicted.
        top_N: interger value of the top N most importance features to return.
    Returns:
        Pandas dataframe containing the top N importances and their 
        importance scores.
    
    '''
    reducer = ExtraTreesClassifier(n_estimators=2000, bootstrap=False,
                                   oob_score=False, max_features=.10,
                                   min_samples_split=10, min_samples_leaf=2,
                                   criterion='gini')

    reducer.fit(features_df, labels_df)
    scores = pd.DataFrame(reducer.feature_importances_,
                          index=features_df.columns)
    scores.columns = ['Importances']
    scores = scores.sort(['Importances'], ascending=False)
    return scores[0:top_N]
开发者ID:DingChiLin,项目名称:FCH808.github.io,代码行数:28,代码来源:poi_add_features.py

示例5: train_random_forest

# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import fit [as 别名]
def train_random_forest(X_train,y_train,**kwargs):
    from sklearn.ensemble import ExtraTreesClassifier

    n_estimators = kwargs.pop('n_estimators',300)
    max_features = kwargs.pop('max_features','auto')
    n_jobs       = kwargs.pop('n_jobs',-1)
    verbose      = kwargs.pop('verbose',0)
    tuned_params = kwargs.pop('tuned_params',None)

    # initialize baseline classifier
    clf = ExtraTreesClassifier(n_estimators=n_estimators,random_state=42,
                               n_jobs=n_jobs,verbose=verbose,criterion='gini',
                               max_features=max_features,oob_score=True,
                               bootstrap=True)
    
    if tuned_params is not None: # optimize if desired
        from sklearn.grid_search import GridSearchCV
        cv = GridSearchCV(clf,tuned_params,cv=5,scoring='roc_auc',
                          n_jobs=n_jobs,verbose=verbose,refit=True)
        cv.fit(X_train, y_train)
        clf = cv.best_estimator_
    else: # otherwise train with the specified parameters (no tuning)
        clf.fit(X_train,y_train)

    return clf
开发者ID:caseyjlaw,项目名称:activecontainer,代码行数:27,代码来源:sklearn_utils.py

示例6: kfold_cv

# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import fit [as 别名]
def kfold_cv(X_train, y_train,idx,k):

    kf = StratifiedKFold(y_train,n_folds=k)
    xx=[]
    count=0
    for train_index, test_index in kf:
        count+=1
        X_train_cv, X_test_cv = X_train[train_index,:],X_train[test_index,:]
        gc.collect()
        y_train_cv, y_test_cv = y_train[train_index],y_train[test_index]
        y_pred=np.zeros(X_test_cv.shape[0])
        m=0
         
        for j in range(m):
            clf=xgb_classifier(eta=0.1,min_child_weight=20,col=0.5,subsample=0.7,depth=5,num_round=200,seed=j*77,gamma=0.1)
            y_pred+=clf.train_predict(X_train_cv,(y_train_cv),X_test_cv,y_test=(y_test_cv))
        #y_pred/=m;
        clf=ExtraTreesClassifier(n_estimators=700,max_features= 50,criterion= 'entropy',min_samples_split= 3,
                            max_depth= 60, min_samples_leaf= 4,verbose=1,n_jobs=-1)
        #clf=RandomForestClassifier(n_jobs=-1,n_estimators=100,max_depth=100)
        clf.fit(X_train_cv,(y_train_cv))
        y_pred=clf.predict_proba(X_test_cv).T[1]
        print y_pred.shape
        xx.append(llfun(y_test_cv,(y_pred)))
        ypred=y_pred
        yreal=y_test_cv
        idx=idx[test_index]
        print xx[-1]#,y_pred.shape
        break

    print xx,'average:',np.mean(xx),'std',np.std(xx)
    return ypred,yreal,idx#np.mean(xx)
开发者ID:daxiongshu,项目名称:bnp,代码行数:34,代码来源:ex2.py

示例7: FeaturesSelectionRandomForests

# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import fit [as 别名]
class FeaturesSelectionRandomForests(object):
    
    
    def __init__(self, n_estimators = 100, feature_importance_th = 0.005):
        
        self.n_estimators = n_estimators
        self.feature_importance_th = feature_importance_th
        
            
    def fit(self, X, y, n_estimators = None, feature_importance_th = None):
        
        if n_estimators is not None:
            assert isinstance(n_estimators,(int,long,float))
            self.n_estimators = n_estimators
        if feature_importance_th is not None:
            assert isinstance(feature_importance_th,(int,long,float))
            self.feature_importance_th = feature_importance_th
        
        #filter features by forest model
        self.trees = ExtraTreesClassifier(n_estimators=100, compute_importances=True)
        self.trees.fit(X, y)
        self.features_mask = np.where(self.trees.feature_importances_ > 0.005)[0]

    
    def plot_features_importance(self):
        
        pd.DataFrame(self.trees.feature_importances_).plot(kind='bar')
        plt.show()
        
    
    def transform(self, X):

        assert hasattr(self,"features_mask")

        return X[:, self.features_mask]
开发者ID:macchineimparanti,项目名称:impara,代码行数:37,代码来源:features_filtering.py

示例8: extratreeclassifier

# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import fit [as 别名]
def extratreeclassifier(input_file,Output,test_size):
    lvltrace.lvltrace("LVLEntree dans extratreeclassifier split_test")
    ncol=tools.file_col_coma(input_file)
    data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))
    X = data[:,1:]
    y = data[:,0]
    n_samples, n_features = X.shape
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
    print X_train.shape, X_test.shape
    clf = ExtraTreesClassifier(n_estimators=10)
    clf.fit(X_train,y_train)
    y_pred = clf.predict(X_test)
    print "Extremely Randomized Trees"
    print "classification accuracy:", metrics.accuracy_score(y_test, y_pred)
    print "precision:", metrics.precision_score(y_test, y_pred)
    print "recall:", metrics.recall_score(y_test, y_pred)
    print "f1 score:", metrics.f1_score(y_test, y_pred)
    print "\n"
    results = Output+"_Extremely_Random_Forest_metrics_test.txt"
    file = open(results, "w")
    file.write("Extremely Random Forest Classifier estimator accuracy\n")
    file.write("Classification Accuracy Score: %f\n"%metrics.accuracy_score(y_test, y_pred))
    file.write("Precision Score: %f\n"%metrics.precision_score(y_test, y_pred))
    file.write("Recall Score: %f\n"%metrics.recall_score(y_test, y_pred))
    file.write("F1 Score: %f\n"%metrics.f1_score(y_test, y_pred))
    file.write("\n")
    file.write("True Value, Predicted Value, Iteration\n")
    for n in xrange(len(y_test)):
        file.write("%f,%f,%i\n"%(y_test[n],y_pred[n],(n+1)))
    file.close()
    title = "Extremely Randomized Trees %f"%test_size
    save = Output + "Extremely_Randomized_Trees_confusion_matrix"+"_%s.png"%test_size
    plot_confusion_matrix(y_test, y_pred,title,save)
    lvltrace.lvltrace("LVLSortie dans extratreeclassifier split_test")
开发者ID:xaviervasques,项目名称:Neuron_Morpho_Classification_ML,代码行数:36,代码来源:supervised_split_test.py

示例9: reduceRF

# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import fit [as 别名]
def reduceRF(label):
  global x_data_rf_reduced, importantFeatureLocs
  model = ExtraTreesClassifier()
  model.fit(x_data, y_data[:, label])

  # the relative importance of each attribute
  importance = model.feature_importances_
  weight = float(0)
  del importantFeatureLocs[:] # reset
  #print(importance)  

  for ele in np.sort(importance)[::-1]:
    weight += float(ele)
    featureIndex = np.where(importance==ele)
    for loc in featureIndex[0]:
      importantFeatureLocs.append(loc)
  
    if weight > RFThreshold :
      break
  
  # remove duplications
  importantFeatureLocs = list(set(importantFeatureLocs))

  # extracting relevant columns from input data. Note that importantFeatureLocs
  # may be unsorted (since python 'set' is unsorted), so features are extracted
  # in unorderd fashion. This info is stored in the softmax model class
  x_data_rf_reduced = x_data[:, importantFeatureLocs]
开发者ID:tgangwani,项目名称:DynReconfig,代码行数:29,代码来源:learn.py

示例10: MyExtraTree

# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import fit [as 别名]
class MyExtraTree(MyClassifier):
    def __init__(self, params=dict()):
        self._params = params
        self._extree = ExtraTreesClassifier(**(self._params))

    def update_params(self, updates):
        self._params.update(updates)
        self._extree = ExtraTreesClassifier(**(self._params))

    def fit(self, Xtrain, ytrain):
        self._extree.fit(Xtrain, ytrain)

    # def predict(self, Xtest, option = None):
    #   return self._extree.predict(Xtest)

    def predict_proba(self, Xtest, option = None):
        return self._extree.predict_proba(Xtest)[:, 1]

    def predict_proba_multi(self, Xtest, option = None):
        return self._extree.predict_proba(Xtest)

    def plt_feature_importance(self, fname_list, f_range = list()):
        importances = self._extree.feature_importances_

        std = np.std([tree.feature_importances_ for tree in self._extree.estimators_], axis=0)
        indices = np.argsort(importances)[::-1]

        fname_array = np.array(fname_list)

        if not f_range:
            f_range = range(indices.shape[0])

        n_f = len(f_range)

        plt.figure()
        plt.title("Extra Tree Feature importances")
        plt.barh(range(n_f), importances[indices[f_range]],
               color="b", xerr=std[indices[f_range]], ecolor='k',align="center")
        plt.yticks(range(n_f), fname_array[indices[f_range]])
        plt.ylim([-1, n_f])
        plt.show()


    def list_feature_importance(self, fname_list, f_range = list(), return_list = False):
        importances = self._extree.feature_importances_
        indices = np.argsort(importances)[::-1]

        print 'Extra tree feature ranking:'

        if not f_range :
            f_range = range(indices.shape[0])

        n_f = len(f_range)

        for i in range(n_f):
            f = f_range[i]
            print '{0:d}. feature[{1:d}]  {2:s}  ({3:f})'.format(f + 1, indices[f], fname_list[indices[f]], importances[indices[f]])

        if return_list:
            return [indices[f_range[i]] for i in range(n_f)]
开发者ID:tonyzhangrt,项目名称:wklearn,代码行数:62,代码来源:learner.py

示例11: fit

# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import fit [as 别名]
    def fit(self, X, Y, sample_weight=None):
        from sklearn.ensemble import ExtraTreesClassifier
        from sklearn.feature_selection import SelectFromModel

        num_features = X.shape[1]
        max_features = int(float(self.max_features) * (np.log(num_features) + 1))
        # Use at most half of the features
        max_features = max(1, min(int(X.shape[1] / 2), max_features))
        estimator = ExtraTreesClassifier(
            n_estimators=self.n_estimators,
            criterion=self.criterion,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            bootstrap=self.bootstrap,
            max_features=max_features,
            max_leaf_nodes=self.max_leaf_nodes,
            oob_score=self.oob_score,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            random_state=self.random_state,
            class_weight=self.class_weight,
        )
        estimator.fit(X, Y, sample_weight=sample_weight)
        self.preprocessor = SelectFromModel(estimator=estimator, threshold="mean", prefit=True)
        return self
开发者ID:automl,项目名称:auto-sklearn,代码行数:28,代码来源:extra_trees_preproc_for_classification.py

示例12: _cascade_layer

# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import fit [as 别名]
    def _cascade_layer(self, X, y=None, layer=0):
        n_tree = getattr(self, 'n_cascadeRFtree')
        n_cascadeRF = getattr(self, 'n_cascadeRF')
        min_samples = getattr(self, 'min_samples_cascade')

        prf = RandomForestClassifier(
            n_estimators=100, max_features=8,
            bootstrap=True, criterion="entropy", min_samples_split=20,
            max_depth=None, class_weight='balanced', oob_score=True)
        crf = ExtraTreesClassifier(
            n_estimators=100, max_depth=None,
            bootstrap=True, oob_score=True)

        prf_pred = []
        if y is not None:
            # print('Adding/Training Layer, n_layer={}'.format(self.n_layer))
            for irf in range(n_cascadeRF):
                prf.fit(X, y)
                crf.fit(X, y)
                setattr(self, '_casprf{}_{}'.format(self.n_layer, irf), prf)
                setattr(self, '_cascrf{}_{}'.format(self.n_layer, irf), crf)
                probas = prf.oob_decision_function_
                probas += crf.oob_decision_function_
                prf_pred.append(probas)
        elif y is None:
            for irf in range(n_cascadeRF):
                prf = getattr(self, '_casprf{}_{}'.format(layer, irf))
                crf = getattr(self, '_cascrf{}_{}'.format(layer, irf))
                probas = prf.predict_proba(X)
                probas += crf.predict_proba(X)
                prf_pred.append(probas)

        return prf_pred
开发者ID:TinghuiWang,项目名称:pyActLearn,代码行数:35,代码来源:gcforest.py

示例13: calc_prob

# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import fit [as 别名]
def calc_prob(df_features_driver, df_features_other):

    df_train = df_features_driver.append(df_features_other)
    df_train.reset_index(inplace = True)
    df_train.Driver = df_train.Driver.astype(int)

    # So far, the best result was achieved by using a RandomForestClassifier with Bagging
    # model = BaggingClassifier(base_estimator = ExtraTreesClassifier())
    # model = BaggingClassifier(base_estimator = svm.SVC(gamma=2, C=1))
    # model = BaggingClassifier(base_estimator = linear_model.LogisticRegression())
    # model = BaggingClassifier(base_estimator = linear_model.LogisticRegression())
    # model = BaggingClassifier(base_estimator = AdaBoostClassifier())
    #model = RandomForestClassifier(200)
    # model = BaggingClassifier(base_estimator = [RandomForestClassifier(), linear_model.LogisticRegression()])
    # model = EnsembleClassifier([BaggingClassifier(base_estimator = RandomForestClassifier()),
    #                             GradientBoostingClassifier])
    #model = GradientBoostingClassifier(n_estimators = 10000)
    model = ExtraTreesClassifier(n_estimators=100,max_features='auto',random_state=0, n_jobs=2, criterion='entropy', bootstrap=True)
    # model = ExtraTreesClassifier(500, criterion='entropy')

    feature_columns = df_train.iloc[:, 4:]

    # Train the classifier
    model.fit(feature_columns, df_train.Driver)
    df_submission = pd.DataFrame()

    df_submission['driver_trip'] = create_first_column(df_features_driver)

    probs_array = model.predict_proba(feature_columns[:200]) # Return array with the probability for every driver
    probs_df = pd.DataFrame(probs_array)

    df_submission['prob'] = np.array(probs_df.iloc[:, 1])

    return df_submission
开发者ID:EdwardBetts,项目名称:awesome-kagg-ml,代码行数:36,代码来源:Step_3_ExtraTrees.py

示例14: tree

# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import fit [as 别名]
def tree(train_data, train_labels, all_bigrams, task):
	forest = ExtraTreesClassifier(n_estimators=100, random_state=0)
	forest.fit(train_data, train_labels)
	importances = forest.feature_importances_
	indices = np.argsort(importances)[::-1]

	# Print the feature ranking
	print "-"*45
	print task

	for f in range(20):
	  print("%d. feature, name: %s, importance: %f" % (f + 1, all_bigrams[indices[f]], importances[indices[f]]))

	# Plot the feature importances of the forest
	pl.figure()
	n = train_data.shape[1]
	n = 2000
	pl.title("Sorted feature importance for %s" %(task))
	pl.bar(range(n), importances[indices][:n], color="black", align="center")
	pl.xlim([0, (n)])
	pl.xticks([num for num  in range(0, n+1, 250)])
	pl.savefig(task+'.pdf', bbox_inches='tight')
	print "plot saved"

	return indices
开发者ID:MariaBarrett,项目名称:LPIIExam,代码行数:27,代码来源:ngram.py

示例15: train_classifiers

# 需要导入模块: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
# 或者: from sklearn.ensemble.ExtraTreesClassifier import fit [as 别名]
def train_classifiers(X_data, y_data):
    ############ Linear SVM: 0.908 #############
    clf_LSVM = svm.SVC(kernel = 'linear')
    clf_LSVM.fit(X_data, y_data)
    
    ############ MultinomialNB: 0.875 #############
    clf_MNB = MultinomialNB()
    clf_MNB.fit(X_data, y_data)
    
    ############ Random Forest: 0.910 #############
    clf_RF = RandomForestClassifier(n_estimators=200, criterion='entropy')
    clf_RF.fit(X_data, y_data)
    
    ############ Extra Tree: 0.915 ##################
    clf_ETC = ExtraTreesClassifier(n_estimators=500, max_depth=None, min_samples_split=1, random_state=0)
    clf_ETC.fit(X_data, y_data)
    
    ############ AdaBoost: 0.88 ##################
    clf_Ada = AdaBoostClassifier()
    clf_Ada.fit(X_data, y_data)
    
    ############ rbf SVM: 0.895 #############
    clf_rbf = svm.SVC(C=200, gamma=0.06, kernel='rbf')
    clf_rbf.fit(X_data, y_data)
    
    ############ GradientBoosting: 0.88 #############
    clf_GBC = GradientBoostingClassifier()
    clf_GBC.fit(X_data, y_data)
    
    return clf_LSVM, clf_MNB, clf_RF, clf_ETC, clf_Ada, clf_rbf, clf_GBC    
开发者ID:yiwuxie15,项目名称:yiwuxie15-urlBootstrap,代码行数:32,代码来源:urlAnalysis.py


注:本文中的sklearn.ensemble.ExtraTreesClassifier.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。