Python ensemble.ExtraTreesClassifier方法代码示例

本文整理汇总了Python中sklearn.ensemble.ExtraTreesClassifier方法的典型用法代码示例。如果您正苦于以下问题：Python ensemble.ExtraTreesClassifier方法的具体用法？Python ensemble.ExtraTreesClassifier怎么用？Python ensemble.ExtraTreesClassifier使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble的用法示例。

在下文中一共展示了ensemble.ExtraTreesClassifier方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: create_model_from_signatures

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def create_model_from_signatures(sig_csv_path, model_out, sig_datatype=np.int32):
    """
    Train a scikit-learn classifier on a class-signature .csv and save it to disk.

    The signatures file is the kind produced by extract_features_to_csv.

    Parameters
    ----------
    sig_csv_path
        The path to the signatures file
    model_out
        The location the fitted model is written to (via joblib.dump).
    sig_datatype
        The datatype to read the csv as. Defaults to int32.

    Notes
    -----
    At present, the model is an ExtraTreesClassifier whose hyperparameters
    were arrived at by tpot; they are spelled out in the function body.
    """
    # Hyperparameters found by tpot.
    tpot_hyperparameters = dict(
        bootstrap=False,
        criterion="gini",
        max_features=0.55,
        min_samples_leaf=2,
        min_samples_split=16,
        n_estimators=100,
        n_jobs=4,
        class_weight='balanced',
    )
    classifier = ens.ExtraTreesClassifier(**tpot_hyperparameters)

    features, labels = load_signatures(sig_csv_path, sig_datatype)
    classifier.fit(features, labels)

    joblib.dump(classifier, model_out)

开发者ID:clcr，项目名称:pyeo，代码行数:27，代码来源:classification.py

示例2: __init__

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def __init__(self, params):
        """Initialise the algorithm wrapper and build its ExtraTreesClassifier.

        ``params`` is forwarded to the parent initialiser and is also read for
        the optional keys "criterion", "max_features", "min_samples_split"
        and "seed"; each falls back to the default shown below when absent.
        """
        super(ExtraTreesAlgorithm, self).__init__(params)
        logger.debug("ExtraTreesAlgorithm.__init__")

        self.library_version = sklearn.__version__

        # Step-wise training settings are looked up in `additional`
        # (defined outside this method), not in `params`.
        self.trees_in_step = additional.get("trees_in_step", 100)
        self.max_steps = additional.get("max_steps", 50)
        self.early_stopping_rounds = additional.get("early_stopping_rounds", 50)

        estimator_kwargs = {
            "n_estimators": self.trees_in_step,
            "criterion": params.get("criterion", "gini"),
            "max_features": params.get("max_features", 0.6),
            "min_samples_split": params.get("min_samples_split", 30),
            "warm_start": True,
            "n_jobs": -1,
            "random_state": params.get("seed", 1),
        }
        self.model = ExtraTreesClassifier(**estimator_kwargs)

开发者ID:mljar，项目名称:mljar-supervised，代码行数:19，代码来源:extra_trees.py

示例3: run_sklearn

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def run_sklearn():
  """Train each configured scikit-learn estimator in turn and print the total time."""
  # NOTE(review): n_trees and n_folds are not referenced anywhere below.
  n_trees = 100
  n_folds = 3

  # https://www.analyticsvidhya.com/blog/2015/06/tuning-random-forest-model/
  # NOTE(review): 'extree' is a classifier while every other entry is a
  # regressor - confirm this is intended.
  candidates = [
      ('lreg', LinearRegression()),
      ('rforest', RandomForestRegressor(n_estimators=1000, n_jobs=-1, max_depth=3)),
      ('extree', ExtraTreesClassifier(n_estimators=1000, max_depth=2)),
      ('adaboost', AdaBoostRegressor(base_estimator=None, n_estimators=600, learning_rate=1.0)),
      ('knn', sklearn.neighbors.KNeighborsRegressor(n_neighbors=5)),
  ]

  began = time.time()
  for label, estimator in candidates:
      trainer = jhkaggle.train_sklearn.TrainSKLearn("1", label, estimator, False)
      trainer.run()
      # Drop the reference before the next estimator is trained.
      trainer = None
  elapsed_time = time.time() - began
  print("Elapsed time: {}".format(jhkaggle.util.hms_string(elapsed_time)))

开发者ID:jeffheaton，项目名称:jh-kaggle-util，代码行数:22，代码来源:models.py

示例4: run_sklearn

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def run_sklearn():
  """Train each configured scikit-learn classifier in turn."""
  # NOTE(review): n_trees, n_folds and the start timestamp are not
  # referenced anywhere in the visible body.
  n_trees = 100
  n_folds = 3

  # https://www.analyticsvidhya.com/blog/2015/06/tuning-random-forest-model/
  candidates = [
      ('rforest', RandomForestClassifier(n_estimators=1000, n_jobs=-1, verbose=1, max_depth=3)),
      ('extree', ExtraTreesClassifier(n_estimators=1000, max_depth=3, n_jobs=-1)),
      ('adaboost', AdaBoostClassifier(base_estimator=None, n_estimators=600, learning_rate=1.0)),
      ('knn', sklearn.neighbors.KNeighborsClassifier(n_neighbors=5, n_jobs=-1)),
  ]

  start_time = time.time()
  for label, estimator in candidates:
      trainer = jhkaggle.train_sklearn.TrainSKLearn("1", label, estimator, False)
      trainer.run()
      # Drop the reference before the next estimator is trained.
      trainer = None

开发者ID:jeffheaton，项目名称:jh-kaggle-util，代码行数:19，代码来源:models.py

示例5: test_time

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def test_time(pipeline_name, name, path):
    """Run one benchmark pipeline on a dataset and print the elapsed wall time.

    Parameters
    ----------
    pipeline_name
        Which pipeline to build: "LR" (logistic regression only), "FGS"
        (FeatureGradientSelector + logistic regression) or "Tree"
        (ExtraTrees-based SelectFromModel + logistic regression).
    name, path
        Passed unchanged to ``Benchmark.run_test``.

    Raises
    ------
    ValueError
        If ``pipeline_name`` is not one of the supported names.
    """
    if pipeline_name == "LR":
        pipeline = make_pipeline(LogisticRegression())
    elif pipeline_name == "FGS":
        pipeline = make_pipeline(FeatureGradientSelector(), LogisticRegression())
    elif pipeline_name == "Tree":
        pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression())
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError("Unknown pipeline_name: {!r}".format(pipeline_name))

    test_benchmark = Benchmark()
    print("Dataset:\t", name)
    print("Pipeline:\t", pipeline_name)
    starttime = datetime.datetime.now()
    test_benchmark.run_test(pipeline, name, path)
    endtime = datetime.datetime.now()
    # timedelta.microseconds is only the sub-second component; use
    # total_seconds() so runs longer than one second are reported correctly.
    elapsed_ms = (endtime - starttime).total_seconds() * 1000
    print("Used time: ", elapsed_ms)
    print("")

开发者ID:microsoft，项目名称:nni，代码行数:20，代码来源:benchmark_test.py

示例6: test

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def test():
    """Download the rcv1 training set, fit a feature-selection pipeline, print its score.

    Side effects: downloads 'train.bz2' into the working directory and writes
    the decompressed text to 'train.svm'.
    """
    url_zip_train = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2'
    urllib.request.urlretrieve(url_zip_train, filename='train.bz2')

    # Both handles are context-managed so the output file is closed even if
    # decompression or decoding raises.
    with open('train.svm', 'wt') as f_svm, bz2.open('train.bz2', 'rb') as f_zip:
        data = f_zip.read()
        f_svm.write(data.decode('utf-8'))

    X, y = load_svmlight_file('train.svm')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    pipeline = make_pipeline(FeatureGradientSelector(n_epochs=1, n_features=10), LogisticRegression())
    # pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression())

    pipeline.fit(X_train, y_train)

    # NOTE(review): the score is computed on the training split; X_test and
    # y_test are not used in this function.
    print("Pipeline Score: ", pipeline.score(X_train, y_train))

开发者ID:microsoft，项目名称:nni，代码行数:23，代码来源:sklearn_test.py

示例7: __init__

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def __init__(self, options):
        """Parse the user-supplied options and build the wrapped estimator.

        The 'params' entry of ``options`` (an empty dict when missing) is run
        through ``convert_params``; the result is forwarded as keyword
        arguments to the estimator, which always gets
        ``class_weight='balanced'``.
        """
        self.handle_options(options)

        raw_params = options.get('params', {})
        integer_keys = ['random_state', 'n_estimators', 'max_depth',
                        'min_samples_split', 'max_leaf_nodes']
        string_keys = ['max_features', 'criterion']
        out_params = convert_params(raw_params, ints=integer_keys, strs=string_keys)

        # With no explicit depth limit, cap the leaf count unless the caller
        # already supplied max_leaf_nodes.
        depth_is_limited = 'max_depth' in out_params
        if not depth_is_limited:
            out_params.setdefault('max_leaf_nodes', 2000)

        if 'max_features' in out_params:
            parsed_max_features = handle_max_features(out_params['max_features'])
            out_params['max_features'] = parsed_max_features

        self.estimator = _ExtraTreesClassifier(class_weight='balanced', **out_params)

开发者ID:splunk，项目名称:mltk-algo-contrib，代码行数:20，代码来源:ExtraTreesClassifier.py

示例8: __init__

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def __init__(
        self,data_block, predictors=None,cv_folds=10,
        scoring_metric='accuracy',additional_display_metrics=None):
        """Set up an ExtraTreesClassifier-backed classification model.

        All arguments are forwarded to ``base_classification.__init__`` along
        with a fresh ``ExtraTreesClassifier()`` as ``alg``.

        ``predictors`` and ``additional_display_metrics`` default to ``None``,
        which is treated as an empty list. A new list is created on every
        call so that instances never share one mutable default object.
        """
        if predictors is None:
            predictors = []
        if additional_display_metrics is None:
            additional_display_metrics = []

        base_classification.__init__(
            self, alg=ExtraTreesClassifier(), data_block=data_block,
            predictors=predictors,cv_folds=cv_folds,
            scoring_metric=scoring_metric,
            additional_display_metrics=additional_display_metrics)

        # Start the output record from the default parameters and add
        # placeholder entries for values that are not available yet.
        self.model_output = pd.Series(self.default_parameters)
        self.model_output['Feature_Importance'] = "-"
        self.model_output['OOB_Score'] = "-"

        #Set parameters to default values:
        self.set_parameters(set_default=True)

开发者ID:aarshayj，项目名称:easyML，代码行数:18，代码来源:models_classification.py

示例9: define_clfs_params

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def define_clfs_params(self):
        '''
        Defines all relevant parameters and classes for classifier objects.
        Edit these if you wish to change parameters.

        Populates two dictionaries keyed by the same short codes:
        ``self.clfs`` maps a code to an estimator instance, and
        ``self.params`` maps it to the parameter grid to run through.
        '''
        # These are the classifiers
        self.clfs = {
            'RF': RandomForestClassifier(n_estimators = 50, n_jobs = -1),
            'ET': ExtraTreesClassifier(n_estimators = 10, n_jobs = -1, criterion = 'entropy'),
            # max_depth must be a single int (or None), not a list of
            # candidates; a depth-1 stump is used as the base learner.
            'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth = 1), algorithm = "SAMME", n_estimators = 200),
            'LR': LogisticRegression(penalty = 'l1', C = 1e5),
            'SVM': svm.SVC(kernel = 'linear', probability = True, random_state = 0),
            'GB': GradientBoostingClassifier(learning_rate = 0.05, subsample = 0.5, max_depth = 6, n_estimators = 10),
            'NB': GaussianNB(),
            'DT': DecisionTreeClassifier(),
            'SGD': SGDClassifier(loss = 'log', penalty = 'l2'),
            'KNN': KNeighborsClassifier(n_neighbors = 3)
            }
        # These are the parameters which will be run through
        self.params = {
             'RF':{'n_estimators': [1,10,100,1000], 'max_depth': [10, 15,20,30,40,50,60,70,100], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'LR': {'penalty': ['l1','l2'], 'C': [0.00001,0.0001,0.001,0.01,0.1,1,10], 'random_state': [1]},
             'SGD': {'loss': ['log'], 'penalty': ['l2','l1','elasticnet'], 'random_state': [1]},
             'ET': {'n_estimators': [1,10,100,1000], 'criterion' : ['gini', 'entropy'], 'max_depth': [1,3,5,10,15], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'AB': {'algorithm': ['SAMME', 'SAMME.R'], 'n_estimators': [1,10,100,1000], 'random_state': [1]},
             'GB': {'n_estimators': [1,10,100,1000], 'learning_rate' : [0.001,0.01,0.05,0.1,0.5],'subsample' : [0.1,0.5,1.0], 'max_depth': [1,3,5,10,20,50,100], 'random_state': [1]},
             'NB': {},
             'DT': {'criterion': ['gini', 'entropy'], 'max_depth': [1,2,15,20,30,40,50], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'SVM' :{'C' :[0.00001,0.0001,0.001,0.01,0.1,1,10],'kernel':['linear'], 'random_state': [1]},
             'KNN' :{'n_neighbors': [1,5,10,25,50,100],'weights': ['uniform','distance'],'algorithm': ['auto','ball_tree','kd_tree']}
             }

开发者ID:aldengolab，项目名称:fake-news-detection，代码行数:33，代码来源:model_loop.py

示例10: test_change_algorithms

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def test_change_algorithms():
    """Check that algorithms can be removed from and added to a MALSS instance."""

    def assert_leading_names(expected_names):
        # Compare position by position; entries past the expected ones
        # are deliberately not inspected.
        algorithms = cls.get_algorithms()
        for position, expected in enumerate(expected_names):
            assert algorithms[position][0] == expected

    features, target = make_classification(n_samples=1000,
                                           n_features=10,
                                           n_classes=2,
                                           n_clusters_per_class=1,
                                           random_state=0)
    X = pd.DataFrame(features)
    y = pd.Series(target)
    cls = MALSS('classification')
    cls.fit(X, y, algorithm_selection_only=True)

    assert_leading_names(['Support Vector Machine (RBF Kernel)',
                          'Random Forest',
                          'Logistic Regression',
                          'Decision Tree',
                          'k-Nearest Neighbors'])

    # One removal by explicit index, one with the default argument.
    cls.remove_algorithm(0)
    cls.remove_algorithm()
    assert_leading_names(['Random Forest',
                          'Logistic Regression',
                          'Decision Tree'])

    from sklearn.ensemble import ExtraTreesClassifier as ET
    search_grid = [{'n_estimators': [10, 30, 50],
                    'max_depth': [3, 5, None],
                    'max_features': [0.3, 0.6, 'auto']}]
    cls.add_algorithm(ET(n_jobs=3), search_grid, 'Extremely Randomized Trees')
    assert_leading_names(['Random Forest',
                          'Logistic Regression',
                          'Decision Tree',
                          'Extremely Randomized Trees'])

开发者ID:canard0328，项目名称:malss，代码行数:38，代码来源:test.py

示例11: test_min_impurity_split

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def test_min_impurity_split():
    """min_impurity_split given to a forest must be set on its base estimators.

    Regression test for #8006.
    """
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    forest_classes = (RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor)

    for Estimator in forest_classes:
        unfitted = Estimator(min_impurity_split=0.1)
        # Fitting is expected to emit a DeprecationWarning mentioning
        # "min_impurity_decrease"; the value handed back by the helper is
        # what gets inspected below.
        fitted = assert_warns_message(DeprecationWarning,
                                      "min_impurity_decrease",
                                      unfitted.fit, X, y)
        for member in fitted.estimators_:
            assert_equal(member.min_impurity_split, 0.1)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:15，代码来源:test_forest.py

示例12: test_min_impurity_decrease

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def test_min_impurity_decrease():
    """min_impurity_decrease given to a forest must reach every fitted tree."""
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    forest_classes = (RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor)

    for Estimator in forest_classes:
        forest = Estimator(min_impurity_decrease=0.1)
        forest.fit(X, y)
        # Only checks that the parameter is passed on; how it actually
        # works is left to the tree tests.
        for member in forest.estimators_:
            assert_equal(member.min_impurity_decrease, 0.1)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:14，代码来源:test_forest.py

示例13: fit

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def fit(self, X, y, sample_weight=None, eval_set=None, sample_weight_eval_set=None, **kwargs):
        """Fit an extra-trees model on X/y and register it via set_model_properties.

        An ExtraTreesClassifier is used when ``self.num_classes >= 2`` (with
        ``y`` label-encoded against ``self.labels``); otherwise an
        ExtraTreesRegressor is used. Both are built from ``self.params``.

        ``sample_weight``, ``eval_set``, ``sample_weight_eval_set`` and
        ``**kwargs`` are accepted but not referenced in this body.

        Side effects: sets ``self.min`` (the per-column value substituted for
        missing entries) and modifies the columns of ``X`` in place before
        converting it to numpy.
        """
        # Remember the column names before X is turned into a numpy array.
        # presumably X is a datatable Frame (uses .names / dt.f) - TODO confirm
        orig_cols = list(X.names)
        if self.num_classes >= 2:
            lb = LabelEncoder()
            lb.fit(self.labels)
            y = lb.transform(y)
            model = ExtraTreesClassifier(**self.params)
        else:
            model = ExtraTreesRegressor(**self.params)

        # Replace missing values with a value smaller than all observed values
        self.min = dict()
        for col in X.names:
            XX = X[:, col]
            self.min[col] = XX.min1()
            # No usable minimum (None or NaN): fall back to a large negative
            # constant; otherwise go one below the column minimum.
            if self.min[col] is None or np.isnan(self.min[col]):
                self.min[col] = -1e10
            else:
                self.min[col] -= 1
            XX.replace(None, self.min[col])
            X[:, col] = XX
            # Sanity check: no missing values may remain in this column.
            assert X[dt.isna(dt.f[col]), col].nrows == 0
        X = X.to_numpy()

        model.fit(X, y)
        importances = np.array(model.feature_importances_)
        # 'n_estimators' must be present in self.params; it is reported as
        # the iteration count.
        self.set_model_properties(model=model,
                                  features=orig_cols,
                                  importances=importances.tolist(),
                                  iterations=self.params['n_estimators'])

开发者ID:h2oai，项目名称:driverlessai-recipes，代码行数:32，代码来源:extra_trees.py

示例14: random_forest

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def random_forest(train_vecs,y_train,test_vecs,y_test):
    """Fit a RandomForestClassifier, dump it with joblib and return its test score.

    The model is written to ``storedpaths + 'model_randomforest.pkl'``; the
    return value is ``score(test_vecs, y_test)`` of the fitted classifier.
    """
    forest = RandomForestClassifier(
        n_estimators=10,
        max_depth=10,
        min_samples_split=2,
        n_jobs=1,
        random_state=0,
    )
    forest.fit(train_vecs, y_train)

    model_path = storedpaths + 'model_randomforest.pkl'
    joblib.dump(forest, model_path)

    return forest.score(test_vecs, y_test)
    
# 训练 ExtraTreesClassifier 分类算法

开发者ID:ruanyangry，项目名称:Sentiment_Analysis_cnn_lstm_cnnlstm_textcnn_bilstm，代码行数:10，代码来源:sentiment_analysis_ml.py

示例15: extract_tree

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def extract_tree(train_vecs,y_train,test_vecs,y_test):
    """Fit an ExtraTreesClassifier, dump it with joblib and return its test score.

    The model is written to ``storedpaths + 'model_extracttree.pkl'``; the
    return value is ``score(test_vecs, y_test)`` of the fitted classifier.
    """
    extra_trees = ExtraTreesClassifier(
        n_estimators=10,
        max_depth=10,
        min_samples_split=2,
        n_jobs=1,
        random_state=0,
    )
    extra_trees.fit(train_vecs, y_train)

    model_path = storedpaths + 'model_extracttree.pkl'
    joblib.dump(extra_trees, model_path)

    return extra_trees.score(test_vecs, y_test)
    
# 训练 GBDT 分类算法