当前位置: 首页>>代码示例>>Python>>正文


Python ensemble.RandomForestClassifier类代码示例

本文整理汇总了Python中sklearn.ensemble.RandomForestClassifier类的典型用法代码示例。如果您正苦于以下问题:Python ensemble.RandomForestClassifier类的具体用法?Python ensemble.RandomForestClassifier怎么用?Python ensemble.RandomForestClassifier使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块sklearn.ensemble的用法示例。


在下文中一共展示了ensemble.RandomForestClassifier类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: mmb_evaluate_model

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def mmb_evaluate_model(self):
        """
        Cross-validate a random forest on the malicious / benign training split.

        Side effects: stores the feature matrix in ``self.clf_X`` and the label
        array in ``self.clf_y``.

        :return: dict with keys 'accuracy', 'f1', 'precision' and 'recall', each
            holding the 5-fold ``cross_val_score`` result for that metric
            ('f1' is scored with 'f1_macro').
        """
        predictive_features = self.features['predictive_features']
        self.clf_X = self.modeldata[predictive_features].values
        self.clf_y = np.array(self.modeldata['label'])

        # Only the 80% training portion is scored; the held-out part is not
        # used by this method, so it is discarded right away.
        X_train, _, y_train, _ = train_test_split(
            self.clf_X, self.clf_y, test_size=0.2, random_state=0)

        # Keep the first element of every binarized label row as the target.
        lb = LabelBinarizer()
        y_train = np.array([row[0] for row in lb.fit_transform(y_train)])

        # No up-front fit: cross_val_score fits its own copies of the estimator
        # on each fold, so training it here beforehand was wasted work.
        eval_cls = RandomForestClassifier(n_estimators=100, max_features=.2)

        def _cv(metric):
            return cross_val_score(eval_cls, X_train, y_train, cv=5, scoring=metric)

        recall = _cv('recall')
        precision = _cv('precision')
        accuracy = _cv('accuracy')
        f1_score = _cv('f1_macro')

        return {'accuracy': accuracy, 'f1': f1_score, 'precision': precision, 'recall': recall}
开发者ID:egaus,项目名称:MaliciousMacroBot,代码行数:22,代码来源:mmbot.py

示例2: __init__

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def __init__(self, model_type='classifier', feature_type='fingerprints',
                 n_estimators=100, n_ensemble=5):
        """Create an ensemble of random forest models.

        Args:
            model_type: 'classifier' fills the ensemble with ``RFC`` members,
                'regressor' with ``RFR`` members.
            feature_type: stored on the instance; when it equals 'descriptors'
                a descriptor ``Calculator`` and one mean slot per ensemble
                member are created as well.
            n_estimators: passed to every ensemble member.
            n_ensemble: number of models in the ensemble.

        Raises:
            ValueError: if ``model_type`` is neither 'classifier' nor 'regressor'.
        """
        super(RandomForestQSAR, self).__init__()
        self.n_estimators = n_estimators
        self.n_ensemble = n_ensemble
        self.model = []
        self.model_type = model_type

        # Pick the estimator class once, then build all members the same way.
        if self.model_type == 'classifier':
            forest_cls = RFC
        elif self.model_type == 'regressor':
            forest_cls = RFR
        else:
            raise ValueError('invalid value for argument')
        self.model.extend(
            forest_cls(n_estimators=n_estimators) for _ in range(n_ensemble))

        self.feature_type = feature_type
        if self.feature_type == 'descriptors':
            self.calc = Calculator(descriptors, ignore_3D=True)
            self.desc_mean = [0 for _ in range(self.n_ensemble)]
开发者ID:Mariewelt,项目名称:OpenChem,代码行数:21,代码来源:vanilla_model.py

示例3: __init__

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def __init__(self, outputs, inputs, k=None, hypers=None, params=None,
            distargs=None, rng=None):
        """Set up an empty random-forest model with one output and >= 1 inputs.

        Args:
            outputs: sequence holding exactly one output index.
            inputs: non-empty sequence of input indexes; must not contain the
                output.
            k: number of output categories; when None it is read from
                ``distargs['k']``.
            hypers: accepted but not used by this initializer.
            params: optional dict; 'alpha' (default .1) and 'forest' (a ready
                estimator, default None) are read from it.
            distargs: dict providing ``['inputs']['stattypes']`` (one entry per
                input) and, if ``k`` is None, ``['k']``. It is subscripted
                unconditionally, so it is required despite the None default.
            rng: random state to use; a fresh one from ``gu.gen_rng()`` is
                created when None.
        """
        # Assigned once; the original repeated this statement further down,
        # which built and discarded a second generator when rng was None.
        self.rng = gu.gen_rng() if rng is None else rng
        self.outputs = outputs
        self.inputs = inputs
        assert len(self.outputs) == 1
        assert len(self.inputs) >= 1
        assert self.outputs[0] not in self.inputs
        assert len(distargs['inputs']['stattypes']) == len(self.inputs)
        self.stattypes = distargs['inputs']['stattypes']
        # Number of output categories and input dimension.
        # XXX WHATTA HACK. BayesDB passes in top-level kwargs, not in distargs.
        self.k = k if k is not None else int(distargs['k'])
        self.p = len(distargs['inputs']['stattypes'])
        # Sufficient statistics.
        self.N = 0
        self.data = Data(x=OrderedDict(), Y=OrderedDict())
        self.counts = [0] * self.k
        # Outlier and random forest parameters.
        if params is None:
            params = {}
        self.alpha = params.get('alpha', .1)
        # NOTE: the attribute is called "regressor" but holds a classifier.
        self.regressor = params.get('forest', None)
        if self.regressor is None:
            self.regressor = RandomForestClassifier(random_state=self.rng)
开发者ID:probcomp,项目名称:cgpm,代码行数:27,代码来源:forest.py

示例4: test_sklearn_classification_overfit

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def test_sklearn_classification_overfit(self):
    """Check that a random forest wrapped in SklearnModel overfits a tiny dataset.

    Builds 10 random samples with 3 features and one binary task, fits the
    model on them and requires a training ROC-AUC above 0.9.
    """
    n_samples, n_features, n_tasks = 10, 3, 1

    # Fixed seed so the random features and labels are reproducible.
    np.random.seed(123)
    features = np.random.rand(n_samples, n_features)
    labels = np.random.randint(2, size=(n_samples, n_tasks))
    weights = np.ones((n_samples, n_tasks))
    sample_ids = np.arange(n_samples)
    dataset = dc.data.NumpyDataset(features, labels, weights, sample_ids)

    metric = dc.metrics.Metric(dc.metrics.roc_auc_score)
    model = dc.models.SklearnModel(RandomForestClassifier())

    # Train, then persist the fitted model.
    model.fit(dataset)
    model.save()

    # Score on the very data the model was trained on.
    train_scores = model.evaluate(dataset, [metric])
    assert train_scores[metric.name] > .9
开发者ID:deepchem,项目名称:deepchem,代码行数:27,代码来源:test_overfit.py

示例5: trainFunctionTypeClassifier

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def trainFunctionTypeClassifier(self, scs):
        """Fit the function code-type classifier from all known code segments.

        Functions for which ``fptr_identifier.isPointedFunction`` returns a
        truthy value are left out of the training set. The fitted forest is
        stored in ``self._type_classifier``.

        Args:
            scs (list): list of all known (sark) code segments

        Note:
            Training must happen *after* the calibration phase
        """
        training_funcs = [
            func
            for sc in scs
            for func in sc.functions
            if not self._analyzer.fptr_identifier.isPointedFunction(func.start_ea)
        ]
        forest = RandomForestClassifier(n_estimators=100)
        start_addresses = [func.start_ea for func in training_funcs]
        samples = [self.extractFunctionTypeSample(ea) for ea in start_addresses]
        labels = [self._analyzer.codeType(ea) for ea in start_addresses]
        # Train on (sample, code type) pairs and keep the fitted model.
        forest.fit(samples, labels)
        self._type_classifier = forest
开发者ID:CheckPointSW,项目名称:Karta,代码行数:22,代码来源:function.py

示例6: buildModel

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def buildModel(dataset, method, parameters):
    """
    Build final model for predicting real testing data

    Every column of ``dataset`` except the last one is used as a feature; the
    target is read from the 'UpDown' column.

    Args:
        dataset: DataFrame holding the features and an 'UpDown' column.
        method: one of 'RNN', 'RF', 'KNN', 'SVM' or 'ADA'.
        parameters: only read for 'SVM', where ``parameters[0]`` is ``C`` and
            ``parameters[1]`` is ``gamma``.

    Returns:
        For 'RNN', whatever ``performRNNlass`` returns; otherwise the result
        of calling ``fit`` on the chosen classifier.

    Raises:
        ValueError: if ``method`` is not one of the supported names
            (previously this surfaced as an UnboundLocalError on ``clf``).
    """
    features = dataset.columns[0:-1]

    if method == 'RNN':
        return performRNNlass(dataset[features], dataset['UpDown'])

    if method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()
    elif method == 'SVM':
        c = parameters[0]
        g = parameters[1]
        clf = SVC(C=c, gamma=g)
    elif method == 'ADA':
        clf = AdaBoostClassifier()
    else:
        # Fail with a clear message instead of hitting an unbound ``clf`` below.
        raise ValueError('unknown method: %r' % (method,))

    return clf.fit(dataset[features], dataset['UpDown'])
开发者ID:chinuy,项目名称:stock-price-prediction,代码行数:27,代码来源:classifier.py

示例7: Train

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def Train(data, treecount, tezh, yanzhgdata):
    """Fit a class-balanced random forest and score it on training and validation data.

    Args:
        data: 2-D training array; all columns but the last are features, the
            last column is the label.
        treecount: number of trees (``n_estimators``).
        tezh: value passed as ``max_features``.
        yanzhgdata: validation array laid out like ``data``.

    Returns:
        Tuple ``(train_score, validation_score)``, each being element 0 of the
        corresponding ``fmse(...)`` result. Both values are also printed.
    """
    # NOTE(review): the original comments labelled the first score "MSE" and the
    # second "f1 measure", yet both come from fmse -- confirm what fmse returns.
    train_features, train_labels = data[:, :-1], data[:, -1]
    forest = RFC(n_estimators=treecount, max_features=tezh, class_weight='balanced')
    forest.fit(train_features, train_labels)

    # Score the predictions on the training data.
    train_mse = fmse(train_labels, forest.predict(train_features))[0]

    # Score the predictions on the validation data.
    valid_features, valid_labels = yanzhgdata[:, :-1], yanzhgdata[:, -1]
    add_mse = fmse(valid_labels, forest.predict(valid_features))[0]

    print(train_mse, add_mse)
    return train_mse, add_mse

# 最终确定组合的函数 
开发者ID:Anfany,项目名称:Machine-Learning-for-Beginner-by-Python3,代码行数:18,代码来源:adult_RF_Classify.py

示例8: make_pipeline

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def make_pipeline(encoding_method):
    """Build the encode -> scale -> random-forest pipeline for one encoding method.

    The clean columns are always one-hot encoded; the dirty column is encoded
    with ``encoder_dict[encoding_method]``. Columns not listed are dropped.
    """
    column_steps = [
        # static transformer for the other (clean) columns
        ('one-hot-clean', encoder_dict['one-hot'], clean_columns),
        # the encoded dirty column
        (encoding_method + '-dirty', encoder_dict[encoding_method],
         [dirty_column]),
    ]
    # ColumnTransformer combines the per-column features into one matrix.
    combine = ColumnTransformer(transformers=column_steps, remainder='drop')

    steps = [
        ('union', combine),
        ('scaler', StandardScaler(with_mean=False)),
        ('classifier', RandomForestClassifier(random_state=5)),
    ]
    return Pipeline(steps)


###############################################################################
# Evaluation of different encoding methods
# -----------------------------------------
# We then loop over encoding methods, scoring the different pipeline predictions
# using a cross validation score: 
开发者ID:dirty-cat,项目名称:dirty_cat,代码行数:25,代码来源:03_fit_predict_plot_midwest_survey.py

示例9: build_model

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def build_model(self, X_train, y_train):
        """Return a previously saved model if allowed, else configure a new forest.

        When ``self.paras.load`` is set, ``load_training_model`` is tried first
        and its result is returned if it is not None. Otherwise the tree-count,
        feature-count and window ranges from ``self.paras`` are handed to
        ``best_window`` and a ``RandomForestClassifier`` is constructed from the
        chosen values. No ``fit`` call is made on the new forest in this method.

        NOTE(review): ``index`` is not defined inside this function; it has to
        come from an enclosing/global scope -- confirm against the full file.
        """
        # ``== True`` is kept so that only True-equal flag values enable loading.
        if self.paras.load == True:
            model = self.load_training_model(self.paras.window_len)
            # Identity check: a model object with custom comparison operators
            # must not be able to make a successfully loaded model look missing.
            if model is not None:
                return model

        print('build Random Forrest model...')

        # range of number of trees : 5*(1 -> 10) = 5,10,...,50 trees
        t_min = self.paras.tree_min[index]
        t_max = self.paras.tree_max[index]
        # range of max of features : 1 -> 10 features
        f_min = self.paras.feature_min[index]
        f_max = self.paras.feature_max[index]
        # range of window : 1 -> 70 days
        w_min = self.paras.window_min
        w_max = self.paras.window_max

        # The first returned value (the window) is not needed to build the forest.
        _, n_opt, m_opt = self.best_window(
            X_train, y_train, w_min, w_max, t_min, t_max, f_min, f_max)
        model = RandomForestClassifier(
            n_estimators=n_opt, max_features=m_opt, n_jobs=8,
            verbose=self.paras.verbose)
        return model
开发者ID:doncat99,项目名称:StockRecommendSystem,代码行数:23,代码来源:Stock_Prediction_Model_Random_Forrest.py

示例10: test_run

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def test_run(self):
        """Running the task with 'RandomForestClassifier' must dump a fitted-type model.

        Feeds two item embeddings and three labelled item pairs through the
        task's mocked ``load``/``dump`` hooks and checks the dumped object's type.
        """
        self.input_data['item2embedding'] = {'i0': [1, 2], 'i1': [3, 4]}
        pair_columns = dict(
            item1=['i0', 'i0', 'i1'],
            item2=['i0', 'i1', 'i1'],
            similarity=[1, 0, 1],
        )
        self.input_data['similarity_data'] = pd.DataFrame(pair_columns)

        task_kwargs = dict(
            item2embedding_task=_DummyTask(),
            similarity_data_task=_DummyTask(),
            model_name='RandomForestClassifier',
            item0_column_name='item1',
            item1_column_name='item2',
            similarity_column_name='similarity',
        )
        task = TrainPairwiseSimilarityModel(**task_kwargs)
        # Route the task's I/O through the test case's in-memory helpers.
        task.load = MagicMock(side_effect=self._load)
        task.dump = MagicMock(side_effect=self._dump)

        task.run()
        self.assertIsInstance(self.dump_data, RandomForestClassifier)
开发者ID:m3dev,项目名称:redshells,代码行数:19,代码来源:test_train_pairwise_similarity_model.py

示例11: create_random_forest_tfidf

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def create_random_forest_tfidf():
    """Return an unfitted pipeline: case-preserving TF-IDF followed by a 500-tree forest."""
    steps = [
        ("vectorizer", TfidfVectorizer(lowercase=False)),
        ("rf", RandomForestClassifier(n_estimators=500, random_state=777)),
    ]
    return Pipeline(steps)
开发者ID:interpretml,项目名称:interpret-text,代码行数:6,代码来源:common_utils.py

示例12: create_random_forest_vectorizer

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def create_random_forest_vectorizer():
    """Return an unfitted pipeline: binary, case-preserving token counts followed by a 500-tree forest."""
    steps = [
        ("vectorizer", CountVectorizer(lowercase=False, min_df=0.0, binary=True)),
        ("rf", RandomForestClassifier(n_estimators=500, random_state=777)),
    ]
    return Pipeline(steps)
开发者ID:interpretml,项目名称:interpret-text,代码行数:6,代码来源:common_utils.py

示例13: create_sklearn_random_forest_classifier

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def create_sklearn_random_forest_classifier(X, y):
    """Fit a depth-4 random forest (seed 777) on ``X``/``y`` and return the value of ``fit``."""
    forest = ensemble.RandomForestClassifier(max_depth=4, random_state=777)
    return forest.fit(X, y)
开发者ID:interpretml,项目名称:interpret-text,代码行数:6,代码来源:common_utils.py

示例14: build_models

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def build_models(self):
        """
        Train the random forest classifier on the stored feature matrix and labels.

        Reads ``self.clf_X`` and ``self.clf_y``, stores the classifier in
        ``self.cls`` and fits it.
        :return: the classifier stored in ``self.cls``
        """
        forest = RandomForestClassifier(n_estimators=100, max_features=.2)
        # Publish the estimator on the instance first, then train it.
        self.cls = forest
        forest.fit(self.clf_X, self.clf_y)

        return forest
开发者ID:egaus,项目名称:MaliciousMacroBot,代码行数:13,代码来源:mmbot.py

示例15: define_clfs_params

# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def define_clfs_params(self):
        '''
        Defines all relevant parameters and classes for classifier objects.
        Edit these if you wish to change parameters.

        Sets ``self.clfs`` (short name -> estimator instance) and
        ``self.params`` (short name -> parameter grid: dict of option lists).
        '''
        # NOTE(review): some option spellings below (e.g. loss 'log',
        # algorithm 'SAMME.R') may depend on the installed scikit-learn
        # version -- confirm against the version this project pins.

        # These are the classifiers
        self.clfs = {
            'RF': RandomForestClassifier(n_estimators=50, n_jobs=-1),
            'ET': ExtraTreesClassifier(n_estimators=10, n_jobs=-1, criterion='entropy'),
            # max_depth must be a single int (or None), not a list of
            # candidates; the original list would break the base tree when
            # AdaBoost fits it. A depth-1 stump is used instead.
            'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
            'LR': LogisticRegression(penalty='l1', C=1e5),
            'SVM': svm.SVC(kernel='linear', probability=True, random_state=0),
            'GB': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
            'NB': GaussianNB(),
            'DT': DecisionTreeClassifier(),
            'SGD': SGDClassifier(loss='log', penalty='l2'),
            'KNN': KNeighborsClassifier(n_neighbors=3),
            }
        # These are the parameters which will be run through
        self.params = {
             'RF': {'n_estimators': [1, 10, 100, 1000],
                    'max_depth': [10, 15, 20, 30, 40, 50, 60, 70, 100],
                    'max_features': ['sqrt', 'log2'],
                    'min_samples_split': [2, 5, 10],
                    'random_state': [1]},
             'LR': {'penalty': ['l1', 'l2'],
                    'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10],
                    'random_state': [1]},
             'SGD': {'loss': ['log'],
                     'penalty': ['l2', 'l1', 'elasticnet'],
                     'random_state': [1]},
             'ET': {'n_estimators': [1, 10, 100, 1000],
                    'criterion': ['gini', 'entropy'],
                    'max_depth': [1, 3, 5, 10, 15],
                    'max_features': ['sqrt', 'log2'],
                    'min_samples_split': [2, 5, 10],
                    'random_state': [1]},
             'AB': {'algorithm': ['SAMME', 'SAMME.R'],
                    'n_estimators': [1, 10, 100, 1000],
                    'random_state': [1]},
             'GB': {'n_estimators': [1, 10, 100, 1000],
                    'learning_rate': [0.001, 0.01, 0.05, 0.1, 0.5],
                    'subsample': [0.1, 0.5, 1.0],
                    'max_depth': [1, 3, 5, 10, 20, 50, 100],
                    'random_state': [1]},
             'NB': {},
             'DT': {'criterion': ['gini', 'entropy'],
                    'max_depth': [1, 2, 15, 20, 30, 40, 50],
                    'max_features': ['sqrt', 'log2'],
                    'min_samples_split': [2, 5, 10],
                    'random_state': [1]},
             'SVM': {'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10],
                     'kernel': ['linear'],
                     'random_state': [1]},
             'KNN': {'n_neighbors': [1, 5, 10, 25, 50, 100],
                     'weights': ['uniform', 'distance'],
                     'algorithm': ['auto', 'ball_tree', 'kd_tree']},
             }
开发者ID:aldengolab,项目名称:fake-news-detection,代码行数:33,代码来源:model_loop.py


注:本文中的sklearn.ensemble.RandomForestClassifier类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。