本文整理汇总了Python中sklearn.ensemble.RandomForestClassifier类的典型用法代码示例。如果您正苦于以下问题：Python ensemble.RandomForestClassifier的具体用法？Python ensemble.RandomForestClassifier怎么用？Python ensemble.RandomForestClassifier使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块sklearn.ensemble的用法示例。
在下文中一共展示了ensemble.RandomForestClassifier类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: mmb_evaluate_model
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def mmb_evaluate_model(self):
    """
    Cross-validate the malicious / benign random forest on a training split.

    Reads ``self.features['predictive_features']`` and ``self.modeldata`` and
    stores the resulting feature matrix and label vector on ``self.clf_X``
    and ``self.clf_y``.  80% of the rows (``random_state=0``) are used for
    5-fold cross validation; the remaining 20% are not used by this method.

    Returns:
        dict: per-fold score arrays under the keys ``'accuracy'``, ``'f1'``
        (scored with ``'f1_macro'``), ``'precision'`` and ``'recall'``.
    """
    predictive_features = self.features['predictive_features']
    self.clf_X = self.modeldata[predictive_features].values
    self.clf_y = np.array(self.modeldata['label'])

    # Only the training part of the split is evaluated here.
    X_train, _, y_train, _ = train_test_split(
        self.clf_X, self.clf_y, test_size=0.2, random_state=0)

    # Keep the first binarized column as a flat label vector.
    y_train = LabelBinarizer().fit_transform(y_train)[:, 0]

    # No explicit fit: cross_val_score clones and refits the estimator for
    # every fold, so fitting it beforehand would only waste time.
    eval_cls = RandomForestClassifier(n_estimators=100, max_features=.2)
    recall = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='recall')
    precision = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='precision')
    accuracy = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='accuracy')
    f1 = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='f1_macro')
    return {'accuracy': accuracy, 'f1': f1, 'precision': precision, 'recall': recall}
示例2: __init__
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def __init__(self, model_type='classifier', feature_type='fingerprints',
             n_estimators=100, n_ensemble=5):
    """
    Set up an ensemble of random forests.

    Args:
        model_type: ``'classifier'`` fills the ensemble with ``RFC`` members,
            ``'regressor'`` with ``RFR`` members.
        feature_type: stored on the instance; ``'descriptors'`` additionally
            creates a descriptor ``Calculator`` and one ``desc_mean`` slot
            per ensemble member.
        n_estimators: number of trees passed to every ensemble member.
        n_ensemble: number of forests kept in ``self.model``.

    Raises:
        ValueError: if ``model_type`` is neither ``'classifier'`` nor
            ``'regressor'``.
    """
    super(RandomForestQSAR, self).__init__()
    self.n_estimators = n_estimators
    self.n_ensemble = n_ensemble
    self.model = []
    self.model_type = model_type

    if self.model_type == 'classifier':
        self.model.extend(
            RFC(n_estimators=n_estimators) for _ in range(n_ensemble))
    elif self.model_type == 'regressor':
        self.model.extend(
            RFR(n_estimators=n_estimators) for _ in range(n_ensemble))
    else:
        raise ValueError('invalid value for argument')

    self.feature_type = feature_type
    if self.feature_type == 'descriptors':
        # Descriptor features need a calculator and one mean slot per member.
        self.calc = Calculator(descriptors, ignore_3D=True)
        self.desc_mean = [0] * self.n_ensemble
示例3: __init__
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def __init__(self, outputs, inputs, k=None, hypers=None, params=None,
             distargs=None, rng=None):
    """
    Initialise a single-output model backed by a random forest classifier.

    Args:
        outputs: list holding exactly one output index.
        inputs: non-empty list of input indexes; must not contain the output.
        k: number of output categories; read from ``distargs['k']`` when None.
        hypers: accepted for interface compatibility; not used here.
        params: optional dict; ``'alpha'`` (default ``.1``) and ``'forest'``
            (a ready-made classifier) are read from it.
        distargs: dict providing ``['inputs']['stattypes']`` (one entry per
            input) and, when ``k`` is None, ``['k']``.
        rng: random state; a fresh one from ``gu.gen_rng()`` is used when None.

    Raises:
        AssertionError: if the outputs / inputs / stattypes are inconsistent.
    """
    # The original initialised the rng twice; once is enough.
    self.rng = gu.gen_rng() if rng is None else rng
    self.outputs = outputs
    self.inputs = inputs
    assert len(self.outputs) == 1
    assert len(self.inputs) >= 1
    assert self.outputs[0] not in self.inputs
    assert len(distargs['inputs']['stattypes']) == len(self.inputs)
    self.stattypes = distargs['inputs']['stattypes']
    # Number of output categories and input dimension.
    # XXX WHATTA HACK. BayesDB passes in top-level kwargs, not in distargs.
    self.k = k if k is not None else int(distargs['k'])
    self.p = len(distargs['inputs']['stattypes'])
    # Sufficient statistics.
    self.N = 0
    self.data = Data(x=OrderedDict(), Y=OrderedDict())
    self.counts = [0] * self.k
    # Outlier and random forest parameters.
    if params is None:
        params = {}
    self.alpha = params.get('alpha', .1)
    self.regressor = params.get('forest', None)
    if self.regressor is None:
        self.regressor = RandomForestClassifier(random_state=self.rng)
示例4: test_sklearn_classification_overfit
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def test_sklearn_classification_overfit(self):
    """Check that a sklearn random forest can overfit a tiny classification set."""
    n_samples, n_features, n_tasks = 10, 3, 1

    # Small random dataset; the fixed seed keeps the draws reproducible.
    np.random.seed(123)
    ids = np.arange(n_samples)
    features = np.random.rand(n_samples, n_features)
    labels = np.random.randint(2, size=(n_samples, n_tasks))
    weights = np.ones((n_samples, n_tasks))
    dataset = dc.data.NumpyDataset(features, labels, weights, ids)

    classification_metric = dc.metrics.Metric(dc.metrics.roc_auc_score)
    model = dc.models.SklearnModel(RandomForestClassifier())

    # Train, then persist the trained model.
    model.fit(dataset)
    model.save()

    # Score on the very data the model was trained on.
    scores = model.evaluate(dataset, [classification_metric])
    assert scores[classification_metric.name] > .9
示例5: trainFunctionTypeClassifier
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def trainFunctionTypeClassifier(self, scs):
    """Fit the function type classifier on every known code segment.

    Args:
        scs (list): list of all known (sark) code segments

    Note:
        Training must happen *after* the calibration phase
    """
    # Functions reported as "pointed" by the fptr identifier are left out.
    candidates = [
        func
        for sc in scs
        for func in sc.functions
        if not self._analyzer.fptr_identifier.isPointedFunction(func.start_ea)
    ]
    forest = RandomForestClassifier(n_estimators=100)
    start_addresses = [func.start_ea for func in candidates]
    samples = [self.extractFunctionTypeSample(ea) for ea in start_addresses]
    labels = [self._analyzer.codeType(ea) for ea in start_addresses]
    # Train on the extracted samples and their code types.
    forest.fit(samples, labels)
    # Keep the trained classifier on the instance.
    self._type_classifier = forest
示例6: buildModel
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def buildModel(dataset, method, parameters):
    """
    Build the final model used for predicting real testing data.

    Every column of ``dataset`` except the last one is used as a feature and
    the ``'UpDown'`` column is used as the target.

    Args:
        dataset: table exposing ``.columns`` and column-based indexing.
        method: one of ``'RNN'``, ``'RF'``, ``'KNN'``, ``'SVM'``, ``'ADA'``.
        parameters: only read for ``'SVM'``, where ``parameters[0]`` is ``C``
            and ``parameters[1]`` is ``gamma``.

    Returns:
        The fitted classifier (for ``'RNN'``, whatever ``performRNNlass``
        returns).

    Raises:
        ValueError: if ``method`` is not one of the supported names.
    """
    features = dataset.columns[0:-1]

    if method == 'RNN':
        # The RNN helper builds its own model, so return its result directly.
        return performRNNlass(dataset[features], dataset['UpDown'])
    elif method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()
    elif method == 'SVM':
        c = parameters[0]
        g = parameters[1]
        clf = SVC(C=c, gamma=g)
    elif method == 'ADA':
        clf = AdaBoostClassifier()
    else:
        # Previously this fell through and failed on an unbound ``clf``.
        raise ValueError('unknown method: %r' % (method,))

    return clf.fit(dataset[features], dataset['UpDown'])
示例7: Train
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def Train(data, treecount, tezh, yanzhgdata):
    """
    Fit a class-balanced random forest and score it on two datasets.

    In both ``data`` and ``yanzhgdata`` the last column is the target and all
    preceding columns are the features.

    Args:
        data: 2-D training array.
        treecount: passed to the forest as ``n_estimators``.
        tezh: passed to the forest as ``max_features``.
        yanzhgdata: 2-D validation array.

    Returns:
        tuple: ``(train_score, validation_score)``, each being element 0 of
        the value returned by ``fmse``.
    """
    forest = RFC(n_estimators=treecount, max_features=tezh, class_weight='balanced')
    forest.fit(data[:, :-1], data[:, -1])

    # Predictions and score on the training data.
    train_predictions = forest.predict(data[:, :-1])
    train_score = fmse(data[:, -1], train_predictions)[0]

    # Predictions and score on the validation data.
    validation_predictions = forest.predict(yanzhgdata[:, :-1])
    validation_score = fmse(yanzhgdata[:, -1], validation_predictions)[0]

    print(train_score, validation_score)
    return train_score, validation_score
# 最终确定组合的函数
示例8: make_pipeline
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def make_pipeline(encoding_method):
    """
    Build the encode -> scale -> random forest pipeline for one encoder.

    The clean columns are always one-hot encoded; the dirty column is encoded
    with the encoder registered under ``encoding_method`` in ``encoder_dict``.
    """
    column_encoders = [
        # static transformer for the other columns
        ('one-hot-clean', encoder_dict['one-hot'], clean_columns),
        # encoder under test, applied to the dirty column only
        (encoding_method + '-dirty', encoder_dict[encoding_method],
         [dirty_column]),
    ]
    # ColumnTransformer combines the features; unlisted columns are dropped.
    combined_features = ColumnTransformer(
        transformers=column_encoders, remainder='drop')
    steps = [
        ('union', combined_features),
        ('scaler', StandardScaler(with_mean=False)),
        ('classifier', RandomForestClassifier(random_state=5)),
    ]
    return Pipeline(steps)
###############################################################################
# Evaluation of different encoding methods
# -----------------------------------------
# We then loop over encoding methods, scoring the different pipeline predictions
# using a cross validation score:
示例9: build_model
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def build_model(self, X_train, y_train):
    """
    Return a previously saved model or configure a new random forest.

    When ``self.paras.load`` equals True, a stored model is looked up with
    ``self.load_training_model(self.paras.window_len)`` and returned if one
    is found.  Otherwise the tree-count, feature-count and window ranges from
    ``self.paras`` are searched with ``self.best_window`` and a new, not yet
    fitted, ``RandomForestClassifier`` is created from the best values.

    Args:
        X_train: training features, forwarded to ``self.best_window``.
        y_train: training targets, forwarded to ``self.best_window``.

    Returns:
        The loaded model, or a newly configured ``RandomForestClassifier``.
    """
    if self.paras.load == True:
        model = self.load_training_model(self.paras.window_len)
        # Identity check: ``!= None`` would invoke the model's own __ne__.
        if model is not None:
            return model

    print('build Random Forrest model...')

    # NOTE(review): ``index`` is not defined inside this method; presumably it
    # is a module-level name or a missing parameter -- confirm against callers.
    # range of number of trees : 5*(1 -> 10) = 5,10,...,50 trees
    t_min = self.paras.tree_min[index]
    t_max = self.paras.tree_max[index]
    # range of max of features : 1 -> 10 features
    f_min = self.paras.feature_min[index]
    f_max = self.paras.feature_max[index]
    # range of window : 1 -> 70 days
    w_min = self.paras.window_min
    w_max = self.paras.window_max

    # The best window is not needed to configure the forest itself.
    _, n_opt, m_opt = self.best_window(
        X_train, y_train, w_min, w_max, t_min, t_max, f_min, f_max)
    model = RandomForestClassifier(
        n_estimators=n_opt, max_features=m_opt, n_jobs=8,
        verbose=self.paras.verbose)
    return model
示例10: test_run
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def test_run(self):
    """Run the pairwise similarity training task and check the dumped model type."""
    # Two items with 2-d embeddings and three labelled item pairs.
    self.input_data['item2embedding'] = {'i0': [1, 2], 'i1': [3, 4]}
    similarity_columns = {
        'item1': ['i0', 'i0', 'i1'],
        'item2': ['i0', 'i1', 'i1'],
        'similarity': [1, 0, 1],
    }
    self.input_data['similarity_data'] = pd.DataFrame(similarity_columns)

    task = TrainPairwiseSimilarityModel(
        item2embedding_task=_DummyTask(),
        similarity_data_task=_DummyTask(),
        model_name='RandomForestClassifier',
        item0_column_name='item1',
        item1_column_name='item2',
        similarity_column_name='similarity')

    # Route the task's load / dump through this test case's own helpers.
    task.load = MagicMock(side_effect=self._load)
    task.dump = MagicMock(side_effect=self._dump)
    task.run()

    self.assertIsInstance(self.dump_data, RandomForestClassifier)
示例11: create_random_forest_tfidf
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def create_random_forest_tfidf():
    """Return a pipeline of a case-preserving TF-IDF vectorizer and a 500-tree forest."""
    steps = [
        ("vectorizer", TfidfVectorizer(lowercase=False)),
        ("rf", RandomForestClassifier(n_estimators=500, random_state=777)),
    ]
    return Pipeline(steps)
示例12: create_random_forest_vectorizer
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def create_random_forest_vectorizer():
    """Return a pipeline of a binary, case-preserving count vectorizer and a 500-tree forest."""
    steps = [
        ("vectorizer", CountVectorizer(lowercase=False, min_df=0.0, binary=True)),
        ("rf", RandomForestClassifier(n_estimators=500, random_state=777)),
    ]
    return Pipeline(steps)
示例13: create_sklearn_random_forest_classifier
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def create_sklearn_random_forest_classifier(X, y):
    """Fit a depth-4 random forest with a fixed seed on ``X`` / ``y`` and return the result of ``fit``."""
    forest = ensemble.RandomForestClassifier(max_depth=4, random_state=777)
    return forest.fit(X, y)
示例14: build_models
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def build_models(self):
    """
    Train the random forest on the stored classifier matrix and labels.

    Intended to run after ``get_language_features``; reads ``self.clf_X`` and
    ``self.clf_y`` and stores the classifier on ``self.cls``.

    :return: the classifier stored on ``self.cls``
    """
    forest = RandomForestClassifier(n_estimators=100, max_features=.2)
    # Publish the estimator on the instance first, then train it.
    self.cls = forest
    self.cls.fit(self.clf_X, self.clf_y)
    return self.cls
示例15: define_clfs_params
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import RandomForestClassifier [as 别名]
def define_clfs_params(self):
    '''
    Defines all relevant parameters and classes for classifier objects.
    Edit these if you wish to change parameters.

    Sets ``self.clfs`` (short name -> prototype estimator) and
    ``self.params`` (short name -> parameter grid keyed by estimator
    parameter name).  Both dicts share the same set of short names.
    '''
    # These are the classifiers
    self.clfs = {
        'RF': RandomForestClassifier(n_estimators=50, n_jobs=-1),
        'ET': ExtraTreesClassifier(n_estimators=10, n_jobs=-1, criterion='entropy'),
        # max_depth must be a single int (or None) on an estimator; the list
        # [1, 5, 10, 15] that used to be passed here is grid syntax and made
        # the AdaBoost prototype impossible to fit.  A depth-1 stump is used.
        'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
        # NOTE(review): penalty='l1' needs a solver that supports it; whether
        # the default solver does depends on the installed scikit-learn -- confirm.
        'LR': LogisticRegression(penalty='l1', C=1e5),
        'SVM': svm.SVC(kernel='linear', probability=True, random_state=0),
        'GB': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
        'NB': GaussianNB(),
        'DT': DecisionTreeClassifier(),
        'SGD': SGDClassifier(loss='log', penalty='l2'),
        'KNN': KNeighborsClassifier(n_neighbors=3)
    }
    # These are the parameters which will be run through
    self.params = {
        'RF': {'n_estimators': [1, 10, 100, 1000], 'max_depth': [10, 15, 20, 30, 40, 50, 60, 70, 100], 'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
        'LR': {'penalty': ['l1', 'l2'], 'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10], 'random_state': [1]},
        'SGD': {'loss': ['log'], 'penalty': ['l2', 'l1', 'elasticnet'], 'random_state': [1]},
        'ET': {'n_estimators': [1, 10, 100, 1000], 'criterion': ['gini', 'entropy'], 'max_depth': [1, 3, 5, 10, 15], 'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
        'AB': {'algorithm': ['SAMME', 'SAMME.R'], 'n_estimators': [1, 10, 100, 1000], 'random_state': [1]},
        'GB': {'n_estimators': [1, 10, 100, 1000], 'learning_rate': [0.001, 0.01, 0.05, 0.1, 0.5], 'subsample': [0.1, 0.5, 1.0], 'max_depth': [1, 3, 5, 10, 20, 50, 100], 'random_state': [1]},
        'NB': {},
        'DT': {'criterion': ['gini', 'entropy'], 'max_depth': [1, 2, 15, 20, 30, 40, 50], 'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
        'SVM': {'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10], 'kernel': ['linear'], 'random_state': [1]},
        'KNN': {'n_neighbors': [1, 5, 10, 25, 50, 100], 'weights': ['uniform', 'distance'], 'algorithm': ['auto', 'ball_tree', 'kd_tree']}
    }