本文整理汇总了 Python 中 sklearn.ensemble.ExtraTreesClassifier 类的典型用法代码示例。如果您正苦于以下问题：Python ensemble.ExtraTreesClassifier 的具体用法？Python ensemble.ExtraTreesClassifier 怎么用？Python ensemble.ExtraTreesClassifier 使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 sklearn.ensemble 的用法示例。
在下文中一共展示了ensemble.ExtraTreesClassifier方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: create_model_from_signatures
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def create_model_from_signatures(sig_csv_path, model_out, sig_datatype=np.int32):
    """
    Train an ExtraTreesClassifier on a class-signature .csv and pickle it.

    The signatures file is the one produced by extract_features_to_csv.

    Parameters
    ----------
    sig_csv_path
        The path to the signatures file
    model_out
        The location to save the pickled model to.
    sig_datatype
        The datatype to read the csv as. Defaults to int32.

    Notes
    -----
    The hyper-parameters below were arrived at by tpot:
    bootstrap=False, criterion="gini", max_features=0.55, min_samples_leaf=2,
    min_samples_split=16, n_estimators=100, n_jobs=4, class_weight='balanced'
    """
    tpot_settings = {
        "bootstrap": False,
        "criterion": "gini",
        "max_features": 0.55,
        "min_samples_leaf": 2,
        "min_samples_split": 16,
        "n_estimators": 100,
        "n_jobs": 4,
        "class_weight": 'balanced',
    }
    classifier = ens.ExtraTreesClassifier(**tpot_settings)

    # load_signatures returns the (features, labels) pair used for fitting.
    training_features, training_labels = load_signatures(sig_csv_path, sig_datatype)
    classifier.fit(training_features, training_labels)

    # Persist the fitted estimator at the requested location.
    joblib.dump(classifier, model_out)
示例2: __init__
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def __init__(self, params):
    """Build the warm-startable ExtraTreesClassifier used by this algorithm.

    ``params`` is forwarded to the parent constructor and is also queried
    (via ``.get``) for ``criterion``, ``max_features``, ``min_samples_split``
    and ``seed``; each falls back to the default shown below when absent.
    """
    super(ExtraTreesAlgorithm, self).__init__(params)
    logger.debug("ExtraTreesAlgorithm.__init__")

    self.library_version = sklearn.__version__

    # Step-wise training settings come from `additional`, a name that is
    # defined outside this method (presumably module level -- TODO confirm).
    self.trees_in_step = additional.get("trees_in_step", 100)
    self.max_steps = additional.get("max_steps", 50)
    self.early_stopping_rounds = additional.get("early_stopping_rounds", 50)

    estimator_kwargs = {
        "n_estimators": self.trees_in_step,
        "criterion": params.get("criterion", "gini"),
        "max_features": params.get("max_features", 0.6),
        "min_samples_split": params.get("min_samples_split", 30),
        "warm_start": True,
        "n_jobs": -1,
        "random_state": params.get("seed", 1),
    }
    self.model = ExtraTreesClassifier(**estimator_kwargs)
示例3: run_sklearn
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def run_sklearn():
    """Train each estimator in a fixed list and print the total elapsed time.

    Every ``(name, estimator)`` pair is handed to
    ``jhkaggle.train_sklearn.TrainSKLearn("1", name, estimator, False)`` and
    run in turn. Nothing is returned.
    """
    # https://www.analyticsvidhya.com/blog/2015/06/tuning-random-forest-model/
    alg_list = [
        ['lreg', LinearRegression()],
        ['rforest', RandomForestRegressor(n_estimators=1000, n_jobs=-1, max_depth=3)],
        # NOTE(review): this is a classifier in an otherwise all-regressor
        # list -- confirm whether ExtraTreesRegressor was intended.
        ['extree', ExtraTreesClassifier(n_estimators=1000, max_depth=2)],
        # `base_estimator=None` was just the default value; the keyword was
        # removed in scikit-learn 1.4, so it is omitted to keep this call
        # valid on both old and new releases.
        ['adaboost', AdaBoostRegressor(n_estimators=600, learning_rate=1.0)],
        ['knn', sklearn.neighbors.KNeighborsRegressor(n_neighbors=5)],
    ]

    start_time = time.time()
    for name, alg in alg_list:
        train = jhkaggle.train_sklearn.TrainSKLearn("1", name, alg, False)
        train.run()
        # Drop the reference before the next trainer is constructed.
        train = None

    elapsed_time = time.time() - start_time
    print("Elapsed time: {}".format(jhkaggle.util.hms_string(elapsed_time)))
示例4: run_sklearn
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def run_sklearn():
    """Train each classifier in a fixed list through TrainSKLearn.

    Every ``(name, estimator)`` pair is handed to
    ``jhkaggle.train_sklearn.TrainSKLearn("1", name, estimator, False)`` and
    run in turn. Nothing is returned.
    """
    # https://www.analyticsvidhya.com/blog/2015/06/tuning-random-forest-model/
    alg_list = [
        ['rforest', RandomForestClassifier(n_estimators=1000, n_jobs=-1, verbose=1, max_depth=3)],
        ['extree', ExtraTreesClassifier(n_estimators=1000, max_depth=3, n_jobs=-1)],
        # `base_estimator=None` was just the default value; the keyword was
        # removed in scikit-learn 1.4, so it is omitted to keep this call
        # valid on both old and new releases.
        ['adaboost', AdaBoostClassifier(n_estimators=600, learning_rate=1.0)],
        ['knn', sklearn.neighbors.KNeighborsClassifier(n_neighbors=5, n_jobs=-1)],
    ]

    # NOTE(review): start_time is recorded but never read in the lines
    # visible here; the excerpt may be cut short -- confirm before removing.
    start_time = time.time()
    for name, alg in alg_list:
        train = jhkaggle.train_sklearn.TrainSKLearn("1", name, alg, False)
        train.run()
        # Drop the reference before the next trainer is constructed.
        train = None
示例5: test_time
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def test_time(pipeline_name, name, path):
    """Run one benchmark pipeline on a dataset and print the wall time in ms.

    Parameters
    ----------
    pipeline_name
        Which pipeline to build: "LR", "FGS" or "Tree".
    name
        Dataset name; printed and forwarded to ``Benchmark.run_test``.
    path
        Forwarded unchanged to ``Benchmark.run_test``.

    Raises
    ------
    ValueError
        If ``pipeline_name`` is not one of the supported values.
    """
    if pipeline_name == "LR":
        pipeline = make_pipeline(LogisticRegression())
    elif pipeline_name == "FGS":
        pipeline = make_pipeline(FeatureGradientSelector(), LogisticRegression())
    elif pipeline_name == "Tree":
        pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression())
    else:
        # Fail up front instead of hitting an unbound `pipeline` later on.
        raise ValueError("Unknown pipeline_name: {!r}".format(pipeline_name))

    test_benchmark = Benchmark()
    print("Dataset:\t", name)
    print("Pipeline:\t", pipeline_name)

    starttime = datetime.datetime.now()
    test_benchmark.run_test(pipeline, name, path)
    endtime = datetime.datetime.now()

    # timedelta.microseconds is only the sub-second component of the
    # duration; total_seconds() covers the whole interval.
    elapsed_ms = (endtime - starttime).total_seconds() * 1000
    print("Used time: ", elapsed_ms)
    print("")
示例6: test
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def test():
    """Download the rcv1 binary training set, fit a pipeline and print its score.

    Side effects: downloads ``train.bz2`` into the working directory, writes
    the decompressed text to ``train.svm``, and prints the pipeline's score.
    Nothing is returned.
    """
    url_zip_train = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2'
    urllib.request.urlretrieve(url_zip_train, filename='train.bz2')

    # Both handles are managed by the same `with`, so the output file is
    # closed even if decompression or decoding raises.
    with open('train.svm', 'wt') as f_svm, bz2.open('train.bz2', 'rb') as f_zip:
        data = f_zip.read()
        f_svm.write(data.decode('utf-8'))

    X, y = load_svmlight_file('train.svm')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    pipeline = make_pipeline(FeatureGradientSelector(n_epochs=1, n_features=10), LogisticRegression())
    # pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression())
    pipeline.fit(X_train, y_train)

    # NOTE(review): the score is computed on the training split; X_test and
    # y_test are never used -- confirm this is intended.
    print("Pipeline Score: ", pipeline.score(X_train, y_train))
示例7: __init__
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def __init__(self, options):
    """Create the wrapped estimator from an ``options`` mapping.

    ``options`` is passed to ``self.handle_options`` and its optional
    ``'params'`` entry is normalised by ``convert_params`` before being
    forwarded as keyword arguments. The estimator is always created with
    ``class_weight='balanced'``.
    """
    self.handle_options(options)

    integer_keys = ['random_state', 'n_estimators', 'max_depth',
                    'min_samples_split', 'max_leaf_nodes']
    string_keys = ['max_features', 'criterion']
    raw_params = options.get('params', {})
    out_params = convert_params(raw_params, ints=integer_keys, strs=string_keys)

    # With no explicit depth limit, fall back to a leaf-count limit unless
    # the caller already supplied one.
    if 'max_depth' not in out_params:
        out_params.setdefault('max_leaf_nodes', 2000)

    # max_features arrives as a string and is post-processed by a helper.
    if 'max_features' in out_params:
        requested = out_params['max_features']
        out_params['max_features'] = handle_max_features(requested)

    self.estimator = _ExtraTreesClassifier(class_weight='balanced', **out_params)
示例8: __init__
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def __init__(
        self, data_block, predictors=None, cv_folds=10,
        scoring_metric='accuracy', additional_display_metrics=None):
    """Initialise the base classifier around a default ExtraTreesClassifier.

    Parameters
    ----------
    data_block
        Forwarded unchanged to ``base_classification.__init__``.
    predictors
        Predictors forwarded to the base class. ``None`` (the default)
        means an empty list.
    cv_folds
        Forwarded to the base class. Defaults to 10.
    scoring_metric
        Forwarded to the base class. Defaults to 'accuracy'.
    additional_display_metrics
        Forwarded to the base class. ``None`` (the default) means an
        empty list.
    """
    # Build a fresh list per call: a literal `[]` default would be shared
    # by every instance created without the argument.
    if predictors is None:
        predictors = []
    if additional_display_metrics is None:
        additional_display_metrics = []

    base_classification.__init__(
        self, alg=ExtraTreesClassifier(), data_block=data_block,
        predictors=predictors, cv_folds=cv_folds,
        scoring_metric=scoring_metric,
        additional_display_metrics=additional_display_metrics)

    self.model_output = pd.Series(self.default_parameters)
    self.model_output['Feature_Importance'] = "-"
    self.model_output['OOB_Score'] = "-"

    # Set parameters to default values:
    self.set_parameters(set_default=True)
示例9: define_clfs_params
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def define_clfs_params(self):
    '''
    Defines all relevant parameters and classes for classifier objects.
    Edit these if you wish to change parameters.

    Sets two attributes, both keyed by the same short model codes:
    ``self.clfs`` maps each code to an estimator instance, and
    ``self.params`` maps each code to the parameter grid to run through.
    '''
    # These are the classifiers
    self.clfs = {
        'RF': RandomForestClassifier(n_estimators=50, n_jobs=-1),
        'ET': ExtraTreesClassifier(n_estimators=10, n_jobs=-1, criterion='entropy'),
        # max_depth must be a single value, not a list of candidates; a
        # list here makes the base tree unusable at fit time. A depth-1
        # stump is used as the base learner instead.
        'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
        # NOTE(review): penalty='l1' needs a solver that supports it on
        # recent scikit-learn releases -- confirm against the pinned version.
        'LR': LogisticRegression(penalty='l1', C=1e5),
        'SVM': svm.SVC(kernel='linear', probability=True, random_state=0),
        'GB': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
        'NB': GaussianNB(),
        'DT': DecisionTreeClassifier(),
        'SGD': SGDClassifier(loss='log', penalty='l2'),
        'KNN': KNeighborsClassifier(n_neighbors=3),
    }

    # These are the parameters which will be run through
    self.params = {
        'RF': {
            'n_estimators': [1, 10, 100, 1000],
            'max_depth': [10, 15, 20, 30, 40, 50, 60, 70, 100],
            'max_features': ['sqrt', 'log2'],
            'min_samples_split': [2, 5, 10],
            'random_state': [1],
        },
        'LR': {
            'penalty': ['l1', 'l2'],
            'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10],
            'random_state': [1],
        },
        'SGD': {
            'loss': ['log'],
            'penalty': ['l2', 'l1', 'elasticnet'],
            'random_state': [1],
        },
        'ET': {
            'n_estimators': [1, 10, 100, 1000],
            'criterion': ['gini', 'entropy'],
            'max_depth': [1, 3, 5, 10, 15],
            'max_features': ['sqrt', 'log2'],
            'min_samples_split': [2, 5, 10],
            'random_state': [1],
        },
        'AB': {
            'algorithm': ['SAMME', 'SAMME.R'],
            'n_estimators': [1, 10, 100, 1000],
            'random_state': [1],
        },
        'GB': {
            'n_estimators': [1, 10, 100, 1000],
            'learning_rate': [0.001, 0.01, 0.05, 0.1, 0.5],
            'subsample': [0.1, 0.5, 1.0],
            'max_depth': [1, 3, 5, 10, 20, 50, 100],
            'random_state': [1],
        },
        'NB': {},
        'DT': {
            'criterion': ['gini', 'entropy'],
            'max_depth': [1, 2, 15, 20, 30, 40, 50],
            'max_features': ['sqrt', 'log2'],
            'min_samples_split': [2, 5, 10],
            'random_state': [1],
        },
        'SVM': {
            'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10],
            'kernel': ['linear'],
            'random_state': [1],
        },
        'KNN': {
            'n_neighbors': [1, 5, 10, 25, 50, 100],
            'weights': ['uniform', 'distance'],
            'algorithm': ['auto', 'ball_tree', 'kd_tree'],
        },
    }
示例10: test_change_algorithms
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def test_change_algorithms():
    """Check that MALSS algorithms can be listed, removed and added.

    After algorithm selection, the first five names are checked. The first
    and the last algorithm are then removed, and an Extra Trees classifier
    with its own parameter grid is appended, with the list re-checked after
    each change.
    """
    def assert_leading_names(algorithms, expected_names):
        # Compare position by position; entries past the expected names
        # are deliberately left unchecked.
        for position, expected in enumerate(expected_names):
            assert algorithms[position][0] == expected

    X, y = make_classification(n_samples=1000,
                               n_features=10,
                               n_classes=2,
                               n_clusters_per_class=1,
                               random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)

    cls = MALSS('classification')
    cls.fit(X, y, algorithm_selection_only=True)
    assert_leading_names(cls.get_algorithms(),
                         ['Support Vector Machine (RBF Kernel)',
                          'Random Forest',
                          'Logistic Regression',
                          'Decision Tree',
                          'k-Nearest Neighbors'])

    cls.remove_algorithm(0)
    cls.remove_algorithm()
    assert_leading_names(cls.get_algorithms(),
                         ['Random Forest',
                          'Logistic Regression',
                          'Decision Tree'])

    from sklearn.ensemble import ExtraTreesClassifier as ET
    # 'sqrt' is what 'auto' meant for this classifier; the 'auto' spelling
    # was removed from scikit-learn, so the explicit name is used.
    cls.add_algorithm(ET(n_jobs=3),
                      [{'n_estimators': [10, 30, 50],
                        'max_depth': [3, 5, None],
                        'max_features': [0.3, 0.6, 'sqrt']}],
                      'Extremely Randomized Trees')
    assert_leading_names(cls.get_algorithms(),
                         ['Random Forest',
                          'Logistic Regression',
                          'Decision Tree',
                          'Extremely Randomized Trees'])
示例11: test_min_impurity_split
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def test_min_impurity_split():
    """Check that min_impurity_split is passed on to every base estimator.

    Fitting is also expected to emit a DeprecationWarning whose message
    mentions ``min_impurity_decrease``.
    """
    # Regression test for #8006
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    forest_classes = (RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor)

    for forest_class in forest_classes:
        unfitted = forest_class(min_impurity_split=0.1)
        # assert_warns_message hands back the return value of the wrapped
        # call, which is what the original code iterates over below.
        fitted = assert_warns_message(DeprecationWarning,
                                      "min_impurity_decrease",
                                      unfitted.fit, X, y)
        for member in fitted.estimators_:
            assert_equal(member.min_impurity_split, 0.1)
示例12: test_min_impurity_decrease
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def test_min_impurity_decrease():
    """Check that min_impurity_decrease is passed on to every base estimator."""
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    forest_classes = (RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor)

    for forest_class in forest_classes:
        forest = forest_class(min_impurity_decrease=0.1)
        forest.fit(X, y)
        # Simply check if the parameter is passed on correctly. Tree tests
        # will suffice for the actual working of this param
        for member in forest.estimators_:
            assert_equal(member.min_impurity_decrease, 0.1)
示例13: fit
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def fit(self, X, y, sample_weight=None, eval_set=None, sample_weight_eval_set=None, **kwargs):
    """Fit an Extra Trees model on ``X``/``y`` and register it on ``self``.

    A classifier is used when ``self.num_classes >= 2`` (labels are encoded
    with a LabelEncoder fitted on ``self.labels``); otherwise a regressor.
    Missing values in each column are replaced in place, and the per-column
    fill value is kept in ``self.min``.

    ``sample_weight``, ``eval_set``, ``sample_weight_eval_set`` and
    ``kwargs`` are accepted but not used by this implementation.
    """
    # presumably X is a datatable Frame (names / [:, col] / to_numpy) -- TODO confirm
    orig_cols = list(X.names)

    if self.num_classes < 2:
        model = ExtraTreesRegressor(**self.params)
    else:
        encoder = LabelEncoder()
        encoder.fit(self.labels)
        y = encoder.transform(y)
        model = ExtraTreesClassifier(**self.params)

    # Replace missing values with a value smaller than all observed values
    self.min = dict()
    for col in X.names:
        column = X[:, col]
        smallest = column.min1()
        if smallest is None or np.isnan(smallest):
            # No usable minimum for this column: use a large negative fill.
            fill_value = -1e10
        else:
            fill_value = smallest - 1
        self.min[col] = fill_value
        column.replace(None, fill_value)
        X[:, col] = column
        assert X[dt.isna(dt.f[col]), col].nrows == 0

    X = X.to_numpy()
    model.fit(X, y)

    importances = np.array(model.feature_importances_)
    self.set_model_properties(model=model,
                              features=orig_cols,
                              importances=importances.tolist(),
                              iterations=self.params['n_estimators'])
示例14: random_forest
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def random_forest(train_vecs, y_train, test_vecs, y_test):
    """Fit a random forest, save it under ``storedpaths`` and return its test score.

    The model is fitted on ``train_vecs``/``y_train``, dumped with joblib
    to ``storedpaths + 'model_randomforest.pkl'``, and scored on
    ``test_vecs``/``y_test``.
    """
    forest = RandomForestClassifier(
        n_estimators=10,
        max_depth=10,
        min_samples_split=2,
        n_jobs=1,
        random_state=0,
    )
    forest.fit(train_vecs, y_train)

    model_path = storedpaths + 'model_randomforest.pkl'
    joblib.dump(forest, model_path)

    return forest.score(test_vecs, y_test)
# 训练 ExtraTreesClassifier 分类算法
开发者ID:ruanyangry,项目名称:Sentiment_Analysis_cnn_lstm_cnnlstm_textcnn_bilstm,代码行数:10,代码来源:sentiment_analysis_ml.py
示例15: extract_tree
# 需要导入模块: from sklearn import ensemble [as 别名]
# 或者: from sklearn.ensemble import ExtraTreesClassifier [as 别名]
def extract_tree(train_vecs, y_train, test_vecs, y_test):
    """Fit an Extra Trees classifier, save it under ``storedpaths`` and return its test score.

    The model is fitted on ``train_vecs``/``y_train``, dumped with joblib
    to ``storedpaths + 'model_extracttree.pkl'``, and scored on
    ``test_vecs``/``y_test``.
    """
    extra_trees = ExtraTreesClassifier(
        n_estimators=10,
        max_depth=10,
        min_samples_split=2,
        n_jobs=1,
        random_state=0,
    )
    extra_trees.fit(train_vecs, y_train)

    model_path = storedpaths + 'model_extracttree.pkl'
    joblib.dump(extra_trees, model_path)

    return extra_trees.score(test_vecs, y_test)
# 训练 GBDT 分类算法
开发者ID:ruanyangry,项目名称:Sentiment_Analysis_cnn_lstm_cnnlstm_textcnn_bilstm,代码行数:10,代码来源:sentiment_analysis_ml.py