本文整理汇总了Python中tpot.TPOTClassifier类的典型用法代码示例。如果您正苦于以下问题：Python TPOTClassifier类的具体用法？Python TPOTClassifier怎么用？Python TPOTClassifier使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了TPOTClassifier类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_random_ind_2
def test_random_ind_2():
    """Check that a TPOTClassifier seeded with 45 exports the expected pipeline code."""
    classifier = TPOTClassifier(random_state=45)
    # Attach a disabled progress bar before generating the individual.
    classifier._pbar = tqdm(total=1, disable=True)
    random_individual = classifier._toolbox.individual()

    # NOTE(review): every line inside this literal starts at column 0 as seen
    # here; confirm that matches the exporter's actual output formatting.
    reference_source = """import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from tpot.built_in_operators import ZeroCount
# NOTE: Make sure that the class is labeled 'class' in the data file
tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64)
features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1)
training_features, testing_features, training_classes, testing_classes = \\
train_test_split(features, tpot_data['class'], random_state=42)
exported_pipeline = make_pipeline(
ZeroCount(),
LogisticRegression(C=0.0001, dual=False, penalty="l2")
)
exported_pipeline.fit(training_features, training_classes)
results = exported_pipeline.predict(testing_features)
"""
    generated_source = export_pipeline(
        random_individual,
        classifier.operators,
        classifier._pset,
    )
    assert reference_source == generated_source
示例2: test_fit2
def test_fit2():
    """Check that fit() yields an optimized pipeline when config_dict is 'TPOT light'."""
    estimator = TPOTClassifier(
        random_state=42,
        population_size=1,
        offspring_size=2,
        generations=1,
        verbosity=0,
        config_dict='TPOT light',
    )
    estimator.fit(training_features, training_classes)

    # After fitting, the best individual and the start timestamp must both be set.
    assert isinstance(estimator._optimized_pipeline, creator.Individual)
    assert estimator._start_datetime is not None
示例3: test_export_random_ind
def test_export_random_ind():
    """Check that a TPOTClassifier seeded with 39 exports the expected pipeline code."""
    classifier = TPOTClassifier(random_state=39)
    # Attach a disabled progress bar before generating the individual.
    classifier._pbar = tqdm(total=1, disable=True)
    random_individual = classifier._toolbox.individual()

    # NOTE(review): every line inside this literal starts at column 0 as seen
    # here; confirm that matches the exporter's actual output formatting.
    reference_source = """import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier
# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \\
train_test_split(features, tpot_data['target'].values, random_state=42)
exported_pipeline = make_pipeline(
SelectPercentile(score_func=f_classif, percentile=65),
DecisionTreeClassifier(criterion="gini", max_depth=7, min_samples_leaf=4, min_samples_split=18)
)
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    generated_source = export_pipeline(
        random_individual,
        classifier.operators,
        classifier._pset,
    )
    assert reference_source == generated_source
示例4: test_pipeline_score_save
def test_pipeline_score_save():
    """Check the exported code when a pipeline score is passed to export_pipeline."""
    classifier = TPOTClassifier()
    classifier._fit_init()
    classifier._pbar = tqdm(total=1, disable=True)

    # Fixed pipeline description, parsed against the classifier's primitive set.
    individual_text = (
        'DecisionTreeClassifier(SelectPercentile(input_matrix, SelectPercentile__percentile=20),'
        'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8,'
        'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)'
    )
    individual = creator.Individual.from_string(individual_text, classifier._pset)

    # NOTE(review): every line inside this literal starts at column 0 as seen
    # here; confirm that matches the exporter's actual output formatting.
    reference_source = """import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier
# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \\
train_test_split(features, tpot_data['target'].values, random_state=None)
# Average CV score on the training set was:0.929813743
exported_pipeline = make_pipeline(
SelectPercentile(score_func=f_classif, percentile=20),
DecisionTreeClassifier(criterion="gini", max_depth=8, min_samples_leaf=5, min_samples_split=5)
)
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    generated_source = export_pipeline(
        individual,
        classifier.operators,
        classifier._pset,
        pipeline_score=0.929813743,
    )
    assert_equal(reference_source, generated_source)
示例5: test_mut_operator_stats_update
def test_mut_operator_stats_update():
    """Check the statistics bookkeeping of _random_mutation_operator across repeated mutations."""
    classifier = TPOTClassifier()
    individual_text = (
        'KNeighborsClassifier('
        'BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=False),'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1, '
        'KNeighborsClassifier__weights=uniform'
        ')'
    )
    parent = creator.Individual.from_string(individual_text, classifier._pset)

    # Start from freshly initialised statistics with arbitrary operator counts.
    initialize_stats_dict(parent)
    parent.statistics["crossover_count"] = random.randint(0, 10)
    parent.statistics["mutation_count"] = random.randint(0, 10)

    # Record the individual as already evaluated in evaluated_individuals_.
    combined_stats = classifier._combine_individual_stats(2, 0.99, parent.statistics)
    classifier.evaluated_individuals_[str(parent)] = combined_stats

    for _ in range(10):
        (child,) = classifier._random_mutation_operator(parent)
        parent_stats, child_stats = parent.statistics, child.statistics

        # Mutation leaves the crossover count alone, bumps the mutation count
        # by one, and records the parent as the sole predecessor.
        assert child_stats['crossover_count'] == parent_stats['crossover_count']
        assert child_stats['mutation_count'] == parent_stats['mutation_count'] + 1
        assert child_stats['predecessor'] == (str(parent),)

        # The offspring becomes the parent of the next round.
        parent = child
示例6: test_fit
def test_fit():
    """Check that fit() yields an optimized pipeline and records its run state."""
    estimator = TPOTClassifier(
        random_state=42,
        population_size=1,
        generations=1,
        verbosity=0,
    )
    estimator.fit(training_features, training_classes)

    assert isinstance(estimator._optimized_pipeline, creator.Individual)
    assert estimator._gp_generation == 0
    assert estimator._start_datetime is not None
示例7: test_gen
def test_gen():
    """Check the structure of a pipeline produced by TPOT's _gen_grow_safe."""
    classifier = TPOTClassifier()
    generated = classifier._gen_grow_safe(classifier._pset, 1, 3)

    # The pipeline must have more than one node ...
    assert len(generated) > 1
    # ... and its first node must return Output_DF.
    first_node = generated[0]
    assert first_node.ret == Output_DF
示例8: test_export
def test_export():
    """Assert that TPOT's export function throws a ValueError when no optimized pipeline exists.

    Raises:
        AssertionError: if export() returns without raising ValueError.
    """
    tpot_obj = TPOTClassifier()
    try:
        tpot_obj.export("test_export.py")
    except ValueError:
        # Expected outcome: fit() was never called on this object.
        return
    # Raised explicitly rather than via `assert False`, which is stripped under
    # `python -O` and would let the test pass without checking anything.
    raise AssertionError("export() did not raise ValueError although no pipeline was optimized")
示例9: test_invaild_dataset_warning
def test_invaild_dataset_warning():
    """Assert that the TPOT fit function raises a ValueError when dataset is not in right format.

    Raises:
        AssertionError: if fit() accepts the malformed class labels.
    """
    tpot_obj = TPOTClassifier(random_state=42, population_size=1, offspring_size=2, generations=1, verbosity=0)
    # Common mistake: class labels reshaped into a single row of shape (1, n).
    bad_training_classes = training_classes.reshape((1, len(training_classes)))
    try:
        tpot_obj.fit(training_features, bad_training_classes)
    except ValueError:
        # Expected outcome: the malformed labels were rejected.
        return
    # Raised explicitly rather than via `assert False`, which is stripped under
    # `python -O` and would let the test pass without checking anything.
    raise AssertionError("fit() did not raise ValueError for class labels of shape (1, n)")
示例10: test_predict
def test_predict():
    """Assert that the TPOT predict function raises a ValueError when no optimized pipeline exists.

    Raises:
        AssertionError: if predict() returns without raising ValueError.
    """
    tpot_obj = TPOTClassifier()
    try:
        tpot_obj.predict(testing_features)
    except ValueError:
        # Expected outcome: fit() was never called on this object.
        return
    # Raised explicitly rather than via `assert False`, which is stripped under
    # `python -O` and would let the test pass without checking anything.
    raise AssertionError("predict() did not raise ValueError although no pipeline was optimized")
示例11: test_score
def test_score():
    """Assert that the TPOT score function raises a RuntimeError when no optimized pipeline exists.

    Raises:
        AssertionError: if score() returns without raising RuntimeError.
    """
    tpot_obj = TPOTClassifier()
    try:
        tpot_obj.score(testing_features, testing_classes)
    except RuntimeError:
        # Expected outcome: fit() was never called on this object.
        return
    # Raised explicitly rather than via `assert False`, which is stripped under
    # `python -O` and would let the test pass without checking anything.
    raise AssertionError("score() did not raise RuntimeError although no pipeline was optimized")
示例12: test_predict_2
def test_predict_2():
    """Check that predict() returns a result of shape (num_testing_rows,)."""
    classifier = TPOTClassifier()

    # Install a hand-written individual as the optimized pipeline, then
    # compile and fit it directly instead of running the full search.
    classifier._optimized_pipeline = creator.Individual.from_string(
        'DecisionTreeClassifier(input_matrix)',
        classifier._pset,
    )
    classifier._fitted_pipeline = classifier._toolbox.compile(
        expr=classifier._optimized_pipeline
    )
    classifier._fitted_pipeline.fit(training_features, training_classes)

    predictions = classifier.predict(testing_features)
    assert predictions.shape == (testing_features.shape[0],)
示例13: test_dict_initialization
def test_dict_initialization():
    """Check the initial values that gp_deap.initialize_stats_dict writes to an individual."""
    classifier = TPOTClassifier()
    classifier._fit_init()
    individual = classifier._toolbox.individual()
    initialize_stats_dict(individual)

    stats = individual.statistics
    assert stats['generation'] == 0
    assert stats['crossover_count'] == 0
    assert stats['mutation_count'] == 0
    assert stats['predecessor'] == ('ROOT',)
示例14: test_imputer_in_export
def test_imputer_in_export():
    """Check that the exported code contains an imputation step when fit() saw a NaN feature."""
    classifier = TPOTClassifier(
        random_state=42,
        population_size=1,
        offspring_size=2,
        generations=1,
        verbosity=0,
        config_dict='TPOT light',
    )

    # Work on a copy so the shared training features are left unmodified.
    nan_features = np.copy(training_features)
    nan_features[0][0] = float('nan')
    classifier.fit(nan_features, training_target)

    # Replace the search result with a fixed pipeline, because random.seed()
    # behaves differently in Python 2.* and 3.*.
    individual_text = (
        'KNeighborsClassifier('
        'input_matrix, '
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1, '
        'KNeighborsClassifier__weights=uniform'
        ')'
    )
    classifier._optimized_pipeline = creator.Individual.from_string(individual_text, classifier._pset)
    generated_source = export_pipeline(
        classifier._optimized_pipeline,
        classifier.operators,
        classifier._pset,
        classifier._imputed,
    )

    # NOTE(review): every line inside this literal starts at column 0 as seen
    # here; confirm that matches the exporter's actual output formatting.
    reference_source = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import Imputer
# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \\
train_test_split(features, tpot_data['target'].values, random_state=None)
imputer = Imputer(strategy="median")
imputer.fit(training_features)
training_features = imputer.transform(training_features)
testing_features = imputer.transform(testing_features)
exported_pipeline = KNeighborsClassifier(n_neighbors=10, p=1, weights="uniform")
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    assert_equal(generated_source, reference_source)
示例15: test_predict_2
def test_predict_2():
    """Check that predict() returns a result of shape (num_testing_rows,)."""
    classifier = TPOTClassifier()

    # Fully parameterised decision tree, installed as the optimized pipeline
    # and then compiled and fitted directly instead of running the search.
    individual_text = (
        'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini'
        ', DecisionTreeClassifier__max_depth=8,DecisionTreeClassifier__min_samples_leaf=5,'
        'DecisionTreeClassifier__min_samples_split=5)'
    )
    classifier._optimized_pipeline = creator.Individual.from_string(individual_text, classifier._pset)
    classifier._fitted_pipeline = classifier._toolbox.compile(
        expr=classifier._optimized_pipeline
    )
    classifier._fitted_pipeline.fit(training_features, training_classes)

    predictions = classifier.predict(testing_features)
    assert predictions.shape == (testing_features.shape[0],)