This page collects typical usage examples of the Python method autosklearn.util.StopWatch.wall_elapsed. If you are wondering what exactly StopWatch.wall_elapsed does, how to call it, or want to see it in real code, the curated examples below should help. You can also look at the usage examples for its containing class, autosklearn.util.StopWatch.
Below are 10 code examples of StopWatch.wall_elapsed, sorted by popularity by default.
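All ten examples share one basic pattern: create a StopWatch, start a named task, and call wall_elapsed with that task name to read how many wall-clock seconds the task has been running so far. Here is a minimal sketch of that pattern; the task name 'demo' and the five-second budget are illustrative values, not anything required by the library.

import time

from autosklearn.util import StopWatch

watch = StopWatch()
watch.start_task('demo')                    # begin timing the named task

limit = 5                                   # illustrative wall-clock budget in seconds
used_time = 0
while used_time < limit:
    time.sleep(1)                           # stand-in for real work
    used_time = watch.wall_elapsed('demo')  # seconds elapsed since start_task

watch.stop_task('demo')                     # stop_task also appears in Example 6 below
print('spent %.2f seconds on demo' % used_time)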
Example 1: main
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import wall_elapsed [as alias]
def main(self):
    watch = StopWatch()
    watch.start_task('ensemble_builder')

    used_time = 0
    time_iter = 0
    index_run = 0
    num_iteration = 0
    current_num_models = 0
    last_hash = None
    current_hash = None

    backend = Backend(self.output_dir, self.autosklearn_tmp_dir)
    dir_ensemble = os.path.join(self.autosklearn_tmp_dir,
                                '.auto-sklearn',
                                'predictions_ensemble')
    dir_valid = os.path.join(self.autosklearn_tmp_dir,
                             '.auto-sklearn',
                             'predictions_valid')
    dir_test = os.path.join(self.autosklearn_tmp_dir,
                            '.auto-sklearn',
                            'predictions_test')
    paths_ = [dir_ensemble, dir_valid, dir_test]

    dir_ensemble_list_mtimes = []

    self.logger.debug('Starting main loop with %f seconds and %d iterations '
                      'left.' % (self.limit - used_time, num_iteration))
    while used_time < self.limit or (self.max_iterations > 0 and
                                     self.max_iterations >= num_iteration):
        num_iteration += 1
        self.logger.debug('Time left: %f', self.limit - used_time)
        self.logger.debug('Time last ensemble building: %f', time_iter)

        # Reload the ensemble targets every iteration; this is important
        # because cv may update the ensemble targets in the course of
        # running auto-sklearn
        # TODO update cv in order to not need this any more!
        targets_ensemble = backend.load_targets_ensemble()

        # Load the predictions from the models
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            self.logger.debug('Prediction directory %s does not exist!' %
                              dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if self.shared_mode is False:
            dir_ensemble_list = sorted(glob.glob(os.path.join(
                dir_ensemble, 'predictions_ensemble_%s_*.npy' % self.seed)))
            if exists[1]:
                dir_valid_list = sorted(glob.glob(os.path.join(
                    dir_valid, 'predictions_valid_%s_*.npy' % self.seed)))
            else:
                dir_valid_list = []
            if exists[2]:
                dir_test_list = sorted(glob.glob(os.path.join(
                    dir_test, 'predictions_test_%s_*.npy' % self.seed)))
            else:
                dir_test_list = []
        else:
            dir_ensemble_list = sorted(os.listdir(dir_ensemble))
            dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
            dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

        # Check the modification times because predictions can be updated
        # over time!
        old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
        dir_ensemble_list_mtimes = []
        for dir_ensemble_file in dir_ensemble_list:
            if dir_ensemble_file.endswith("/"):
                dir_ensemble_file = dir_ensemble_file[:-1]
            basename = os.path.basename(dir_ensemble_file)
            dir_ensemble_file = os.path.join(dir_ensemble, basename)
            mtime = os.path.getmtime(dir_ensemble_file)
            dir_ensemble_list_mtimes.append(mtime)

        if len(dir_ensemble_list) == 0:
            self.logger.debug('Directories are empty')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if len(dir_ensemble_list) <= current_num_models and \
                old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
            self.logger.debug('Nothing has changed since the last time')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        watch.start_task('index_run' + str(index_run))
        watch.start_task('ensemble_iter_' + str(num_iteration))

        # List of num_runs (which are in the filename) which will be
        # included later
        include_num_runs = []
        backup_num_runs = []
#......... portion of the code omitted .........
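The timing idiom in this example deserves a closer look: whenever the loop has nothing to do, it sleeps for two seconds and refreshes used_time from wall_elapsed, so the while condition always compares against up-to-date wall-clock time. A stripped-down sketch of that budget-polling loop follows; work_is_ready is a hypothetical callable standing in for the directory checks above.

import time

from autosklearn.util import StopWatch

def poll_within_budget(limit, work_is_ready):
    # Sketch of the ensemble builder's budget-polling loop; the two-second
    # sleep mirrors the snippet above.
    watch = StopWatch()
    watch.start_task('ensemble_builder')
    used_time = 0
    while used_time < limit:
        if not work_is_ready():
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue
        # ... do one round of ensemble building here ...
        used_time = watch.wall_elapsed('ensemble_builder')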
Example 2: main
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import wall_elapsed [as alias]
def main(autosklearn_tmp_dir,
         basename,
         task_type,
         metric,
         limit,
         output_dir,
         ensemble_size=None,
         ensemble_nbest=None,
         seed=1,
         shared_mode=False,
         max_iterations=-1,
         precision="32"):
    watch = StopWatch()
    watch.start_task('ensemble_builder')

    used_time = 0
    time_iter = 0
    index_run = 0
    num_iteration = 0
    current_num_models = 0

    backend = Backend(output_dir, autosklearn_tmp_dir)
    dir_ensemble = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                                'predictions_ensemble')
    dir_valid = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                             'predictions_valid')
    dir_test = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                            'predictions_test')
    paths_ = [dir_ensemble, dir_valid, dir_test]

    targets_ensemble = backend.load_targets_ensemble()

    dir_ensemble_list_mtimes = []

    while used_time < limit or (max_iterations > 0 and
                                max_iterations >= num_iteration):
        num_iteration += 1
        logger.debug('Time left: %f', limit - used_time)
        logger.debug('Time last iteration: %f', time_iter)

        # Load the predictions from the models
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            logger.debug('Prediction directory %s does not exist!' %
                         dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if shared_mode is False:
            dir_ensemble_list = sorted(glob.glob(os.path.join(
                dir_ensemble, 'predictions_ensemble_%s_*.npy' % seed)))
            if exists[1]:
                dir_valid_list = sorted(glob.glob(os.path.join(
                    dir_valid, 'predictions_valid_%s_*.npy' % seed)))
            else:
                dir_valid_list = []
            if exists[2]:
                dir_test_list = sorted(glob.glob(os.path.join(
                    dir_test, 'predictions_test_%s_*.npy' % seed)))
            else:
                dir_test_list = []
        else:
            dir_ensemble_list = sorted(os.listdir(dir_ensemble))
            dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
            dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

        # Check the modification times because predictions can be updated
        # over time!
        old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
        dir_ensemble_list_mtimes = []
        for dir_ensemble_file in dir_ensemble_list:
            dir_ensemble_file = os.path.join(dir_ensemble, dir_ensemble_file)
            mtime = os.path.getmtime(dir_ensemble_file)
            dir_ensemble_list_mtimes.append(mtime)

        if len(dir_ensemble_list) == 0:
            logger.debug('Directories are empty')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if len(dir_ensemble_list) <= current_num_models and \
                old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
            logger.debug('Nothing has changed since the last time')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        watch.start_task('ensemble_iter_' + str(index_run))

        # List of num_runs (which are in the filename) which will be
        # included later
        include_num_runs = []
        backup_num_runs = []
        model_and_automl_re = re.compile(r'_([0-9]*)_([0-9]*)\.npy$')
        if ensemble_nbest is not None:
            # Keeps track of the single scores of each model in our ensemble
            scores_nbest = []
#......... portion of the code omitted .........
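Example 2 ends by compiling model_and_automl_re, which recovers the seed and run number that auto-sklearn encodes into each prediction filename. A quick demonstration of what the two capture groups yield; the concrete filename below is made up for illustration:

import re

model_and_automl_re = re.compile(r'_([0-9]*)_([0-9]*)\.npy$')

# Hypothetical filename following the predictions_ensemble_<seed>_<num_run>.npy scheme:
match = model_and_automl_re.search('predictions_ensemble_1_23.npy')
automl_seed = int(match.group(1))   # -> 1
num_run = int(match.group(2))       # -> 23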
Example 3: AutoML
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import wall_elapsed [as alias]
#......... portion of the code omitted .........
# Check arguments prior to doing anything!
if self._resampling_strategy not in ['holdout', 'holdout-iterative-fit',
                                     'cv', 'nested-cv', 'partial-cv']:
    raise ValueError('Illegal resampling strategy: %s' %
                     self._resampling_strategy)
if self._resampling_strategy == 'partial-cv' and \
        self._ensemble_size != 0:
    raise ValueError("Resampling strategy partial-cv cannot be used "
                     "together with ensembles.")

self._backend._make_internals_directory()
if self._keep_models:
    try:
        os.mkdir(self._backend.get_model_dir())
    except OSError:
        self._logger.warning("model directory already exists")
        if not self._shared_mode:
            raise

self._metric = datamanager.info['metric']
self._task = datamanager.info['task']
self._label_num = datamanager.info['label_num']

set_auto_seed(self._seed)

# == Pickle the data manager, here, because no more global
# OneHotEncoding
data_manager_path = self._backend.save_datamanager(datamanager)

self._save_ensemble_data(
    datamanager.data['X_train'],
    datamanager.data['Y_train'])

time_for_load_data = self._stopwatch.wall_elapsed(self._dataset_name)

if self._debug_mode:
    self._print_load_time(
        self._dataset_name,
        self._time_for_task,
        time_for_load_data,
        self._logger)

# == Perform dummy predictions
self._do_dummy_prediction(datamanager)

# = Create a searchspace
# Do this before One Hot Encoding to make sure that it creates a
# search space for a dense classifier even if one hot encoding would
# make it sparse (tradeoff; if one hot encoding would make it sparse,
# densifier and truncatedSVD would probably lead to a MemoryError,
# like this we can't use some of the preprocessing methods in case
# the data became sparse)
self.configuration_space, configspace_path = _create_search_space(
    self._tmp_dir,
    datamanager.info,
    self._backend,
    self._stopwatch,
    self._logger,
    self._include_estimators,
    self._include_preprocessors)
self.configuration_space_created_hook(datamanager)

# == Calculate metafeatures
meta_features = _calculate_metafeatures(
    data_feat_type=datamanager.feat_type,
    data_info_task=datamanager.info['task'],
#......... portion of the code omitted .........
Example 4: main
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import wall_elapsed [as alias]
def main(logger, predictions_dir, basename, task_type, metric, limit, output_dir,
         ensemble_size=None):
    watch = StopWatch()
    watch.start_task('ensemble_builder')

    used_time = 0
    time_iter = 0
    index_run = 0
    current_num_models = 0

    while used_time < limit:
        logger.debug('Time left: %f' % (limit - used_time))
        logger.debug('Time last iteration: %f' % time_iter)

        # Load the true labels of the validation data
        true_labels = np.load(os.path.join(predictions_dir,
                                           'true_labels_ensemble.npy'))

        # Load the predictions from the models
        all_predictions_train = []
        dir_ensemble = os.path.join(predictions_dir, 'predictions_ensemble/')
        dir_valid = os.path.join(predictions_dir, 'predictions_valid/')
        dir_test = os.path.join(predictions_dir, 'predictions_test/')

        if not os.path.isdir(dir_ensemble) or not os.path.isdir(dir_valid) or \
                not os.path.isdir(dir_test):
            logger.debug('Prediction directory does not exist')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        dir_ensemble_list = sorted(os.listdir(dir_ensemble))
        dir_valid_list = sorted(os.listdir(dir_valid))
        dir_test_list = sorted(os.listdir(dir_test))

        if check_data(logger, len(dir_ensemble_list), len(dir_valid_list),
                      len(dir_test_list), current_num_models):
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        watch.start_task('ensemble_iter_' + str(index_run))

        # Binary mask where True indicates that the corresponding model will
        # be excluded from the ensemble
        exclude_mask = []
        if ensemble_size is not None:
            # Keeps track of the single scores of each model in our ensemble
            scores_nbest = []
            # The indices of the models that are currently in our ensemble
            indices_nbest = []

        model_idx = 0
        for f in dir_ensemble_list:
            predictions = np.load(os.path.join(dir_ensemble, f))
            score = calculate_score(true_labels, predictions,
                                    task_type, metric,
                                    predictions.shape[1])

            if ensemble_size is not None:
                if score <= 0.001:
                    exclude_mask.append(True)
                    logger.error('Model only predicts at random: ' + f +
                                 ' has score: ' + str(score))
                # If we have fewer models in our ensemble than ensemble_size,
                # add the current model if it is better than random
                elif len(scores_nbest) < ensemble_size:
                    scores_nbest.append(score)
                    indices_nbest.append(model_idx)
                    exclude_mask.append(False)
                else:
                    # Take the worst performing model in our ensemble so far
                    idx = np.argmin(np.array([scores_nbest]))
                    # If the current model is better than the worst model in
                    # our ensemble, replace it by the current model
                    if scores_nbest[idx] < score:
                        logger.debug(
                            'Worst model in our ensemble: %d with score %f '
                            'will be replaced by model %d with score %f'
                            % (idx, scores_nbest[idx], model_idx, score))
                        scores_nbest[idx] = score
                        # Exclude the old model
                        exclude_mask[int(indices_nbest[idx])] = True
                        indices_nbest[idx] = model_idx
                        exclude_mask.append(False)
                    # Otherwise exclude the current model from the ensemble
                    else:
                        exclude_mask.append(True)
            else:
                # Load all predictions that are better than random
                if score <= 0.001:
                    exclude_mask.append(True)
                    logger.error('Model only predicts at random: ' + f +
                                 ' has score: ' + str(score))
                else:
                    exclude_mask.append(False)
                    all_predictions_train.append(predictions)

            model_idx += 1
#......... portion of the code omitted .........
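The n-best bookkeeping in this example is easier to see in isolation. The sketch below reimplements just that idea: keep the ensemble_size best scores seen so far, drop anything that scores no better than random, and evict the current worst member whenever a better model shows up. The function name and the score list at the bottom are invented for illustration.

import numpy as np

def build_exclude_mask(scores, ensemble_size):
    # True at index i means model i is excluded from the ensemble.
    scores_nbest = []    # scores of the current ensemble members
    indices_nbest = []   # their positions in the incoming score list
    exclude_mask = []
    for model_idx, score in enumerate(scores):
        if score <= 0.001:                       # no better than random
            exclude_mask.append(True)
        elif len(scores_nbest) < ensemble_size:  # still room: take it
            scores_nbest.append(score)
            indices_nbest.append(model_idx)
            exclude_mask.append(False)
        else:
            idx = int(np.argmin(scores_nbest))   # worst current member
            if scores_nbest[idx] < score:        # newcomer is better: swap
                exclude_mask[indices_nbest[idx]] = True
                scores_nbest[idx] = score
                indices_nbest[idx] = model_idx
                exclude_mask.append(False)
            else:                                # newcomer is worse: skip it
                exclude_mask.append(True)
    return exclude_mask

print(build_exclude_mask([0.7, 0.0, 0.9, 0.8, 0.95], ensemble_size=2))
# -> [True, True, False, True, False]: only models 2 and 4 stay in the ensemble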
Example 5: main
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import wall_elapsed [as alias]
def main(autosklearn_tmp_dir,
         dataset_name,
         task_type,
         metric,
         limit,
         output_dir,
         ensemble_size=None,
         ensemble_nbest=None,
         seed=1,
         shared_mode=False,
         max_iterations=-1,
         precision="32"):
    watch = StopWatch()
    watch.start_task('ensemble_builder')

    used_time = 0
    time_iter = 0
    index_run = 0
    num_iteration = 0
    current_num_models = 0

    backend = Backend(output_dir, autosklearn_tmp_dir)
    dir_ensemble = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                                'predictions_ensemble')
    dir_valid = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                             'predictions_valid')
    dir_test = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                            'predictions_test')
    paths_ = [dir_ensemble, dir_valid, dir_test]

    dir_ensemble_list_mtimes = []

    while used_time < limit or (max_iterations > 0 and
                                max_iterations >= num_iteration):
        num_iteration += 1
        logger.debug('Time left: %f', limit - used_time)
        logger.debug('Time last iteration: %f', time_iter)

        # Reload the ensemble targets every iteration; this is important
        # because cv may update the ensemble targets in the course of
        # running auto-sklearn
        # TODO update cv in order to not need this any more!
        targets_ensemble = backend.load_targets_ensemble()

        # Load the predictions from the models
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            logger.debug('Prediction directory %s does not exist!' %
                         dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if shared_mode is False:
            dir_ensemble_list = sorted(glob.glob(os.path.join(
                dir_ensemble, 'predictions_ensemble_%s_*.npy' % seed)))
            if exists[1]:
                dir_valid_list = sorted(glob.glob(os.path.join(
                    dir_valid, 'predictions_valid_%s_*.npy' % seed)))
            else:
                dir_valid_list = []
            if exists[2]:
                dir_test_list = sorted(glob.glob(os.path.join(
                    dir_test, 'predictions_test_%s_*.npy' % seed)))
            else:
                dir_test_list = []
        else:
            dir_ensemble_list = sorted(os.listdir(dir_ensemble))
            dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
            dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

        # Check the modification times because predictions can be updated
        # over time!
        old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
        dir_ensemble_list_mtimes = []
        for dir_ensemble_file in dir_ensemble_list:
            if dir_ensemble_file.endswith("/"):
                dir_ensemble_file = dir_ensemble_file[:-1]
            basename = os.path.basename(dir_ensemble_file)
            dir_ensemble_file = os.path.join(dir_ensemble, basename)
            mtime = os.path.getmtime(dir_ensemble_file)
            dir_ensemble_list_mtimes.append(mtime)

        if len(dir_ensemble_list) == 0:
            logger.debug('Directories are empty')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if len(dir_ensemble_list) <= current_num_models and \
                old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
            logger.debug('Nothing has changed since the last time')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        watch.start_task('ensemble_iter_' + str(index_run))

        # List of num_runs (which are in the filename) which will be
        # included later
#......... portion of the code omitted .........
Example 6: AutoML
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import wall_elapsed [as alias]
#......... portion of the code omitted .........
        self._debug(part)
        return self._fit(loaded_data_manager)

    def _save_data_manager(self, data_d, tmp_dir, basename, watcher):
        task_name = 'StoreDatamanager'
        watcher.start_task(task_name)

        filepath = os.path.join(tmp_dir, basename + '_Manager.pkl')

        if _check_path_for_save(filepath, 'Data manager ', self._debug):
            pickle.dump(data_d, open(filepath, 'w'), protocol=-1)

        watcher.stop_task(task_name)
        return filepath

    def _fit(self, manager):
        # TODO: check that data and task definition fit together!
        self._metric = manager.info['metric']
        self._task = manager.info['task']
        self._target_num = manager.info['target_num']

        set_auto_seed(self._seed)

        # load data
        _save_ensemble_data(
            manager.data['X_train'],
            manager.data['Y_train'],
            self._tmp_dir,
            self._stopwatch)

        time_for_load_data = self._stopwatch.wall_elapsed(self._basename)

        if self._debug_mode:
            _print_load_time(
                self._basename,
                self._time_for_task,
                time_for_load_data,
                self._info)

        # == Calculate metafeatures
        meta_features = _calculate_meta_features(
            data_feat_type=manager.feat_type,
            data_info_task=manager.info['task'], basename=self._basename,
            metalearning_cnt=self._initial_configurations_via_metalearning,
            x_train=manager.data['X_train'], y_train=manager.data['Y_train'],
            watcher=self._stopwatch, log_function=self._debug)

        self._stopwatch.start_task('OneHot')
        manager.perform_hot_encoding()
        self._ohe = manager.encoder
        self._stopwatch.stop_task('OneHot')

        # == Pickle the data manager
        data_manager_path = self._save_data_manager(
            manager,
            self._tmp_dir,
            self._basename,
            watcher=self._stopwatch)

        # = Create a searchspace
        self._configuration_space, configspace_path = _create_search_space(
            self._tmp_dir,
            manager.info,
#......... portion of the code omitted .........
Example 7: AutoML
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import wall_elapsed [as alias]
#......... portion of the code omitted .........
        'cv', 'nested-cv', 'partial-cv']:
    raise ValueError('Illegal resampling strategy: %s' %
                     self._resampling_strategy)
if self._resampling_strategy == 'partial-cv' and \
        self._ensemble_size != 0:
    raise ValueError("Resampling strategy partial-cv cannot be used "
                     "together with ensembles.")

acquisition_functions = ['EI', 'EIPS']
if self.acquisition_function not in acquisition_functions:
    raise ValueError('Illegal acquisition %s: Must be one of %s.' %
                     (self.acquisition_function, acquisition_functions))

self._backend._make_internals_directory()
if self._keep_models:
    try:
        os.mkdir(self._backend.get_model_dir())
    except OSError:
        self._logger.warning("model directory already exists")
        if not self._shared_mode:
            raise

self._metric = datamanager.info['metric']
self._task = datamanager.info['task']
self._label_num = datamanager.info['label_num']

# == Pickle the data manager to speed up loading
data_manager_path = self._backend.save_datamanager(datamanager)

self._save_ensemble_data(
    datamanager.data['X_train'],
    datamanager.data['Y_train'])

time_for_load_data = self._stopwatch.wall_elapsed(self._dataset_name)

if self._debug_mode:
    self._print_load_time(
        self._dataset_name,
        self._time_for_task,
        time_for_load_data,
        self._logger)

# == Perform dummy predictions
num_run = 1
#if self._resampling_strategy in ['holdout', 'holdout-iterative-fit']:
num_run = self._do_dummy_prediction(datamanager, num_run)

# = Create a searchspace
# Do this before One Hot Encoding to make sure that it creates a
# search space for a dense classifier even if one hot encoding would
# make it sparse (tradeoff; if one hot encoding would make it sparse,
# densifier and truncatedSVD would probably lead to a MemoryError,
# like this we can't use some of the preprocessing methods in case
# the data became sparse)
self.configuration_space, configspace_path = self._create_search_space(
    self._tmp_dir,
    self._backend,
    datamanager,
    self._include_estimators,
    self._include_preprocessors)

# == RUN ensemble builder
# Do this before calculating the meta-features to make sure that the
# dummy predictions are actually included in the ensemble even if
# calculating the meta-features takes very long
ensemble_task_name = 'runEnsemble'
#......... portion of the code omitted .........
Example 8: main
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import wall_elapsed [as alias]
def main(logger,
         predictions_dir,
         basename,
         task_type,
         metric,
         limit,
         output_dir,
         ensemble_size=None,
         seed=1,
         indices_output_dir='.'):
    watch = StopWatch()
    watch.start_task('ensemble_builder')

    task_type = STRING_TO_TASK_TYPES[task_type]

    used_time = 0
    time_iter = 0
    index_run = 0
    current_num_models = 0

    dir_ensemble = join(predictions_dir, 'predictions_ensemble_%s/' % seed)
    dir_valid = join(predictions_dir, 'predictions_valid_%s/' % seed)
    dir_test = join(predictions_dir, 'predictions_test_%s/' % seed)
    paths_ = [dir_ensemble, dir_valid, dir_test]

    tru_labels_path = join(predictions_dir, 'true_labels_ensemble.npy')

    while used_time < limit:
        logger.debug('Time left: %f', limit - used_time)
        logger.debug('Time last iteration: %f', time_iter)

        # Load the true labels of the validation data
        true_labels = np.load(tru_labels_path)

        # Load the predictions from the models
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            logger.debug('Prediction directory %s does not exist!' %
                         dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        dir_ensemble_list = sorted(os.listdir(dir_ensemble))
        dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
        dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

        if check_data(logger, len(dir_ensemble_list), current_num_models):
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        watch.start_task('ensemble_iter_' + str(index_run))

        # List of num_runs (which are in the filename) which will be
        # included later
        include_num_runs = []
        re_num_run = re.compile(r'_([0-9]*)\.npy$')
        if ensemble_size is not None:
            # Keeps track of the single scores of each model in our ensemble
            scores_nbest = []
            # The indices of the models that are currently in our ensemble
            indices_nbest = []
            # The names of the models
            model_names = []
            # The num run of the models
            num_runs = []

        model_names_to_scores = dict()

        model_idx = 0
        for model_name in dir_ensemble_list:
            predictions = np.load(os.path.join(dir_ensemble, model_name))
            score = calculate_score(true_labels, predictions,
                                    task_type, metric,
                                    predictions.shape[1])
            model_names_to_scores[model_name] = score
            num_run = int(re_num_run.search(model_name).group(1))

            if ensemble_size is not None:
                if score <= 0.001:
                    # include_num_runs.append(True)
                    logger.error('Model only predicts at random: ' +
                                 model_name + ' has score: ' + str(score))
                # If we have fewer models in our ensemble than ensemble_size,
                # add the current model if it is better than random
                elif len(scores_nbest) < ensemble_size:
                    scores_nbest.append(score)
                    indices_nbest.append(model_idx)
                    include_num_runs.append(num_run)
                    model_names.append(model_name)
                    num_runs.append(num_run)
                else:
                    # Take the worst performing model in our ensemble so far
                    idx = np.argmin(np.array([scores_nbest]))
                    # If the current model is better than the worst model in
                    # our ensemble, replace it by the current model
                    if scores_nbest[idx] < score:
                        logger.debug('Worst model in our ensemble: %s with '
#......... portion of the code omitted .........
Example 9: AutoML
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import wall_elapsed [as alias]
#......... portion of the code omitted .........
if self._resampling_strategy not in ['holdout', 'holdout-iterative-fit',
                                     'cv', 'nested-cv', 'partial-cv']:
    raise ValueError('Illegal resampling strategy: %s' %
                     self._resampling_strategy)
if self._resampling_strategy == 'partial-cv' and \
        self._ensemble_size != 0:
    raise ValueError("Resampling strategy partial-cv cannot be used "
                     "together with ensembles.")

acquisition_functions = ['EI', 'EIPS']
if self.acquisition_function not in acquisition_functions:
    raise ValueError('Illegal acquisition %s: Must be one of %s.' %
                     (self.acquisition_function, acquisition_functions))

self._backend._make_internals_directory()
if self._keep_models:
    try:
        os.mkdir(self._backend.get_model_dir())
    except OSError:
        if not self._shared_mode:
            raise

self._metric = datamanager.info['metric']
self._task = datamanager.info['task']
self._label_num = datamanager.info['label_num']

# == Pickle the data manager to speed up loading
data_manager_path = self._backend.save_datamanager(datamanager)

self._save_ensemble_data(
    datamanager.data['X_train'],
    datamanager.data['Y_train'])

time_for_load_data = self._stopwatch.wall_elapsed(self._dataset_name)

if self._debug_mode:
    self._print_load_time(
        self._dataset_name,
        self._time_for_task,
        time_for_load_data,
        self._logger)

# == Perform dummy predictions
num_run = 1
#if self._resampling_strategy in ['holdout', 'holdout-iterative-fit']:
num_run = self._do_dummy_prediction(datamanager, num_run)

# = Create a searchspace
# Do this before One Hot Encoding to make sure that it creates a
# search space for a dense classifier even if one hot encoding would
# make it sparse (tradeoff; if one hot encoding would make it sparse,
# densifier and truncatedSVD would probably lead to a MemoryError,
# like this we can't use some of the preprocessing methods in case
# the data became sparse)
self.configuration_space, configspace_path = self._create_search_space(
    self._backend.temporary_directory,
    self._backend,
    datamanager,
    self._include_estimators,
    self._include_preprocessors)

# == RUN ensemble builder
# Do this before calculating the meta-features to make sure that the
# dummy predictions are actually included in the ensemble even if
# calculating the meta-features takes very long
ensemble_task_name = 'runEnsemble'
#......... portion of the code omitted .........
Example 10: AutoML
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import wall_elapsed [as alias]
#......... portion of the code omitted .........
if self._resampling_strategy not in [
        'holdout', 'holdout-iterative-fit',
        'cv', 'partial-cv',
        'partial-cv-iterative-fit'] \
        and not issubclass(self._resampling_strategy, BaseCrossValidator) \
        and not issubclass(self._resampling_strategy, _RepeatedSplits) \
        and not issubclass(self._resampling_strategy, BaseShuffleSplit):
    raise ValueError('Illegal resampling strategy: %s' %
                     self._resampling_strategy)
if self._resampling_strategy in ['partial-cv', 'partial-cv-iterative-fit'] \
        and self._ensemble_size != 0:
    raise ValueError("Resampling strategy %s cannot be used "
                     "together with ensembles." % self._resampling_strategy)
if self._resampling_strategy in ['partial-cv', 'cv',
                                 'partial-cv-iterative-fit'] and \
        'folds' not in self._resampling_strategy_arguments:
    self._resampling_strategy_arguments['folds'] = 5

self._backend._make_internals_directory()
if self._keep_models:
    try:
        os.makedirs(self._backend.get_model_dir())
    except (OSError, FileExistsError) as e:
        if not self._shared_mode:
            raise

self._metric = metric
self._task = datamanager.info['task']
self._label_num = datamanager.info['label_num']

# == Pickle the data manager to speed up loading
data_manager_path = self._backend.save_datamanager(datamanager)

time_for_load_data = self._stopwatch.wall_elapsed(self._dataset_name)

if self._debug_mode:
    self._print_load_time(
        self._dataset_name,
        self._time_for_task,
        time_for_load_data,
        self._logger)

# == Perform dummy predictions
num_run = 1
#if self._resampling_strategy in ['holdout', 'holdout-iterative-fit']:
num_run = self._do_dummy_prediction(datamanager, num_run)

# = Create a searchspace
# Do this before One Hot Encoding to make sure that it creates a
# search space for a dense classifier even if one hot encoding would
# make it sparse (tradeoff; if one hot encoding would make it sparse,
# densifier and truncatedSVD would probably lead to a MemoryError,
# like this we can't use some of the preprocessing methods in case
# the data became sparse)
self.configuration_space, configspace_path = self._create_search_space(
    self._backend.temporary_directory,
    self._backend,
    datamanager,
    include_estimators=self._include_estimators,
    exclude_estimators=self._exclude_estimators,
    include_preprocessors=self._include_preprocessors,
    exclude_preprocessors=self._exclude_preprocessors)
if only_return_configuration_space:
    return self.configuration_space

# == RUN ensemble builder
#......... portion of the code omitted .........