This page collects typical usage examples of the Python method autosklearn.util.StopWatch.stop_task. If you are unsure what StopWatch.stop_task does or how to use it, the curated examples below should help. You can also read further about the class it belongs to, autosklearn.util.StopWatch.
The page shows 12 code examples of StopWatch.stop_task, sorted by popularity by default. Upvoting the examples you find useful helps the site recommend better Python code samples.
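Before the examples, here is a minimal sketch of how start_task and stop_task pair up. It assumes the pre-0.2 auto-sklearn module layout (where StopWatch is importable from autosklearn.util); the method names are taken from the examples below, and the task name 'data_loading' is purely illustrative.

import time
from autosklearn.util import StopWatch  # assumes the historical module layout

watch = StopWatch()
watch.start_task('data_loading')             # open a named task
time.sleep(0.1)                              # stands in for the work being timed
watch.stop_task('data_loading')              # must be stopped under the same name
print(watch.get_wall_dur('data_loading'))    # wall-clock duration of the finished task
print(watch.wall_sum())                      # total wall-clock time over all tasks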
Example 1: test_stopwatch_overhead
# Required import: from autosklearn.util import StopWatch [as alias]
# Or: from autosklearn.util.StopWatch import stop_task [as alias]
def test_stopwatch_overhead(self):
    # Wall overhead
    start = time.time()
    cpu_start = time.clock()  # time.clock() is the Python 2-era CPU timer; removed in Python 3.8
    watch = StopWatch()
    for i in range(1, 1000):
        watch.start_task('task_%d' % i)
        watch.stop_task('task_%d' % i)
    cpu_stop = time.clock()
    stop = time.time()

    dur = stop - start
    cpu_dur = cpu_stop - cpu_start
    cpu_overhead = cpu_dur - watch.cpu_sum()
    wall_overhead = dur - watch.wall_sum()

    self.assertLess(cpu_overhead, 1)
    self.assertLess(wall_overhead, 1)
    self.assertLess(watch.cpu_sum(), 2 * watch.wall_sum())
Example 2: test_stopwatch_overhead
# Required import: from autosklearn.util import StopWatch [as alias]
# Or: from autosklearn.util.StopWatch import stop_task [as alias]
def test_stopwatch_overhead(self):
    # CPU overhead
    start = time.clock()
    watch = StopWatch()
    for i in range(1, 100000):
        watch.start_task("task_%d" % i)
        watch.stop_task("task_%d" % i)
    stop = time.clock()
    dur = stop - start
    cpu_overhead = dur - watch.cpu_sum()
    self.assertLess(cpu_overhead, 1.5)

    # Wall overhead
    start = time.time()
    watch = StopWatch()
    for i in range(1, 100000):
        watch.start_task("task_%d" % i)
        watch.stop_task("task_%d" % i)
    stop = time.time()
    dur = stop - start
    wall_overhead = dur - watch.wall_sum()
    self.assertLess(wall_overhead, 2)
    self.assertLess(cpu_overhead, wall_overhead)
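Note that both overhead tests rely on time.clock(), which was deprecated in Python 3.3 and removed in Python 3.8. A rough modern equivalent of the same measurement, assuming StopWatch itself still imports and runs under Python 3, could look like this sketch using time.perf_counter() and time.process_time():

import time
from autosklearn.util import StopWatch  # assumes the historical module layout

watch = StopWatch()
wall_start = time.perf_counter()   # monotonic wall clock
cpu_start = time.process_time()    # process CPU time, the successor to time.clock()
for i in range(1000):
    watch.start_task('task_%d' % i)
    watch.stop_task('task_%d' % i)
cpu_overhead = time.process_time() - cpu_start - watch.cpu_sum()
wall_overhead = time.perf_counter() - wall_start - watch.wall_sum()
print('cpu overhead: %f, wall overhead: %f' % (cpu_overhead, wall_overhead))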
Example 3: main
# Required import: from autosklearn.util import StopWatch [as alias]
# Or: from autosklearn.util.StopWatch import stop_task [as alias]
#.........(part of the code omitted here).........
                    self.logger.info('Ensemble output did not change.')
                    time.sleep(2)
                    continue
                else:
                    last_hash = current_hash
            else:
                last_hash = current_hash

            # Save the ensemble for later use in the main auto-sklearn module!
            backend.save_ensemble(ensemble, index_run, self.seed)

            # Save predictions for valid and test data set
            if len(dir_valid_list) == len(dir_ensemble_list):
                all_predictions_valid = np.array(all_predictions_valid)
                ensemble_predictions_valid = ensemble.predict(all_predictions_valid)
                if self.task_type == BINARY_CLASSIFICATION:
                    ensemble_predictions_valid = ensemble_predictions_valid[:, 1]
                if self.low_precision:
                    if self.task_type in [BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION, MULTILABEL_CLASSIFICATION]:
                        ensemble_predictions_valid[ensemble_predictions_valid < 1e-4] = 0.
                    if self.metric in [BAC_METRIC, F1_METRIC]:
                        bin_array = np.zeros(ensemble_predictions_valid.shape, dtype=np.int32)
                        if (self.task_type != MULTICLASS_CLASSIFICATION) or (
                                ensemble_predictions_valid.shape[1] == 1):
                            bin_array[ensemble_predictions_valid >= 0.5] = 1
                        else:
                            sample_num = ensemble_predictions_valid.shape[0]
                            for i in range(sample_num):
                                j = np.argmax(ensemble_predictions_valid[i, :])
                                bin_array[i, j] = 1
                        ensemble_predictions_valid = bin_array

                    if self.task_type in CLASSIFICATION_TASKS:
                        if ensemble_predictions_valid.size < (20000 * 20):
                            precision = 3
                        else:
                            precision = 2
                    else:
                        if ensemble_predictions_valid.size > 1000000:
                            precision = 4
                        else:
                            # File size maximally 2.1MB
                            precision = 6

                backend.save_predictions_as_txt(ensemble_predictions_valid,
                                                'valid', index_run,
                                                prefix=self.dataset_name,
                                                precision=precision)
            else:
                self.logger.info('Could not find as many validation set predictions (%d) '
                                 'as ensemble predictions (%d)!',
                                 len(dir_valid_list), len(dir_ensemble_list))

            del all_predictions_valid

            if len(dir_test_list) == len(dir_ensemble_list):
                all_predictions_test = np.array(all_predictions_test)
                ensemble_predictions_test = ensemble.predict(all_predictions_test)
                if self.task_type == BINARY_CLASSIFICATION:
                    ensemble_predictions_test = ensemble_predictions_test[:, 1]
                if self.low_precision:
                    if self.task_type in [BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION, MULTILABEL_CLASSIFICATION]:
                        ensemble_predictions_test[ensemble_predictions_test < 1e-4] = 0.
                    if self.metric in [BAC_METRIC, F1_METRIC]:
                        bin_array = np.zeros(ensemble_predictions_test.shape,
                                             dtype=np.int32)
                        if (self.task_type != MULTICLASS_CLASSIFICATION) or (
                                ensemble_predictions_test.shape[1] == 1):
                            bin_array[ensemble_predictions_test >= 0.5] = 1
                        else:
                            sample_num = ensemble_predictions_test.shape[0]
                            for i in range(sample_num):
                                j = np.argmax(ensemble_predictions_test[i, :])
                                bin_array[i, j] = 1
                        ensemble_predictions_test = bin_array

                    if self.task_type in CLASSIFICATION_TASKS:
                        if ensemble_predictions_test.size < (20000 * 20):
                            precision = 3
                        else:
                            precision = 2
                    else:
                        if ensemble_predictions_test.size > 1000000:
                            precision = 4
                        else:
                            precision = 6

                backend.save_predictions_as_txt(ensemble_predictions_test,
                                                'test', index_run,
                                                prefix=self.dataset_name,
                                                precision=precision)
            else:
                self.logger.info('Could not find as many test set predictions (%d) as '
                                 'ensemble predictions (%d)!',
                                 len(dir_test_list), len(dir_ensemble_list))

            del all_predictions_test

            current_num_models = len(dir_ensemble_list)
            watch.stop_task('index_run' + str(index_run))
            time_iter = watch.get_wall_dur('index_run' + str(index_run))
            used_time = watch.wall_elapsed('ensemble_builder')
            index_run += 1
        return
Example 4: AutoML
# Required import: from autosklearn.util import StopWatch [as alias]
# Or: from autosklearn.util.StopWatch import stop_task [as alias]
#.........(part of the code omitted here).........
                                     encode_labels=False)
        return self._fit(loaded_data_manager)

    def fit_automl_dataset(self, dataset):
        self._stopwatch = StopWatch()
        self._backend.save_start_time(self._seed)

        name = os.path.basename(dataset)
        self._stopwatch.start_task(name)
        self._start_task(self._stopwatch, name)
        self._dataset_name = name

        logger_name = 'AutoML(%d):%s' % (self._seed, name)
        setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
        self._logger = get_logger(logger_name)

        self._logger.debug('======== Reading and converting data ==========')
        # Encoding the labels will be done after the metafeature calculation!
        loaded_data_manager = CompetitionDataManager(
            dataset, encode_labels=False,
            max_memory_in_mb=float(self._ml_memory_limit) / 3)
        loaded_data_manager_str = str(loaded_data_manager).split('\n')
        for part in loaded_data_manager_str:
            self._logger.debug(part)

        return self._fit(loaded_data_manager)

    @staticmethod
    def _start_task(watcher, task_name):
        watcher.start_task(task_name)

    @staticmethod
    def _stop_task(watcher, task_name):
        watcher.stop_task(task_name)

    @staticmethod
    def _print_load_time(basename, time_left_for_this_task,
                         time_for_load_data, logger):
        time_left_after_reading = max(
            0, time_left_for_this_task - time_for_load_data)
        logger.info('Remaining time after reading %s %5.2f sec' %
                    (basename, time_left_after_reading))
        return time_for_load_data

    def _do_dummy_prediction(self, datamanager):
        autosklearn.cli.base_interface.main(datamanager,
                                            self._resampling_strategy,
                                            None,
                                            None,
                                            mode_args=self._resampling_strategy_arguments)

    def _fit(self, datamanager):
        # Reset learnt stuff
        self.models_ = None
        self.ensemble_indices_ = None

        # Check arguments prior to doing anything!
        if self._resampling_strategy not in ['holdout', 'holdout-iterative-fit',
                                             'cv', 'nested-cv', 'partial-cv']:
            raise ValueError('Illegal resampling strategy: %s' %
                             self._resampling_strategy)
        if self._resampling_strategy == 'partial-cv' and \
                self._ensemble_size != 0:
            raise ValueError("Resampling strategy partial-cv cannot be used "
Example 5: main
# Required import: from autosklearn.util import StopWatch [as alias]
# Or: from autosklearn.util.StopWatch import stop_task [as alias]
#.........(part of the code omitted here).........
                                                precision)

        # if len(all_predictions_train) == len(all_predictions_test) == len(
        #         all_predictions_valid) == 0:
        if len(include_num_runs) == 0:
            logger.error('All models do just random guessing')
            time.sleep(2)
            continue

        else:
            try:
                indices, trajectory = ensemble_selection(
                    np.array(all_predictions_train), targets_ensemble,
                    ensemble_size, task_type, metric)

                logger.info('Trajectory and indices!')
                logger.info(trajectory)
                logger.info(indices)

            except ValueError as e:
                logger.error('Caught ValueError: ' + str(e))
                used_time = watch.wall_elapsed('ensemble_builder')
                time.sleep(2)
                continue
            except Exception as e:
                # e.message is a Python 2 idiom; on Python 3 use str(e) instead
                logger.error('Caught error! %s', e.message)
                used_time = watch.wall_elapsed('ensemble_builder')
                time.sleep(2)
                continue

            # Output the score
            logger.info('Training performance: %f' % trajectory[-1])

            # Print the ensemble members:
            ensemble_members_run_numbers = dict()
            ensemble_members = Counter(indices).most_common()
            ensemble_members_string = 'Ensemble members:\n'
            logger.info(ensemble_members)
            for ensemble_member in ensemble_members:
                weight = float(ensemble_member[1]) / len(indices)
                ensemble_members_string += \
                    ('    %s; weight: %10f; performance: %10f\n' %
                     (indices_to_model_names[ensemble_member[0]],
                      weight,
                      model_names_to_scores[
                          indices_to_model_names[ensemble_member[0]]]))
                ensemble_members_run_numbers[
                    indices_to_run_num[
                        ensemble_member[0]]] = weight
            logger.info(ensemble_members_string)

        # Save the ensemble indices for later use!
        backend.save_ensemble_indices_weights(ensemble_members_run_numbers,
                                              index_run, seed)

        all_predictions_valid = get_predictions(dir_valid,
                                                dir_valid_list,
                                                include_num_runs,
                                                model_and_automl_re,
                                                precision)

        # Save predictions for valid and test data set
        if len(dir_valid_list) == len(dir_ensemble_list):
            all_predictions_valid = np.array(all_predictions_valid)
            ensemble_predictions_valid = np.mean(
                all_predictions_valid[indices.astype(int)], axis=0)
            backend.save_predictions_as_txt(ensemble_predictions_valid,
                                            'valid', index_run, prefix=basename)
        else:
            logger.info('Could not find as many validation set predictions (%d) '
                        'as ensemble predictions (%d)!',
                        len(dir_valid_list), len(dir_ensemble_list))

        del all_predictions_valid

        all_predictions_test = get_predictions(dir_test,
                                               dir_test_list,
                                               include_num_runs,
                                               model_and_automl_re,
                                               precision)

        if len(dir_test_list) == len(dir_ensemble_list):
            all_predictions_test = np.array(all_predictions_test)
            ensemble_predictions_test = np.mean(
                all_predictions_test[indices.astype(int)], axis=0)
            backend.save_predictions_as_txt(ensemble_predictions_test,
                                            'test', index_run, prefix=basename)
        else:
            logger.info('Could not find as many test set predictions (%d) as '
                        'ensemble predictions (%d)!',
                        len(dir_test_list), len(dir_ensemble_list))

        del all_predictions_test

        current_num_models = len(dir_ensemble_list)
        watch.stop_task('ensemble_iter_' + str(index_run))
        time_iter = watch.get_wall_dur('ensemble_iter_' + str(index_run))
        used_time = watch.wall_elapsed('ensemble_builder')
        index_run += 1
    return
Example 6: main
# Required import: from autosklearn.util import StopWatch [as alias]
# Or: from autosklearn.util.StopWatch import stop_task [as alias]
#.........(part of the code omitted here).........
                include_num_runs.append((automl_seed, num_run))
            model_idx += 1

        # If there is no model better than random guessing, we have to use
        # all models which do random guessing
        if len(include_num_runs) == 0:
            include_num_runs = backup_num_runs

        indices_to_model_names = dict()
        indices_to_run_num = dict()
        for i, model_name in enumerate(dir_ensemble_list):
            match = model_and_automl_re.search(model_name)
            automl_seed = int(match.group(1))
            num_run = int(match.group(2))
            if (automl_seed, num_run) in include_num_runs:
                num_indices = len(indices_to_model_names)
                indices_to_model_names[num_indices] = model_name
                indices_to_run_num[num_indices] = (automl_seed, num_run)

        try:
            all_predictions_train, all_predictions_valid, all_predictions_test =\
                get_all_predictions(dir_ensemble, dir_ensemble_list,
                                    dir_valid, dir_valid_list,
                                    dir_test, dir_test_list,
                                    include_num_runs,
                                    model_and_automl_re,
                                    precision)
        except IOError:
            logger.error('Could not load the predictions.')
            continue

        if len(include_num_runs) == 0:
            logger.error('All models do just random guessing')
            time.sleep(2)
            continue

        else:
            ensemble = EnsembleSelection(ensemble_size=ensemble_size,
                                         task_type=task_type,
                                         metric=metric)

            try:
                ensemble.fit(all_predictions_train, targets_ensemble,
                             include_num_runs)
                logger.info(ensemble)

            except ValueError as e:
                logger.error('Caught ValueError: ' + str(e))
                used_time = watch.wall_elapsed('ensemble_builder')
                time.sleep(2)
                continue
            except IndexError as e:
                logger.error('Caught IndexError: ' + str(e))
                used_time = watch.wall_elapsed('ensemble_builder')
                time.sleep(2)
                continue
            except Exception as e:
                logger.error('Caught error! %s', e.message)
                used_time = watch.wall_elapsed('ensemble_builder')
                time.sleep(2)
                continue

            # Output the score
            logger.info('Training performance: %f' % ensemble.train_score_)

        # Save the ensemble for later use in the main auto-sklearn module!
        backend.save_ensemble(ensemble, index_run, seed)

        # Save predictions for valid and test data set
        if len(dir_valid_list) == len(dir_ensemble_list):
            all_predictions_valid = np.array(all_predictions_valid)
            ensemble_predictions_valid = ensemble.predict(all_predictions_valid)
            backend.save_predictions_as_txt(ensemble_predictions_valid,
                                            'valid', index_run, prefix=dataset_name)
        else:
            logger.info('Could not find as many validation set predictions (%d) '
                        'as ensemble predictions (%d)!',
                        len(dir_valid_list), len(dir_ensemble_list))

        del all_predictions_valid

        if len(dir_test_list) == len(dir_ensemble_list):
            all_predictions_test = np.array(all_predictions_test)
            ensemble_predictions_test = ensemble.predict(all_predictions_test)
            backend.save_predictions_as_txt(ensemble_predictions_test,
                                            'test', index_run, prefix=dataset_name)
        else:
            logger.info('Could not find as many test set predictions (%d) as '
                        'ensemble predictions (%d)!',
                        len(dir_test_list), len(dir_ensemble_list))

        del all_predictions_test

        current_num_models = len(dir_ensemble_list)
        watch.stop_task('ensemble_iter_' + str(index_run))
        time_iter = watch.get_wall_dur('ensemble_iter_' + str(index_run))
        used_time = watch.wall_elapsed('ensemble_builder')
        index_run += 1
    return
Example 7: main
# Required import: from autosklearn.util import StopWatch [as alias]
# Or: from autosklearn.util.StopWatch import stop_task [as alias]
#.........(part of the code omitted here).........
            # the current model if it is better than random
            elif len(scores_nbest) < ensemble_size:
                scores_nbest.append(score)
                indices_nbest.append(model_idx)
                exclude_mask.append(False)
            else:
                # Take the worst performing model in our ensemble so far
                idx = np.argmin(np.array([scores_nbest]))
                # If the current model is better than the worst model in
                # our ensemble replace it by the current model
                if scores_nbest[idx] < score:
                    logger.debug(
                        'Worst model in our ensemble: %d with score %f will be replaced by model %d with score %f'
                        % (idx, scores_nbest[idx], model_idx, score))
                    scores_nbest[idx] = score
                    # Exclude the old model
                    exclude_mask[int(indices_nbest[idx])] = True
                    indices_nbest[idx] = model_idx
                    exclude_mask.append(False)
                # Otherwise exclude the current model from the ensemble
                else:
                    exclude_mask.append(True)
        else:
            # Load all predictions that are better than random
            if score <= 0.001:
                exclude_mask.append(True)
                logger.error('Model only predicts at random: ' + f +
                             ' has score: ' + str(score))
            else:
                exclude_mask.append(False)
                all_predictions_train.append(predictions)

        model_idx += 1

    print(exclude_mask)

    all_predictions_valid = get_predictions(dir_valid,
                                            dir_valid_list,
                                            exclude_mask)
    all_predictions_test = get_predictions(dir_test,
                                           dir_test_list,
                                           exclude_mask)

    if len(all_predictions_train) == len(all_predictions_test) == len(
            all_predictions_valid) == 0:
        logger.error('All models do just random guessing')
        time.sleep(2)
        continue

    if len(all_predictions_train) == 1:
        logger.debug('Only one model so far we just copy its predictions')
        Y_valid = all_predictions_valid[0]
        Y_test = all_predictions_test[0]
    else:
        try:
            # Compute the weights for the ensemble
            # Use equally initialized weights
            n_models = len(all_predictions_train)
            init_weights = np.ones([n_models]) / n_models
            weights = weighted_ensemble(logger.debug,
                                        np.array(all_predictions_train),
                                        true_labels, task_type, metric,
                                        init_weights)
        except ValueError:
            logger.error('Caught ValueError!')
            used_time = watch.wall_elapsed('ensemble_builder')
            continue
        except Exception:
            logger.error('Caught error!')
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        # Compute the ensemble predictions for the valid data
        Y_valid = ensemble_prediction(np.array(all_predictions_valid),
                                      weights)
        # Compute the ensemble predictions for the test data
        Y_test = ensemble_prediction(np.array(all_predictions_test),
                                     weights)

    # Save predictions for valid and test data set
    filename_test = os.path.join(
        output_dir,
        basename + '_valid_' + str(index_run).zfill(3) + '.predict')
    save_predictions(os.path.join(predictions_dir,
                                  filename_test), Y_valid)

    filename_test = os.path.join(
        output_dir,
        basename + '_test_' + str(index_run).zfill(3) + '.predict')
    save_predictions(os.path.join(predictions_dir,
                                  filename_test), Y_test)

    current_num_models = len(dir_ensemble_list)
    watch.stop_task('ensemble_iter_' + str(index_run))
    time_iter = watch.get_wall_dur('ensemble_iter_' + str(index_run))
    used_time = watch.wall_elapsed('ensemble_builder')
    index_run += 1
return
Example 8: AutoML
# Required import: from autosklearn.util import StopWatch [as alias]
# Or: from autosklearn.util.StopWatch import stop_task [as alias]
#.........(part of the code omitted here).........
        self._logger.info(text)

    def fit_automl_dataset(self, basename, input_dir):
        # == Creating a data object with data and information about it
        self._basename = basename
        self._stopwatch = StopWatch()
        self._stopwatch.start_task(self._basename)

        self._logger = get_automl_logger(self._log_dir, self._basename,
                                         self._seed)

        self._debug('======== Reading and converting data ==========')
        # Encoding the labels will be done after the metafeature calculation!
        loaded_data_manager = CompetitionDataManager(self._basename, input_dir,
                                                     verbose=True,
                                                     encode_labels=False)
        loaded_data_manager_str = str(loaded_data_manager).split('\n')
        for part in loaded_data_manager_str:
            self._debug(part)

        return self._fit(loaded_data_manager)

    def _save_data_manager(self, data_d, tmp_dir, basename, watcher):
        task_name = 'StoreDatamanager'
        watcher.start_task(task_name)

        filepath = os.path.join(tmp_dir, basename + '_Manager.pkl')

        if _check_path_for_save(filepath, 'Data manager ', self._debug):
            pickle.dump(data_d, open(filepath, 'w'), protocol=-1)

        watcher.stop_task(task_name)
        return filepath

    def _fit(self, manager):
        # TODO: check that data and task definition fit together!
        self._metric = manager.info['metric']
        self._task = manager.info['task']
        self._target_num = manager.info['target_num']

        set_auto_seed(self._seed)

        # load data
        _save_ensemble_data(
            manager.data['X_train'],
            manager.data['Y_train'],
            self._tmp_dir,
            self._stopwatch)

        time_for_load_data = self._stopwatch.wall_elapsed(self._basename)

        if self._debug_mode:
            _print_load_time(
                self._basename,
                self._time_for_task,
                time_for_load_data,
                self._info)

        # == Calculate metafeatures
        meta_features = _calculate_meta_features(
            data_feat_type=manager.feat_type,
            data_info_task=manager.info['task'], basename=self._basename,
            metalearning_cnt=self._initial_configurations_via_metalearning,
Example 9: AutoML
# Required import: from autosklearn.util import StopWatch [as alias]
# Or: from autosklearn.util.StopWatch import stop_task [as alias]
#.........(part of the code omitted here).........
    def fit_automl_dataset(self, dataset):
        self._stopwatch = StopWatch()
        self._backend.save_start_time(self._seed)

        name = os.path.basename(dataset)
        self._stopwatch.start_task(name)
        self._start_task(self._stopwatch, name)
        self._dataset_name = name

        self._logger = self._get_logger(name)
        self._logger.debug('======== Reading and converting data ==========')
        # Encoding the labels will be done after the metafeature calculation!
        self._data_memory_limit = float(self._ml_memory_limit) / 3
        loaded_data_manager = CompetitionDataManager(
            dataset, encode_labels=False,
            max_memory_in_mb=self._data_memory_limit)
        loaded_data_manager_str = str(loaded_data_manager).split('\n')
        for part in loaded_data_manager_str:
            self._logger.debug(part)

        return self._fit(loaded_data_manager)

    def _get_logger(self, name):
        logger_name = 'AutoML(%d):%s' % (self._seed, name)
        setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
        return get_logger(logger_name)

    @staticmethod
    def _start_task(watcher, task_name):
        watcher.start_task(task_name)

    @staticmethod
    def _stop_task(watcher, task_name):
        watcher.stop_task(task_name)

    @staticmethod
    def _print_load_time(basename, time_left_for_this_task,
                         time_for_load_data, logger):
        time_left_after_reading = max(
            0, time_left_for_this_task - time_for_load_data)
        logger.info('Remaining time after reading %s %5.2f sec' %
                    (basename, time_left_after_reading))
        return time_for_load_data

    def _do_dummy_prediction(self, datamanager, num_run):
        self._logger.info("Starting to create dummy predictions.")
        time_limit = int(self._time_for_task / 6.)
        memory_limit = int(self._ml_memory_limit)
        _info = eval_with_limits(datamanager, self._tmp_dir, 1,
                                 self._seed, num_run,
                                 self._resampling_strategy,
                                 self._resampling_strategy_arguments,
                                 memory_limit, time_limit)
        if _info[4] == StatusType.SUCCESS:
            self._logger.info("Finished creating dummy prediction 1/2.")
        else:
            self._logger.error('Error creating dummy prediction 1/2: %s',
                               _info[3])
        num_run += 1

        _info = eval_with_limits(datamanager, self._tmp_dir, 2,
Example 10: main
# Required import: from autosklearn.util import StopWatch [as alias]
# Or: from autosklearn.util.StopWatch import stop_task [as alias]
#.........(part of the code omitted here).........
                                            include_num_runs,
                                            re_num_run)

        if len(all_predictions_train) == len(all_predictions_test) == len(
                all_predictions_valid) == 0:
            logger.error('All models do just random guessing')
            time.sleep(2)
            continue

        elif len(all_predictions_train) == 1:
            logger.debug('Only one model so far we just copy its predictions')
            ensemble_members_run_numbers = {0: 1.0}
            # Output the score
            logger.info('Training performance: %f' %
                        np.max(model_names_to_scores.values()))
        else:
            try:
                indices, trajectory = ensemble_selection(
                    np.array(all_predictions_train), true_labels,
                    ensemble_size, task_type, metric)

                logger.info('Trajectory and indices!')
                logger.info(trajectory)
                logger.info(indices)

            except ValueError as e:
                logger.error('Caught ValueError: ' + str(e))
                used_time = watch.wall_elapsed('ensemble_builder')
                continue
            except Exception as e:
                logger.error('Caught error! %s', e.message)
                used_time = watch.wall_elapsed('ensemble_builder')
                continue

            # Output the score
            logger.info('Training performance: %f' % trajectory[-1])

            # Print the ensemble members:
            ensemble_members_run_numbers = dict()
            ensemble_members = Counter(indices).most_common()
            ensemble_members_string = 'Ensemble members:\n'
            logger.info(ensemble_members)
            for ensemble_member in ensemble_members:
                weight = float(ensemble_member[1]) / len(indices)
                ensemble_members_string += \
                    ('    %s; weight: %10f; performance: %10f\n' %
                     (indices_to_model_names[ensemble_member[0]],
                      weight,
                      model_names_to_scores[
                          indices_to_model_names[ensemble_member[0]]]))
                ensemble_members_run_numbers[
                    indices_to_run_num[
                        ensemble_member[0]]] = weight
            logger.info(ensemble_members_string)

        # Save the ensemble indices for later use!
        filename_indices = os.path.join(indices_output_dir,
                                        str(index_run).zfill(5) + '.indices')
        logger.info(ensemble_members_run_numbers)
        with open(filename_indices, 'w') as fh:
            pickle.dump(ensemble_members_run_numbers, fh)

        # Save predictions for valid and test data set
        if len(dir_valid_list) == len(dir_ensemble_list):
            ensemble_predictions_valid = np.mean(
                all_predictions_valid[indices.astype(int)],
                axis=0)
            filename_test = os.path.join(
                output_dir,
                basename + '_valid_' + str(index_run).zfill(3) + '.predict')
            save_predictions(
                os.path.join(predictions_dir, filename_test),
                ensemble_predictions_valid)
        else:
            logger.info('Could not find as many validation set predictions '
                        'as ensemble predictions!')

        if len(dir_test_list) == len(dir_ensemble_list):
            ensemble_predictions_test = np.mean(
                all_predictions_test[indices.astype(int)],
                axis=0)
            filename_test = os.path.join(
                output_dir,
                basename + '_test_' + str(index_run).zfill(3) + '.predict')
            save_predictions(
                os.path.join(predictions_dir, filename_test),
                ensemble_predictions_test)
        else:
            logger.info('Could not find as many test set predictions as '
                        'ensemble predictions!')

        current_num_models = len(dir_ensemble_list)
        watch.stop_task('ensemble_iter_' + str(index_run))
        time_iter = watch.get_wall_dur('ensemble_iter_' + str(index_run))
        used_time = watch.wall_elapsed('ensemble_builder')
        index_run += 1
    return
Example 11: AutoML
# Required import: from autosklearn.util import StopWatch [as alias]
# Or: from autosklearn.util.StopWatch import stop_task [as alias]
#.........(part of the code omitted here).........
    def fit_automl_dataset(self, dataset):
        self._stopwatch = StopWatch()
        self._backend.save_start_time(self._seed)

        name = os.path.basename(dataset)
        self._stopwatch.start_task(name)
        self._start_task(self._stopwatch, name)
        self._dataset_name = name

        self._logger = self._get_logger(name)
        self._logger.debug('======== Reading and converting data ==========')
        # Encoding the labels will be done after the metafeature calculation!
        self._data_memory_limit = float(self._ml_memory_limit) / 3
        loaded_data_manager = CompetitionDataManager(
            dataset, encode_labels=False,
            max_memory_in_mb=self._data_memory_limit)
        loaded_data_manager_str = str(loaded_data_manager).split('\n')
        for part in loaded_data_manager_str:
            self._logger.debug(part)

        return self._fit(loaded_data_manager)

    def _get_logger(self, name):
        logger_name = 'AutoML(%d):%s' % (self._seed, name)
        setup_logger(os.path.join(self._backend.temporary_directory,
                                  '%s.log' % str(logger_name)))
        return get_logger(logger_name)

    @staticmethod
    def _start_task(watcher, task_name):
        watcher.start_task(task_name)

    @staticmethod
    def _stop_task(watcher, task_name):
        watcher.stop_task(task_name)

    @staticmethod
    def _print_load_time(basename, time_left_for_this_task,
                         time_for_load_data, logger):
        time_left_after_reading = max(
            0, time_left_for_this_task - time_for_load_data)
        logger.info('Remaining time after reading %s %5.2f sec' %
                    (basename, time_left_after_reading))
        return time_for_load_data

    def _do_dummy_prediction(self, datamanager, num_run):
        self._logger.info("Starting to create dummy predictions.")
        # time_limit = int(self._time_for_task / 6.)
        memory_limit = int(self._ml_memory_limit)
        ta = ExecuteTaFuncWithQueue(backend=self._backend,
                                    autosklearn_seed=self._seed,
                                    resampling_strategy=self._resampling_strategy,
                                    initial_num_run=num_run,
                                    logger=self._logger,
                                    **self._resampling_strategy_arguments)

        status, cost, runtime, additional_info = \
            ta.run(1, cutoff=self._time_for_task, memory_limit=memory_limit)
        if status == StatusType.SUCCESS:
            self._logger.info("Finished creating dummy predictions.")
        else:
            self._logger.error('Error creating dummy predictions: %s',
                               additional_info)
Example 12: AutoML
# Required import: from autosklearn.util import StopWatch [as alias]
# Or: from autosklearn.util.StopWatch import stop_task [as alias]
#.........(part of the code omitted here).........
        self._logger.debug('======== Reading and converting data ==========')
        # Encoding the labels will be done after the metafeature calculation!
        self._data_memory_limit = float(self._ml_memory_limit) / 3
        loaded_data_manager = CompetitionDataManager(
            dataset, max_memory_in_mb=self._data_memory_limit)
        loaded_data_manager_str = str(loaded_data_manager).split('\n')
        for part in loaded_data_manager_str:
            self._logger.debug(part)

        return self._fit(loaded_data_manager, metric)

    def fit_on_datamanager(self, datamanager, metric):
        self._stopwatch = StopWatch()
        self._backend.save_start_time(self._seed)

        name = os.path.basename(datamanager.name)
        self._stopwatch.start_task(name)
        self._start_task(self._stopwatch, name)
        self._dataset_name = name

        self._logger = self._get_logger(name)
        self._fit(datamanager, metric)

    def _get_logger(self, name):
        logger_name = 'AutoML(%d):%s' % (self._seed, name)
        setup_logger(os.path.join(self._backend.temporary_directory,
                                  '%s.log' % str(logger_name)))
        return get_logger(logger_name)

    @staticmethod
    def _start_task(watcher, task_name):
        watcher.start_task(task_name)

    @staticmethod
    def _stop_task(watcher, task_name):
        watcher.stop_task(task_name)

    @staticmethod
    def _print_load_time(basename, time_left_for_this_task,
                         time_for_load_data, logger):
        time_left_after_reading = max(
            0, time_left_for_this_task - time_for_load_data)
        logger.info('Remaining time after reading %s %5.2f sec' %
                    (basename, time_left_after_reading))
        return time_for_load_data

    def _do_dummy_prediction(self, datamanager, num_run):
        # When using partial-cv it makes no sense to do dummy predictions
        if self._resampling_strategy in ['partial-cv',
                                         'partial-cv-iterative-fit']:
            return num_run

        self._logger.info("Starting to create dummy predictions.")
        memory_limit = int(self._ml_memory_limit)
        scenario_mock = unittest.mock.Mock()
        scenario_mock.wallclock_limit = self._time_for_task
        # This stats object is a hack - maybe the SMAC stats object should
        # already be generated here!
        stats = Stats(scenario_mock)
        stats.start_timing()
        ta = ExecuteTaFuncWithQueue(backend=self._backend,
                                    autosklearn_seed=self._seed,
                                    resampling_strategy=self._resampling_strategy,
                                    initial_num_run=num_run,
                                    logger=self._logger,