This page collects typical usage examples of the Python method autosklearn.util.StopWatch.start_task. If you are wondering what exactly StopWatch.start_task does, how to call it, or what it looks like in real code, the curated examples below may help. You can also read more about the class it belongs to, autosklearn.util.StopWatch.
Twelve code examples of StopWatch.start_task are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
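Before the collected examples, here is a minimal, hedged sketch of the pattern they all share. It only uses the method names exercised in the examples themselves (start_task, stop_task, wall_elapsed, wall_sum, cpu_sum) and assumes an older auto-sklearn release that still exposes autosklearn.util.StopWatch; note that several examples also call time.clock(), which was removed in Python 3.8, so they may need adjustment on current interpreters.
# A minimal sketch of the StopWatch pattern used throughout the examples below.
# Assumption: an auto-sklearn version that still provides autosklearn.util.StopWatch;
# only the methods actually exercised in the examples are shown here.
import time

from autosklearn.util import StopWatch

watch = StopWatch()
watch.start_task('example_task')   # begin timing a named task
time.sleep(0.1)                    # ... the work being measured ...
watch.stop_task('example_task')    # stop timing that task

# Per-task and aggregate timings, as used by the ensemble-builder examples:
print(watch.wall_elapsed('example_task'))  # wall-clock seconds for one task
print(watch.wall_sum())                    # wall-clock seconds summed over all tasks
print(watch.cpu_sum())                     # CPU seconds summed over all tasks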

Example 1: test_stopwatch_overhead
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import start_task [as alias]
def test_stopwatch_overhead(self):
    # Wall Overhead
    start = time.time()
    cpu_start = time.clock()
    watch = StopWatch()
    for i in range(1, 1000):
        watch.start_task('task_%d' % i)
        watch.stop_task('task_%d' % i)
    cpu_stop = time.clock()
    stop = time.time()

    dur = stop - start
    cpu_dur = cpu_stop - cpu_start
    cpu_overhead = cpu_dur - watch.cpu_sum()
    wall_overhead = dur - watch.wall_sum()

    self.assertLess(cpu_overhead, 1)
    self.assertLess(wall_overhead, 1)
    self.assertLess(watch.cpu_sum(), 2 * watch.wall_sum())

Example 2: test_stopwatch_overhead
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import start_task [as alias]
def test_stopwatch_overhead(self):
    # CPU overhead
    start = time.clock()
    watch = StopWatch()
    for i in range(1, 100000):
        watch.start_task("task_%d" % i)
        watch.stop_task("task_%d" % i)
    stop = time.clock()
    dur = stop - start
    cpu_overhead = dur - watch.cpu_sum()
    self.assertLess(cpu_overhead, 1.5)

    # Wall Overhead
    start = time.time()
    watch = StopWatch()
    for i in range(1, 100000):
        watch.start_task("task_%d" % i)
        watch.stop_task("task_%d" % i)
    stop = time.time()
    dur = stop - start
    wall_overhead = dur - watch.wall_sum()
    self.assertLess(wall_overhead, 2)
    self.assertLess(cpu_overhead, wall_overhead)

Example 3: main
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import start_task [as alias]
def main(self):
    watch = StopWatch()
    watch.start_task('ensemble_builder')

    used_time = 0
    time_iter = 0
    index_run = 0
    num_iteration = 0
    current_num_models = 0

    last_hash = None
    current_hash = None

    backend = Backend(self.output_dir, self.autosklearn_tmp_dir)
    dir_ensemble = os.path.join(self.autosklearn_tmp_dir,
                                '.auto-sklearn',
                                'predictions_ensemble')
    dir_valid = os.path.join(self.autosklearn_tmp_dir,
                             '.auto-sklearn',
                             'predictions_valid')
    dir_test = os.path.join(self.autosklearn_tmp_dir,
                            '.auto-sklearn',
                            'predictions_test')
    paths_ = [dir_ensemble, dir_valid, dir_test]
    dir_ensemble_list_mtimes = []

    self.logger.debug('Starting main loop with %f seconds and %d iterations '
                      'left.' % (self.limit - used_time, num_iteration))

    while used_time < self.limit or (self.max_iterations > 0 and
                                     self.max_iterations >= num_iteration):
        num_iteration += 1
        self.logger.debug('Time left: %f', self.limit - used_time)
        self.logger.debug('Time last ensemble building: %f', time_iter)

        # Reload the ensemble targets every iteration, important, because cv may
        # update the ensemble targets in the cause of running auto-sklearn
        # TODO update cv in order to not need this any more!
        targets_ensemble = backend.load_targets_ensemble()

        # Load the predictions from the models
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            self.logger.debug('Prediction directory %s does not exist!' %
                              dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if self.shared_mode is False:
            dir_ensemble_list = sorted(glob.glob(os.path.join(
                dir_ensemble, 'predictions_ensemble_%s_*.npy' % self.seed)))
            if exists[1]:
                dir_valid_list = sorted(glob.glob(os.path.join(
                    dir_valid, 'predictions_valid_%s_*.npy' % self.seed)))
            else:
                dir_valid_list = []
            if exists[2]:
                dir_test_list = sorted(glob.glob(os.path.join(
                    dir_test, 'predictions_test_%s_*.npy' % self.seed)))
            else:
                dir_test_list = []
        else:
            dir_ensemble_list = sorted(os.listdir(dir_ensemble))
            dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
            dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

        # Check the modification times because predictions can be updated
        # over time!
        old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
        dir_ensemble_list_mtimes = []

        for dir_ensemble_file in dir_ensemble_list:
            if dir_ensemble_file.endswith("/"):
                dir_ensemble_file = dir_ensemble_file[:-1]
            basename = os.path.basename(dir_ensemble_file)
            dir_ensemble_file = os.path.join(dir_ensemble, basename)
            mtime = os.path.getmtime(dir_ensemble_file)
            dir_ensemble_list_mtimes.append(mtime)

        if len(dir_ensemble_list) == 0:
            self.logger.debug('Directories are empty')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if len(dir_ensemble_list) <= current_num_models and \
                old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
            self.logger.debug('Nothing has changed since the last time')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        watch.start_task('index_run' + str(index_run))
        watch.start_task('ensemble_iter_' + str(num_iteration))

        # List of num_runs (which are in the filename) which will be included
        # later
        include_num_runs = []
        backup_num_runs = []
# ......... (part of this code is omitted here) .........

Example 4: AutoML
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import start_task [as alias]
class AutoML(BaseEstimator, multiprocessing.Process):

    def __init__(self,
                 tmp_dir,
                 output_dir,
                 time_left_for_this_task,
                 per_run_time_limit,
                 log_dir=None,
                 initial_configurations_via_metalearning=25,
                 ensemble_size=1,
                 ensemble_nbest=1,
                 seed=1,
                 ml_memory_limit=3000,
                 metadata_directory=None,
                 queue=None,
                 keep_models=True,
                 debug_mode=False,
                 include_estimators=None,
                 include_preprocessors=None,
                 resampling_strategy='holdout-iterative-fit',
                 resampling_strategy_arguments=None,
                 delete_tmp_folder_after_terminate=False,
                 delete_output_folder_after_terminate=False,
                 shared_mode=False,
                 precision=32):
        super(AutoML, self).__init__()
        self._tmp_dir = tmp_dir
        self._output_dir = output_dir
        self._time_for_task = time_left_for_this_task
        self._per_run_time_limit = per_run_time_limit
        self._log_dir = log_dir if log_dir is not None else self._tmp_dir
        self._initial_configurations_via_metalearning = \
            initial_configurations_via_metalearning
        self._ensemble_size = ensemble_size
        self._ensemble_nbest = ensemble_nbest
        self._seed = seed
        self._ml_memory_limit = ml_memory_limit
        self._metadata_directory = metadata_directory
        self._queue = queue
        self._keep_models = keep_models
        self._include_estimators = include_estimators
        self._include_preprocessors = include_preprocessors
        self._resampling_strategy = resampling_strategy
        self._resampling_strategy_arguments = resampling_strategy_arguments
        self.delete_tmp_folder_after_terminate = \
            delete_tmp_folder_after_terminate
        self.delete_output_folder_after_terminate = \
            delete_output_folder_after_terminate
        self._shared_mode = shared_mode
        self.precision = precision

        self._datamanager = None
        self._dataset_name = None
        self._stopwatch = StopWatch()
        self._logger = None
        self._task = None
        self._metric = None
        self._label_num = None

        self.models_ = None
        self.ensemble_indices_ = None

        self._debug_mode = debug_mode
        self._backend = Backend(self._output_dir, self._tmp_dir)

    def start_automl(self, parser):
        self._backend.save_start_time(self._seed)
        self._stopwatch = StopWatch()
        datamanager = get_data_manager(namespace=parser)
        self._stopwatch.start_task(datamanager.name)

        logger_name = 'AutoML(%d):%s' % (self._seed, datamanager.name)
        setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
        self._logger = get_logger(logger_name)

        self._datamanager = datamanager
        self._dataset_name = datamanager.name
        self.start()

    def start(self):
        if self._datamanager is None:
            raise ValueError('You must invoke start() only via start_automl()')
        super(AutoML, self).start()

    def run(self):
        if self._datamanager is None:
            raise ValueError('You must invoke run() only via start_automl()')
        self._fit(self._datamanager)

    def fit(self, X, y,
            task=MULTICLASS_CLASSIFICATION,
            metric='acc_metric',
            feat_type=None,
            dataset_name=None):
        if dataset_name is None:
            m = hashlib.md5()
            m.update(X.data)
            dataset_name = m.hexdigest()

        self._backend.save_start_time(self._seed)
# ......... (part of this code is omitted here) .........

Example 5: main
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import start_task [as alias]
def main(autosklearn_tmp_dir,
         basename,
         task_type,
         metric,
         limit,
         output_dir,
         ensemble_size=None,
         ensemble_nbest=None,
         seed=1,
         shared_mode=False,
         max_iterations=-1,
         precision="32"):
    watch = StopWatch()
    watch.start_task('ensemble_builder')

    used_time = 0
    time_iter = 0
    index_run = 0
    num_iteration = 0
    current_num_models = 0

    backend = Backend(output_dir, autosklearn_tmp_dir)
    dir_ensemble = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                                'predictions_ensemble')
    dir_valid = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                             'predictions_valid')
    dir_test = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                            'predictions_test')
    paths_ = [dir_ensemble, dir_valid, dir_test]

    targets_ensemble = backend.load_targets_ensemble()

    dir_ensemble_list_mtimes = []

    while used_time < limit or (max_iterations > 0 and max_iterations >= num_iteration):
        num_iteration += 1
        logger.debug('Time left: %f', limit - used_time)
        logger.debug('Time last iteration: %f', time_iter)

        # Load the predictions from the models
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            logger.debug('Prediction directory %s does not exist!' %
                         dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if shared_mode is False:
            dir_ensemble_list = sorted(glob.glob(os.path.join(
                dir_ensemble, 'predictions_ensemble_%s_*.npy' % seed)))
            if exists[1]:
                dir_valid_list = sorted(glob.glob(os.path.join(
                    dir_valid, 'predictions_valid_%s_*.npy' % seed)))
            else:
                dir_valid_list = []
            if exists[2]:
                dir_test_list = sorted(glob.glob(os.path.join(
                    dir_test, 'predictions_test_%s_*.npy' % seed)))
            else:
                dir_test_list = []
        else:
            dir_ensemble_list = sorted(os.listdir(dir_ensemble))
            dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
            dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

        # Check the modification times because predictions can be updated
        # over time!
        old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
        dir_ensemble_list_mtimes = []

        for dir_ensemble_file in dir_ensemble_list:
            dir_ensemble_file = os.path.join(dir_ensemble, dir_ensemble_file)
            mtime = os.path.getmtime(dir_ensemble_file)
            dir_ensemble_list_mtimes.append(mtime)

        if len(dir_ensemble_list) == 0:
            logger.debug('Directories are empty')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if len(dir_ensemble_list) <= current_num_models and \
                old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
            logger.debug('Nothing has changed since the last time')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        watch.start_task('ensemble_iter_' + str(index_run))

        # List of num_runs (which are in the filename) which will be included
        # later
        include_num_runs = []
        backup_num_runs = []
        model_and_automl_re = re.compile(r'_([0-9]*)_([0-9]*)\.npy$')
        if ensemble_nbest is not None:
            # Keeps track of the single scores of each model in our ensemble
            scores_nbest = []
# ......... (part of this code is omitted here) .........

Example 6: main
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import start_task [as alias]
def main(autosklearn_tmp_dir,
         dataset_name,
         task_type,
         metric,
         limit,
         output_dir,
         ensemble_size=None,
         ensemble_nbest=None,
         seed=1,
         shared_mode=False,
         max_iterations=-1,
         precision="32"):
    watch = StopWatch()
    watch.start_task('ensemble_builder')

    used_time = 0
    time_iter = 0
    index_run = 0
    num_iteration = 0
    current_num_models = 0

    backend = Backend(output_dir, autosklearn_tmp_dir)
    dir_ensemble = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                                'predictions_ensemble')
    dir_valid = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                             'predictions_valid')
    dir_test = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                            'predictions_test')
    paths_ = [dir_ensemble, dir_valid, dir_test]
    dir_ensemble_list_mtimes = []

    while used_time < limit or (max_iterations > 0 and max_iterations >= num_iteration):
        num_iteration += 1
        logger.debug('Time left: %f', limit - used_time)
        logger.debug('Time last iteration: %f', time_iter)

        # Reload the ensemble targets every iteration, important, because cv may
        # update the ensemble targets in the cause of running auto-sklearn
        # TODO update cv in order to not need this any more!
        targets_ensemble = backend.load_targets_ensemble()

        # Load the predictions from the models
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            logger.debug('Prediction directory %s does not exist!' %
                         dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if shared_mode is False:
            dir_ensemble_list = sorted(glob.glob(os.path.join(
                dir_ensemble, 'predictions_ensemble_%s_*.npy' % seed)))
            if exists[1]:
                dir_valid_list = sorted(glob.glob(os.path.join(
                    dir_valid, 'predictions_valid_%s_*.npy' % seed)))
            else:
                dir_valid_list = []
            if exists[2]:
                dir_test_list = sorted(glob.glob(os.path.join(
                    dir_test, 'predictions_test_%s_*.npy' % seed)))
            else:
                dir_test_list = []
        else:
            dir_ensemble_list = sorted(os.listdir(dir_ensemble))
            dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
            dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

        # Check the modification times because predictions can be updated
        # over time!
        old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
        dir_ensemble_list_mtimes = []

        for dir_ensemble_file in dir_ensemble_list:
            if dir_ensemble_file.endswith("/"):
                dir_ensemble_file = dir_ensemble_file[:-1]
            basename = os.path.basename(dir_ensemble_file)
            dir_ensemble_file = os.path.join(dir_ensemble, basename)
            mtime = os.path.getmtime(dir_ensemble_file)
            dir_ensemble_list_mtimes.append(mtime)

        if len(dir_ensemble_list) == 0:
            logger.debug('Directories are empty')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if len(dir_ensemble_list) <= current_num_models and \
                old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
            logger.debug('Nothing has changed since the last time')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        watch.start_task('ensemble_iter_' + str(index_run))

        # List of num_runs (which are in the filename) which will be included
        # later
# ......... (part of this code is omitted here) .........

Example 7: main
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import start_task [as alias]
def main(logger, predictions_dir, basename, task_type, metric, limit, output_dir,
         ensemble_size=None):
    watch = StopWatch()
    watch.start_task('ensemble_builder')

    used_time = 0
    time_iter = 0
    index_run = 0
    current_num_models = 0
    while used_time < limit:
        logger.debug('Time left: %f' % (limit - used_time))
        logger.debug('Time last iteration: %f' % time_iter)
        # Load the true labels of the validation data
        true_labels = np.load(os.path.join(predictions_dir,
                                           'true_labels_ensemble.npy'))

        # Load the predictions from the models
        all_predictions_train = []
        dir_ensemble = os.path.join(predictions_dir, 'predictions_ensemble/')
        dir_valid = os.path.join(predictions_dir, 'predictions_valid/')
        dir_test = os.path.join(predictions_dir, 'predictions_test/')

        if not os.path.isdir(dir_ensemble) or not os.path.isdir(dir_valid) or \
                not os.path.isdir(dir_test):
            logger.debug('Prediction directory does not exist')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        dir_ensemble_list = sorted(os.listdir(dir_ensemble))
        dir_valid_list = sorted(os.listdir(dir_valid))
        dir_test_list = sorted(os.listdir(dir_test))

        if check_data(logger, len(dir_ensemble_list), len(dir_valid_list),
                      len(dir_test_list), current_num_models):
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        watch.start_task('ensemble_iter_' + str(index_run))

        # Binary mask where True indicates that the corresponding will be
        # excluded from the ensemble
        exclude_mask = []
        if ensemble_size is not None:
            # Keeps track of the single scores of each model in our ensemble
            scores_nbest = []
            # The indices of the model that are currently in our ensemble
            indices_nbest = []

        model_idx = 0
        for f in dir_ensemble_list:
            predictions = np.load(os.path.join(dir_ensemble, f))
            score = calculate_score(true_labels, predictions,
                                    task_type, metric,
                                    predictions.shape[1])

            if ensemble_size is not None:
                if score <= 0.001:
                    exclude_mask.append(True)
                    logger.error('Model only predicts at random: ' + f +
                                 ' has score: ' + str(score))
                # If we have less model in our ensemble than ensemble_size add
                # the current model if it is better than random
                elif len(scores_nbest) < ensemble_size:
                    scores_nbest.append(score)
                    indices_nbest.append(model_idx)
                    exclude_mask.append(False)
                else:
                    # Take the worst performing model in our ensemble so far
                    idx = np.argmin(np.array([scores_nbest]))
                    # If the current model is better than the worst model in
                    # our ensemble replace it by the current model
                    if scores_nbest[idx] < score:
                        logger.debug(
                            'Worst model in our ensemble: %d with score %f will be replaced by model %d with score %f'
                            % (idx, scores_nbest[idx], model_idx, score))
                        scores_nbest[idx] = score
                        # Exclude the old model
                        exclude_mask[int(indices_nbest[idx])] = True
                        indices_nbest[idx] = model_idx
                        exclude_mask.append(False)
                    # Otherwise exclude the current model from the ensemble
                    else:
                        exclude_mask.append(True)
            else:
                # Load all predictions that are better than random
                if score <= 0.001:
                    exclude_mask.append(True)
                    logger.error('Model only predicts at random: ' + f +
                                 ' has score: ' + str(score))
                else:
                    exclude_mask.append(False)
                    all_predictions_train.append(predictions)

            model_idx += 1
# ......... (part of this code is omitted here) .........

Example 8: AutoML
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import start_task [as alias]
class AutoML(multiprocessing.Process, BaseEstimator):

    def __init__(self,
                 tmp_dir,
                 output_dir,
                 time_left_for_this_task,
                 per_run_time_limit,
                 log_dir=None,
                 initial_configurations_via_metalearning=25,
                 ensemble_size=1,
                 ensemble_nbest=1,
                 seed=1,
                 ml_memory_limit=3000,
                 metadata_directory=None,
                 queue=None,
                 keep_models=True,
                 debug_mode=False,
                 logger=None):
        super(AutoML, self).__init__()
        self._seed = seed
        self._tmp_dir = tmp_dir
        self._output_dir = output_dir
        self._model_dir = join(self._tmp_dir, 'models_%d' % self._seed)
        self._ensemble_indices_dir = join(self._tmp_dir,
                                          'ensemble_indices_%d' % self._seed)
        self._create_folder(self._tmp_dir, to_log=False)

        self._time_for_task = time_left_for_this_task
        self._per_run_time_limit = per_run_time_limit
        self._log_dir = log_dir if log_dir is not None else self._tmp_dir
        self._initial_configurations_via_metalearning = initial_configurations_via_metalearning
        self._ensemble_size = ensemble_size
        self._ensemble_nbest = ensemble_nbest
        self._ml_memory_limit = ml_memory_limit
        self._metadata_directory = metadata_directory
        self._queue = queue
        self._keep_models = keep_models

        self._basename = None
        self._stopwatch = None
        self._logger = logger if logger is not None else get_automl_logger(
            self._log_dir, self._basename, self._seed)
        self._ohe = None
        self._task = None
        self._metric = None
        self._target_num = None

        self._debug_mode = debug_mode
        self._create_folders()

    def _create_folder(self, folder, to_log=True):
        if to_log:
            self._debug("CREATE folder: %s" % folder)
        else:
            print("CREATE folder: %s" % folder)
        if os.path.isdir(folder):
            if not self._debug_mode:
                raise OSError("Folder '%s' exists" % folder)
        else:
            os.mkdir(folder)

    def _create_folders(self):
        # == Set up a directory where all the trained models will be pickled to
        self._create_folder(self._output_dir)
        if self._log_dir != self._tmp_dir:
            self._create_folder(self._log_dir)
        self._create_folder(self._model_dir)
        self._create_folder(self._ensemble_indices_dir)

    def run(self):
        raise NotImplementedError()

    def fit(self, data_x, y,
            task=MULTICLASS_CLASSIFICATION,
            metric='acc_metric',
            feat_type=None,
            dataset_name=None):
        if dataset_name is None:
            m = hashlib.md5()
            m.update(data_x.data)
            dataset_name = m.hexdigest()

        self._basename = dataset_name

        self._stopwatch = StopWatch()
        self._stopwatch.start_task(self._basename)

        loaded_data_manager = XYDataManager(data_x, y,
                                            task=task,
                                            metric=metric,
                                            feat_type=feat_type,
                                            dataset_name=dataset_name,
                                            encode_labels=False)

        return self._fit(loaded_data_manager)

    def _debug(self, text):
        self._logger.debug(text)
# ......... (part of this code is omitted here) .........

Example 9: AutoML
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import start_task [as alias]
class AutoML(BaseEstimator, multiprocessing.Process):

    def __init__(self,
                 tmp_dir,
                 output_dir,
                 time_left_for_this_task,
                 per_run_time_limit,
                 log_dir=None,
                 initial_configurations_via_metalearning=25,
                 ensemble_size=1,
                 ensemble_nbest=1,
                 seed=1,
                 ml_memory_limit=3000,
                 metadata_directory=None,
                 queue=None,
                 keep_models=True,
                 debug_mode=False,
                 include_estimators=None,
                 include_preprocessors=None,
                 resampling_strategy='holdout-iterative-fit',
                 resampling_strategy_arguments=None,
                 delete_tmp_folder_after_terminate=False,
                 delete_output_folder_after_terminate=False,
                 shared_mode=False,
                 precision=32,
                 max_iter_smac=None,
                 acquisition_function='EI'):
        super(AutoML, self).__init__()
        self._tmp_dir = tmp_dir
        self._output_dir = output_dir
        self._time_for_task = time_left_for_this_task
        self._per_run_time_limit = per_run_time_limit
        self._log_dir = log_dir if log_dir is not None else self._tmp_dir
        self._initial_configurations_via_metalearning = \
            initial_configurations_via_metalearning
        self._ensemble_size = ensemble_size
        self._ensemble_nbest = ensemble_nbest
        self._seed = seed
        self._ml_memory_limit = ml_memory_limit
        self._data_memory_limit = None
        self._metadata_directory = metadata_directory
        self._queue = queue
        self._keep_models = keep_models
        self._include_estimators = include_estimators
        self._include_preprocessors = include_preprocessors
        self._resampling_strategy = resampling_strategy
        self._resampling_strategy_arguments = resampling_strategy_arguments
        self._max_iter_smac = max_iter_smac
        self.delete_tmp_folder_after_terminate = \
            delete_tmp_folder_after_terminate
        self.delete_output_folder_after_terminate = \
            delete_output_folder_after_terminate
        self._shared_mode = shared_mode
        self.precision = precision
        self.acquisition_function = acquisition_function

        self._datamanager = None
        self._dataset_name = None
        self._stopwatch = StopWatch()
        self._logger = None
        self._task = None
        self._metric = None
        self._label_num = None
        self._parser = None
        self.models_ = None
        self.ensemble_ = None
        self._can_predict = False

        self._debug_mode = debug_mode

        if not isinstance(self._time_for_task, int):
            raise ValueError("time_left_for_this_task not of type integer, "
                             "but %s" % str(type(self._time_for_task)))
        if not isinstance(self._per_run_time_limit, int):
            raise ValueError("per_run_time_limit not of type integer, but %s" %
                             str(type(self._per_run_time_limit)))

        # After assignging and checking variables...
        self._backend = Backend(self._output_dir, self._tmp_dir)

    def start_automl(self, parser):
        self._parser = parser
        self.start()

    def start(self):
        if self._parser is None:
            raise ValueError('You must invoke start() only via start_automl()')
        super(AutoML, self).start()

    def run(self):
        if self._parser is None:
            raise ValueError('You must invoke run() only via start_automl()')
        self._backend.save_start_time(self._seed)
        self._stopwatch = StopWatch()
        datamanager = get_data_manager(namespace=self._parser)
        self._stopwatch.start_task(datamanager.name)

        self._logger = self._get_logger(datamanager.name)
# ......... (part of this code is omitted here) .........

Example 10: main
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import start_task [as alias]
def main(logger,
         predictions_dir,
         basename,
         task_type,
         metric,
         limit,
         output_dir,
         ensemble_size=None,
         seed=1,
         indices_output_dir='.'):
    watch = StopWatch()
    watch.start_task('ensemble_builder')

    task_type = STRING_TO_TASK_TYPES[task_type]

    used_time = 0
    time_iter = 0
    index_run = 0
    current_num_models = 0

    dir_ensemble = join(predictions_dir, 'predictions_ensemble_%s/' % seed)
    dir_valid = join(predictions_dir, 'predictions_valid_%s/' % seed)
    dir_test = join(predictions_dir, 'predictions_test_%s/' % seed)
    paths_ = [dir_ensemble, dir_valid, dir_test]

    tru_labels_path = join(predictions_dir, 'true_labels_ensemble.npy')

    while used_time < limit:
        logger.debug('Time left: %f', limit - used_time)
        logger.debug('Time last iteration: %f', time_iter)

        # Load the true labels of the validation data
        true_labels = np.load(tru_labels_path)

        # Load the predictions from the models
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            logger.debug('Prediction directory %s does not exist!' %
                         dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        dir_ensemble_list = sorted(os.listdir(dir_ensemble))
        dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
        dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

        if check_data(logger, len(dir_ensemble_list), current_num_models):
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        watch.start_task('ensemble_iter_' + str(index_run))

        # List of num_runs (which are in the filename) which will be included
        # later
        include_num_runs = []

        re_num_run = re.compile(r'_([0-9]*)\.npy$')
        if ensemble_size is not None:
            # Keeps track of the single scores of each model in our ensemble
            scores_nbest = []
            # The indices of the model that are currently in our ensemble
            indices_nbest = []
            # The names of the models
            model_names = []
            # The num run of the models
            num_runs = []

        model_names_to_scores = dict()

        model_idx = 0
        for model_name in dir_ensemble_list:
            predictions = np.load(os.path.join(dir_ensemble, model_name))
            score = calculate_score(true_labels, predictions,
                                    task_type, metric,
                                    predictions.shape[1])
            model_names_to_scores[model_name] = score
            num_run = int(re_num_run.search(model_name).group(1))

            if ensemble_size is not None:
                if score <= 0.001:
                    # include_num_runs.append(True)
                    logger.error('Model only predicts at random: ' +
                                 model_name + ' has score: ' + str(score))
                # If we have less models in our ensemble than ensemble_size add
                # the current model if it is better than random
                elif len(scores_nbest) < ensemble_size:
                    scores_nbest.append(score)
                    indices_nbest.append(model_idx)
                    include_num_runs.append(num_run)
                    model_names.append(model_name)
                    num_runs.append(num_run)
                else:
                    # Take the worst performing model in our ensemble so far
                    idx = np.argmin(np.array([scores_nbest]))
                    # If the current model is better than the worst model in
                    # our ensemble replace it by the current model
                    if scores_nbest[idx] < score:
                        logger.debug('Worst model in our ensemble: %s with '
# ......... (part of this code is omitted here) .........

Example 11: AutoML
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import start_task [as alias]
class AutoML(BaseEstimator):

    def __init__(self,
                 backend,
                 time_left_for_this_task,
                 per_run_time_limit,
                 log_dir=None,
                 initial_configurations_via_metalearning=25,
                 ensemble_size=1,
                 ensemble_nbest=1,
                 seed=1,
                 ml_memory_limit=3000,
                 metadata_directory=None,
                 keep_models=True,
                 debug_mode=False,
                 include_estimators=None,
                 include_preprocessors=None,
                 resampling_strategy='holdout-iterative-fit',
                 resampling_strategy_arguments=None,
                 delete_tmp_folder_after_terminate=False,
                 delete_output_folder_after_terminate=False,
                 shared_mode=False,
                 precision=32,
                 max_iter_smac=None,
                 acquisition_function='EI'):
        super(AutoML, self).__init__()
        self._backend = backend
        #self._tmp_dir = tmp_dir
        #self._output_dir = output_dir
        self._time_for_task = time_left_for_this_task
        self._per_run_time_limit = per_run_time_limit
        #self._log_dir = log_dir if log_dir is not None else self._tmp_dir
        self._initial_configurations_via_metalearning = \
            initial_configurations_via_metalearning
        self._ensemble_size = ensemble_size
        self._ensemble_nbest = ensemble_nbest
        self._seed = seed
        self._ml_memory_limit = ml_memory_limit
        self._data_memory_limit = None
        self._metadata_directory = metadata_directory
        self._keep_models = keep_models
        self._include_estimators = include_estimators
        self._include_preprocessors = include_preprocessors
        self._resampling_strategy = resampling_strategy
        self._resampling_strategy_arguments = resampling_strategy_arguments \
            if resampling_strategy_arguments is not None else {}
        self._max_iter_smac = max_iter_smac
        #self.delete_tmp_folder_after_terminate = \
        #    delete_tmp_folder_after_terminate
        #self.delete_output_folder_after_terminate = \
        #    delete_output_folder_after_terminate
        self._shared_mode = shared_mode
        self.precision = precision
        self.acquisition_function = acquisition_function

        self._datamanager = None
        self._dataset_name = None
        self._stopwatch = StopWatch()
        self._logger = None
        self._task = None
        self._metric = None
        self._label_num = None
        self._parser = None
        self.models_ = None
        self.ensemble_ = None
        self._can_predict = False

        self._debug_mode = debug_mode

        if not isinstance(self._time_for_task, int):
            raise ValueError("time_left_for_this_task not of type integer, "
                             "but %s" % str(type(self._time_for_task)))
        if not isinstance(self._per_run_time_limit, int):
            raise ValueError("per_run_time_limit not of type integer, but %s" %
                             str(type(self._per_run_time_limit)))

        # After assignging and checking variables...
        #self._backend = Backend(self._output_dir, self._tmp_dir)

    def start_automl(self, parser):
        self._parser = parser
        self.start()

    def start(self):
        if self._parser is None:
            raise ValueError('You must invoke start() only via start_automl()')
        super(AutoML, self).start()

    def run(self):
        if self._parser is None:
            raise ValueError('You must invoke run() only via start_automl()')
        self._backend.save_start_time(self._seed)
        self._stopwatch = StopWatch()
        datamanager = get_data_manager(namespace=self._parser)
        self._stopwatch.start_task(datamanager.name)

        self._logger = self._get_logger(datamanager.name)

        self._datamanager = datamanager
        self._dataset_name = datamanager.name
# ......... (part of this code is omitted here) .........

Example 12: AutoML
# Required module import: from autosklearn.util import StopWatch [as alias]
# Or alternatively: from autosklearn.util.StopWatch import start_task [as alias]
# ......... (part of this code is omitted here) .........
"but %s" % str(type(self._time_for_task)))
if not isinstance(self._per_run_time_limit, int):
raise ValueError("per_run_time_limit not of type integer, but %s" %
str(type(self._per_run_time_limit)))
# After assignging and checking variables...
#self._backend = Backend(self._output_dir, self._tmp_dir)
def fit(
self, X, y,
task,
metric,
X_test=None,
y_test=None,
feat_type=None,
dataset_name=None,
only_return_configuration_space=False,
):
if self._shared_mode:
# If this fails, it's likely that this is the first call to get
# the data manager
try:
D = self._backend.load_datamanager()
dataset_name = D.name
except IOError:
pass
if dataset_name is None:
dataset_name = hash_array_or_matrix(X)
self._backend.save_start_time(self._seed)
self._stopwatch = StopWatch()
self._dataset_name = dataset_name
self._stopwatch.start_task(self._dataset_name)
self._logger = self._get_logger(dataset_name)
if metric is None:
raise ValueError('No metric given.')
if not isinstance(metric, Scorer):
raise ValueError('Metric must be instance of '
'autosklearn.metrics.Scorer.')
if feat_type is not None and len(feat_type) != X.shape[1]:
raise ValueError('Array feat_type does not have same number of '
'variables as X has features. %d vs %d.' %
(len(feat_type), X.shape[1]))
if feat_type is not None and not all([isinstance(f, str)
for f in feat_type]):
raise ValueError('Array feat_type must only contain strings.')
if feat_type is not None:
for ft in feat_type:
if ft.lower() not in ['categorical', 'numerical']:
raise ValueError('Only `Categorical` and `Numerical` are '
'valid feature types, you passed `%s`' % ft)
self._data_memory_limit = None
loaded_data_manager = XYDataManager(
X, y,
X_test=X_test,
y_test=y_test,
task=task,
feat_type=feat_type,
dataset_name=dataset_name,
)