This article collects typical usage examples of the Python class autosklearn.util.StopWatch. If you are unsure what the StopWatch class does or how it is used, the curated class code examples below may help.
Fifteen code examples of the StopWatch class are shown below, ordered by popularity by default.
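Before the individual examples, here is a minimal usage sketch assembled from the calls that appear in them. The method names (start_task, stop_task, wall_elapsed, wall_sum, cpu_sum) are taken from the snippets on this page, so treat this as an inferred sketch rather than authoritative API documentation:

import time

from autosklearn.util import StopWatch

watch = StopWatch()
watch.start_task('load_data')   # begin timing the named task
time.sleep(0.1)                 # stand-in for real work
watch.stop_task('load_data')    # stop timing the named task

print(watch.wall_elapsed('load_data'))  # wall-clock seconds for one task
print(watch.wall_sum())                 # total wall time across all tasks
print(watch.cpu_sum())                  # total CPU time across all tasks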
Example 1: __init__
def __init__(self,
             tmp_dir,
             output_dir,
             time_left_for_this_task,
             per_run_time_limit,
             log_dir=None,
             initial_configurations_via_metalearning=25,
             ensemble_size=1,
             ensemble_nbest=1,
             seed=1,
             ml_memory_limit=3000,
             metadata_directory=None,
             queue=None,
             keep_models=True,
             debug_mode=False,
             include_estimators=None,
             include_preprocessors=None,
             resampling_strategy='holdout',
             resampling_strategy_arguments=None,
             delete_tmp_folder_after_terminate=False,
             delete_output_folder_after_terminate=False,
             shared_mode=False):
    super(AutoML, self).__init__()
    self._tmp_dir = tmp_dir
    self._output_dir = output_dir
    self._time_for_task = time_left_for_this_task
    self._per_run_time_limit = per_run_time_limit
    self._log_dir = log_dir if log_dir is not None else self._tmp_dir
    self._initial_configurations_via_metalearning = \
        initial_configurations_via_metalearning
    self._ensemble_size = ensemble_size
    self._ensemble_nbest = ensemble_nbest
    self._seed = seed
    self._ml_memory_limit = ml_memory_limit
    self._metadata_directory = metadata_directory
    self._queue = queue
    self._keep_models = keep_models
    self._include_estimators = include_estimators
    self._include_preprocessors = include_preprocessors
    self._resampling_strategy = resampling_strategy
    self._resampling_strategy_arguments = resampling_strategy_arguments
    self.delete_tmp_folder_after_terminate = \
        delete_tmp_folder_after_terminate
    self.delete_output_folder_after_terminate = \
        delete_output_folder_after_terminate
    self._shared_mode = shared_mode
    self._datamanager = None
    self._dataset_name = None
    self._stopwatch = StopWatch()
    self._logger = None
    self._task = None
    self._metric = None
    self._label_num = None
    self.models_ = None
    self.ensemble_indices_ = None
    self._debug_mode = debug_mode
    self._backend = Backend(self._output_dir, self._tmp_dir)
Example 2: fit
def fit(self, X, y,
        task=MULTICLASS_CLASSIFICATION,
        metric='acc_metric',
        feat_type=None,
        dataset_name=None):
    if dataset_name is None:
        m = hashlib.md5()
        m.update(X.data)
        dataset_name = m.hexdigest()
    self._backend.save_start_time(self._seed)
    self._stopwatch = StopWatch()
    self._dataset_name = dataset_name
    self._stopwatch.start_task(self._dataset_name)
    logger_name = 'AutoML(%d):%s' % (self._seed, dataset_name)
    setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
    self._logger = get_logger(logger_name)
    if isinstance(metric, str):
        metric = STRING_TO_METRIC[metric]
    loaded_data_manager = XYDataManager(X, y,
                                        task=task,
                                        metric=metric,
                                        feat_type=feat_type,
                                        dataset_name=dataset_name,
                                        encode_labels=False)
    return self._fit(loaded_data_manager)
Example 3: fit
def fit(self, X, y, task=MULTICLASS_CLASSIFICATION, metric="acc_metric",
        feat_type=None, dataset_name=None):
    if dataset_name is None:
        m = hashlib.md5()
        m.update(X.data)
        dataset_name = m.hexdigest()
    self._backend.save_start_time(self._seed)
    self._stopwatch = StopWatch()
    self._dataset_name = dataset_name
    self._stopwatch.start_task(self._dataset_name)
    logger_name = "AutoML(%d):%s" % (self._seed, dataset_name)
    setup_logger(os.path.join(self._tmp_dir, "%s.log" % str(logger_name)))
    self._logger = get_logger(logger_name)
    if isinstance(metric, str):
        metric = STRING_TO_METRIC[metric]
    if feat_type is not None and len(feat_type) != X.shape[1]:
        raise ValueError(
            "Array feat_type does not have same number of "
            "variables as X has features. %d vs %d." % (len(feat_type), X.shape[1])
        )
    if feat_type is not None and not all([isinstance(f, bool) for f in feat_type]):
        raise ValueError("Array feat_type must only contain bools.")
    loaded_data_manager = XYDataManager(
        X, y, task=task, metric=metric, feat_type=feat_type,
        dataset_name=dataset_name, encode_labels=False
    )
    return self._fit(loaded_data_manager)
Example 4: fit_on_datamanager
def fit_on_datamanager(self, datamanager, metric):
    self._stopwatch = StopWatch()
    self._backend.save_start_time(self._seed)
    name = os.path.basename(datamanager.name)
    self._stopwatch.start_task(name)
    self._start_task(self._stopwatch, name)
    self._dataset_name = name
    self._logger = self._get_logger(name)
    self._fit(datamanager, metric)
Example 5: fit
def fit(self, X, y,
        task=MULTICLASS_CLASSIFICATION,
        metric='acc_metric',
        feat_type=None,
        dataset_name=None):
    if not self._shared_mode:
        self._backend.context.delete_directories()
    else:
        # If this fails, it's likely that this is the first call to get
        # the data manager
        try:
            D = self._backend.load_datamanager()
            dataset_name = D.name
        except IOError:
            pass
    self._backend.context.create_directories()
    if dataset_name is None:
        dataset_name = hash_numpy_array(X)
    self._backend.save_start_time(self._seed)
    self._stopwatch = StopWatch()
    self._dataset_name = dataset_name
    self._stopwatch.start_task(self._dataset_name)
    self._logger = self._get_logger(dataset_name)
    if isinstance(metric, str):
        metric = STRING_TO_METRIC[metric]
    if feat_type is not None and len(feat_type) != X.shape[1]:
        raise ValueError('Array feat_type does not have same number of '
                         'variables as X has features. %d vs %d.' %
                         (len(feat_type), X.shape[1]))
    if feat_type is not None and not all([isinstance(f, str)
                                          for f in feat_type]):
        raise ValueError('Array feat_type must only contain strings.')
    if feat_type is not None:
        for ft in feat_type:
            if ft.lower() not in ['categorical', 'numerical']:
                raise ValueError('Only `Categorical` and `Numerical` are '
                                 'valid feature types, you passed `%s`' % ft)
    self._data_memory_limit = None
    loaded_data_manager = XYDataManager(X, y,
                                        task=task,
                                        metric=metric,
                                        feat_type=feat_type,
                                        dataset_name=dataset_name,
                                        encode_labels=False)
    return self._fit(loaded_data_manager)
Example 6: run
def run(self):
    if self._parser is None:
        raise ValueError('You must invoke run() only via start_automl()')
    self._backend.save_start_time(self._seed)
    self._stopwatch = StopWatch()
    datamanager = get_data_manager(namespace=self._parser)
    self._stopwatch.start_task(datamanager.name)
    self._logger = self._get_logger(datamanager.name)
    self._datamanager = datamanager
    self._dataset_name = datamanager.name
    self._fit(self._datamanager)
Example 7: start_automl
def start_automl(self, parser):
    self._backend.save_start_time(self._seed)
    self._stopwatch = StopWatch()
    datamanager = get_data_manager(namespace=parser)
    self._stopwatch.start_task(datamanager.name)
    logger_name = 'AutoML(%d):%s' % (self._seed, datamanager.name)
    setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
    self._logger = get_logger(logger_name)
    self._datamanager = datamanager
    self._dataset_name = datamanager.name
    self.start()
Example 8: test_stopwatch_overhead
def test_stopwatch_overhead(self):
    # Wall Overhead
    start = time.time()
    # Note: time.clock() was removed in Python 3.8; on modern Python,
    # use time.process_time() instead.
    cpu_start = time.clock()
    watch = StopWatch()
    for i in range(1, 1000):
        watch.start_task('task_%d' % i)
        watch.stop_task('task_%d' % i)
    cpu_stop = time.clock()
    stop = time.time()
    dur = stop - start
    cpu_dur = cpu_stop - cpu_start
    cpu_overhead = cpu_dur - watch.cpu_sum()
    wall_overhead = dur - watch.wall_sum()
    self.assertLess(cpu_overhead, 1)
    self.assertLess(wall_overhead, 1)
    self.assertLess(watch.cpu_sum(), 2 * watch.wall_sum())
Example 9: fit
def fit(self, X, y,
        task=MULTICLASS_CLASSIFICATION,
        metric='acc_metric',
        feat_type=None,
        dataset_name=None):
    if dataset_name is None:
        m = hashlib.md5()
        m.update(X.data)
        dataset_name = m.hexdigest()
    self._backend.save_start_time(self._seed)
    self._stopwatch = StopWatch()
    self._dataset_name = dataset_name
    self._stopwatch.start_task(self._dataset_name)
    logger_name = 'AutoML(%d):%s' % (self._seed, dataset_name)
    setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
    self._logger = get_logger(logger_name)
    if isinstance(metric, str):
        metric = STRING_TO_METRIC[metric]
    if feat_type is not None and len(feat_type) != X.shape[1]:
        raise ValueError('Array feat_type does not have same number of '
                         'variables as X has features. %d vs %d.' %
                         (len(feat_type), X.shape[1]))
    if feat_type is not None and not all([isinstance(f, str)
                                          for f in feat_type]):
        raise ValueError('Array feat_type must only contain strings.')
    if feat_type is not None:
        for ft in feat_type:
            if ft.lower() not in ['categorical', 'numerical']:
                raise ValueError('Only `Categorical` and `Numerical` are '
                                 'valid feature types, you passed `%s`' % ft)
    loaded_data_manager = XYDataManager(X, y,
                                        task=task,
                                        metric=metric,
                                        feat_type=feat_type,
                                        dataset_name=dataset_name,
                                        encode_labels=False)
    return self._fit(loaded_data_manager)
Example 10: fit_automl_dataset
def fit_automl_dataset(self, basename, input_dir):
    # == Creating a data object with data and information about it
    self._basename = basename
    self._stopwatch = StopWatch()
    self._stopwatch.start_task(self._basename)
    self._logger = get_automl_logger(self._log_dir, self._basename,
                                     self._seed)
    self._debug('======== Reading and converting data ==========')
    # Encoding the labels will be done after the metafeature calculation!
    loaded_data_manager = CompetitionDataManager(self._basename, input_dir,
                                                 verbose=True,
                                                 encode_labels=False)
    loaded_data_manager_str = str(loaded_data_manager).split('\n')
    for part in loaded_data_manager_str:
        self._debug(part)
    return self._fit(loaded_data_manager)
Example 11: fit_automl_dataset
def fit_automl_dataset(self, dataset, metric):
    self._stopwatch = StopWatch()
    self._backend.save_start_time(self._seed)
    name = os.path.basename(dataset)
    self._stopwatch.start_task(name)
    self._start_task(self._stopwatch, name)
    self._dataset_name = name
    self._logger = self._get_logger(name)
    self._logger.debug('======== Reading and converting data ==========')
    # Encoding the labels will be done after the metafeature calculation!
    self._data_memory_limit = float(self._ml_memory_limit) / 3
    loaded_data_manager = CompetitionDataManager(
        dataset, max_memory_in_mb=self._data_memory_limit)
    loaded_data_manager_str = str(loaded_data_manager).split('\n')
    for part in loaded_data_manager_str:
        self._logger.debug(part)
    return self._fit(loaded_data_manager, metric)
Example 12: fit_automl_dataset
def fit_automl_dataset(self, dataset):
    self._stopwatch = StopWatch()
    self._backend.save_start_time(self._seed)
    name = os.path.basename(dataset)
    self._stopwatch.start_task(name)
    self._start_task(self._stopwatch, name)
    self._dataset_name = name
    logger_name = 'AutoML(%d):%s' % (self._seed, name)
    setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
    self._logger = get_logger(logger_name)
    self._logger.debug('======== Reading and converting data ==========')
    # Encoding the labels will be done after the metafeature calculation!
    loaded_data_manager = CompetitionDataManager(dataset,
                                                 encode_labels=False)
    loaded_data_manager_str = str(loaded_data_manager).split('\n')
    for part in loaded_data_manager_str:
        self._logger.debug(part)
    return self._fit(loaded_data_manager)
Example 13: fit
def fit(self, data_x, y,
        task=MULTICLASS_CLASSIFICATION,
        metric='acc_metric',
        feat_type=None,
        dataset_name=None):
    if dataset_name is None:
        m = hashlib.md5()
        m.update(data_x.data)
        dataset_name = m.hexdigest()
    self._basename = dataset_name
    self._stopwatch = StopWatch()
    self._stopwatch.start_task(self._basename)
    loaded_data_manager = XYDataManager(data_x, y,
                                        task=task,
                                        metric=metric,
                                        feat_type=feat_type,
                                        dataset_name=dataset_name,
                                        encode_labels=False)
    return self._fit(loaded_data_manager)
Example 14: main
def main(autosklearn_tmp_dir,
         basename,
         task_type,
         metric,
         limit,
         output_dir,
         ensemble_size=None,
         ensemble_nbest=None,
         seed=1,
         shared_mode=False,
         max_iterations=-1,
         precision="32"):
    watch = StopWatch()
    watch.start_task('ensemble_builder')
    used_time = 0
    time_iter = 0
    index_run = 0
    num_iteration = 0
    current_num_models = 0
    backend = Backend(output_dir, autosklearn_tmp_dir)
    dir_ensemble = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                                'predictions_ensemble')
    dir_valid = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                             'predictions_valid')
    dir_test = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                            'predictions_test')
    paths_ = [dir_ensemble, dir_valid, dir_test]
    targets_ensemble = backend.load_targets_ensemble()
    dir_ensemble_list_mtimes = []
    while used_time < limit or (max_iterations > 0 and
                                max_iterations >= num_iteration):
        num_iteration += 1
        logger.debug('Time left: %f', limit - used_time)
        logger.debug('Time last iteration: %f', time_iter)
        # Load the predictions from the models
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            logger.debug('Prediction directory %s does not exist!' %
                         dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue
        if shared_mode is False:
            dir_ensemble_list = sorted(glob.glob(os.path.join(
                dir_ensemble, 'predictions_ensemble_%s_*.npy' % seed)))
            if exists[1]:
                dir_valid_list = sorted(glob.glob(os.path.join(
                    dir_valid, 'predictions_valid_%s_*.npy' % seed)))
            else:
                dir_valid_list = []
            if exists[2]:
                dir_test_list = sorted(glob.glob(os.path.join(
                    dir_test, 'predictions_test_%s_*.npy' % seed)))
            else:
                dir_test_list = []
        else:
            dir_ensemble_list = sorted(os.listdir(dir_ensemble))
            dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
            dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []
        # Check the modification times because predictions can be updated
        # over time!
        old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
        dir_ensemble_list_mtimes = []
        for dir_ensemble_file in dir_ensemble_list:
            dir_ensemble_file = os.path.join(dir_ensemble, dir_ensemble_file)
            mtime = os.path.getmtime(dir_ensemble_file)
            dir_ensemble_list_mtimes.append(mtime)
        if len(dir_ensemble_list) == 0:
            logger.debug('Directories are empty')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue
        if len(dir_ensemble_list) <= current_num_models and \
                old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
            logger.debug('Nothing has changed since the last time')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue
        watch.start_task('ensemble_iter_' + str(index_run))
        # List of num_runs (which are in the filename) which will be included
        # later
        include_num_runs = []
        backup_num_runs = []
        model_and_automl_re = re.compile(r'_([0-9]*)_([0-9]*)\.npy$')
        if ensemble_nbest is not None:
            # Keeps track of the single scores of each model in our ensemble
            scores_nbest = []
#......... (the rest of the code is omitted here) .........
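The model_and_automl_re pattern above encodes how the builder parses prediction filenames: judging from the glob patterns earlier in the example, the trailing _<seed>_<num_run>.npy segment carries the seed and the run number that get collected into include_num_runs. A standalone illustration of that parsing (the filename here is hypothetical, chosen only to match the pattern):

import re

model_and_automl_re = re.compile(r'_([0-9]*)_([0-9]*)\.npy$')
# Made-up filename following the 'predictions_ensemble_<seed>_<num_run>.npy' scheme
match = model_and_automl_re.search('predictions_ensemble_1_42.npy')
automl_seed = match.group(1)   # -> '1'
num_run = match.group(2)       # -> '42'
print(automl_seed, num_run)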
Example 15: main
def main(autosklearn_tmp_dir,
         dataset_name,
         task_type,
         metric,
         limit,
         output_dir,
         ensemble_size=None,
         ensemble_nbest=None,
         seed=1,
         shared_mode=False,
         max_iterations=-1,
         precision="32"):
    watch = StopWatch()
    watch.start_task('ensemble_builder')
    used_time = 0
    time_iter = 0
    index_run = 0
    num_iteration = 0
    current_num_models = 0
    backend = Backend(output_dir, autosklearn_tmp_dir)
    dir_ensemble = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                                'predictions_ensemble')
    dir_valid = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                             'predictions_valid')
    dir_test = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                            'predictions_test')
    paths_ = [dir_ensemble, dir_valid, dir_test]
    dir_ensemble_list_mtimes = []
    while used_time < limit or (max_iterations > 0 and
                                max_iterations >= num_iteration):
        num_iteration += 1
        logger.debug('Time left: %f', limit - used_time)
        logger.debug('Time last iteration: %f', time_iter)
        # Reload the ensemble targets every iteration; this is important
        # because cross-validation may update the ensemble targets in the
        # course of running auto-sklearn
        # TODO update cv in order to not need this any more!
        targets_ensemble = backend.load_targets_ensemble()
        # Load the predictions from the models
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            logger.debug('Prediction directory %s does not exist!' %
                         dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue
        if shared_mode is False:
            dir_ensemble_list = sorted(glob.glob(os.path.join(
                dir_ensemble, 'predictions_ensemble_%s_*.npy' % seed)))
            if exists[1]:
                dir_valid_list = sorted(glob.glob(os.path.join(
                    dir_valid, 'predictions_valid_%s_*.npy' % seed)))
            else:
                dir_valid_list = []
            if exists[2]:
                dir_test_list = sorted(glob.glob(os.path.join(
                    dir_test, 'predictions_test_%s_*.npy' % seed)))
            else:
                dir_test_list = []
        else:
            dir_ensemble_list = sorted(os.listdir(dir_ensemble))
            dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
            dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []
        # Check the modification times because predictions can be updated
        # over time!
        old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
        dir_ensemble_list_mtimes = []
        for dir_ensemble_file in dir_ensemble_list:
            if dir_ensemble_file.endswith("/"):
                dir_ensemble_file = dir_ensemble_file[:-1]
            basename = os.path.basename(dir_ensemble_file)
            dir_ensemble_file = os.path.join(dir_ensemble, basename)
            mtime = os.path.getmtime(dir_ensemble_file)
            dir_ensemble_list_mtimes.append(mtime)
        if len(dir_ensemble_list) == 0:
            logger.debug('Directories are empty')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue
        if len(dir_ensemble_list) <= current_num_models and \
                old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
            logger.debug('Nothing has changed since the last time')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue
        watch.start_task('ensemble_iter_' + str(index_run))
        # List of num_runs (which are in the filename) which will be included
        # later
#......... (the rest of the code is omitted here) .........