當前位置: 首頁>>代碼示例>>Python>>正文


Python StopWatch.start_task方法代碼示例

本文整理匯總了Python中autosklearn.util.StopWatch.start_task方法的典型用法代碼示例。如果您正苦於以下問題:Python StopWatch.start_task方法的具體用法?Python StopWatch.start_task怎麽用?Python StopWatch.start_task使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在autosklearn.util.StopWatch的用法示例。


在下文中一共展示了StopWatch.start_task方法的12個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: test_stopwatch_overhead

# 需要導入模塊: from autosklearn.util import StopWatch [as 別名]
# 或者: from autosklearn.util.StopWatch import start_task [as 別名]
    def test_stopwatch_overhead(self):

        # Wall Overhead
        start = time.time()
        cpu_start = time.clock()
        watch = StopWatch()
        for i in range(1, 1000):
            watch.start_task('task_%d' % i)
            watch.stop_task('task_%d' % i)
        cpu_stop = time.clock()
        stop = time.time()
        dur = stop - start
        cpu_dur = cpu_stop - cpu_start
        cpu_overhead = cpu_dur - watch.cpu_sum()
        wall_overhead = dur - watch.wall_sum()

        self.assertLess(cpu_overhead, 1)
        self.assertLess(wall_overhead, 1)
        self.assertLess(watch.cpu_sum(), 2 * watch.wall_sum())
開發者ID:Bryan-LL,項目名稱:auto-sklearn,代碼行數:21,代碼來源:test_StopWatch.py

示例2: test_stopwatch_overhead

# 需要導入模塊: from autosklearn.util import StopWatch [as 別名]
# 或者: from autosklearn.util.StopWatch import start_task [as 別名]
    def test_stopwatch_overhead(self):
        # CPU overhead
        start = time.clock()
        watch = StopWatch()
        for i in range(1, 100000):
            watch.start_task("task_%d" % i)
            watch.stop_task("task_%d" % i)
        stop = time.clock()
        dur = stop - start
        cpu_overhead = dur - watch.cpu_sum()
        self.assertLess(cpu_overhead, 1.5)

        # Wall Overhead
        start = time.time()
        watch = StopWatch()
        for i in range(1, 100000):
            watch.start_task("task_%d" % i)
            watch.stop_task("task_%d" % i)
        stop = time.time()
        dur = stop - start
        wall_overhead = dur - watch.wall_sum()

        self.assertLess(wall_overhead, 2)
        self.assertLess(cpu_overhead, wall_overhead)
開發者ID:ixtel,項目名稱:auto-sklearn,代碼行數:26,代碼來源:test_StopWatch.py

示例3: main

# 需要導入模塊: from autosklearn.util import StopWatch [as 別名]
# 或者: from autosklearn.util.StopWatch import start_task [as 別名]
    def main(self):

        watch = StopWatch()
        watch.start_task('ensemble_builder')

        used_time = 0
        time_iter = 0
        index_run = 0
        num_iteration = 0
        current_num_models = 0
        last_hash = None
        current_hash = None

        backend = Backend(self.output_dir, self.autosklearn_tmp_dir)
        dir_ensemble = os.path.join(self.autosklearn_tmp_dir,
                                    '.auto-sklearn',
                                    'predictions_ensemble')
        dir_valid = os.path.join(self.autosklearn_tmp_dir,
                                 '.auto-sklearn',
                                 'predictions_valid')
        dir_test = os.path.join(self.autosklearn_tmp_dir,
                                '.auto-sklearn',
                                'predictions_test')
        paths_ = [dir_ensemble, dir_valid, dir_test]

        dir_ensemble_list_mtimes = []

        self.logger.debug('Starting main loop with %f seconds and %d iterations '
                          'left.' % (self.limit - used_time, num_iteration))
        while used_time < self.limit or (self.max_iterations > 0 and
                                         self.max_iterations >= num_iteration):
            num_iteration += 1
            self.logger.debug('Time left: %f', self.limit - used_time)
            self.logger.debug('Time last ensemble building: %f', time_iter)

            # Reload the ensemble targets every iteration, important, because cv may
            # update the ensemble targets in the cause of running auto-sklearn
            # TODO update cv in order to not need this any more!
            targets_ensemble = backend.load_targets_ensemble()

            # Load the predictions from the models
            exists = [os.path.isdir(dir_) for dir_ in paths_]
            if not exists[0]:  # all(exists):
                self.logger.debug('Prediction directory %s does not exist!' %
                              dir_ensemble)
                time.sleep(2)
                used_time = watch.wall_elapsed('ensemble_builder')
                continue

            if self.shared_mode is False:
                dir_ensemble_list = sorted(glob.glob(os.path.join(
                    dir_ensemble, 'predictions_ensemble_%s_*.npy' % self.seed)))
                if exists[1]:
                    dir_valid_list = sorted(glob.glob(os.path.join(
                        dir_valid, 'predictions_valid_%s_*.npy' % self.seed)))
                else:
                    dir_valid_list = []
                if exists[2]:
                    dir_test_list = sorted(glob.glob(os.path.join(
                        dir_test, 'predictions_test_%s_*.npy' % self.seed)))
                else:
                    dir_test_list = []
            else:
                dir_ensemble_list = sorted(os.listdir(dir_ensemble))
                dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
                dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

            # Check the modification times because predictions can be updated
            # over time!
            old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
            dir_ensemble_list_mtimes = []

            for dir_ensemble_file in dir_ensemble_list:
                if dir_ensemble_file.endswith("/"):
                    dir_ensemble_file = dir_ensemble_file[:-1]
                basename = os.path.basename(dir_ensemble_file)
                dir_ensemble_file = os.path.join(dir_ensemble, basename)
                mtime = os.path.getmtime(dir_ensemble_file)
                dir_ensemble_list_mtimes.append(mtime)

            if len(dir_ensemble_list) == 0:
                self.logger.debug('Directories are empty')
                time.sleep(2)
                used_time = watch.wall_elapsed('ensemble_builder')
                continue

            if len(dir_ensemble_list) <= current_num_models and \
                    old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
                self.logger.debug('Nothing has changed since the last time')
                time.sleep(2)
                used_time = watch.wall_elapsed('ensemble_builder')
                continue

            watch.start_task('index_run' + str(index_run))
            watch.start_task('ensemble_iter_' + str(num_iteration))

            # List of num_runs (which are in the filename) which will be included
            #  later
            include_num_runs = []
            backup_num_runs = []
#.........這裏部分代碼省略.........
開發者ID:Hanshan1988,項目名稱:auto-sklearn,代碼行數:103,代碼來源:ensemble_builder.py

示例4: AutoML

# 需要導入模塊: from autosklearn.util import StopWatch [as 別名]
# 或者: from autosklearn.util.StopWatch import start_task [as 別名]
class AutoML(BaseEstimator, multiprocessing.Process):

    def __init__(self,
                 tmp_dir,
                 output_dir,
                 time_left_for_this_task,
                 per_run_time_limit,
                 log_dir=None,
                 initial_configurations_via_metalearning=25,
                 ensemble_size=1,
                 ensemble_nbest=1,
                 seed=1,
                 ml_memory_limit=3000,
                 metadata_directory=None,
                 queue=None,
                 keep_models=True,
                 debug_mode=False,
                 include_estimators=None,
                 include_preprocessors=None,
                 resampling_strategy='holdout-iterative-fit',
                 resampling_strategy_arguments=None,
                 delete_tmp_folder_after_terminate=False,
                 delete_output_folder_after_terminate=False,
                 shared_mode=False,
                 precision=32):
        super(AutoML, self).__init__()

        self._tmp_dir = tmp_dir
        self._output_dir = output_dir
        self._time_for_task = time_left_for_this_task
        self._per_run_time_limit = per_run_time_limit
        self._log_dir = log_dir if log_dir is not None else self._tmp_dir
        self._initial_configurations_via_metalearning = \
            initial_configurations_via_metalearning
        self._ensemble_size = ensemble_size
        self._ensemble_nbest = ensemble_nbest
        self._seed = seed
        self._ml_memory_limit = ml_memory_limit
        self._metadata_directory = metadata_directory
        self._queue = queue
        self._keep_models = keep_models
        self._include_estimators = include_estimators
        self._include_preprocessors = include_preprocessors
        self._resampling_strategy = resampling_strategy
        self._resampling_strategy_arguments = resampling_strategy_arguments
        self.delete_tmp_folder_after_terminate = \
            delete_tmp_folder_after_terminate
        self.delete_output_folder_after_terminate = \
            delete_output_folder_after_terminate
        self._shared_mode = shared_mode
        self.precision = precision

        self._datamanager = None
        self._dataset_name = None
        self._stopwatch = StopWatch()
        self._logger = None
        self._task = None
        self._metric = None
        self._label_num = None
        self.models_ = None
        self.ensemble_indices_ = None

        self._debug_mode = debug_mode
        self._backend = Backend(self._output_dir, self._tmp_dir)

    def start_automl(self, parser):
        self._backend.save_start_time(self._seed)
        self._stopwatch = StopWatch()
        datamanager = get_data_manager(namespace=parser)
        self._stopwatch.start_task(datamanager.name)

        logger_name = 'AutoML(%d):%s' % (self._seed, datamanager.name)
        setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
        self._logger = get_logger(logger_name)

        self._datamanager = datamanager
        self._dataset_name = datamanager.name
        self.start()

    def start(self):
        if self._datamanager is None:
            raise ValueError('You must invoke start() only via start_automl()')
        super(AutoML, self).start()

    def run(self):
        if self._datamanager is None:
            raise ValueError('You must invoke run() only via start_automl()')
        self._fit(self._datamanager)

    def fit(self, X, y,
            task=MULTICLASS_CLASSIFICATION,
            metric='acc_metric',
            feat_type=None,
            dataset_name=None):
        if dataset_name is None:
            m = hashlib.md5()
            m.update(X.data)
            dataset_name = m.hexdigest()

        self._backend.save_start_time(self._seed)
#.........這裏部分代碼省略.........
開發者ID:stokasto,項目名稱:auto-sklearn,代碼行數:103,代碼來源:automl.py

示例5: main

# 需要導入模塊: from autosklearn.util import StopWatch [as 別名]
# 或者: from autosklearn.util.StopWatch import start_task [as 別名]
def main(autosklearn_tmp_dir,
         basename,
         task_type,
         metric,
         limit,
         output_dir,
         ensemble_size=None,
         ensemble_nbest=None,
         seed=1,
         shared_mode=False,
         max_iterations=-1,
         precision="32"):

    watch = StopWatch()
    watch.start_task('ensemble_builder')

    used_time = 0
    time_iter = 0
    index_run = 0
    num_iteration = 0
    current_num_models = 0

    backend = Backend(output_dir, autosklearn_tmp_dir)
    dir_ensemble = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                                'predictions_ensemble')
    dir_valid = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                             'predictions_valid')
    dir_test = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                            'predictions_test')
    paths_ = [dir_ensemble, dir_valid, dir_test]

    targets_ensemble = backend.load_targets_ensemble()

    dir_ensemble_list_mtimes = []

    while used_time < limit or (max_iterations > 0 and max_iterations >= num_iteration):
        num_iteration += 1
        logger.debug('Time left: %f', limit - used_time)
        logger.debug('Time last iteration: %f', time_iter)

        # Load the predictions from the models
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            logger.debug('Prediction directory %s does not exist!' %
                          dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if shared_mode is False:
            dir_ensemble_list = sorted(glob.glob(os.path.join(
                dir_ensemble, 'predictions_ensemble_%s_*.npy' % seed)))
            if exists[1]:
                dir_valid_list = sorted(glob.glob(os.path.join(
                    dir_valid, 'predictions_valid_%s_*.npy' % seed)))
            else:
                dir_valid_list = []
            if exists[2]:
                dir_test_list = sorted(glob.glob(os.path.join(
                    dir_test, 'predictions_test_%s_*.npy' % seed)))
            else:
                dir_test_list = []
        else:
            dir_ensemble_list = sorted(os.listdir(dir_ensemble))
            dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
            dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

        # Check the modification times because predictions can be updated
        # over time!
        old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
        dir_ensemble_list_mtimes = []

        for dir_ensemble_file in dir_ensemble_list:
            dir_ensemble_file = os.path.join(dir_ensemble, dir_ensemble_file)
            mtime = os.path.getmtime(dir_ensemble_file)
            dir_ensemble_list_mtimes.append(mtime)

        if len(dir_ensemble_list) == 0:
            logger.debug('Directories are empty')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if len(dir_ensemble_list) <= current_num_models and \
                old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
            logger.debug('Nothing has changed since the last time')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        watch.start_task('ensemble_iter_' + str(index_run))

        # List of num_runs (which are in the filename) which will be included
        #  later
        include_num_runs = []
        backup_num_runs = []
        model_and_automl_re = re.compile(r'_([0-9]*)_([0-9]*)\.npy$')
        if ensemble_nbest is not None:
            # Keeps track of the single scores of each model in our ensemble
            scores_nbest = []
#.........這裏部分代碼省略.........
開發者ID:ixtel,項目名稱:auto-sklearn,代碼行數:103,代碼來源:ensemble_selection_script.py

示例6: main

# 需要導入模塊: from autosklearn.util import StopWatch [as 別名]
# 或者: from autosklearn.util.StopWatch import start_task [as 別名]
def main(autosklearn_tmp_dir,
         dataset_name,
         task_type,
         metric,
         limit,
         output_dir,
         ensemble_size=None,
         ensemble_nbest=None,
         seed=1,
         shared_mode=False,
         max_iterations=-1,
         precision="32"):

    watch = StopWatch()
    watch.start_task('ensemble_builder')

    used_time = 0
    time_iter = 0
    index_run = 0
    num_iteration = 0
    current_num_models = 0

    backend = Backend(output_dir, autosklearn_tmp_dir)
    dir_ensemble = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                                'predictions_ensemble')
    dir_valid = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                             'predictions_valid')
    dir_test = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                            'predictions_test')
    paths_ = [dir_ensemble, dir_valid, dir_test]

    dir_ensemble_list_mtimes = []

    while used_time < limit or (max_iterations > 0 and max_iterations >= num_iteration):
        num_iteration += 1
        logger.debug('Time left: %f', limit - used_time)
        logger.debug('Time last iteration: %f', time_iter)

        # Reload the ensemble targets every iteration, important, because cv may
        # update the ensemble targets in the cause of running auto-sklearn
        # TODO update cv in order to not need this any more!
        targets_ensemble = backend.load_targets_ensemble()

        # Load the predictions from the models
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            logger.debug('Prediction directory %s does not exist!' %
                          dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if shared_mode is False:
            dir_ensemble_list = sorted(glob.glob(os.path.join(
                dir_ensemble, 'predictions_ensemble_%s_*.npy' % seed)))
            if exists[1]:
                dir_valid_list = sorted(glob.glob(os.path.join(
                    dir_valid, 'predictions_valid_%s_*.npy' % seed)))
            else:
                dir_valid_list = []
            if exists[2]:
                dir_test_list = sorted(glob.glob(os.path.join(
                    dir_test, 'predictions_test_%s_*.npy' % seed)))
            else:
                dir_test_list = []
        else:
            dir_ensemble_list = sorted(os.listdir(dir_ensemble))
            dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
            dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

        # Check the modification times because predictions can be updated
        # over time!
        old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
        dir_ensemble_list_mtimes = []

        for dir_ensemble_file in dir_ensemble_list:
            if dir_ensemble_file.endswith("/"):
                dir_ensemble_file = dir_ensemble_file[:-1]
            basename = os.path.basename(dir_ensemble_file)
            dir_ensemble_file = os.path.join(dir_ensemble, basename)
            mtime = os.path.getmtime(dir_ensemble_file)
            dir_ensemble_list_mtimes.append(mtime)

        if len(dir_ensemble_list) == 0:
            logger.debug('Directories are empty')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if len(dir_ensemble_list) <= current_num_models and \
                old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
            logger.debug('Nothing has changed since the last time')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        watch.start_task('ensemble_iter_' + str(index_run))

        # List of num_runs (which are in the filename) which will be included
        #  later
#.........這裏部分代碼省略.........
開發者ID:Allen1203,項目名稱:auto-sklearn,代碼行數:103,代碼來源:ensemble_selection_script.py

示例7: main

# 需要導入模塊: from autosklearn.util import StopWatch [as 別名]
# 或者: from autosklearn.util.StopWatch import start_task [as 別名]
def main(logger, predictions_dir, basename, task_type, metric, limit, output_dir,
         ensemble_size=None):
    watch = StopWatch()
    watch.start_task('ensemble_builder')


    used_time = 0
    time_iter = 0
    index_run = 0
    current_num_models = 0

    while used_time < limit:
        logger.debug('Time left: %f' % (limit - used_time))
        logger.debug('Time last iteration: %f' % time_iter)
        # Load the true labels of the validation data
        true_labels = np.load(os.path.join(predictions_dir,
                                           'true_labels_ensemble.npy'))

        # Load the predictions from the models
        all_predictions_train = []
        dir_ensemble = os.path.join(predictions_dir, 'predictions_ensemble/')
        dir_valid = os.path.join(predictions_dir, 'predictions_valid/')
        dir_test = os.path.join(predictions_dir, 'predictions_test/')

        if not os.path.isdir(dir_ensemble) or not os.path.isdir(dir_valid) or \
                not os.path.isdir(dir_test):
            logger.debug('Prediction directory does not exist')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        dir_ensemble_list = sorted(os.listdir(dir_ensemble))
        dir_valid_list = sorted(os.listdir(dir_valid))
        dir_test_list = sorted(os.listdir(dir_test))

        if check_data(logger, len(dir_ensemble_list), len(dir_valid_list),
                      len(dir_test_list), current_num_models):
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        watch.start_task('ensemble_iter_' + str(index_run))

        # Binary mask where True indicates that the corresponding will be
        # excluded from the ensemble
        exclude_mask = []
        if ensemble_size is not None:
            # Keeps track of the single scores of each model in our ensemble
            scores_nbest = []
            # The indices of the model that are currently in our ensemble
            indices_nbest = []

        model_idx = 0
        for f in dir_ensemble_list:
            predictions = np.load(os.path.join(dir_ensemble, f))
            score = calculate_score(true_labels, predictions,
                                              task_type, metric,
                                              predictions.shape[1])

            if ensemble_size is not None:
                if score <= 0.001:
                    exclude_mask.append(True)
                    logger.error('Model only predicts at random: ' + f +
                                  ' has score: ' + str(score))
                # If we have less model in our ensemble than ensemble_size add
                # the current model if it is better than random
                elif len(scores_nbest) < ensemble_size:
                    scores_nbest.append(score)
                    indices_nbest.append(model_idx)
                    exclude_mask.append(False)
                else:
                    # Take the worst performing model in our ensemble so far
                    idx = np.argmin(np.array([scores_nbest]))

                    # If the current model is better than the worst model in
                    # our ensemble replace it by the current model
                    if scores_nbest[idx] < score:
                        logger.debug(
                            'Worst model in our ensemble: %d with score %f will be replaced by model %d with score %f'
                            % (idx, scores_nbest[idx], model_idx, score))
                        scores_nbest[idx] = score
                        # Exclude the old model
                        exclude_mask[int(indices_nbest[idx])] = True
                        indices_nbest[idx] = model_idx
                        exclude_mask.append(False)
                    # Otherwise exclude the current model from the ensemble
                    else:
                        exclude_mask.append(True)

            else:
                # Load all predictions that are better than random
                if score <= 0.001:
                    exclude_mask.append(True)
                    logger.error('Model only predicts at random: ' + f +
                                  ' has score: ' + str(score))
                else:
                    exclude_mask.append(False)
                    all_predictions_train.append(predictions)

            model_idx += 1
#.........這裏部分代碼省略.........
開發者ID:WarmongeR1,項目名稱:auto-sklearn,代碼行數:103,代碼來源:ensemble_script.py

示例8: AutoML

# 需要導入模塊: from autosklearn.util import StopWatch [as 別名]
# 或者: from autosklearn.util.StopWatch import start_task [as 別名]
class AutoML(multiprocessing.Process, BaseEstimator):

    def __init__(self,
                 tmp_dir,
                 output_dir,
                 time_left_for_this_task,
                 per_run_time_limit,
                 log_dir=None,
                 initial_configurations_via_metalearning=25,
                 ensemble_size=1,
                 ensemble_nbest=1,
                 seed=1,
                 ml_memory_limit=3000,
                 metadata_directory=None,
                 queue=None,
                 keep_models=True,
                 debug_mode=False,
                 logger=None):
        super(AutoML, self).__init__()
        self._seed = seed
        self._tmp_dir = tmp_dir
        self._output_dir = output_dir
        self._model_dir = join(self._tmp_dir, 'models_%d' % self._seed)
        self._ensemble_indices_dir = join(self._tmp_dir,
                                          'ensemble_indices_%d' % self._seed)
        self._create_folder(self._tmp_dir, to_log=False)

        self._time_for_task = time_left_for_this_task
        self._per_run_time_limit = per_run_time_limit
        self._log_dir = log_dir if log_dir is not None else self._tmp_dir
        self._initial_configurations_via_metalearning = initial_configurations_via_metalearning
        self._ensemble_size = ensemble_size
        self._ensemble_nbest = ensemble_nbest

        self._ml_memory_limit = ml_memory_limit
        self._metadata_directory = metadata_directory
        self._queue = queue
        self._keep_models = keep_models

        self._basename = None
        self._stopwatch = None
        self._logger = logger if logger is not None else get_automl_logger(
            self._log_dir, self._basename, self._seed)
        self._ohe = None
        self._task = None
        self._metric = None
        self._target_num = None

        self._debug_mode = debug_mode
        self._create_folders()

    def _create_folder(self, folder, to_log=True):
        if to_log:
            self._debug("CREATE folder: %s" % folder)
        else:
            print("CREATE folder: %s" % folder)
        if os.path.isdir(folder):
            if not self._debug_mode:
                raise OSError("Folder '%s' exists" % folder)
        else:
            os.mkdir(folder)

    def _create_folders(self):
        # == Set up a directory where all the trained models will be pickled to

        self._create_folder(self._output_dir)
        if self._log_dir != self._tmp_dir:
            self._create_folder(self._log_dir)
        self._create_folder(self._model_dir)
        self._create_folder(self._ensemble_indices_dir)

    def run(self):
        raise NotImplementedError()

    def fit(self, data_x, y,
            task=MULTICLASS_CLASSIFICATION,
            metric='acc_metric',
            feat_type=None,
            dataset_name=None):
        if dataset_name is None:
            m = hashlib.md5()
            m.update(data_x.data)
            dataset_name = m.hexdigest()

        self._basename = dataset_name

        self._stopwatch = StopWatch()
        self._stopwatch.start_task(self._basename)

        loaded_data_manager = XYDataManager(data_x, y,
                                            task=task,
                                            metric=metric,
                                            feat_type=feat_type,
                                            dataset_name=dataset_name,
                                            encode_labels=False)

        return self._fit(loaded_data_manager)

    def _debug(self, text):
        self._logger.debug(text)
#.........這裏部分代碼省略.........
開發者ID:WarmongeR1,項目名稱:auto-sklearn,代碼行數:103,代碼來源:automl.py

示例9: AutoML

# 需要導入模塊: from autosklearn.util import StopWatch [as 別名]
# 或者: from autosklearn.util.StopWatch import start_task [as 別名]
class AutoML(BaseEstimator, multiprocessing.Process):

    def __init__(self,
                 tmp_dir,
                 output_dir,
                 time_left_for_this_task,
                 per_run_time_limit,
                 log_dir=None,
                 initial_configurations_via_metalearning=25,
                 ensemble_size=1,
                 ensemble_nbest=1,
                 seed=1,
                 ml_memory_limit=3000,
                 metadata_directory=None,
                 queue=None,
                 keep_models=True,
                 debug_mode=False,
                 include_estimators=None,
                 include_preprocessors=None,
                 resampling_strategy='holdout-iterative-fit',
                 resampling_strategy_arguments=None,
                 delete_tmp_folder_after_terminate=False,
                 delete_output_folder_after_terminate=False,
                 shared_mode=False,
                 precision=32,
                 max_iter_smac=None,
                 acquisition_function='EI'):
        super(AutoML, self).__init__()

        self._tmp_dir = tmp_dir
        self._output_dir = output_dir
        self._time_for_task = time_left_for_this_task
        self._per_run_time_limit = per_run_time_limit
        self._log_dir = log_dir if log_dir is not None else self._tmp_dir
        self._initial_configurations_via_metalearning = \
            initial_configurations_via_metalearning
        self._ensemble_size = ensemble_size
        self._ensemble_nbest = ensemble_nbest
        self._seed = seed
        self._ml_memory_limit = ml_memory_limit
        self._data_memory_limit = None
        self._metadata_directory = metadata_directory
        self._queue = queue
        self._keep_models = keep_models
        self._include_estimators = include_estimators
        self._include_preprocessors = include_preprocessors
        self._resampling_strategy = resampling_strategy
        self._resampling_strategy_arguments = resampling_strategy_arguments
        self._max_iter_smac = max_iter_smac
        self.delete_tmp_folder_after_terminate = \
            delete_tmp_folder_after_terminate
        self.delete_output_folder_after_terminate = \
            delete_output_folder_after_terminate
        self._shared_mode = shared_mode
        self.precision = precision
        self.acquisition_function = acquisition_function

        self._datamanager = None
        self._dataset_name = None
        self._stopwatch = StopWatch()
        self._logger = None
        self._task = None
        self._metric = None
        self._label_num = None
        self._parser = None
        self.models_ = None
        self.ensemble_ = None
        self._can_predict = False

        self._debug_mode = debug_mode

        if not isinstance(self._time_for_task, int):
            raise ValueError("time_left_for_this_task not of type integer, "
                             "but %s" % str(type(self._time_for_task)))
        if not isinstance(self._per_run_time_limit, int):
            raise ValueError("per_run_time_limit not of type integer, but %s" %
                             str(type(self._per_run_time_limit)))

        # After assignging and checking variables...
        self._backend = Backend(self._output_dir, self._tmp_dir)

    def start_automl(self, parser):
        self._parser = parser
        self.start()

    def start(self):
        if self._parser is None:
            raise ValueError('You must invoke start() only via start_automl()')
        super(AutoML, self).start()

    def run(self):
        if self._parser is None:
            raise ValueError('You must invoke run() only via start_automl()')
        self._backend.save_start_time(self._seed)
        self._stopwatch = StopWatch()
        datamanager = get_data_manager(namespace=self._parser)
        self._stopwatch.start_task(datamanager.name)

        self._logger = self._get_logger(datamanager.name)

#.........這裏部分代碼省略.........
開發者ID:Ayaro,項目名稱:auto-sklearn,代碼行數:103,代碼來源:automl.py

示例10: main

# 需要導入模塊: from autosklearn.util import StopWatch [as 別名]
# 或者: from autosklearn.util.StopWatch import start_task [as 別名]
def main(logger,
         predictions_dir,
         basename,
         task_type,
         metric,
         limit,
         output_dir,
         ensemble_size=None,
         seed=1,
         indices_output_dir='.'):

    watch = StopWatch()
    watch.start_task('ensemble_builder')

    task_type = STRING_TO_TASK_TYPES[task_type]

    used_time = 0
    time_iter = 0
    index_run = 0
    current_num_models = 0

    dir_ensemble = join(predictions_dir, 'predictions_ensemble_%s/' % seed)
    dir_valid = join(predictions_dir, 'predictions_valid_%s/' % seed)
    dir_test = join(predictions_dir, 'predictions_test_%s/' % seed)
    paths_ = [dir_ensemble, dir_valid, dir_test]

    tru_labels_path = join(predictions_dir, 'true_labels_ensemble.npy')

    while used_time < limit:
        logger.debug('Time left: %f', limit - used_time)
        logger.debug('Time last iteration: %f', time_iter)
        # Load the true labels of the validation data
        true_labels = np.load(tru_labels_path)

        # Load the predictions from the models
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            logger.debug('Prediction directory %s does not exist!' %
                          dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        dir_ensemble_list = sorted(os.listdir(dir_ensemble))
        dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
        dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

        if check_data(logger, len(dir_ensemble_list), current_num_models):
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        watch.start_task('ensemble_iter_' + str(index_run))

        # List of num_runs (which are in the filename) which will be included
        #  later
        include_num_runs = []
        re_num_run = re.compile(r'_([0-9]*)\.npy$')
        if ensemble_size is not None:
            # Keeps track of the single scores of each model in our ensemble
            scores_nbest = []
            # The indices of the model that are currently in our ensemble
            indices_nbest = []
            # The names of the models
            model_names = []
            # The num run of the models
            num_runs = []

        model_names_to_scores = dict()

        model_idx = 0
        for model_name in dir_ensemble_list:
            predictions = np.load(os.path.join(dir_ensemble, model_name))
            score = calculate_score(true_labels, predictions,
                                              task_type, metric,
                                              predictions.shape[1])
            model_names_to_scores[model_name] = score
            num_run = int(re_num_run.search(model_name).group(1))

            if ensemble_size is not None:
                if score <= 0.001:
                    # include_num_runs.append(True)
                    logger.error('Model only predicts at random: ' +
                                  model_name + ' has score: ' + str(score))
                # If we have less models in our ensemble than ensemble_size add
                # the current model if it is better than random
                elif len(scores_nbest) < ensemble_size:
                    scores_nbest.append(score)
                    indices_nbest.append(model_idx)
                    include_num_runs.append(num_run)
                    model_names.append(model_name)
                    num_runs.append(num_run)
                else:
                    # Take the worst performing model in our ensemble so far
                    idx = np.argmin(np.array([scores_nbest]))

                    # If the current model is better than the worst model in
                    # our ensemble replace it by the current model
                    if scores_nbest[idx] < score:
                        logger.debug('Worst model in our ensemble: %s with '
#.........這裏部分代碼省略.........
開發者ID:WarmongeR1,項目名稱:auto-sklearn,代碼行數:103,代碼來源:ensemble_selection_script.py

示例11: AutoML

# 需要導入模塊: from autosklearn.util import StopWatch [as 別名]
# 或者: from autosklearn.util.StopWatch import start_task [as 別名]
class AutoML(BaseEstimator):

    def __init__(self,
                 backend,
                 time_left_for_this_task,
                 per_run_time_limit,
                 log_dir=None,
                 initial_configurations_via_metalearning=25,
                 ensemble_size=1,
                 ensemble_nbest=1,
                 seed=1,
                 ml_memory_limit=3000,
                 metadata_directory=None,
                 keep_models=True,
                 debug_mode=False,
                 include_estimators=None,
                 include_preprocessors=None,
                 resampling_strategy='holdout-iterative-fit',
                 resampling_strategy_arguments=None,
                 delete_tmp_folder_after_terminate=False,
                 delete_output_folder_after_terminate=False,
                 shared_mode=False,
                 precision=32,
                 max_iter_smac=None,
                 acquisition_function='EI'):
        super(AutoML, self).__init__()
        self._backend = backend
        #self._tmp_dir = tmp_dir
        #self._output_dir = output_dir
        self._time_for_task = time_left_for_this_task
        self._per_run_time_limit = per_run_time_limit
        #self._log_dir = log_dir if log_dir is not None else self._tmp_dir
        self._initial_configurations_via_metalearning = \
            initial_configurations_via_metalearning
        self._ensemble_size = ensemble_size
        self._ensemble_nbest = ensemble_nbest
        self._seed = seed
        self._ml_memory_limit = ml_memory_limit
        self._data_memory_limit = None
        self._metadata_directory = metadata_directory
        self._keep_models = keep_models
        self._include_estimators = include_estimators
        self._include_preprocessors = include_preprocessors
        self._resampling_strategy = resampling_strategy
        self._resampling_strategy_arguments = resampling_strategy_arguments \
            if resampling_strategy_arguments is not None else {}
        self._max_iter_smac = max_iter_smac
        #self.delete_tmp_folder_after_terminate = \
        #    delete_tmp_folder_after_terminate
        #self.delete_output_folder_after_terminate = \
        #    delete_output_folder_after_terminate
        self._shared_mode = shared_mode
        self.precision = precision
        self.acquisition_function = acquisition_function

        self._datamanager = None
        self._dataset_name = None
        self._stopwatch = StopWatch()
        self._logger = None
        self._task = None
        self._metric = None
        self._label_num = None
        self._parser = None
        self.models_ = None
        self.ensemble_ = None
        self._can_predict = False

        self._debug_mode = debug_mode

        if not isinstance(self._time_for_task, int):
            raise ValueError("time_left_for_this_task not of type integer, "
                             "but %s" % str(type(self._time_for_task)))
        if not isinstance(self._per_run_time_limit, int):
            raise ValueError("per_run_time_limit not of type integer, but %s" %
                             str(type(self._per_run_time_limit)))

        # After assignging and checking variables...
        #self._backend = Backend(self._output_dir, self._tmp_dir)

    def start_automl(self, parser):
        self._parser = parser
        self.start()

    def start(self):
        if self._parser is None:
            raise ValueError('You must invoke start() only via start_automl()')
        super(AutoML, self).start()

    def run(self):
        if self._parser is None:
            raise ValueError('You must invoke run() only via start_automl()')
        self._backend.save_start_time(self._seed)
        self._stopwatch = StopWatch()
        datamanager = get_data_manager(namespace=self._parser)
        self._stopwatch.start_task(datamanager.name)

        self._logger = self._get_logger(datamanager.name)

        self._datamanager = datamanager
        self._dataset_name = datamanager.name
#.........這裏部分代碼省略.........
開發者ID:automl,項目名稱:auto-sklearn,代碼行數:103,代碼來源:automl.py

示例12: AutoML

# 需要導入模塊: from autosklearn.util import StopWatch [as 別名]
# 或者: from autosklearn.util.StopWatch import start_task [as 別名]

#.........這裏部分代碼省略.........
                             "but %s" % str(type(self._time_for_task)))
        if not isinstance(self._per_run_time_limit, int):
            raise ValueError("per_run_time_limit not of type integer, but %s" %
                             str(type(self._per_run_time_limit)))

        # After assignging and checking variables...
        #self._backend = Backend(self._output_dir, self._tmp_dir)

    def fit(
        self, X, y,
        task,
        metric,
        X_test=None,
        y_test=None,
        feat_type=None,
        dataset_name=None,
        only_return_configuration_space=False,
    ):
        if self._shared_mode:
            # If this fails, it's likely that this is the first call to get
            # the data manager
            try:
                D = self._backend.load_datamanager()
                dataset_name = D.name
            except IOError:
                pass

        if dataset_name is None:
            dataset_name = hash_array_or_matrix(X)

        self._backend.save_start_time(self._seed)
        self._stopwatch = StopWatch()
        self._dataset_name = dataset_name
        self._stopwatch.start_task(self._dataset_name)

        self._logger = self._get_logger(dataset_name)

        if metric is None:
            raise ValueError('No metric given.')
        if not isinstance(metric, Scorer):
            raise ValueError('Metric must be instance of '
                             'autosklearn.metrics.Scorer.')

        if feat_type is not None and len(feat_type) != X.shape[1]:
            raise ValueError('Array feat_type does not have same number of '
                             'variables as X has features. %d vs %d.' %
                             (len(feat_type), X.shape[1]))
        if feat_type is not None and not all([isinstance(f, str)
                                              for f in feat_type]):
            raise ValueError('Array feat_type must only contain strings.')
        if feat_type is not None:
            for ft in feat_type:
                if ft.lower() not in ['categorical', 'numerical']:
                    raise ValueError('Only `Categorical` and `Numerical` are '
                                     'valid feature types, you passed `%s`' % ft)

        self._data_memory_limit = None
        loaded_data_manager = XYDataManager(
            X, y,
            X_test=X_test,
            y_test=y_test,
            task=task,
            feat_type=feat_type,
            dataset_name=dataset_name,
        )
開發者ID:Bryan-LL,項目名稱:auto-sklearn,代碼行數:69,代碼來源:automl.py


注:本文中的autosklearn.util.StopWatch.start_task方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。