当前位置: 首页>>代码示例>>Python>>正文


Python experiments.run_configuration函数代码示例

本文整理汇总了Python中skll.experiments.run_configuration函数的典型用法代码示例。如果您正苦于以下问题:Python run_configuration函数的具体用法?Python run_configuration怎么用?Python run_configuration使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了run_configuration函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_folds_file_logging_num_folds

def test_folds_file_logging_num_folds():
    """
    Test when using `folds_file`, log shows number of folds and appropriate warning.
    """
    # Run experiment
    suffix = '.jsonlines'
    train_path = join(_my_dir, 'train', 'f0{}'.format(suffix))

    config_path = fill_in_config_paths_for_single_file(join(_my_dir,
                                                            "configs",
                                                            "test_folds_file"
                                                            ".template.cfg"),
                                                       train_path,
                                                       None)
    run_configuration(config_path, quiet=True)

    # Check experiment log output
    with open(join(_my_dir,
                   'output',
                   'test_folds_file_logging.log')) as f:
        cv_file_pattern = re.compile('Specifying "folds_file" overrides both explicit and default "num_cv_folds".')
        matches = re.findall(cv_file_pattern, f.read())
        assert_equal(len(matches), 1)

    # Check job log output
    with open(join(_my_dir,
                   'output',
                   'test_folds_file_logging_train_f0.'
                   'jsonlines_LogisticRegression.log')) as f:
        cv_folds_pattern = re.compile("(Task: cross_validate\n)(.+)(Cross-validating \([0-9]+ folds\))")
        matches = re.findall(cv_folds_pattern, f.read())
        assert_equal(len(matches), 1)
开发者ID:EducationalTestingService,项目名称:skll,代码行数:32,代码来源:test_cv.py

示例2: check_specified_cv_folds

def check_specified_cv_folds(numeric_ids):
    make_cv_folds_data(numeric_ids)

    # test_cv_folds1.cfg has prespecified folds and should have ~50% accuracy
    # test_cv_folds2.cfg doesn't have prespecified folds and >95% accuracy
    for experiment_name, test_func, grid_size in [('test_cv_folds1',
                                                   lambda x: x < 0.6,
                                                   3),
                                                  ('test_cv_folds2',
                                                   lambda x: x > 0.95,
                                                   10)]:
        config_template_file = '{}.template.cfg'.format(experiment_name)
        config_template_path = os.path.join(_my_dir, 'configs',
                                            config_template_file)
        config_path = os.path.join(_my_dir,
                                   fill_in_config_paths(config_template_path))

        # Modify config file to change ids_to_floats depending on numeric_ids
        # setting
        with open(config_path, 'r+') as config_template_file:
            lines = config_template_file.readlines()
            config_template_file.seek(0)
            config_template_file.truncate()
            for line in lines:
                if line.startswith('ids_to_floats='):
                    if numeric_ids:
                        line = 'ids_to_floats=true\n'
                    else:
                        line = 'ids_to_floats=false\n'
                config_template_file.write(line)

        run_configuration(config_path, quiet=True)
        result_filename = ('{}_test_cv_folds_LogisticRegression.' +
                           'results').format(experiment_name)
        with open(os.path.join(_my_dir, 'output', result_filename)) as f:
            # check held out scores
            outstr = f.read()
            score = float(SCORE_OUTPUT_RE.search(outstr).groups()[-1])
            assert test_func(score)

            grid_score_matches = GRID_RE.findall(outstr)
            assert len(grid_score_matches) == grid_size
            for match_str in grid_score_matches:
                assert test_func(float(match_str))

    # try the same tests for just training (and specifying the folds for the
    # grid search)
    dirpath = os.path.join(_my_dir, 'train')
    suffix = '.jsonlines'
    featureset = ['test_cv_folds']
    examples = _load_featureset(dirpath, featureset, suffix, quiet=True)
    clf = Learner('LogisticRegression', probability=True)
    cv_folds = _load_cv_folds(os.path.join(_my_dir, 'train',
                                           'test_cv_folds.csv'))
    grid_search_score = clf.train(examples, grid_search_folds=cv_folds,
                                  grid_objective='accuracy', grid_jobs=1)
    assert grid_search_score < 0.6
    grid_search_score = clf.train(examples, grid_search_folds=5,
                                  grid_objective='accuracy', grid_jobs=1)
    assert grid_search_score > 0.95
开发者ID:wavelets,项目名称:skll,代码行数:60,代码来源:test_skll.py

示例3: test_scaling

def test_scaling():
    '''
    Test to validate whether feature scaling works
    '''
    make_scaling_data()

    # run the experiment without scaling
    config_template_path = os.path.join(_my_dir, 'configs', 'test_scaling_without.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    # now run the version with scaling
    config_template_path = os.path.join(_my_dir, 'configs', 'test_scaling_with.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    # make sure that the result with and without scaling aren't the same
    with open(os.path.join(_my_dir, 'output', 'without_scaling_summary.tsv')) as f:
        reader = csv.DictReader(f, dialect=csv.excel_tab)
        row = list(reader)[0]
        without_scaling_score = row['score']
        without_scaling_scaling_value = row['feature_scaling']

    with open(os.path.join(_my_dir, 'output', 'with_scaling_summary.tsv')) as f:
        reader = csv.DictReader(f, dialect=csv.excel_tab)
        row = list(reader)[0]
        with_scaling_score = row['score']
        with_scaling_scaling_value = row['feature_scaling']

    assert_not_equal(without_scaling_score, with_scaling_score)
    eq_(without_scaling_scaling_value, 'none')
    eq_(with_scaling_scaling_value, 'both')
开发者ID:wavelets,项目名称:skll,代码行数:34,代码来源:test_skll.py

示例4: test_regression1

def test_regression1():
    '''
    This is a bit of a contrived test, but it should fail
    if anything drastic happens to the regression code.
    '''

    y = make_regression_data()

    config_template_path = os.path.join(_my_dir, 'configs', 'test_regression1.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    config_template_path = "test_regression1.cfg"

    run_configuration(os.path.join(_my_dir, config_path), quiet=True)

    with open(os.path.join(_my_dir, 'output', 'test_regression1_test_regression1_RescaledRidge.results')) as f:
        # check held out scores
        outstr = f.read()
        score = float(SCORE_OUTPUT_RE.search(outstr).groups()[-1])
        assert score > 0.7

    with open(os.path.join(_my_dir, 'output', 'test_regression1_test_regression1_RescaledRidge.predictions'), 'r') as f:
        reader = csv.reader(f, dialect='excel-tab')
        next(reader)
        pred = [float(row[1]) for row in reader]

        assert np.min(pred) >= np.min(y)
        assert np.max(pred) <= np.max(y)

        assert abs(np.mean(pred) - np.mean(y)) < 0.1
        assert abs(np.std(pred) - np.std(y)) < 0.1
开发者ID:wavelets,项目名称:skll,代码行数:31,代码来源:test_skll.py

示例5: test_ablation_cv_feature_hasher

def test_ablation_cv_feature_hasher():
    """
    Test if ablation works with cross-validate and feature_hasher
    """
    make_ablation_data()

    config_template_path = join(_my_dir, 'configs',
                                'test_ablation_feature_hasher.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True, ablation=1)

    # read in the summary file and make sure it has
    # 7 ablated featuresets * (10 folds + 1 average line) * 2 learners = 154
    # lines
    with open(join(_my_dir, 'output',
                   'ablation_cv_feature_hasher_summary.tsv')) as f:
        reader = csv.DictReader(f, dialect=csv.excel_tab)
        num_rows = check_ablation_rows(reader)
        eq_(num_rows, 154)

    # make sure there are 6 ablated featuresets * 2 learners = 12 results files
    num_result_files = len(glob.glob(join(_my_dir, 'output',
                                          ('ablation_cv_feature_hasher_'
                                           '*.results'))))
    eq_(num_result_files, 14)
开发者ID:BK-University,项目名称:skll,代码行数:26,代码来源:test_ablation.py

示例6: test_learning_curve_output

def test_learning_curve_output():
    """
    Test learning curve output for experiment with metrics option
    """

    # Test to validate learning curve output
    make_learning_curve_data()

    config_template_path = join(_my_dir, 'configs', 'test_learning_curve.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    # run the learning curve experiment
    run_configuration(config_path, quiet=True)
    outprefix = 'test_learning_curve'

    # make sure that the TSV file is created with the right columns
    output_tsv_path = join(_my_dir, 'output', '{}_summary.tsv'.format(outprefix))
    ok_(exists(output_tsv_path))
    with open(output_tsv_path, 'r') as tsvf:
        r = csv.reader(tsvf, dialect=csv.excel_tab)
        header = next(r)
        # make sure we have the expected number of columns
        eq_(len(header), 11)
        num_rows = len(list(r))
        # we should have 2 featuresets x 3 learners x 2 objectives x 5 (default)
        # training sizes = 60 rows
        eq_(num_rows, 60)

    # make sure that the two PNG files (one per featureset) are created
    # if the requirements are satisfied
    if _HAVE_SEABORN:
        for featureset_name in ["test_learning_curve1", "test_learning_curve2"]:
            ok_(exists(join(_my_dir,
                            'output',
                            '{}_{}.png'.format(outprefix, featureset_name))))
开发者ID:EducationalTestingService,项目名称:skll,代码行数:35,代码来源:test_output.py

示例7: test_train_file_test_file_ablation

def test_train_file_test_file_ablation():
    """
    Test that specifying ablation with train and test file is ignored
    """
    # Create data files
    make_single_file_featureset_data()

    # Run experiment
    config_path = fill_in_config_paths_for_single_file(join(_my_dir, "configs",
                                                            "test_single_file"
                                                            ".template.cfg"),
                                                       join(_my_dir, 'train',
                                                            'train_single_file'
                                                            '.jsonlines'),
                                                       join(_my_dir, 'test',
                                                            'test_single_file.'
                                                            'jsonlines'))
    run_configuration(config_path, quiet=True, ablation=None)

    # check that we see the message that ablation was ignored in the experiment log
    # Check experiment log output
    with open(join(_my_dir,
                   'output',
                   'train_test_single_file.log')) as f:
        cv_file_pattern = re.compile('Not enough featuresets for ablation. Ignoring.')
        matches = re.findall(cv_file_pattern, f.read())
        eq_(len(matches), 1)
开发者ID:EducationalTestingService,项目名称:skll,代码行数:27,代码来源:test_classification.py

示例8: test_ablation_cv_feature_hasher_all_combos_sampler

def test_ablation_cv_feature_hasher_all_combos_sampler():
    """
    Test to validate whether ablation works with cross-validate
    and feature_hasher
    """
    make_ablation_data()

    config_template_path = join(_my_dir, 'configs', ('test_ablation_feature_'
                                                     'hasher_sampler.template'
                                                     '.cfg'))
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True, ablation=None)

    # read in the summary file and make sure it has
    # 10 ablated featuresets * (10 folds + 1 average line) * 2 learners = 220
    # lines
    with open(join(_my_dir, 'output',
                   'ablation_cv_feature_hasher_summary.tsv')) as f:
        reader = csv.DictReader(f, dialect=csv.excel_tab)
        num_rows = check_ablation_rows(reader)
        eq_(num_rows, 220)

    # make sure there are 10 ablated featuresets * 2 learners = 20 results
    # files
    num_result_files = len(glob.glob(join(_my_dir, 'output',
                                          ('ablation_cv_feature_hasher_'
                                           '*results'))))
    eq_(num_result_files, 20)
开发者ID:BK-University,项目名称:skll,代码行数:29,代码来源:test_ablation.py

示例9: test_predict_on_subset_with_existing_model

def test_predict_on_subset_with_existing_model():
    """
    Test generating predictions on subset with existing model
    """
    # Create data files
    make_single_file_featureset_data()

    # train and save a model on the training file
    train_fs = NDJReader.for_path(join(_my_dir, 'train', 'train_single_file.jsonlines')).read()
    learner = Learner('RandomForestClassifier')
    learner.train(train_fs, grid_search=True, grid_objective="accuracy")
    model_filename = join(_my_dir, 'output', ('train_test_single_file_train_train_'
                                              'single_file.jsonlines_test_test_single'
                                              '_file_subset.jsonlines_RandomForestClassifier'
                                              '.model'))

    learner.save(model_filename)

    # Run experiment
    config_path = fill_in_config_paths_for_single_file(join(_my_dir, "configs",
                                                            "test_single_file_saved_subset"
                                                            ".template.cfg"),
                                                       join(_my_dir, 'train', 'train_single_file.jsonlines'),
                                                       join(_my_dir, 'test',
                                                            'test_single_file_subset.'
                                                            'jsonlines'))
    run_configuration(config_path, quiet=True, overwrite=False)

    # Check results
    with open(join(_my_dir, 'output', ('train_test_single_file_train_train_'
                                       'single_file.jsonlines_test_test_single'
                                       '_file_subset.jsonlines_RandomForestClassifier'
                                       '.results.json'))) as f:
        result_dict = json.load(f)[0]
    assert_almost_equal(result_dict['accuracy'], 0.7333333)
开发者ID:EducationalTestingService,项目名称:skll,代码行数:35,代码来源:test_classification.py

示例10: test_ablation_cv_feature_hasher_all_combos

def test_ablation_cv_feature_hasher_all_combos():
    """
    Test ablation all-combos + cross-validation + feature hashing
    """

    config_template_path = join(_my_dir,
                                'configs',
                                'test_ablation_feature_hasher_all_combos.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True, ablation=None)

    # read in the summary file and make sure it has
    #    10 ablated featuresets
    #      * (10 folds + 1 average line)
    #      * 2 learners
    #    = 220 lines in total
    with open(join(_my_dir,
                   'output',
                   'ablation_cv_feature_hasher_all_combos_summary.tsv')) as f:
        reader = csv.DictReader(f, dialect=csv.excel_tab)
        num_rows = check_ablation_rows(reader)
        eq_(num_rows, 220)

    # make sure there are 10 ablated featuresets * 2 learners = 20 results
    # files
    num_result_files = len(glob(join(_my_dir,
                                     'output',
                                     'ablation_cv_feature_hasher_all_combos*.results')))
    eq_(num_result_files, 20)
开发者ID:EducationalTestingService,项目名称:skll,代码行数:30,代码来源:test_ablation.py

示例11: test_train_file_test_file

def test_train_file_test_file():
    """
    Test that train_file and test_file experiments work
    """
    # Create data files
    make_single_file_featureset_data()

    # Run experiment
    config_path = fill_in_config_paths_for_single_file(join(_my_dir, "configs",
                                                            "test_single_file"
                                                            ".template.cfg"),
                                                       join(_my_dir, 'train',
                                                            'train_single_file'
                                                            '.jsonlines'),
                                                       join(_my_dir, 'test',
                                                            'test_single_file.'
                                                            'jsonlines'))
    run_configuration(config_path, quiet=True)

    # Check results
    with open(join(_my_dir, 'output', ('train_test_single_file_train_train_'
                                       'single_file.jsonlines_test_test_single'
                                       '_file.jsonlines_RandomForestClassifier'
                                       '.results.json'))) as f:
        result_dict = json.load(f)[0]

    assert_almost_equal(result_dict['score'], 0.925)
开发者ID:MechCoder,项目名称:skll,代码行数:27,代码来源:test_classification.py

示例12: test_custom_learner_model_loading

def test_custom_learner_model_loading():
    num_labels = 10

    class_weights = [(0.5 / (num_labels - 1))
                     for x in range(num_labels - 1)] + [0.5]
    train_fs, test_fs = make_classification_data(num_examples=600,
                                                 train_test_ratio=0.8,
                                                 num_labels=num_labels,
                                                 num_features=5,
                                                 non_negative=True,
                                                 class_weights=class_weights)

    # Write training feature set to a file
    train_path = join(_my_dir, 'train',
                      'test_model_custom_learner.jsonlines')
    writer = NDJWriter(train_path, train_fs)
    writer.write()

    # Write test feature set to a file
    test_path = join(_my_dir, 'test',
                     'test_model_custom_learner.jsonlines')
    writer = NDJWriter(test_path, test_fs)
    writer.write()

    # run the configuration that trains the custom model and saves it
    cfgfile = 'test_model_save_custom_learner.template.cfg'
    config_template_path = join(_my_dir, 'configs', cfgfile)
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    # save the predictions from disk into memory
    # and delete the predictions file
    outprefix = 'test_model_custom_learner'
    pred_file = join(_my_dir, 'output',
                     '{}_{}_CustomLogisticRegressionWrapper'
                     '.predictions'.format(outprefix,
                                           outprefix))
    preds1 = read_predictions(pred_file)
    os.unlink(pred_file)

    # run the configuration that loads the saved model
    # and generates the predictions again
    cfgfile = 'test_model_load_custom_learner.template.cfg'
    config_template_path = join(_my_dir, 'configs', cfgfile)
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, overwrite=False, quiet=True)

    # load the newly generated predictions
    preds2 = read_predictions(pred_file)

    # make sure that they are the same as before
    assert_array_equal(preds1, preds2)
开发者ID:BK-University,项目名称:skll,代码行数:54,代码来源:test_custom_learner.py

示例13: train_rst_parsing_model

def train_rst_parsing_model(working_path, model_path, parameter_settings):
    '''
    parameter_settings is a dict of scikit-learn hyperparameter settings
    '''

    C_value = parameter_settings['C']
    working_subdir = os.path.join(working_path, 'C{}'.format(C_value))
    assert not os.path.exists(working_subdir)
    os.makedirs(working_subdir)

    if not os.path.exists(model_path):
        os.makedirs(model_path)

    learner_name = 'LogisticRegression'
    fixed_parameters = [{'random_state': 123456789, 'penalty': 'l1',
                         'C': C_value}]

    # Make the SKLL config file.
    cfg_dict = {"General": {"task": "train",
                            "experiment_name": "rst_parsing"},
                "Input": {"train_location": working_path,
                          "ids_to_floats": "False",
                          "featuresets": json.dumps([["rst_parsing"]]),
                          "featureset_names": json.dumps(["all_feats"]),
                          "suffix": '.jsonlines',
                          "fixed_parameters": json.dumps(fixed_parameters),
                          "learners": json.dumps([learner_name])},
                "Tuning": {"feature_scaling": "none",
                           "grid_search": "False",
                           "min_feature_count": "1"},
                "Output": {"probability": "True",
                           "models": model_path,
                           "log": working_subdir}
               }

    # write config file
    cfg_path = os.path.join(working_subdir, 'rst_parsing.cfg')
    cfg = ConfigParser()
    for section_name, section_dict in list(cfg_dict.items()):
        cfg.add_section(section_name)
        for key, val in section_dict.items():
            cfg.set(section_name, key, val)

    assert not os.path.exists(cfg_path)
    with open(cfg_path, 'w') as config_file:
        cfg.write(config_file)

    # run SKLL
    run_configuration(cfg_path)

    # make the model smaller/faster
    minimize_model(model_path,
                   'rst_parsing_all_feats_LogisticRegression.model')
开发者ID:BinbinBian,项目名称:discourse-parsing,代码行数:53,代码来源:tune_rst_parser.py

示例14: test_class_map

def test_class_map():
    make_class_map_data()

    config_template_path = os.path.join(_my_dir, 'configs', 'test_class_map.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    with open(os.path.join(_my_dir, 'output', 'test_class_map_test_class_map_LogisticRegression.results')) as f:
        outstr = f.read()
        logistic_result_score = float(SCORE_OUTPUT_RE.search(outstr).groups()[0])

    assert_almost_equal(logistic_result_score, 0.5)
开发者ID:wavelets,项目名称:skll,代码行数:13,代码来源:test_skll.py

示例15: test_sparse_predict

def test_sparse_predict():
    '''
    Test to validate whether predict works with sparse data
    '''
    make_sparse_data()

    config_template_path = os.path.join(_my_dir, 'configs', 'test_sparse.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    with open(os.path.join(_my_dir, 'output', 'test_sparse_test_sparse_LogisticRegression.results')) as f:
        outstr = f.read()
        logistic_result_score = float(SCORE_OUTPUT_RE.search(outstr).groups()[0])

    assert_almost_equal(logistic_result_score, 0.5)
开发者ID:wavelets,项目名称:skll,代码行数:16,代码来源:test_skll.py


注:本文中的skll.experiments.run_configuration函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。