This article collects typical usage examples of the Python function skll.experiments.run_configuration. If you are wondering what run_configuration does and how to use it, the curated code examples below should help.
A total of 15 code examples of run_configuration are shown below, sorted by popularity by default.
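Before the examples, here is a minimal sketch of the typical call pattern: write a SKLL configuration file to disk, then pass its path to run_configuration. The section and field names below mirror Example 13 further down (an older SKLL config schema); the experiment name, feature set name, and directory paths are placeholders invented for illustration, not values from any example on this page.

import json
from configparser import ConfigParser

from skll.experiments import run_configuration

# Build a minimal SKLL config programmatically (schema as in Example 13).
cfg_dict = {"General": {"task": "train",
                        "experiment_name": "demo"},
            "Input": {"train_location": "train",  # placeholder directory
                      "featuresets": json.dumps([["demo_feats"]]),
                      "suffix": ".jsonlines",
                      "learners": json.dumps(["LogisticRegression"])},
            "Tuning": {"grid_search": "False"},
            "Output": {"models": "output",  # placeholder directory
                       "log": "output"}}
cfg = ConfigParser()
for section_name, section_dict in cfg_dict.items():
    cfg.add_section(section_name)
    for key, val in section_dict.items():
        cfg.set(section_name, key, val)
with open("demo.cfg", "w") as config_file:
    cfg.write(config_file)

# quiet=True suppresses progress output, as in most of the tests below.
run_configuration("demo.cfg", quiet=True)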
Example 1: test_folds_file_logging_num_folds
def test_folds_file_logging_num_folds():
"""
    Test that, when `folds_file` is used, the log shows the number of folds and the appropriate warning.
"""
# Run experiment
suffix = '.jsonlines'
train_path = join(_my_dir, 'train', 'f0{}'.format(suffix))
config_path = fill_in_config_paths_for_single_file(join(_my_dir,
"configs",
"test_folds_file"
".template.cfg"),
train_path,
None)
run_configuration(config_path, quiet=True)
# Check experiment log output
with open(join(_my_dir,
'output',
'test_folds_file_logging.log')) as f:
cv_file_pattern = re.compile('Specifying "folds_file" overrides both explicit and default "num_cv_folds".')
matches = re.findall(cv_file_pattern, f.read())
assert_equal(len(matches), 1)
# Check job log output
with open(join(_my_dir,
'output',
'test_folds_file_logging_train_f0.'
'jsonlines_LogisticRegression.log')) as f:
        cv_folds_pattern = re.compile(r"(Task: cross_validate\n)(.+)(Cross-validating \([0-9]+ folds\))")
matches = re.findall(cv_folds_pattern, f.read())
assert_equal(len(matches), 1)
Example 2: check_specified_cv_folds
def check_specified_cv_folds(numeric_ids):
make_cv_folds_data(numeric_ids)
# test_cv_folds1.cfg has prespecified folds and should have ~50% accuracy
    # test_cv_folds2.cfg doesn't have prespecified folds and should have >95% accuracy
for experiment_name, test_func, grid_size in [('test_cv_folds1',
lambda x: x < 0.6,
3),
('test_cv_folds2',
lambda x: x > 0.95,
10)]:
config_template_file = '{}.template.cfg'.format(experiment_name)
config_template_path = os.path.join(_my_dir, 'configs',
config_template_file)
config_path = os.path.join(_my_dir,
fill_in_config_paths(config_template_path))
# Modify config file to change ids_to_floats depending on numeric_ids
# setting
with open(config_path, 'r+') as config_template_file:
lines = config_template_file.readlines()
config_template_file.seek(0)
config_template_file.truncate()
for line in lines:
if line.startswith('ids_to_floats='):
if numeric_ids:
line = 'ids_to_floats=true\n'
else:
line = 'ids_to_floats=false\n'
config_template_file.write(line)
run_configuration(config_path, quiet=True)
result_filename = ('{}_test_cv_folds_LogisticRegression.' +
'results').format(experiment_name)
with open(os.path.join(_my_dir, 'output', result_filename)) as f:
# check held out scores
outstr = f.read()
score = float(SCORE_OUTPUT_RE.search(outstr).groups()[-1])
assert test_func(score)
grid_score_matches = GRID_RE.findall(outstr)
assert len(grid_score_matches) == grid_size
for match_str in grid_score_matches:
assert test_func(float(match_str))
# try the same tests for just training (and specifying the folds for the
# grid search)
dirpath = os.path.join(_my_dir, 'train')
suffix = '.jsonlines'
featureset = ['test_cv_folds']
examples = _load_featureset(dirpath, featureset, suffix, quiet=True)
clf = Learner('LogisticRegression', probability=True)
cv_folds = _load_cv_folds(os.path.join(_my_dir, 'train',
'test_cv_folds.csv'))
grid_search_score = clf.train(examples, grid_search_folds=cv_folds,
grid_objective='accuracy', grid_jobs=1)
assert grid_search_score < 0.6
grid_search_score = clf.train(examples, grid_search_folds=5,
grid_objective='accuracy', grid_jobs=1)
assert grid_search_score > 0.95
Example 3: test_scaling
def test_scaling():
'''
Test to validate whether feature scaling works
'''
make_scaling_data()
# run the experiment without scaling
config_template_path = os.path.join(_my_dir, 'configs', 'test_scaling_without.template.cfg')
config_path = fill_in_config_paths(config_template_path)
run_configuration(config_path, quiet=True)
# now run the version with scaling
config_template_path = os.path.join(_my_dir, 'configs', 'test_scaling_with.template.cfg')
config_path = fill_in_config_paths(config_template_path)
run_configuration(config_path, quiet=True)
    # make sure that the results with and without scaling aren't the same
with open(os.path.join(_my_dir, 'output', 'without_scaling_summary.tsv')) as f:
reader = csv.DictReader(f, dialect=csv.excel_tab)
row = list(reader)[0]
without_scaling_score = row['score']
without_scaling_scaling_value = row['feature_scaling']
with open(os.path.join(_my_dir, 'output', 'with_scaling_summary.tsv')) as f:
reader = csv.DictReader(f, dialect=csv.excel_tab)
row = list(reader)[0]
with_scaling_score = row['score']
with_scaling_scaling_value = row['feature_scaling']
assert_not_equal(without_scaling_score, with_scaling_score)
eq_(without_scaling_scaling_value, 'none')
eq_(with_scaling_scaling_value, 'both')
Example 4: test_regression1
def test_regression1():
'''
This is a bit of a contrived test, but it should fail
if anything drastic happens to the regression code.
'''
y = make_regression_data()
config_template_path = os.path.join(_my_dir, 'configs', 'test_regression1.template.cfg')
config_path = fill_in_config_paths(config_template_path)
config_template_path = "test_regression1.cfg"
run_configuration(os.path.join(_my_dir, config_path), quiet=True)
with open(os.path.join(_my_dir, 'output', 'test_regression1_test_regression1_RescaledRidge.results')) as f:
# check held out scores
outstr = f.read()
score = float(SCORE_OUTPUT_RE.search(outstr).groups()[-1])
assert score > 0.7
with open(os.path.join(_my_dir, 'output', 'test_regression1_test_regression1_RescaledRidge.predictions'), 'r') as f:
reader = csv.reader(f, dialect='excel-tab')
next(reader)
pred = [float(row[1]) for row in reader]
assert np.min(pred) >= np.min(y)
assert np.max(pred) <= np.max(y)
assert abs(np.mean(pred) - np.mean(y)) < 0.1
assert abs(np.std(pred) - np.std(y)) < 0.1
Example 5: test_ablation_cv_feature_hasher
def test_ablation_cv_feature_hasher():
"""
Test if ablation works with cross-validate and feature_hasher
"""
make_ablation_data()
config_template_path = join(_my_dir, 'configs',
'test_ablation_feature_hasher.template.cfg')
config_path = fill_in_config_paths(config_template_path)
run_configuration(config_path, quiet=True, ablation=1)
# read in the summary file and make sure it has
# 7 ablated featuresets * (10 folds + 1 average line) * 2 learners = 154
# lines
with open(join(_my_dir, 'output',
'ablation_cv_feature_hasher_summary.tsv')) as f:
reader = csv.DictReader(f, dialect=csv.excel_tab)
num_rows = check_ablation_rows(reader)
eq_(num_rows, 154)
    # make sure there are 7 ablated featuresets * 2 learners = 14 results files
num_result_files = len(glob.glob(join(_my_dir, 'output',
('ablation_cv_feature_hasher_'
'*.results'))))
eq_(num_result_files, 14)
Example 6: test_learning_curve_output
def test_learning_curve_output():
"""
Test learning curve output for experiment with metrics option
"""
# Test to validate learning curve output
make_learning_curve_data()
config_template_path = join(_my_dir, 'configs', 'test_learning_curve.template.cfg')
config_path = fill_in_config_paths(config_template_path)
# run the learning curve experiment
run_configuration(config_path, quiet=True)
outprefix = 'test_learning_curve'
# make sure that the TSV file is created with the right columns
output_tsv_path = join(_my_dir, 'output', '{}_summary.tsv'.format(outprefix))
ok_(exists(output_tsv_path))
with open(output_tsv_path, 'r') as tsvf:
r = csv.reader(tsvf, dialect=csv.excel_tab)
header = next(r)
# make sure we have the expected number of columns
eq_(len(header), 11)
num_rows = len(list(r))
# we should have 2 featuresets x 3 learners x 2 objectives x 5 (default)
# training sizes = 60 rows
eq_(num_rows, 60)
# make sure that the two PNG files (one per featureset) are created
# if the requirements are satisfied
if _HAVE_SEABORN:
for featureset_name in ["test_learning_curve1", "test_learning_curve2"]:
ok_(exists(join(_my_dir,
'output',
'{}_{}.png'.format(outprefix, featureset_name))))
Example 7: test_train_file_test_file_ablation
def test_train_file_test_file_ablation():
"""
    Test that the ablation option is ignored when train_file and test_file are specified
"""
# Create data files
make_single_file_featureset_data()
# Run experiment
config_path = fill_in_config_paths_for_single_file(join(_my_dir, "configs",
"test_single_file"
".template.cfg"),
join(_my_dir, 'train',
'train_single_file'
'.jsonlines'),
join(_my_dir, 'test',
'test_single_file.'
'jsonlines'))
run_configuration(config_path, quiet=True, ablation=None)
    # check that the experiment log notes that ablation was ignored
with open(join(_my_dir,
'output',
'train_test_single_file.log')) as f:
cv_file_pattern = re.compile('Not enough featuresets for ablation. Ignoring.')
matches = re.findall(cv_file_pattern, f.read())
eq_(len(matches), 1)
Example 8: test_ablation_cv_feature_hasher_all_combos_sampler
def test_ablation_cv_feature_hasher_all_combos_sampler():
"""
    Test that all-combos ablation works with cross-validation,
    feature_hasher, and a sampler
"""
make_ablation_data()
config_template_path = join(_my_dir, 'configs', ('test_ablation_feature_'
'hasher_sampler.template'
'.cfg'))
config_path = fill_in_config_paths(config_template_path)
run_configuration(config_path, quiet=True, ablation=None)
# read in the summary file and make sure it has
# 10 ablated featuresets * (10 folds + 1 average line) * 2 learners = 220
# lines
with open(join(_my_dir, 'output',
'ablation_cv_feature_hasher_summary.tsv')) as f:
reader = csv.DictReader(f, dialect=csv.excel_tab)
num_rows = check_ablation_rows(reader)
eq_(num_rows, 220)
# make sure there are 10 ablated featuresets * 2 learners = 20 results
# files
num_result_files = len(glob.glob(join(_my_dir, 'output',
('ablation_cv_feature_hasher_'
'*results'))))
eq_(num_result_files, 20)
Example 9: test_predict_on_subset_with_existing_model
def test_predict_on_subset_with_existing_model():
"""
Test generating predictions on subset with existing model
"""
# Create data files
make_single_file_featureset_data()
# train and save a model on the training file
train_fs = NDJReader.for_path(join(_my_dir, 'train', 'train_single_file.jsonlines')).read()
learner = Learner('RandomForestClassifier')
learner.train(train_fs, grid_search=True, grid_objective="accuracy")
model_filename = join(_my_dir, 'output', ('train_test_single_file_train_train_'
'single_file.jsonlines_test_test_single'
'_file_subset.jsonlines_RandomForestClassifier'
'.model'))
learner.save(model_filename)
# Run experiment
config_path = fill_in_config_paths_for_single_file(join(_my_dir, "configs",
"test_single_file_saved_subset"
".template.cfg"),
join(_my_dir, 'train', 'train_single_file.jsonlines'),
join(_my_dir, 'test',
'test_single_file_subset.'
'jsonlines'))
run_configuration(config_path, quiet=True, overwrite=False)
# Check results
with open(join(_my_dir, 'output', ('train_test_single_file_train_train_'
'single_file.jsonlines_test_test_single'
'_file_subset.jsonlines_RandomForestClassifier'
'.results.json'))) as f:
result_dict = json.load(f)[0]
assert_almost_equal(result_dict['accuracy'], 0.7333333)
Example 10: test_ablation_cv_feature_hasher_all_combos
def test_ablation_cv_feature_hasher_all_combos():
"""
Test ablation all-combos + cross-validation + feature hashing
"""
config_template_path = join(_my_dir,
'configs',
'test_ablation_feature_hasher_all_combos.template.cfg')
config_path = fill_in_config_paths(config_template_path)
run_configuration(config_path, quiet=True, ablation=None)
# read in the summary file and make sure it has
# 10 ablated featuresets
# * (10 folds + 1 average line)
# * 2 learners
# = 220 lines in total
with open(join(_my_dir,
'output',
'ablation_cv_feature_hasher_all_combos_summary.tsv')) as f:
reader = csv.DictReader(f, dialect=csv.excel_tab)
num_rows = check_ablation_rows(reader)
eq_(num_rows, 220)
# make sure there are 10 ablated featuresets * 2 learners = 20 results
# files
num_result_files = len(glob(join(_my_dir,
'output',
'ablation_cv_feature_hasher_all_combos*.results')))
eq_(num_result_files, 20)
Example 11: test_train_file_test_file
def test_train_file_test_file():
"""
Test that train_file and test_file experiments work
"""
# Create data files
make_single_file_featureset_data()
# Run experiment
config_path = fill_in_config_paths_for_single_file(join(_my_dir, "configs",
"test_single_file"
".template.cfg"),
join(_my_dir, 'train',
'train_single_file'
'.jsonlines'),
join(_my_dir, 'test',
'test_single_file.'
'jsonlines'))
run_configuration(config_path, quiet=True)
# Check results
with open(join(_my_dir, 'output', ('train_test_single_file_train_train_'
'single_file.jsonlines_test_test_single'
'_file.jsonlines_RandomForestClassifier'
'.results.json'))) as f:
result_dict = json.load(f)[0]
assert_almost_equal(result_dict['score'], 0.925)
Example 12: test_custom_learner_model_loading
def test_custom_learner_model_loading():
num_labels = 10
class_weights = [(0.5 / (num_labels - 1))
for x in range(num_labels - 1)] + [0.5]
train_fs, test_fs = make_classification_data(num_examples=600,
train_test_ratio=0.8,
num_labels=num_labels,
num_features=5,
non_negative=True,
class_weights=class_weights)
# Write training feature set to a file
train_path = join(_my_dir, 'train',
'test_model_custom_learner.jsonlines')
writer = NDJWriter(train_path, train_fs)
writer.write()
# Write test feature set to a file
test_path = join(_my_dir, 'test',
'test_model_custom_learner.jsonlines')
writer = NDJWriter(test_path, test_fs)
writer.write()
# run the configuration that trains the custom model and saves it
cfgfile = 'test_model_save_custom_learner.template.cfg'
config_template_path = join(_my_dir, 'configs', cfgfile)
config_path = fill_in_config_paths(config_template_path)
run_configuration(config_path, quiet=True)
    # read the predictions from disk into memory
    # and delete the predictions file
outprefix = 'test_model_custom_learner'
pred_file = join(_my_dir, 'output',
'{}_{}_CustomLogisticRegressionWrapper'
'.predictions'.format(outprefix,
outprefix))
preds1 = read_predictions(pred_file)
os.unlink(pred_file)
# run the configuration that loads the saved model
# and generates the predictions again
cfgfile = 'test_model_load_custom_learner.template.cfg'
config_template_path = join(_my_dir, 'configs', cfgfile)
config_path = fill_in_config_paths(config_template_path)
run_configuration(config_path, overwrite=False, quiet=True)
# load the newly generated predictions
preds2 = read_predictions(pred_file)
# make sure that they are the same as before
assert_array_equal(preds1, preds2)
Example 13: train_rst_parsing_model
def train_rst_parsing_model(working_path, model_path, parameter_settings):
'''
parameter_settings is a dict of scikit-learn hyperparameter settings
'''
C_value = parameter_settings['C']
working_subdir = os.path.join(working_path, 'C{}'.format(C_value))
assert not os.path.exists(working_subdir)
os.makedirs(working_subdir)
if not os.path.exists(model_path):
os.makedirs(model_path)
learner_name = 'LogisticRegression'
fixed_parameters = [{'random_state': 123456789, 'penalty': 'l1',
'C': C_value}]
# Make the SKLL config file.
cfg_dict = {"General": {"task": "train",
"experiment_name": "rst_parsing"},
"Input": {"train_location": working_path,
"ids_to_floats": "False",
"featuresets": json.dumps([["rst_parsing"]]),
"featureset_names": json.dumps(["all_feats"]),
"suffix": '.jsonlines',
"fixed_parameters": json.dumps(fixed_parameters),
"learners": json.dumps([learner_name])},
"Tuning": {"feature_scaling": "none",
"grid_search": "False",
"min_feature_count": "1"},
"Output": {"probability": "True",
"models": model_path,
"log": working_subdir}
}
# write config file
cfg_path = os.path.join(working_subdir, 'rst_parsing.cfg')
cfg = ConfigParser()
for section_name, section_dict in list(cfg_dict.items()):
cfg.add_section(section_name)
for key, val in section_dict.items():
cfg.set(section_name, key, val)
assert not os.path.exists(cfg_path)
with open(cfg_path, 'w') as config_file:
cfg.write(config_file)
# run SKLL
run_configuration(cfg_path)
# make the model smaller/faster
minimize_model(model_path,
'rst_parsing_all_feats_LogisticRegression.model')
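For illustration, a hypothetical invocation of this helper might look like the following; the working path, model path, and C value are invented for this example and are not taken from the code above.

train_rst_parsing_model('working/rst_features',  # hypothetical working dir
                        'models/rst_parser',     # hypothetical model dir
                        {'C': 1.0})              # hypothetical C setting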
Example 14: test_class_map
def test_class_map():
make_class_map_data()
config_template_path = os.path.join(_my_dir, 'configs', 'test_class_map.template.cfg')
config_path = fill_in_config_paths(config_template_path)
run_configuration(config_path, quiet=True)
with open(os.path.join(_my_dir, 'output', 'test_class_map_test_class_map_LogisticRegression.results')) as f:
outstr = f.read()
logistic_result_score = float(SCORE_OUTPUT_RE.search(outstr).groups()[0])
assert_almost_equal(logistic_result_score, 0.5)
Example 15: test_sparse_predict
def test_sparse_predict():
'''
Test to validate whether predict works with sparse data
'''
make_sparse_data()
config_template_path = os.path.join(_my_dir, 'configs', 'test_sparse.template.cfg')
config_path = fill_in_config_paths(config_template_path)
run_configuration(config_path, quiet=True)
with open(os.path.join(_my_dir, 'output', 'test_sparse_test_sparse_LogisticRegression.results')) as f:
outstr = f.read()
logistic_result_score = float(SCORE_OUTPUT_RE.search(outstr).groups()[0])
assert_almost_equal(logistic_result_score, 0.5)