当前位置: 首页>>代码示例>>Python>>正文


Python window_based_tagger_config.get_config函数代码示例

本文整理汇总了Python中window_based_tagger_config.get_config函数的典型用法代码示例。如果您正苦于以下问题:Python get_config函数的具体用法?Python get_config怎么用?Python get_config使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了get_config函数的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

    def __init__(self, models_folder, essays_folder, spell_check_dict):
        """Load the tagger configuration, build a spelling corrector from the
        annotated essays, set up the window feature extractors, and restore the
        pickled transformer/classifier models from disk.

        models_folder    -- directory holding the pickled models (ModelStore)
        essays_folder    -- directory of brat-annotated essays
        spell_check_dict -- folder passed to the spelling-corrector builder
        """
        logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

        # Normalise both directory paths so they end with a trailing slash.
        if not models_folder.endswith("/"):
            models_folder = models_folder + "/"
        if not essays_folder.endswith("/"):
            essays_folder = essays_folder + "/"

        self.logger = logging.getLogger()
        self.config = get_config(essays_folder)
        self.essays_folder = essays_folder

        # Annotations are loaded here purely so the tag set can be derived.
        tagged_essays = load_bratt_essays(
            essays_folder,
            include_vague=self.config["include_vague"],
            include_normal=self.config["include_normal"],
            load_annotations=True)
        self.__set_tags_(tagged_essays)

        # Build the spell checker over per-sentence word frequencies.
        self.wd_sent_freq = defaultdict(int)
        self.spelling_corrector = build_spelling_corrector(
            tagged_essays, self.config["lower_case"], self.wd_sent_freq,
            folder=spell_check_dict)

        # Half-window either side of the focus word. Must be an int because it
        # is used in slices; under Python 3 plain division would yield a float.
        half_window = int((self.config["window_size"] - 1) / 2)

        word_feats = fact_extract_positional_word_features_stemmed(half_window)
        bigram_feats = fact_extract_ngram_features_stemmed(half_window, 2)
        # Most params below exist ONLY for the purposes of the hashing to and from disk.
        self.feature_extractor = FeatureExtractorTransformer([word_feats, bigram_feats])

        # Restore the pickled transformer and classifiers.
        self.logger.info("Loading pickled models")
        store = ModelStore(models_folder=models_folder)
        self.feature_transformer = store.get_transformer()
        self.logger.info("Loaded Transformer")
        self.tag_2_wd_classifier = store.get_tag_2_wd_classifier()
        self.logger.info("Loaded word tagging model")
        self.tag_2_sent_classifier = store.get_tag_2_sent_classifier()
        self.logger.info("Loaded sentence classifier")
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:41,代码来源:annotator.py

示例2: get_config

# --- Experiment settings (per "end not hashed" below, these do not feed the pickle key) ---
CV_FOLDS            = 5     # number of cross-validation folds

MIN_TAG_FREQ        = 5     # NOTE(review): presumably a minimum tag-frequency filter — confirm where consumed
LOOK_BACK           = 0     # how many sentences to look back when predicting tags
# end not hashed

# construct unique key using settings for pickling

settings = Settings.Settings()

# Folder layout: raw training essays, plus filename prefixes for the on-disk
# memoisation caches of processed essays and extracted features.
root_folder = settings.data_directory + "CoralBleaching/Thesis_Dataset/"
folder =                            root_folder + "Training/"
processed_essay_filename_prefix =   root_folder + "Pickled/essays_proc_pickled_"
features_filename_prefix =          root_folder + "Pickled/feats_pickled_"

# Window-based tagger configuration built from the training folder.
config = get_config(folder)

""" Load Essays """
# memoize_to_disk wraps load_process_essays so repeat runs with the same config
# appear to reuse the cached pickle under processed_essay_filename_prefix.
mem_process_essays = memoize_to_disk(filename_prefix=processed_essay_filename_prefix)(load_process_essays)
tagged_essays = mem_process_essays( **config )
logger.info("Essays loaded")
""" End load Essays """

def evaluate_window_size(config, window_size, features_filename_prefix):

    config["window_size"] = window_size

    """ FEATURE EXTRACTION """
    offset = (config["window_size"] - 1) / 2

    unigram_window = fact_extract_positional_word_features(offset)
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:31,代码来源:feature_selection.py

示例3: open

# SkinCancer dataset: co-reference augmented causal-relation (CRel) essays.
root_folder = settings.data_directory + "SkinCancer/Thesis_Dataset/"

coref_root = root_folder + "CoReference/"
coref_output_folder = coref_root + "CRel/"

# Both splits were serialized with dill; deserialize them back into memory.
train_fname = coref_output_folder + "training_crel_anatagged_essays_most_recent_code.dill"
with open(train_fname, "rb") as f:
    pred_tagged_essays_train = dill.load(f)

test_fname = coref_output_folder + "test_crel_anatagged_essays_most_recent_code.dill"
with open(test_fname, "rb") as f:
    pred_tagged_essays_test = dill.load(f)

# Get Test Data In Order to Get Test CRELS
# load the test essays to make sure we compute metrics over the test CR labels
config = get_config(coref_output_folder)
results_processor = ResultsProcessor(dbname="metrics_causal_model")
########################################################

logger.info("Started at: " + str(datetime.datetime.now()))
logger.info("Number of pred tagged essays %i" % len(pred_tagged_essays_train))  # should be 902

# Causal-relation tag set is derived from BOTH splits so metrics cover labels
# that only occur in the test data (per the note above).
cr_tags = get_cr_tags(train_tagged_essays=pred_tagged_essays_train, tag_essays_test=pred_tagged_essays_test)
# A single "fold": train on the training split, evaluate on the held-out test split.
cv_folds  = [(pred_tagged_essays_train, pred_tagged_essays_test)]  # type: List[Tuple[Any,Any]]

def evaluate_model(
        collection_prefix: str,
        folds: List[Tuple[Any, Any]],
        extractor_fn_names_lst: List[str],
        cost_function_name: str,
        beta: float,
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:31,代码来源:train_causal_relation_extractor_hyperparam_tuning_logreg_test.py

示例4: get_config

from featureextractortransformer import FeatureExtractorTransformer
from load_data import load_process_essays

from featureextractionfunctions import *
from window_based_tagger_config import get_config

import cPickle as pickle
import logging

# Configure root logging once for this script.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger()

# not hashed as don't affect persistence of feature processing

# NOTE(review): `data` is not defined in this excerpt — presumably the essays
# folder, assigned earlier in the original file; confirm before running standalone.
config = get_config(data)

""" FEATURE EXTRACTION """
# Half-window either side of the focus word. Floor division keeps the offset an
# int (it is used in slices) on Python 2 AND 3 — plain "/" yields a float under
# Py3 (cf. the int() handling in annotator.py). Assumes window_size is odd.
offset = (config["window_size"] - 1) // 2

unigram_window_stemmed = fact_extract_positional_word_features_stemmed(offset)
biigram_window_stemmed = fact_extract_ngram_features_stemmed(offset, 2)

# Feature config = base config plus the extractor list (used for disk hashing).
extractors = [unigram_window_stemmed, biigram_window_stemmed]
feat_config = dict(config.items() + [("extractors", extractors)])  # NOTE: Py2-only idiom (dict views can't be "+"-ed in Py3)

""" LOAD DATA """
tagged_essays = load_process_essays( **config )
logger.info("Essays loaded")
# most params below exist ONLY for the purposes of the hashing to and from disk
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:29,代码来源:CreateTrainingData.py

示例5: Settings

# Data Set Partition
CV_FOLDS = 5        # number of cross-validation folds
MIN_FEAT_FREQ = 5   # NOTE(review): presumably a minimum feature-frequency filter — confirm where consumed

# Global settings

settings = Settings()
# CoralBleaching dataset layout: training/test folders plus a pickled training file.
root_folder = settings.data_directory + "CoralBleaching/Thesis_Dataset/"
training_folder = root_folder + "Training" + "/"
test_folder = root_folder + "Test" + "/"
training_pickled = settings.data_directory + "CoralBleaching/Thesis_Dataset/training.pl"
# NOTE: These predictions are generated from the "./notebooks/SEARN/Keras - Train Tagger and Save CV Predictions For Word Tags.ipynb" notebook
# used as inputs to parsing model
rnn_predictions_folder = root_folder + "Predictions/Bi-LSTM-4-SEARN/"

config = get_config(training_folder)
# Results are recorded via ResultsProcessor under dbname "metrics_causal".
results_processor = ResultsProcessor(dbname="metrics_causal")

# Get Test Data In Order to Get Test CRELS
# load the test essays to make sure we compute metrics over the test CR labels
test_config = get_config(test_folder)
tagged_essays_test = load_process_essays(**test_config)
########################################################

# Deserialize the RNN-tagged essays produced by the notebook referenced above;
# the filename encodes the Bi-LSTM hyper-parameters used to generate them.
fname = rnn_predictions_folder + "essays_train_bi_directional-True_hidden_size-256_merge_mode-sum_num_rnns-2_use_pretrained_embedding-True.dill"
with open(fname, "rb") as f:
    pred_tagged_essays = dill.load(f)

logger.info("Started at: " + str(datetime.datetime.now()))
logger.info("Number of pred tagged essays %i" % len(pred_tagged_essays))  # should be 902
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:30,代码来源:train_causal_relation_extractor_feature_selection_logreg.py

示例6: get_config

# --- Experiment hyper-parameters (per "end not hashed" below, not part of the pickle key) ---
MIN_FEAT_FREQ       = 5        # 5 best so far
CV_FOLDS            = 5

MIN_TAG_FREQ        = 5
LOOK_BACK           = 0     # how many sentences to look back when predicting tags
# end not hashed

# construct unique key using settings for pickling

settings = Settings.Settings()

# SkinCancer dataset folders.
root_folder = settings.data_directory + "SkinCancer/Thesis_Dataset/"
training_folder                     = root_folder + "Training/"
test_folder                         = root_folder + "Test/"

train_config = get_config(training_folder)

""" FEATURE EXTRACTION """
train_config["window_size"] = 9     # override the configured window size for this experiment
# Floor division keeps the half-window offset an int (needed for slicing) on
# Python 2 AND 3; plain "/" would yield a float under Py3 (cf. the int()
# handling in annotator.py). Assumes window_size is odd.
offset = (train_config["window_size"] - 1) // 2

# Window-based feature extractor factories, all sharing the same half-window offset.
unigram_window_stemmed  = fact_extract_positional_word_features_stemmed(offset)
biigram_window_stemmed  = fact_extract_ngram_features_stemmed(offset, 2)
triigram_window_stemmed = fact_extract_ngram_features_stemmed(offset, 3)
unigram_bow_window      = fact_extract_bow_ngram_features(offset, 1)

#optimal SC feature set
extractors = [unigram_bow_window,
              unigram_window_stemmed,
              biigram_window_stemmed,
              #trigram_window_stemmed,
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:31,代码来源:windowbasedtagger_most_common_tag_multiclass_test.py

示例7: ModelStore

# --- Experiment hyper-parameters (per "end not hashed" below, not part of the pickle key) ---
MIN_TAG_FREQ        = 5
LOOK_BACK           = 0     # how many sentences to look back when predicting tags
# end not hashed

# construct unique key using settings for pickling
settings = Settings.Settings()

model_store = ModelStore()

""" PETER - CHANGE THESE FILE PATHS """
folder =                settings.data_directory + "CoralBleaching/BrattData/EBA1415_Merged/"   # Location where the training data is, use EBA_Pre and Post test essays preferably
test_folder=            settings.data_directory + "CoralBleaching/BrattData/Merged/"                # Location where the new essays to tag are located
out_predictions_file =  settings.data_directory + "CoralBleaching/Results/predictions.txt"          # File to dump the predictions to

config = get_config(folder)

""" FEATURE EXTRACTION """
# Floor division keeps the half-window offset an int (needed for slicing) on
# Python 2 AND 3; plain "/" would yield a float under Py3 (cf. the int()
# handling in annotator.py). Assumes window_size is odd.
offset = (config["window_size"] - 1) // 2

unigram_window_stemmed = fact_extract_positional_word_features_stemmed(offset)
biigram_window_stemmed = fact_extract_ngram_features_stemmed(offset, 2)

# Alternative extractors, currently disabled:
#pos_tag_window = fact_extract_positional_POS_features(offset)
#pos_tag_plus_wd_window = fact_extract_positional_POS_features_plus_word(offset)
#head_wd_window = fact_extract_positional_head_word_features(offset)
#pos_dep_vecs = fact_extract_positional_dependency_vectors(offset)

# Feature config = base config plus the extractor list (used for disk hashing).
extractors = [unigram_window_stemmed, biigram_window_stemmed]
feat_config = dict(config.items() + [("extractors", extractors)])  # NOTE: Py2-only idiom (dict views can't be "+"-ed in Py3)
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:29,代码来源:windowbasedclassifier_train_THEN_predict.py

示例8: Settings

# --- Experiment hyper-parameters (per "end not hashed" below, not part of the pickle key) ---
SPARSE_WD_FEATS     = True

MIN_FEAT_FREQ       = 5        # 5 best so far
CV_FOLDS            = 5

MIN_TAG_FREQ        = 5
LOOK_BACK           = 0     # how many sentences to look back when predicting tags
# end not hashed

settings = Settings()
root_folder = settings.data_directory + "CoralBleaching/Thesis_Dataset/"
partition = "Training" # Training | Test
target_folder = root_folder + partition + "/"
processed_essay_filename_prefix =  root_folder + "Pickled/essays_proc_pickled_"

config = get_config(target_folder)

# LOAD ESSAYS
# memoize_to_disk caches the processed essays on disk so repeat runs with the
# same config skip the expensive preprocessing.
mem_process_essays = memoize_to_disk(filename_prefix=processed_essay_filename_prefix)(load_process_essays)
tagged_essays = mem_process_essays(**config)
# map parsed essays to essay name

print("{0} essays loaded".format(len(tagged_essays)))

# LOAD COREF RESULTS
coref_root = root_folder + "CoReference/"
coref_folder = coref_root + partition
# Raw string for the regex: identical pattern value, but avoids the
# invalid-escape-sequence DeprecationWarning ("\." in a plain string) on Py3.6+.
coref_files = find_files(coref_folder, r".*\.tagged")
print("{0} co-ref tagged files loaded".format(len(coref_files)))
# Sanity check: one co-ref tagged file per essay.
assert len(coref_files) == len(tagged_essays)
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:30,代码来源:CVTrainWordWindowTaggingModel.py

示例9: get_config

settings = Settings.Settings()

# GlobalWarming dataset root.
root     =  settings.data_directory + "/GlobalWarming/BrattFiles/merged/"

""" INPUT - two serialized files, one for the pre-processed essays, the other for the features """

""" OUTPUT """
processed_essay_filename_prefix =  root + "Pickled/essays_proc_pickled_"
features_filename_prefix =         root + "Pickled/feats_pickled_"

# Experiment output files (predictions, confidences, metrics, categories).
out_predictions_file        = root + "Experiment/Output/predictions.txt"
out_predicted_margins_file  = root + "Experiment/Output/predicted_confidence.txt"
out_metrics_file            = root + "Experiment/Output/metrics.txt"
out_categories_file         = root + "Experiment/Output/categories.txt"

config = get_config(root)

""" FEATURE EXTRACTION """
# Floor division keeps the half-window offset an int (needed for slicing) on
# Python 2 AND 3; plain "/" would yield a float under Py3 (cf. the int()
# handling in annotator.py). Assumes window_size is odd.
offset = (config["window_size"] - 1) // 2

unigram_window_stemmed = fact_extract_positional_word_features_stemmed(offset)
biigram_window_stemmed = fact_extract_ngram_features_stemmed(offset, 2)

# Feature config = base config plus the extractor list (used for disk hashing).
extractors = [unigram_window_stemmed, biigram_window_stemmed]
feat_config = dict(config.items() + [("extractors", extractors)])  # NOTE: Py2-only idiom (dict views can't be "+"-ed in Py3)

""" LOAD DATA """
# memoize_to_disk caches the processed essays on disk for repeat runs.
mem_process_essays = memoize_to_disk(filename_prefix=processed_essay_filename_prefix)(load_process_essays)
tagged_essays = mem_process_essays( **config )
# replace periods in tags so that we can store results in mongo
replace_periods(tagged_essays)
开发者ID:simonhughes22,项目名称:PythonNlpResearch,代码行数:31,代码来源:windowBased_classifier_cause_effect_lbls.py


注:本文中的window_based_tagger_config.get_config函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。