本文整理汇总了Python中corpus.Corpus.load方法的典型用法代码示例。如果您正苦于以下问题:Python Corpus.load方法的具体用法?Python Corpus.load怎么用?Python Corpus.load使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类corpus.Corpus
的用法示例。
在下文中一共展示了Corpus.load方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_bb_target_state_halfed
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load [as 别名]
def test_bb_target_state_halfed(self):
    """Energy of the target hypothesis (halfed lexicon) on the bb corpus."""
    feature_table = FeatureTable.load(get_feature_table_fixture("a_b_and_cons_feature_table.json"))
    constraint_set = ConstraintSet.load(get_constraint_set_fixture("bb_target_constraint_set.json"),
                                        feature_table)
    words = Corpus.load(get_corpus_fixture("bb_target_lexicon_halfed.txt")).get_words()
    target_grammar = Grammar(feature_table, constraint_set, Lexicon(words, feature_table))
    hypothesis = TraversableGrammarHypothesis(target_grammar,
                                              Corpus.load(get_corpus_fixture("bb_corpus.txt")))
    # Regression value for the target-state energy.
    self.assertEqual(hypothesis.get_energy(), 407430)
示例2: test_aspiration_and_lengthening_extended_augmented_target_state
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load [as 别名]
def test_aspiration_and_lengthening_extended_augmented_target_state(self):
    """Build the target hypothesis for the extended aspiration-and-lengthening data.

    NOTE(review): no assertion follows the construction below — presumably an
    energy check was removed or lost; confirm intent.
    """
    configurations["DATA_ENCODING_LENGTH_MULTIPLIER"] = 100
    configurations["RESTRICTION_ON_ALPHABET"] = True
    table = FeatureTable.load(
        get_feature_table_fixture("aspiration_and_lengthening_extended_augmented_feature_table.json"))
    constraints = ConstraintSet.load(
        get_constraint_set_fixture("aspiration_and_lengthening_augmented_target_constraint_set.json"),
        table)
    words = Corpus.load(
        get_corpus_fixture("aspiration_and_lengthening_extended_target_lexicon.txt")).get_words()
    grammar = Grammar(table, constraints, Lexicon(words, table))
    corpus = Corpus.load(get_corpus_fixture("aspiration_and_lengthening_extended_corpus.txt"))
    traversable_hypothesis = TraversableGrammarHypothesis(grammar, corpus)
示例3: run_simulation
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load [as 别名]
def run_simulation(configurations_tuples, simulation_number, log_file_template, feature_table_file_name, corpus_file_name, constraint_set_file_name,
                   sample_target_lexicon=None, sample_target_outputs=None, target_lexicon_indicator_function=None,
                   target_constraint_set_file_name=None, target_lexicon_file_name=None, convert_corpus_word_to_target_word_function=None,
                   initial_lexicon_file_name=None):
    """Configure globals and logging, build a hypothesis from fixture files,
    and run simulated annealing on it.

    configurations_tuples -- iterable of (key, value) pairs applied to the
        global ``configurations`` before anything else runs.
    simulation_number, log_file_template -- the template is formatted with
        (host name, simulation number) to produce the log file name.
    feature_table_file_name, corpus_file_name, constraint_set_file_name --
        fixture file names for the hypothesis building blocks.
    sample_target_lexicon, sample_target_outputs,
    target_lexicon_indicator_function -- optional monitoring hooks; forwarded
        to SimulatedAnnealing only when all three are provided.
    target_constraint_set_file_name plus either target_lexicon_file_name or
    convert_corpus_word_to_target_word_function -- when given, the target
        hypothesis energy is computed and forwarded as ``target_energy``.
    initial_lexicon_file_name -- optional corpus used to seed the lexicon
        instead of the main corpus.
    """
    # Apply per-run configuration overrides before anything reads them.
    for key, value in configurations_tuples:
        configurations[key] = value

    # Route root-logger INFO output to a per-host, per-simulation file
    # under ../logging/ relative to this module.
    log_file_name = log_file_template.format(platform.node(), simulation_number)
    dirname = split(abspath(__file__))[0]
    log_file_path = join(dirname, "../logging/", log_file_name)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    file_log_formatter = logging.Formatter('%(asctime)s %(levelname)s %(name)s %(message)s', "%Y-%m-%d %H:%M:%S")
    file_log_handler = logging.FileHandler(log_file_path, mode='w')
    file_log_handler.setFormatter(file_log_formatter)
    logger.addHandler(file_log_handler)

    feature_table = FeatureTable.load(get_feature_table_fixture(feature_table_file_name))
    corpus = Corpus.load(get_corpus_fixture(corpus_file_name))
    constraint_set = ConstraintSet.load(get_constraint_set_fixture(constraint_set_file_name),
                                        feature_table)
    # Seed the lexicon from a dedicated corpus when supplied, otherwise
    # from the training corpus itself.
    if initial_lexicon_file_name:
        corpus_for_lexicon = Corpus.load(get_corpus_fixture(initial_lexicon_file_name))
        lexicon = Lexicon(corpus_for_lexicon.get_words(), feature_table)
    else:
        lexicon = Lexicon(corpus.get_words(), feature_table)
    grammar = Grammar(feature_table, constraint_set, lexicon)
    data = corpus.get_words()
    traversable_hypothesis = TraversableGrammarHypothesis(grammar, data)

    keyargs_dict = {}
    # Forward the monitoring hooks only when the caller supplied all three.
    if sample_target_lexicon and sample_target_outputs and target_lexicon_indicator_function:
        keyargs_dict["sample_target_lexicon"] = sample_target_lexicon
        keyargs_dict["sample_target_outputs"] = sample_target_outputs
        keyargs_dict["target_lexicon_indicator_function"] = target_lexicon_indicator_function
    if target_constraint_set_file_name and (target_lexicon_file_name or convert_corpus_word_to_target_word_function):
        target_energy = get_target_hypothesis_energy(feature_table, target_constraint_set_file_name, corpus,
                                                     target_lexicon_file_name, convert_corpus_word_to_target_word_function)
        keyargs_dict["target_energy"] = target_energy
    simulated_annealing = SimulatedAnnealing(traversable_hypothesis, **keyargs_dict)
    simulated_annealing.run()
示例4: test_t_aspiration_target_state
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load [as 别名]
def test_t_aspiration_target_state(self):
    """Target-state energy for t-aspiration, with and without alphabet restriction."""
    configurations["DATA_ENCODING_LENGTH_MULTIPLIER"] = 25
    table = FeatureTable.load(get_feature_table_fixture("t_aspiration_feature_table.json"))
    constraints = ConstraintSet.load(get_constraint_set_fixture("t_aspiration_target_constraint_set.json"),
                                     table)
    words = Corpus.load(get_corpus_fixture("t_aspiration_target_lexicon.txt")).get_words()
    grammar = Grammar(table, constraints, Lexicon(words, table))
    hypothesis = TraversableGrammarHypothesis(grammar,
                                              Corpus.load(get_corpus_fixture("t_aspiration_corpus.txt")))
    # The energy differs depending on whether the alphabet is restricted.
    configurations["RESTRICTION_ON_ALPHABET"] = True
    self.assertEqual(hypothesis.get_energy(), 167838)
    configurations["RESTRICTION_ON_ALPHABET"] = False
    self.assertEqual(hypothesis.get_energy(), 173676)
示例5: load_modules_and_run
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load [as 别名]
def load_modules_and_run(feature_table_file_path, corpus_file_path, constraint_set_file_path,
                         configuration_files_dir_path):
    """Import the simulation modules and run simulated annealing on the given files."""
    # TODO: finish loading a configuration module from
    # configuration_files_dir_path (imp.find_module / imp.load_module).
    # Imports are deferred to call time because they must happen after
    # OtmlConfigurationManager has been initialized.
    from grammar.lexicon import Lexicon
    from grammar.feature_table import FeatureTable
    from grammar.constraint_set import ConstraintSet
    from grammar.grammar import Grammar
    from traversable_grammar_hypothesis import TraversableGrammarHypothesis
    from corpus import Corpus
    from simulated_annealing import SimulatedAnnealing

    feature_table = FeatureTable.load(feature_table_file_path)
    corpus = Corpus.load(corpus_file_path)
    constraint_set = ConstraintSet.load(constraint_set_file_path, feature_table)
    grammar = Grammar(feature_table, constraint_set,
                      Lexicon(corpus.get_words(), feature_table))
    hypothesis = TraversableGrammarHypothesis(grammar, corpus.get_words())
    SimulatedAnnealing(hypothesis).run()
示例6: setUp
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load [as 别名]
def setUp(self):
    """Configure the d-lengthening simulation: mutation weights, annealing
    schedule, fixtures, and a target energy for monitoring."""
    # Mutation weights for constraint-set and lexicon moves.
    configurations["CONSTRAINT_SET_MUTATION_WEIGHTS"] = {
        "insert_constraint": 1,
        "remove_constraint": 1,
        "demote_constraint": 1,
        "insert_feature_bundle_phonotactic_constraint": 1,
        "remove_feature_bundle_phonotactic_constraint": 1,
        "augment_feature_bundle": 0}
    configurations["CONSTRAINT_INSERTION_WEIGHTS"] = {
        "Dep": 1,
        "Max": 1,
        "Ident": 0,
        "Phonotactic": 1}
    configurations["LEXICON_MUTATION_WEIGHTS"] = {
        "insert_segment": 1,
        "delete_segment": 1,
        "change_segment": 0}
    # Annealing schedule and search limits.
    configurations["RANDOM_SEED"] = True
    #configurations["SEED"] = 84
    configurations["INITIAL_TEMPERATURE"] = 100
    configurations["COOLING_PARAMETER"] = 0.999985
    configurations["INITIAL_NUMBER_OF_BUNDLES_IN_PHONOTACTIC_CONSTRAINT"] = 1
    configurations["MIN_FEATURE_BUNDLES_IN_PHONOTACTIC_CONSTRAINT"] = 1
    configurations["DATA_ENCODING_LENGTH_MULTIPLIER"] = 100
    configurations["RESTRICTION_ON_ALPHABET"] = True
    configurations["MAX_FEATURE_BUNDLES_IN_PHONOTACTIC_CONSTRAINT"] = float("INF")
    configurations["MAX_NUMBER_OF_CONSTRAINTS_IN_CONSTRAINT_SET"] = float("INF")
    configurations["DEBUG_LOGGING_INTERVAL"] = 50
    # LOG_FILE_NAME must be set before logging is configured below.
    configurations["LOG_FILE_NAME"] = "{}_d_lengthening_INF_INF_{}.txt".format(platform.node(), simulation_number)
    self._set_up_logging()
    configurations["CORPUS_DUPLICATION_FACTOR"] = 1

    # Build the initial hypothesis from the d-lengthening fixtures.
    self.feature_table = FeatureTable.load(get_feature_table_fixture("d_lengthening_feature_table.json"))
    corpus = Corpus.load(get_corpus_fixture("d_lengthening_corpus.txt"))
    self.constraint_set = ConstraintSet.load(get_constraint_set_fixture("faith_constraint_set.json"),
                                             self.feature_table)
    self.lexicon = Lexicon(corpus.get_words(), self.feature_table)
    self.grammar = Grammar(self.feature_table, self.constraint_set, self.lexicon)
    self.data = corpus.get_words()
    self.traversable_hypothesis = TraversableGrammarHypothesis(self.grammar, self.data)

    def report_long_vowels(words):
        # Progress indicator: count ":" length marks across the lexicon.
        total = sum(word.count(":") for word in words)
        return "number of long vowels: {}".format(total)

    def strip_length_mark(word):
        return word.replace(':', '')

    # Energy of the known target hypothesis, used as a monitoring reference
    # by the annealer (value noted in the original as 391689).
    target_energy = self.get_target_hypothesis_energy(self.feature_table, "d_lengthening_target_constraint_set.json", corpus,
                                                      convert_corpus_word_to_target_word_function=strip_length_mark)
    self.simulated_annealing = SimulatedAnnealing(self.traversable_hypothesis,
                                                  target_lexicon_indicator_function=report_long_vowels,
                                                  sample_target_lexicon=["id", "ad"],
                                                  sample_target_outputs=["i:d", "a:d"],
                                                  target_energy=target_energy)
示例7: setUp
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load [as 别名]
def setUp(self):
    """Small a/b grammar plus a sample word and phonotactic constraint."""
    self.feature_table = FeatureTable.load(get_feature_table_fixture("a_b_and_son_feature_table.json"))
    # The filename itself is kept as a fixture for tests that reload it.
    self.constraint_set_filename = get_constraint_set_fixture("no_bb_Max_Dep_constraint_set.json")
    self.constraint_set = ConstraintSet.load(self.constraint_set_filename, self.feature_table)
    self.corpus = Corpus.load(get_corpus_fixture("small_ab_corpus.txt"))
    self.lexicon = Lexicon(self.corpus.get_words(), self.feature_table)
    self.grammar = Grammar(self.feature_table, self.constraint_set, self.lexicon)
    # Probe objects used by individual tests.
    self.word = Word("abababa", self.feature_table)
    self.constraint = PhonotacticConstraint([{'son': '+'}, {'son': '+'}], self.feature_table)
示例8: setUp
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load [as 别名]
def setUp(self):
    """Minimal fixtures, plus a separate table for the phonotactic tests."""
    self.phonotactic_test_feature_table = FeatureTable.load(
        get_feature_table_fixture("phonotactic_test_feature_table.json"))
    self.feature_table = FeatureTable.load(get_feature_table_fixture("minimal_feature_table.json"))
    self.constraint_set_filename = get_constraint_set_fixture("minimal_constraint_set.json")
    self.constraint_set = ConstraintSet.load(self.constraint_set_filename, self.feature_table)
    self.corpus = Corpus.load(get_corpus_fixture("small_ab_corpus.txt"))
    self.lexicon = Lexicon(self.corpus.get_words(), self.feature_table)
    self.grammar = Grammar(self.feature_table, self.constraint_set, self.lexicon)
示例9: setUp
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load [as 别名]
def setUp(self):
    """Faith-only initial hypothesis over the bb corpus, wrapped in an annealer."""
    self.feature_table = FeatureTable.load(get_feature_table_fixture("a_b_and_cons_feature_table.json"))
    corpus = Corpus.load(get_corpus_fixture("bb_corpus.txt"))
    self.constraint_set = ConstraintSet.load(get_constraint_set_fixture("faith_constraint_set.json"),
                                             self.feature_table)
    # Initial lexicon is simply the surface forms of the corpus.
    self.lexicon = Lexicon(corpus.get_words(), self.feature_table)
    self.grammar = Grammar(self.feature_table, self.constraint_set, self.lexicon)
    self.data = corpus.get_words()
    self.traversable_hypothesis = TraversableGrammarHypothesis(self.grammar, self.data)
    self.simulated_annealing = SimulatedAnnealing(self.traversable_hypothesis)
示例10: setUp
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load [as 别名]
def setUp(self):
    """Parser-suite grammar plus a handful of probe words."""
    self.feature_table = FeatureTable.load(get_feature_table_fixture("a_b_and_son_feature_table.json"))
    self.constraint_set = ConstraintSet.load(get_constraint_set_fixture("no_bb_Max_Dep_constraint_set.json"),
                                             self.feature_table)
    self.corpus = Corpus.load(get_corpus_fixture("testing_parser_suite_corpus.txt"))
    self.lexicon = Lexicon(self.corpus.get_words(), self.feature_table)
    self.grammar = Grammar(self.feature_table, self.constraint_set, self.lexicon)
    # Probe words referenced by the individual tests.
    for segments in ("bb", "bab", "abba", "ababa"):
        setattr(self, segments, Word(segments, self.feature_table))
示例11: test_t_aspiration_initial_state
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load [as 别名]
def test_t_aspiration_initial_state(self):
    """Build the initial (faith-only) hypothesis for the t-aspiration corpus.

    NOTE(review): no assertion follows — the hypothesis is only constructed.
    """
    configurations["DATA_ENCODING_LENGTH_MULTIPLIER"] = 25
    configurations["RESTRICTION_ON_ALPHABET"] = True
    table = FeatureTable.load(get_feature_table_fixture("t_aspiration_feature_table.json"))
    corpus = Corpus.load(get_corpus_fixture("t_aspiration_corpus.txt"))
    constraints = ConstraintSet.load(get_constraint_set_fixture("faith_constraint_set.json"),
                                     table)
    grammar = Grammar(table, constraints, Lexicon(corpus.get_words(), table))
    traversable_hypothesis = TraversableGrammarHypothesis(grammar, corpus.get_words())
示例12: setUp
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load [as 别名]
def setUp(self):
    """French-deletion simulation with alphabet restrictions enabled."""
    self._set_up_logging()
    configurations["CORPUS_DUPLICATION_FACTOR"] = 25
    self.feature_table = FeatureTable.load(get_feature_table_fixture("french_deletion_feature_table.json"))
    corpus = Corpus.load(get_corpus_fixture("french_deletion_corpus_for_with_restrictions.txt"))
    self.constraint_set = ConstraintSet.load(get_constraint_set_fixture("french_deletion_constraint_set.json"),
                                             self.feature_table)
    # BUG FIX: the original passed the fixture *path string* to Lexicon
    # instead of the corpus words (every other setUp in this suite builds
    # the lexicon from corpus.get_words()).
    self.lexicon = Lexicon(corpus.get_words(), self.feature_table)
    self.grammar = Grammar(self.feature_table, self.constraint_set, self.lexicon)
    self.data = corpus.get_words()
    self.traversable_hypothesis = TraversableGrammarHypothesis(self.grammar, self.data)
    self.simulated_annealing = SimulatedAnnealing(self.traversable_hypothesis)
示例13: setUp
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load [as 别名]
def setUp(self):
    """Generic simulation setUp driven by module-level fixture names and hooks.

    NOTE(review): feature_table_file_name, corpus_file_name,
    constraint_set_file_name, indicator_function, sample_desired_lexicon and
    sample_desired_outputs are module-level globals — confirm against caller.
    """
    self._set_up_logging()
    configurations["CORPUS_DUPLICATION_FACTOR"] = 1
    self.feature_table = FeatureTable.load(get_feature_table_fixture(feature_table_file_name))
    corpus = Corpus.load(get_corpus_fixture(corpus_file_name))
    self.constraint_set = ConstraintSet.load(get_constraint_set_fixture(constraint_set_file_name),
                                             self.feature_table)
    self.lexicon = Lexicon(corpus.get_words(), self.feature_table)
    self.grammar = Grammar(self.feature_table, self.constraint_set, self.lexicon)
    self.data = corpus.get_words()
    self.traversable_hypothesis = TraversableGrammarHypothesis(self.grammar, self.data)
    self.simulated_annealing = SimulatedAnnealing(self.traversable_hypothesis,
                                                  target_lexicon_indicator_function=indicator_function,
                                                  sample_target_lexicon=sample_desired_lexicon,
                                                  sample_target_outputs=sample_desired_outputs)
示例14: setUp
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load [as 别名]
def setUp(self):
    """tk-aspiration simulation with a progress indicator counting 'h' marks."""
    self._set_up_logging()
    configurations["CORPUS_DUPLICATION_FACTOR"] = 1
    self.feature_table = FeatureTable.load(get_feature_table_fixture("tk_aspiration_feature_table.json"))
    corpus = Corpus.load(get_corpus_fixture("tk_aspiration_corpus.txt"))
    self.constraint_set = ConstraintSet.load(get_constraint_set_fixture("faith_constraint_set.json"),
                                             self.feature_table)
    self.lexicon = Lexicon(corpus.get_words(), self.feature_table)
    self.grammar = Grammar(self.feature_table, self.constraint_set, self.lexicon)
    self.data = corpus.get_words()
    self.traversable_hypothesis = TraversableGrammarHypothesis(self.grammar, self.data)

    def function(words):
        # Progress indicator: total "h" (aspiration) marks across the lexicon.
        number_of_aspirated_consonants = sum(word.count("h") for word in words)
        # BUG FIX: removed the stray unmatched ")" the original format string
        # appended to the reported message.
        return "number of aspirated consonants = {}".format(number_of_aspirated_consonants)

    self.simulated_annealing = SimulatedAnnealing(self.traversable_hypothesis,
                                                  target_lexicon_indicator_function=function,
                                                  sample_target_lexicon=["ti", "ta", "ki", "ka"],
                                                  sample_target_outputs=["thi", "tha", "khi", "kha"])
示例15: setUp
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import load [as 别名]
def setUp(self):
    """bb-for-paper simulation with an indicator counting "bab" substrings."""
    self._set_up_logging()
    configurations["CORPUS_DUPLICATION_FACTOR"] = 1
    self.feature_table = FeatureTable.load(get_feature_table_fixture("a_b_and_cons_feature_table.json"))
    corpus = Corpus.load(get_corpus_fixture("bb_for_paper_corpus.txt"))
    self.constraint_set = ConstraintSet.load(get_constraint_set_fixture("faith_constraint_set.json"),
                                             self.feature_table)
    self.lexicon = Lexicon(corpus.get_words(), self.feature_table)
    self.grammar = Grammar(self.feature_table, self.constraint_set, self.lexicon)
    self.data = corpus.get_words()
    self.traversable_hypothesis = TraversableGrammarHypothesis(self.grammar, self.data)

    def count_babs(words):
        # Progress indicator: total "bab" substrings across the lexicon.
        return "number of bab's: {}".format(sum(word.count("bab") for word in words))

    self.simulated_annealing = SimulatedAnnealing(self.traversable_hypothesis,
                                                  target_lexicon_indicator_function=count_babs,
                                                  sample_target_lexicon=["bb", "abb"],
                                                  sample_target_outputs=["bab", "abab"])