本文整理汇总了Python中rasa_nlu.training_data.load_data函数的典型用法代码示例。如果您正苦于以下问题:Python load_data函数的具体用法?Python load_data怎么用?Python load_data使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了load_data函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_markdown_single_sections
def test_markdown_single_sections():
    """Markdown files containing a single section type should load correctly."""
    # A file containing only a regex-features section.
    td_regex_only = training_data.load_data(
        'data/test/markdown_single_sections/regex_only.md')
    # Use a raw string for the pattern: "\s" in a plain literal is an
    # invalid escape sequence (DeprecationWarning, SyntaxWarning on 3.12+).
    assert td_regex_only.regex_features == [
        {"name": "greet", "pattern": r"hey[^\s]*"}]

    # A file containing only a synonyms section.
    td_syn_only = training_data.load_data(
        'data/test/markdown_single_sections/synonyms_only.md')
    assert td_syn_only.entity_synonyms == {'Chines': 'chinese',
                                           'Chinese': 'chinese'}
示例2: test_data_merging
def test_data_merging(files):
    """Two independently loaded copies of the same data must be identical."""
    reference = training_data.load_data(files[0])
    merged = training_data.load_data(files[1])

    # Same number of examples in every example view.
    assert len(merged.entity_examples) == len(reference.entity_examples)
    assert len(merged.intent_examples) == len(reference.intent_examples)
    assert len(merged.training_examples) == len(reference.training_examples)

    # Same derived metadata.
    assert merged.intents == reference.intents
    assert merged.entities == reference.entities
    assert merged.entity_synonyms == reference.entity_synonyms
    assert merged.regex_features == reference.regex_features
示例3: test_multiword_entities
def test_multiword_entities():
    """A multi-word entity value must map onto the correct token span."""
    data = """
{
"rasa_nlu_data": {
"common_examples" : [
{
"text": "show me flights to New York City",
"intent": "unk",
"entities": [
{
"entity": "destination",
"start": 19,
"end": 32,
"value": "New York City"
}
]
}
]
}
}"""
    # Round-trip the payload through a temporary file so load_data
    # exercises its file-reading path.
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
        f.write(data.encode("utf-8"))
        f.flush()
        td = training_data.load_data(f.name)

    assert len(td.entity_examples) == 1
    sample = td.entity_examples[0]
    sample_entities = sample.get("entities")
    assert len(sample_entities) == 1

    tokens = WhitespaceTokenizer().tokenize(sample.text)
    token_start, token_end = MitieEntityExtractor.find_entity(
        sample_entities[0], sample.text, tokens)
    # "New York City" covers tokens 4..6; the end index is exclusive.
    assert (token_start, token_end) == (4, 7)
示例4: test_run_cv_evaluation
def test_run_cv_evaluation():
    """Cross-validation must report one score per fold for every metric."""
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    nlu_config = config.load(
        "sample_configs/config_pretrained_embeddings_spacy.yml")
    n_folds = 2
    results, entity_results = cross_validate(td, n_folds, nlu_config)

    # Every metric, for both the intent and the CRF entity results, must
    # carry exactly one value per fold on the train and test splits.
    for metric in ("Accuracy", "Precision", "F1-score"):
        assert len(results.train[metric]) == n_folds
        assert len(results.test[metric]) == n_folds
        assert len(
            entity_results.train['CRFEntityExtractor'][metric]) == n_folds
        assert len(
            entity_results.test['CRFEntityExtractor'][metric]) == n_folds
示例5: do_train
def do_train(cfg,  # type: RasaNLUModelConfig
             data,  # type: Text
             path=None,  # type: Text
             project=None,  # type: Optional[Text]
             fixed_model_name=None,  # type: Optional[Text]
             storage=None,  # type: Text
             component_builder=None,  # type: Optional[ComponentBuilder]
             **kwargs  # type: Any
             ):
    # type: (...) -> Tuple[Trainer, Interpreter, Text]
    """Load the training data, train a model and optionally persist it.

    Returns the trainer, the trained interpreter and the persisted model
    path (``None`` when no ``path`` was given).
    """
    # Ensure we are training a model that we can save in the end.
    # WARN: there is still a race condition if a model with the same name
    # is trained in another subprocess.
    trainer = Trainer(cfg, component_builder)
    persistor = create_persistor(storage)

    interpreter = trainer.train(load_data(data, cfg.language), **kwargs)

    persisted_path = (trainer.persist(path, persistor, project,
                                      fixed_model_name)
                      if path else None)
    return trainer, interpreter, persisted_path
示例6: run_evaluation
def run_evaluation(data_path, model_path,
                   component_builder=None):  # pragma: no cover
    """Evaluate intent classification and entity extraction."""
    # Load the trained model and the test data in the model's language.
    interpreter = Interpreter.load(model_path, component_builder)
    language = interpreter.model_metadata.language
    test_data = training_data.load_data(data_path, language)

    extractors = get_entity_extractors(interpreter)
    entity_predictions, tokens = get_entity_predictions(interpreter,
                                                        test_data)

    # Duckling predictions cannot be scored here, so strip them out.
    if duckling_extractors.intersection(extractors):
        entity_predictions = remove_duckling_entities(entity_predictions)
        extractors = remove_duckling_extractors(extractors)

    if is_intent_classifier_present(interpreter):
        intent_targets = get_intent_targets(test_data)
        intent_predictions = get_intent_predictions(interpreter, test_data)
        logger.info("Intent evaluation results:")
        evaluate_intents(intent_targets, intent_predictions)

    if extractors:
        entity_targets = get_entity_targets(test_data)
        logger.info("Entity evaluation results:")
        evaluate_entities(entity_targets, entity_predictions, tokens,
                          extractors)
示例7: test_repeated_entities
def test_repeated_entities():
    """When the entity value also appears earlier in the text, the
    character offsets must resolve to the correct (later) token span."""
    data = """
{
"rasa_nlu_data": {
"common_examples" : [
{
"text": "book a table today from 3 to 6 for 3 people",
"intent": "unk",
"entities": [
{
"entity": "description",
"start": 35,
"end": 36,
"value": "3"
}
]
}
]
}
}"""
    # Round-trip through a temp file so load_data reads it from disk.
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
        f.write(data.encode("utf-8"))
        f.flush()
        td = training_data.load_data(f.name)

    assert len(td.entity_examples) == 1
    sample = td.entity_examples[0]
    sample_entities = sample.get("entities")
    assert len(sample_entities) == 1

    tokens = WhitespaceTokenizer().tokenize(sample.text)
    token_start, token_end = MitieEntityExtractor.find_entity(
        sample_entities[0], sample.text, tokens)
    # The "3" at char 35 is token 9, not the earlier "3" at char 24.
    assert (token_start, token_end) == (9, 10)
示例8: test_nonascii_entities
def test_nonascii_entities():
    """Non-ASCII entity text in LUIS data must survive loading intact."""
    data = """
{
"luis_schema_version": "2.0",
"utterances" : [
{
"text": "I am looking for a ßäæ ?€ö) item",
"intent": "unk",
"entities": [
{
"entity": "description",
"startPos": 19,
"endPos": 26
}
]
}
]
}"""
    # Write UTF-8 bytes to a temp file so load_data decodes them itself.
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
        f.write(data.encode("utf-8"))
        f.flush()
        td = training_data.load_data(f.name)

    assert len(td.entity_examples) == 1
    sample = td.entity_examples[0]
    sample_entities = sample.get("entities")
    assert len(sample_entities) == 1

    entity = sample_entities[0]
    # LUIS offsets are inclusive; the converted "end" is exclusive.
    assert entity["value"] == "ßäæ ?€ö)"
    assert entity["start"] == 19
    assert entity["end"] == 27
    assert entity["entity"] == "description"
示例9: test_drop_intents_below_freq
def test_drop_intents_below_freq():
    """Intents with fewer examples than the threshold must be dropped."""
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')

    # Threshold 0 keeps every intent.
    assert drop_intents_below_freq(td, 0).intents == {
        'affirm', 'goodbye', 'greet', 'restaurant_search'}
    # Threshold 10 keeps only the well-populated intents.
    assert drop_intents_below_freq(td, 10).intents == {
        'affirm', 'restaurant_search'}
示例10: test_luis_data
def test_luis_data():
    """The LUIS demo file must convert to the expected training data."""
    loaded = training_data.load_data(
        'data/examples/luis/demo-restaurants.json')

    # Example counts per view.
    assert len(loaded.entity_examples) == 8
    assert len(loaded.intent_examples) == 28
    assert len(loaded.training_examples) == 28

    # Derived metadata.
    assert loaded.intents == {"affirm", "goodbye", "greet", "inform"}
    assert loaded.entities == {"location", "cuisine"}
    assert loaded.entity_synonyms == {}
示例11: test_wit_data
def test_wit_data():
    """The wit.ai demo file must convert to the expected training data."""
    loaded = training_data.load_data('data/examples/wit/demo-flights.json')

    # Example counts per view.
    assert len(loaded.entity_examples) == 4
    assert len(loaded.intent_examples) == 1
    assert len(loaded.training_examples) == 4

    # Derived metadata.
    assert loaded.intents == {"flight_booking"}
    assert loaded.entities == {"location", "datetime"}
    assert loaded.entity_synonyms == {}
示例12: test_prepare_data
def test_prepare_data():
    """prepare_data must filter examples by intent frequency threshold."""
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')

    def _intent_labels(examples):
        # Unique intent labels, sorted for a stable comparison.
        return sorted({example.data["intent"] for example in examples})

    assert _intent_labels(prepare_data(td, 0)) == [
        'affirm', 'goodbye', 'greet', 'restaurant_search']
    assert _intent_labels(prepare_data(td, 10)) == [
        'affirm', 'restaurant_search']
示例13: train
def train(cfg_name, project_name):
    """Train a model from config *cfg_name* and persist it as *project_name*.

    ``data`` and ``component_builder`` are free variables resolved from the
    enclosing scope.
    """
    from rasa_nlu import training_data
    cfg = config.load(cfg_name)
    trainer = Trainer(cfg, component_builder)
    # Bug fix: the original assigned the result to a local named
    # ``training_data``, which shadowed the module imported above and made
    # the RHS lookup ``training_data.load_data`` raise UnboundLocalError.
    td = training_data.load_data(data)
    trainer.train(td)
    trainer.persist("test_projects", project_name=project_name)
示例14: test_lookup_table_md
def test_lookup_table_md():
    """Lookup tables in markdown may reference a file or inline elements."""
    expected_file_ref = 'data/test/lookup_tables/plates.txt'
    loaded = training_data.load_data(
        'data/test/lookup_tables/lookup_table.md')

    # First table points at an external file of elements.
    first, second = loaded.lookup_tables[0], loaded.lookup_tables[1]
    assert first['name'] == 'plates'
    assert first['elements'] == expected_file_ref

    # Second table lists its elements inline.
    assert second['name'] == 'drinks'
    assert second['elements'] == [
        'mojito', 'lemonade', 'sweet berry wine', 'tea', 'club mate']
示例15: run_evaluation
def run_evaluation(data_path, model,
                   report_folder=None,
                   successes_filename=None,
                   errors_filename='errors.json',
                   confmat_filename=None,
                   intent_hist_filename=None,
                   component_builder=None):  # pragma: no cover
    """Evaluate intent classification and entity extraction.

    ``model`` may be a ready ``Interpreter`` or a path to a persisted model.
    Returns a dict with ``intent_evaluation`` and ``entity_evaluation``
    results (``None`` for any evaluation that was not applicable).
    """
    interpreter = (model if isinstance(model, Interpreter)
                   else Interpreter.load(model, component_builder))

    test_data = training_data.load_data(data_path,
                                        interpreter.model_metadata.language)
    extractors = get_entity_extractors(interpreter)
    entity_predictions, tokens = get_entity_predictions(interpreter,
                                                        test_data)

    # Duckling predictions cannot be scored here; drop them up front.
    if duckling_extractors.intersection(extractors):
        entity_predictions = remove_duckling_entities(entity_predictions)
        extractors = remove_duckling_extractors(extractors)

    result = {"intent_evaluation": None, "entity_evaluation": None}

    if report_folder:
        utils.create_dir(report_folder)

    if is_intent_classifier_present(interpreter):
        intent_targets = get_intent_targets(test_data)
        intent_results = get_intent_predictions(
            intent_targets, interpreter, test_data)
        logger.info("Intent evaluation results:")
        result['intent_evaluation'] = evaluate_intents(intent_results,
                                                       report_folder,
                                                       successes_filename,
                                                       errors_filename,
                                                       confmat_filename,
                                                       intent_hist_filename)

    if extractors:
        entity_targets = get_entity_targets(test_data)
        logger.info("Entity evaluation results:")
        result['entity_evaluation'] = evaluate_entities(entity_targets,
                                                        entity_predictions,
                                                        tokens,
                                                        extractors,
                                                        report_folder)

    return result