本文整理汇总了Python中rasa_nlu.training_data.TrainingData方法的典型用法代码示例。如果您正苦于以下问题:Python training_data.TrainingData方法的具体用法?Python training_data.TrainingData怎么用?Python training_data.TrainingData使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块rasa_nlu.training_data的用法示例。
在下文中一共展示了training_data.TrainingData方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: train
# 需要导入模块: from rasa_nlu import training_data [as 别名]
# 或者: from rasa_nlu.training_data import TrainingData [as 别名]
def train(self,
          training_data: TrainingData,
          cfg: RasaNLUModelConfig,
          **kwargs: Any) -> None:
    """Fit the sklearn intent classifier on the given training data.

    Training is skipped (with a warning) when the data contains fewer
    than two distinct intent labels, since a classifier needs at least
    two classes to learn anything.
    """
    thread_count = kwargs.get("num_threads", 1)

    intent_labels = [example.get("intent")
                     for example in training_data.intent_examples]

    if len(set(intent_labels)) >= 2:
        y = self.transform_labels_str2num(intent_labels)
        # Stack the per-message feature vectors into one design matrix.
        X = np.stack([example.get("text_features")
                      for example in training_data.intent_examples])
        self.clf = self._create_classifier(thread_count, y)
        self.clf.fit(X, y)
    else:
        logger.warning("Can not train an intent classifier. "
                       "Need at least 2 different classes. "
                       "Skipping training of intent classifier.")
示例2: _prepare_data_for_training
# 需要导入模块: from rasa_nlu import training_data [as 别名]
# 或者: from rasa_nlu.training_data import TrainingData [as 别名]
def _prepare_data_for_training(
        self,
        training_data: 'TrainingData',
        intent_dict: Dict[Text, int]
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Prepare data for training.

    Returns the stacked text features ``X``, the encoded intent targets
    ``Y`` (one row of ``self.encoded_all_intents`` per example), and the
    raw intent indices ``intents_for_X``.
    """
    examples = training_data.intent_examples

    X = np.stack([example.get("text_features") for example in examples])
    intents_for_X = np.array(
        [intent_dict[example.get("intent")] for example in examples])
    Y = np.stack([self.encoded_all_intents[intent_idx]
                  for intent_idx in intents_for_X])

    return X, Y, intents_for_X
# tf helpers:
示例3: read
# 需要导入模块: from rasa_nlu import training_data [as 别名]
# 或者: from rasa_nlu.training_data import TrainingData [as 别名]
def read(self, fn: Text, **kwargs: Any) -> 'TrainingData':
    """Loads training data stored in the Dialogflow data format.

    Expects ``language`` and ``fformat`` kwargs; ``fformat`` selects
    whether intents or entities are parsed from the file.
    """
    from rasa_nlu.training_data import TrainingData

    language = kwargs["language"]
    fformat = kwargs["fformat"]

    if fformat not in {DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES}:
        raise ValueError("fformat must be either {}, or {}"
                         "".format(DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES))

    root_js = utils.read_json_file(fn)
    examples_js = self._read_examples_js(fn, language, fformat)

    if not examples_js:
        logger.warning("No training examples found for dialogflow file {}!"
                       "".format(fn))
        return TrainingData()
    # fformat is guaranteed above to be one of the two known formats.
    if fformat == DIALOGFLOW_INTENT:
        return self._read_intent(root_js, examples_js)
    return self._read_entities(root_js, examples_js)
示例4: reads
# 需要导入模块: from rasa_nlu import training_data [as 别名]
# 或者: from rasa_nlu.training_data import TrainingData [as 别名]
def reads(self, s: Text, **kwargs: Any) -> 'TrainingData':
    """Read markdown string and create TrainingData object."""
    from rasa_nlu.training_data import TrainingData

    # Reset parser state so repeated calls do not accumulate examples.
    self.__init__()

    for raw_line in self._strip_comments(s).splitlines():
        stripped = raw_line.strip()
        header = self._find_section_header(stripped)
        if header:
            self._set_current_section(header[0], header[1])
        else:
            # NOTE(review): non-header lines are parsed as items and then
            # checked for file references — reconstructed nesting; confirm
            # against upstream rasa_nlu markdown reader.
            self._parse_item(stripped)
            self._load_files(stripped)

    return TrainingData(self.training_examples, self.entity_synonyms,
                        self.regex_features, self.lookup_tables)
示例5: load_data
# 需要导入模块: from rasa_nlu import training_data [as 别名]
# 或者: from rasa_nlu.training_data import TrainingData [as 别名]
def load_data(resource_name: Text,
              language: Optional[Text] = 'en') -> 'TrainingData':
    """Load training data from disk.

    Merges them if loaded from disk and multiple files are found."""
    from rasa_nlu.training_data import TrainingData

    files = utils.list_files(resource_name)
    # Load each file and drop anything that came back empty/None.
    data_sets = [ds for ds in (_load(f, language) for f in files) if ds]

    if not data_sets:
        return TrainingData()
    if len(data_sets) == 1:
        return data_sets[0]
    return data_sets[0].merge(*data_sets[1:])
示例6: load_data_from_endpoint
# 需要导入模块: from rasa_nlu import training_data [as 别名]
# 或者: from rasa_nlu.training_data import TrainingData [as 别名]
def load_data_from_endpoint(data_endpoint: EndpointConfig,
                            language: Optional[Text] = 'en'
                            ) -> 'TrainingData':
    """Load training data from a URL."""
    if not utils.is_url(data_endpoint.url):
        raise requests.exceptions.InvalidURL(data_endpoint.url)
    try:
        response = data_endpoint.request("get")
        response.raise_for_status()
        # Persist the payload to a temporary file so the regular
        # file-based loader can handle format detection.
        temp_file = utils.create_temporary_file(response.content,
                                                mode="w+b")
        return _load(temp_file, language)
    except Exception as e:
        # NOTE(review): deliberately broad best-effort — any download or
        # parse failure is logged and the function implicitly returns None.
        logger.warning("Could not retrieve training data "
                       "from URL:\n{}".format(e))
示例7: train
# 需要导入模块: from rasa_nlu import training_data [as 别名]
# 或者: from rasa_nlu.training_data import TrainingData [as 别名]
def train(self,
          training_data: TrainingData,
          config: RasaNLUModelConfig,
          **kwargs: Any) -> None:
    """Train the entity extractor on the given training data.

    Trains on ALL training examples — even those without entity
    annotations — after filtering out pre-trained entity annotations.
    Does nothing when no example carries an entity annotation.
    """
    if not training_data.entity_examples:
        return

    # Sanity-check that upstream components produced spacy docs.
    self._check_spacy_doc(training_data.training_examples[0])

    # Filter out pre-trained entity examples before building features.
    trainable_examples = self.filter_trainable_entities(
        training_data.training_examples)

    dataset = self._create_dataset(trainable_examples)
    self._train_model(dataset)
示例8: test_count_vector_featurizer_oov_token
# 需要导入模块: from rasa_nlu import training_data [as 别名]
# 或者: from rasa_nlu.training_data import TrainingData [as 别名]
def test_count_vector_featurizer_oov_token(sentence, expected):
    """The featurizer maps unseen tokens onto the configured OOV token."""
    from rasa_nlu.featurizers.count_vectors_featurizer import \
        CountVectorsFeaturizer

    featurizer = CountVectorsFeaturizer({"token_pattern": r'(?u)\b\w+\b',
                                         "OOV_token": '__oov__'})

    training_message = Message(sentence)
    # An intent is required for a valid training example.
    training_message.set("intent", "bla")
    featurizer.train(TrainingData([training_message]))

    message = Message(sentence)
    featurizer.process(message)

    assert np.all(message.get("text_features") == expected)
示例9: test_count_vector_featurizer
# 需要导入模块: from rasa_nlu import training_data [as 别名]
# 或者: from rasa_nlu.training_data import TrainingData [as 别名]
def test_count_vector_featurizer(sentence, expected):
    """Character n-gram (1-2) counts match the expected feature vector."""
    from rasa_nlu.featurizers.count_vectors_featurizer import \
        CountVectorsFeaturizer

    featurizer = CountVectorsFeaturizer({"min_ngram": 1,
                                         "max_ngram": 2,
                                         "analyzer": 'char'})

    training_message = Message(sentence)
    # An intent is required for a valid training example.
    training_message.set("intent", "bla")
    featurizer.train(TrainingData([training_message]))

    message = Message(sentence)
    featurizer.process(message)

    assert np.all(message.get("text_features") == expected)
示例10: test_unintentional_synonyms_capitalized
# 需要导入模块: from rasa_nlu import training_data [as 别名]
# 或者: from rasa_nlu.training_data import TrainingData [as 别名]
def test_unintentional_synonyms_capitalized(component_builder):
    """Entity values identical to the surface text must not become synonyms."""
    _config = utilities.base_test_conf("pretrained_embeddings_spacy")
    ner_syn = component_builder.create_component(_config.for_component(5),
                                                 _config)

    def _cuisine_example(text, start, end):
        # Both examples map their entity span to the value "Mexican".
        return Message(text, {
            "intent": "restaurant_search",
            "entities": [{"start": start,
                          "end": end,
                          "value": "Mexican",
                          "entity": "cuisine"}]
        })

    examples = [
        _cuisine_example("Any Mexican restaurant will do", 4, 11),
        _cuisine_example("I want Tacos!", 7, 12),
    ]
    ner_syn.train(TrainingData(training_examples=examples), _config)

    # "Mexican" equals its entity value, so no synonym is recorded;
    # "Tacos" differs, so its lowercased form maps to "Mexican".
    assert ner_syn.synonyms.get("mexican") is None
    assert ner_syn.synonyms.get("tacos") == "Mexican"
示例11: __init__
# 需要导入模块: from rasa_nlu import training_data [as 别名]
# 或者: from rasa_nlu.training_data import TrainingData [as 别名]
def __init__(self,
             cfg,  # type: RasaNLUModelConfig
             component_builder=None,  # type: Optional[ComponentBuilder]
             skip_validation=False  # type: bool
             ):
    # type: (...) -> None
    """Set up a trainer for the pipeline described by ``cfg``."""
    self.config = cfg
    self.skip_validation = skip_validation
    # Populated later, once training data is supplied.
    self.training_data = None  # type: Optional[TrainingData]

    if component_builder is None:
        # Without a shared builder every interpreter creation spawns a
        # fresh one, so no components get reused between interpreters.
        component_builder = components.ComponentBuilder()

    # Verify required packages for all components before instantiating
    # any component classes (unless validation is explicitly skipped).
    if not self.skip_validation:
        components.validate_requirements(cfg.component_names)

    self.pipeline = self._build_pipeline(cfg, component_builder)
示例12: train
# 需要导入模块: from rasa_nlu import training_data [as 别名]
# 或者: from rasa_nlu.training_data import TrainingData [as 别名]
def train(self, training_data, cfg, **kwargs):
    # type: (TrainingData, RasaNLUModelConfig, **Any) -> None
    """Train the intent classifier on a data set.

    Skips training with a warning when fewer than two distinct intent
    labels exist, since a classifier needs at least two classes.
    """
    num_threads = kwargs.get("num_threads", 1)
    labels = [e.get("intent")
              for e in training_data.intent_examples]
    if len(set(labels)) < 2:
        # Fix: Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning("Can not train an intent classifier. "
                       "Need at least 2 different classes. "
                       "Skipping training of intent classifier.")
    else:
        y = self.transform_labels_str2num(labels)
        # Stack per-message feature vectors into one design matrix.
        X = np.stack([example.get("text_features")
                      for example in training_data.intent_examples])
        self.clf = self._create_classifier(num_threads, y)
        self.clf.fit(X, y)
示例13: train
# 需要导入模块: from rasa_nlu import training_data [as 别名]
# 或者: from rasa_nlu.training_data import TrainingData [as 别名]
def train(self, training_data, cfg, **kwargs):
    # type: (TrainingData, RasaNLUModelConfig, **Any) -> None
    """Train the MITIE text categorizer on the intent examples.

    Requires the path to a MITIE feature-extractor file via the
    ``mitie_file`` kwarg; raises when it is missing. Training is only
    invoked when at least one intent example exists.
    """
    import mitie

    model_file = kwargs.get("mitie_file")
    if not model_file:
        # Fix: corrected typo in user-facing message
        # ("preceeded" -> "preceded").
        raise Exception("Can not run MITIE entity extractor without a "
                        "language model. Make sure this component is "
                        "preceded by the 'nlp_mitie' component.")

    trainer = mitie.text_categorizer_trainer(model_file)
    trainer.num_threads = kwargs.get("num_threads", 1)
    for example in training_data.intent_examples:
        tokens = self._tokens_of_message(example)
        trainer.add_labeled_text(tokens, example.get("intent"))

    if training_data.intent_examples:
        # we can not call train if there are no examples!
        self.clf = trainer.train()
示例14: read
# 需要导入模块: from rasa_nlu import training_data [as 别名]
# 或者: from rasa_nlu.training_data import TrainingData [as 别名]
def read(self, fn, **kwargs):
    # type: ([Text]) -> TrainingData
    """Loads training data stored in the Dialogflow data format."""
    language = kwargs["language"]
    fformat = kwargs["fformat"]

    if fformat not in {DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES}:
        raise ValueError("fformat must be either {}, or {}".format(DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES))

    root_js = utils.read_json_file(fn)
    examples_js = self._read_examples_js(fn, language, fformat)

    if not examples_js:
        logger.warning("No training examples found for dialogflow file {}!".format(fn))
        return TrainingData()
    # fformat is guaranteed above to be one of the two known formats.
    if fformat == DIALOGFLOW_INTENT:
        return self._read_intent(root_js, examples_js)
    return self._read_entities(examples_js)
示例15: test_unintentional_synonyms_capitalized
# 需要导入模块: from rasa_nlu import training_data [as 别名]
# 或者: from rasa_nlu.training_data import TrainingData [as 别名]
def test_unintentional_synonyms_capitalized(component_builder):
    """Entity values identical to the surface text must not become synonyms."""
    _config = utilities.base_test_conf("spacy_sklearn")
    ner_syn = component_builder.create_component("ner_synonyms", _config)

    def _cuisine_example(text, start, end):
        # Both examples map their entity span to the value "Mexican".
        return Message(text, {
            "intent": "restaurant_search",
            "entities": [{"start": start,
                          "end": end,
                          "value": "Mexican",
                          "entity": "cuisine"}]
        })

    examples = [
        _cuisine_example("Any Mexican restaurant will do", 4, 11),
        _cuisine_example("I want Tacos!", 7, 12),
    ]
    ner_syn.train(TrainingData(training_examples=examples), _config)

    # "Mexican" equals its entity value, so no synonym is recorded;
    # "Tacos" differs, so its lowercased form maps to "Mexican".
    assert ner_syn.synonyms.get("mexican") is None
    assert ner_syn.synonyms.get("tacos") == "Mexican"