本文整理汇总了 Python 中 adapt.tools.text.trie.Trie 类的典型用法代码示例。如果您正苦于以下问题：Python Trie 类的具体用法？Python Trie 怎么用？Python Trie 使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Trie类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse
def parse(self, utterance, context=None, N=1):
    """
    Tag the utterance and yield scored parse results.

    The lower-cased utterance is handed to ``self._tagger``; the resulting
    tags are reported through a "tagged_entities" event and then expanded
    into candidate parses by a BronKerboschExpander.

    :param utterance: the text to parse
    :param context: optional list of entity dicts. Each entry is read through
        its 'data' key (whose first element unpacks to an
        (entity_value, entity_type) pair) and its 'confidence' key. Ignored
        unless it is a non-empty list. The list itself is not modified.
    :param N: stop once this many results have been yielded; the check runs
        after each yield, so one result is produced even when N < 1
    :return: generator of dicts with the keys 'utterance', 'tags', 'time'
        (seconds since expansion started) and 'confidence'
    """
    start = time.time()
    context_trie = None
    if context and isinstance(context, list):
        # Insert in ascending confidence order, so the highest confidence
        # for an entity is inserted last (see comment on TrieNode ctor).
        # A sorted copy is used so the caller's list keeps its own order.
        ordered_context = sorted(context, key=lambda x: x.get('confidence'))
        context_trie = Trie()
        for entity in ordered_context:
            entity_value, entity_type = entity.get('data')[0]
            context_trie.insert(entity_value.lower(),
                                data=(entity_value, entity_type),
                                weight=entity.get('confidence'))

    # The tags are consumed twice below (event payload and expander), so they
    # are materialized once; a one-shot iterator would otherwise be exhausted
    # by the event payload before the expander sees it.
    tagged = list(self._tagger.tag(utterance.lower(), context_trie=context_trie))
    self.emit("tagged_entities",
              {
                  'utterance': utterance,
                  'tags': list(tagged),
                  'time': time.time() - start
              })

    start = time.time()
    bke = BronKerboschExpander(self._tokenizer)

    def score_clique(clique):
        """Sum, per tag, first-entity confidence weighted by match length."""
        score = 0.0
        for tagged_entity in clique:
            ec = tagged_entity.get('entities', [{'confidence': 0.0}])[0].get('confidence')
            # The +1 keeps the denominator non-zero for an empty utterance.
            score += ec * len(tagged_entity.get('entities', [{'match': ''}])[0].get('match')) / (
                len(utterance) + 1)
        return score

    parse_results = bke.expand(tagged, clique_scoring_func=score_clique)
    for count, result in enumerate(parse_results, 1):
        parse_confidence = 0.0
        for tag in result:
            sample_entity = tag['entities'][0]
            # NOTE(review): unlike score_clique this divides by len(utterance)
            # without the +1; an empty utterance that still produced tags
            # would raise ZeroDivisionError here - confirm that cannot happen.
            entity_confidence = sample_entity.get('confidence', 0.0) * float(
                len(sample_entity.get('match'))) / len(utterance)
            parse_confidence += entity_confidence
        yield {
            'utterance': utterance,
            'tags': result,
            'time': time.time() - start,
            'confidence': parse_confidence
        }

        if count >= N:
            break
示例2: test_data_is_correct_on_insert
def test_data_is_correct_on_insert(self):
    """A key inserted with data is found by lookup carrying exactly that data."""
    word_trie = Trie()
    word_trie.insert("restaurant", "Concept")

    hits = list(word_trie.lookup("restaurant"))
    assert len(hits) == 1
    assert len(hits[0].get('data')) == 1

    first_datum = list(hits[0].get('data'))[0]
    assert first_datum == 'Concept'
示例3: tag
def tag(self, utterance):
    """
    Tag known entities within the utterance.

    Regex entities (if any) are matched against every token subsequence and
    tagged through a temporary trie with a fixed confidence of 0.5; after
    that, ``self.trie`` is queried with the text starting at each token.

    :param utterance: a string of natural language text
    :return: list of dictionaries, each with the following keys
        match: str - the proper entity matched
        key: str - the string that was matched to the entity
        start_token: int - 0-based index of the first token matched
        end_token: int - 0-based index of the last token matched
        entities: list - the trie result(s) backing this tag
    """
    tokens = self.tokenizer.tokenize(utterance)
    entities = []
    if self.regex_entities:
        for part, idx in self._iterate_subsequences(tokens):
            local_trie = Trie()
            for regex_entity in self.regex_entities:
                match = regex_entity.match(part)
                groups = match.groupdict() if match else {}
                for key, match_str in groups.items():
                    # groupdict() maps named groups that did not take part
                    # in the match to None; there is no text to index.
                    if match_str is None:
                        continue
                    local_trie.insert(match_str, key)
            sub_tagger = EntityTagger(local_trie, self.tokenizer, max_tokens=self.max_tokens)
            for sub_entity in sub_tagger.tag(part):
                # Sub-tagger positions are relative to `part`; shift them
                # back into the coordinates of the full token list.
                sub_entity['start_token'] += idx
                sub_entity['end_token'] += idx
                for e in sub_entity['entities']:
                    e['confidence'] = 0.5
                entities.append(sub_entity)
    # Sorting/merging is only requested when regex matching contributed tags.
    additional_sort = bool(entities)

    # range() instead of xrange(): identical iteration, works on Python 2 and 3.
    for i in range(len(tokens)):
        part = ' '.join(tokens[i:])

        for new_entity in self.trie.gather(part):
            new_entity['data'] = list(new_entity['data'])
            entities.append({
                'match': new_entity.get('match'),
                'key': new_entity.get('key'),
                'start_token': i,
                'entities': [new_entity],
                'end_token': i + len(self.tokenizer.tokenize(new_entity.get('match'))) - 1
            })

    if additional_sort:
        entities = self._sort_and_merge_tags(entities)

    return entities
示例4: test_retrieval_based_on_insertion_order
def test_retrieval_based_on_insertion_order(self):
    """A key and a longer key sharing its prefix are each found exactly once."""
    prefix_trie = Trie()
    words = ("rest", "restaurant")
    for word in words:
        prefix_trie.insert(word)

    for word in words:
        hits = list(prefix_trie.lookup(word))
        assert len(hits) == 1
示例5: test_named_remove
def test_named_remove(self):
    """Removing one named datum from a key leaves the key with its other datum."""
    number_trie = Trie()
    for label in ("Number", "The Loneliest"):
        number_trie.insert("1", label)

    before = list(number_trie.lookup("1"))
    assert len(before) == 1
    assert len(before[0].get('data')) == 2

    assert number_trie.remove("1", "Number")

    after = list(number_trie.lookup("1"))
    assert len(after) == 1
    assert len(after[0].get('data')) == 1
示例6: setUp
def setUp(self):
    """Create the trie, tokenizer, tagger and parser fixtures with four known phrases."""
    self.trie = Trie()
    self.tokenizer = EnglishTokenizer()
    self.regex_entities = []
    self.tagger = EntityTagger(self.trie, self.tokenizer, regex_entities=self.regex_entities)

    known_phrases = (
        ("play", "PlayVerb"),
        ("the big bang theory", "Television Show"),
        ("the big", "Not a Thing"),
        ("barenaked ladies", "Radio Station"),
    )
    for phrase, label in known_phrases:
        self.trie.insert(phrase, label)

    self.parser = Parser(self.tokenizer, self.tagger)
示例7: EntityTaggerTest
class EntityTaggerTest(unittest.TestCase):
    """Exercises EntityTagger over a trie holding three known phrases."""

    def setUp(self):
        """Create the trie and tagger, then insert the known phrases."""
        self.trie = Trie()
        self.tagger = EntityTagger(self.trie, EnglishTokenizer())
        known_phrases = (
            ("play", "PlayVerb"),
            ("the big bang theory", "Television Show"),
            ("the big", "Not a Thing"),
        )
        for phrase, label in known_phrases:
            self.trie.insert(phrase, label)

    def tearDown(self):
        """Nothing to clean up."""

    def test_tag(self):
        """Tagging the sample utterance produces three tags."""
        tagged = list(self.tagger.tag("play season 1 of the big bang theory"))
        assert len(tagged) == 3

    def test_regex_tag(self):
        """A regex entity adds a 'big bang' tag whose data contains 'Event'."""
        event_pattern = re.compile(r"the (?P<Event>\w+\s\w+) theory")
        regex_tagger = EntityTagger(self.trie, EnglishTokenizer(), regex_entities=[event_pattern])

        tagged = regex_tagger.tag("the big bang theory")
        assert len(tagged) == 3

        big_bang_tags = [candidate for candidate in tagged if candidate.get('match') == 'big bang']
        assert len(big_bang_tags) == 1

        matched_entities = big_bang_tags[0].get('entities')
        assert len(matched_entities) == 1

        entity_data = matched_entities[0].get('data')
        assert len(entity_data) == 1
        assert 'Event' in entity_data
示例8: test_gather
def test_gather(self):
    """Gathering 'restaurant' yields a single result keyed 'restaurant'."""
    prefix_trie = Trie()
    for word in ("rest", "restaurant"):
        prefix_trie.insert(word)

    gathered = list(prefix_trie.gather("restaurant"))
    assert len(gathered) == 1

    only_hit = gathered[0]
    assert only_hit.get('key') == "restaurant"
示例9: EntityTaggerTest
class EntityTaggerTest(unittest.TestCase):
    """Exercises EntityTagger over a trie holding three known phrases."""

    def setUp(self):
        """Create the trie and tagger, then insert the known phrases."""
        self.trie = Trie()
        self.tagger = EntityTagger(self.trie, EnglishTokenizer())
        known_phrases = (
            ("play", "PlayVerb"),
            ("the big bang theory", "Television Show"),
            ("the big", "Not a Thing"),
        )
        for phrase, label in known_phrases:
            self.trie.insert(phrase, label)

    def tearDown(self):
        """Nothing to clean up."""

    def test_tag(self):
        """Tagging the sample utterance produces three tags."""
        tagged = list(self.tagger.tag("play season 1 of the big bang theory"))
        assert len(tagged) == 3

    def test_regex_tag(self):
        """A regex entity adds a 'big bang' tag whose data holds ('big bang', 'Event')."""
        event_pattern = re.compile(r"the (?P<Event>\w+\s\w+) theory")
        regex_tagger = EntityTagger(self.trie, EnglishTokenizer(), regex_entities=[event_pattern])

        tagged = regex_tagger.tag("the big bang theory")
        assert len(tagged) == 3

        big_bang_tags = [candidate for candidate in tagged if candidate.get('match') == 'big bang']
        assert len(big_bang_tags) == 1

        matched_entities = big_bang_tags[0].get('entities')
        assert len(matched_entities) == 1

        entity_data = matched_entities[0].get('data')
        assert len(entity_data) == 1
        assert ('big bang', 'Event') in entity_data

    def test_start_end_token_match_when_sorting_tagged_entities(self):
        """Sorting a payload with repeated start/end tokens completes without raising."""

        def make_tag(text, kind, start_token, end_token, confidence):
            # Builds a fresh tag dict per call; key, match and the first data
            # field all carry the same text in every entry of this payload.
            return {
                "end_token": end_token,
                "key": text,
                "entities": [{
                    "key": text,
                    "data": [[text, kind]],
                    "confidence": confidence,
                    "match": text,
                }],
                "start_token": start_token,
                "match": text,
            }

        repro_payload = [
            make_tag("1", "Which", 1, 1, 0.5),
            make_tag("1", "Which", 1, 1, 0.5),
            make_tag("1", "Which", 1, 1, 0.5),
            make_tag("1", "Which", 1, 1, 0.5),
            make_tag("20", "SnoozeTime", 3, 3, 0.5),
            make_tag("20 minutes", "SnoozeTime", 3, 4, 0.5),
            make_tag("20", "Which", 3, 3, 0.5),
            make_tag("20", "Which", 3, 3, 0.5),
            make_tag("snooze", "SnoozeKeyword", 0, 0, 1.0),
            make_tag("for", "SnoozeFiller", 2, 2, 1.0),
        ]
        # just asserting that the sort does not crash in py3
        self.tagger._sort_and_merge_tags(repro_payload)
示例10: setUp
def setUp(self):
    """Create a tokenizer, a trie with max_edit_distance=2 and a tagger over overlapping phrases."""
    self.tokenizer = EnglishTokenizer()
    self.trie = Trie(max_edit_distance=2)

    # Order is preserved; note that "play" is inserted twice with different data.
    known_phrases = (
        ("x-play", "Television Show"),
        ("play", "Play Verb"),
        ("play season", "Time Period"),
        ("play", "Player Control"),
        ("season", "Season Prefix"),
        ("1", "Number"),
        ("the big bang theory", "Television Show"),
        ("the big", "Television Show"),
        ("big bang", "event"),
        ("bang theory", "Scientific Theory"),
    )
    for phrase, label in known_phrases:
        self.trie.insert(phrase, label)

    self.tagger = EntityTagger(self.trie, self.tokenizer)
示例11: test_edit_distance
def test_edit_distance(self):
    """With max_edit_distance=1, one dropped character still matches; two do not."""
    fuzzy_trie = Trie(max_edit_distance=1)
    fuzzy_trie.insert("restaurant")

    expectations = (
        ("restauran", 1),
        ("estaurant", 1),
        ("estauran", 0),
    )
    for query, expected_hits in expectations:
        hits = list(fuzzy_trie.lookup(query))
        assert len(hits) == expected_hits
示例12: test_simple_remove
def test_simple_remove(self):
    """Removing a key without naming data makes lookup return nothing for it."""
    number_trie = Trie()
    number_trie.insert("1", "Number")

    before = list(number_trie.lookup("1"))
    assert len(before) == 1
    assert len(before[0].get('data')) == 1

    assert number_trie.remove("1")

    after = list(number_trie.lookup("1"))
    assert len(after) == 0
示例13: test_intent_with_regex_entity
def test_intent_with_regex_entity(self):
    """An intent requiring a regex entity and a trie entity validates against each parse."""
    self.trie = Trie()
    self.tagger = EntityTagger(self.trie, self.tokenizer, self.regex_entities)
    self.parser = Parser(self.tokenizer, self.tagger)
    self.trie.insert("theory", ("theory", "Concept"))

    # Appended after the tagger was built: the tagger was handed this same list object.
    event_pattern = re.compile(r"the (?P<Event>.*)")
    self.regex_entities.append(event_pattern)

    intent = (
        IntentBuilder("mock intent")
        .require("Event")
        .require("Concept")
        .build()
    )

    for parsed in self.parser.parse("the big bang theory"):
        validated = intent.validate(parsed.get('tags'), parsed.get('confidence'))
        assert validated.get('confidence') > 0.0
        assert validated.get('Event') == 'big bang'
        assert validated.get('Concept') == "theory"
示例14: test_edit_distance_confidence
def test_edit_distance_confidence(self):
    """With max_edit_distance=2, gather('b') gives one result at confidence 0.5 and gather('1 of') gives three."""
    fuzzy_trie = Trie(max_edit_distance=2)
    for word in ("a", "bb", "ccc", "dddd", "100"):
        fuzzy_trie.insert(word)

    near_b = list(fuzzy_trie.gather("b"))
    assert len(near_b) == 1
    assert near_b[0].get('confidence') == 0.5

    near_one_of = list(fuzzy_trie.gather("1 of"))
    assert len(near_one_of) == 3
示例15: test_basic_retrieval
def test_basic_retrieval(self):
    """A single inserted key is found exactly once by lookup."""
    word_trie = Trie()
    word_trie.insert("restaurant")

    hits = list(word_trie.lookup("restaurant"))
    assert len(hits) == 1