當前位置: 首頁>>代碼示例>>Python>>正文


Python trie.Trie類代碼示例

本文整理匯總了Python中adapt.tools.text.trie.Trie的典型用法代碼示例。如果您正苦於以下問題：Python Trie類的具體用法？Python Trie怎麼用？Python Trie使用的例子？那麼，這裡精選的類代碼示例或許可以為您提供幫助。


在下文中一共展示了Trie類的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: parse

    def parse(self, utterance, context=None, N=1):
        """
        Tag an utterance and lazily yield parse results for it.

        A "tagged_entities" event is emitted with the raw tags, then the tags
        are expanded with a BronKerboschExpander and one dict is yielded per
        expansion result.

        :param utterance: the text to parse. It is lower-cased before being
            handed to the tagger; the original string is what appears in the
            emitted and yielded dicts.
        :param context: optional list of entity dicts. For each entity the
            first item of ``entity.get('data')`` is unpacked as a
            ``(value, type)`` pair and ``entity.get('confidence')`` is used
            as the insertion weight. Anything that is not a non-empty list
            is ignored. The caller's list is not reordered.
        :param N: iteration stops once this many results have been yielded.
            The check runs after each yield, so at least one result is
            produced whenever the expander returns any.
        :return: generator of dicts with the keys 'utterance', 'tags',
            'time' and 'confidence'.
        """
        start = time.time()
        context_trie = None
        if context and isinstance(context, list):
            # Insert in ascending confidence order so the highest-confidence
            # entry for an entity is inserted last (see comment on the
            # TrieNode ctor). sorted() rather than list.sort() keeps the
            # caller's list untouched.
            context_trie = Trie()
            for entity in sorted(context, key=lambda x: x.get('confidence')):
                entity_value, entity_type = entity.get('data')[0]
                context_trie.insert(entity_value.lower(),
                                    data=(entity_value, entity_type),
                                    weight=entity.get('confidence'))

        # Materialise the tags once: they are consumed both by the event
        # payload and by the expander, and a one-shot iterator would be
        # exhausted by whichever consumer ran first.
        tagged = list(self._tagger.tag(utterance.lower(), context_trie=context_trie))
        self.emit("tagged_entities",
                  {
                      'utterance': utterance,
                      # a copy, so listeners cannot alter what gets expanded
                      'tags': list(tagged),
                      'time': time.time() - start
                  })
        start = time.time()
        bke = BronKerboschExpander(self._tokenizer)

        def score_clique(clique):
            # Sum of (confidence * matched length) for the first entity of
            # every tag, normalised by utterance length + 1.
            score = 0.0
            for tagged_entity in clique:
                ec = tagged_entity.get('entities', [{'confidence': 0.0}])[0].get('confidence')
                score += ec * len(tagged_entity.get('entities', [{'match': ''}])[0].get('match')) / (
                    len(utterance) + 1)
            return score

        parse_results = bke.expand(tagged, clique_scoring_func=score_clique)
        for count, result in enumerate(parse_results, 1):
            parse_confidence = 0.0
            for tag in result:
                sample_entity = tag['entities'][0]
                # NOTE: normalised by len(utterance), without the +1 that
                # score_clique uses.
                entity_confidence = sample_entity.get('confidence', 0.0) * float(
                    len(sample_entity.get('match'))) / len(utterance)
                parse_confidence += entity_confidence
            yield {
                'utterance': utterance,
                'tags': result,
                'time': time.time() - start,
                'confidence': parse_confidence
            }

            if count >= N:
                break
開發者ID:MycroftAI,項目名稱:adapt,代碼行數:60,代碼來源:parser.py

示例2: test_data_is_correct_on_insert

 def test_data_is_correct_on_insert(self):
     """Data attached at insert time comes back from a lookup of that key."""
     concept_trie = Trie()
     concept_trie.insert("restaurant", "Concept")
     matches = list(concept_trie.lookup("restaurant"))
     assert len(matches) == 1
     stored = matches[0].get('data')
     assert len(stored) == 1
     assert list(stored)[0] == 'Concept'
開發者ID:Ace139,項目名稱:adapt,代碼行數:8,代碼來源:TrieTest.py

示例3: tag

    def tag(self, utterance):
        """
        Tag known entities within the utterance.

        :param utterance: a string of natural language text

        :return: a list of dictionaries, each with the following keys

        match: str - the proper entity matched

        key: str - the string that was matched to the entity

        start_token: int - 0-based index of the first token matched

        end_token: int - 0-based index of the last token matched

        entities: list - the matched entity records; for trie matches this
        is a one-element list holding the record yielded by the trie's
        gather(), with its 'data' converted to a list
        """
        tokens = self.tokenizer.tokenize(utterance)
        entities = []
        if self.regex_entities:
            for part, idx in self._iterate_subsequences(tokens):
                # Build a throwaway trie holding whatever the regexes
                # captured from this subsequence, then tag against it.
                local_trie = Trie()
                for regex_entity in self.regex_entities:
                    match = regex_entity.match(part)
                    if not match:
                        continue
                    for key, match_str in match.groupdict().items():
                        # groupdict() maps named groups that did not
                        # participate in the match to None; there is no
                        # text to index for those.
                        if match_str is None:
                            continue
                        local_trie.insert(match_str, key)
                sub_tagger = EntityTagger(local_trie, self.tokenizer, max_tokens=self.max_tokens)
                for sub_entity in sub_tagger.tag(part):
                    # Token indices from the sub-tagger are relative to
                    # `part`; shift them back into utterance coordinates.
                    sub_entity['start_token'] += idx
                    sub_entity['end_token'] += idx
                    for e in sub_entity['entities']:
                        e['confidence'] = 0.5
                    entities.append(sub_entity)
        # Only regex-derived tags trigger the sort/merge pass below.
        additional_sort = bool(entities)

        # range() instead of xrange(): xrange does not exist on Python 3.
        for i in range(len(tokens)):
            part = ' '.join(tokens[i:])

            for new_entity in self.trie.gather(part):
                new_entity['data'] = list(new_entity['data'])
                entities.append({
                    'match': new_entity.get('match'),
                    'key': new_entity.get('key'),
                    'start_token': i,
                    'entities': [new_entity],
                    'end_token': i + len(self.tokenizer.tokenize(new_entity.get('match'))) - 1
                })

        if additional_sort:
            entities = self._sort_and_merge_tags(entities)

        return entities
開發者ID:paulscott56,項目名稱:adapt,代碼行數:55,代碼來源:entity_tagger.py

示例4: test_retrieval_based_on_insertion_order

 def test_retrieval_based_on_insertion_order(self):
     """A key and a longer key sharing its prefix each look up to one result."""
     prefix_trie = Trie()
     for word in ("rest", "restaurant"):
         prefix_trie.insert(word)
     short_hits = list(prefix_trie.lookup("rest"))
     assert len(short_hits) == 1
     long_hits = list(prefix_trie.lookup("restaurant"))
     assert len(long_hits) == 1
開發者ID:Ace139,項目名稱:adapt,代碼行數:8,代碼來源:TrieTest.py

示例5: test_named_remove

    def test_named_remove(self):
        """Removing one named datum from a key leaves the key's other datum."""
        number_trie = Trie()
        for label in ("Number", "The Loneliest"):
            number_trie.insert("1", label)
        before = list(number_trie.lookup("1"))
        assert len(before) == 1
        assert len(before[0].get('data')) == 2

        # remove() is expected to report success with a truthy value
        removed = number_trie.remove("1", "Number")
        assert removed
        after = list(number_trie.lookup("1"))
        assert len(after) == 1
        assert len(after[0].get('data')) == 1
開發者ID:Ace139,項目名稱:adapt,代碼行數:12,代碼來源:TrieTest.py

示例6: setUp

 def setUp(self):
     """Create the trie, tokenizer, tagger and parser shared by the tests."""
     self.trie = Trie()
     self.tokenizer = EnglishTokenizer()
     self.regex_entities = []
     self.tagger = EntityTagger(self.trie, self.tokenizer, regex_entities=self.regex_entities)
     # The vocabulary is loaded after the tagger has been created.
     vocabulary = (
         ("play", "PlayVerb"),
         ("the big bang theory", "Television Show"),
         ("the big", "Not a Thing"),
         ("barenaked ladies", "Radio Station"),
     )
     for phrase, concept in vocabulary:
         self.trie.insert(phrase, concept)
     self.parser = Parser(self.tokenizer, self.tagger)
開發者ID:stuartskelton,項目名稱:adapt,代碼行數:10,代碼來源:IntentTest.py

示例7: EntityTaggerTest

class EntityTaggerTest(unittest.TestCase):
    """Exercises EntityTagger.tag against a small three-entry trie."""

    def setUp(self):
        """Create the shared trie and tagger, then load the vocabulary."""
        self.trie = Trie()
        self.tagger = EntityTagger(self.trie, EnglishTokenizer())
        vocabulary = (
            ("play", "PlayVerb"),
            ("the big bang theory", "Television Show"),
            ("the big", "Not a Thing"),
        )
        for phrase, concept in vocabulary:
            self.trie.insert(phrase, concept)

    def tearDown(self):
        """Nothing to clean up."""
        pass

    def test_tag(self):
        """Tagging a full sentence produces three tags."""
        tagged = list(self.tagger.tag("play season 1 of the big bang theory"))
        assert len(tagged) == 3

    def test_regex_tag(self):
        """A regex named group adds an 'Event' tag for the captured text."""
        event_pattern = re.compile(r"the (?P<Event>\w+\s\w+) theory")
        regex_tagger = EntityTagger(self.trie, EnglishTokenizer(), regex_entities=[event_pattern])
        tagged = regex_tagger.tag("the big bang theory")
        assert len(tagged) == 3
        event_tags = [candidate for candidate in tagged if candidate.get('match') == 'big bang']
        assert len(event_tags) == 1
        event_entities = event_tags[0].get('entities')
        assert len(event_entities) == 1
        event_data = event_entities[0].get('data')
        assert len(event_data) == 1
        assert 'Event' in event_data
開發者ID:stuartskelton,項目名稱:adapt,代碼行數:26,代碼來源:EntityTaggerTest.py

示例8: test_gather

 def test_gather(self):
     """gather() on the longer key returns a single result keyed by it."""
     prefix_trie = Trie()
     for word in ("rest", "restaurant"):
         prefix_trie.insert(word)
     gathered = list(prefix_trie.gather("restaurant"))
     assert len(gathered) == 1
     only_hit = gathered[0]
     assert only_hit.get('key') == "restaurant"
開發者ID:Ace139,項目名稱:adapt,代碼行數:7,代碼來源:TrieTest.py

示例9: EntityTaggerTest

class EntityTaggerTest(unittest.TestCase):
    """Exercises EntityTagger tagging and its tag sort/merge step."""

    def setUp(self):
        """Create the shared trie and tagger, then load the vocabulary."""
        self.trie = Trie()
        self.tagger = EntityTagger(self.trie, EnglishTokenizer())
        vocabulary = (
            ("play", "PlayVerb"),
            ("the big bang theory", "Television Show"),
            ("the big", "Not a Thing"),
        )
        for phrase, concept in vocabulary:
            self.trie.insert(phrase, concept)

    def tearDown(self):
        """Nothing to clean up."""
        pass

    def test_tag(self):
        """Tagging a full sentence produces three tags."""
        tagged = list(self.tagger.tag("play season 1 of the big bang theory"))
        assert len(tagged) == 3

    def test_regex_tag(self):
        """A regex named group adds a ('big bang', 'Event') datum."""
        event_pattern = re.compile(r"the (?P<Event>\w+\s\w+) theory")
        regex_tagger = EntityTagger(self.trie, EnglishTokenizer(), regex_entities=[event_pattern])
        tagged = regex_tagger.tag("the big bang theory")
        assert len(tagged) == 3
        event_tags = [candidate for candidate in tagged if candidate.get('match') == 'big bang']
        assert len(event_tags) == 1
        event_entities = event_tags[0].get('entities')
        assert len(event_entities) == 1
        event_data = event_entities[0].get('data')
        assert len(event_data) == 1
        assert ('big bang', 'Event') in event_data

    def test_start_end_token_match_when_sorting_tagged_entities(self):
        """Run _sort_and_merge_tags over a payload containing duplicate tags."""

        def make_tag(text, concept, start, end, confidence):
            # Every call returns fresh dict/list objects, so repeated
            # entries in the payload are equal but never shared.
            return {
                "end_token": end,
                "key": text,
                "entities": [{
                    "key": text,
                    "data": [[text, concept]],
                    "confidence": confidence,
                    "match": text,
                }],
                "start_token": start,
                "match": text,
            }

        repro_payload = [
            make_tag("1", "Which", 1, 1, 0.5),
            make_tag("1", "Which", 1, 1, 0.5),
            make_tag("1", "Which", 1, 1, 0.5),
            make_tag("1", "Which", 1, 1, 0.5),
            make_tag("20", "SnoozeTime", 3, 3, 0.5),
            make_tag("20 minutes", "SnoozeTime", 3, 4, 0.5),
            make_tag("20", "Which", 3, 3, 0.5),
            make_tag("20", "Which", 3, 3, 0.5),
            make_tag("snooze", "SnoozeKeyword", 0, 0, 1.0),
            make_tag("for", "SnoozeFiller", 2, 2, 1.0),
        ]
        # just asserting that the sort does not crash in py3
        self.tagger._sort_and_merge_tags(repro_payload)
開發者ID:Ace139,項目名稱:adapt,代碼行數:31,代碼來源:EntityTaggerTest.py

示例10: setUp

 def setUp(self):
     """Create a fuzzy trie (edit distance 2), load it, and build the tagger."""
     self.tokenizer = EnglishTokenizer()
     self.trie = Trie(max_edit_distance=2)
     # Order is kept as-is; "play" is deliberately inserted twice with
     # different data.
     vocabulary = (
         ("x-play", "Television Show"),
         ("play", "Play Verb"),
         ("play season", "Time Period"),
         ("play", "Player Control"),
         ("season", "Season Prefix"),
         ("1", "Number"),
         ("the big bang theory", "Television Show"),
         ("the big", "Television Show"),
         ("big bang", "event"),
         ("bang theory", "Scientific Theory"),
     )
     for phrase, concept in vocabulary:
         self.trie.insert(phrase, concept)
     self.tagger = EntityTagger(self.trie, self.tokenizer)
開發者ID:Ace139,項目名稱:adapt,代碼行數:14,代碼來源:EntityExpanderTest.py

示例11: test_edit_distance

 def test_edit_distance(self):
     """With max_edit_distance=1, one missing character matches; two do not."""
     fuzzy_trie = Trie(max_edit_distance=1)
     fuzzy_trie.insert("restaurant")
     # (query, expected number of lookup results), checked in this order
     expectations = (
         ("restauran", 1),
         ("estaurant", 1),
         ("estauran", 0),
     )
     for query, expected_hits in expectations:
         hits = list(fuzzy_trie.lookup(query))
         assert len(hits) == expected_hits
開發者ID:Ace139,項目名稱:adapt,代碼行數:9,代碼來源:TrieTest.py

示例12: test_simple_remove

    def test_simple_remove(self):
        """Removing a key without naming data makes later lookups come back empty."""
        number_trie = Trie()
        number_trie.insert("1", "Number")
        before = list(number_trie.lookup("1"))
        assert len(before) == 1
        assert len(before[0].get('data')) == 1

        # remove() is expected to report success with a truthy value
        removed = number_trie.remove("1")
        assert removed
        after = list(number_trie.lookup("1"))
        assert len(after) == 0
開發者ID:Ace139,項目名稱:adapt,代碼行數:10,代碼來源:TrieTest.py

示例13: test_intent_with_regex_entity

    def test_intent_with_regex_entity(self):
        """An intent requiring a regex entity and a trie entity validates each parse."""
        self.trie = Trie()
        self.tagger = EntityTagger(self.trie, self.tokenizer, self.regex_entities)
        self.parser = Parser(self.tokenizer, self.tagger)
        self.trie.insert("theory", ("theory", "Concept"))
        # The pattern is appended after the tagger was handed this same
        # list object, so the statement order here is kept as-is.
        event_pattern = re.compile(r"the (?P<Event>.*)")
        self.regex_entities.append(event_pattern)
        builder = IntentBuilder("mock intent")
        intent = builder.require("Event").require("Concept").build()

        for parse_result in self.parser.parse("the big bang theory"):
            tags = parse_result.get('tags')
            confidence = parse_result.get('confidence')
            validated = intent.validate(tags, confidence)
            assert validated.get('confidence') > 0.0
            assert validated.get('Event') == 'big bang'
            assert validated.get('Concept') == "theory"
開發者ID:amitrai1095,項目名稱:adapt,代碼行數:16,代碼來源:IntentTest.py

示例14: test_edit_distance_confidence

 def test_edit_distance_confidence(self):
     """Check gather() result counts and confidence with max_edit_distance=2."""
     fuzzy_trie = Trie(max_edit_distance=2)
     for word in ("a", "bb", "ccc", "dddd", "100"):
         fuzzy_trie.insert(word)
     near_b = list(fuzzy_trie.gather("b"))
     assert len(near_b) == 1
     assert near_b[0].get('confidence') == 0.5
     near_one_of = list(fuzzy_trie.gather("1 of"))
     assert len(near_one_of) == 3
開發者ID:Ace139,項目名稱:adapt,代碼行數:12,代碼來源:TrieTest.py

示例15: test_basic_retrieval

 def test_basic_retrieval(self):
     """A key that was inserted is found exactly once by lookup."""
     word = "restaurant"
     single_trie = Trie()
     single_trie.insert(word)
     hits = list(single_trie.lookup(word))
     assert len(hits) == 1
開發者ID:Ace139,項目名稱:adapt,代碼行數:5,代碼來源:TrieTest.py


注:本文中的adapt.tools.text.trie.Trie類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。