当前位置: 首页>>代码示例>>Python>>正文


Python Trie.insert方法代码示例

本文整理汇总了Python中adapt.tools.text.trie.Trie.insert方法的典型用法代码示例。如果您正苦于以下问题:Python Trie.insert方法的具体用法?Python Trie.insert怎么用?Python Trie.insert使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类adapt.tools.text.trie.Trie的用法示例。


在下文中一共展示了Trie.insert方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_gather

# 需要导入模块: from adapt.tools.text.trie import Trie [as 别名]
# 或者: from adapt.tools.text.trie.Trie import insert [as 别名]
 def test_gather(self):
     """Gathering "restaurant" yields exactly one result, keyed "restaurant"."""
     vocabulary = Trie()
     for word in ("rest", "restaurant"):
         vocabulary.insert(word)
     gathered = list(vocabulary.gather("restaurant"))
     assert len(gathered) == 1
     only_match = gathered[0]
     assert only_match.get('key') == "restaurant"
开发者ID:Ace139,项目名称:adapt,代码行数:9,代码来源:TrieTest.py

示例2: parse

# 需要导入模块: from adapt.tools.text.trie import Trie [as 别名]
# 或者: from adapt.tools.text.trie.Trie import insert [as 别名]
    def parse(self, utterance, context=None, N=1):
        """
        Tag the utterance, expand the tags into candidate parses and yield
        them one at a time.

        A "tagged_entities" event carrying the utterance, the tags and the
        tagging time is emitted before any result is produced.

        :param utterance: text to parse; it is lower-cased before tagging
        :param context: a list of entities. Each entity is read through
            ``entity.get('data')[0]`` (unpacked as ``(value, type)``) and
            ``entity.get('confidence')``. Anything that is not a non-empty
            list is ignored. The caller's list is left in its original order.
        :param N: number of results after which iteration stops. The limit
            is checked after each yield, so one result is still produced
            for N < 1 when any result exists.
        :return: generator of dictionaries with the keys 'utterance',
            'tags', 'time' and 'confidence'
        """
        start = time.time()
        context_trie = None
        if context and isinstance(context, list):
            # Insert by confidence in ascending order, so
            # highest confidence for an entity is last.
            # see comment on TrieNode ctor
            # sorted() builds a new list; list.sort() would silently reorder
            # the list object owned by the caller.
            ordered_context = sorted(context, key=lambda x: x.get('confidence'))

            context_trie = Trie()
            for entity in ordered_context:
                entity_value, entity_type = entity.get('data')[0]
                context_trie.insert(entity_value.lower(),
                                    data=(entity_value, entity_type),
                                    weight=entity.get('confidence'))

        tagged = self._tagger.tag(utterance.lower(), context_trie=context_trie)
        self.emit("tagged_entities",
                  {
                      'utterance': utterance,
                      'tags': list(tagged),
                      'time': time.time() - start
                  })
        # Restart the clock: the 'time' reported with each result covers the
        # expansion phase only.
        start = time.time()
        bke = BronKerboschExpander(self._tokenizer)

        def score_clique(clique):
            # Sum of confidence * match length for the first entity of every
            # tag in the clique, scaled by the utterance length plus one.
            score = 0.0
            for tagged_entity in clique:
                ec = tagged_entity.get('entities', [{'confidence': 0.0}])[0].get('confidence')
                score += ec * len(tagged_entity.get('entities', [{'match': ''}])[0].get('match')) / (
                    len(utterance) + 1)
            return score

        parse_results = bke.expand(tagged, clique_scoring_func=score_clique)
        count = 0
        for result in parse_results:
            count += 1
            parse_confidence = 0.0
            for tag in result:
                # Only the first entity of each tag contributes to the
                # confidence of the parse.
                sample_entity = tag['entities'][0]
                entity_confidence = sample_entity.get('confidence', 0.0) * float(
                    len(sample_entity.get('match'))) / len(utterance)
                parse_confidence += entity_confidence
            yield {
                'utterance': utterance,
                'tags': result,
                'time': time.time() - start,
                'confidence': parse_confidence
            }

            if count >= N:
                break
开发者ID:MycroftAI,项目名称:adapt,代码行数:62,代码来源:parser.py

示例3: test_retrieval_based_on_insertion_order

# 需要导入模块: from adapt.tools.text.trie import Trie [as 别名]
# 或者: from adapt.tools.text.trie.Trie import insert [as 别名]
 def test_retrieval_based_on_insertion_order(self):
     """A key and a longer key sharing its prefix are each looked up exactly once."""
     words = ("rest", "restaurant")
     index = Trie()
     for word in words:
         index.insert(word)
     for query in words:
         found = list(index.lookup(query))
         assert len(found) == 1
开发者ID:Ace139,项目名称:adapt,代码行数:10,代码来源:TrieTest.py

示例4: test_data_is_correct_on_insert

# 需要导入模块: from adapt.tools.text.trie import Trie [as 别名]
# 或者: from adapt.tools.text.trie.Trie import insert [as 别名]
 def test_data_is_correct_on_insert(self):
     """The data given to insert() comes back as the only data item on lookup."""
     index = Trie()
     index.insert("restaurant", "Concept")
     matches = list(index.lookup("restaurant"))
     assert len(matches) == 1
     only_match = matches[0]
     assert len(only_match.get('data')) == 1
     stored = list(only_match.get('data'))
     assert stored[0] == 'Concept'
开发者ID:Ace139,项目名称:adapt,代码行数:10,代码来源:TrieTest.py

示例5: test_edit_distance

# 需要导入模块: from adapt.tools.text.trie import Trie [as 别名]
# 或者: from adapt.tools.text.trie.Trie import insert [as 别名]
 def test_edit_distance(self):
     """With max_edit_distance=1, one dropped character matches and two do not."""
     index = Trie(max_edit_distance=1)
     index.insert("restaurant")
     expectations = (
         ("restauran", 1),  # last character missing
         ("estaurant", 1),  # first character missing
         ("estauran", 0),   # both missing
     )
     for query, expected_count in expectations:
         found = list(index.lookup(query))
         assert len(found) == expected_count
开发者ID:Ace139,项目名称:adapt,代码行数:11,代码来源:TrieTest.py

示例6: test_simple_remove

# 需要导入模块: from adapt.tools.text.trie import Trie [as 别名]
# 或者: from adapt.tools.text.trie.Trie import insert [as 别名]
    def test_simple_remove(self):
        """Removing a key makes a later lookup of that key return nothing."""
        numbers = Trie()
        numbers.insert("1", "Number")
        before = list(numbers.lookup("1"))
        assert len(before) == 1
        assert len(before[0].get('data')) == 1

        was_removed = numbers.remove("1")
        assert was_removed
        after = list(numbers.lookup("1"))
        assert len(after) == 0
开发者ID:Ace139,项目名称:adapt,代码行数:12,代码来源:TrieTest.py

示例7: tag

# 需要导入模块: from adapt.tools.text.trie import Trie [as 别名]
# 或者: from adapt.tools.text.trie.Trie import insert [as 别名]
    def tag(self, utterance):
        """
        Tag known entities within the utterance.

        :param utterance: a string of natural language text

        :return: list of dictionaries, each with the following keys

        match: str - the proper entity matched

        key: str - the string that was matched to the entity

        start_token: int - 0-based index of the first token matched

        end_token: int - 0-based index of the last token matched

        entities: list - the entity dictionaries backing the tag

        When at least one regex entity was tagged, the list is whatever
        _sort_and_merge_tags returns for the collected tags.
        """
        tokens = self.tokenizer.tokenize(utterance)
        entities = []
        if len(self.regex_entities) > 0:
            for part, idx in self._iterate_subsequences(tokens):
                # Throwaway trie holding only what the regexes captured from
                # this subsequence.
                local_trie = Trie()
                for regex_entity in self.regex_entities:
                    match = regex_entity.match(part)
                    groups = match.groupdict() if match else {}
                    for key, match_str in groups.items():
                        if match_str is None:
                            # groupdict() reports a named group that did not
                            # take part in the match as None; there is no
                            # text to index for it.
                            continue
                        local_trie.insert(match_str, key)
                sub_tagger = EntityTagger(local_trie, self.tokenizer, max_tokens=self.max_tokens)
                for sub_entity in sub_tagger.tag(part):
                    # Shift the sub-tagger's token positions by idx
                    # (presumably the offset of `part` within `tokens` —
                    # confirm against _iterate_subsequences).
                    sub_entity['start_token'] += idx
                    sub_entity['end_token'] += idx
                    for e in sub_entity['entities']:
                        e['confidence'] = 0.5
                    entities.append(sub_entity)
        # Decided before the trie matches are added: only regex matches
        # trigger the sort/merge step below.
        additional_sort = len(entities) > 0

        # range() instead of the Python 2-only xrange(), so this also runs
        # on Python 3.
        for i in range(len(tokens)):
            part = ' '.join(tokens[i:])

            for new_entity in self.trie.gather(part):
                new_entity['data'] = list(new_entity['data'])
                entities.append({
                    'match': new_entity.get('match'),
                    'key': new_entity.get('key'),
                    'start_token': i,
                    'entities': [new_entity],
                    'end_token': i + len(self.tokenizer.tokenize(new_entity.get('match'))) - 1
                })

        if additional_sort:
            entities = self._sort_and_merge_tags(entities)

        return entities
开发者ID:paulscott56,项目名称:adapt,代码行数:57,代码来源:entity_tagger.py

示例8: test_named_remove

# 需要导入模块: from adapt.tools.text.trie import Trie [as 别名]
# 或者: from adapt.tools.text.trie.Trie import insert [as 别名]
    def test_named_remove(self):
        """Removing one named data item from a key leaves the other item in place."""
        numbers = Trie()
        for label in ("Number", "The Loneliest"):
            numbers.insert("1", label)
        before = list(numbers.lookup("1"))
        assert len(before) == 1
        assert len(before[0].get('data')) == 2

        was_removed = numbers.remove("1", "Number")
        assert was_removed
        after = list(numbers.lookup("1"))
        assert len(after) == 1
        assert len(after[0].get('data')) == 1
开发者ID:Ace139,项目名称:adapt,代码行数:14,代码来源:TrieTest.py

示例9: test_edit_distance_confidence

# 需要导入模块: from adapt.tools.text.trie import Trie [as 别名]
# 或者: from adapt.tools.text.trie.Trie import insert [as 别名]
 def test_edit_distance_confidence(self):
     """Check match counts and the reported confidence with max_edit_distance=2."""
     index = Trie(max_edit_distance=2)
     for word in ("a", "bb", "ccc", "dddd", "100"):
         index.insert(word)
     near_b = list(index.gather("b"))
     assert len(near_b) == 1
     assert near_b[0].get('confidence') == 0.5
     near_one_of = list(index.gather("1 of"))
     assert len(near_one_of) == 3
开发者ID:Ace139,项目名称:adapt,代码行数:14,代码来源:TrieTest.py

示例10: EntityTaggerTest

# 需要导入模块: from adapt.tools.text.trie import Trie [as 别名]
# 或者: from adapt.tools.text.trie.Trie import insert [as 别名]
class EntityTaggerTest(unittest.TestCase):
    """Runs EntityTagger over a small trie of literal phrases."""

    def setUp(self):
        """Create the trie and tagger, then register three literal phrases."""
        self.trie = Trie()
        self.tagger = EntityTagger(self.trie, EnglishTokenizer())
        vocabulary = (
            ("play", "PlayVerb"),
            ("the big bang theory", "Television Show"),
            ("the big", "Not a Thing"),
        )
        for phrase, kind in vocabulary:
            self.trie.insert(phrase, kind)

    def tearDown(self):
        """Nothing to clean up."""

    def test_tag(self):
        """Three tags are expected for the sample utterance."""
        found = list(self.tagger.tag("play season 1 of the big bang theory"))
        assert len(found) == 3

    def test_regex_tag(self):
        """A named regex group yields one extra tag whose data holds the group name."""
        event_pattern = re.compile(r"the (?P<Event>\w+\s\w+) theory")
        regex_tagger = EntityTagger(self.trie, EnglishTokenizer(), regex_entities=[event_pattern])
        all_tags = regex_tagger.tag("the big bang theory")
        assert len(all_tags) == 3
        big_bang_tags = [candidate for candidate in all_tags if candidate.get('match') == 'big bang']
        assert len(big_bang_tags) == 1
        event_tag = big_bang_tags[0]
        assert len(event_tag.get('entities')) == 1
        assert len(event_tag.get('entities')[0].get('data')) == 1
        assert 'Event' in event_tag.get('entities')[0].get('data')
开发者ID:stuartskelton,项目名称:adapt,代码行数:28,代码来源:EntityTaggerTest.py

示例11: EntityTaggerTest

# 需要导入模块: from adapt.tools.text.trie import Trie [as 别名]
# 或者: from adapt.tools.text.trie.Trie import insert [as 别名]
class EntityTaggerTest(unittest.TestCase):
    """Runs EntityTagger over a small trie of literal phrases."""

    def setUp(self):
        """Create the trie and tagger, then register three literal phrases."""
        self.trie = Trie()
        self.tagger = EntityTagger(self.trie, EnglishTokenizer())
        vocabulary = (
            ("play", "PlayVerb"),
            ("the big bang theory", "Television Show"),
            ("the big", "Not a Thing"),
        )
        for phrase, kind in vocabulary:
            self.trie.insert(phrase, kind)

    def tearDown(self):
        """Nothing to clean up."""

    def test_tag(self):
        """Three tags are expected for the sample utterance."""
        found = list(self.tagger.tag("play season 1 of the big bang theory"))
        assert len(found) == 3

    def test_regex_tag(self):
        """A named regex group yields one extra tag carrying (text, group name) data."""
        event_pattern = re.compile(r"the (?P<Event>\w+\s\w+) theory")
        regex_tagger = EntityTagger(self.trie, EnglishTokenizer(), regex_entities=[event_pattern])
        all_tags = regex_tagger.tag("the big bang theory")
        assert len(all_tags) == 3
        big_bang_tags = [candidate for candidate in all_tags if candidate.get('match') == 'big bang']
        assert len(big_bang_tags) == 1
        event_tag = big_bang_tags[0]
        assert len(event_tag.get('entities')) == 1
        assert len(event_tag.get('entities')[0].get('data')) == 1
        assert ('big bang', 'Event') in event_tag.get('entities')[0].get('data')

    def test_start_end_token_match_when_sorting_tagged_entities(self):
        """Sorting and merging a payload containing identical tags must not raise."""

        def make_tag(text, kind, confidence, start_token, end_token):
            # Returns fresh dicts on every call, with the keys in the same
            # order as the recorded payload being reproduced.
            return {
                "end_token": end_token,
                "key": text,
                "entities": [{"key": text, "data": [[text, kind]], "confidence": confidence, "match": text}],
                "start_token": start_token,
                "match": text,
            }

        repro_payload = [make_tag("1", "Which", 0.5, 1, 1) for _ in range(4)]
        repro_payload += [
            make_tag("20", "SnoozeTime", 0.5, 3, 3),
            make_tag("20 minutes", "SnoozeTime", 0.5, 3, 4),
            make_tag("20", "Which", 0.5, 3, 3),
            make_tag("20", "Which", 0.5, 3, 3),
            make_tag("snooze", "SnoozeKeyword", 1.0, 0, 0),
            make_tag("for", "SnoozeFiller", 1.0, 2, 2),
        ]
        # just asserting that the sort does not crash in py3
        self.tagger._sort_and_merge_tags(repro_payload)
开发者ID:Ace139,项目名称:adapt,代码行数:33,代码来源:EntityTaggerTest.py

示例12: BronKerboschExpanderTest

# 需要导入模块: from adapt.tools.text.trie import Trie [as 别名]
# 或者: from adapt.tools.text.trie.Trie import insert [as 别名]
class BronKerboschExpanderTest(unittest.TestCase):
    """Exercises BronKerboschExpander on tags produced by an EntityTagger."""

    def setUp(self):
        """Build a tokenizer, a trie with overlapping phrases and a tagger over both."""
        self.tokenizer = EnglishTokenizer()
        self.trie = Trie(max_edit_distance=2)
        # Insertion order is kept as-is; "play" is registered twice with
        # different data.
        vocabulary = (
            ("x-play", "Television Show"),
            ("play", "Play Verb"),
            ("play season", "Time Period"),
            ("play", "Player Control"),
            ("season", "Season Prefix"),
            ("1", "Number"),
            ("the big bang theory", "Television Show"),
            ("the big", "Television Show"),
            ("big bang", "event"),
            ("bang theory", "Scientific Theory"),
        )
        for phrase, kind in vocabulary:
            self.trie.insert(phrase, kind)
        self.tagger = EntityTagger(self.trie, self.tokenizer)

    def testExpander(self):
        """With edit distance disabled the sample utterance expands to six parses."""
        self.tagger.trie.max_edit_distance = 0
        tagged = self.tagger.tag("play season 1 of the big bang theory")
        bke = BronKerboschExpander(self.tokenizer)
        expansions = list(bke.expand(tagged))
        assert len(expansions) == 6

    def testExpandedResult(self):
        """"season 1" expands to a single parse made of two tags."""
        tagged = self.tagger.tag("season 1")
        bke = BronKerboschExpander(self.tokenizer)
        expansions = list(bke.expand(tagged))
        assert len(expansions) == 1
        assert len(expansions[0]) == 2


    def testConsistentExpandWithSameOverlapMultipleTimes(self):
        """
        Expand the sample utterance with a custom clique scoring function
        and check the keys of the first parse.
        :return:
        """
        utterance = "play season 1 of the big bang theory"
        tagged = self.tagger.tag(utterance)

        def score_clique(clique):
            # Confidence * match length of each tag's first entity, scaled by
            # the utterance length plus one, accumulated in clique order.
            total = 0.0
            for member in clique:
                confidence = member.get('entities', [{'confidence': 0.0}])[0].get('confidence')
                matched = member.get('entities', [{'match': ''}])[0].get('match')
                total += confidence * len(matched) / (len(utterance) + 1)
            return total

        bke = BronKerboschExpander(self.tokenizer)
        expansions = list(bke.expand(tagged, clique_scoring_func=score_clique))
        assert len(expansions) == 6
        best = expansions[0]
        result_text = ' '.join([member.get('entities')[0].get('key') for member in best])
        # The joined value is not asserted on, but building it still indexes
        # into the data of every tag of the first parse.
        result_parse = ', '.join(
            [member.get('entities')[0].get('data')[0][1] for member in best]
        )

        assert result_text == 'play season 1 the big bang theory'

    def testExpandWithRegexAndLiteralTokenMatch(self):
        """Two tags on the same token with different confidence give two cliques."""

        def spell_tag(confidence, kind):
            # Same token span and text for both tags; only the confidence and
            # the data differ.
            return {'end_token': 0, 'start_token': 0, 'key': u'spell', 'match': u'spell',
                    'entities': [{'confidence': confidence, 'data': [kind], 'match': u'spell', 'key': u'spell'}]}

        tagged = [spell_tag(0.5, u'SearchTerms'), spell_tag(1.0, u'SpellingKeyword')]

        bke = BronKerboschExpander(self.tokenizer)

        found_cliques = list(bke._sub_expand(tagged))
        assert len(found_cliques) == 2
开发者ID:Ace139,项目名称:adapt,代码行数:70,代码来源:EntityExpanderTest.py

示例13: test_basic_retrieval

# 需要导入模块: from adapt.tools.text.trie import Trie [as 别名]
# 或者: from adapt.tools.text.trie.Trie import insert [as 别名]
 def test_basic_retrieval(self):
     """A single inserted key is found exactly once by lookup()."""
     word = "restaurant"
     index = Trie()
     index.insert(word)
     found = list(index.lookup(word))
     assert len(found) == 1
开发者ID:Ace139,项目名称:adapt,代码行数:7,代码来源:TrieTest.py

示例14: tag

# 需要导入模块: from adapt.tools.text.trie import Trie [as 别名]
# 或者: from adapt.tools.text.trie.Trie import insert [as 别名]
    def tag(self, utterance, context_trie=None):
        """
        Tag known entities within the utterance.
        Args:
            utterance(str): a string of natural language text
            context_trie(trie): optional, a trie containing only entities from context
                for this request

        Returns: list of dictionaries, each with the following keys
            match(str): the proper entity matched
            key(str): the string that was matched to the entity
            start_token(int): 0-based index of the first token matched
            end_token(int): 0-based index of the last token matched
            entities(list): the entity dictionaries backing the tag
            from_context(bool): True when the tag came from context_trie
                (set on tags built here from self.trie / context_trie)

            When any regex or trie entity was found, the list is whatever
            _sort_and_merge_tags returns for the collected tags.
        """
        tokens = self.tokenizer.tokenize(utterance)
        entities = []
        if len(self.regex_entities) > 0:
            for part, idx in self._iterate_subsequences(tokens):
                # Throwaway trie holding only what the regexes captured from
                # this subsequence.
                local_trie = Trie()
                for regex_entity in self.regex_entities:
                    match = regex_entity.match(part)
                    groups = match.groupdict() if match else {}
                    for key, match_str in groups.items():
                        if match_str is None:
                            # groupdict() reports a named group that did not
                            # take part in the match as None; there is no
                            # text to index for it.
                            continue
                        local_trie.insert(match_str, (match_str, key))
                sub_tagger = EntityTagger(local_trie, self.tokenizer, max_tokens=self.max_tokens)
                for sub_entity in sub_tagger.tag(part):
                    # Shift the sub-tagger's token positions by idx
                    # (presumably the offset of `part` within `tokens` —
                    # confirm against _iterate_subsequences).
                    sub_entity['start_token'] += idx
                    sub_entity['end_token'] += idx
                    for e in sub_entity['entities']:
                        e['confidence'] = 0.5
                    entities.append(sub_entity)
        additional_sort = len(entities) > 0

        context_entities = []
        # range() instead of the Python 2-only xrange(), so this also runs
        # on Python 3.
        for i in range(len(tokens)):
            part = ' '.join(tokens[i:])

            for new_entity in self.trie.gather(part):
                new_entity['data'] = list(new_entity['data'])
                entities.append({
                    'match': new_entity.get('match'),
                    'key': new_entity.get('key'),
                    'start_token': i,
                    'entities': [new_entity],
                    'end_token': i + len(self.tokenizer.tokenize(new_entity.get('match'))) - 1,
                    'from_context': False
                })

            if context_trie:
                for new_entity in context_trie.gather(part):
                    new_entity['data'] = list(new_entity['data'])
                    new_entity['confidence'] *= 2.0  # context entities get double the weight!
                    context_entities.append({
                        'match': new_entity.get('match'),
                        'key': new_entity.get('key'),
                        'start_token': i,
                        'entities': [new_entity],
                        'end_token': i + len(self.tokenizer.tokenize(new_entity.get('match'))) - 1,
                        'from_context': True
                    })

        additional_sort = additional_sort or len(entities) > 0

        # NOTE(review): context_entities are only merged in when `entities`
        # is non-empty; an utterance matching context entries alone returns
        # an empty list. Confirm this is intended.
        if additional_sort:
            entities = self._sort_and_merge_tags(entities + context_entities)

        return entities
开发者ID:amitrai1095,项目名称:adapt,代码行数:71,代码来源:entity_tagger.py

示例15: test_retrieval_of_multi_word_entity

# 需要导入模块: from adapt.tools.text.trie import Trie [as 别名]
# 或者: from adapt.tools.text.trie.Trie import insert [as 别名]
 def test_retrieval_of_multi_word_entity(self):
     """Gathering text where the stored phrase is not at the start yields nothing."""
     index = Trie()
     for phrase, kind in (("play", "PlayVerb"), ("the big bang theory", "Television Series")):
         index.insert(phrase, kind)
     gathered = list(index.gather("1 of the big bang theory"))
     assert len(gathered) == 0
开发者ID:Ace139,项目名称:adapt,代码行数:8,代码来源:TrieTest.py


注:本文中的adapt.tools.text.trie.Trie.insert方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。