当前位置: 首页>>代码示例>>Python>>正文


Python Trie.insert_file方法代码示例

本文整理汇总了Python中trie.Trie.insert_file方法的典型用法代码示例。如果您正苦于以下问题:Python Trie.insert_file方法的具体用法?Python Trie.insert_file怎么用?Python Trie.insert_file使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在trie.Trie的用法示例。


在下文中一共展示了Trie.insert_file方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: WordSegment

# Required import: from trie import Trie [as alias]
# Note: insert_file is a method of trie.Trie, so it cannot be imported on its own; call it on a Trie instance.
class WordSegment(object):
    """Segment sentences into words paired with part-of-speech tags.

    Dictionary-based cutting goes through ``self.trie_model`` (a ``Trie``
    built from a word dictionary); the CRF helpers in ``tag.crf_tag`` are
    used where the dictionary only yields runs of single characters, and
    for the pure CRF cutting mode.
    """

    def __init__(self, dict_path):
        """Build the trie model from the word dictionary at ``dict_path``.

        The trie stores the dictionary words and is queried for the
        maximum-probability cut route of a sentence.
        """
        self.trie_model = Trie(dict_path)

    def _dict_tag(self, word):
        """Return the dictionary tag recorded for ``word``, or ``'x'`` if none."""
        return self.trie_model.word_value.get(word, {}).get('tag', 'x')

    def get_continuos_singe(self, word_list):
        """Find runs of two or more consecutive single-character words.

        :param word_list: list of words (anything supporting ``len``).
        :return: list of ``(start, end)`` index pairs into ``word_list``,
            ``end`` exclusive, in order of appearance.
        """
        result = []
        run_start = None  # index where the current single-char run began
        for idx, word in enumerate(word_list):
            if len(word) == 1:
                if run_start is None:
                    run_start = idx
                continue
            if run_start is not None:
                if idx - run_start >= 2:
                    result.append((run_start, idx))
                run_start = None
        # A run that reaches the end of the list has no following
        # multi-character word to close it, so close it here.
        if run_start is not None and len(word_list) - run_start >= 2:
            result.append((run_start, len(word_list)))
        return result

    @utils.trans_unicode
    def cut_base_on_dict(self, sentence):
        """Cut ``sentence`` with the dictionary, refining single-char runs.

        Each block from ``utils.get_blocks`` is cut along the trie's
        maximum-probability route. Runs of consecutive single-character
        words are joined back together and re-split: number/English pieces
        are tagged ``'m'``, everything else is handed to the CRF tokenizer
        and tagger. All other words take their tag from the dictionary
        (``'x'`` when the word has no tag entry).

        :return: ``SegmentPair(word_list, tag_list)`` with parallel lists.
        """
        word_list, tag_list = [], []
        for block in utils.get_blocks(sentence, utils.RE_NORMAL_HAN):
            route = self.trie_model.get_max_prob_route(block)
            block_words = [block[route[idx]: route[idx + 1]]
                           for idx in range(len(route) - 1)]
            last_end = 0
            for start, end in self.get_continuos_singe(block_words):
                # Dictionary words sitting between the previous run and this one.
                for word in block_words[last_end: start]:
                    word_list.append(word)
                    tag_list.append(self._dict_tag(word))
                last_end = end
                joined_singles = ''.join(block_words[start: end])
                for piece in utils.get_splits(joined_singles, utils.RE_NUNMBER_ENG):
                    if utils.is_number_or_eng(piece):
                        word_list.append(piece)
                        tag_list.append('m')
                    else:
                        crf_words = tag.crf_tag.crfToken(piece)
                        word_list.extend(crf_words)
                        tag_list.extend(tag.crf_tag.crfPos(crf_words))
            # Words after the last run: tag each word by its own dictionary
            # entry (the loop variable here is ``word``).
            for word in block_words[last_end:]:
                word_list.append(word)
                tag_list.append(self._dict_tag(word))
        return SegmentPair(word_list, tag_list)

    @utils.trans_unicode
    def cut_base_on_crf(self, sentence):
        """Cut ``sentence`` using only the CRF tokenizer and tagger.

        Blocks come from ``utils.get_chinese_block``; the words of all
        blocks are collected first and then tagged in a single ``crfPos``
        call.

        :return: ``SegmentPair(word_list, tag_list)``.
        """
        word_list = []
        for block in utils.get_chinese_block(sentence):
            word_list.extend(tag.crf_tag.crfToken(block))
        tag_list = tag.crf_tag.crfPos(word_list)
        return SegmentPair(word_list, tag_list)

    @utils.trans_unicode
    def cut_for_search(self, sentence):
        """Collect every candidate word the trie reports, with dictionary tags.

        For each ``(start, end_positions)`` pair reported by the trie, the
        slices ``sentence[start: pos + 1]`` are emitted as words.

        :return: ``SegmentPair(word_list, tag_list)``.
        """
        word_list = []
        # NOTE(review): get_all_cut_possible() is called without the
        # sentence; confirm against the Trie API that this is intended.
        all_cut_possible = self.trie_model.get_all_cut_possible()
        # .items() iterates the same pairs on Python 2 and Python 3.
        for idx, cut_possible_pos in all_cut_possible.items():
            word_list.extend([sentence[idx: pos + 1] for pos in cut_possible_pos])
        tag_list = [self._dict_tag(word) for word in word_list]
        return SegmentPair(word_list, tag_list)

    def load_user_dict(self, dict_path):
        """Insert the words from the dictionary file ``dict_path`` into the trie."""
        self.trie_model.insert_file(dict_path)

    def load_user_crf_corpus(self, corpus_path):
        """Placeholder: loading a user CRF corpus is not implemented (no-op)."""
        pass
开发者ID:vanway,项目名称:bamboo,代码行数:90,代码来源:__init__.py


注:本文中的trie.Trie.insert_file方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。