当前位置: 首页>>代码示例>>Python>>正文


Python NumberSet.mask_interval方法代码示例

本文整理汇总了Python中quex.engine.misc.interval_handling.NumberSet.mask_interval方法的典型用法代码示例。如果您正苦于以下问题:Python NumberSet.mask_interval方法的具体用法?Python NumberSet.mask_interval怎么用?Python NumberSet.mask_interval使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在quex.engine.misc.interval_handling.NumberSet的用法示例。


在下文中一共展示了NumberSet.mask_interval方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: EncodingTrafoUTF16

# 需要导入模块: from quex.engine.misc.interval_handling import NumberSet [as 别名]
# 或者: from quex.engine.misc.interval_handling.NumberSet import mask_interval [as 别名]

#.........这里部分代码省略.........
        ]).get_complement(NumberSet_All())

    def prune(self, number_set):
        """Restrict 'number_set' (in place) to what UTF16 can encode.

        The surrogate range ('ForbiddenRange') is removed and everything at
        or above 0x110000 is cut off.
        """
        # 'global' is not required for a read-only access of a module name.
        number_set.subtract(ForbiddenRange)
        number_set.mask(0, 0x110000)

    def get_interval_sequences(self, Orig):
        """Translate the unicode set 'Orig' into UTF16 code-unit sequences.

        RETURNS: list of interval sequences -- one single-interval sequence
                 for the 1-word range (if any), plus one sequence per
                 contiguous 2-word range.
        """
        one_word_range, two_word_ranges = _get_contigous_intervals(Orig)

        sequences = []
        if one_word_range is not None:
            sequences.append([one_word_range])
        if two_word_ranges is not None:
            for piece in two_word_ranges:
                sequences.append(_get_trigger_sequence_for_interval(piece))
        return sequences

    def lexatom_n_per_character(self, CharacterSet):
        """Determine a common code-unit count for 'CharacterSet'.

        RETURNS: N > 0 -- number of UTF16 code units required to represent
                          any character in the given set.
                 None  -- the characters require differing numbers of code
                          units.
        """
        assert isinstance(CharacterSet, NumberSet)

        intervals = CharacterSet.get_intervals(PromiseToTreatWellF=True)
        lowest  = intervals[0].begin       # smallest code point in the set
        highest = intervals[-1].end - 1    # largest code point in the set
        # The code-unit count per character grows monotonously with the code
        # point, so only the two borders of the set need to be inspected.
        n_low  = len(unicode_to_utf16(lowest))
        n_high = len(unicode_to_utf16(highest))
        if n_low == n_high: return n_low
        else:               return None

    def _plug_encoding_error_detectors(self, sm):
        """Adorn states with transitions to the 'on_encoding_error' handler if the
        input value lies beyond the limits. The state machine is an implementation
        of linear sequences of intervals. Thus, the 'code unit position' can
        be determined by the number of transitions from the init state.

        sm = mini state machine that implements the transition sequences.

        Bad ranges for code units (2 byte each):
            1st: 0xDC00 - 0xDFFF (low surrogates; cannot start a sequence)
            2nd: 0x0000 - 0xDBFF, 0xE000 - 0x11000
        (NOTE(review): original text said '0xDC00 - 0xCFFF', an empty range;
         0xDFFF is the low-surrogate upper bound -- confirm against
         'error_range_code_unit0'. The '0x11000' bound is kept as found.)
        """
        # 'CodeUnit[0]' appears at the init state
        # (Adapt trigger map before entering the 'on bad lexatom state'
        init_tm = sm.get_init_state().target_map.get_map()
        # Seed the traversal with all states reachable from the init state.
        workset = set(init_tm.iterkeys()) 
        for si, trigger_set in init_tm.iteritems():
            # Sanity: no existing transition may already trigger on an error range.
            assert not trigger_set.has_intersection(self.error_range_code_unit0)

        # Route all bad first code units from the init state to the bad-lexatom state.
        bad_lexatom_state_index = self._plug_encoding_error_detector_single_state(sm, init_tm)

        # 'CodeUnit[>0]' appear all at later states
        # Breadth-first walk over all states reachable from the init state;
        # 'done' prevents revisiting (the bad-lexatom state is excluded up front).
        done = set([bad_lexatom_state_index])
        while workset:
            si = workset.pop()
            tm = sm.states[si].target_map.get_map()
            done.add(si)

            # Only add bad lexatom detection to state that transit on lexatoms
            # (Bad lexatom states, btw. do not have transitions)
            if not tm: continue

            for trigger_set in tm.itervalues():
                assert not trigger_set.has_intersection(self.error_range_code_unit1)

            # Schedule successors not yet handled, then plug the error transition.
            workset.update(new_si for new_si in tm.iterkeys() if new_si not in done) 
            tm[bad_lexatom_state_index] = self.error_range_code_unit1

    def _plug_encoding_error_detector_single_state(self, sm, target_map):
        """Route bad first code units of 'target_map' to the bad-lexatom state.

        RETURNS: index of the bad-lexatom state (provided by 'sm').
        """
        bad_si = sm.access_bad_lexatom_state()
        if target_map:
            target_map[bad_si] = self.error_range_code_unit0
        return bad_si

    def adapt_source_and_drain_range(self, LexatomByteN):
        """Adapt error ranges and source set to the lexatom width.

        'LexatomByteN == -1' stands for an unrestricted width; two or more
        bytes suffice to hold any UTF16 code unit. In both cases the source
        set remains untouched.
        """
        EncodingTrafoBySplit.adapt_source_and_drain_range(self, LexatomByteN)
        self.error_range_code_unit0.mask_interval(self.lexatom_range)
        self.error_range_code_unit1.mask_interval(self.lexatom_range)
        if LexatomByteN != -1 and LexatomByteN < 2:
            # With less than 2 byte for the lexatoms, only the unicode range
            # from 0x00 to 0xFF can be treated.
            self.source_set.mask(0x00, 0x100)
开发者ID:xxyzzzq,项目名称:quex,代码行数:104,代码来源:utf16_state_split.py

示例2: EncodingTrafoUTF8

# 需要导入模块: from quex.engine.misc.interval_handling import NumberSet [as 别名]
# 或者: from quex.engine.misc.interval_handling.NumberSet import mask_interval [as 别名]
class EncodingTrafoUTF8(EncodingTrafoBySplit):
    def __init__(self):
        """Set up the UTF8 transformation.

        Code units are single bytes (0x00-0xFF). The error ranges are the
        complements of the admissible lead-byte patterns and of the
        continuation-byte pattern '10xxxxxx' (see 'man utf8').
        """
        EncodingTrafoBySplit.__init__(self, "utf8",
                                      CodeUnitRange=NumberSet.from_range(0, 0x100))
        self.UnchangedRange = 0x7F

        # Admissible lead bytes: 0xxxxxxx, 110xxxxx, 1110xxxx, 11110xxx,
        # 111110xx, 1111110x.
        good_byte0 = NumberSet([
            Interval(0b00000000, 0b01111111+1), Interval(0b11000000, 0b11011111+1),
            Interval(0b11100000, 0b11101111+1), Interval(0b11110000, 0b11110111+1),
            Interval(0b11111000, 0b11111011+1), Interval(0b11111100, 0b11111101+1),
        ])
        self.error_range_byte0 = good_byte0.get_complement(NumberSet_All())

        # Admissible continuation bytes: 10xxxxxx.
        good_byteN = NumberSet(Interval(0b10000000, 0b10111111+1))
        self.error_range_byteN = good_byteN.get_complement(NumberSet_All())

    def adapt_source_and_drain_range(self, LexatomByteN):
        """Clip both error ranges to the admissible lexatom range."""
        EncodingTrafoBySplit.adapt_source_and_drain_range(self, LexatomByteN)
        for error_range in (self.error_range_byte0, self.error_range_byteN):
            error_range.mask_interval(self.lexatom_range)

    def prune(self, X):
        """UTF8 can encode every unicode code point; nothing to prune."""
        return

    def get_interval_sequences(self, Orig):
        """Translate 'Orig', a unicode trigger set, into a list of interval
        sequences that cover its elements in UTF8 code units. A transition
        from state '1' to state '2' on 'Orig' is then equivalent to the
        transitions along each code unit sequence.
        """
        length_db = _split_by_transformed_sequence_length(Orig)
        if length_db is None:
            return []

        sequences = []
        for length, whole in length_db.items():
            # Each contiguous piece maps to one byte-sequence of 'length' units.
            for piece in _get_contiguous_interval_sequences(whole, length):
                sequences.append(
                    _get_trigger_sequence_for_contigous_byte_range_interval(piece, length))
        return sequences

    def lexatom_n_per_character(self, CharacterSet):
        """Determine a common byte count for 'CharacterSet'.

        RETURNS: N > 0 -- number of bytes required to represent any character
                          in the given set.
                 None  -- the characters require differing numbers of bytes.
        """
        assert isinstance(CharacterSet, NumberSet)

        intervals = CharacterSet.get_intervals(PromiseToTreatWellF=True)
        lowest  = intervals[0].begin       # smallest code point in the set
        highest = intervals[-1].end - 1    # largest code point in the set
        # The byte count per character grows monotonously with the code
        # point, so only the two borders of the set need to be inspected.
        n_low  = len(unicode_to_utf8(lowest))
        n_high = len(unicode_to_utf8(highest))
        if n_low == n_high: return n_low
        else:               return None

    def _plug_encoding_error_detectors(self, sm):
        """Adorn states with transitions to the 'on_encoding_error' handler if the 
        input value lies beyond the limits. The state machine is an implementation
        of linear sequences of intervals. Thus, the 'byte position' can be 
        be determined by the number of transitions from the init state.

        sm = mini state machine that implements the transition sequences.

        UTF8 Encodings in binary look like the following (see 'man utf8').

            1 byte: 0xxxxxxx
            2 byte: 110xxxxx 10xxxxxx
            3 byte: 1110xxxx 10xxxxxx 10xxxxxx
            4 byte: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            5 byte: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
            6 byte: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxxx

        The resulting byte ranges can be observed in 'error_range_byte0' for Byte[0]
        and 'error_range_byteN' for Byte[>0].
        """
        # 'Byte[0]' appears at the init state
        # (Adapt trigger map before entering the 'on bad lexatom state'
        init_tm = sm.get_init_state().target_map.get_map()
        workset = set(init_tm.iterkeys()) 
        for si, trigger_set in init_tm.iteritems():
            assert not trigger_set.has_intersection(self.error_range_byte0)

        bad_lexatom_state_index = self._plug_encoding_error_detector_single_state(sm, init_tm)

        # 'Byte[>0]' appear all at later states
        done = set([bad_lexatom_state_index])
        while workset:
            si = workset.pop()
            tm = sm.states[si].target_map.get_map()
            done.add(si)

#.........这里部分代码省略.........
开发者ID:xxyzzzq,项目名称:quex,代码行数:103,代码来源:utf8_state_split.py


注:本文中的quex.engine.misc.interval_handling.NumberSet.mask_interval方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。