當前位置: 首頁>>代碼示例>>Python>>正文


Python NumberSet.mask_interval方法代碼示例

本文整理匯總了Python中quex.engine.misc.interval_handling.NumberSet.mask_interval方法的典型用法代碼示例。如果您正苦於以下問題:Python NumberSet.mask_interval方法的具體用法?Python NumberSet.mask_interval怎麽用?Python NumberSet.mask_interval使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在quex.engine.misc.interval_handling.NumberSet的用法示例。


在下文中一共展示了NumberSet.mask_interval方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: EncodingTrafoUTF16

# 需要導入模塊: from quex.engine.misc.interval_handling import NumberSet [as 別名]
# 或者: from quex.engine.misc.interval_handling.NumberSet import mask_interval [as 別名]

#.........這裏部分代碼省略.........
        ]).get_complement(NumberSet_All())

    def prune(self, number_set):
        """Restrict 'number_set' (in place) to code points encodable in UTF16.

        Subtracts 'ForbiddenRange' (module-level constant) and masks away
        everything at or beyond 0x110000, the end of the Unicode range.
        """
        # 'global' was declared here but ForbiddenRange is only read, never
        # assigned -- the declaration is unnecessary and has been removed.
        number_set.subtract(ForbiddenRange)
        number_set.mask(0, 0x110000)

    def get_interval_sequences(self, Orig):
        """Translate 'Orig', a set of unicode code points, into a list of
        UTF16 code-unit interval sequences (one entry per trigger sequence).
        """
        single_word, double_word = _get_contigous_intervals(Orig)

        sequences = []
        if single_word is not None:
            sequences.append([single_word])
        if double_word is not None:
            sequences.extend(
                map(_get_trigger_sequence_for_interval, double_word)
            )
        return sequences

    def lexatom_n_per_character(self, CharacterSet):
        """If all characters in a unicode character set state machine require the
        same number of bytes to be represented this number is returned.  Otherwise,
        'None' is returned.

        RETURNS:   N > 0  number of bytes required to represent any character in the 
                          given state machine.
                   None   characters in the state machine require different numbers of
                          bytes.
        """
        assert isinstance(CharacterSet, NumberSet)

        intervals = CharacterSet.get_intervals(PromiseToTreatWellF=True)
        # The number of code units per character increases monotonously with
        # the code point, so only the smallest and largest members of the set
        # need to be inspected.
        smallest = intervals[0].begin      # First element of number set
        largest  = intervals[-1].end - 1   # Last element of number set
        n = len(unicode_to_utf16(smallest))
        if n == len(unicode_to_utf16(largest)):
            return n
        return None

    def _plug_encoding_error_detectors(self, sm):
        """Adorn states with transitions to the 'on_encoding_error' handler if
        the input value lies beyond the limits. The state machine is an
        implementation of linear sequences of intervals. Thus, the 'code unit
        position' can be determined by the number of transitions from the init
        state.

        sm = mini state machine that implements the transition sequences.

        Bad ranges for code units (2 byte each):
            1st: 0xDC00 - 0xDFFF (a trailing surrogate cannot begin a character)
            2nd: 0x0000 - 0xDBFF, 0xE000 - 0x11000
        """
        # 'CodeUnit[0]' appears at the init state.
        # (Adapt trigger map before entering the 'on bad lexatom state')
        # NOTE: '.keys()/.items()/.values()' work on both Python 2 and 3;
        #       the previous 'iterkeys/iteritems/itervalues' were Py2-only.
        init_tm = sm.get_init_state().target_map.get_map()
        workset = set(init_tm)
        # Sanity check: no existing transition may overlap the error range.
        for trigger_set in init_tm.values():
            assert not trigger_set.has_intersection(self.error_range_code_unit0)

        bad_lexatom_state_index = self._plug_encoding_error_detector_single_state(sm, init_tm)

        # 'CodeUnit[>0]' appear all at later states.
        done = set([bad_lexatom_state_index])
        while workset:
            si = workset.pop()
            tm = sm.states[si].target_map.get_map()
            done.add(si)

            # Only add bad lexatom detection to states that transit on
            # lexatoms. (Bad lexatom states, btw., do not have transitions.)
            if not tm: continue

            for trigger_set in tm.values():
                assert not trigger_set.has_intersection(self.error_range_code_unit1)

            workset.update(new_si for new_si in tm if new_si not in done)
            tm[bad_lexatom_state_index] = self.error_range_code_unit1

    def _plug_encoding_error_detector_single_state(self, sm, target_map):
        """Make 'target_map' send code unit 0's error range to the bad-lexatom
        state; return the index of that state.  An empty 'target_map' is left
        untouched.
        """
        bad_si = sm.access_bad_lexatom_state()
        if not target_map:
            return bad_si
        target_map[bad_si] = self.error_range_code_unit0
        return bad_si

    def adapt_source_and_drain_range(self, LexatomByteN):
        """Adapt ranges to the given lexatom byte width, then clip both code
        unit error ranges to the lexatom range.
        """
        EncodingTrafoBySplit.adapt_source_and_drain_range(self, LexatomByteN)
        self.error_range_code_unit0.mask_interval(self.lexatom_range)
        self.error_range_code_unit1.mask_interval(self.lexatom_range)
        # '-1' acts as a sentinel ("unrestricted"); 2 or more bytes cover the
        # full source range. Otherwise only unicode 0x00..0xFF can be treated.
        if LexatomByteN != -1 and LexatomByteN < 2:
            self.source_set.mask(0x00, 0x100)
開發者ID:xxyzzzq,項目名稱:quex,代碼行數:104,代碼來源:utf16_state_split.py

示例2: EncodingTrafoUTF8

# 需要導入模塊: from quex.engine.misc.interval_handling import NumberSet [as 別名]
# 或者: from quex.engine.misc.interval_handling.NumberSet import mask_interval [as 別名]
class EncodingTrafoUTF8(EncodingTrafoBySplit):
    def __init__(self):
        """Configure the UTF8 transformation: one-byte code units, and the
        complements of the admissible byte values as error ranges.
        """
        EncodingTrafoBySplit.__init__(self, "utf8",
                                      CodeUnitRange=NumberSet.from_range(0, 0x100))
        self.UnchangedRange = 0x7F

        # Admissible first bytes: ASCII (0xxxxxxx) plus the introducers of
        # 2..6 byte sequences (110xxxxx .. 1111110x). Error range = complement.
        admissible_byte0 = NumberSet([
            Interval(0x00, 0x80), Interval(0xC0, 0xE0),
            Interval(0xE0, 0xF0), Interval(0xF0, 0xF8),
            Interval(0xF8, 0xFC), Interval(0xFC, 0xFE),
        ])
        self.error_range_byte0 = admissible_byte0.get_complement(NumberSet_All())

        # Admissible continuation bytes: 10xxxxxx. Error range = complement.
        self.error_range_byteN = NumberSet(
            Interval(0x80, 0xC0)
        ).get_complement(NumberSet_All())

    def adapt_source_and_drain_range(self, LexatomByteN):
        """Adapt ranges to the given lexatom byte width, then clip both byte
        error ranges to the lexatom range.
        """
        EncodingTrafoBySplit.adapt_source_and_drain_range(self, LexatomByteN)
        for error_range in (self.error_range_byte0, self.error_range_byteN):
            error_range.mask_interval(self.lexatom_range)

    def prune(self, X):
        pass

    def get_interval_sequences(self, Orig):
        """Orig = Unicode Trigger Set. It is transformed into a sequence of intervals
        that cover all elements of Orig in a representation as UTF8 code units.
        A transition from state '1' to state '2' on 'Orig' is then equivalent to 
        the transitions along the code unit sequence.
        """
        by_length = _split_by_transformed_sequence_length(Orig)
        if by_length is None:
            return []

        sequences = []
        for seq_length, whole in by_length.items():
            for piece in _get_contiguous_interval_sequences(whole, seq_length):
                sequences.append(
                    _get_trigger_sequence_for_contigous_byte_range_interval(
                        piece, seq_length))
        return sequences

    def lexatom_n_per_character(self, CharacterSet):
        """If all characters in a unicode character set state machine require the
        same number of bytes to be represented this number is returned.  Otherwise,
        'None' is returned.

        RETURNS:   N > 0  number of bytes required to represent any character in the 
                          given state machine.
                   None   characters in the state machine require different numbers of
                          bytes.
        """
        assert isinstance(CharacterSet, NumberSet)

        intervals = CharacterSet.get_intervals(PromiseToTreatWellF=True)
        # The number of bytes per character increases monotonously with the
        # code point, so only the smallest and largest members of the set
        # need to be inspected.
        smallest = intervals[0].begin      # First element of number set
        largest  = intervals[-1].end - 1   # Last element of number set
        n = len(unicode_to_utf8(smallest))
        if n == len(unicode_to_utf8(largest)):
            return n
        return None

    def _plug_encoding_error_detectors(self, sm):
        """Adorn states with transitions to the 'on_encoding_error' handler if the 
        input value lies beyond the limits. The state machine is an implementation
        of linear sequences of intervals. Thus, the 'byte position' can be
        determined by the number of transitions from the init state.

        sm = mini state machine that implements the transition sequences.

        UTF8 Encodings in binary look like the following (see 'man utf8').

            1 byte: 0xxxxxxx
            2 byte: 110xxxxx 10xxxxxx
            3 byte: 1110xxxx 10xxxxxx 10xxxxxx
            4 byte: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            5 byte: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
            6 byte: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxxx

        The resulting byte ranges can be observed in 'error_range_byte0' for Byte[0]
        and 'error_range_byteN' for Byte[>0].
        """
        # 'Byte[0]' appears at the init state
        # (Adapt trigger map before entering the 'on bad lexatom state'
        # NOTE(review): '.iterkeys()'/'.iteritems()' are Python 2 only.
        init_tm = sm.get_init_state().target_map.get_map()
        workset = set(init_tm.iterkeys()) 
        for si, trigger_set in init_tm.iteritems():
            # Existing transitions must never overlap the byte-0 error range.
            assert not trigger_set.has_intersection(self.error_range_byte0)

        bad_lexatom_state_index = self._plug_encoding_error_detector_single_state(sm, init_tm)

        # 'Byte[>0]' appear all at later states
        done = set([bad_lexatom_state_index])
        # Walk over all states reachable from the init state; 'done' prevents
        # revisiting (and excludes the bad-lexatom state itself).
        while workset:
            si = workset.pop()
            tm = sm.states[si].target_map.get_map()
            done.add(si)

#.........這裏部分代碼省略.........
開發者ID:xxyzzzq,項目名稱:quex,代碼行數:103,代碼來源:utf8_state_split.py


注:本文中的quex.engine.misc.interval_handling.NumberSet.mask_interval方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。