This article collects typical usage examples of the Python method quex.engine.misc.interval_handling.NumberSet.mask_interval. If you have been wondering how exactly NumberSet.mask_interval is used in Python, the curated code examples below may help. You can also read further about the class quex.engine.misc.interval_handling.NumberSet to which the method belongs.
The following presents 2 code examples of NumberSet.mask_interval, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the site recommend better Python code examples.
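Before the class excerpts, here is a minimal sketch of what a mask_interval() call does, pieced together from the calls shown below. The half-open [begin, end) meaning of Interval (suggested by the "+1" at interval ends in the examples), the availability of Interval in the same module, and the in-place behavior of mask_interval() are assumptions for illustration, not statements taken from this page:

from quex.engine.misc.interval_handling import Interval, NumberSet

# A set of code points built from two (assumed half-open) intervals.
number_set = NumberSet([Interval(0x0000, 0x80), Interval(0x10000, 0x110000)])

# Restrict the set to the values that fit into a hypothetical one-byte lexatom range.
# Judging from the calls in the examples below, mask_interval() narrows the set in place.
number_set.mask_interval(Interval(0x00, 0x100))
# Afterwards only the sub-range [0x00, 0x80) would remain in 'number_set'.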
Example 1: EncodingTrafoUTF16
# Required import: from quex.engine.misc.interval_handling import NumberSet [as alias]
# Or alternatively: from quex.engine.misc.interval_handling.NumberSet import mask_interval [as alias]
#......... part of the code is omitted here .........
]).get_complement(NumberSet_All())
def prune(self, number_set):
global ForbiddenRange
number_set.subtract(ForbiddenRange)
number_set.mask(0, 0x110000)
def get_interval_sequences(self, Orig):
interval_1word, intervals_2word = _get_contigous_intervals(Orig)
result = []
if interval_1word is not None:
result.append([interval_1word])
if intervals_2word is not None:
result.extend(
_get_trigger_sequence_for_interval(interval)
for interval in intervals_2word
)
return result
def lexatom_n_per_character(self, CharacterSet):
"""If all characters in a unicode character set state machine require the
same number of bytes to be represented this number is returned. Otherwise,
'None' is returned.
RETURNS: N > 0 number of bytes required to represent any character in the
given state machine.
None characters in the state machine require different numbers of
bytes.
"""
assert isinstance(CharacterSet, NumberSet)
interval_list = CharacterSet.get_intervals(PromiseToTreatWellF=True)
front = interval_list[0].begin # First element of number set
back = interval_list[-1].end - 1 # Last element of number set
# Determine number of bytes required to represent the first and the
# last character of the number set. The number of bytes per character
# increases monotonically, so only the borders have to be considered.
front_chunk_n = len(unicode_to_utf16(front))
back_chunk_n = len(unicode_to_utf16(back))
if front_chunk_n != back_chunk_n: return None
else: return front_chunk_n
def _plug_encoding_error_detectors(self, sm):
"""Adorn states with transitions to the 'on_encoding_error' handler if the
input value lies beyond the limits. The state machine is an implementation
of linear sequences of intervals. Thus, the 'code unit position' can be
determined by the number of transitions from the init state.
sm = mini state machine that implements the transition sequences.
Bad ranges for code units (2 bytes each):
1st: 0xDC00 - 0xDFFF
2nd: 0x0000 - 0xDBFF, 0xE000 - 0x11000
"""
# 'CodeUnit[0]' appears at the init state.
# (Adapt the trigger map before entering the 'on bad lexatom state'.)
init_tm = sm.get_init_state().target_map.get_map()
workset = set(init_tm.iterkeys())
for si, trigger_set in init_tm.iteritems():
assert not trigger_set.has_intersection(self.error_range_code_unit0)
bad_lexatom_state_index = self._plug_encoding_error_detector_single_state(sm, init_tm)
# 'CodeUnit[>0]' all appear at later states
done = set([bad_lexatom_state_index])
while workset:
si = workset.pop()
tm = sm.states[si].target_map.get_map()
done.add(si)
# Only add bad lexatom detection to states that transit on lexatoms.
# (Bad lexatom states, by the way, do not have transitions.)
if not tm: continue
for trigger_set in tm.itervalues():
assert not trigger_set.has_intersection(self.error_range_code_unit1)
workset.update(new_si for new_si in tm.iterkeys() if new_si not in done)
tm[bad_lexatom_state_index] = self.error_range_code_unit1
def _plug_encoding_error_detector_single_state(self, sm, target_map):
bad_lexatom_state_index = sm.access_bad_lexatom_state()
if target_map:
target_map[bad_lexatom_state_index] = self.error_range_code_unit0
return bad_lexatom_state_index
def adapt_source_and_drain_range(self, LexatomByteN):
EncodingTrafoBySplit.adapt_source_and_drain_range(self, LexatomByteN)
self.error_range_code_unit0.mask_interval(self.lexatom_range)
self.error_range_code_unit1.mask_interval(self.lexatom_range)
if LexatomByteN == -1:
return
elif LexatomByteN >= 2:
return
else:
# If there are fewer than 2 bytes per lexatom, only the unicode
# range from 0x00 to 0xFF can be treated.
self.source_set.mask(0x00, 0x100)
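The border check in lexatom_n_per_character() above relies on unicode_to_utf16(), whose definition is not part of this excerpt. A minimal stand-in, assuming the helper returns the list of 16-bit code units for a code point, could look like the following sketch; it is an illustration only, not quex's actual implementation:

def unicode_to_utf16_sketch(code_point):
    # One code unit for the Basic Multilingual Plane, a surrogate pair otherwise.
    # (Surrogate code points themselves are not valid input; no check is done here.)
    if code_point < 0x10000:
        return [code_point]
    offset = code_point - 0x10000
    return [0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF)]

# len(unicode_to_utf16_sketch(0x61))    -> 1 code unit
# len(unicode_to_utf16_sketch(0x10437)) -> 2 code units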
Example 2: EncodingTrafoUTF8
# Required import: from quex.engine.misc.interval_handling import NumberSet [as alias]
# Or alternatively: from quex.engine.misc.interval_handling.NumberSet import mask_interval [as alias]
class EncodingTrafoUTF8(EncodingTrafoBySplit):
def __init__(self):
drain_set = NumberSet.from_range(0, 0x100)
EncodingTrafoBySplit.__init__(self, "utf8", CodeUnitRange=drain_set)
self.UnchangedRange = 0x7F
self.error_range_byte0 = NumberSet([
Interval(0b00000000, 0b01111111+1), Interval(0b11000000, 0b11011111+1),
Interval(0b11100000, 0b11101111+1), Interval(0b11110000, 0b11110111+1),
Interval(0b11111000, 0b11111011+1), Interval(0b11111100, 0b11111101+1),
]).get_complement(NumberSet_All())
self.error_range_byteN = NumberSet(
Interval(0b10000000, 0b10111111+1)
).get_complement(NumberSet_All())
def adapt_source_and_drain_range(self, LexatomByteN):
EncodingTrafoBySplit.adapt_source_and_drain_range(self, LexatomByteN)
self.error_range_byte0.mask_interval(self.lexatom_range)
self.error_range_byteN.mask_interval(self.lexatom_range)
def prune(self, X):
pass
def get_interval_sequences(self, Orig):
"""Orig = Unicode Trigger Set. It is transformed into a sequence of intervals
that cover all elements of Orig in a representation as UTF8 code units.
A transition from state '1' to state '2' on 'Orig' is then equivalent to
the transitions along the code unit sequence.
"""
db = _split_by_transformed_sequence_length(Orig)
if db is None: return []
result = []
for seq_length, interval in db.items():
interval_list = _get_contiguous_interval_sequences(interval, seq_length)
result.extend(
_get_trigger_sequence_for_contigous_byte_range_interval(interval, seq_length)
for interval in interval_list)
return result
def lexatom_n_per_character(self, CharacterSet):
"""If all characters in a unicode character set state machine require the
same number of bytes to be represented this number is returned. Otherwise,
'None' is returned.
RETURNS: N > 0 number of bytes required to represent any character in the
given state machine.
None characters in the state machine require different numbers of
bytes.
"""
assert isinstance(CharacterSet, NumberSet)
interval_list = CharacterSet.get_intervals(PromiseToTreatWellF=True)
front = interval_list[0].begin # First element of number set
back = interval_list[-1].end - 1 # Last element of number set
# Determine number of bytes required to represent the first and the
# last character of the number set. The number of bytes per character
# increases monotonically, so only the borders have to be considered.
front_chunk_n = len(unicode_to_utf8(front))
back_chunk_n = len(unicode_to_utf8(back))
if front_chunk_n != back_chunk_n: return None
else: return front_chunk_n
def _plug_encoding_error_detectors(self, sm):
"""Adorn states with transitions to the 'on_encoding_error' handler if the
input value lies beyond the limits. The state machine is an implementation
of linear sequences of intervals. Thus, the 'byte position' can be
determined by the number of transitions from the init state.
sm = mini state machine that implements the transition sequences.
UTF8 Encodings in binary look like the following (see 'man utf8').
1 byte: 0xxxxxxx
2 byte: 110xxxxx 10xxxxxx
3 byte: 1110xxxx 10xxxxxx 10xxxxxx
4 byte: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
5 byte: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
6 byte: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
The resulting byte ranges can be observed in 'error_range_byte0' for Byte[0]
and 'error_range_byteN' for Byte[>0].
"""
# 'Byte[0]' appears at the init state.
# (Adapt the trigger map before entering the 'on bad lexatom state'.)
init_tm = sm.get_init_state().target_map.get_map()
workset = set(init_tm.iterkeys())
for si, trigger_set in init_tm.iteritems():
assert not trigger_set.has_intersection(self.error_range_byte0)
bad_lexatom_state_index = self._plug_encoding_error_detector_single_state(sm, init_tm)
# 'Byte[>0]' all appear at later states
done = set([bad_lexatom_state_index])
while workset:
si = workset.pop()
tm = sm.states[si].target_map.get_map()
done.add(si)
#......... part of the code is omitted here .........
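As with the UTF-16 example, lexatom_n_per_character() above calls unicode_to_utf8(), which is not part of this excerpt. A minimal stand-in that returns the UTF-8 byte sequence of a code point as a list of integers might look as follows; it covers only the modern 4-byte limit, not the historical 5- and 6-byte forms listed in the docstring, and it is an illustration rather than quex's actual helper:

def unicode_to_utf8_sketch(code_point):
    # Encode a single code point into its UTF-8 byte sequence.
    if code_point < 0x80:
        return [code_point]
    elif code_point < 0x800:
        return [0xC0 | (code_point >> 6),
                0x80 | (code_point & 0x3F)]
    elif code_point < 0x10000:
        return [0xE0 | (code_point >> 12),
                0x80 | ((code_point >> 6) & 0x3F),
                0x80 | (code_point & 0x3F)]
    else:
        return [0xF0 | (code_point >> 18),
                0x80 | ((code_point >> 12) & 0x3F),
                0x80 | ((code_point >> 6) & 0x3F),
                0x80 | (code_point & 0x3F)]

# len(unicode_to_utf8_sketch(0x61))   -> 1 byte
# len(unicode_to_utf8_sketch(0x20AC)) -> 3 bytes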