本文整理汇总了Python中quex.engine.misc.file_in.skip_whitespace函数的典型用法代码示例。如果您正苦于以下问题：Python skip_whitespace函数的具体用法？Python skip_whitespace怎么用？Python skip_whitespace使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了skip_whitespace函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse
def parse(fh):
    """Parse one mode definition from the stream 'fh'.

    Reads the mode name, an optional option list (introduced by ':') and
    then all mode elements until '__parse_element()' reports the end of
    the mode body. A 'ModeDescription' object is constructed for the mode;
    according to the original notes its constructor registers the mode in
    the mode database.

    NOTE: End-of-file is not handled here. The original note states that it
          is caught by the caller 'parse_section(...)'.
    """
    skip_whitespace(fh)
    mode_name = read_identifier(
        fh, OnMissingStr="Missing identifier at beginning of mode definition.")

    # Constructing the description is what makes the mode known elsewhere.
    new_mode = ModeDescription(mode_name, SourceRef.from_FileHandle(fh))

    # (*) inherited modes / option_db
    skip_whitespace(fh)
    opener = fh.read(1)
    if opener != ":" and opener != "{":
        error_msg("missing ':' or '{' after mode '%s'" % mode_name, fh)

    if opener == ":":
        __parse_option_list(new_mode, fh)

    # (*) pattern-action pairs and event handlers, one element per iteration
    while True:
        if not __parse_element(new_mode, fh):
            break
示例2: __parse_element
def __parse_element(new_mode, fh):
    """Parse a single element of a mode body: an event handler or a
    pattern-action pair.

    RETURNS: False, if the closing '}' of the mode has been found.
             True, otherwise.

    On 'EndOfStreamException' the stream is reset to the last remembered
    position and 'error_eof()' is called with a description of what was
    being parsed at that moment.
    """
    # 'description' and 'position' are kept up to date while parsing so that
    # the EOF handler below can report where and in what the stream ended.
    description = "pattern or event handler"
    position = fh.tell()
    try:
        skip_whitespace(fh)
        # NOTE: 'read_word' is not used here, because parsing must continue
        #       directly after the whitespace if a regular expression follows.
        position = fh.tell()

        word = read_until_whitespace(fh)
        if word == "}":
            return False

        # -- event handlers such as 'on_entry', 'on_exit', ...
        if __parse_event(new_mode, fh, word):
            return True

        # Not an event handler => step back and read a regular expression.
        fh.seek(position)
        description = "start of mode element: regular expression"
        pattern = regular_expression.parse(fh)
        pattern.set_source_reference(SourceRef.from_FileHandle(fh, new_mode.name))

        position = fh.tell()
        description = "start of mode element: code fragment for '%s'" % pattern.pattern_string()
        __parse_action(new_mode, fh, pattern.pattern_string(), pattern)

    except EndOfStreamException:
        fh.seek(position)
        error_eof(description, fh)

    return True
示例3: __parse
def __parse(fh, result, IndentationSetupF=False):
    r"""Parse a sequence of pattern definitions terminated by '>'.

    The definitions have the form:

          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

    i.e. each definition is a head (parsed by '__parse_definition_head()')
    followed by an optional value and a closing ';'. Every definition is
    entered into 'result' by means of 'result.specify(...)'.

    IndentationSetupF -- if True, no value specifier is read and a head that
                         delivers 'pattern = None' is reported as an error.

    RETURNS: 'result'

    NOTE: End-of-file is not handled here. The original note states that it
          is caught by the caller 'parse_section(...)'.
    """
    while True:
        skip_whitespace(fh)
        if check(fh, ">"):
            break

        # Head of the definition. According to the original comment,
        # '__parse_definition_head()' only accepts identifiers mentioned
        # in 'result'.
        pattern, identifier, sr = __parse_definition_head(fh, result)
        if IndentationSetupF and pattern is None:
            error.log("Keyword '\\else' cannot be used in indentation setup.", fh)

        if IndentationSetupF:
            result.specify(identifier, pattern, sr)
        else:
            value = read_value_specifier(fh, identifier, 1)
            result.specify(identifier, pattern, value, sr)

        terminated_f = check(fh, ";")
        if not terminated_f:
            error.log("Missing ';' after '%s' specification." % identifier, fh)

    return result
示例4: _base_parse
def _base_parse(self, fh, IndentationSetupF=False):
    r"""Parse a sequence of pattern definitions terminated by '>'.

    The definitions have the form:

          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

    Each definition is entered into this object by 'self.specify(...)'.
    Whether a count value is read after the head depends on
    'self.requires_count()'.

    IndentationSetupF -- if True, a head that delivers 'pattern = None' is
                         reported as an error.

    RETURNS: Whatever 'self.finalize()' returns.

    NOTE: End-of-file is not handled here. The original note states that it
          is caught by the caller 'parse_section(...)'.
    """
    while True:
        skip_whitespace(fh)
        if check(fh, ">"):
            break

        # Head of the definition. According to the original comment,
        # '_parse_definition_head()' only accepts identifiers from the
        # list that is passed to it.
        pattern, identifier, sr = _parse_definition_head(fh, self.identifier_list)
        if IndentationSetupF and pattern is None:
            error.log("Keyword '\\else' cannot be used in indentation setup.", fh)

        if not self.requires_count():
            self.specify(identifier, pattern, sr)
        else:
            count = _read_value_specifier(fh, identifier, 1)
            self.specify(identifier, pattern, count, sr)

        terminated_f = check(fh, ";")
        if not terminated_f:
            error.log("Missing ';' after '%s' specification." % identifier, fh)

    return self.finalize()
示例5: __parse_property_expression
def __parse_property_expression(stream, PropertyLetter, EqualConditionPossibleF=True):
    r"""Parse an expression of the form '\? { X [ = Y ] }' where '?' is the
    given 'PropertyLetter'.

    stream                  -- input stream, positioned at the backslash.
    PropertyLetter          -- the single letter expected after the backslash.
    EqualConditionPossibleF -- if False, an expression containing '=' is
                               rejected.

    RETURNS: List of whitespace-stripped fields:
                [X]     if no '=' is present,
                [X, Y]  if the expression has the form 'X = Y'.

    RAISES:  RegularExpressionException if
                -- the stream does not start with '\' + PropertyLetter,
                -- no '{' follows,
                -- the content between the brackets is empty,
                -- more than one '=' appears, or
                -- '=' appears although EqualConditionPossibleF is False.
    """
    assert isinstance(PropertyLetter, str)
    assert len(PropertyLetter) == 1
    assert isinstance(EqualConditionPossibleF, bool)

    # verify '\?'
    x = stream.read(2)
    if x != "\\" + PropertyLetter:
        # Both the expected letter and the received text must be supplied;
        # a single argument here would raise a 'TypeError' instead.
        raise RegularExpressionException(
            "Unicode property letter '\\%s' expected, received '%s'."
            % (PropertyLetter, x))

    skip_whitespace(stream)

    x = stream.read(1)
    if x != "{":
        raise RegularExpressionException(
            "Unicode property '\\%s' not followed by '{'." % PropertyLetter)

    content = __snap_until(stream, "}")

    # NOTE: 'split()' never delivers an empty list, so emptiness must be
    #       detected on the content itself.
    if content.strip() == "":
        raise RegularExpressionException(
            "Unicode property expression '\\%s{}' cannot have no content."
            % PropertyLetter)

    fields = content.split("=")
    if len(fields) > 2:
        raise RegularExpressionException(
            "Unicode property expression '\\%s' can have at maximum one '='."
            % PropertyLetter)

    if not EqualConditionPossibleF and len(fields) == 2:
        raise RegularExpressionException(
            "Unicode property expression '\\%s' does not allow '=' conditions"
            % PropertyLetter)

    return [field.strip() for field in fields]
示例6: snap_set_expression
def snap_set_expression(stream, PatternDict):
    """Try to snap a character set expression from 'stream'.

    The following alternatives are tried in this order:

       -- a property set, via 'snap_property_set()',
       -- a case folded pattern introduced by '\\C',
       -- a set term enclosed in '[:' ... ':]',
       -- a traditional character set starting with '[',
       -- a pattern replacement starting with '{'.

    RETURNS: The result of the matching alternative, passed through
             '__debug_exit()' (except for the first two alternatives,
             which return directly). 'None' is passed to '__debug_exit()'
             if no alternative matches, including the case that the stream
             is at its end.

    RAISES:  RegularExpressionException if '[:' is not closed by ':]'.
    """
    assert stream.__class__.__name__ == "StringIO" \
           or stream.__class__.__name__ == "file"

    __debug_entry("set_expression", stream)

    result = snap_property_set(stream)
    if result is not None:
        return result

    x = stream.read(2)
    # At the end of the stream 'x' may contain less than two characters;
    # 'x[:1]' is safe where 'x[0]' would raise an IndexError.
    first = x[:1]

    if x == "\\C":
        return snap_case_folded_pattern(stream, PatternDict, NumberSetF=True)

    elif x == "[:":
        result = snap_set_term(stream, PatternDict)
        skip_whitespace(stream)
        x = stream.read(2)
        if x != ":]":
            raise RegularExpressionException("Missing closing ':]' for character set expression.\n" + \
                                             "found: '%s'" % x)

    elif first == "[" or first == "{":
        # Unput the second character -- but only if there was one. Otherwise,
        # the opening bracket itself would be put back into the stream.
        if len(x) > 1:
            stream.seek(-1, 1)
        if first == "[":
            result = traditional_character_set.do(stream)
        else:
            result = snap_replacement(stream, PatternDict, StateMachineF=False)

    else:
        result = None

    return __debug_exit(result, stream)
示例7: __parse_section
def __parse_section(fh, descriptor, already_defined_list):
    """Parse one subsection of a 'token_type' section and store what has
    been found in 'descriptor'.

    Admissible subsections are 'name', 'file_name', 'standard', 'distinct',
    'union', 'inheritable', 'noid' and any key of the global
    'token_type_code_fragment_db'.

    RETURNS: False, if no identifier was found but a closing '}' follows.
                    The stream is reset to the position at entry.
             True,  if a subsection has been parsed.
    """
    assert type(already_defined_list) == list

    fragment_names = list(token_type_code_fragment_db.keys())
    SubsectionList = ["name", "file_name", "standard", "distinct", "union",
                      "inheritable", "noid"] + fragment_names

    start_pos = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)

    if word == "":
        # No identifier: either the section is closed, or something is wrong.
        fh.seek(start_pos)
        if check(fh, "}"):
            fh.seek(start_pos)
            return False
        error_msg("Missing token_type section ('standard', 'distinct', or 'union').", fh)

    verify_word_in_list(word, SubsectionList,
                        "Subsection '%s' not allowed in token_type section." % word, fh)

    if word == "name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'name' specification.", fh)
        (descriptor.class_name,
         descriptor.name_space,
         descriptor.class_name_safe) = read_namespaced_name(fh, "token_type")
        if not check(fh, ";"):
            error_msg("Missing terminating ';' in token_type 'name' specification.", fh)

    elif word == "inheritable":
        descriptor.open_for_derivation_f = True
        check_or_die(fh, ";")

    elif word == "noid":
        descriptor.token_contains_token_id_f = False
        check_or_die(fh, ";")

    elif word == "file_name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'file_name' specification.", fh)
        descriptor.set_file_name(read_until_letter(fh, ";"))
        if not check(fh, ";"):
            error_msg("Missing terminating ';' in token_type 'file_name' specification.", fh)

    elif word in ["standard", "distinct", "union"]:
        # Member sections: each kind has its dedicated parser.
        if word == "standard":
            parse_standard_members(fh, word, descriptor, already_defined_list)
        elif word == "distinct":
            parse_distinct_members(fh, word, descriptor, already_defined_list)
        elif word == "union":
            parse_union_members(fh, word, descriptor, already_defined_list)

        if not check(fh, "}"):
            fh.seek(start_pos)
            error_msg("Missing closing '}' at end of token_type section '%s'." % word, fh)

    elif word in fragment_names:
        # Code fragments are stored as attribute of the descriptor under
        # the name of the subsection.
        fragment = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
        descriptor.__dict__[word] = fragment

    else:
        assert False, "This code section section should not be reachable because 'word'\n" + \
                      "was checked to fit in one of the 'elif' cases."

    return True
示例8: parse
def parse(fh):
    """Parse one mode definition from the stream 'fh'.

    Reads the mode name, an optional option list (introduced by ':') and
    then all mode elements until '__parse_element()' reports the end of
    the mode body. A 'ModeDescription' object is constructed for the mode;
    according to the original notes its constructor registers the mode in
    the mode database.

    A mode that ends up without event handlers and without own matches is
    forced to be 'inheritable: only'; a non-fatal message is issued.

    NOTE: End-of-file is not handled here. The original note states that it
          is caught by the caller 'parse_section(...)'.
    """
    skip_whitespace(fh)
    mode_name = read_identifier(fh)
    if mode_name == "":
        error_msg("missing identifier at beginning of mode definition.", fh)

    # Constructing the description is what makes the mode known elsewhere.
    new_mode = ModeDescription(mode_name, fh.name, get_current_line_info_number(fh))

    # (*) inherited modes / options
    skip_whitespace(fh)
    opener = fh.read(1)
    if opener != ":" and opener != "{":
        error_msg("missing ':' or '{' after mode '%s'" % mode_name, fh)

    if opener == ":":
        __parse_option_list(new_mode, fh)

    # (*) pattern-action pairs and event handlers, one element per iteration
    while True:
        if not __parse_element(new_mode, fh):
            break

    # (*) modes without any pattern definition or event handler
    if not (new_mode.has_event_handler() or new_mode.has_own_matches()) \
       and new_mode.options["inheritable"] != "only":
        new_mode.options["inheritable"] = "only"
        error_msg("Mode without pattern and event handlers needs to be 'inheritable only'.\n" + \
                  "<inheritable: only> has been added automatically.", fh, DontExitF=True)
示例9: __parse_base_mode_list
def __parse_base_mode_list(fh, new_mode):
    """Parse the comma separated list of base mode names and store it in
    'new_mode.base_modes'.

    Parsing stops in front of '{' or '<', or as soon as no further
    identifier or no separating ',' is found.

    A trailing ',' is reported with 'WarningF=True' and 'DontExitF=True'.
    Two identifiers without a ',' between them are reported by 'error_msg()'.
    """
    new_mode.base_modes = []
    trailing_comma_f = False

    while True:
        # The opening '{' or '<' belongs to what follows; it is put back.
        if check(fh, "{") or check(fh, "<"):
            fh.seek(-1, 1)
            break

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "":
            break

        new_mode.base_modes.append(identifier)
        trailing_comma_f = check(fh, ",")
        if not trailing_comma_f:
            break

    if trailing_comma_f:
        error_msg("Trailing ',' after base mode '%s'." % new_mode.base_modes[-1], fh,
                  DontExitF=True, WarningF=True)

    elif len(new_mode.base_modes) != 0:
        # This check is a 'service' -- for those who follow the old
        # convention. The stream position is restored afterwards.
        pos = fh.tell()
        skip_whitespace(fh)
        next_identifier = read_identifier(fh)
        if next_identifier != "":
            message = ("Missing separating ',' between base modes '%s' and '%s'.\n"
                       % (new_mode.base_modes[-1], next_identifier)) \
                      + "(The comma separator is mandatory since quex 0.53.1)"
            error_msg(message, fh)
        fh.seek(pos)
示例10: parse
def parse(fh, CodeFragmentName,
          ErrorOnFailureF=True, AllowBriefTokenSenderF=True, ContinueF=True):
    """Parse a code fragment, either in the normal form '{ ... }' or, if
    allowed, in the brief token sender form '=> ...'.

    fh                     -- input stream.
    CodeFragmentName       -- name of the fragment; passed to the parser of
                              the normal form and used in the error message.
    ErrorOnFailureF        -- if False, a missing code fragment is not an
                              error; the stream is reset and None is returned.
    AllowBriefTokenSenderF -- if False, the '=>' form is not accepted.
    ContinueF              -- passed on to the brief token sender parser.

    RETURNS: An object of class CodeUser containing
             line number, filename, and the code fragment.

             None in case of failure.
    """
    assert type(ErrorOnFailureF) == bool
    assert type(AllowBriefTokenSenderF) == bool

    skip_whitespace(fh)
    # Remember where the look-ahead starts. Near the end of the stream less
    # than two characters may be delivered, so stepping back by a fixed
    # amount of two would not restore this position reliably.
    position = fh.tell()
    word = fh.read(2)

    if word[:1] == "{":
        if len(word) > 1:
            fh.seek(-1, 1)  # unput the second character
        return __parse_normal(fh, CodeFragmentName)

    elif AllowBriefTokenSenderF and word == "=>":
        return __parse_brief_token_sender(fh, ContinueF)

    elif not ErrorOnFailureF:
        fh.seek(position)
        return None

    else:
        error.log("Missing code fragment after %s definition." % CodeFragmentName,
                  fh)
示例11: read_character_code
def read_character_code(fh):
    """Read a character code from 'fh'. Three notations are tried:

       -- a quoted character, e.g. '+' or a backslashed character,
       -- a unicode name, e.g. UC MATHEMATICAL_MONOSPACE_DIGIT_FIVE,
       -- an integer, as read by 'read_integer()'.

    RETURNS: The character code that has been read.
             -1, if none of the notations applies. In that case the stream
                 is reset to the position at entry.

    NOTE: According to the original note, this function is covered by the
          regression test for feature request 2251359
          (directory $QUEX_PATH/TEST/2251359).
    """
    pos = fh.tell()
    start = fh.read(1)

    if start == "":
        fh.seek(pos)
        return -1

    if start == "'":
        # A character in quotes, example: '+'
        if not check(fh, "\\"):
            character_code = __read_one_utf8_code_from_stream(fh)
        else:
            # According to the original comment, 'snap_backslashed_character'
            # throws an exception if the backslashed character is nonsense.
            character_code = snap_backslashed_character.do(fh, ReducedSetOfBackslashedCharactersF=True)

        if character_code is None:
            error.log("Missing utf8-character for definition of character code by character.",
                      fh)
        elif fh.read(1) != '\'':
            error.log("Missing closing ' for definition of character code by character.",
                      fh)
        return character_code

    if start == "U":
        if fh.read(1) != "C":
            fh.seek(pos)
            return -1

        # A unicode name, example: UC MATHEMATICAL_MONOSPACE_DIGIT_FIVE
        skip_whitespace(fh)
        ucs_name = __read_token_identifier(fh)
        if ucs_name == "":
            fh.seek(pos)
            return -1

        # The character set related to the name is supposed to be of size
        # one. The type of what is returned tells what happened.
        character_code = ucs_property_db.get_character_set("Name", ucs_name)
        result_type = type(character_code)
        if result_type in [str, unicode]:
            error.verify_word_in_list(ucs_name, ucs_property_db["Name"].code_point_db,
                                      "The string %s\ndoes not identify a known unicode character." % ucs_name,
                                      fh)
        elif result_type not in [int, long]:
            error.log("%s relates to more than one character in unicode database." % ucs_name,
                      fh)
        return character_code

    # Neither quote nor 'UC' => try a plain integer from the beginning.
    fh.seek(pos)
    character_code = read_integer(fh)
    if character_code is None:
        # Leave the stream untouched, so the caller may try to interpret
        # the content as something else.
        fh.seek(pos)
        return -1

    return character_code
示例12: _read_value_specifier
def _read_value_specifier(fh, Keyword, Default=None):
    """Read the value that follows a keyword: an integer or a variable name.

    fh      -- input stream.
    Keyword -- keyword the value belongs to; only used in the error message.
    Default -- value returned if neither an integer nor an identifier is
               found. If it is None, the missing value is reported by
               'error.log()'.

    RETURNS: The integer, the identifier, or 'Default'.
    """
    skip_whitespace(fh)

    value = read_integer(fh)
    if value is None:
        # No number received => it may be an identifier.
        value = read_identifier(fh)
        if value == "":
            if Default is None:
                error.log("Missing integer or variable name after keyword '%s'." % Keyword, fh)
                return None
            value = Default

    return value
示例13: snap_set_term
def snap_set_term(stream, PatternDict):
    """Snap a set term from 'stream'. A set term is one of:

       -- a set operation 'union', 'intersection', 'difference' or
          'inverse', applied to a list of sets,
       -- the name of a special character set from the global
          'special_character_set_db',
       -- a set expression, if no identifier is found at all.

    Any other identifier is handed to 'verify_word_in_list()'.

    RETURNS: The resulting set, passed through '__debug_exit()'.

    RAISES:  RegularExpressionException if 'union', 'intersection' or
             'difference' receives less than two sets.
    """
    __debug_entry("set_term", stream)

    operation_list = ["union", "intersection", "difference", "inverse"]
    character_set_list = list(special_character_set_db.keys())

    skip_whitespace(stream)
    position = stream.tell()

    # If no identifier follows, the last branch below snaps a set expression.
    word = read_identifier(stream)

    if word in operation_list:
        # According to the original comment, a syntax error during set list
        # parsing is signalled by an exception.
        set_list = snap_set_list(stream, word, PatternDict)
        set_n = len(set_list)
        result = set_list[0]
        operand_list = set_list[1:]

        if word == "inverse":
            # The inverse of multiple sets is the inverse of their union.
            for operand in operand_list:
                result.unite_with(operand)
            return __debug_exit(result.get_complement(Setup.buffer_codec.source_set), stream)

        if set_n < 2:
            raise RegularExpressionException(
                ("Regular Expression: A %s operation needs at least\n" % word)
                + "two sets to operate on them.")

        # Name of the method of 'result' that implements the operation.
        method_name = {
            "union":        "unite_with",
            "intersection": "intersect_with",
            "difference":   "subtract",
        }.get(word)
        if method_name is not None:
            for operand in operand_list:
                getattr(result, method_name)(operand)

    elif word in character_set_list:
        reg_expr = special_character_set_db[word]
        result = traditional_character_set.do_string(reg_expr)

    elif word != "":
        verify_word_in_list(word, character_set_list + operation_list,
                            "Unknown keyword '%s'." % word, stream)

    else:
        stream.seek(position)
        result = snap_set_expression(stream, PatternDict)

    return __debug_exit(result, stream)
示例14: snap_curly_bracketed_expression
def snap_curly_bracketed_expression(stream, PatternDict, Name, TriggerChar, MinN=1, MaxN=1):
    """Snap a whitespace separated list of regular expressions that is
    enclosed in '{' and '}'.

    stream      -- input stream.
    PatternDict -- passed on to 'snap_expression()'.
    Name        -- name of the construct; only used in error messages.
    TriggerChar -- character that triggered the construct; only used in
                   error messages.
    MinN, MaxN  -- minimum and maximum number of expected patterns. The
                   original documentation suggests 'MaxN=sys.maxint' for an
                   arbitrary number of patterns.

    RETURNS: result = list of patterns, where

                 MinN <= len(result) <= MaxN

             Violations are reported by 'error_msg()'; the original
             documentation states that the function exits in that case.
    """
    assert MinN <= MaxN
    assert MinN > 0

    skip_whitespace(stream)

    # Read over the trigger character
    if not check(stream, "{"):
        error_msg("Missing opening '{' after %s %s." % (Name, TriggerChar), stream)

    result = []
    while True:
        pattern = snap_expression(stream, PatternDict)
        if pattern is not None:
            result.append(pattern)

        if check(stream, "}"):
            break
        if check_whitespace(stream):
            continue

        # Neither the closing bracket nor a separator follows.
        if check(stream, "/") or check(stream, "$"):
            error_msg("Pre- or post contexts are not allowed in %s \\%s{...} expressions." % (Name, TriggerChar), stream)
        else:
            error_msg("Missing closing '}' %s in \\%s{...}." % (Name, TriggerChar), stream)

    pattern_n = len(result)
    if MinN == MaxN:
        if pattern_n != MinN:
            error_msg("Exactly %i pattern%s required between '{' and '}'" \
                      % (MinN, "" if MinN == 1 else "s"), stream)
    else:
        if pattern_n < MinN:
            error_msg("At minimum %i pattern%s required between '{' and '}'" \
                      % (MinN, "" if MinN == 1 else "s"), stream)
        if pattern_n > MaxN:
            error_msg("At maximum %i pattern%s required between '{' and '}'" \
                      % (MaxN, "" if MaxN == 1 else "s"), stream)

    return result
示例15: snap_replacement
def snap_replacement(stream, PatternDict, StateMachineF=True):
    """Snap the reference to a predefined pattern, i.e. an identifier
    followed by '}', and deliver what the pattern stands for.

    stream        -- input stream, positioned behind the opening '{'.
    PatternDict   -- maps pattern names to 'PatternShorthand' objects.
    StateMachineF -- True:  deliver the state machine of the pattern.
                     False: deliver the character set of the pattern.

    RETURNS: The object delivered by 'get_state_machine()', or by
             'get_character_set()' respectively, of the referenced pattern.

    RAISES:  RegularExpressionException if the identifier or the closing
             '}' is missing.
    """
    skip_whitespace(stream)
    pattern_name = read_identifier(stream)
    if pattern_name == "":
        raise RegularExpressionException("Pattern replacement expression misses identifier after '{'.")

    skip_whitespace(stream)
    if not check(stream, "}"):
        raise RegularExpressionException("Pattern replacement expression misses closing '}' after '%s'." \
                                         % pattern_name)

    verify_word_in_list(pattern_name, PatternDict.keys(),
                        "Specifier '%s' not found in any preceeding 'define { ... }' section." % pattern_name,
                        stream)

    reference = PatternDict[pattern_name]
    assert reference.__class__.__name__ == "PatternShorthand"

    # The replacement may be a character set or a state machine.
    if not StateMachineF:
        # According to the original comment, this is a cloned version of
        # the character set.
        character_set = reference.get_character_set()
        if character_set is None:
            error_msg("Replacement in character set expression must be a character set.\n"
                      "Specifier '%s' relates to a pattern state machine." % pattern_name, stream)
        if character_set.is_empty():
            error_msg("Referenced character set '%s' is empty.\nAborted." % pattern_name, stream)
        return character_set

    # According to the original comment, this is a cloned version of the
    # state machine.
    state_machine = reference.get_state_machine()
    assert isinstance(state_machine, StateMachine)

    # It is essential that state machines defined as patterns do not have
    # origins. Otherwise, the optimization of patterns that contain pattern
    # replacements might get confused and can not find all optimizations.
    assert state_machine.has_origins() == False

    # NOTE: Original remark: a state machine with pre- or post-contexts
    #       cannot be part of a replacement, since the addition of new
    #       post-contexts would mess up the pattern. The related check is
    #       disabled in the original code and is not performed here.
    return state_machine