本文整理汇总了Python中sre_constants.SUBPATTERN属性的典型用法代码示例。如果您正苦于以下问题：Python sre_constants.SUBPATTERN属性的具体用法？Python sre_constants.SUBPATTERN怎么用？Python sre_constants.SUBPATTERN使用的例子？那么，这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在模块sre_constants的用法示例。
在下文中一共展示了sre_constants.SUBPATTERN属性的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: import sre_constants [as 别名]
# 或者: from sre_constants import SUBPATTERN [as 别名]
def __init__(self, lexicon, flags=0):
    """Compile every phrase of *lexicon* into a single alternation.

    Each phrase is parsed on its own, wrapped in a group opened on one
    shared parser state, and the wrapped phrases are joined into a single
    BRANCH that is compiled and stored on ``self.scanner``.

    Args:
        lexicon: Iterable of ``(phrase, action)`` pairs. Only the phrase
            (a regular-expression string) is consumed here; the object
            itself is kept unchanged on ``self.lexicon``.
        flags: Regex flags, either an int or a ``RegexFlag`` (which is
            reduced to its integer value). Applied to the shared state and
            to every phrase as it is parsed.
    """
    from sre_constants import BRANCH, SUBPATTERN

    if isinstance(flags, RegexFlag):
        flags = flags.value
    self.lexicon = lexicon

    # The parser-state class is called ``State`` on newer interpreters and
    # ``Pattern`` on older ones; accept whichever this sre_parse provides.
    state_class = getattr(sre_parse, "State", None) or sre_parse.Pattern
    state = state_class()
    state.flags = flags

    # combine phrases into a compound pattern
    alternatives = []
    for phrase, _action in lexicon:
        group_id = state.opengroup()
        wrapped = sre_parse.SubPattern(state, [
            (SUBPATTERN, (group_id, 0, 0, sre_parse.parse(phrase, flags))),
        ])
        alternatives.append(wrapped)
        state.closegroup(group_id, wrapped)

    compound = sre_parse.SubPattern(state, [(BRANCH, (None, alternatives))])
    self.scanner = sre_compile.compile(compound)
示例2: __init__
# 需要导入模块: import sre_constants [as 别名]
# 或者: from sre_constants import SUBPATTERN [as 别名]
def __init__(self, lexicon, flags=0):
    """Compile every phrase of *lexicon* into a single alternation.

    Args:
        lexicon: Iterable of ``(phrase, action)`` pairs; only the phrase is
            used here. The object is stored unchanged on ``self.lexicon``.
        flags: Regex flags set on the shared parser state and passed to
            ``sre_parse.parse`` for every phrase.

    The compiled result is stored on ``self.scanner``.
    """
    # NOTE(review): assigning ``groups`` directly and the two-element
    # SUBPATTERN payload look like the older sre_parse API -- confirm the
    # interpreter version this is meant to run on.
    from sre_constants import BRANCH, SUBPATTERN

    self.lexicon = lexicon
    state = sre_parse.Pattern()
    state.flags = flags

    # Wrap each phrase in a group numbered by its 1-based position.
    alternatives = []
    for phrase, _action in lexicon:
        group_number = len(alternatives) + 1
        parsed_phrase = sre_parse.parse(phrase, flags)
        alternatives.append(
            sre_parse.SubPattern(state, [(SUBPATTERN, (group_number, parsed_phrase))])
        )

    # One more than the number of phrases, set once all are collected.
    state.groups = len(alternatives) + 1
    compound = sre_parse.SubPattern(state, [(BRANCH, (None, alternatives))])
    self.scanner = sre_compile.compile(compound)
示例3: _compile
# 需要导入模块: import sre_constants [as 别名]
# 或者: from sre_constants import SUBPATTERN [as 别名]
def _compile(regexp):
    """Compile *regexp* wrapped in a single enclosing group.

    The expression is parsed, passed through ``_remove_group_identifiers``,
    and then placed inside one SUBPATTERN numbered 1 so that the material
    that was split on can be accessed. The result is compiled with
    ``re.UNICODE | re.MULTILINE | re.DOTALL``.

    @param regexp: the regular expression source to compile
    @return: the object returned by ``sre_compile.compile``
    """
    tree = _remove_group_identifiers(sre_parse.parse(regexp))

    # The parser state has to expect exactly the one group added below.
    state = sre_parse.Pattern()
    state.groups += 1

    wrapper = sre_parse.SubPattern(state)
    wrapper.append((sre_constants.SUBPATTERN, (1, tree)))

    compile_flags = re.UNICODE | re.MULTILINE | re.DOTALL
    return sre_compile.compile(wrapper, compile_flags)
示例4: __init__
# 需要导入模块: import sre_constants [as 别名]
# 或者: from sre_constants import SUBPATTERN [as 别名]
def __init__(self, lexicon, flags=0):
    """Compile every phrase of *lexicon* into a single alternation.

    Args:
        lexicon: Iterable of ``(phrase, action)`` pairs; only the phrase is
            used here. The object is stored unchanged on ``self.lexicon``.
        flags: Regex flags set on the shared parser state and passed to
            ``sre_parse.parse`` for every phrase.

    The compiled result is stored on ``self.scanner``.
    """
    # NOTE(review): ``sre_parse.Pattern`` together with a two-element
    # SUBPATTERN payload looks specific to one generation of the sre_parse
    # API -- confirm the interpreter version this is meant to run on.
    from sre_constants import BRANCH, SUBPATTERN

    self.lexicon = lexicon
    state = sre_parse.Pattern()
    state.flags = flags

    # Each phrase gets its own group, opened and closed on the shared state.
    alternatives = []
    for phrase, _action in lexicon:
        group_id = state.opengroup()
        parsed_phrase = sre_parse.parse(phrase, flags)
        wrapped = sre_parse.SubPattern(state, [(SUBPATTERN, (group_id, parsed_phrase))])
        alternatives.append(wrapped)
        state.closegroup(group_id, wrapped)

    compound = sre_parse.SubPattern(state, [(BRANCH, (None, alternatives))])
    self.scanner = sre_compile.compile(compound)
示例5: _make_match_string_from_pattern
# 需要导入模块: import sre_constants [as 别名]
# 或者: from sre_constants import SUBPATTERN [as 别名]
def _make_match_string_from_pattern(parsetree, makebad=False, groups=None):
    """Build a random string by walking a parsed regular expression.

    Args:
        parsetree: Iterable of ``(op, val)`` pairs as produced by
            ``sre_parse.parse``.
        makebad: When true, literals are deliberately altered (and, if the
            flag is still set at the end, one extra printable character is
            inserted) so that the result is intended not to match.
        groups: Mapping of group id to the text generated for that group,
            shared across recursive calls so back-references can reuse it.
            A fresh dict is created when omitted.

    Returns:
        The generated string.

    Raises:
        UnhandledOpError: if the tree contains an op this walker does not
            know how to generate text for.
    """
    collect = []
    if groups is None:
        groups = {}
    for op, val in parsetree:
        if op is sre_constants.LITERAL:
            if makebad:
                # XOR with 4 toggles a single bit, giving a different char.
                collect.append(chr((val ^ 4) & 0xFF))
                if random.randint(0, 9) == 0:
                    makebad = False  # don't error everything
            else:
                collect.append(chr(val))
        elif op is sre_constants.CATEGORY:
            collect.append(get_substitute(val, makebad))
        elif op is sre_constants.IN:
            if val[0][0] is sre_constants.CATEGORY:
                collect.append(_make_match_string_from_pattern(val, False, groups))
            else:
                member_op, member_arg = random.choice(val)
                if member_op is sre_constants.RANGE:
                    # A range member carries (low, high) rather than a
                    # single code point; pick any code point inside it.
                    low, high = member_arg
                    collect.append(chr(random.randint(low, high)))
                else:
                    # NOTE(review): negated sets are not treated specially
                    # here; confirm callers never pass them.
                    collect.append(chr(member_arg))
        elif op is sre_constants.BRANCH:
            # Choose among *all* alternatives, not just the first two.
            chosen = random.choice(val[1])
            collect.append(_make_match_string_from_pattern(chosen, False, groups))
        elif op is sre_constants.SUBPATTERN:
            # The sub-pattern is the last element of the payload whether
            # the payload has two or four elements.
            string = _make_match_string_from_pattern(val[-1], False, groups)
            groups[val[0]] = string
            collect.append(string)
        elif op is sre_constants.MAX_REPEAT or op is sre_constants.MIN_REPEAT:
            # Cap open-ended repeats at 10, but never below the required
            # minimum (otherwise randint would get an empty range).
            low = val[0]
            high = max(low, min(val[1], 10))
            for _ in range(random.randint(low, high)):
                collect.append(_make_match_string_from_pattern(val[2], False, groups))
        elif op is sre_constants.ANY:
            collect.append(random.choice(ANYCHAR))
        elif op is sre_constants.GROUPREF:
            collect.append(groups[val])
        elif op is sre_constants.AT:
            pass  # ignore anchors
        else:
            raise UnhandledOpError("Unhandled RE op: %r" % (op,))
    if makebad:  # in case it didn't get done yet.
        # randrange(0, 0) is an error, so an empty result inserts at 0.
        position = random.randrange(0, len(collect)) if collect else 0
        collect.insert(position, random.choice(ascii.printable))
    return "".join(collect)
示例6: __init__
# 需要导入模块: import sre_constants [as 别名]
# 或者: from sre_constants import SUBPATTERN [as 别名]
def __init__(self, lexicon, flags=0):
    """Compile every phrase of *lexicon* into a single alternation.

    Args:
        lexicon: Iterable of ``(phrase, action)`` pairs; only the phrase is
            used here. The object is stored unchanged on ``self.lexicon``.
        flags: Regex flags set on the shared parser state and passed to
            ``sre_parse.parse`` for every phrase.

    The compiled result is stored on ``self.scanner``.
    """
    # NOTE(review): assigning ``groups`` directly and the two-element
    # SUBPATTERN payload look like the older sre_parse API -- confirm the
    # interpreter version this is meant to run on.
    from sre_constants import BRANCH, SUBPATTERN

    self.lexicon = lexicon
    parser_state = sre_parse.Pattern()
    parser_state.flags = flags

    # Group numbers are the 1-based positions of the phrases.
    wrapped_phrases = []
    for phrase, _action in lexicon:
        position = len(wrapped_phrases) + 1
        parsed_phrase = sre_parse.parse(phrase, flags)
        group = (SUBPATTERN, (position, parsed_phrase))
        wrapped_phrases.append(sre_parse.SubPattern(parser_state, [group]))

    # One more than the number of phrases, set once all are collected.
    parser_state.groups = len(wrapped_phrases) + 1
    branch = (BRANCH, (None, wrapped_phrases))
    self.scanner = sre_compile.compile(sre_parse.SubPattern(parser_state, [branch]))
示例7: __init__
# 需要导入模块: import sre_constants [as 别名]
# 或者: from sre_constants import SUBPATTERN [as 别名]
def __init__(self, pattern, flags=0, charset=CHARSET, max_count=None):
    """Prepare the generator of all values described by *pattern*.

    Args:
        pattern: Regular-expression source. It is compiled (anchored with
            ``\\Z``) up front so that invalid expressions fail immediately.
        flags: ``re`` flags. IGNORECASE, UNICODE and LOCALE are rejected.
        charset: Characters to draw from; newline is removed from it unless
            ``re.DOTALL`` is set.
        max_count: Stored as ``self.max_count``; ``MAX_REPEAT_COUNT`` is
            used when it is ``None``.

    Raises:
        ParseError: if an unsupported flag is present in *flags*.
    """
    # If the RE module cannot compile it, we give up quickly
    self.matcher = re.compile(r'(?:%s)\Z' % pattern, flags)

    dotall = flags & re.DOTALL
    if not dotall:
        charset = ''.join(c for c in charset if c != '\n')
    self.charset = charset
    self.named_group_lookup = self.matcher.groupindex

    # Flags are checked in this order; the first one present is reported.
    unsupported_flags = (
        (re.IGNORECASE, 'Flag "i" not supported. https://github.com/google/sre_yield/issues/4'),
        (re.UNICODE, 'Flag "u" not supported. https://github.com/google/sre_yield/issues/3'),
        (re.LOCALE, 'Flag "l" not supported. https://github.com/google/sre_yield/issues/5'),
    )
    for flag, message in unsupported_flags:
        if flags & flag:
            raise ParseError(message)

    self.max_count = MAX_REPEAT_COUNT if max_count is None else max_count
    self.has_groupref = False

    # Configure the parser backends: one handler per parse-tree op.
    self.backends = {
        sre_constants.LITERAL: lambda y: [chr(y)],
        sre_constants.RANGE: lambda l, h: [chr(c) for c in range(l, h+1)],
        sre_constants.SUBPATTERN: self.maybe_save,
        sre_constants.BRANCH: self.branch_values,
        sre_constants.MIN_REPEAT: self.max_repeat_values,
        sre_constants.MAX_REPEAT: self.max_repeat_values,
        sre_constants.AT: self.empty_list,
        sre_constants.ASSERT: self.empty_list,
        sre_constants.ASSERT_NOT: self.empty_list,
        # ANY is delegated to in_values with a lone NEGATE entry.
        sre_constants.ANY:
            lambda _: self.in_values(((sre_constants.NEGATE,),)),
        sre_constants.IN: self.in_values,
        sre_constants.NOT_LITERAL: self.not_literal,
        sre_constants.CATEGORY: self.category,
        sre_constants.GROUPREF: self.groupref,
    }

    # Now build a generator that knows all possible patterns
    parsed = sre_parse.parse(pattern, flags)
    self.raw = self.sub_values(parsed)

    # Configure this class instance to know about that result.
    # __len__ is invoked directly rather than through len().
    self.length = self.raw.__len__()
示例8: _remove_group_identifiers
# 需要导入模块: import sre_constants [as 别名]
# 或者: from sre_constants import SUBPATTERN [as 别名]
def _remove_group_identifiers(parsed_re):
    """
    Modifies the given parsed regular expression, replacing all groupings
    (as indicated by parentheses in the regular expression string) with
    non-grouping variants (indicated with '(?:...)'). This works on the
    output of sre_parse.parse, modifying the group identifier in
    SUBPATTERN structures to None.

    SubPattern objects are updated in place and returned; lists and tuples
    are rebuilt as new objects of the same kind; anything else is returned
    untouched.

    @param parsed_re: the output of sre_parse.parse(string)
    @type parsed_re: C{SubPattern}
    @return: the processed structure
    """
    # Why this works on the parse tree rather than on the pattern text:
    # the grouping parentheses have to be replaced with non-grouping ones
    # because the list returned by re.sub would otherwise contain an
    # element for every set of grouping parentheses. Doing that textually
    # would have to skip escaped parentheses, cope with escaped escapes
    # (e.g. "\\("), handle nesting with zero-width contexts, and deal with
    # parentheses inside bracketed sets -- hence the intermediate parse
    # structure from sre_parse is used instead.
    if isinstance(parsed_re, sre_parse.SubPattern):
        # These classes are mutable, so each item is replaced in place
        # with its processed equivalent.
        for i in range(len(parsed_re)):
            parsed_re[i] = _remove_group_identifiers(parsed_re[i])
        return parsed_re

    if not isinstance(parsed_re, (list, tuple)):
        # Don't need to do anything to other types
        return parsed_re

    to_process = list(parsed_re)
    # Identity, not equality: a plain integer elsewhere in the tree (a
    # repeat bound, a character code) may compare equal to the op constant
    # without being a SUBPATTERN marker. The emptiness check keeps an empty
    # sequence from raising IndexError.
    if to_process and to_process[0] is sre_constants.SUBPATTERN:
        # Replace the group id (first element of the payload) with None.
        sub_item = list(to_process[1])
        sub_item[0] = None
        to_process[1] = tuple(sub_item)

    # Process each item, in the case of nested SUBPATTERNS. A real list is
    # built so the result is never a lazy iterator.
    processed = [_remove_group_identifiers(item) for item in to_process]

    # Coerce back into the original type
    if isinstance(parsed_re, list):
        return processed
    return tuple(processed)
示例9: check_anchor_state
# 需要导入模块: import sre_constants [as 别名]
# 或者: from sre_constants import SUBPATTERN [as 别名]
def check_anchor_state(self, matcher, arguments):
    """Advance the START/MIDDLE/END anchor state for one parse-tree item.

    Args:
        matcher: The op of the item being visited.
        arguments: The item's arguments; ``arguments[0]`` is read for
            anchors and for the error raised in the END state.

    Raises:
        ParseError: if the item is not allowed in the current state.
    """
    # A bit of a hack to support zero-width leading anchors. The goal is
    # that /^(a|b)$/ will match properly, and that /a^b/ or /a\bb/ throws
    # an error. (It's unfortunate that /$^/, which matches the empty
    # string, is not easily handled; this goes for the common case.)
    #
    # Walking the example  ^ \b ^ X Y \b $  item by item:
    #   ^    START   (no transition; illegal in MIDDLE or END)
    #   \b   START   (no transition here, but advances MIDDLE to END)
    #   ^    START   (same as the first ^)
    #   X    MIDDLE  (anything besides an anchor advances START to MIDDLE)
    #   Y    MIDDLE  (still MIDDLE)
    #   \b   END     (advances MIDDLE to END)
    #   $    END
    current = self.state

    if matcher != sre_constants.AT:
        if matcher == sre_constants.SUBPATTERN:
            # A subpattern leaves the state alone, whatever it is.
            return
        if current == STATE_START:
            self.state = STATE_MIDDLE
        elif current == STATE_END:
            raise ParseError(
                "Non-end-anchor %r found at END state" % (arguments[0],)
            )
        # Nothing changes for a non-anchor in MIDDLE.
        return

    anchor = arguments[0]

    if current == STATE_START:
        if anchor in (sre_constants.AT_END, sre_constants.AT_END_STRING):
            self.state = STATE_END
        elif anchor == sre_constants.AT_NON_BOUNDARY:
            # This is nonsensical at beginning of string
            raise ParseError("Anchor %r found at START state" % (anchor,))
        # All others (AT_BEGINNING, AT_BEGINNING_STRING, and AT_BOUNDARY)
        # remain in START.
        return

    trailing_anchors = (
        sre_constants.AT_END,
        sre_constants.AT_END_STRING,
        sre_constants.AT_BOUNDARY,
    )

    if current == STATE_END:
        if anchor not in trailing_anchors:
            raise ParseError("Anchor %r found at END state" % (anchor,))
        # those three remain in END
        return

    # Any other state is treated as MIDDLE.
    if anchor not in trailing_anchors:
        raise ParseError(
            "Anchor %r found at MIDDLE state" % (anchor,)
        )
    # AT_END, AT_END_STRING and AT_BOUNDARY advance to END.
    self.state = STATE_END