本文整理汇总了Python中sre_parse.parse函数的典型用法代码示例。如果您正苦于以下问题：Python parse函数的具体用法？Python parse怎么用？Python parse使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了parse函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test
def test():
    """Run ``patternToExpression`` on two parsed sample file-name patterns.

    Each pattern is parsed with ``sre_parse.parse`` and the parse result is
    handed to ``ire.patternToExpression``; the return values are discarded,
    so this only checks that the calls complete without raising.
    """
    import ClearMap.Utils.InverseRegularExpression as ire
    import sre_parse as sre

    # NOTE(review): ``reload`` is not imported in this block; presumably the
    # Python 2 builtin is intended -- confirm before running on Python 3.
    reload(ire)
    # Raw string: the pattern contains ``\d``, which is not a valid escape
    # sequence in an ordinary string literal.
    source = r'/test/test_(?P<row>\d{4})_(?P<col>\d{3}).tif'
    p = sre.parse(source)
    ire.patternToExpression(p)

    reload(ire)
    source = r'/test/test_(?:\d)_(?P<col>\d{3})_[7-9][.](?=col)tif$'
    p = sre.parse(source)
    ire.patternToExpression(p)
示例2: charclass_runner
def charclass_runner(pat):
    """Check regexlint's character-class expansion of ``pat`` against sre_parse.

    The first child of regexlint's parse tree supplies
    ``matching_character_codes``; the reference ("golden") list is built by
    running ``expand_sre_in`` over what ``sre_parse.parse`` produced.  For a
    negated class (``NOT_LITERAL`` or a leading ``NEGATE``) the golden list is
    complemented over ``range(256)`` and compared order-insensitively.
    """
    tree = Regex().get_parse_tree(pat)
    regexlint_version = tree.children[0].matching_character_codes

    sre_parsed = sre_parse.parse(pat)
    print(sre_parsed)

    first_item = sre_parsed[0]
    head_op = first_item[0]
    head_arg = first_item[1]

    # A bare literal carries an int argument; a class carries a list of items.
    sre_chars = sre_parsed if isinstance(head_arg, int) else head_arg
    print('inner', sre_chars)
    golden = list(expand_sre_in(sre_chars))

    try:
        negated = (head_op == sre_constants.NOT_LITERAL or
                   head_arg[0][0] == sre_constants.NEGATE)
    except TypeError:
        # The argument is not subscriptable, so it cannot be a negated class.
        negated = False

    if negated:
        golden = [code for code in range(256) if code not in golden]

    print('sre_parse', golden)
    print('regexlint', regexlint_version)

    if negated:
        print('extra:', sorted(set(regexlint_version) - set(golden)))
        print('missing:', sorted(set(golden) - set(regexlint_version)))
        assert sorted(golden) == sorted(regexlint_version)
    else:
        assert golden == regexlint_version
示例3: reverse_group_map
def reverse_group_map(re_str):
    """Return a mapping from group number to group name for ``re_str``.

    Arguments:
        re_str: regular expression source to compile.

    Returns:
        dict: ``{index: name}`` for every named group in the pattern, i.e.
        the inverse of the compiled pattern's ``groupindex``.  Patterns
        without named groups give an empty dict.

    Raises:
        re.error: if ``re_str`` is not a valid regular expression.
    """
    # Compiling already validates the pattern; a separate sre_parse pass whose
    # result was never used has been dropped.
    compiled = re.compile(re_str)
    return {index: name for name, index in compiled.groupindex.items()}
示例4: compile
def compile(p, flags=0):
    """Internal: turn a pattern (source string or parsed form) into a compiled one.

    A string is parsed with ``sre_parse.parse`` first and kept as the pattern
    source; anything else is taken as already parsed and the source is
    recorded as ``None``.  Raises ``AssertionError`` when the parsed pattern
    reports more than 100 groups.
    """
    pattern = None
    if isstring(p):
        pattern = p
        p = sre_parse.parse(pattern, flags)

    code = _code(p, flags)

    state = p.pattern
    # XXX: <fl> get rid of this limitation!
    if state.groups > 100:
        raise AssertionError(
            "sorry, but this version only supports 100 named groups"
        )

    # Build the lookup in both directions: name -> index and index -> name.
    groupindex = state.groupdict
    indexgroup = [None] * state.groups
    for name, number in groupindex.items():
        indexgroup[number] = name

    combined_flags = flags | state.flags
    return _sre.compile(
        pattern, combined_flags, code,
        state.groups - 1,
        groupindex, indexgroup
    )
示例5: isFileExpression
def isFileExpression(source):
    """Tell whether a file name is a regular expression denoting a file list.

    Arguments:
        source (str): source file name

    Returns:
        bool: True if source parses to a regular expression containing at
        least one item that is neither a literal nor the any-character

    Note:
        The any character '.' is not counted as a non-literal, so plain
        ``filename.ext`` names are accepted even though a strict regular
        expression would spell them ``filename\\.ext``.
    """
    if not isinstance(source, basestring):
        return False
    # An existing file is never treated as an expression.
    if isFile(source):
        return False

    parsed = sre_parse.parse(source)
    return any(item[0] not in ('literal', 'any') for item in parsed)
示例6: compile_regexp_to_noncapturing
def compile_regexp_to_noncapturing(pattern, flags=0):
    """
    Compile ``pattern`` after marking its groups as non-capturing.

    The pattern is parsed with ``sre_parse``; every ``SUBPATTERN`` item found
    directly in the parsed item list (and, recursively, directly inside such
    a group) has its group index replaced by ``None``.  The group count is
    then reset to 1 and the name table is cleared before the parsed pattern
    is handed to ``sre_compile.compile``.

    NOTE(review): groups nested inside other constructs (repeats, branches,
    assertions) are not visited by this walk, and the code relies on private
    ``sre_parse`` internals (``.pattern``, two-element SUBPATTERN values) --
    confirm against the interpreter version in use.

    :type pattern: str
    :param flags: regular expression flags, applied both when parsing and
        when compiling
    :type flags: int
    :return: the object returned by ``sre_compile.compile``
    :raise ValueError: if the pattern contains a back-reference
    """
    def convert_regexp_to_noncapturing_parsed(parsed_pattern):
        res_data = []
        for key, value in parsed_pattern.data:
            if key == sre_constants.SUBPATTERN:
                index, subpattern = value
                # Recurse through this same helper so nested groups are
                # rewritten on the parsed representation as well.
                value = (None, convert_regexp_to_noncapturing_parsed(subpattern))
            elif key == sre_constants.GROUPREF:
                raise ValueError('Regular expressions with back-references are not supported: {0}'.format(pattern))
            res_data.append((key, value))
        parsed_pattern.data = res_data
        # Only the implicit whole-match group remains.
        parsed_pattern.pattern.groups = 1
        parsed_pattern.pattern.groupdict = {}
        return parsed_pattern

    parsed = sre_parse.parse(pattern, flags)
    return sre_compile.compile(convert_regexp_to_noncapturing_parsed(parsed), flags)
示例7: regex_score
def regex_score(regex, search_string):
    """
    Returns a closeness score of how well the regex matches the string.

    The base score is the larger of two Levenshtein distances (pattern text
    vs. matched text, matched text vs. the whole search string), compared
    case-insensitively.  Anchors found at the top level of the parsed
    pattern then lower the score, which is clamped at 0.

    :param regex: compiled pattern (its ``.pattern`` attribute is read)
    :param search_string: text to search
    :return: non-negative score when the regex matches, -1 if it doesn't
    """
    match = re.search(regex, search_string)
    if not match:
        return -1

    matched_text = match.group(0).lower()
    # Base score is the longest distance between regex, match,
    # and search_string
    regex_match_dist = levenshtein_distance(
        regex.pattern.lower(), matched_text)
    match_string_dist = levenshtein_distance(
        matched_text, search_string.lower())
    score = max(regex_match_dist, match_string_dist)

    # Adjust score: special anchors slightly reduce distance
    for opcode, argument in sre_parse.parse(regex.pattern):
        if str(opcode) != 'AT':
            continue
        anchor = str(argument)
        # Membership test: the former ``== 'AT_BEGINNING' or 'AT_END'`` was
        # always true, so every anchor was counted as ^/$.
        if anchor in ('AT_BEGINNING', 'AT_END'):
            # ^ or $, adjust 1 edit
            score -= 1
        elif anchor == 'AT_BOUNDARY':
            # word boundary, adjust 2 edits
            score -= 2
    return max(score, 0)
示例8: re_replace_literals
def re_replace_literals(text, mapping):
    """Parse ``text`` as a regex and rebuild it via ``_construct_regexp``.

    ``text`` must be a ``unicode`` instance (checked with an assert).
    Raises NotImplementedError or re.error.
    """
    assert isinstance(text, unicode)
    return _construct_regexp(sre_parse.parse(text), mapping)
示例9: __init__
def __init__(self, lexicon, flags=FLAGS):
    """Build one compound scanner pattern out of every token in ``lexicon``.

    Each token's ``pattern`` is parsed and wrapped in its own numbered group
    (1-based); the groups are joined into a single BRANCH and compiled into
    ``self.scanner``.  ``self.actions`` holds ``None`` followed by the tokens
    in lexicon order.
    """
    self.actions = [None]

    # Shared parser state for the combined pattern.
    state = sre_parse.Pattern()
    state.flags = flags
    alternatives = []

    # NOTE(kgibbs): a local counter generator is used so this file works
    # under Python 2.2, which is commonly used at Google.
    def _numbered(obj):
        position = 0
        for item in obj:
            yield position, item
            position += 1
    # NOTE(kgibbs): End changes.

    for idx, token in _numbered(lexicon):
        # Parse errors (sre_constants.error) propagate to the caller as-is.
        parsed_phrase = sre_parse.parse(token.pattern, flags)
        group = sre_parse.SubPattern(
            state, [(SUBPATTERN, (idx + 1, parsed_phrase))])
        alternatives.append(group)
        self.actions.append(token)

    # NOTE(guido): Added to make SRE validation work
    state.groups = len(alternatives) + 1
    combined = sre_parse.SubPattern(state, [(BRANCH, (None, alternatives))])
    self.scanner = sre_compile.compile(combined)
示例10: clean_pattern
def clean_pattern(pattern):
    """
    Reduce a URL regex to a simple rule made of literals and ``*`` wildcards.

    Literal items are URL-quoted and kept, anchors are skipped, and every
    other item becomes a single ``*`` (consecutive wildcards are merged).
    The rule always starts with ``/``.  If a non-empty pattern does not end
    in a wildcard, ``$`` is appended when its last item is an end anchor and
    ``*`` otherwise.

    Examples of parsed tokens:
        '2'   => ('literal', 50)
        '2|3' => ('in', [('literal', 50), ('literal', 51)])
    """
    star = '*'
    parsed = sre_parse.parse(pattern)

    pieces = []
    for token in parsed:
        kind = token[0]
        if kind == AT:
            # Anchors contribute nothing to the body of the rule.
            continue
        if kind == LITERAL:
            pieces.append(quote(unichr(token[1]).encode('utf8')))
        elif pieces[-1:] != [star]:
            pieces.append(star)

    rule = '/' + ''.join(pieces)
    if not parsed or rule.endswith(star):
        return rule

    if parsed[-1] == (AT, AT_END):
        return rule + '$'
    return rule + star
示例11: compile
def compile(p, flags=0):
    """Internal: convert a pattern (string or parsed list) to compiled form.

    Strings are parsed with ``sre_parse.parse`` and remembered as the pattern
    source; other inputs are treated as already parsed and the source is
    ``None``.  With ``SRE_FLAG_DEBUG`` set, a blank line and the disassembled
    code are printed.
    """
    if isstring(p):
        pattern, p = p, sre_parse.parse(p, flags)
    else:
        pattern = None

    code = _code(p, flags)

    if flags & SRE_FLAG_DEBUG:
        print()
        dis(code)

    state = p.state
    group_count = state.groups

    # Index -> name table, the reverse of the name -> index dict.
    groupindex = state.groupdict
    indexgroup = [None] * group_count
    for name, number in groupindex.items():
        indexgroup[number] = name

    return _sre.compile(
        pattern, flags | state.flags, code,
        group_count - 1,
        groupindex, tuple(indexgroup)
    )
示例12: charclass_runner
def charclass_runner(pat):
    """Compare regexlint's character codes for ``pat`` with sre_parse's view.

    The reference list comes from ``expand_sre_in`` applied to the parsed
    pattern.  When the first parsed item is ``'not_literal'`` or a class that
    starts with ``'negate'``, the reference is complemented over
    ``range(256)`` and the comparison ignores ordering.
    """
    tree = Regex().get_parse_tree(pat)
    regexlint_version = tree.children[0].matching_character_codes

    sre_parsed = sre_parse.parse(pat)
    print(sre_parsed)

    first_item = sre_parsed[0]
    head_op = first_item[0]
    head_arg = first_item[1]

    # An int argument means a single literal; otherwise it is the class body.
    sre_chars = sre_parsed if isinstance(head_arg, int) else head_arg
    golden = list(expand_sre_in(sre_chars))

    try:
        negated = (head_op == 'not_literal' or
                   head_arg[0][0] == 'negate')
    except TypeError:
        # Argument cannot be indexed, so this is not a negated class.
        negated = False

    if negated:
        golden = [code for code in range(256) if code not in golden]

    print(golden)
    print(regexlint_version)

    if negated:
        assert sorted(golden) == sorted(regexlint_version)
    else:
        assert golden == regexlint_version
示例13: compile
def compile(p, flags=0):
    """Internal: convert a pattern list (or pattern string) to internal format.

    When ``type(p)`` is one of ``STRING_TYPES`` the string is parsed with
    ``sre_parse.parse`` (imported lazily) and kept as the pattern source;
    otherwise ``p`` is used as given and the source is ``None``.  Asserts
    that the parsed pattern has at most 100 groups.
    """
    pattern = None
    if type(p) in STRING_TYPES:
        import sre_parse
        pattern = p
        p = sre_parse.parse(pattern, flags)

    code = _code(p, flags)

    state = p.pattern
    # FIXME: <fl> get rid of this limitation!
    assert state.groups <= 100, \
        "sorry, but this version only supports 100 named groups"

    # Fill the index -> name list from the name -> index dict.
    groupindex = state.groupdict
    indexgroup = [None] * state.groups
    for name, number in groupindex.items():
        indexgroup[number] = name

    return _sre.compile(
        pattern, flags, code,
        state.groups - 1,
        groupindex, indexgroup
    )
示例14: doReversing
def doReversing(p):
    """Parse the regex source ``p`` with flags 0 and return ``reverse`` of it.

    The source is passed to ``dbg`` (prefixed with ``"Pattern:"``) before
    parsing.  Example input: ``'ab*de.gh+i{10}'``.
    """
    dbg("Pattern:" + p)
    return reverse(sre_parse.parse(p, 0))
示例15: regex_slice
def regex_slice(expr, start, end):
    """
    Build a regex for positions ``start`` to ``end`` (exclusive) of ``expr``.

    Every index is resolved separately through ``_regex_index_pattern`` and
    the per-index results are concatenated.  Because positions are handled
    independently the result can be overly general: for example, it can mix
    characters from both branches of a regex.  Returns None as soon as some
    index has no possible choices.

    >>> regex_slice('test', 0, 1)
    't'
    >>> regex_slice('t?est', 0, 2)
    '[te][es]'
    >>> regex_slice('mo+', 3, 8)
    'ooooo'
    """
    if start < 0 or end < 0:
        raise NotImplementedError("Can't take negative slices of a regex yet")

    pieces = []
    for index in range(start, end):
        choices = _regex_index_pattern(parse(expr), index)
        count = len(choices)
        if count == 0:
            return None
        if count == 1:
            pieces.append(unparse(choices[0]))
            continue

        # Several candidates: express them as one alternation and wrap it in
        # parentheses only if a '|' survives the round trip.
        alternation = round_trip(unparse(('branch', (None, choices))))
        if '|' in alternation:
            pieces.append('(%s)' % (alternation,))
        else:
            pieces.append(alternation)
    return ''.join(pieces)