本文整理汇总了Python中re.Scanner方法的典型用法代码示例。如果您正苦于以下问题：Python re.Scanner方法的具体用法？Python re.Scanner怎么用？Python re.Scanner使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类re的用法示例。
在下文中一共展示了re.Scanner方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scanner
# 需要导入模块: import re [as 别名]
# 或者: from re import Scanner [as 别名]
def scanner(cls):
    """Return the tokenizer for ``cls``, building and caching it on first use.

    The ``re.Scanner`` instance is stored on ``cls._scanner`` and reused on
    later calls. Each rule wraps the matched text in
    ``cls.Token(kind, text)``, where ``kind`` is one of the ``cls`` constants
    referenced in the lexicon below.
    """
    if not getattr(cls, '_scanner', None):
        def h(tpe):
            # Bind the token kind now; re.Scanner later calls the handler
            # with (scanner, matched_text).
            return lambda sc, tk: cls.Token(tpe, tk)

        cls._scanner = re.Scanner([
            # Line comment: "--" or "//" up to the end of the current line
            # (MULTILINE makes "$" match before each newline).
            (r"(--|//).*?$", h(cls.LINE_COMMENT)),
            # Block comment. ``.*?`` (not ``.+?``) so that the empty comment
            # "/**/" is recognised instead of running on to a later "*/".
            (r"\/\*.*?\*\/", h(cls.BLOCK_COMMENT)),
            # Double-quoted, single-quoted and $$-delimited strings; a
            # backslash escapes the following character.
            (r'"(?:[^"\\]|\\.)*"', h(cls.STRING)),
            (r"'(?:[^'\\]|\\.)*'", h(cls.STRING)),
            (r"\$\$(?:[^\$\\]|\\.)*\$\$", h(cls.STRING)),
            (r";", h(cls.SEMICOLON)),
            (r"\s+", h(cls.WHITESPACE)),
            # Catch-all: any other single character (DOTALL includes "\n").
            (r".", h(cls.OTHER)),
        ], re.MULTILINE | re.DOTALL)
    return cls._scanner
示例2: test_scanner
# 需要导入模块: import re [as 别名]
# 或者: from re import Scanner [as 别名]
def test_scanner(self):
def s_ident(scanner, token): return token
def s_operator(scanner, token): return "op%s" % token
def s_float(scanner, token): return float(token)
def s_int(scanner, token): return int(token)
scanner = Scanner([
(r"[a-zA-Z_]\w*", s_ident),
(r"\d+\.\d*", s_float),
(r"\d+", s_int),
(r"=|\+|-|\*|/", s_operator),
(r"\s+", None),
])
self.assertTrue(scanner.scanner.scanner("").pattern)
self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
(['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
'op+', 'bar'], ''))
示例3: test_scanner
# 需要导入模块: import re [as 别名]
# 或者: from re import Scanner [as 别名]
def test_scanner(self):
def s_ident(scanner, token): return token
def s_operator(scanner, token): return "op%s" % token
def s_float(scanner, token): return float(token)
def s_int(scanner, token): return int(token)
scanner = Scanner([
(r"[a-zA-Z_]\w*", s_ident),
(r"\d+\.\d*", s_float),
(r"\d+", s_int),
(r"=|\+|-|\*|/", s_operator),
(r"\s+", None),
])
self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
(['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
'op+', 'bar'], ''))
示例4: test_unlimited_zero_width_repeat
# 需要导入模块: import re [as 别名]
# 或者: from re import Scanner [as 别名]
def test_unlimited_zero_width_repeat(self):
# Issue #9669
self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
# def test_scanner(self):
# def s_ident(scanner, token): return token
# def s_operator(scanner, token): return "op%s" % token
# def s_float(scanner, token): return float(token)
# def s_int(scanner, token): return int(token)
#
# scanner = Scanner([
# (r"[a-zA-Z_]\w*", s_ident),
# (r"\d+\.\d*", s_float),
# (r"\d+", s_int),
# (r"=|\+|-|\*|/", s_operator),
# (r"\s+", None),
# ])
#
# self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
#
# self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
# (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
# 'op+', 'bar'], ''))
示例5: __init__
# 需要导入模块: import re [as 别名]
# 或者: from re import Scanner [as 别名]
def __init__(self):
    """Build ``self.scanner``, a ``re.Scanner`` emitting ``(kind, text)`` pairs.

    Token kinds are ``'IDENT'``, ``'DIGIT'``, ``'OPEN_BRACE'`` and
    ``'CLOSE_BRACE'``. Colons and whitespace are consumed without producing
    a token. No operator rule is registered, so any other character ends the
    scan and is returned in the unscanned remainder.
    """
    def identifier(scanner, token):
        return 'IDENT', token

    def digit(scanner, token):
        return 'DIGIT', token

    def open_brace(scanner, token):
        return 'OPEN_BRACE', token

    def close_brace(scanner, token):
        return 'CLOSE_BRACE', token

    self.scanner = re.Scanner([
        (r'[a-zA-Z_]\w*', identifier),
        # Number pattern chosen so that a bare fraction (".34") and
        # exponent notation ("4.56e-2") are both accepted.
        (r'-*[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?', digit),
        # Braces are escaped so they are unambiguously literal.
        (r'\}', close_brace),
        (r'\{', open_brace),
        (r':', None),
        (r'\s+', None),
    ])
示例6: _tokenize_constraint
# 需要导入模块: import re [as 别名]
# 或者: from re import Scanner [as 别名]
def _tokenize_constraint(string, variable_names):
    """Split a constraint string into tokens.

    :param string: the constraint text to scan.
    :param variable_names: iterable of names recognised as ``"VARIABLE"``
        tokens; empty names are ignored.
    :returns: the list of tokens produced by the ``_token_maker`` actions.
    :raises PatsyError: if some part of ``string`` cannot be tokenized; the
        error origin points at the first unscannable character.
    """
    lparen_re = r"\("
    rparen_re = r"\)"
    op_re = "|".join([re.escape(op.token_type) for op in _ops])
    num_re = r"[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?"
    whitespace_re = r"\s+"

    # Prefer long matches: alternation picks the first alternative that
    # matches, so longer names must come first. Empty names are dropped
    # because an empty alternative matches zero characters.
    variable_names = sorted(
        (n for n in variable_names if n), key=len, reverse=True
    )
    variable_re = "|".join([re.escape(n) for n in variable_names])

    lexicon = [
        (lparen_re, _token_maker(Token.LPAREN, string)),
        (rparen_re, _token_maker(Token.RPAREN, string)),
        (op_re, _token_maker("__OP__", string)),
    ]
    # With no variable names the pattern would be empty; re.Scanner stops
    # scanning on a zero-width match, which would make numbers and
    # whitespace unscannable. Only register the rule when it can consume
    # input.
    if variable_re:
        lexicon.append((variable_re, _token_maker("VARIABLE", string)))
    lexicon.append((num_re, _token_maker("NUMBER", string)))
    lexicon.append((whitespace_re, None))

    scanner = re.Scanner(lexicon)
    tokens, leftover = scanner.scan(string)
    if leftover:
        # Index of the first character the scanner could not consume.
        offset = len(string) - len(leftover)
        raise PatsyError("unrecognized token in constraint",
                         Origin(string, offset, offset + 1))

    return tokens
示例7: __init__
# 需要导入模块: import re [as 别名]
# 或者: from re import Scanner [as 别名]
def __init__(self):
    """Build ``self.scanner`` for bare and double-quoted strings.

    Bare runs of the allowed characters become ``Token.STR`` tokens.
    Double-quoted runs, which may also contain spaces, become ``Token.QSTR``
    tokens with the quotes removed. Single spaces produce no token.
    """
    valid_fnchars = r"A-Za-z0-9_%#~@/\$!\*\.\+\-\:"

    def bare_string(_scanner, text):
        return Token(Token.STR, text)

    def quoted_string(_scanner, text):
        # Drop the surrounding double quotes.
        return Token(Token.QSTR, text[1:-1])

    def skip_space(_scanner, _text):
        # A None result is not added to the scan output.
        return None

    bare_pattern = r"[%s]+" % valid_fnchars
    quoted_pattern = r'"[%s ]+"' % valid_fnchars

    self.scanner = re.Scanner([
        (bare_pattern, bare_string),
        (quoted_pattern, quoted_string),
        (r"[ ]", skip_space),
    ])
示例8: __init__
# 需要导入模块: import re [as 别名]
# 或者: from re import Scanner [as 别名]
def __init__(self, query, default_conjunction='AND'):
    """Store the query settings and build the query tokenizer.

    ``query`` and ``default_conjunction`` are stored unchanged on the
    instance. ``self.scanner`` is a ``re.Scanner`` that emits
    ``(kind, text)`` pairs with kinds ``'STRING'``, ``'AND'``, ``'OR'``,
    ``'LPAREN'`` and ``'RPAREN'``; whitespace is skipped.
    """
    self.query = query
    self.default_conjunction = default_conjunction

    def symbol(kind):
        # Handler factory: the emitted token is (kind, matched_text).
        return lambda _scanner, text: (kind, text)

    def quoted(_scanner, text):
        # Strip the surrounding quote characters, then lower-case.
        inner = text[1:-1]
        return ('STRING', inner.lower())

    def bare_word(_scanner, text):
        return ('STRING', text.lower())

    lexicon = [
        # Quoted strings (either quote style); backslash escapes a character.
        (r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"', quoted),
        (r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'", quoted),
        # Keyword operators are tried before the generic word rule.
        (r'\bAND\b', symbol('AND')),
        (r'\bOR\b', symbol('OR')),
        (r'[@_\-\w]+', bare_word),
        # Symbolic spellings of the same operators.
        (r'&', symbol('AND')),
        (r'\|', symbol('OR')),
        (r'\(', symbol('LPAREN')),
        (r'\)', symbol('RPAREN')),
        (r'\s+', None),
    ]
    self.scanner = re.Scanner(lexicon, re.U)
示例9: _tokenize
# 需要导入模块: import re [as 别名]
# 或者: from re import Scanner [as 别名]
def _tokenize(layout: Layout, mv_string: str):
    """Lex ``mv_string`` into a list of ``(kind, match, value)`` triples.

    ``match`` is the scanner's match object for the token. ``value`` is the
    sign (+1 / -1), the float coefficient, or the blade's index in
    ``layout.names``; it is ``None`` for every other kind. A final ``'end'``
    token anchored at the end of the string is always appended.
    """
    # Get the names of the canonical blades
    index_of_blade = {}
    for position, blade in enumerate(layout.names):
        index_of_blade[blade] = position

    def bare(kind):
        # Handler for tokens that carry no value, only kind and match.
        return lambda scanner, _text: (kind, scanner.match, None)

    def sign(scanner, text):
        return ('sign', scanner.match, 1 if text == '+' else -1)

    def coeff(scanner, text):
        return ('coeff', scanner.match, float(text))

    def blade(scanner, text):
        return ('blade', scanner.match, index_of_blade[text])

    # Alternation of every non-empty blade name, as whole words.
    blade_alternatives = '|'.join(
        re.escape(name) for name in layout.names if name
    )
    blade_pattern = r'\b(?:{})\b'.format(blade_alternatives)

    tokenizer = re.Scanner([
        (r'\s+', bare('space')),
        (r'\(', bare('(')),
        (r'\)', bare(')')),
        (r'[+-]', sign),
        (_unsigned_float_pattern, coeff),
        (r'\^', bare('wedge')),
        (blade_pattern, blade),
        # Catch-all so that no input is left unscanned.
        (r'.', bare('unrecognized')),
    ])

    tokens, rest = tokenizer.scan(mv_string)
    assert not rest  # our unrecognized match should handle this

    end_match = re.compile(r'$').match(mv_string, len(mv_string))
    return tokens + [('end', end_match, None)]
示例10: tokenize_inner
# 需要导入模块: import re [as 别名]
# 或者: from re import Scanner [as 别名]
def tokenize_inner(contents):
    """
    Tokenize ``contents`` and return a list of ``(TokenType, text)`` pairs.

    Raises ``ValueError`` if any trailing part of the input cannot be
    matched by the lexicon.
    """
    # Regexes are in priority order. Changing the order may alter the
    # behavior of the lexer
    lexicon = [
        # Optional group brackets
        (r"\[", lambda _scanner, text: (TokenType.LSQ_BRACKET, text)),
        (r"\]", lambda _scanner, text: (TokenType.RSQ_BRACKET, text)),
        # Mandatory group brackets
        ("<", lambda _scanner, text: (TokenType.LA_BRACKET, text)),
        (">", lambda _scanner, text: (TokenType.RA_BRACKET, text)),
        # Parenthesis
        (r"\(", lambda _scanner, text: (TokenType.LPAREN, text)),
        (r"\)", lambda _scanner, text: (TokenType.RPAREN, text)),
        # Pipe character
        (r"\|", lambda _scanner, text: (TokenType.PIPE, text)),
        # uppercase name
        (r"[A-Z0-9_]+", lambda _scanner, text: (TokenType.BIGNAME, text)),
        # lowercase name
        (r"[a-z0-9_\-]+", lambda _scanner, text: (TokenType.SMALLNAME, text)),
        # ellipsis
        (r"\.\.\.", lambda _scanner, text: (TokenType.ELLIPSIS, text)),
        # whitespace
        (r"\s+", lambda _scanner, text: (TokenType.WHITESPACE, text)),
    ]
    lexer = re.Scanner(lexicon, re.DOTALL)

    tokens, remainder = lexer.scan(contents)
    if not remainder:
        return tokens
    raise ValueError("Unparsed tokens: {}".format(remainder))