This page collects typical usage examples of the Python re.Scanner class. If you have been wondering what re.Scanner is for, or how to use it, the curated examples below should help.
It presents 14 code examples of the Scanner class, sorted by popularity.
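Before the examples, a minimal sketch of the API may be useful (re.Scanner is undocumented but has long shipped with CPython): the constructor takes a list of (pattern, action) pairs plus optional regex flags, and scan() returns the list of action results together with the unmatched remainder of the input. The token names here are illustrative only:

import re

# Each action is called as action(scanner, matched_text); a rule whose
# action is None consumes its match but emits nothing.
sketch = re.Scanner([
    (r"[a-z]+", lambda scanner, token: ("WORD", token)),
    (r"\d+", lambda scanner, token: ("NUM", int(token))),
    (r"\s+", None),  # skip whitespace
])

tokens, remainder = sketch.scan("abc 42 !?")
print(tokens)     # [('WORD', 'abc'), ('NUM', 42)]
print(remainder)  # '!?' -- scanning stops at the first unmatched character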
Example 1: test_scanner
def test_scanner(self):
def s_ident(scanner, token):
return token
def s_operator(scanner, token):
return "op%s" % token
def s_float(scanner, token):
return float(token)
def s_int(scanner, token):
return int(token)
scanner = Scanner(
[
(r"[a-zA-Z_]\w*", s_ident),
(r"\d+\.\d*", s_float),
(r"\d+", s_int),
(r"=|\+|-|\*|/", s_operator),
(r"\s+", None),
]
)
self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
self.assertEqual(
scanner.scan("sum = 3*foo + 312.50 + bar"),
(["sum", "op=", 3, "op*", "foo", "op+", 312.5, "op+", "bar"], ""),
)
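Two details are worth noting in this test: rules whose action is None (the whitespace rule) are matched and consumed but contribute nothing to the result list, and scan() returns a (results, remainder) pair, so the trailing "" asserts that the whole input was tokenised. The scanner.scanner.scanner("") expression reaches down into the compiled SRE machinery underneath and is specific to CPython's own test suite.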
Example 2: scan
def scan(self, input):
"""Preforms the scan of the input and outputs any errors including
line on which the lexical error occured."""
scanner = Scanner([
(r"[\n]", self.newline),
(r"\"[^\"\n]*\"", self.string),
(r"\'[^\'\n]*\'", self.string),
(r"\b(if|fi|else|do|od|fa|af|to|proc)\b", self.key),
(r"\b(end|return|forward|var|type|break)\b", self.key),
(r"\b(exit|true|false|writes|write|read)\b", self.key),
(r"[A-Za-z][A-Za-z0-9_]*", self.identifier),
(r"\-\>|\(|\)|\[\]|\[|\]|;|:\=|:|\,", self.symbol),
(r"\+|\-|\/|\*|\=|\%|!\=|\>=|\<=|\>|\<|\?", self.operator),
(r"[0-9]+", self.integer),
(r"#.*(?=\n?)", self.ignore),
(r"[\t ]+", self.ignore),
])
tokens, remainder = scanner.scan(input)
tokens.append(('EOF', 'EOF'))
if remainder:
print "line %s: illegal character (%s)" % (
self.line, remainder[:1])
sys.exit(1)
else:
return tokens
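The actions above are bound methods, so re.Scanner invokes each one with the scanner and the matched text after the implicit self. The handler bodies are not part of this excerpt; a hypothetical sketch of what they would have to look like (names mirror the lexicon, details assumed):

def newline(self, scanner, token):
    self.line += 1   # feeds the error message printed in scan()
    return None      # returning None emits no token

def key(self, scanner, token):
    return ('KEY', token)   # (type, value) pairs, like the ('EOF', 'EOF') sentinel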
Example 3: __init__
def __init__(self, s):
self.string = s.strip()
Scanner.__init__(self,
[(r"/([^/\\]|\\.)*/", self.regexp),
(r'"([^"\\]|\\.)*"', self.str_to_re),
(r"'([^'\\]|\\.)*'", self.str_to_re),
(r"([<>]=?)|(!=)", self.relop),
(r"[=|()&!,#]", self.table),
(r"\s+", None),
(r"[^=)|&#/<>!,]+", self.tag)
])
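This example takes a different approach: the class inherits from re.Scanner and passes its own bound methods to Scanner.__init__, so instances expose scan() directly. Rule order matters, because the combined pattern tries lexicon entries in order at each position; the catch-all tag rule therefore comes last.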
Example 4: read_sexp
def read_sexp(sexp):
processor = TokenProcessor(sexp)
scanner = Scanner([
(r"\s+", processor("skip_whitespaces")),
(r";[^\n]*\n", processor("skip")),
(r""""(?:[^"])*"|(\]|\[|\)|\(|[^\(\)\s]+)""", processor("atom")),
(r".*", processor("error"))
], re.M)
scanner.scan(processor.string)
if processor.paren_stack:
processor.raise_error("missing closing parenthesis.")
result = eval("".join(processor.result).lstrip(","))
    return result[0] if isinstance(result, tuple) else result
Example 5: scan
def scan(self, string):
"""
Scans an input string for tokens, and returns them.
"""
scanner = Scanner([
(self.constant_signs, lambda _, tok: (self.constant_type, tok)),
(self.numerical_variables, lambda _, tok: (self.numerical_type, tok)),
(self.sentntial_variables, lambda _, tok: (self.sentntial_type, tok)),
(self.predicate_variables, lambda _, tok: (self.predicate_type, tok))])
tokens, remainder = scanner.scan(string)
if remainder:
if len(remainder) > 10:
remainder = remainder[:10]
raise LexicalException("Error lexing input near {0}...".format(remainder))
return tokens
Example 6: read
def read(self, value):
self.result = []
self.paren_stack = []
self.source = value
self.pos = 0
self.quoted = False
self.scanner = Scanner(
[
(r"\s+", self("skip")),
(r";[^\n]*\n", self("skip")),
(r""""(((?<=\\)")|[^"])*((?<!\\)")""", self("str")),
(r"(\(|\[)", self("open")),
(r"(\)|\])", self("close")),
(r"(([\d]+|(((\d+)?\.[\d]+)|([\d]+\.)))e[\+\-]?[\d]+)|(((\d+)?\.[\d]+)|([\d]+\.))", self("number")),
(r"\-?((0x[\da-f]+)|(0[0-7]+)|([1-9][\d]*)|0)[l]?", self("number")),
(r"""%s([^\(\[\)\]\s"]+)""" % self.symbol_marker, self("symbol")),
(r"'", self("quote")),
(r"""([^\(\[\)\]\s"]+)""", self("ident")),
(r"""".*""", self("unterm_str")),
(r".*", self("unknown_token")),
],
re.M | re.S | re.I,
)
self.scanner.scan(self.source)
if self.paren_stack:
self.raise_error("missing closing parenthesis.")
return self.parse(self.result)
Example 7: vt_parse
def vt_parse(str):
    # We'll memoise this function so several calls on the same input don't
    # require re-parsing.  (vt_parse.memory is assumed to be initialised
    # to an empty dict elsewhere in the module.)
    if str in vt_parse.memory:
        return vt_parse.memory[str]
# Use the built in re.Scanner to tokenise the input string.
def s_lbrace(scanner, token): return ("LBRACE", token)
def s_rbrace(scanner, token): return ("RBRACE", token)
def s_comma(scanner, token): return ("COMMA", token)
def s_varname(scanner, token): return ("VAR", token)
scanner = Scanner([
(r'{', s_lbrace),
(r'}', s_rbrace),
(r',', s_comma),
(r'[a-zA-Z_]\w*', s_varname),
(r'\s+', None)
])
tokens = scanner.scan(str)
# tokens is a pair of the tokenised string and any "uneaten" part.
# check the entire string was eaten.
    if tokens[1] != '':
        print("Could not read the variable tree given:")
        print(str)
        # print("could not lex: " + tokens[1].__str__())
        exit()
tokens = tokens[0] # Just the list of tokens.
p = Parser()
try:
tree = p.parse(tokens)
    except p.ParseErrors:
        print("Could not read the variable tree given:")
        print(str)
        exit()
    # cache and return the parse; the memo dict consulted at the top of
    # the function is what makes repeated calls cheap
    vt_parse.memory[str] = tree
    return tree
Example 8: test_scanner
def test_scanner(self):
def s_ident(scanner, token): return token
def s_operator(scanner, token): return "op%s" % token
def s_float(scanner, token): return float(token)
def s_int(scanner, token): return int(token)
from re import Scanner
scanner = Scanner([
(r"[a-zA-Z_]\w*", s_ident),
(r"\d+\.\d*", s_float),
(r"\d+", s_int),
(r"=|\+|-|\*|/", s_operator),
(r"\s+", None),
])
self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
(['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
'op+', 'bar'], ''))
Example 9: __init__
class Parser:
def __init__(self):
self.s1 = Scanner((
(r'^@@', self.got),
(r'aa', self.got),
))
def write_all(self, text):
D("scan %r", self.s1.scan(text))
    def got(self, scanner, text):
        # re.Scanner calls bound-method actions as action(scanner, token),
        # so the handler needs the extra scanner parameter
        D("GOT %r", text)
Example 10: lex_source
def lex_source(source):
"""
Lexes the source into ice9 tokens. Returns a list of
(token type, token string) pairs.
May raise a ValueError in case of a syntax error.
"""
scanner_tokens = [(regex, make_token(typ)) for typ, regex in TOKENS]
scanner = Scanner(scanner_tokens)
# use python's scanner class to tokenize the input
tokenized, unused = scanner.scan(source)
if unused != '':
# unexpected character broke the flow!
lineno = sum(1 for typ,tok in tokenized if typ == 'newline') + 1
raise Ice9LexicalError(lineno, 'illegal character (%s)' % unused[0])
# mark the start and end of the file
tokenized.insert(0, ('SOF', 'start of file'))
tokenized.append(('EOF', 'end of file'))
return tokenized
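A detail worth copying from this lexer: because scan() stops at the first character no pattern matches, everything before the error has already been tokenised, so the offending line number can be recovered simply by counting the 'newline' tokens emitted so far.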
Example 11: __init__
def __init__(self, s):
self.string = s
Scanner.__init__(self, [(r"([^<>|\\]|\\.)+", self.text), (r"[<>|]", self.table)])
Example 12: Reader
class Reader(object):
PAREN = {"]": "[", ")": "("}
def __init__(self, binding=None, symbol_marker="'", use_dict=True):
self.binding = binding or default_binding
self.symbol_marker = symbol_marker
self.use_dict = use_dict
def read(self, value):
self.result = []
self.paren_stack = []
self.source = value
self.pos = 0
self.quoted = False
self.scanner = Scanner(
[
(r"\s+", self("skip")),
(r";[^\n]*\n", self("skip")),
(r""""(((?<=\\)")|[^"])*((?<!\\)")""", self("str")),
(r"(\(|\[)", self("open")),
(r"(\)|\])", self("close")),
(r"(([\d]+|(((\d+)?\.[\d]+)|([\d]+\.)))e[\+\-]?[\d]+)|(((\d+)?\.[\d]+)|([\d]+\.))", self("number")),
(r"\-?((0x[\da-f]+)|(0[0-7]+)|([1-9][\d]*)|0)[l]?", self("number")),
(r"""%s([^\(\[\)\]\s"]+)""" % self.symbol_marker, self("symbol")),
(r"'", self("quote")),
(r"""([^\(\[\)\]\s"]+)""", self("ident")),
(r"""".*""", self("unterm_str")),
(r".*", self("unknown_token")),
],
re.M | re.S | re.I,
)
self.scanner.scan(self.source)
if self.paren_stack:
self.raise_error("missing closing parenthesis.")
return self.parse(self.result)
def append(self, v):
if self.quoted:
quote_lst = self.paren_stack.pop()[1]
quote_lst.append(Token(v, self.pos))
self.quoted = False
else:
self.last().append(Token(v, self.pos))
def __call__(self, name):
def _(scanner, s):
self.pos += len(s)
return getattr(self, name)(s)
return _
def unknown_token(self, s):
self.raise_error("unknown token: %s" % s)
def skip(self, _):
pass
def quote(self, _):
new_lst = []
self.last().append(new_lst)
self.paren_stack.append(["quote", new_lst])
self.append(Ident("quote"))
self.quoted = True
def open(self, s):
new_lst = []
if self.quoted:
quote_lst = self.paren_stack.pop()[1]
quote_lst.append(new_lst)
self.quoted = False
else:
self.last().append(new_lst)
self.paren_stack.append([s, new_lst])
def close(self, s):
if not self.paren_stack:
self.raise_error("missing opening parenthesis.")
if self.PAREN[s] != self.paren_stack.pop()[0]:
self.raise_error("missing closing parenthesis.")
def str(self, s):
self.append(eval('u""' + s + '""'))
def unterm_str(self, s):
self.raise_error("unterminated string literal.")
def number(self, s):
self.append(eval(s))
def symbol(self, s):
self.append(Symbol(s[1:]))
def ident(self, s):
if s in self.binding:
self.append(self.binding[s])
else:
self.append(Ident(s))
def last(self):
if self.paren_stack:
#......... the rest of this example's code is omitted .........
Example 13: callback
from pprint import pformat
import logging
import re
log = logging.getLogger()
D = log.debug
logging.basicConfig(level=logging.DEBUG)
def callback(scanner, text):
D("CALL %r", text)
def ignore(scanner, text):
D("IGNORE %r", text)
s = Scanner((
(r'{{{', callback),
(r'##', callback),
(r'\s+', ignore),
(r'(.+)(?=##)', callback),
))
text = "## {{{ aa##"
while text:
D("%r", text)
text = s.scan(text)[1]
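The loop here exploits the (results, remainder) return value: each pass scans as far as it can and then re-scans the leftover tail. The (.+)(?=##) rule is the interesting one; its lookahead captures text up to the next ## without consuming it, leaving the ## for its own rule.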
Example 14: _scan_file
def _scan_file(self):
scanner = Scanner(token_patterns, FLAGS['s'])
return scanner.scan(self._read_file())[0]
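token_patterns, FLAGS and _read_file are presumably defined elsewhere in that module. Note that keeping only element [0] of the scan() result silently drops any unmatched remainder, so unlike Examples 2 and 10 this variant does no lexical error reporting.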