This article collects typical code examples showing how the tokenize.tokenize method is used in Python. If you have been wondering what exactly tokenize.tokenize does and how to call it, the curated examples below may help. You can also explore further usage examples from the tokenize module that this method belongs to.
Fifteen code examples of the tokenize.tokenize method are shown below, sorted by popularity by default.
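Before the project examples, here is a minimal, self-contained sketch of the basic calling convention: tokenize.tokenize takes a readline callable that returns bytes and yields TokenInfo tuples, starting with an ENCODING token. The source string below is purely illustrative.

import io
import tokenize

source = b"x = 1  # a comment\n"
for tok in tokenize.tokenize(io.BytesIO(source).readline):
    # Each item is a TokenInfo: (type, string, start, end, line)
    print(tokenize.tok_name[tok.type], repr(tok.string))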
Example 1: main
# Required import: import tokenize [as alias]
# Or: from tokenize import tokenize [as alias]
def main():
    if len(sys.argv) != 2:
        sys.exit("Usage: rangify <nam file>")
    codepoints_data = list(tokenize.tokenize(open(sys.argv[1], 'rb').readline))
    codepoints = get_codepoints(codepoints_data)
    codepoints.sort()
    seqs = []
    seq = (None,)
    for cp in codepoints:
        if seq[0] is None:
            seq = (cp, cp)
        elif seq[1] == cp - 1:
            seq = (seq[0], cp)
        else:
            seqs.append(seq)
            seq = (None,)
    for seq in seqs:
        print(seq)
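The helper get_codepoints is not included in this excerpt. Assuming the .nam file lists each codepoint as a hexadecimal literal such as 0x0041 at the start of a line, a minimal hypothetical sketch of such a helper could filter the NUMBER tokens (this is an illustration, not the project's actual implementation):

def get_codepoints(tokens):
    # Hypothetical helper: keep NUMBER tokens that look like hex codepoints.
    return [int(tok.string, 16)
            for tok in tokens
            if tok.type == tokenize.NUMBER and tok.string.startswith("0x")]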
Example 2: find_doc_for
# Required import: import tokenize [as alias]
# Or: from tokenize import tokenize [as alias]
def find_doc_for(ast_entry, body_lines):
    lineno = ast_entry.lineno - 1
    line_io = io.BytesIO(body_lines[lineno].encode())
    try:
        tokens = tokenize(line_io.readline) or []
        line_comments = [t.string for t in tokens if t.type == COMMENT]
        if line_comments:
            formatted_lcs = [l[1:].strip() for l in line_comments]
            filtered_lcs = [l for l in formatted_lcs if not is_ignored(l)]
            if filtered_lcs:
                return filtered_lcs[0]
    except TokenError:
        pass
    lineno -= 1
    while lineno >= 0:
        if iscomment(body_lines[lineno]):
            comment = body_lines[lineno].strip("# ")
            if not is_ignored(comment):
                return comment
        if not body_lines[lineno].strip() == "":
            return None
        lineno -= 1
    return None
Example 3: run
# Required import: import tokenize [as alias]
# Or: from tokenize import tokenize [as alias]
def run(self):
    OPENERS = ('class', 'def', 'for', 'if', 'try', 'while')
    INDENT = tokenize.INDENT
    NAME = tokenize.NAME
    save_tabsize = tokenize.tabsize
    tokenize.tabsize = self.tabwidth
    try:
        try:
            for (typ, token, start, end, line) in token_generator(self.readline):
                if typ == NAME and token in OPENERS:
                    self.blkopenline = line
                elif typ == INDENT and self.blkopenline:  # compare the token type, not the builtin type()
                    self.indentedline = line
                    break
        except (tokenize.TokenError, IndentationError):
            # since we cut off the tokenizer early, we can trigger
            # spurious errors
            pass
    finally:
        tokenize.tabsize = save_tabsize
    return self.blkopenline, self.indentedline
Example 4: __init__
# Required import: import tokenize [as alias]
# Or: from tokenize import tokenize [as alias]
def __init__(self, f):
    self.find_stmt = 1  # next token begins a fresh stmt?
    self.level = 0      # current indent level
    # Raw file lines.
    self.raw = f.readlines()
    # File lines, rstripped & tab-expanded. Dummy at start is so
    # that we can use tokenize's 1-based line numbering easily.
    # Note that a line is all-blank iff it's "\n".
    self.lines = [_rstrip(line).expandtabs() + "\n"
                  for line in self.raw]
    self.lines.insert(0, None)
    self.index = 1  # index into self.lines of next line
    # List of (lineno, indentlevel) pairs, one for each stmt and
    # comment line. indentlevel is -1 for comment lines, as a
    # signal that tokenize doesn't know what to do about them;
    # indeed, they're our headache!
    self.stats = []
Example 5: __waiting
# Required import: import tokenize [as alias]
# Or: from tokenize import tokenize [as alias]
def __waiting(self, ttype, tstring, lineno):
    opts = self.__options
    # Do docstring extractions, if enabled
    if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
        # module docstring?
        if self.__freshmodule:
            if ttype == tokenize.STRING:
                self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                self.__freshmodule = 0
            elif ttype not in (tokenize.COMMENT, tokenize.NL):
                self.__freshmodule = 0
            return
        # class docstring?
        if ttype == tokenize.NAME and tstring in ('class', 'def'):
            self.__state = self.__suiteseen
            return
    if ttype == tokenize.NAME and tstring in opts.keywords:
        self.__state = self.__keywordseen
Example 6: __openseen
# Required import: import tokenize [as alias]
# Or: from tokenize import tokenize [as alias]
# Note: this example uses Python 2 syntax ("print >> sys.stderr").
def __openseen(self, ttype, tstring, lineno):
    if ttype == tokenize.OP and tstring == ')':
        # We've seen the last of the translatable strings. Record the
        # line number of the first line of the strings and update the list
        # of messages seen. Reset state for the next batch. If there
        # were no strings inside _(), then just ignore this entry.
        if self.__data:
            self.__addentry(EMPTYSTRING.join(self.__data))
        self.__state = self.__waiting
    elif ttype == tokenize.STRING:
        self.__data.append(safe_eval(tstring))
    elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                       token.NEWLINE, tokenize.NL]:
        # warn if we see anything else than STRING or whitespace
        print >> sys.stderr, _(
            '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
            ) % {
            'token': tstring,
            'file': self.__curfile,
            'lineno': self.__lineno
            }
        self.__state = self.__waiting
Example 7: indentify
# Required import: import tokenize [as alias]
# Or: from tokenize import tokenize [as alias]
def indentify(s):
    out = []
    stack = []
    l = ['', s]
    for (tokenType, tokenString, (startRow, startColumn),
         (endRow, endColumn), logicalLine) in tokenize(l.pop):
        if tokenString in ['[', '(', '{']:
            stack.append(tokenString)
        elif tokenString in [']', ')', '}']:
            stack.pop()
        if tokenString == '\0':
            out.append(' '*len(stack))
        else:
            out.append(tokenString)
    return ''.join(out)
###########
# Unjelly #
###########
Example 8: __init__
# Required import: import tokenize [as alias]
# Or: from tokenize import tokenize [as alias]
def __init__(self, model, lstm_layers, lstm_size):
    self.__model = model
    # Initial state
    self._h0 = torch.zeros((lstm_layers, 1, lstm_size), device=device)
    self._c0 = torch.zeros((lstm_layers, 1, lstm_size), device=device)
    # Last line of source code read
    self._last_line = ""
    self._tokens: List[tokenize.TokenInfo] = []
    # Last token, because we need to input that to the model for inference
    self._last_token = 0
    # Last bit of the input string
    self._untokenized = ""
    # For timing
    self.time_add = 0
    self.time_predict = 0
    self.time_check = 0
Example 9: read
# Required import: import tokenize [as alias]
# Or: from tokenize import tokenize [as alias]
def read(filename):
    """
    Read a regular Python file with special formatting and performance
    preprocessing on it. The result is a string that conforms to the IPython
    notebook version 3 python script format.
    """
    with open(filename, 'rb') as fin:
        token_gen = _generate_tokens(fin.readline)
        cvt_docstr_gen = convert_toplevel_docstring(token_gen)
        nl_gen = fix_newlines(cvt_docstr_gen)
        out = list(nl_gen)
    formatted = tokenize.untokenize(out).decode('utf-8')
    return fix_empty_lines(formatted)
# =============================================================================
# Helpers
# =============================================================================
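The read function above follows a common tokenize pipeline: generate tokens, pass them through transforming generators, then rebuild source text with tokenize.untokenize. Its helpers _generate_tokens and fix_empty_lines are not shown here; a minimal, self-contained sketch of the same round-trip pattern with a pass-through filter stage (illustrative names, not the project's code) looks like this:

import io
import tokenize

def passthrough_filter(tokens):
    # Illustrative transform stage: yield every token unchanged.
    # A real stage would rewrite or drop selected TokenInfo entries.
    for tok in tokens:
        yield tok

def roundtrip(source):
    token_gen = tokenize.generate_tokens(io.StringIO(source).readline)
    out = list(passthrough_filter(token_gen))
    # untokenize on generate_tokens output returns a str
    return tokenize.untokenize(out)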
Example 10: convert_toplevel_docstring
# Required import: import tokenize [as alias]
# Or: from tokenize import tokenize [as alias]
def convert_toplevel_docstring(tokens):
    for token in tokens:
        # For each string
        if token.type == tokenize.STRING:
            text = token.string
            # Must be a docstring
            if text.startswith('"""') or text.startswith("'''"):
                startline, startcol = token.start
                # Starting column MUST be 0
                if startcol == 0:
                    endline, endcol = token.end
                    lines = ['# ' + line
                             for line in text.strip('"\' \n').split('\n')]
                    text = '\n'.join(lines)
                    fmt = '# <markdowncell>\n{0}\n# <codecell>'.format(text)
                    yield TokenInfo(type=tokenize.COMMENT,
                                    start=(startline, startcol),
                                    end=(endline, endcol),
                                    string=fmt,
                                    line='#')
                    # To next token
                    continue
        # Return untouched
        yield token
Example 11: fix_newlines
# Required import: import tokenize [as alias]
# Or: from tokenize import tokenize [as alias]
def fix_newlines(tokens):
    first = True
    curline = 1
    for token in tokens:
        if first:
            first = False
            curline = token.end[0] + 1
        else:
            # Fill NEWLINE token in between
            while curline < token.start[0]:
                yield TokenInfo(type=tokenize.NEWLINE,
                                string='\n',
                                start=(curline, 0),
                                end=(curline, 0),
                                line='\n', )
                curline += 1
            curline = token.end[0] + 1
        yield token
Example 12: get_chunks
# Required import: import tokenize [as alias]
# Or: from tokenize import tokenize [as alias]
def get_chunks(code) -> Generator[Chunk, None, None]:
    g = tokenize.tokenize(io.BytesIO(code.encode("utf-8")).readline)
    chunk = Chunk()
    try:
        for item in g:
            t = PyToken(item)
            reuse = chunk.append(t)
            if chunk.complete:
                yield chunk
                chunk = Chunk()
                if reuse:
                    reuse = chunk.append(t)
                    # assert not reuse
                    if chunk.complete:
                        yield chunk
                        chunk = Chunk()
        yield chunk
    except tokenize.TokenError as e:
        if state.verbose:
            traceback.print_exc()
            print(e)
Example 13: format
# Required import: import tokenize [as alias]
# Or: from tokenize import tokenize [as alias]
# Note: this example uses Python 2 syntax (cStringIO, the string module,
# and the old "except ExceptionType, ex" form).
def format(self):
    """ Parse and send the colored source.
    """
    # store line offsets in self.lines
    self.lines = [0, 0]
    pos = 0
    while 1:
        pos = string.find(self.raw, '\n', pos) + 1
        if not pos: break
        self.lines.append(pos)
    self.lines.append(len(self.raw))
    # parse the source and write it
    self.pos = 0
    text = cStringIO.StringIO(self.raw)
    self.out.write('<pre class="code">\n')
    try:
        tokenize.tokenize(text.readline, self)
    except tokenize.TokenError, ex:
        msg = ex[0]
        line = ex[1][0]
        self.out.write("<h3>ERROR: %s</h3>%s\n" % (
            msg, self.raw[self.lines[line]:]))
Example 14: handle_token
# Required import: import tokenize [as alias]
# Or: from tokenize import tokenize [as alias]
# Note: this example uses Python 2 syntax (print statements, apply(), im_func).
def handle_token(self, *args):
    # dispatch incoming tokens to the current handler
    if DEBUG > 1:
        print self.handler.im_func.func_name, self.indent,
        print tokenize.tok_name[args[0]], repr(args[1])
    if args[0] == tokenize.DEDENT:
        self.indent = self.indent - 1
        while self.scope and self.scope[-1][0] >= self.indent:
            del self.scope[-1]
            del self.stack[-1]
    self.handler = apply(self.handler, args)
    if args[0] == tokenize.INDENT:
        self.indent = self.indent + 1
##
# (Token handler) Scans for encoding directive.
Example 15: look_for_pythondoc
# Required import: import tokenize [as alias]
# Or: from tokenize import tokenize [as alias]
def look_for_pythondoc(self, type, token, start, end, line):
    if type == tokenize.COMMENT and string.rstrip(token) == "##":
        # found a comment: set things up for comment processing
        self.comment_start = start
        self.comment = []
        return self.process_comment_body
    else:
        # deal with "bare" subjects
        if token == "def" or token == "class":
            self.subject_indent = self.indent
            self.subject_parens = 0
            self.subject_start = self.comment_start = None
            self.subject = []
            return self.process_subject(type, token, start, end, line)
        return self.look_for_pythondoc
##
# (Token handler) Processes a comment body. This handler adds
# comment lines to the current comment.
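Examples 14 and 15 come from an older Python 2 documentation tool and illustrate a handler-returning-handler pattern: each handler consumes one token and returns the handler for the next token. A minimal Python 3 sketch of the same dispatch style, with purely illustrative handler names and behaviour (not the original tool's code), might look like this:

import io
import tokenize

class CommentScanner:
    """Toy token-driven state machine: collect comments introduced by a bare '##' line."""

    def __init__(self):
        self.handler = self.look_for_marker
        self.comments = []

    def feed(self, source):
        for tok in tokenize.generate_tokens(io.StringIO(source).readline):
            # Each handler returns the handler for the next token.
            self.handler = self.handler(tok)
        return self.comments

    def look_for_marker(self, tok):
        if tok.type == tokenize.COMMENT and tok.string.rstrip() == "##":
            self.current = []
            return self.collect_comment
        return self.look_for_marker

    def collect_comment(self, tok):
        if tok.type == tokenize.COMMENT:
            self.current.append(tok.string.lstrip("# ").rstrip())
            return self.collect_comment
        if tok.type == tokenize.NL:
            # comment-only and blank lines produce NL tokens; keep collecting
            return self.collect_comment
        self.comments.append(" ".join(self.current))
        return self.look_for_marker

For example, CommentScanner().feed("##\n# more detail\nx = 1\n") returns ['more detail'].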