本文整理汇总了Python中tokenizer.Tokenizer.__init__方法的典型用法代码示例。如果您正苦于以下问题:Python Tokenizer.__init__方法的具体用法?Python Tokenizer.__init__怎么用?Python Tokenizer.__init__使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类tokenizer.Tokenizer的用法示例。
在下文中一共展示了Tokenizer.__init__方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# Required module: from tokenizer import Tokenizer  [as alias]
# Or: from tokenizer.Tokenizer import __init__  [as alias]
def __init__(self, string_to_tokenize='', prefix_chars='-=<>!+*&|/%^', suffix_chars='=<>&|'):
    """Initialize a JavaScript-oriented tokenizer.

    Args:
        string_to_tokenize: Source text forwarded to the base Tokenizer.
        prefix_chars: Characters that may begin a multi-character operator.
        suffix_chars: Characters that may continue a multi-character operator.
    """
    Tokenizer.__init__(self, string_to_tokenize)
    self.prefix = prefix_chars
    self.suffix = suffix_chars
    ### Set up JavaScriptTokenizer-specific regexes.
    # re.escape keeps the character class well-formed even if a caller
    # passes metacharacters such as ']' or a leading '^'; it is a no-op
    # change for the default argument values.
    self.PREFIX = re.compile("[%s]" % re.escape(self.prefix))
    self.SUFFIX = re.compile("[%s]" % re.escape(self.suffix))
    # NOTE(review): assumes the base Tokenizer defines self.CHARACTER — confirm in tokenizer.py.
    self.BEGIN_IDENTIFIER = self.CHARACTER
    # Raw strings: the original "[\*]" relied on an invalid escape sequence
    # ("\*"), which warns on modern Python; these patterns match identically.
    self.MULTILINE_COMMENT = re.compile(r"[*]")
    self.END_COMMENT = re.compile(r"[/]")
    self.ESCAPE = re.compile(r"[\\]")
示例2: __init__
# Required module: from tokenizer import Tokenizer  [as alias]
# Or: from tokenizer.Tokenizer import __init__  [as alias]
def __init__(self, string_to_tokenize=''):
    """Initialize a CSS tokenizer by delegating to the base Tokenizer.

    No CSS-specific patterns are compiled here yet: the author discarded
    the previous regexes after reading the CSS specification, intending to
    implement the spec's own token grammar, which is recorded below for
    reference.
    """
    Tokenizer.__init__(self, string_to_tokenize)
    ### CSSTokenizer-specific regexes (planned, per the CSS spec).
    #
    # Token          Pattern
    # IDENT          {ident}
    # ATKEYWORD      @{ident}
    # STRING         {string}
    # INVALID        {invalid}
    # HASH           #{name}
    # NUMBER         {num}
    # PERCENTAGE     {num}%
    # DIMENSION      {num}{ident}
    # URI            url\({w}{string}{w}\)
    #                |url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}\)
    # UNICODE-RANGE  U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
    # CDO            <!--
    # CDC            -->
    # ;              ;
    # {              \{
    # }              \}
    # (              \(
    # )              \)
    # [              \[
    # ]              \]
    # S              [ \t\r\n\f]+
    # COMMENT        \/\*[^*]*\*+([^/*][^*]*\*+)*\/
    # FUNCTION       {ident}\(
    # INCLUDES       ~=
    # DASHMATCH      |=
    # DELIM          any other character not matched by the above rules,
    #                and neither a single nor a double quote
    #
    # Macro          Expansion
    # ident          [-]?{nmstart}{nmchar}*
    # name           {nmchar}+
    # nmstart        [_a-z]|{nonascii}|{escape}
    # nonascii       [^\0-\177]
    # unicode        \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
    # escape         {unicode}|\\[^\n\r\f0-9a-f]
    # nmchar         [_a-z0-9-]|{nonascii}|{escape}
    # num            [0-9]+|[0-9]*\.[0-9]+
    # string         {string1}|{string2}
    # string1        \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
    # string2        \'([^\n\r\f\\']|\\{nl}|{escape})*\'
    # invalid        {invalid1}|{invalid2}
    # invalid1       \"([^\n\r\f\\"]|\\{nl}|{escape})*
    # invalid2       \'([^\n\r\f\\']|\\{nl}|{escape})*
    # nl             \n|\r\n|\r|\f
    # w              [ \t\r\n\f]*
示例3: __init__
# Required module: from tokenizer import Tokenizer  [as alias]
# Or: from tokenizer.Tokenizer import __init__  [as alias]
def __init__(self):
    """Construct with no source text, delegating all setup to the base Tokenizer."""
    Tokenizer.__init__(self)