本文整理汇总了 Python 中 re.UNICODE 属性的典型用法代码示例。如果您正苦于以下问题：Python re.UNICODE 属性的具体用法？Python re.UNICODE 怎么用？Python re.UNICODE 使用的例子？那么，这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在模块 re 的用法示例。
在下文中一共展示了re.UNICODE属性的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __parse_domain_to_employer_line
# 需要导入模块: import re [as 别名]
# 或者: from re import UNICODE [as 别名]
def __parse_domain_to_employer_line(self, raw_domain, raw_org):
    """Validate a raw domain/organization pair and return it normalized.

    Each value is matched from its start against ``self.DOMAIN_REGEX`` and
    ``self.ORGANIZATION_REGEX`` respectively, with ``re.UNICODE`` enabled.
    The ``domain`` and ``organization`` named groups are stripped of
    surrounding whitespace and then passed through ``self.__encode``.

    :param raw_domain: text expected to match ``self.DOMAIN_REGEX``
    :param raw_org: text expected to match ``self.ORGANIZATION_REGEX``

    :returns: an ``(organization, domain)`` tuple -- organization first

    :raises InvalidFormatError: when either value fails to match its regex
    """
    domain_match = re.match(self.DOMAIN_REGEX, raw_domain, re.UNICODE)
    if domain_match is None:
        raise InvalidFormatError(
            cause="invalid domain format: '%s'" % raw_domain)
    domain = domain_match.group('domain').strip()

    org_match = re.match(self.ORGANIZATION_REGEX, raw_org, re.UNICODE)
    if org_match is None:
        raise InvalidFormatError(
            cause="invalid organization format: '%s'" % raw_org)
    organization = org_match.group('organization').strip()

    # The organization is encoded before the domain.
    organization = self.__encode(organization)
    domain = self.__encode(domain)
    return organization, domain
示例2: unmarshal
# 需要导入模块: import re [as 别名]
# 或者: from re import UNICODE [as 别名]
def unmarshal(self, filename):
    """
    Unmarshals (loads from a plain text file) the tagger model. For
    safety, this operation is intended to be performed only on
    newly created taggers (i.e., without any previous model).

    Every non-blank line must look like C{text:tag}. The line is split
    at the last colon that still leaves a non-empty tag, so the text
    part may itself contain colons. Blank lines are skipped. Entries
    are stored in C{self._model}, keyed by text.

    @param filename: Name of the file from which the model will
    be read.
    @type filename: C{string}
    @raise ValueError: If a non-blank line does not have the
    C{text:tag} form.
    """
    pattern = re.compile(r'^(.+):(.+?)$', re.UNICODE)
    # ``open`` works on both Python 2 and 3 (``file`` is Python 2 only) and
    # the context manager closes the handle even when a line is rejected.
    with open(filename, "r") as handler:
        for lineno, line in enumerate(handler, 1):
            if not line.strip():
                continue
            m = pattern.match(line)
            if m is None:
                raise ValueError(
                    "%s: line %d: expected 'text:tag', got %r"
                    % (filename, lineno, line))
            text, tag = m.groups()
            self._model[text] = tag
示例3: render_re
# 需要导入模块: import re [as 别名]
# 或者: from re import UNICODE [as 别名]
def render_re(regex):
    """Build a repr()-like string for a compiled regular expression.

    The result is ``re.compile(<pattern repr>)`` and, when any of the flags
    listed below is set on ``regex``, a second argument consisting of the
    bare flag names joined with ``|``. Flags outside this list are not
    rendered.
    """
    # Order matters: names are emitted in exactly this sequence.
    known_flags = (
        (re.IGNORECASE, 'IGNORECASE'),
        (re.LOCALE, 'LOCALE'),
        (re.UNICODE, 'UNICODE'),
        (re.MULTILINE, 'MULTILINE'),
        (re.DOTALL, 'DOTALL'),
        (re.VERBOSE, 'VERBOSE'),
    )
    names = [label for bit, label in known_flags if regex.flags & bit]
    if not names:
        return 're.compile(%r)' % regex.pattern
    return 're.compile(%r, %s)' % (regex.pattern, '|'.join(names))
示例4: str_flags_to_int
# 需要导入模块: import re [as 别名]
# 或者: from re import UNICODE [as 别名]
def str_flags_to_int(str_flags):
    """Convert single-letter regex flag codes into an ``re`` flag bitmask.

    Recognized letters are ``i`` (IGNORECASE), ``l`` (LOCALE),
    ``m`` (MULTILINE), ``s`` (DOTALL), ``u`` (UNICODE) and ``x`` (VERBOSE).
    Any other content in ``str_flags`` is ignored; with no recognized
    letter the result is ``0``.
    """
    letter_to_flag = (
        ("i", re.IGNORECASE),
        ("l", re.LOCALE),
        ("m", re.MULTILINE),
        ("s", re.DOTALL),
        ("u", re.UNICODE),
        ("x", re.VERBOSE),
    )
    mask = 0
    for letter, flag in letter_to_flag:
        if letter in str_flags:
            mask |= flag
    return mask
示例5: __init__
# 需要导入模块: import re [as 别名]
# 或者: from re import UNICODE [as 别名]
def __init__(self, **kwargs):
    """Construct a TINYTEXT.

    Every keyword argument is forwarded unchanged to the parent class
    constructor; this method adds no handling of its own.

    :param charset: Optional, a column-level character set for this string
      value.  Takes precedence to 'ascii' or 'unicode' short-hand.

    :param collation: Optional, a column-level collation for this string
      value.  Takes precedence to 'binary' short-hand.

    :param ascii: Defaults to False: short-hand for the ``latin1``
      character set, generates ASCII in schema.

    :param unicode: Defaults to False: short-hand for the ``ucs2``
      character set, generates UNICODE in schema.

    :param national: Optional. If true, use the server's configured
      national character set.

    :param binary: Defaults to False: short-hand, pick the binary
      collation type that matches the column's character set.  Generates
      BINARY in schema.  This does not affect the type of data stored,
      only the collation of character data.

    """
    super(TINYTEXT, self).__init__(**kwargs)
示例6: expandvars
# 需要导入模块: import re [as 别名]
# 或者: from re import UNICODE [as 别名]
def expandvars(path):
    """Expand environment variable references inside a path.

    Args:
        path (pathlike): A path to expand
    Returns:
        `fsnative`

    Like :func:`python:os.path.expandvars` but supports unicode under Windows
    + Python 2 and always returns a `fsnative`.

    Substitution happens in three passes, in this order: ``$NAME``, then
    ``%NAME%`` (only when ``os.name`` is ``"nt"``), then ``${NAME}``.
    Values are looked up in ``environ``.
    """

    def lookup(found):
        # An unknown variable is replaced by the full match, i.e. left as is.
        return environ.get(found.group(1), found.group(0))

    native = path2fsn(path)

    dollar_name = re.compile(r"\$(\w+)", flags=re.UNICODE)
    expanded = dollar_name.sub(lookup, native)
    if os.name == "nt":
        expanded = re.sub(r"%([^%]+)%", lookup, expanded)
    return re.sub(r"\$\{([^\}]+)\}", lookup, expanded)
示例7: __init__
# 需要导入模块: import re [as 别名]
# 或者: from re import UNICODE [as 别名]
def __init__(self, export_comments=None):
    """Configure comment export and build the lexer.

    ``export_comments`` may be ``None`` (``can_export_comments`` is then not
    assigned here), or one of ``'LINE'``, ``'MULTILINE'`` or ``'ALL'``, which
    select the comment token names stored in ``self.can_export_comments``.
    Any other value raises ``ValueError``.

    The lexer is created with ``lex.lex`` using this instance as the module,
    with ``re.UNICODE | re.MULTILINE`` as regex flags and a null error log.
    """
    if export_comments is not None:
        comment_tokens_by_mode = (
            ('LINE', ('COMMENT',)),
            ('MULTILINE', ('MULTICOMMENT',)),
            ('ALL', ('COMMENT', 'MULTICOMMENT')),
        )
        for mode, token_names in comment_tokens_by_mode:
            if export_comments == mode:
                # A fresh list per instance, never a shared object.
                self.can_export_comments = list(token_names)
                break
        else:
            raise ValueError(
                'Only `LINE`, `MULTILINE` and `ALL` value are allowed for '
                '`export_comments`. given: `%s`.' % export_comments
            )

    lexer_options = dict(
        module=self,
        debug=False,
        reflags=(re.UNICODE | re.MULTILINE),
        errorlog=lex.NullLogger(),
    )
    self.lex = lex.lex(**lexer_options)
示例8: regex
# 需要导入模块: import re [as 别名]
# 或者: from re import UNICODE [as 别名]
def regex(self):
    """
    Return the compiled regular expression for the currently active
    language code, compiling and caching it on first use.

    Compiled patterns are kept in ``self._regex_dict``, keyed by the value
    of ``get_language()``. ``self._regex`` is used directly when it is a
    string type and is passed through ``force_text`` otherwise. A pattern
    that fails to compile raises ``ImproperlyConfigured``.
    """
    language_code = get_language()
    if language_code in self._regex_dict:
        return self._regex_dict[language_code]

    source = self._regex
    pattern_text = (
        source if isinstance(source, six.string_types) else force_text(source)
    )
    try:
        compiled = re.compile(pattern_text, re.UNICODE)
    except re.error as exc:
        raise ImproperlyConfigured(
            '"%s" is not a valid regular expression: %s' %
            (pattern_text, six.text_type(exc)))
    self._regex_dict[language_code] = compiled
    return self._regex_dict[language_code]
示例9: __init__
# 需要导入模块: import re [as 别名]
# 或者: from re import UNICODE [as 别名]
def __init__(self, pattern, markdown_instance=None):
    """
    Create an instance of an inline pattern.

    Keyword arguments:

    * pattern: A regular expression that matches a pattern
    * markdown_instance: optional object stored as ``self.markdown``
      when truthy; the attribute is not set otherwise

    """
    self.pattern = pattern

    # Surround the pattern with lazy catch-all groups so a match also
    # captures the text on both sides of it.
    wrapped = "^(.*?)%s(.*?)$" % pattern
    flags = re.DOTALL | re.UNICODE
    self.compiled_re = re.compile(wrapped, flags)

    # Api for Markdown to pass safe_mode into instance
    self.safe_mode = False
    if markdown_instance:
        self.markdown = markdown_instance
示例10: test_bug_6561
# 需要导入模块: import re [as 别名]
# 或者: from re import UNICODE [as 别名]
def test_bug_6561(self):
    """Check that the digit class only matches Unicode category 'Nd'."""
    # '\d' should match characters in Unicode category 'Nd'
    # (Number, Decimal Digit), but not those in 'Nl' (Number,
    # Letter) or 'No' (Number, Other).
    #
    # The sample characters are written as u'' escapes so the test runs
    # unchanged on Python 2 and Python 3 (``unichr`` is Python 2 only), and
    # the pattern is a raw string so that '\d' is not an invalid string
    # escape.
    decimal_digits = [
        u'\u0037',  # '\N{DIGIT SEVEN}', category 'Nd'
        u'\u0e58',  # '\N{THAI DIGIT SIX}', category 'Nd'
        u'\uff10',  # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
    ]
    for x in decimal_digits:
        self.assertEqual(re.match(r'^\d$', x, re.UNICODE).group(0), x)

    not_decimal_digits = [
        u'\u2165',  # '\N{ROMAN NUMERAL SIX}', category 'Nl'
        u'\u3039',  # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
        u'\u2082',  # '\N{SUBSCRIPT TWO}', category 'No'
        u'\u32b4',  # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
    ]
    for x in not_decimal_digits:
        self.assertIsNone(re.match(r'^\d$', x, re.UNICODE))
示例11: match_nickname_content
# 需要导入模块: import re [as 别名]
# 或者: from re import UNICODE [as 别名]
def match_nickname_content(self, content: str) -> Tuple[str, str]:
    """Split a ``[nick] text`` message into its nickname and text.

    The message must start with a bracketed nickname followed by a single
    space. The nickname is the shortest bracketed run; the text is the rest
    of the first line.

    :returns: ``(nick, content)`` on a match, ``(None, None)`` otherwise.
    """
    found = re.match(
        r'^\[(?P<nick>.+?)\] (?P<content>.*)',
        content, flags=re.UNICODE
    )
    if found is None:
        return (None, None)
    return found.group('nick', 'content')
示例12: try_set_nick
# 需要导入模块: import re [as 别名]
# 或者: from re import UNICODE [as 别名]
def try_set_nick(self, msg):
    """Handle a ``nick`` command carried by ``msg``.

    The first whitespace-separated word of ``msg.content``, minus its first
    character (presumably the command prefix such as ``/``) and lowercased,
    is the command name; the remaining words are its arguments.

    For the ``nick`` command with exactly one argument, the nickname must
    start with a word character. If it does, it is stored through
    ``self.nick_store.set_nickname`` and a confirmation is sent; otherwise
    a rejection message is sent. Any other argument count sends a usage
    message. All replies go to the chat id formatted as a decimal string.

    :param msg: message object exposing ``user.id``, ``chat_id`` and
        ``content``
    :returns: ``True`` when the message was a ``nick`` command (accepted
        or not), ``None`` when it was not a command this method handles.
    """
    # handle command
    user_id = msg.user.id
    target = "%d" % msg.chat_id
    try:
        tmp = msg.content.split()
        cmd = tmp[0][1:].lower()
        args = tmp[1:]
    except (AttributeError, IndexError):
        # Content is missing or blank: there is no command to parse.
        return None

    if cmd != "nick":
        return None

    if len(args) == 1:
        nick = args[0]
        if not re.match(r'^\w', nick, flags=re.UNICODE):
            self.send_msg(target, "Use a human's nick name, please.")
            return True
        self.nick_store.set_nickname(user_id, nick)
        content = "Changed nickname to '%s'" % nick
        # The first positional argument of ``debug`` is the format string;
        # the values to log must be passed as its arguments.
        logger.debug("%s: %s", target, content)
        self.send_msg(target, content)
    else:
        self.send_msg(
            target,
            "Invalid Command, use '/nick nickname' "
            "to change nickname."
        )
    return True
示例13: __parse_stream
# 需要导入模块: import re [as 别名]
# 或者: from re import UNICODE [as 别名]
def __parse_stream(self, stream):
    """Generic method to parse mailmap streams.

    The stream is split on newlines. Lines matching
    ``self.LINES_TO_IGNORE_REGEX`` are skipped. Every other line is split on
    ``>``; each non-empty piece has its commas turned into spaces, must
    contain ``<``, and is parsed with ``email.utils.parseaddr`` after the
    closing ``>`` is put back.

    :param stream: text to parse

    :returns: a generator yielding, per processed line, the list of
        ``parseaddr`` results found on it

    :raises InvalidFormatError: when a non-empty piece has no ``<``
    """
    for nline, raw_line in enumerate(stream.split('\n'), 1):
        # Ignore blank lines and comments
        if re.match(self.LINES_TO_IGNORE_REGEX, raw_line, re.UNICODE):
            continue

        pieces = raw_line.strip('\n').strip(' ').split('>')

        # NOTE(review): for a str, split('>') always returns at least one
        # element, so this guard looks purely defensive.
        if not pieces:
            raise InvalidFormatError(
                cause="line %s: invalid format" % str(nline))

        aliases = []
        for piece in pieces:
            piece = piece.replace(',', ' ').strip('\n').strip(' ')
            if not piece:
                continue
            if '<' not in piece:
                raise InvalidFormatError(
                    cause="line %s: invalid format" % str(nline))
            # split('>') consumed the closing bracket; restore it.
            aliases.append(email.utils.parseaddr(piece + '>'))

        yield aliases
示例14: __validate_email
# 需要导入模块: import re [as 别名]
# 或者: from re import UNICODE [as 别名]
def __validate_email(self, email):
    """Return ``email`` unchanged if it matches the address regex.

    The value is matched from its start against
    ``self.EMAIL_ADDRESS_REGEX`` with ``re.UNICODE`` enabled.

    :param email: candidate email address

    :returns: the same ``email`` value that was passed in

    :raises InvalidFormatError: when the value does not match; the cause is
        built from ``self.GRIMOIRELAB_INVALID_FORMAT``
    """
    if re.match(self.EMAIL_ADDRESS_REGEX, email, re.UNICODE) is None:
        error = "Invalid email address: " + str(email)
        raise InvalidFormatError(
            cause=self.GRIMOIRELAB_INVALID_FORMAT % {'error': error})
    return email
示例15: __parse_stream
# 需要导入模块: import re [as 别名]
# 或者: from re import UNICODE [as 别名]
def __parse_stream(self, stream, parse_line):
    """Generic method to parse gitdm streams.

    The stream is split on newlines. Lines matching
    ``self.LINES_TO_IGNORE_REGEX`` are skipped. Every other line must match
    ``self.VALID_LINE_REGEX``; its first two groups are handed to
    ``parse_line``.

    :param stream: text to parse
    :param parse_line: callable taking the two captured groups of a line

    :returns: a generator yielding each ``parse_line`` result

    :raises InvalidFormatError: when the stream is empty or ``None``, when
        a line does not match, or when ``parse_line`` raises it (re-raised
        with the line number prepended to the cause)
    """
    if not stream:
        raise InvalidFormatError(cause='stream cannot be empty or None')

    for nline, line in enumerate(stream.split('\n'), 1):
        # Ignore blank lines and comments
        if re.match(self.LINES_TO_IGNORE_REGEX, line, re.UNICODE):
            continue

        fields = re.match(self.VALID_LINE_REGEX, line, re.UNICODE)
        if fields is None:
            raise InvalidFormatError(
                cause="line %s: invalid format" % str(nline))

        try:
            yield parse_line(fields.group(1), fields.group(2))
        except InvalidFormatError as e:
            raise InvalidFormatError(
                cause="line %s: %s" % (str(nline), e))