本文整理匯總了Python中re.UNICODE屬性的典型用法代碼示例。如果您正苦於以下問題:Python re.UNICODE屬性的具體用法?Python re.UNICODE怎麼用?Python re.UNICODE使用的例子?那麼,這裡精選的屬性代碼示例或許可以為您提供幫助。您也可以進一步了解該屬性所在類re的用法示例。
在下文中一共展示了re.UNICODE屬性的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: __parse_domain_to_employer_line
# 需要導入模塊: import re [as 別名]
# 或者: from re import UNICODE [as 別名]
def __parse_domain_to_employer_line(self, raw_domain, raw_org):
    """Parse one domain-to-employer entry.

    Each raw field is validated with ``re.match`` against the patterns
    ``self.DOMAIN_REGEX`` and ``self.ORGANIZATION_REGEX``. The named groups
    ``domain`` and ``organization`` are stripped of surrounding whitespace
    and then passed through ``self.__encode``.

    :param raw_domain: text expected to match ``DOMAIN_REGEX``
    :param raw_org: text expected to match ``ORGANIZATION_REGEX``

    :returns: an ``(organization, domain)`` tuple

    :raises InvalidFormatError: when either field does not match its
        pattern; the domain is checked first
    """
    domain_match = re.match(self.DOMAIN_REGEX, raw_domain, re.UNICODE)
    if domain_match is None:
        raise InvalidFormatError(
            cause="invalid domain format: '%s'" % raw_domain
        )
    domain = domain_match.group('domain').strip()

    org_match = re.match(self.ORGANIZATION_REGEX, raw_org, re.UNICODE)
    if org_match is None:
        raise InvalidFormatError(
            cause="invalid organization format: '%s'" % raw_org
        )
    organization = org_match.group('organization').strip()

    # Encode the organization first, then the domain.
    organization = self.__encode(organization)
    domain = self.__encode(domain)

    return organization, domain
示例2: unmarshal
# 需要導入模塊: import re [as 別名]
# 或者: from re import UNICODE [as 別名]
def unmarshal(self, filename):
    """
    Unmarshals (loads from a plain text file) the tagger model. For
    safety, this operation is intended to be performed only on
    newly created taggers (i.e., without any previous model).

    Every non-blank line must have the form ``text:tag``. The text part
    is matched greedily, so it may itself contain colons; the tag is
    whatever follows the last colon. Each pair is stored in
    ``self._model`` as ``self._model[text] = tag``.

    @param filename: Name of the file from which the model will
        be read.
    @type filename: C{string}
    @raise ValueError: If a non-blank line is not of the form
        ``text:tag``.
    """
    pattern = re.compile(r'^(.+):(.+?)$', re.UNICODE)
    # ``open`` exists on both Python 2 and 3 (``file`` is Python 2 only),
    # and the ``with`` block closes the handle even if a line is rejected.
    with open(filename, "r") as handler:
        for lineno, line in enumerate(handler, 1):
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no entry.
                continue
            m = pattern.match(line)
            if m is None:
                raise ValueError(
                    "%s: line %d: expected 'text:tag', got %r"
                    % (filename, lineno, line)
                )
            text, tag = m.groups()
            self._model[text] = tag
示例3: render_re
# 需要導入模塊: import re [as 別名]
# 或者: from re import UNICODE [as 別名]
def render_re(regex):
    """Render a repr()-style string for a compiled regular expression.

    The result is ``re.compile(<pattern repr>)`` when none of the known
    flags are set on ``regex``. Otherwise it is
    ``re.compile(<pattern repr>, A|B|...)``, where the flag names appear
    without the ``re.`` prefix and in the fixed order listed below.
    """
    # Order matters: it defines the order of names in the rendered string.
    known_flags = (
        'IGNORECASE',
        'LOCALE',
        'UNICODE',
        'MULTILINE',
        'DOTALL',
        'VERBOSE',
    )
    active = [
        flag_name for flag_name in known_flags
        if regex.flags & getattr(re, flag_name)
    ]
    if not active:
        return 're.compile(%r)' % regex.pattern
    return 're.compile(%r, %s)' % (regex.pattern, '|'.join(active))
示例4: str_flags_to_int
# 需要導入模塊: import re [as 別名]
# 或者: from re import UNICODE [as 別名]
def str_flags_to_int(str_flags):
    """Translate a string of one-letter regex flags into ``re`` flag bits.

    Recognised letters are ``i``, ``l``, ``m``, ``s``, ``u`` and ``x``;
    any other content in ``str_flags`` is ignored. Returns ``0`` when no
    recognised letter is present.
    """
    letter_to_flag = (
        ("i", re.IGNORECASE),
        ("l", re.LOCALE),
        ("m", re.MULTILINE),
        ("s", re.DOTALL),
        ("u", re.UNICODE),
        ("x", re.VERBOSE),
    )
    combined = 0
    for letter, flag in letter_to_flag:
        if letter in str_flags:
            combined |= flag
    return combined
示例5: __init__
# 需要導入模塊: import re [as 別名]
# 或者: from re import UNICODE [as 別名]
def __init__(self, **kwargs):
    """Construct a TINYTEXT.

    All keyword arguments are forwarded unchanged to the parent class
    constructor.

    :param charset: Optional, a column-level character set for this string
      value. Takes precedence over the 'ascii' or 'unicode' short-hand.

    :param collation: Optional, a column-level collation for this string
      value. Takes precedence over the 'binary' short-hand.

    :param ascii: Defaults to False: short-hand for the ``latin1``
      character set, generates ASCII in schema.

    :param unicode: Defaults to False: short-hand for the ``ucs2``
      character set, generates UNICODE in schema.

    :param national: Optional. If true, use the server's configured
      national character set.

    :param binary: Defaults to False: short-hand, pick the binary
      collation type that matches the column's character set. Generates
      BINARY in schema. This does not affect the type of data stored,
      only the collation of character data.

    """
    super(TINYTEXT, self).__init__(**kwargs)
示例6: expandvars
# 需要導入模塊: import re [as 別名]
# 或者: from re import UNICODE [as 別名]
def expandvars(path):
    """
    Args:
        path (pathlike): A path to expand
    Returns:
        `fsnative`

    Like :func:`python:os.path.expandvars` but supports unicode under Windows
    + Python 2 and always returns a `fsnative`.

    Substitution runs in three passes: ``$NAME`` first, then ``%NAME%``
    (only when ``os.name`` is ``"nt"``), and finally ``${NAME}``. A
    reference whose name is not found in ``environ`` is left as written.
    """

    def substitute(found):
        # Fall back to the complete matched text for unknown variables.
        return environ.get(found.group(1), found.group(0))

    dollar_name = re.compile(r"\$(\w+)", flags=re.UNICODE)
    expanded = dollar_name.sub(substitute, path2fsn(path))

    if os.name == "nt":
        expanded = re.sub(r"%([^%]+)%", substitute, expanded)

    return re.sub(r"\$\{([^\}]+)\}", substitute, expanded)
示例7: __init__
# 需要導入模塊: import re [as 別名]
# 或者: from re import UNICODE [as 別名]
def __init__(self, export_comments=None):
    """Set the comment-export option and build the lexer.

    :param export_comments: ``None`` (the default) leaves
        ``can_export_comments`` untouched. ``'LINE'``, ``'MULTILINE'``
        and ``'ALL'`` set it to the matching list of comment token names.

    :raises ValueError: for any other non-``None`` value
    """
    if export_comments is not None:
        # Compared with ``==`` rather than a dict lookup, so unhashable
        # values still end in the ValueError below.
        choices = (
            ('LINE', ['COMMENT']),
            ('MULTILINE', ['MULTICOMMENT']),
            ('ALL', ['COMMENT', 'MULTICOMMENT']),
        )
        for keyword, token_names in choices:
            if export_comments == keyword:
                self.can_export_comments = token_names
                break
        else:
            raise ValueError(
                'Only `LINE`, `MULTILINE` and `ALL` value are allowed for '
                '`export_comments`. given: `%s`.' % export_comments
            )

    self.lex = lex.lex(
        module=self, debug=False,
        reflags=(re.UNICODE | re.MULTILINE),
        errorlog=lex.NullLogger(),
    )
示例8: regex
# 需要導入模塊: import re [as 別名]
# 或者: from re import UNICODE [as 別名]
def regex(self):
    """
    Returns a compiled regular expression, depending upon the activated
    language-code.

    Compiled patterns are cached in ``self._regex_dict`` under the
    language code returned by ``get_language()``. Raises
    ``ImproperlyConfigured`` when the pattern does not compile.
    """
    active_language = get_language()
    if active_language in self._regex_dict:
        return self._regex_dict[active_language]

    # Values that are not already strings are converted with force_text.
    pattern_text = (
        self._regex
        if isinstance(self._regex, six.string_types)
        else force_text(self._regex)
    )
    try:
        compiled = re.compile(pattern_text, re.UNICODE)
    except re.error as exc:
        raise ImproperlyConfigured(
            '"%s" is not a valid regular expression: %s' %
            (pattern_text, six.text_type(exc)))

    self._regex_dict[active_language] = compiled
    return compiled
示例9: __init__
# 需要導入模塊: import re [as 別名]
# 或者: from re import UNICODE [as 別名]
def __init__(self, pattern, markdown_instance=None):
    """
    Create an instance of an inline pattern.

    Keyword arguments:

    * pattern: A regular expression that matches a pattern
    * markdown_instance: Optional; stored as ``self.markdown`` when truthy

    """
    self.pattern = pattern

    # Wrap the pattern in lazy groups so the text before and after the
    # match is captured as well.
    wrapped = "^(.*?)%s(.*?)$" % pattern
    self.compiled_re = re.compile(wrapped, re.DOTALL | re.UNICODE)

    # Api for Markdown to pass safe_mode into instance
    self.safe_mode = False

    if markdown_instance:
        self.markdown = markdown_instance
# 需要導入模塊: import re [as 別名]
# 或者: from re import UNICODE [as 別名]
def test_bug_6561(self):
# '\d' should match characters in Unicode category 'Nd'
# (Number, Decimal Digit), but not those in 'Nl' (Number,
# Letter) or 'No' (Number, Other).
decimal_digits = [
unichr(0x0037), # '\N{DIGIT SEVEN}', category 'Nd'
unichr(0x0e58), # '\N{THAI DIGIT SIX}', category 'Nd'
unichr(0xff10), # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
]
for x in decimal_digits:
self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
not_decimal_digits = [
unichr(0x2165), # '\N{ROMAN NUMERAL SIX}', category 'Nl'
unichr(0x3039), # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
unichr(0x2082), # '\N{SUBSCRIPT TWO}', category 'No'
unichr(0x32b4), # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
]
for x in not_decimal_digits:
self.assertIsNone(re.match('^\d$', x, re.UNICODE))
示例11: match_nickname_content
# 需要導入模塊: import re [as 別名]
# 或者: from re import UNICODE [as 別名]
def match_nickname_content(self, content: str) -> Tuple[str, str]:
    """Split a ``[nick] text`` message into its nick and its text.

    Returns ``(nick, text)`` when ``content`` starts with a bracketed,
    non-empty nick followed by a single space. Returns ``(None, None)``
    otherwise.
    """
    found = re.match(
        r'^\[(?P<nick>.+?)\] (?P<content>.*)', content, flags=re.UNICODE
    )
    if found is None:
        return (None, None)
    return found.group('nick', 'content')
示例12: try_set_nick
# 需要導入模塊: import re [as 別名]
# 或者: from re import UNICODE [as 別名]
def try_set_nick(self, msg):
    """Handle a ``nick`` command found in ``msg.content``.

    The first whitespace-separated token, minus its first character and
    lower-cased, is the command name; the remaining tokens are its
    arguments. Replies go to the chat identified by ``msg.chat_id``.

    :param msg: message object providing ``user.id``, ``chat_id`` and
        ``content``

    :returns: ``True`` when the command was ``nick`` (whether or not the
        nickname was accepted); ``None`` when the content could not be
        parsed or names another command
    """
    # handle command
    user_id = msg.user.id
    target = "%d" % msg.chat_id
    try:
        tokens = msg.content.split()
        cmd = tokens[0][1:].lower()
        args = tokens[1:]
    except Exception:
        # Empty or non-text content: not a command, ignore it. Unlike a
        # bare ``except``, this lets KeyboardInterrupt/SystemExit through.
        return

    if cmd == "nick":
        if len(args) == 1:
            nick = args[0]
            if not re.match(r'^\w', nick, flags=re.UNICODE):
                self.send_msg(target, "Use a human's nick name, please.")
                return True
            self.nick_store.set_nickname(user_id, nick)
            content = "Changed nickname to '%s'" % nick
            # The first argument of logger.debug is the format string, so
            # target and content must be passed as its arguments.
            logger.debug("%s: %s", target, content)
            self.send_msg(target, content)
        else:
            self.send_msg(
                target,
                "Invalid Command, use '/nick nickname' "
                "to change nickname."
            )
        return True
示例13: __parse_stream
# 需要導入模塊: import re [as 別名]
# 或者: from re import UNICODE [as 別名]
def __parse_stream(self, stream):
    """Generic method to parse mailmap streams.

    This is a generator. For each line of ``stream`` that does not match
    ``self.LINES_TO_IGNORE_REGEX`` it yields a list with the results of
    ``email.utils.parseaddr`` for the ``>``-terminated entries on that
    line.

    :param stream: text to parse, split on ``'\\n'``

    :raises InvalidFormatError: when a non-empty entry on a line
        contains no ``<`` character
    """
    for nline, raw_line in enumerate(stream.split('\n'), 1):
        # Ignore blank lines and comments
        if re.match(self.LINES_TO_IGNORE_REGEX, raw_line, re.UNICODE):
            continue

        entries = raw_line.strip('\n').strip(' ').split('>')

        # str.split always returns at least one element, so this guard
        # is purely defensive.
        if not entries:
            raise InvalidFormatError(
                cause="line %s: invalid format" % str(nline)
            )

        aliases = []
        for entry in entries:
            entry = entry.replace(',', ' ').strip('\n').strip(' ')
            if not entry:
                continue
            if '<' not in entry:
                raise InvalidFormatError(
                    cause="line %s: invalid format" % str(nline)
                )
            # Restore the '>' consumed by the split before parsing.
            aliases.append(email.utils.parseaddr(entry + '>'))

        yield aliases
示例14: __validate_email
# 需要導入模塊: import re [as 別名]
# 或者: from re import UNICODE [as 別名]
def __validate_email(self, email):
    """Checks if a string looks like an email address.

    :param email: value matched against ``self.EMAIL_ADDRESS_REGEX``

    :returns: ``email`` unchanged when it matches

    :raises InvalidFormatError: when it does not match; the cause is
        built from ``self.GRIMOIRELAB_INVALID_FORMAT``
    """
    if re.match(self.EMAIL_ADDRESS_REGEX, email, re.UNICODE) is not None:
        return email

    details = {'error': "Invalid email address: " + str(email)}
    raise InvalidFormatError(cause=self.GRIMOIRELAB_INVALID_FORMAT % details)
示例15: __parse_stream
# 需要導入模塊: import re [as 別名]
# 或者: from re import UNICODE [as 別名]
def __parse_stream(self, stream, parse_line):
    """Generic method to parse gitdm streams.

    This is a generator, so nothing (including the empty-stream check)
    runs until it is first iterated. For each line that does not match
    ``self.LINES_TO_IGNORE_REGEX`` it yields
    ``parse_line(group 1, group 2)`` of the ``self.VALID_LINE_REGEX``
    match.

    :param stream: text to parse, split on ``'\\n'``
    :param parse_line: callable taking the two captured groups of a line

    :raises InvalidFormatError: when ``stream`` is empty or None, when a
        line does not match ``VALID_LINE_REGEX``, or when ``parse_line``
        raises it (re-raised with the line number prefixed)
    """
    if not stream:
        raise InvalidFormatError(cause='stream cannot be empty or None')

    for nline, line in enumerate(stream.split('\n'), 1):
        # Ignore blank lines and comments
        if re.match(self.LINES_TO_IGNORE_REGEX, line, re.UNICODE):
            continue

        valid = re.match(self.VALID_LINE_REGEX, line, re.UNICODE)
        if valid is None:
            raise InvalidFormatError(
                cause="line %s: invalid format" % str(nline)
            )

        try:
            yield parse_line(valid.group(1), valid.group(2))
        except InvalidFormatError as e:
            raise InvalidFormatError(
                cause="line %s: %s" % (str(nline), e)
            )