This article collects typical usage examples of the Python regex.escape method: what the method does, how to call it, and how it appears in real code. The curated examples below should answer those questions, and they are also a useful starting point for learning more about the regex module that the method belongs to.
The 15 code examples that follow are shown in order of popularity by default.
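As a quick orientation before the examples: regex.escape turns an arbitrary string into a pattern that matches that string literally, so metacharacters such as ., ?, ( and ) lose their special meaning. A minimal sketch (the sample strings are made up for illustration):

import regex

snippet = "what?"                               # text containing a regex metacharacter
literal = regex.compile(regex.escape(snippet))  # matches the text verbatim
raw = regex.compile(snippet)                    # unescaped: '?' acts as a quantifier

print(literal.search("he said what?").group())  # 'what?'
print(raw.search("he said what?").group())      # 'what' -- the '?' made the 't' optional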
Example 1: expand
# Required module: import regex [as alias]
# or: from regex import escape [as alias]
def expand(self, s):
    if not self._defs:
        return s
    if self._regex is not None:
        regex = self._regex
        substs = self._substs
    else:
        substs = []
        regex = []
        for i, (name, definition) in enumerate(self._defs.items()):
            substs += [definition]
            regex += ['(?P<mwic{i}>{name})'.format(i=i, name=re.escape(name))]
        regex = '|'.join(regex)
        regex = re.compile(regex)
        self._regex = regex
        self._substs = substs
    assert self._regex is not None
    assert self._substs is not None

    def replace(match):
        for i, subst in enumerate(substs):
            if match.group('mwic{i}'.format(i=i)) is not None:
                return subst
        assert False  # no coverage

    return self._regex.sub(replace, s)
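The method caches a compiled alternation of all escaped keys and substitutes whichever named group matched. A rough way to drive it, using a throwaway namespace object in place of the class it really belongs to (SimpleNamespace and the sample definitions below are purely illustrative):

import re
from types import SimpleNamespace

holder = SimpleNamespace(_defs={"e.g.": "for example", "i.e.": "that is"},
                         _regex=None, _substs=None)
print(expand(holder, "Abbreviations, e.g. this one, get expanded."))
# -> 'Abbreviations, for example this one, get expanded.'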
Example 2: _phrase_to_regex
# Required module: import regex [as alias]
# or: from regex import escape [as alias]
def _phrase_to_regex(phrase):
    # Treat whitespace between words as meaning anything other than alphanumeric
    # characters.
    pattern = r"[^\w--_]+".join(regex.escape(word) for word in phrase.split())
    # Treat spaces at the beginning or end of the phrase as matching any
    # whitespace character. This makes it easy to select stuff like non-breaking
    # space, which occurs frequently in browsers.
    # TODO Support newlines. Note that these are frequently implemented as
    # separate text nodes in the accessibility tree, so the obvious
    # implementation would not work well.
    if phrase == " ":
        pattern = r"\s"
    else:
        if phrase.startswith(" "):
            pattern = r"\s" + pattern
        if phrase.endswith(" "):
            pattern = pattern + r"\s"
    # Only match at boundaries of alphanumeric sequences if the phrase ends
    # are alphanumeric.
    if regex.search(r"^[\w--_]", phrase, regex.VERSION1 | regex.UNICODE):
        pattern = r"(?<![\w--_])" + pattern
    if regex.search(r"[\w--_]$", phrase, regex.VERSION1 | regex.UNICODE):
        pattern = pattern + r"(?![\w--_])"
    return pattern
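A hedged usage sketch, assuming the function above is in scope. The returned pattern uses the regex module's set-difference syntax ([^\w--_]), so it must be searched with regex.VERSION1:

import regex

pattern = _phrase_to_regex("hello world")
m = regex.search(pattern, "Say hello,  world!", regex.VERSION1 | regex.UNICODE)
print(m.group(0))  # 'hello,  world' -- punctuation and extra spaces between the words are tolerated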
Example 3: exclude_words
# Required module: import regex [as alias]
# or: from regex import escape [as alias]
def exclude_words(phrasegrams, words):
    """Given a list of words, excludes those from the keys of the phrase dictionary."""
    new_phrasergrams = {}
    words_re_list = []
    for word in words:
        we = regex.escape(word)
        words_re_list.append("^" + we + "$|^" + we + "_|_" + we + "$|_" + we + "_")
    word_reg = regex.compile(r"" + "|".join(words_re_list))
    for gram in tqdm(phrasegrams):
        valid = True
        for sub_gram in gram:
            if word_reg.search(sub_gram.decode("unicode_escape", "ignore")) is not None:
                valid = False
                break
        if not valid:
            continue
        if valid:
            new_phrasergrams[gram] = phrasegrams[gram]
    return new_phrasergrams


# Generating word grams.
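A small, made-up phrase dictionary in the shape the function expects (keys are tuples of byte strings, as produced by gensim-style phrase models), just to show the filtering; tqdm must be installed since the function above calls it:

import regex
from tqdm import tqdm

phrasegrams = {(b"new", b"york"): 100, (b"in", b"the"): 80}
print(exclude_words(phrasegrams, ["in", "the"]))
# -> {(b'new', b'york'): 100} -- grams containing an excluded word are dropped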
Example 4: remove_punctuation
# Required module: import regex [as alias]
# or: from regex import escape [as alias]
def remove_punctuation(text: str, marks=None) -> str:
    """
    Remove punctuation from ``text`` by removing all instances of ``marks``.

    Args:
        text (str): Urdu text
        marks (str): If specified, remove only the characters in this string,
            e.g. ``marks=',;:'`` removes commas, semi-colons, and colons.
            Otherwise, all punctuation marks are removed.
    Returns:
        str: returns a ``str`` object containing normalized text.
    Note:
        When ``marks=None``, Python's built-in :meth:`str.translate()` is
        used to remove punctuation; otherwise, a regular expression is used
        instead. The former's performance is about 5-10x faster.
    Examples:
        >>> from urduhack.preprocessing import remove_punctuation
        >>> output = remove_punctuation("کر ؟ سکتی ہے۔")
        کر سکتی ہے
    """
    if marks:
        return re.sub('[{}]+'.format(re.escape(marks)), '', text, flags=re.UNICODE)
    return text.translate(PUNCTUATION_TRANSLATE_UNICODE)
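The marks branch reduces to a single re.sub with the escaped characters placed inside a character class. For instance, removing only the Urdu comma and semicolon (the sample string is illustrative):

import re

text = "کر، سکتی؛ ہے"
print(re.sub('[{}]+'.format(re.escape('،؛')), '', text, flags=re.UNICODE))
# -> 'کر سکتی ہے' -- only the listed marks are stripped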
Example 5: test_expression_diff
# Required module: import regex [as alias]
# or: from regex import escape [as alias]
def test_expression_diff(self) -> None:
    source, _ = read_data("expression.py")
    expected, _ = read_data("expression.diff")
    tmp_file = Path(black.dump_to_file(source))
    diff_header = re.compile(
        rf"{re.escape(str(tmp_file))}\t\d\d\d\d-\d\d-\d\d "
        r"\d\d:\d\d:\d\d\.\d\d\d\d\d\d \+\d\d\d\d"
    )
    try:
        result = BlackRunner().invoke(black.main, ["--diff", str(tmp_file)])
        self.assertEqual(result.exit_code, 0)
    finally:
        os.unlink(tmp_file)
    actual = result.output
    actual = diff_header.sub(DETERMINISTIC_HEADER, actual)
    actual = actual.rstrip() + "\n"  # the diff output has a trailing space
    if expected != actual:
        dump = black.dump_to_file(actual)
        msg = (
            "Expected diff isn't equal to the actual. If you made changes to"
            " expression.py and this is an anticipated difference, overwrite"
            f" tests/data/expression.diff with {dump}"
        )
        self.assertEqual(expected, actual, msg)
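The escaping matters here because the temporary file path lands inside a regex: path separators and other punctuation would otherwise be read as pattern syntax. A stand-alone illustration with a made-up path:

import re

tmp = r"C:\Users\me\AppData\Local\Temp\blk_x1+.py"   # invented path for illustration
header_re = re.compile(
    rf"{re.escape(tmp)}\t\d\d\d\d-\d\d-\d\d "
    r"\d\d:\d\d:\d\d\.\d\d\d\d\d\d \+\d\d\d\d"
)
print(bool(header_re.match(tmp + "\t2020-01-01 12:00:00.000000 +0000")))  # True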
Example 6: search_citing_sentences
# Required module: import regex [as alias]
# or: from regex import escape [as alias]
def search_citing_sentences(aid, txt, match):
    lines = txt.split('\n')
    txt = ' '.join(lines)
    txt = ' '.join(txt.split())
    sentences = split_sentences(txt)
    founds = set()
    for r in match.keys():
        if r:
            regexp_list = [regex.escape('\\cite%s' % r),
                           regex.escape('\\refs{%s}' % r),
                           r'(?<!(bibitem|lref).*?)' + regex.escape('%s' % r)]
            print(aid, r)
            for regexp in regexp_list:
                results = search_citation(sentences, regexp)
                founds.update(results)
                print("Regex: '{0!s}', Found: {1:d}".format(regexp, len(results)))
                if len(results):
                    break
            print("_" * 50)
    return founds
Example 7: _replace_escape_bytes
# Required module: import regex [as alias]
# or: from regex import escape [as alias]
def _replace_escape_bytes(self, value):
    chunks = []
    offset = 0

    while offset < len(value):
        match = self._lex_escape_re.search(value, offset)

        if match is None:
            # Append the remaining of the string
            chunks.append(value[offset:])
            break

        # Append the part of string before match
        chunks.append(value[offset:match.start()])
        offset = match.end()

        # Process the escape
        if match.group(1) is not None:  # single-char
            chr = match.group(1)

            if chr == b"\n":
                pass
            elif chr == b"\\" or chr == b"'" or chr == b"\"":
                chunks.append(chr)
            elif chr == b"a":
                chunks.append(b"\a")
            elif chr == b"b":
                chunks.append(b"\b")
            elif chr == b"f":
                chunks.append(b"\f")
            elif chr == b"n":
                chunks.append(b"\n")
            elif chr == b"r":
                chunks.append(b"\r")
            elif chr == b"t":
                chunks.append(b"\t")
            elif chr == b"v":
                chunks.append(b"\v")
        elif match.group(2) is not None:  # oct
            chunks.append(byte(int(match.group(2), 8)))
        elif match.group(3) is not None:  # hex
            chunks.append(byte(int(match.group(3), 16)))

    return b"".join(chunks)
Example 8: escape_string
# Required module: import regex [as alias]
# or: from regex import escape [as alias]
def escape_string(self):
    """Escape all special characters in a string

    Returns:
        Chepy: The Chepy object.
    """
    self.state = re.escape(self._convert_to_str())
    return self
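This method comes from the Chepy library. Assuming the chepy package is installed, a chained call might look like the following; the result is read back from .state, since that is exactly what the method assigns to:

from chepy import Chepy

print(Chepy("1.0 (beta)?").escape_string().state)
# -> 1\.0\ \(beta\)\?  (the exact set of escaped characters depends on the Python version)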
Example 9: extract
# Required module: import regex [as alias]
# or: from regex import escape [as alias]
def extract(s: str, entities: Iterable[str], useregex=False, ignorecase=True) -> Iterable[str]:
    for m in re.compile(
        r"\b(?:{})\b".format(r"|".join(
            e if useregex else re.escape(e).replace('\\ ', r"\s+") for e in entities
        )),
        re.I if ignorecase else 0
    ).finditer(s):
        yield m.group(0)
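Assuming the generator above is importable (its annotations also need Iterable from typing), a quick call showing the effect of ignorecase and the flexible whitespace between words; the sample sentence is made up:

import re
from typing import Iterable

print(list(extract("Email John Smith or JOHN  SMITH today", ["John Smith"])))
# -> ['John Smith', 'JOHN  SMITH']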
Example 10: __findeqtagpairspans
# Required module: import regex [as alias]
# or: from regex import escape [as alias]
def __findeqtagpairspans(
    s: str,
    tag: str,
    useregex: bool = False
) -> Iterable[Tuple[Tuple[int, int], ...]]:
    for match in re.finditer(r"(?P<__open>{})(?P<__content>.*?)(?P<__close>\1)".format(tag if useregex else re.escape(tag)), s):
        yield (match.span("__open"), match.span("__content"), match.span("__close"))
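With identical opening and closing tags, the backreference \1 closes the pair. A small illustration (the Markdown-style string below is made up):

import re
from typing import Iterable, Tuple

s = "some **bold** text"
print(list(__findeqtagpairspans(s, "**")))
# -> [((5, 7), (7, 11), (11, 13))] : spans of the opening '**', the content 'bold', the closing '**'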
Example 11: __findtagpairspans
# Required module: import regex [as alias]
# or: from regex import escape [as alias]
def __findtagpairspans(
    s: str,
    tag: str, closetag: Optional[str] = None,
    useregex: bool = False
) -> Iterable[Tuple[Tuple[int, int], ...]]:
    if closetag is None or tag == closetag:
        yield from __findeqtagpairspans(s, tag, useregex=useregex)
        return

    if not useregex:
        tag = re.escape(tag)
        closetag = re.escape(closetag)

    retags = re.compile(r"(?P<__open>{})|(?P<__close>{})".format(tag, closetag))

    startspans = []
    for match in retags.finditer(s):
        opengroup = match.group("__open")
        if opengroup:
            startspans.append(match.span())
            continue

        closegroup = match.group("__close")
        if closegroup and startspans:
            startspan = startspans.pop()
            endspan = match.span()
            yield (startspan, (startspan[1], endspan[0]), endspan)
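With distinct opening and closing tags, the stack pairs them innermost-first. A small illustration (the HTML-ish string is made up; the function above and its typing imports are assumed to be in scope):

import re
from typing import Iterable, Optional, Tuple

s = "<b>one <b>two</b> three</b>"
for open_, content, close in __findtagpairspans(s, "<b>", "</b>"):
    print(s[content[0]:content[1]])
# prints 'two' first, then 'one <b>two</b> three'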
Example 12: __init__
# Required module: import regex [as alias]
# or: from regex import escape [as alias]
def __init__(self, charset: Union[Dict[str, Sequence[int]], Sequence[str], str]) -> None:
    """
    Builds a codec converting between graphemes/code points and integer
    label sequences.

    charset may either be a string, a list or a dict. In the first case
    each code point will be assigned a label, in the second case each
    string in the list will be assigned a label, and in the final case each
    key string will be mapped to the value sequence of integers. In the
    first two cases labels will be assigned automatically.

    As 0 is the blank label in a CTC output layer, output labels and input
    dictionaries are/should be 1-indexed.

    Args:
        charset (unicode, list, dict): Input character set.
    """
    if isinstance(charset, dict):
        self.c2l = charset
    else:
        self.c2l = {k: [v] for v, k in enumerate(sorted(charset), start=1)}
    # map integer labels to code points because regex only works with strings
    self.l2c = {}  # type: Dict[str, str]
    for k, v in self.c2l.items():
        self.l2c[''.join(chr(c) for c in v)] = k

    # sort prefixes for c2l regex
    self.c2l_regex = regex.compile(r'|'.join(regex.escape(x) for x in sorted(self.c2l.keys(), key=len, reverse=True)))
    # sort prefixes for l2c regex
    self.l2c_regex = regex.compile(r'|'.join(regex.escape(x) for x in sorted(self.l2c.keys(), key=len, reverse=True)))
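The keys are escaped and sorted longest-first so that multi-character graphemes win over their single-character prefixes in the alternation. A stripped-down re-creation of the c2l regex on a toy charset (illustrative only, not the original project's API):

import regex

charset = ['a', 'b', 'ab']          # 'ab' is a multi-character grapheme
c2l = {k: [v] for v, k in enumerate(sorted(charset), start=1)}
c2l_regex = regex.compile(r'|'.join(
    regex.escape(x) for x in sorted(c2l.keys(), key=len, reverse=True)))
print([c2l[m.group()] for m in c2l_regex.finditer("abba")])
# -> [[2], [3], [1]] : 'ab' is matched as one label instead of 'a' + 'b'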
Example 13: read_xsampa_table
# Required module: import regex [as alias]
# or: from regex import escape [as alias]
def read_xsampa_table(self):
    filename = os.path.join('data', 'ipa-xsampa.csv')
    filename = pkg_resources.resource_filename(__name__, filename)
    with open(filename, 'rb') as f:
        xs2ipa = {x[1]: x[0] for x in csv.reader(f, encoding='utf-8')}
    xs = sorted(xs2ipa.keys(), key=len, reverse=True)
    xs_regex = re.compile('|'.join(list(map(re.escape, xs))))
    return xs_regex, xs2ipa
Example 14: _construct_split_regex
# Required module: import regex [as alias]
# or: from regex import escape [as alias]
def _construct_split_regex(self):
    known_words_group = "|".join(map(re.escape, self._get_sorted_words_from_cache()))
    if self._no_word_spacing:
        regex = r"^(.*?)({})(.*)$".format(known_words_group)
    else:
        regex = r"^(.*?(?:\A|\W|_|\d))({})((?:\Z|\W|_|\d).*)$".format(known_words_group)
    self._split_regex_cache.setdefault(
        self._settings.registry_key, {})[self.info['name']] = \
        re.compile(regex, re.UNICODE | re.IGNORECASE)
Example 15: convert
# Required module: import regex [as alias]
# or: from regex import escape [as alias]
def convert(self, newstart: str) -> None:
    """Convert to another list type by replacing starting pattern."""
    match = self._match
    ms = match.start()
    for s, e in reversed(match.spans('pattern')):
        self[s - ms:e - ms] = newstart
    self.pattern = escape(newstart)