本文整理汇总了Python中regex.escape函数的典型用法代码示例。如果您正苦于以下问题:Python escape函数的具体用法?Python escape怎么用?Python escape使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了escape函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
def __init__(self, keywords, fuzzy_min_len=None):
    """Build the keyword table and the combined search regex.

    :param keywords: iterable of (id, keyword) pairs; keywords are
        normalized (stripped, lower-cased) and must be unique.
    :param fuzzy_min_len: optional thresholds controlling how much edit
        distance is allowed per keyword (consumed by get_allow_distance).
    """
    self.fuzzy_min_len = sorted(fuzzy_min_len) if fuzzy_min_len is not None else []
    self.keywords = {}
    for ident, word in keywords:
        word = word.strip().lower()
        if word in self.keywords:
            print("ERROR: found duplicate keyword '{0}'".format(word))
        else:
            self.keywords[word] = ident
    print("Number of unique keywords ID to be search: {0}"
          .format(len(self.keywords)))
    # One alternation branch per keyword; fuzzy keywords get the regex
    # module's {e<=d} edit-distance quantifier.
    parts = []
    for word in self.keywords:
        allowed = self.get_allow_distance(word)
        if allowed:
            parts.append(r'(?:{0}){{e<={1}}}'.format(re.escape(word), allowed))
        else:
            parts.append(re.escape(word))
    self.re_keywords = re.compile(r'\b(?:{0})\b'.format('|'.join(parts)))
示例2: process_index_title_change_in_history
def process_index_title_change_in_history(indx, **kwargs):
    """
    Update all history entries which reference 'old' to 'new'.
    """
    old = kwargs["old"]
    escaped_old = re.escape(old)
    if indx.is_commentary():
        # Commentaries are referenced as "<Title> on <base text>".
        pattern = r'{} on '.format(escaped_old)
        title_pattern = r'(^{}$)|({} on)'.format(escaped_old, escaped_old)
    else:
        # Base texts match directly ("Title 1:2") or through any known
        # commentator ("<Commentator> on Title 1:2").
        commentators = text.IndexSet({"categories.0": "Commentary"}).distinct("title")
        joined = "|".join(commentators)
        pattern = r"(^{} \d)|(^({}) on {} \d)".format(escaped_old, joined, escaped_old)
        title_pattern = r'(^{}$)|(^({}) on {})'.format(escaped_old, joined, escaped_old)
    # Text history: single ref field.
    for h in HistorySet({"ref": {"$regex": pattern}}):
        h.ref = h.ref.replace(old, kwargs["new"], 1)
        h.save()
    # Link history: list of refs.
    for h in HistorySet({"new.refs": {"$regex": pattern}}):
        h.new["refs"] = [r.replace(old, kwargs["new"], 1) for r in h.new["refs"]]
        h.save()
    # Note history: nested ref field.
    for h in HistorySet({"new.ref": {"$regex": pattern}}):
        h.new["ref"] = h.new["ref"].replace(old, kwargs["new"], 1)
        h.save()
    # Index-title history entries.
    for h in HistorySet({"title": {"$regex": title_pattern}}):
        h.title = h.title.replace(old, kwargs["new"], 1)
        h.save()
示例3: dep_counts
def dep_counts(name):
    """Tally how many versions, history entries, notes and links still
    reference *name*, keyed by the kind of reference."""
    escaped = re.escape(name)
    ref_patterns = {
        'alone': r'^{} \d'.format(escaped),
        'commentor': r'{} on'.format(escaped),
        'commentee': r'on {} \d'.format(escaped)
    }
    commentee_title_pattern = r'on {}'.format(escaped)
    counts = {
        'version title exact match': text.VersionSet({"title": name}).count(),
        'version title match commentor': text.VersionSet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'version title match commentee': text.VersionSet({"title": {"$regex": commentee_title_pattern}}).count(),
        'history title exact match': history.HistorySet({"title": name}).count(),
        'history title match commentor': history.HistorySet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'history title match commentee': history.HistorySet({"title": {"$regex": commentee_title_pattern}}).count(),
    }
    # Count ref-based matches for each pattern flavour.
    for pname, pattern in ref_patterns.items():
        regex_query = {"$regex": pattern}
        counts['note match ' + pname] = note.NoteSet({"ref": regex_query}).count()
        counts['link match ' + pname] = link.LinkSet({"refs": regex_query}).count()
        counts['history refs match ' + pname] = history.HistorySet({"ref": regex_query}).count()
        counts['history new refs match ' + pname] = history.HistorySet({"new.refs": regex_query}).count()
    return counts
示例4: process_index_title_change_in_history
def process_index_title_change_in_history(indx, **kwargs):
print "Cascading History {} to {}".format(kwargs['old'], kwargs['new'])
"""
Update all history entries which reference 'old' to 'new'.
"""
from sefaria.model.text import prepare_index_regex_for_dependency_process
pattern = prepare_index_regex_for_dependency_process(indx)
pattern = pattern.replace(re.escape(indx.title), re.escape(kwargs["old"]))
title_pattern = ur'(^{}$)'.format(re.escape(kwargs["old"]))
text_hist = HistorySet({"ref": {"$regex": pattern}})
print "Cascading Text History {} to {}".format(kwargs['old'], kwargs['new'])
for h in text_hist:
h.ref = h.ref.replace(kwargs["old"], kwargs["new"], 1)
h.save()
link_hist = HistorySet({"new.refs": {"$regex": pattern}})
print "Cascading Link History {} to {}".format(kwargs['old'], kwargs['new'])
for h in link_hist:
h.new["refs"] = [r.replace(kwargs["old"], kwargs["new"], 1) for r in h.new["refs"]]
h.save()
note_hist = HistorySet({"new.ref": {"$regex": pattern}})
print "Cascading Note History {} to {}".format(kwargs['old'], kwargs['new'])
for h in note_hist:
h.new["ref"] = h.new["ref"].replace(kwargs["old"], kwargs["new"], 1)
h.save()
title_hist = HistorySet({"title": {"$regex": title_pattern}})
print "Cascading Index History {} to {}".format(kwargs['old'], kwargs['new'])
for h in title_hist:
h.title = h.title.replace(kwargs["old"], kwargs["new"], 1)
h.save()
示例5: inline_one
def inline_one(start: str, end: str, nest=Nesting.FRAME, sub=None, display=Display.INLINE):
    """Build an inline element parser for a single ``start``...``end``
    delimited group, escaping the delimiters' first characters."""
    template = Patterns.single_group.value
    compiled = re.compile(template.format(re.escape(start), re.escape(end)))
    escape_chars = [start[0], end[0]]
    return inline(compiled, escape=escape_chars, nest=nest, display=display, sub=sub)
示例6: process_index_delete_in_links
def process_index_delete_in_links(indx, **kwargs):
    # Delete every link whose refs reference the index being removed.
    if indx.is_commentary():
        # Commentaries are referenced as "<Title> on <base text>".
        pattern = ur'^{} on '.format(re.escape(indx.title))
    else:
        # Base texts may be referenced directly ("Title 1:2") or through
        # any known commentator ("<Commentator> on Title 1:2").
        commentators = text.IndexSet({"categories.0": "Commentary"}).distinct("title")
        pattern = ur"(^{} \d)|^({}) on {} \d".format(re.escape(indx.title), "|".join(commentators), re.escape(indx.title))
    LinkSet({"refs": {"$regex": pattern}}).delete()
示例7: guess_split
def guess_split(majiribun, reading):
    """Guess which hiragana reading belongs to each kanji in a mixed
    kanji/kana string.

    Builds two regexes over *reading* -- one greedy, one non-greedy --
    with a capture group per kanji.  If both agree, the split is
    unambiguous and a {kanji: reading} dict is returned; otherwise
    (ambiguous split, or no match at all) None is returned.
    """
    kanjis = []
    matchreg_greedy = ''
    matchreg_nongreedy = ''
    for char in majiribun:
        if kanji_re.match(char):
            kanjis.append(char)
            # Raw strings: the originals used non-raw literals, so '\p'
            # was an invalid escape sequence (DeprecationWarning now, an
            # error in future Python versions).
            matchreg_greedy += r"(\p{Hiragana}+)"
            matchreg_nongreedy += r"(\p{Hiragana}+?)"
        else:
            # Kana/punctuation must match literally.
            matchreg_greedy += re.escape(char)
            matchreg_nongreedy += re.escape(char)
    m = re.match(matchreg_greedy + '$', reading)
    if m:
        yomis = m.groups()
        yomis_nongreedy = re.match(matchreg_nongreedy + '$', reading).groups()
        if yomis != yomis_nongreedy:
            # Greedy and non-greedy splits disagree: ambiguous.
            return None
        # One captured reading per kanji, in order.
        return dict(zip(kanjis, yomis))
示例8: dep_counts
def dep_counts(name):
    """Tally how many versions, history entries, notes and links still
    reference *name*, keyed by the kind of reference."""
    commentators = model.IndexSet({"categories.0": "Commentary"}).distinct("title")
    escaped = re.escape(name)
    joined = "|".join(commentators)
    ref_patterns = {
        'alone': r'^{} \d'.format(escaped),
        'commentor': r'{} on'.format(escaped),
        'commentee': r'^({}) on {} \d'.format(joined, escaped)
    }
    commentee_title_pattern = r'^({}) on {} \d'.format(joined, escaped)
    counts = {
        'version title exact match': model.VersionSet({"title": name}).count(),
        'version title match commentor': model.VersionSet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'version title match commentee': model.VersionSet({"title": {"$regex": commentee_title_pattern}}).count(),
        'history title exact match': model.HistorySet({"title": name}).count(),
        'history title match commentor': model.HistorySet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'history title match commentee': model.HistorySet({"title": {"$regex": commentee_title_pattern}}).count(),
    }
    # Count ref-based matches for each pattern flavour.
    for pname, pattern in ref_patterns.items():
        regex_query = {"$regex": pattern}
        counts['note match ' + pname] = model.NoteSet({"ref": regex_query}).count()
        counts['link match ' + pname] = model.LinkSet({"refs": regex_query}).count()
        counts['history refs match ' + pname] = model.HistorySet({"ref": regex_query}).count()
        counts['history new refs match ' + pname] = model.HistorySet({"new.refs": regex_query}).count()
    return counts
示例9: expand_parens
def expand_parens(string, parens="()", include_spaces=False, substitute_string=''):
    """Expand a parenthesized alternative in *string*.

    Returns ``[without, within]`` -- the string with the parenthesized
    segment removed and with it inlined -- or ``[string]`` unchanged when
    no parenthesized segment is found.

    :param parens: two-character string giving the open/close delimiters.
    :param include_spaces: if True, allow whitespace adjacent to the
        parenthesized segment.
    :param substitute_string: placeholder text to strip from just inside
        the parens before matching.
    """
    # Dead `output = []` initializer removed: it was unconditionally
    # reassigned before any use.
    open_paren = re.escape(parens[0])
    close_paren = re.escape(parens[1])
    substitute_string = re.escape(substitute_string)
    # Strip the placeholder adjacent to each delimiter.
    in_string = re.sub(open_paren + substitute_string, parens[0], string)
    in_string = re.sub(substitute_string + close_paren, parens[1], in_string)
    if include_spaces:
        regex1 = regex2 = re.compile(r'(^.*)' + open_paren + r'(.+)' + close_paren + r'(.*$)')
    else:
        # Variant 1: non-space before the open paren; variant 2: non-space
        # after the close paren.
        regex1 = re.compile(r'(^.*\S)' + open_paren + r'(\S+)' + close_paren + r'(.*$)')
        regex2 = re.compile(r'(^.*)' + open_paren + r'(\S+)' + close_paren + r'(\S.*$)')
    re_match1 = regex1.search(in_string)
    re_match2 = regex2.search(in_string)
    if re_match1:
        within = re_match1.group(1) + re_match1.group(2) + re_match1.group(3)
        without = re_match1.group(1) + re_match1.group(3)
    elif re_match2:
        within = re_match2.group(1) + re_match2.group(2) + re_match2.group(3)
        without = re_match2.group(1) + re_match2.group(3)
    else:
        # No parenthesized segment: return the input unchanged.
        return [string]
    return [clean_str(without), clean_str(within)]
示例10: __init__
def __init__(self, charset: Union[Dict[str, Sequence[int]], Sequence[str], str]) -> None:
    """
    Builds a codec converting between graphemes/code points and integer
    label sequences.

    charset may be a string, a list, or a dict.  A string assigns one
    label per code point; a list assigns one label per string; a dict
    maps each key string to an explicit sequence of integer labels.  In
    the first two cases labels are assigned automatically, 1-indexed,
    because 0 is reserved for the CTC blank label.

    Args:
        charset (unicode, list, dict): Input character set.
    """
    if isinstance(charset, dict):
        self.c2l = charset
    else:
        # Auto-assign labels in sorted order, starting at 1 (0 = blank).
        self.c2l = {grapheme: [label] for label, grapheme in enumerate(sorted(charset), start=1)}
    # Invert the mapping; labels are encoded as strings of code points
    # because regex only operates on text.
    self.l2c = {}  # type: Dict[str, str]
    for grapheme, labels in self.c2l.items():
        self.l2c[''.join(chr(code) for code in labels)] = grapheme
    # Longest keys first so multi-character prefixes win over shorter ones.
    c2l_keys = sorted(self.c2l.keys(), key=len, reverse=True)
    self.c2l_regex = regex.compile(r'|'.join(regex.escape(key) for key in c2l_keys))
    l2c_keys = sorted(self.l2c.keys(), key=len, reverse=True)
    self.l2c_regex = regex.compile(r'|'.join(regex.escape(key) for key in l2c_keys))
示例11: _match_by_edit_distance
def _match_by_edit_distance(full_text, text_to_match):
    """Locate the substring of *full_text* that best matches
    *text_to_match*, ranking candidate windows by edit distance.

    Returns the best-matching substring (trimmed/extended so its last
    character lines up with the target's), or *text_to_match* itself
    when no candidate window is found.
    """
    # Undo PTB-style bracket tokens before searching the raw text.
    text_to_match = text_to_match.replace("-LRB-", "(").replace("-RRB-", ")")
    text_to_match = text_to_match.replace("-LCB-", "{").replace("-RCB-", "}")
    text_to_match = re.sub(r'\[\\\]\\\)\]$', ')', text_to_match)
    try:
        # Anchor candidate windows at every case-insensitive occurrence of
        # the first word; each window is one char longer than the target.
        end_point = (text_to_match.index(" ") if " " in text_to_match else len(text_to_match))
        potential_matches = [full_text[m.start():(m.start() + len(text_to_match) + 1)] for m in
                             re.finditer(re.escape(text_to_match[0:end_point]), full_text, re.U | re.I)]
    except:
        # NOTE(review): bare except + sys.exit(1) looks like a debugging
        # aid left in place -- any failure aborts the whole process after
        # dumping both strings.
        import sys
        print(full_text)
        print()
        print(text_to_match)
        sys.exit(1)
    if len(potential_matches) == 0:
        # Fallback 1: anchor on just the first character (case-sensitive).
        potential_matches = [full_text[m.start():(m.start() + len(text_to_match) + 1)] for m in
                             re.finditer(re.escape(text_to_match[0]), full_text, re.U)]
    if len(potential_matches) == 0:
        # Fallback 2: treat "(" as "[" and retry the single-char anchor.
        text_to_match = text_to_match.replace("(", "[")
        potential_matches = [full_text[m.start():(m.start() + len(text_to_match) + 1)] for m in
                             re.finditer(re.escape(text_to_match[0]), full_text, re.U)]
    # Trim each window at the last occurrence of the target's final char.
    potential_matches = [(p[0:p.rindex(text_to_match[-1])+1]
                          if text_to_match[-1] in p and len(p) > len(text_to_match)
                          else p)
                         for p in potential_matches]
    if len(potential_matches) == 0:
        # No idea why this would ever happen, but it does
        return text_to_match
    # Pick the candidate with the smallest edit distance (ties: last wins).
    match_with_lowest_edit_distance = ""
    lowest_edit_distance = -1
    for match in potential_matches:
        e_d = edit_distance(match, text_to_match)
        if lowest_edit_distance == -1 or e_d <= lowest_edit_distance:
            lowest_edit_distance = e_d
            match_with_lowest_edit_distance = match
    result = match_with_lowest_edit_distance.strip()
    # Align the result's final character with the target's; closing
    # quotes get special handling for typographic variants.
    if text_to_match[-1] in result:
        while result[-1] != text_to_match[-1]:
            result = result[0:-1]
    elif text_to_match[-1] == '"' and re.search(r'["”\u201d]', result):
        while result[-1] not in ['"', '”', "\u201d"]:
            result = result[0:-1]
    elif text_to_match[-1] not in [']', '}', ')'] and text_to_match[-2:] != "..":
        # Extend one character at a time from full_text until aligned.
        while result[-1] != text_to_match[-1]:
            result += full_text[full_text.index(result) + len(result)][-1]
    return result
示例12: process_index_title_change_in_links
def process_index_title_change_in_links(indx, **kwargs):
    """Rewrite link refs that reference the old index title."""
    old = kwargs["old"]
    escaped_old = re.escape(old)
    if indx.is_commentary():
        # Commentaries are referenced as "<Title> on <base text>".
        pattern = r'^{} on '.format(escaped_old)
    else:
        # Base texts match directly or through any known commentator.
        commentators = text.IndexSet({"categories.0": "Commentary"}).distinct("title")
        pattern = r"(^{} \d)|(^({}) on {} \d)".format(escaped_old, "|".join(commentators), escaped_old)
    for lnk in LinkSet({"refs": {"$regex": pattern}}):
        # Only rewrite the refs that actually match; first occurrence only.
        lnk.refs = [ref.replace(old, kwargs["new"], 1) if re.search(pattern, ref) else ref
                    for ref in lnk.refs]
        lnk.save()
示例13: process_index_title_change_in_links
def process_index_title_change_in_links(indx, **kwargs):
    # Cascade an index title rename (kwargs['old'] -> kwargs['new']) into
    # every Link document whose refs match the old title.
    print "Cascading Links {} to {}".format(kwargs['old'], kwargs['new'])
    # Build the ref-matching regex for this index, then substitute the old
    # (pre-rename) title for the index's current title.
    pattern = text.Ref(indx.title).regex()
    pattern = pattern.replace(re.escape(indx.title), re.escape(kwargs["old"]))
    links = LinkSet({"refs": {"$regex": pattern}})
    for l in links:
        # Only rewrite refs that match; replace the first occurrence only.
        l.refs = [r.replace(kwargs["old"], kwargs["new"], 1) if re.search(pattern, r) else r for r in l.refs]
        try:
            l.save()
        except InputError: #todo: this belongs in a better place - perhaps in abstract
            # A rewritten ref may no longer validate; drop the link rather
            # than leave it pointing at a broken ref.
            logger.warning("Deleting link that failed to save: {} - {}".format(l.refs[0], l.refs[1]))
            l.delete()
示例14: process_index_title_change_in_notes
def process_index_title_change_in_notes(indx, **kwargs):
    # Cascade an index title rename (kwargs['old'] -> kwargs['new']) into
    # every Note whose ref matches the old title.
    print "Cascading Notes {} to {}".format(kwargs['old'], kwargs['new'])
    # Build the ref-matching regex for this index, then substitute the old
    # (pre-rename) title for the index's current title.
    pattern = Ref(indx.title).regex()
    pattern = pattern.replace(re.escape(indx.title), re.escape(kwargs["old"]))
    notes = NoteSet({"ref": {"$regex": pattern}})
    for n in notes:
        try:
            n.ref = n.ref.replace(kwargs["old"], kwargs["new"], 1)
            n.save()
        except Exception:
            # A rewritten ref may no longer validate; drop the note rather
            # than leave it pointing at a broken ref.
            logger.warning("Deleting note that failed to save: {}".format(n.ref))
            n.delete()
示例15: __init__
def __init__(self, leading_allow=None, trailing_allow=None):
    """
    :param list leading_allow: The leading punctuation characters to allow.
    :param list trailing_allow: The trailing punctuation characters to allow.
    """
    # Character classes of allowed punctuation; empty string when nothing
    # is allowed on that side.
    leading_pattern = "" if not leading_allow else r"[%s]*" % regex.escape("".join(leading_allow))
    trailing_pattern = "" if not trailing_allow else r"[%s]" % regex.escape("".join(trailing_allow))
    if trailing_pattern:
        # Raw strings: '\W' in a non-raw literal is an invalid escape
        # sequence (DeprecationWarning now, an error in future Python);
        # the pattern value itself is unchanged.
        # Token = word chars, optionally wrapped by allowed punctuation.
        super(OuterPunctuationFilter, self).__init__(
            r"%s[^\W_]+(?:$|.*[^\W_]%s*|%s*)" % (leading_pattern, trailing_pattern, trailing_pattern)
        )
    else:
        super(OuterPunctuationFilter, self).__init__(r"%s[^\W_](?:$|.*[^\W_])" % leading_pattern)