

Python regex.escape Function Code Examples

This article collects typical usage examples of the regex.escape function in Python. If you are wondering how exactly to call escape, how it behaves, or what it looks like in real code, the hand-picked examples below may help.


The following presents 15 code examples of the escape function, sorted by popularity by default.
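As a quick orientation before the project examples: regex.escape returns its argument with regex metacharacters backslash-escaped, so the result matches the original text literally. Most of the snippets below come from projects that import the third-party regex package under the alias re (import regex as re), which is why they call re.escape. A minimal sketch (the input string is made up):

import regex

user_input = "C++ (2.0)?"            # contains the metacharacters +, (, ), ? and .
literal = regex.escape(user_input)   # -> 'C\+\+ \(2\.0\)\?' (whether spaces are escaped depends on the regex version)
pattern = regex.compile(literal)

# Without escaping, '+', '(', ')' and '?' would be treated as regex operators.
print(bool(pattern.search("installed C++ (2.0)? today")))   # True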

Example 1: __init__

    def __init__(self, keywords, fuzzy_min_len=None):
        """Initialize search
        """
        if fuzzy_min_len is None:
            fuzzy_min_len = []
        self.fuzzy_min_len = sorted(fuzzy_min_len)
        self.keywords = {}
        for i, k in keywords:
            k = k.strip().lower()
            if k not in self.keywords:
                self.keywords[k] = i
            else:
                print("ERROR: found duplicate keyword '{0}'".format(k))

        print("Number of unique keywords ID to be search: {0}"
              .format(len(self.keywords)))

        kw = []
        for k in self.keywords:
            d = self.get_allow_distance(k)
            if d:
                kw.append(r'(?:{0}){{e<={1}}}'.format(re.escape(k), d))
            else:
                kw.append(re.escape(k))

        re_str = '|'.join(kw)
        re_str = r'\b(?:{0})\b'.format(re_str)
        self.re_keywords = re.compile(re_str)
Author: soodoku, Project: search-names, Lines: 28, Source file: searchengines.py
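The distinctive piece in Example 1 is the regex module's fuzzy-matching syntax: (?:...){e<=N} lets the escaped keyword match with up to N edits (insertions, deletions, substitutions). A minimal sketch with a made-up keyword and error budget:

import regex

keyword = "acetaminophen"      # hypothetical keyword
pattern = regex.compile(r'\b(?:{0}){{e<=2}}\b'.format(regex.escape(keyword)))

# Matches despite one substituted letter, because up to two edits are allowed.
print(bool(pattern.search("took some acetominophen for the headache")))   # True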

Example 2: process_index_title_change_in_history

def process_index_title_change_in_history(indx, **kwargs):
    """
    Update all history entries which reference 'old' to 'new'.
    """
    if indx.is_commentary():
        pattern = r'{} on '.format(re.escape(kwargs["old"]))
        title_pattern = r'(^{}$)|({} on)'.format(re.escape(kwargs["old"]), re.escape(kwargs["old"]))
    else:
        commentators = text.IndexSet({"categories.0": "Commentary"}).distinct("title")
        pattern = r"(^{} \d)|(^({}) on {} \d)".format(re.escape(kwargs["old"]), "|".join(commentators), re.escape(kwargs["old"]))
        title_pattern = r'(^{}$)|(^({}) on {})'.format(re.escape(kwargs["old"]), "|".join(commentators), re.escape(kwargs["old"]))

    text_hist = HistorySet({"ref": {"$regex": pattern}})
    for h in text_hist:
        h.ref = h.ref.replace(kwargs["old"], kwargs["new"], 1)
        h.save()

    link_hist = HistorySet({"new.refs": {"$regex": pattern}})
    for h in link_hist:
        h.new["refs"] = [r.replace(kwargs["old"], kwargs["new"], 1) for r in h.new["refs"]]
        h.save()

    note_hist = HistorySet({"new.ref": {"$regex": pattern}})
    for h in note_hist:
        h.new["ref"] = h.new["ref"].replace(kwargs["old"], kwargs["new"], 1)
        h.save()

    title_hist = HistorySet({"title": {"$regex": title_pattern}})
    for h in title_hist:
        h.title = h.title.replace(kwargs["old"], kwargs["new"], 1)
        h.save()
Author: rivkahcarl, Project: Sefaria-Project, Lines: 31, Source file: history.py

Example 3: dep_counts

def dep_counts(name):
    ref_patterns = {
        'alone': r'^{} \d'.format(re.escape(name)),
        'commentor': r'{} on'.format(re.escape(name)),
        'commentee': r'on {} \d'.format(re.escape(name))
    }

    commentee_title_pattern = r'on {}'.format(re.escape(name))

    ret = {
        'version title exact match': text.VersionSet({"title": name}).count(),
        'version title match commentor': text.VersionSet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'version title match commentee': text.VersionSet({"title": {"$regex": commentee_title_pattern}}).count(),
        'history title exact match': history.HistorySet({"title": name}).count(),
        'history title match commentor': history.HistorySet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'history title match commentee': history.HistorySet({"title": {"$regex": commentee_title_pattern}}).count(),
    }

    for pname, pattern in ref_patterns.items():
        ret.update({
            'note match ' + pname: note.NoteSet({"ref": {"$regex": pattern}}).count(),
            'link match ' + pname: link.LinkSet({"refs": {"$regex": pattern}}).count(),
            'history refs match ' + pname: history.HistorySet({"ref": {"$regex": pattern}}).count(),
            'history new refs match ' + pname: history.HistorySet({"new.refs": {"$regex": pattern}}).count()
        })

    return ret
Author: JonMosenkis, Project: Sefaria-Project, Lines: 27, Source file: count_index_dependencies.py
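The Sefaria examples above interpolate escaped titles into MongoDB $regex queries. A small sketch of why the escaping matters when a title contains regex metacharacters (the title here is made up):

import regex as re

name = "Midrash Tanchuma (Buber)"             # hypothetical title containing parentheses
pattern = r'^{} \d'.format(re.escape(name))

# With re.escape the parentheses are matched literally; without it they would form
# a capture group and the literal '(Buber)' would no longer be required.
print(bool(re.search(pattern, "Midrash Tanchuma (Buber) 3:1")))   # True
print(bool(re.search(pattern, "Midrash Tanchuma Buber 3:1")))     # False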

Example 4: process_index_title_change_in_history

def process_index_title_change_in_history(indx, **kwargs):
    """
    Update all history entries which reference 'old' to 'new'.
    """
    print("Cascading History {} to {}".format(kwargs['old'], kwargs['new']))
    from sefaria.model.text import prepare_index_regex_for_dependency_process
    pattern = prepare_index_regex_for_dependency_process(indx)
    pattern = pattern.replace(re.escape(indx.title), re.escape(kwargs["old"]))
    title_pattern = r'(^{}$)'.format(re.escape(kwargs["old"]))

    text_hist = HistorySet({"ref": {"$regex": pattern}})
    print "Cascading Text History {} to {}".format(kwargs['old'], kwargs['new'])
    for h in text_hist:
        h.ref = h.ref.replace(kwargs["old"], kwargs["new"], 1)
        h.save()

    link_hist = HistorySet({"new.refs": {"$regex": pattern}})
    print "Cascading Link History {} to {}".format(kwargs['old'], kwargs['new'])
    for h in link_hist:
        h.new["refs"] = [r.replace(kwargs["old"], kwargs["new"], 1) for r in h.new["refs"]]
        h.save()

    note_hist = HistorySet({"new.ref": {"$regex": pattern}})
    print "Cascading Note History {} to {}".format(kwargs['old'], kwargs['new'])
    for h in note_hist:
        h.new["ref"] = h.new["ref"].replace(kwargs["old"], kwargs["new"], 1)
        h.save()

    title_hist = HistorySet({"title": {"$regex": title_pattern}})
    print "Cascading Index History {} to {}".format(kwargs['old'], kwargs['new'])
    for h in title_hist:
        h.title = h.title.replace(kwargs["old"], kwargs["new"], 1)
        h.save()
Author: pzp1997, Project: Sefaria-Project, Lines: 33, Source file: history.py

Example 5: inline_one

def inline_one(start: str, end: str, nest=Nesting.FRAME, sub=None, display=Display.INLINE):
  """
  """
  patt = re.compile(Patterns.single_group.value.format(
    re.escape(start), re.escape(end)))
  return inline(patt, escape=[start[0], end[0]],
                nest=nest, display=display, sub=sub)
Author: vshesh, Project: glue, Lines: 7, Source file: elements.py

Example 6: process_index_delete_in_links

def process_index_delete_in_links(indx, **kwargs):
    if indx.is_commentary():
        pattern = r'^{} on '.format(re.escape(indx.title))
    else:
        commentators = text.IndexSet({"categories.0": "Commentary"}).distinct("title")
        pattern = ur"(^{} \d)|^({}) on {} \d".format(re.escape(indx.title), "|".join(commentators), re.escape(indx.title))
    LinkSet({"refs": {"$regex": pattern}}).delete()
Author: Elisha4700, Project: Sefaria-Project, Lines: 7, Source file: link.py

Example 7: guess_split

def guess_split(majiribun, reading):
    kanjis=[]
    matchreg_greedy=''
    matchreg_nongreedy=''
    for char in majiribun:
        if kanji_re.match(char):
            kanjis.append(char)
            matchreg_greedy += "(\p{Hiragana}+)"
            matchreg_nongreedy += "(\p{Hiragana}+?)"
        else:
            matchreg_greedy += re.escape(char)
            matchreg_nongreedy += re.escape(char)

    m = re.match(matchreg_greedy + '$', reading)
    if m:
        yomis = m.groups()

        yomis_nongreedy = re.match(matchreg_nongreedy + '$', reading).groups()
        if yomis != yomis_nongreedy:
            # Ambiguous!
            return None
        d = {}
        for idx in range(0, len(kanjis)):
            d[kanjis[idx]] = yomis[idx]
        return(d)
Author: leoboiko, Project: yomisplit, Lines: 25, Source file: __init__.py
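A usage sketch for guess_split above. The function relies on a module-level kanji_re, so a plausible definition is assumed here; the original package defines its own:

import regex as re

kanji_re = re.compile(r'\p{Han}')   # assumed definition, not taken from the original module

print(guess_split("食べる", "たべる"))       # {'食': 'た'}
print(guess_split("生き物", "いきもの"))     # {'生': 'い', '物': 'もの'}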

Example 8: dep_counts

def dep_counts(name):
    commentators = model.IndexSet({"categories.0": "Commentary"}).distinct("title")
    ref_patterns = {
        'alone': r'^{} \d'.format(re.escape(name)),
        'commentor': r'{} on'.format(re.escape(name)),
        'commentee': r'^({}) on {} \d'.format("|".join(commentators), re.escape(name))
    }

    commentee_title_pattern = r'^({}) on {} \d'.format("|".join(commentators), re.escape(name))

    ret = {
        'version title exact match': model.VersionSet({"title": name}).count(),
        'version title match commentor': model.VersionSet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'version title match commentee': model.VersionSet({"title": {"$regex": commentee_title_pattern}}).count(),
        'history title exact match': model.HistorySet({"title": name}).count(),
        'history title match commentor': model.HistorySet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'history title match commentee': model.HistorySet({"title": {"$regex": commentee_title_pattern}}).count(),
    }

    for pname, pattern in ref_patterns.items():
        ret.update({
            'note match ' + pname: model.NoteSet({"ref": {"$regex": pattern}}).count(),
            'link match ' + pname: model.LinkSet({"refs": {"$regex": pattern}}).count(),
            'history refs match ' + pname: model.HistorySet({"ref": {"$regex": pattern}}).count(),
            'history new refs match ' + pname: model.HistorySet({"new.refs": {"$regex": pattern}}).count()
        })

    return ret
Author: digideskio, Project: Sefaria-Project, Lines: 28, Source file: text_test.py

Example 9: expand_parens

def expand_parens(string, parens="()", include_spaces=False, substitute_string=''):
    output = []
    open_paren = re.escape(parens[0])
    close_paren = re.escape(parens[1])
    substitute_string = re.escape(substitute_string)
    in_string = re.sub(open_paren + substitute_string, parens[0], string)
    in_string = re.sub(substitute_string + close_paren, parens[1], in_string)

    if include_spaces:
        regex1 = regex2 = re.compile(r'(^.*)' + open_paren + r'(.+)' + close_paren + r'(.*$)')
    else:
        regex1 = re.compile(r'(^.*\S)' + open_paren + r'(\S+)' + close_paren + r'(.*$)')
        regex2 = re.compile(r'(^.*)' + open_paren + r'(\S+)' + close_paren + r'(\S.*$)')

    re_match1 = regex1.search(in_string)
    re_match2 = regex2.search(in_string)
    if re_match1:
        within = re_match1.group(1) + re_match1.group(2) + re_match1.group(3)
        without = re_match1.group(1) + re_match1.group(3)
    elif re_match2:
        within = re_match2.group(1) + re_match2.group(2) + re_match2.group(3)
        without = re_match2.group(1) + re_match2.group(3)
    else:
        return [string]

    output = [clean_str(without), clean_str(within)]

    return output
Author: longnow, Project: panlex-tools, Lines: 28, Source file: string_manipulation.py

Example 10: __init__

    def __init__(self, charset: Union[Dict[str, Sequence[int]], Sequence[str], str]) -> None:
        """
        Builds a codec converting between graphemes/code points and integer
        label sequences.

        charset may either be a string, a list or a dict. In the first case
        each code point will be assigned a label, in the second case each
        string in the list will be assigned a label, and in the final case each
        key string will be mapped to the value sequence of integers. In the
        first two cases labels will be assigned automatically.

        As 0 is the blank label in a CTC output layer, output labels and input
        dictionaries are/should be 1-indexed.

        Args:
            charset (unicode, list, dict): Input character set.
        """
        if isinstance(charset, dict):
            self.c2l = charset
        else:
            self.c2l = {k: [v] for v, k in enumerate(sorted(charset), start=1)}
        # map integer labels to code points because regex only works with strings
        self.l2c = {}  # type: Dict[str, str]
        for k, v in self.c2l.items():
            self.l2c[''.join(chr(c) for c in v)] = k

        # sort prefixes for c2l regex
        self.c2l_regex = regex.compile(r'|'.join(regex.escape(x) for x in sorted(self.c2l.keys(), key=len, reverse=True)))
        # sort prefixes for l2c regex
        self.l2c_regex = regex.compile(r'|'.join(regex.escape(x) for x in sorted(self.l2c.keys(), key=len, reverse=True)))
Author: mittagessen, Project: kraken, Lines: 30, Source file: codec.py

Example 11: _match_by_edit_distance

def _match_by_edit_distance(full_text, text_to_match):
    text_to_match = text_to_match.replace("-LRB-", "(").replace("-RRB-", ")")
    text_to_match = text_to_match.replace("-LCB-", "{").replace("-RCB-", "}")
    text_to_match = re.sub(r'\[\\\]\\\)\]$', ')', text_to_match)

    try:
        end_point = (text_to_match.index(" ") if " " in text_to_match else len(text_to_match))
        potential_matches = [full_text[m.start():(m.start() + len(text_to_match) + 1)] for m in 
                             re.finditer(re.escape(text_to_match[0:end_point]), full_text, re.U | re.I)]
    except:
        import sys

        print(full_text)
        print()
        print(text_to_match)
        sys.exit(1)
        
    if len(potential_matches) == 0:
        potential_matches = [full_text[m.start():(m.start() + len(text_to_match) + 1)] for m in 
                             re.finditer(re.escape(text_to_match[0]), full_text, re.U)]
    if len(potential_matches) == 0:
        text_to_match = text_to_match.replace("(", "[")
        potential_matches = [full_text[m.start():(m.start() + len(text_to_match) + 1)] for m in 
                             re.finditer(re.escape(text_to_match[0]), full_text, re.U)]

    potential_matches = [(p[0:p.rindex(text_to_match[-1])+1] 
                          if text_to_match[-1] in p and len(p) > len(text_to_match)
                          else p)
                         for p in potential_matches]

    if len(potential_matches) == 0:
        # No idea why this would ever happen, but it does
        return text_to_match

    match_with_lowest_edit_distance = ""
    lowest_edit_distance = -1
    for match in potential_matches:
        e_d = edit_distance(match, text_to_match)
        if lowest_edit_distance == -1 or e_d <= lowest_edit_distance:
            lowest_edit_distance = e_d
            match_with_lowest_edit_distance = match

    result = match_with_lowest_edit_distance.strip()
    if text_to_match[-1] in result:
        while result[-1] != text_to_match[-1]:
            result = result[0:-1]
    elif text_to_match[-1] == '"' and re.search(r'["”\u201d]', result):
        while result[-1] not in ['"', '”', "\u201d"]:
            result = result[0:-1]
    elif text_to_match[-1] not in [']', '}', ')'] and text_to_match[-2:] != "..":
        while result[-1] != text_to_match[-1]:
            result += full_text[full_text.index(result) + len(result)][-1]

    return result
Author: EducationalTestingService, Project: match, Lines: 54, Source file: Match.py

Example 12: process_index_title_change_in_links

def process_index_title_change_in_links(indx, **kwargs):
    if indx.is_commentary():
        pattern = r'^{} on '.format(re.escape(kwargs["old"]))
    else:
        commentators = text.IndexSet({"categories.0": "Commentary"}).distinct("title")
        pattern = r"(^{} \d)|(^({}) on {} \d)".format(re.escape(kwargs["old"]), "|".join(commentators), re.escape(kwargs["old"]))
        #pattern = r'(^{} \d)|( on {} \d)'.format(re.escape(kwargs["old"]), re.escape(kwargs["old"]))
    links = LinkSet({"refs": {"$regex": pattern}})
    for l in links:
        l.refs = [r.replace(kwargs["old"], kwargs["new"], 1) if re.search(pattern, r) else r for r in l.refs]
        l.save()
Author: rivkahcarl, Project: Sefaria-Project, Lines: 11, Source file: link.py

Example 13: process_index_title_change_in_links

def process_index_title_change_in_links(indx, **kwargs):
    print "Cascading Links {} to {}".format(kwargs['old'], kwargs['new'])
    pattern = text.Ref(indx.title).regex()
    pattern = pattern.replace(re.escape(indx.title), re.escape(kwargs["old"]))
    links = LinkSet({"refs": {"$regex": pattern}})
    for l in links:
        l.refs = [r.replace(kwargs["old"], kwargs["new"], 1) if re.search(pattern, r) else r for r in l.refs]
        try:
            l.save()
        except InputError: #todo: this belongs in a better place - perhaps in abstract
            logger.warning("Deleting link that failed to save: {} - {}".format(l.refs[0], l.refs[1]))
            l.delete()
Author: pzp1997, Project: Sefaria-Project, Lines: 12, Source file: link.py

Example 14: process_index_title_change_in_notes

def process_index_title_change_in_notes(indx, **kwargs):
    print "Cascading Notes {} to {}".format(kwargs['old'], kwargs['new'])
    pattern = Ref(indx.title).regex()
    pattern = pattern.replace(re.escape(indx.title), re.escape(kwargs["old"]))
    notes = NoteSet({"ref": {"$regex": pattern}})
    for n in notes:
        try:
            n.ref = n.ref.replace(kwargs["old"], kwargs["new"], 1)
            n.save()
        except Exception:
            logger.warning("Deleting note that failed to save: {}".format(n.ref))
            n.delete()
Author: arielallon, Project: Sefaria-Project, Lines: 12, Source file: note.py

Example 15: __init__

    def __init__(self, leading_allow=None, trailing_allow=None):
        """
        :param list leading_allow: The leading punctuation characters to allow.
        :param list trailing_allow: The trailing punctuation characters to allow.
        """
        leading_pattern = "" if not leading_allow else r"[%s]*" % regex.escape("".join(leading_allow))
        trailing_pattern = "" if not trailing_allow else r"[%s]" % regex.escape("".join(trailing_allow))
        if trailing_pattern:
            super(OuterPunctuationFilter, self).__init__(
                r"%s[^\W_]+(?:$|.*[^\W_]%s*|%s*)" % (leading_pattern, trailing_pattern, trailing_pattern)
            )
        else:
            super(OuterPunctuationFilter, self).__init__(r"%s[^\W_](?:$|.*[^\W_])" % leading_pattern)
Author: cjrh, Project: caterpillar, Lines: 13, Source file: filter.py


Note: The regex.escape examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by their authors, and copyright of the source code remains with the original authors; consult each project's License before distributing or reusing it. Do not reproduce without permission.