当前位置: 首页>>代码示例>>Python>>正文


Python re2.sub函数代码示例

本文整理汇总了 Python 中 re2.sub 函数的典型用法代码示例。如果您正苦于以下问题：Python sub 函数的具体用法是什么？Python sub 怎么用？有哪些 Python sub 的使用例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。


下文一共展示了 sub 函数的 15 个代码示例，这些例子默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: parsers

def parsers(uscfiles, findreplace):
    """Run every input through the reference-markup pipeline.

    Each entry of ``uscfiles`` is handed to ``subfile`` together with
    ``findreplace``; the result is then post-processed by
    ``parsesections``, a few ``re.sub`` clean-ups and ``parsenamedacts``
    (all three helpers are defined outside this block), and finally
    stripped of every remaining ``@`` character.

    Returns a list holding one processed string per input entry, in
    input order.
    """
    # Multiple-section references are turned into links.  The ``[^<]``
    # makes sure no group is transformed twice.
    sections_pattern = r'@@@\s[Ss]ections?\s([^<]*?)@@@@@(.*?)@@'
    namedact_pattern = r'/ref-namedact-(.*?)/'

    results = []
    for raw in uscfiles:
        text = subfile(raw, findreplace)

        # A fresh list per iteration, exactly as the helper has always
        # received it.
        link_rule = [
            r'%s' % u'(\d+\w*(?:\(\w+\))*[-|–]?\d*)([, @])',
            r'<a href="/laws/target/%s/\1" class="sec">\1</a>\2',
        ]
        text = parsesections(sections_pattern, link_rule, text)

        # NOTE(review): the '[email protected]' fragments in the patterns
        # below look like an e-mail-obfuscation artefact of the page this
        # snippet was copied from - confirm against the upstream project.
        text = re.sub(r'@@ref-.*[email protected]', r'', text)

        # Named Acts: per the original comment the helper encodes them by
        # removing lowercase and non-word characters and appending the
        # length of the name without non-word characters.
        text = parsenamedacts(namedact_pattern, text)

        text = re.sub(r'@[email protected]', r'ref-title-this', text)
        text = re.sub(r'@@ref-.*[email protected]', r'', text)

        # Remove remaining @
        results.append(text.replace('@', ''))
    return results
开发者ID:aih,项目名称:uscites,代码行数:31,代码来源:autoparser.py

示例2: handle_techniques

def handle_techniques(line, **opts):
    """Apply a fixed sequence of text rewrites to one source line.

    The rewrites target ``Chr``/``Asc``/``StrReverse`` calls, quoted hex
    and base64-looking literals, calls to the function named by
    ``enc_func_name`` and ``&`` concatenations.  The order of the steps
    matters: each one works on the output of the previous one.

    Required keyword options (a missing one raises ``KeyError``):
      vb_vars        -- read here but not used by the visible code.
      enc_func_name  -- text spliced into two regular expressions as the
                        name of the function whose calls are rewritten.
                        It is inserted verbatim, not escaped.
      decrypt_func   -- replacement callback for those two expressions.

    ``decode_hex``, ``decode_base64``, ``decode_chr``, ``decode_reverse``
    and ``concatenate`` are replacement callbacks defined outside this
    block.

    Returns the rewritten line with all quote markers removed.
    """

    vb_vars = opts["vb_vars"]
    enc_func_name = opts["enc_func_name"]
    decrypt_func = opts["decrypt_func"]

    # NOTE(review): this nested helper is never referenced below and
    # returns nothing - it may be truncated in this copy; confirm upstream.
    def var_substitute(m):
        var = m.group(1)

    # Triple every double quote so that quoted literals can be recognised
    # by the tripled-quote delimiters used in the patterns below.
    line = line.replace('"', '"""')
    # Quoted runs of two or more uppercase hex digits.
    line = re.sub(r'"""([A-F0-9]{2,})"""', decode_hex, line)
    # Quoted runs of word characters, '+', '=' and '/'.
    line = re.sub(r'"""([\w_+=/]{2,})"""', decode_base64, line)
    # Chr?(Asc?(x)) with a one-character type suffix collapses to x.
    line = re.sub(r'(?i)Chr[A-Z$]\(Asc[A-Z$](.+?)\)\)', r"\1", line)
    # Asc?("...") becomes the code point of the literal's first character.
    line = re.sub(r'(?i)Asc[A-Z$]\("""(\w)\w*"""\)', lambda m: str(ord(m.group(1))), line)
    # One or more Chr(<digits>) calls, optionally joined by '&'.
    line = re.sub(r'(?i)((?:Chr[A-Z$]?\(\d+\)\s*&?\s*)+)', decode_chr, line)
    # Calls to enc_func_name whose two arguments are wrapped in another call.
    line = re.sub(r'(?i)\b%s\s*\(\w+\("""(.+?)"""\),\s*\w+\("""(.+?)"""' % enc_func_name, decrypt_func, line)
    # Calls to enc_func_name with two plain, optionally quoted arguments.
    line = re.sub(r'(?i)\b%s\((?:""")?(.+?)(?:""")?,\s*(?:""")?(.+?)(?:""")?\)' % enc_func_name, decrypt_func, line)
    line = re.sub(r'(?i)StrReverse\(.+?"""(.+?)"""\)', decode_reverse, line)
    # Two quoted literals joined by '&', together with the rest of the line.
    line = re.sub(r'""".+?"""\s+&+\s+""".+?""".+', concatenate, line)
    # Unwrap nested Chr(Asc(x)) repeatedly; stop as soon as a pass changes
    # nothing so the loop cannot spin forever.  The containment test is
    # case-sensitive while the substitution is not.
    while "Chr(Asc(" in line:
        lastline = line
        line = re.sub(r'(?i)Chr\(Asc\((.+?)\)\)', r"\1",line)
        if line == lastline:
            break
    # Remove quotes before regexing against them.
    line = line.replace('""" + """','')
    line = line.replace('"""','')
    # Remove a few concat patterns. Theres a bug with some obfuscation
    # techniques.
    line = line.replace(" + ", "")
    line = line.replace(" & ","")
    return line
开发者ID:CIRCL,项目名称:cuckoo-modified,代码行数:32,代码来源:vbadeobf.py

示例3: clean

def clean(t):
    """
    Normalize numbers and discard punctuation that can be ambiguous.

    Every run of digits, periods and commas that ends in a digit is
    replaced by the token ``<NUM>``; afterwards every character that is
    not an ASCII letter, a digit, a space or one of ``, . ; : < > - ' / ?
    ! $ %`` is dropped, and each ``--`` becomes a single space.

    Returns the cleaned string.
    """
    # Raw strings: the patterns contain regex escapes (\d, \-) that are
    # invalid *string* escapes and trigger a SyntaxWarning on recent
    # Python versions when written in a plain literal.
    t = re.sub(r'[.,\d]*\d', '<NUM>', t)
    t = re.sub(r"[^a-zA-Z0-9,.;:<>\-'/?!$% ]", '', t)
    t = t.replace('--', ' ')  # sometimes starts a sentence... trouble
    return t
开发者ID:katherinehuwu,项目名称:Word_Fit,代码行数:8,代码来源:sbd.py

示例4: test_bug_449000

 def test_bug_449000(self):
     # Test for sub() on escaped characters
     self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
                      'abc\ndef\n')
     self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
                      'abc\ndef\n')
     self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
                      'abc\ndef\n')
     self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
                      'abc\ndef\n')
开发者ID:PeterScott,项目名称:pyre2,代码行数:10,代码来源:test_re.py

示例5: termcenter

def termcenter():
	"""Print standard input and the command-line strings centred.

	Standard input is handled first, then every positional ``string``
	argument.  Each source is padded according to its widest line, where
	the width ignores ``ESC[...m`` colour sequences and ``$...$`` spans.
	``$...$`` spans and newlines are removed from the printed lines.
	"""
	parser = argparse.ArgumentParser(description='Center stuff on terminals')
	parser.add_argument('string', nargs='*', type=str)
	args = parser.parse_args()

	sources = [sys.stdin] + args.string
	for source in sources:
		if isinstance(source, str):
			lines = [source]
		else:
			lines = source.readlines()
		if not lines:
			continue
		# Width as it will appear on screen: escapes and $..$ spans removed.
		width = max(len(re.sub(r'\x1B\[[0-9;]+m|\$.*\$', '', s)) for s in lines)
		pad = int((os.get_terminal_size()[0]- width)/2)
		margin = ' '*pad
		for line in lines:
			print(margin + re.sub(r'\$.*\$|\n', '', line))
开发者ID:jaseg,项目名称:ponysay,代码行数:12,代码来源:__init__.py

示例6: get_features

def get_features(frag, model):
    """
    ... w1. (sb?) w2 ...
    Features, listed roughly in order of importance:

    (1) w1: word that includes a period
    (2) w2: the next word, if it exists
    (3) w1length: number of word characters in w1, capped at 10
    (4) w2cap: true if w2 is capitalized
    (5) both: w1 and w2
    (6) w1abbr: log count of w1 in training without a final period
    (7) w2lower: log count of w2 in training as lowercased
    (8) w1w2upper: w1 and whether w2 is capitalized

    ``frag`` supplies ``tokenized`` and ``next`` (the following fragment
    or a false value); ``model`` supplies the ``non_abbrs`` and
    ``lower_words`` count lookups.  Both texts go through ``clean``
    (defined outside this block) before being split on whitespace.

    Returns a dict of feature name -> string value.  ``w1``, ``w2`` and
    ``both`` are always present; the other keys only when the respective
    word consists of letters (periods ignored).
    """
    words1 = clean(frag.tokenized).split()
    w1 = words1[-1] if words1 else ''

    w2 = ''
    if frag.next:
        words2 = clean(frag.next.tokenized).split()
        if words2:
            w2 = words2[0]

    # Around a hyphen only the parts adjacent to the boundary are kept:
    # w1 loses everything up to and including its first hyphen, w2 loses
    # everything from its first hyphen on.
    c1 = re.sub(r'(^.+?\-)', '', w1)
    c2 = re.sub(r'(\-.+?)$', '', w2)

    feats = {'w1': c1, 'w2': c2, 'both': c1 + '_' + c2}

    if c1.replace('.', '').isalpha():
        feats['w1length'] = str(min(10, len(re.sub(r'\W', '', c1))))
        try:
            feats['w1abbr'] = str(int(math.log(1 + model.non_abbrs[c1[:-1]])))
        except Exception:
            # Best effort: a failed lookup counts as "never seen"
            # (log(1) == 0).  Unlike a bare except this no longer
            # swallows KeyboardInterrupt / SystemExit.
            feats['w1abbr'] = '0'

    if c2.replace('.', '').isalpha():
        w2_is_upper = str(c2[0].isupper())
        feats['w2cap'] = w2_is_upper
        try:
            feats['w2lower'] = str(int(math.log(1 + model.lower_words[c2.lower()])))
        except Exception:
            # Same best-effort fallback as for w1abbr.
            feats['w2lower'] = '0'
        feats['w1w2upper'] = c1 + '_' + w2_is_upper

    return feats
开发者ID:katherinehuwu,项目名称:Word_Fit,代码行数:48,代码来源:sbd.py

示例7: on_call

    def on_call(self, call, process):
        """Inspect one API call and record the indicators it matches.

        ``call`` is indexed by ``"api"`` and handed to
        ``self.get_argument`` to fetch named arguments; ``process`` is
        not used.  Depending on the API the method sets
        ``self.cryptoapis`` or ``self.syncapis`` to True, or adds a
        request path to ``self.networkapis``.

        Always returns None.
        """
        # Legacy, modern Dyre doesn't have hardcoded hashes in
        # CryptHashData anymore
        iocs = [
            "J7dnlDvybciDvu8d46D\\x00",
            "qwererthwebfsdvjaf+\\x00",
        ]
        pipe = [
            "\\??\\pipe\\3obdw5e5w4",
            "\\??\\pipe\\g2fabg5713",
        ]
        api = call["api"]
        if api == "CryptHashData":
            buf = self.get_argument(call, "Buffer")
            if buf in iocs:
                self.cryptoapis = True
            # Drop textual "\xNN" escapes before looking for the name.
            tmp = re.sub(r"\\x[0-9A-Fa-f]{2}", "", buf)
            if self.compname in tmp:
                # The computer name is literal text, so it must be escaped
                # before being spliced into the pattern; otherwise names
                # containing regex metacharacters mis-match or raise.
                if re.match("^" + re.escape(self.compname) + "[0-9 ]+$", tmp):
                    self.cryptoapis = True
        elif api == "HttpOpenRequestA":
            buf = self.get_argument(call, "Path")
            if len(buf) > 10:
                self.networkapis.add(buf)
        elif api == "NtCreateNamedPipeFile":
            buf = self.get_argument(call, "PipeName")
            if buf in pipe:
                self.syncapis = True

        return None
开发者ID:453483289,项目名称:community-modified,代码行数:31,代码来源:dyre_apis.py

示例8: process_entry

    def process_entry(self, entry):
        """Convert one parsed feed entry into a plain dict.

        ``entry`` must provide ``published``, ``link`` and ``summary``;
        ``title`` and ``tags`` are optional.  Note that
        ``entry['published']`` is rewritten in place when it ends in
        ``UT``.

        Returns a dict with the keys ``type`` (always ``'feed'``),
        ``created``, ``link``, ``title``, ``text`` and ``tags``.

        Raises whatever the date parser raises for an unparsable
        ``published`` value.
        """
        out = {'type': 'feed'}

        # UGnich - mooduck
        # A zone written as "UT" is completed to "UTC" before parsing.
        if entry['published'].endswith('UT'):
            entry['published'] = '%sC' % entry['published']

        tz = timezone(settings.timezone)
        created = dateutil.parser.parse(entry['published'])
        try:
            out['created'] = created.astimezone(tz)
        except ValueError:
            # Conversion refused (presumably a timestamp without zone
            # information): keep the parsed value as it is.  It has to be
            # stored in ``out`` - writing it to ``entry`` left the result
            # without a 'created' key.
            out['created'] = created

        out['link'] = entry['link']
        if 'title' in entry:
            # Resolve numeric character references that survive unescape().
            # NOTE(review): unichr only exists on Python 2.
            out['title'] = re.sub(r'&#(?P<c>\d+);',
                                  lambda c: unichr(int(c.group('c'))),
                                  unescape(entry['title']))
        else:
            out['title'] = ''
        out['text'] = self.process_text(entry['summary'])
        if 'tags' in entry:
            # Prefer the human readable label, fall back to the raw term.
            out['tags'] = [t['label'] or t['term'] for t in entry['tags']]
        else:
            out['tags'] = []

        return out
开发者ID:isqua-test,项目名称:point-core,代码行数:25,代码来源:feedproc.py

示例9: _files

def _files(files):
    """Apply the attachment changes of the current request to ``files``.

    Attachments named in the ``del-attach`` request argument are removed
    (via ``remove_attach``) when they are part of ``files``.  Up to ten
    uploads from the ``attach`` argument are stored with ``make_attach``
    under a per-user directory and their paths appended.

    ``files`` may be empty or None; a non-empty list is modified in
    place.  Returns the resulting list.
    """
    if not files:
        files = []
    digest = md5(str(datetime.now())).hexdigest()
    dest = '%s/%s/%s/%s' % (env.user.login[0], env.user.login,
                            digest[:2], digest[2:4])

    # The request may carry a single value or a list of values.
    to_delete = env.request.args('del-attach', [])
    if not isinstance(to_delete, (list, tuple)):
        to_delete = [to_delete]
    for attached in to_delete:
        if attached in files:
            remove_attach(attached)
            files.remove(attached)

    names = env.request.args('attach', [])
    uploads = env.request.files('attach')
    if not isinstance(names, (list, tuple)):
        names = [names]
        uploads = [uploads]

    for idx, name in enumerate(names[:10]):
        if isinstance(name, str):
            name = name.decode('utf-8')
        # Transliterate, then turn every run of characters other than
        # word characters and dots into a single dash.
        name = re.sub(r'[^\w\.]+', '-', unidecode(name))
        subdir = "%s/%s/" % (dest, randint(1000, 9999))
        make_attach(uploads[idx], subdir, name, remove=True)
        files.append(os.path.join(subdir, name))

    return files
开发者ID:radjah,项目名称:point-www,代码行数:32,代码来源:blog.py

示例10: __reWildcard

 def __reWildcard(regexp, string):
     """Wildcard-based regular expression system.

     ``regexp`` is a pattern in which ``*`` stands for any (possibly
     empty) run of characters; every other character is literal.
     Returns True when ``string`` matches the whole pattern, else False.

     The literal prefix and suffix are consumed before the middle parts
     are searched, so no part of ``string`` is matched twice (previously
     ``"a*a"`` matched ``"a"``).
     """
     # Runs of '*' are equivalent to a single '*'.
     regexp = re.sub(r"\*+", "*", regexp)
     if "*" not in regexp:
         return regexp == string

     blocks = regexp.split("*")
     # With at least one '*' there are at least two blocks; the first and
     # last are the anchored prefix / suffix ('' when the pattern starts
     # or ends with '*').
     start, end = blocks[0], blocks[-1]
     if len(string) < len(start) + len(end):
         return False
     if not (string.startswith(start) and string.endswith(end)):
         return False

     # The middle blocks must occur, in order and without overlapping,
     # in what is left between prefix and suffix.
     remainder = string[len(start):len(string) - len(end)]
     position = 0
     for block in blocks[1:-1]:
         index = remainder.find(block, position)
         if index == -1:
             return False
         position = index + len(block)
     return True
开发者ID:psuedoelastic,项目名称:wapiti,代码行数:35,代码来源:lswww.py

示例11: render_pony

def render_pony(name, text, balloonstyle, width=80, center=False, centertext=False):
	"""Render the pony ``name`` to a single string.

	The lines come from ``load_pony``.  The first line containing a
	``$balloon<N>$`` marker is replaced by the output of
	``render_balloon`` (``<N>``, if given, is the minimum balloon width;
	the maximum is half of ``width``).  A section delimited by two
	``$$$`` lines is removed, the ``$\\$`` / ``$/$`` markers become the
	last two entries of ``balloonstyle`` (or nothing when ``text`` is
	empty), every line is cut to ``width`` visible characters, and with
	``center`` the whole picture is indented to sit centred in ``width``.
	"""
	pony = load_pony(name)
	link_l = link_r = ''
	if text:
		link_l, link_r = balloonstyle[-2:]

	padfunc = str.center if centertext else str.ljust
	for idx, row in enumerate(pony):
		found = re.search('\$balloon([0-9]*)\$', row)
		if not found:
			continue
		minwidth = int(found.group(1) or '0')
		pony[idx:idx+1] = render_balloon(text, balloonstyle, minwidth=minwidth, maxwidth=int(width/2), pad=padfunc)
		break

	# Cut out everything from the first '$$$' line through the second one;
	# when either marker is missing the list is left untouched.
	try:
		opening = pony.index('$$$')
		closing = pony[opening+1:].index('$$$')
		pony[opening:] = pony[opening+1+closing+1:]
	except:
		pass

	pony = [ row.replace('$\\$', link_l).replace('$/$', link_r) for row in pony ]

	indent = ''
	if center:
		# Visible width: colour escapes and $..$ spans do not count.
		ponywidth = max(len(re.sub(r'\x1B\[[0-9;]+m|\$.*\$', '', row)) for row in pony)
		indent = ' '*int((width-ponywidth)/2)

	# At most `width` characters, each optionally preceded by colour escapes.
	wre = re.compile('((\x1B\[[0-9;]+m)*.){0,%s}' % width)
	# NOTE(review): this looks like a colour-reset sequence whose leading
	# escape byte got lost when the snippet was copied - confirm upstream.
	reset = '[39;49m\n'
	clipped = [ wre.search(row).group() for row in pony ]
	return indent + (reset+indent).join(clipped) + reset
开发者ID:jaseg,项目名称:ponysay,代码行数:25,代码来源:__init__.py

示例12: replace_placeholders

def replace_placeholders(string, item, match):
    """Fill the ``{{...}}`` placeholders of ``string`` from ``item``.

    Supported placeholders: ``{{body}}``, ``{{domain}}``,
    ``{{permalink}}``, ``{{subreddit}}``, ``{{title}}``, ``{{url}}`` and
    ``{{user}}`` (``[deleted]`` when the item has no author).  Comments
    take body and title from ``body`` / ``link_title``, everything else
    from ``selftext`` / ``title``.  ``{{match-N}}`` is replaced by group
    N of ``match`` when a match object is given.

    Returns the expanded string.
    """
    is_comment = isinstance(item, praw.objects.Comment)

    # Applied strictly in this order: text inserted by an earlier entry
    # is still subject to the later replacements.
    substitutions = [
        ('{{body}}', item.body if is_comment else item.selftext),
        ('{{domain}}', getattr(item, 'domain', '')),
        ('{{permalink}}', get_permalink(item)),
        ('{{subreddit}}', item.subreddit.display_name),
        ('{{title}}', item.link_title if is_comment else item.title),
        ('{{url}}', getattr(item, 'url', '')),
        ('{{user}}', item.author.name if item.author else '[deleted]'),
    ]
    for placeholder, value in substitutions:
        string = string.replace(placeholder, value)

    # Turn every {{match-N}} into the backreference \N, then let the
    # match object expand those backreferences.
    string = re.sub(r'\{\{match-(\d+)\}\}', r'\\\1', string)
    if match:
        string = match.expand(string)

    return string
开发者ID:plbogen,项目名称:AutoModerator,代码行数:25,代码来源:automoderator.py

示例13: processText

def processText(text):
	'''
	Strip some unwanted characters and split the text into sentences.

	BEL (^G) and carriage-return characters are removed.  The text is
	then split with a simple sentence filter: a boundary is a period
	followed, after at most two other characters, by a capital letter.

	Generator: yields every sentence including its final period, then the
	remaining text.  Each piece after the first starts at the capital
	letter that opened it.  A text without any boundary is yielded whole.

	(An earlier version also tried to strip the references section; that
	was not working and has been removed.)
	'''
	# strip ^G and \r
	text = re.sub(r'\x07|\r', '', text)

	# Start each new sentence at the matched capital letter instead of a
	# fixed offset after the period: the capital can be 0, 1 or 2
	# characters away, and the last piece must not repeat the period that
	# already ended the previous one.
	sentenceStart = 0
	for boundary in re.finditer(r'\..?.?([A-Z])', text):
		yield text[sentenceStart:boundary.start() + 1]
		sentenceStart = boundary.start(1)
	yield text[sentenceStart:]
开发者ID:bylin,项目名称:text-mining,代码行数:28,代码来源:interactionFinder.py

示例14: html2md

def html2md(s):
    """Convert the HTML string ``s`` to Markdown text.

    The conversion is done by ``HTML2Text``.  Afterwards every link or
    image that has an empty label - ``[](url)`` or ``![](url)`` - is
    replaced by its bare URL surrounded by single spaces.
    """
    converter = HTML2Text()
    # presumably 0 disables line wrapping - TODO confirm against html2text
    converter.body_width = 0
    # ignore_links / ignore_images stay at their defaults (the overrides
    # were commented out in the original).
    markdown = converter.handle(s)

    def _bare_url(found):
        return " %s " % found.group('url')

    return re.sub(r'\!?\[\]\((?P<url>.+?)\)', _bare_url, markdown)
开发者ID:isqua-test,项目名称:point-core,代码行数:8,代码来源:feedproc.py

示例15: replace

 def replace(self, m):
     """Reassemble the URL captured by ``m`` with colons in the fragment encoded.

     ``m`` is a match object with the named groups ``scheme``, ``pass``,
     ``authority``, ``undef``, ``query`` and ``fragment``.  The groups
     are concatenated in that order; in the fragment every ``:`` is
     written as ``%3a``.
     """
     leading = tuple(
         m.group(part)
         for part in ("scheme", "pass", "authority", "undef", "query")
     )
     fragment = re.sub(r":", "%3a", m.group("fragment"))
     return "%s%s%s%s%s%s" % (leading + (fragment,))
开发者ID:isqua-test,项目名称:point-core,代码行数:9,代码来源:md.py


注:本文中的re2.sub函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。