本文整理汇总了Python中re2.sub函数的典型用法代码示例。如果您正苦于以下问题:Python sub函数的具体用法?Python sub怎么用?Python sub使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了sub函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parsers
def parsers(uscfiles, findreplace):
    """Run the reference-markup pipeline over every entry of ``uscfiles``.

    Each entry is first passed through ``subfile`` together with
    ``findreplace``.  The result is then handed to ``parsesections`` and
    ``parsenamedacts`` and cleaned with a few ``re.sub`` calls, after which
    every remaining ``@`` character is stripped.

    Returns a list with one processed string per input entry, in input order.
    """
    # Replace multiple-section references with links.  ``[^<]`` makes sure no
    # group is transformed twice.
    section_pattern = r'@@@\s[Ss]ections?\s([^<]*?)@@@@@(.*?)@@'

    # Per the original comment: named acts are encoded by removing lowercase
    # and non-word characters and appending the length of the name without
    # non-word characters (the work itself happens in ``parsenamedacts``).
    namedact_pattern = r'/ref-namedact-(.*?)/'

    # NOTE(review): the two patterns below contain the text
    # "[email protected]", which looks like an e-mail-obfuscation artifact
    # introduced when this snippet was scraped.  They are reproduced verbatim;
    # confirm the intended regexes against the upstream project.
    leftover_ref_pattern = r'@@ref-.*[email protected]'
    title_ref_pattern = r'@[email protected]'

    def process(section):
        text = subfile(section, findreplace)
        # Built fresh for every entry, exactly as the loop body used to do.
        section_replace = [
            r'%s' % u'(\d+\w*(?:\(\w+\))*[-|–]?\d*)([, @])',
            r'<a href="/laws/target/%s/\1" class="sec">\1</a>\2',
        ]
        text = parsesections(section_pattern, section_replace, text)
        text = re.sub(leftover_ref_pattern, r'', text)
        text = parsenamedacts(namedact_pattern, text)
        text = re.sub(title_ref_pattern, r'ref-title-this', text)
        text = re.sub(leftover_ref_pattern, r'', text)
        # Remove remaining @
        return text.replace('@', '')

    return [process(section) for section in uscfiles]
示例2: handle_techniques
def handle_techniques(line, **opts):
    """Apply a fixed sequence of de-obfuscation rewrites to ``line``.

    Required keyword options (a missing one raises ``KeyError``):
      * ``vb_vars``       -- read but not otherwise used in the visible body
      * ``enc_func_name`` -- interpolated into two of the patterns
      * ``decrypt_func``  -- replacement callback for those two patterns

    Returns the rewritten line.
    """
    vb_vars = opts["vb_vars"]
    enc_func_name = opts["enc_func_name"]
    decrypt_func = opts["decrypt_func"]

    # NOTE(review): this helper is never referenced below and returns None;
    # its body looks truncated in this copy of the code.  Kept as found.
    def var_substitute(m):
        var = m.group(1)

    # Triple every double quote so quoted literals can be anchored on.
    line = line.replace('"', '"""')

    # The rewrites are order-dependent: each one runs on the output of the
    # previous one.
    rewrites = (
        (r'"""([A-F0-9]{2,})"""', decode_hex),
        (r'"""([\w_+=/]{2,})"""', decode_base64),
        (r'(?i)Chr[A-Z$]\(Asc[A-Z$](.+?)\)\)', r"\1"),
        (r'(?i)Asc[A-Z$]\("""(\w)\w*"""\)', lambda m: str(ord(m.group(1)))),
        (r'(?i)((?:Chr[A-Z$]?\(\d+\)\s*&?\s*)+)', decode_chr),
        (r'(?i)\b%s\s*\(\w+\("""(.+?)"""\),\s*\w+\("""(.+?)"""' % enc_func_name, decrypt_func),
        (r'(?i)\b%s\((?:""")?(.+?)(?:""")?,\s*(?:""")?(.+?)(?:""")?\)' % enc_func_name, decrypt_func),
        (r'(?i)StrReverse\(.+?"""(.+?)"""\)', decode_reverse),
        (r'""".+?"""\s+&+\s+""".+?""".+', concatenate),
    )
    for pattern, replacement in rewrites:
        line = re.sub(pattern, replacement, line)

    # Keep unwrapping Chr(Asc(x)) until a pass leaves the line unchanged.
    while "Chr(Asc(" in line:
        before = line
        line = re.sub(r'(?i)Chr\(Asc\((.+?)\)\)', r"\1", line)
        if line == before:
            break

    # Remove quotes before regexing against them, then a few concat
    # patterns.  (Original note: there is a bug with some obfuscation
    # techniques.)
    for needle in ('""" + """', '"""', " + ", " & "):
        line = line.replace(needle, '')
    return line
示例3: clean
def clean(t):
    """Normalize numbers and discard punctuation that can be ambiguous.

    Steps, in order:
      1. every run of digits/periods/commas that ends in a digit becomes the
         literal token ``<NUM>``;
      2. every character outside letters, digits, space and
         ``, . ; : < > - ' / ? ! $ %`` is removed;
      3. each ``--`` is replaced by a single space.

    Returns the cleaned string.
    """
    # Raw literals: the previous non-raw forms relied on invalid escape
    # sequences (``\d``, ``\-``, ``\/``), which newer Python versions warn on.
    t = re.sub(r'[.,\d]*\d', '<NUM>', t)
    t = re.sub(r"[^a-zA-Z0-9,.;:<>\-'\/?!$% ]", '', t)
    t = t.replace('--', ' ')  # sometimes starts a sentence... trouble
    return t
示例4: test_bug_449000
def test_bug_449000(self):
# Test for sub() on escaped characters
self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
'abc\ndef\n')
self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
'abc\ndef\n')
self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
'abc\ndef\n')
self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
'abc\ndef\n')
示例5: termcenter
def termcenter():
    """Command-line entry point: print input horizontally centered.

    Standard input is processed first, followed by every positional
    ``string`` argument (each argument is treated as a single line).  For
    each source the left padding is derived from the widest line, measured
    after removing SGR colour sequences and ``$...$`` spans.
    """
    cli = argparse.ArgumentParser(description='Center stuff on terminals')
    cli.add_argument('string', nargs='*', type=str)
    options = cli.parse_args()

    def visible_length(s):
        # Length without colour escapes and $...$ markup.
        return len(re.sub(r'\x1B\[[0-9;]+m|\$.*\$', '', s))

    for source in [sys.stdin] + options.string:
        if isinstance(source, str):
            lines = [source]
        else:
            lines = source.readlines()
        if not lines:
            continue
        width = max(visible_length(s) for s in lines)
        pad = int((os.get_terminal_size()[0] - width) / 2)
        prefix = ' ' * pad
        for line in lines:
            # $...$ spans and newlines are dropped from the printed text.
            print(prefix + re.sub(r'\$.*\$|\n', '', line))
示例6: get_features
def get_features(frag, model):
    """Build the feature dictionary for a candidate sentence boundary.

    ... w1. (sb?) w2 ...
    Features, listed roughly in order of importance:
    (1) w1: word that includes a period
    (2) w2: the next word, if it exists
    (3) w1length: number of alphabetic characters in w1
    (4) w2cap: true if w2 is capitalized
    (5) both: w1 and w2
    (6) w1abbr: log count of w1 in training without a final period
    (7) w2lower: log count of w2 in training as lowercased
    (8) w1w2upper: w1 and w2 is capitalized

    ``frag.tokenized`` and, when ``frag.next`` is truthy,
    ``frag.next.tokenized`` are run through ``clean`` and split on
    whitespace; w1 is the last word of the former and w2 the first word of
    the latter (``''`` when there is none).  ``model.non_abbrs`` and
    ``model.lower_words`` are indexed by word.

    Returns a dict mapping feature names to strings.
    """
    words1 = clean(frag.tokenized).split()
    w1 = words1[-1] if words1 else ''

    if frag.next:
        words2 = clean(frag.next.tokenized).split()
        w2 = words2[0] if words2 else ''
    else:
        w2 = ''

    # Drop everything up to and including the first hyphen of w1, and
    # everything from a hyphen to the end of w2.
    c1 = re.sub(r'(^.+?\-)', '', w1)
    c2 = re.sub(r'(\-.+?)$', '', w2)

    feats = {}
    feats['w1'] = c1
    feats['w2'] = c2
    feats['both'] = c1 + '_' + c2

    # Word-character count of c1, capped at 10.
    len1 = min(10, len(re.sub(r'\W', '', c1)))

    if c1.replace('.', '').isalpha():
        feats['w1length'] = str(len1)
        # Best-effort lookup: any failure falls back to the zero-count
        # value.  ``except Exception`` (rather than a bare ``except``) keeps
        # that behaviour without swallowing KeyboardInterrupt/SystemExit.
        try:
            feats['w1abbr'] = str(int(math.log(1 + model.non_abbrs[c1[:-1]])))
        except Exception:
            feats['w1abbr'] = str(int(math.log(1)))

    if c2.replace('.', '').isalpha():
        feats['w2cap'] = str(c2[0].isupper())
        try:
            feats['w2lower'] = str(int(math.log(1 + model.lower_words[c2.lower()])))
        except Exception:
            feats['w2lower'] = str(int(math.log(1)))
        feats['w1w2upper'] = c1 + '_' + str(c2[0].isupper())

    return feats
示例7: on_call
def on_call(self, call, process):
    """Inspect one API call and set indicator flags on ``self``.

    * ``CryptHashData``: sets ``self.cryptoapis`` when the ``Buffer``
      argument is one of the hard-coded strings, or when, after removing
      ``\\xNN`` escape text, it consists of ``self.compname`` followed only
      by digits and spaces.
    * ``HttpOpenRequestA``: adds the ``Path`` argument to
      ``self.networkapis`` when it is longer than 10 characters.
    * ``NtCreateNamedPipeFile``: sets ``self.syncapis`` when ``PipeName``
      is one of the hard-coded pipe names.

    ``process`` is not used.  Always returns ``None``.
    """
    # Legacy, modern Dyre doesn't have hardcoded hashes in
    # CryptHashData anymore
    iocs = [
        "J7dnlDvybciDvu8d46D\\x00",
        "qwererthwebfsdvjaf+\\x00",
    ]
    pipe = [
        "\\??\\pipe\\3obdw5e5w4",
        "\\??\\pipe\\g2fabg5713",
    ]
    api = call["api"]
    if api == "CryptHashData":
        buf = self.get_argument(call, "Buffer")
        if buf in iocs:
            self.cryptoapis = True
        tmp = re.sub(r"\\x[0-9A-Fa-f]{2}", "", buf)
        if self.compname in tmp:
            # The computer name is data, not a pattern: escape it so names
            # containing regex metacharacters ("(", "+", ".", ...) are
            # matched literally instead of mis-matching or raising.
            if re.match("^" + re.escape(self.compname) + "[0-9 ]+$", tmp):
                self.cryptoapis = True
    elif api == "HttpOpenRequestA":
        buf = self.get_argument(call, "Path")
        if len(buf) > 10:
            self.networkapis.add(buf)
    elif api == "NtCreateNamedPipeFile":
        buf = self.get_argument(call, "PipeName")
        if buf in pipe:
            self.syncapis = True
    return None
示例8: process_entry
def process_entry(self, entry):
    """Convert one parsed feed entry into the dict used by the caller.

    Reads ``published``, ``link`` and ``summary`` from ``entry`` (all
    required) plus the optional ``title`` and ``tags``.  ``entry`` is
    modified in place when its ``published`` value ends in ``'UT'``.

    Returns a dict with the keys ``type``, ``created``, ``link``,
    ``title``, ``text`` and ``tags``.
    """
    out = {'type': 'feed'}

    # UGnich - mooduck
    # A trailing "UT" is completed to "UTC" before parsing.
    if entry['published'].endswith('UT'):
        entry['published'] = '%sC' % entry['published']

    tz = timezone(settings.timezone)
    try:
        out['created'] = \
            dateutil.parser.parse(entry['published']).astimezone(tz)
    except ValueError:
        # Presumably raised by astimezone() for a timestamp without zone
        # information -- TODO confirm.  Fall back to the parsed value as-is.
        # This used to be stored on ``entry`` by mistake, leaving ``out``
        # without a 'created' key.
        out['created'] = \
            dateutil.parser.parse(entry['published'])

    out['link'] = entry['link']

    # Resolve numeric character references (&#NNN;) left after unescape().
    out['title'] = re.sub(r'&#(?P<c>\d+);',
                          lambda c: unichr(int(c.group('c'))),
                          unescape(entry['title'])) \
                   if 'title' in entry else ''

    out['text'] = self.process_text(entry['summary'])

    out['tags'] = [ t['label'] or t['term'] for t in entry['tags'] ] \
                  if 'tags' in entry else []

    return out
示例9: _files
def _files(files):
    """Apply attachment deletions and uploads from the current request.

    ``files`` is the existing list of attachment paths (a falsy value is
    treated as an empty list).  Entries named by the ``del-attach`` request
    argument are passed to ``remove_attach`` and dropped from the list.  At
    most the first 10 ``attach`` entries are then stored with
    ``make_attach`` and their paths appended.

    Returns the resulting list (the same list object when one was passed in).
    """
    if not files:
        files = []

    # Destination prefix: first letter of the login, the login, then two
    # 2-character slices of an MD5 digest of the current timestamp.
    digest = md5(str(datetime.now())).hexdigest()
    login = env.user.login
    dest = '%s/%s/%s/%s' % (login[0], login, digest[:2], digest[2:4])

    to_delete = env.request.args('del-attach', [])
    if not isinstance(to_delete, (list, tuple)):
        to_delete = [to_delete]
    for path in to_delete:
        if path in files:
            remove_attach(path)
            files.remove(path)

    names = env.request.args('attach', [])
    uploads = env.request.files('attach')
    if not isinstance(names, (list, tuple)):
        # A single attachment: wrap both the name and the upload.
        names = [names]
        uploads = [uploads]

    for index, name in enumerate(names[:10]):
        if isinstance(name, str):
            name = name.decode('utf-8')
        # Transliterate, then collapse every run of characters other than
        # word characters and dots into a single hyphen.
        name = re.sub(r'[^\w\.]+', '-', unidecode(name))
        directory = "%s/%s/" % (dest, randint(1000, 9999))
        make_attach(uploads[index], directory, name, remove=True)
        files.append(os.path.join(directory, name))

    return files
示例10: __reWildcard
def __reWildcard(regexp, string):
    """Wildcard-based regular expression system.

    ``regexp`` is a literal pattern in which ``*`` matches any (possibly
    empty) run of characters; no other character is special.  Returns True
    when the whole of ``string`` matches the pattern, False otherwise.
    """
    # Consecutive stars are equivalent to a single one.
    regexp = re.sub(r"\*+", "*", regexp)
    if "*" not in regexp:
        return regexp == string

    # With at least one star there are at least two blocks; the first and
    # last are the anchored prefix/suffix (empty when the pattern starts or
    # ends with a star).
    blocks = regexp.split("*")
    prefix, suffix = blocks[0], blocks[-1]

    # Prefix and suffix must not overlap inside ``string``; without this
    # check "a*a" would match "a".
    if len(string) < len(prefix) + len(suffix):
        return False
    if not (string.startswith(prefix) and string.endswith(suffix)):
        return False

    # The inner blocks must appear in order, strictly between the prefix and
    # the suffix.  Taking the leftmost occurrence each time is sufficient.
    remainder = string[len(prefix):len(string) - len(suffix)]
    position = 0
    for block in blocks[1:-1]:
        found = remainder.find(block, position)
        if found == -1:
            return False
        position = found + len(block)
    return True
示例11: render_pony
def render_pony(name, text, balloonstyle, width=80, center=False, centertext=False):
    """Render the pony ``name`` (with an optional speech balloon) as a string.

    The template lines come from ``load_pony(name)``.  The first line
    containing ``$balloon<N>$`` is replaced by the output of
    ``render_balloon``; a section enclosed by two ``$$$`` lines is removed;
    the ``$\\$`` and ``$/$`` markers are replaced by the last two entries of
    ``balloonstyle`` when ``text`` is truthy and by nothing otherwise.  Each
    line is cut to at most ``width`` characters (colour escapes are not
    counted) and followed by a colour reset and a newline.  With ``center``
    the picture is indented to sit in the middle of ``width`` columns.
    """
    pony = load_pony(name)
    link_l = link_r = ''
    if text:
        link_l, link_r = balloonstyle[-2:]

    for i, line in enumerate(pony):
        match = re.search(r'\$balloon([0-9]*)\$', line)
        if match:
            # The optional number is the minimum balloon width.
            minwidth = int(match.group(1) or '0')
            pony[i:i+1] = render_balloon(text, balloonstyle, minwidth=minwidth, maxwidth=int(width/2), pad=str.center if centertext else str.ljust)
            break

    # Drop the section delimited by two '$$$' lines, markers included.
    # list.index() raises ValueError when a marker is missing; in that case
    # the template is left untouched.
    try:
        first = pony.index('$$$')
        second = pony[first+1:].index('$$$')
    except ValueError:
        pass
    else:
        pony[first:] = pony[first+1+second+1:]

    pony = [ line.replace('$\\$', link_l).replace('$/$', link_r) for line in pony ]

    indent = ''
    if center:
        ponywidth = max([ len(re.sub(r'\x1B\[[0-9;]+m|\$.*\$', '', line)) for line in pony ])
        indent = ' '*int((width-ponywidth)/2)

    # Up to ``width`` characters, each optionally preceded by colour escapes.
    wre = re.compile(r'((\x1B\[[0-9;]+m)*.){0,%s}' % width)
    # SGR "default foreground/background".  The leading ESC byte is required
    # for the terminal to interpret the sequence instead of printing it.
    reset = '\x1b[39;49m\n'
    return indent+(reset+indent).join([ wre.search(line).group() for line in pony ])+reset
示例12: replace_placeholders
def replace_placeholders(string, item, match):
    """Replaces placeholders in the string.

    ``{{body}}``, ``{{domain}}``, ``{{permalink}}``, ``{{subreddit}}``,
    ``{{title}}``, ``{{url}}`` and ``{{user}}`` are filled from ``item``.
    When ``match`` is truthy, ``{{match-N}}`` is replaced by group N of
    that match and the string is passed through ``match.expand()``.

    Returns the resulting string.
    """
    def fill(template, placeholder, value):
        # match.expand() below interprets backslashes.  Text taken from the
        # item must come out literally, so its backslashes are doubled here
        # and expand() turns them back into single ones.
        if match and hasattr(value, 'replace'):
            value = value.replace('\\', '\\\\')
        return template.replace(placeholder, value)

    is_comment = isinstance(item, praw.objects.Comment)

    string = fill(string, '{{body}}',
                  item.body if is_comment else item.selftext)
    string = fill(string, '{{domain}}', getattr(item, 'domain', ''))
    string = fill(string, '{{permalink}}', get_permalink(item))
    string = fill(string, '{{subreddit}}', item.subreddit.display_name)
    string = fill(string, '{{title}}',
                  item.link_title if is_comment else item.title)
    string = fill(string, '{{url}}', getattr(item, 'url', ''))
    string = fill(string, '{{user}}',
                  item.author.name if item.author else '[deleted]')

    # Replace any {{match-##}} with a reference to the corresponding match
    # group.  The \g<N> form stays unambiguous when the placeholder is
    # directly followed by a digit.
    string = re.sub(r'\{\{match-(\d+)\}\}', r'\\g<\1>', string)
    if match:
        string = match.expand(string)
    return string
示例13: processText
def processText(text):
    '''
    Strips some unwanted characters and splits full text into pieces with a
    simple sentence filter.

    BEL (``\\x07``) and carriage-return characters are removed first.  A
    boundary is a period followed, within at most two other characters, by
    a capital letter.  Each yielded piece ends with its period; the next
    piece starts two characters after that period.  The remaining text is
    yielded last, so at least one string is always produced.

    (Stripping the acknowledgement/reference sections via pubGeneric was
    tried originally but was not working, and is not done here.)
    '''
    text = re.sub(r'\x07|\r', '', text)

    sentence_start = 0
    for boundary in re.finditer(r'\..?.?([A-Z])', text):
        period = boundary.start()
        yield text[sentence_start:period + 1]
        sentence_start = period + 2
    # The tail starts where the next sentence would, not at the last period;
    # starting at the period repeated ". " in front of the final piece.
    yield text[sentence_start:]
示例14: html2md
def html2md(s):
    """Convert the HTML in ``s`` to text with ``HTML2Text``.

    After conversion, links and images that have an empty label
    (``[](url)`` / ``![](url)``) are replaced by the bare URL surrounded by
    single spaces.
    """
    converter = HTML2Text()
    # NOTE(review): 0 presumably disables line wrapping -- confirm against
    # the html2text documentation.
    converter.body_width = 0

    def bare_url(m):
        return " %s " % m.group('url')

    converted = converter.handle(s)
    return re.sub(r'\!?\[\]\((?P<url>.+?)\)', bare_url, converted)
示例15: replace
def replace(self, m):
    """Rebuild the matched text with every ':' in the fragment encoded.

    ``m`` must provide the named groups ``scheme``, ``pass``,
    ``authority``, ``undef``, ``query`` and ``fragment``.  The first five
    are emitted unchanged; in ``fragment`` each ``:`` becomes ``%3a``.
    """
    leading = tuple(
        m.group(part)
        for part in ("scheme", "pass", "authority", "undef", "query")
    )
    fragment = re.sub(r":", "%3a", m.group("fragment"))
    return "%s%s%s%s%s%s" % (leading + (fragment,))