本文整理汇总了Python中re.sub方法的典型用法代码示例。如果您正苦于以下问题:Python re.sub方法的具体用法?Python re.sub怎么用?Python re.sub使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块re的用法示例。
在下文中一共展示了re.sub方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_quo9
# 需要导入模块: import re [as 别名]
# 或者: from re import sub [as 别名]
def test_quo9(self):
    """Generate an <h5> tag whose onload attribute goes through nested esc() calls."""
    # right: "<h5 id='id824837' onload='chat(\'id705147\',1,\' width=\\\'2pt\\\'\')'>"
    # wrong: "<h5 id='id824837' onload='chat(\'id705147\',1,\\\' width=\\\'2pt\'\')'>"
    # In the right output the text produced by the inner esc() call is escaped
    # a second time by the outer esc(func) call.

    def esc(x):
        # Put a backslash in front of every single quote and backslash.
        return re.sub(r"('|\\)", r"\\\1", x)

    # Grammar lines shared by both scenarios.
    rules_head = (
        "@id 8\n"
        "root \"<h5 id='\" id \"' onload='\" esc(func) \"'>\" #rclean\n"
        "id 'id' [0-9]{6}\n"
    )
    # %s is the symbol placed inside chat(): either 'id' or '@id'.
    func_rule = "func \"chat('\" %s \"',\" [0-9] \",'\" esc(\" width='2pt'\") \"')\"\n"
    expected_tail = r"\\',[0-9],\\' width=\\\\\\'2pt\\\\\\'\\'\)'>$"

    scenarios = (
        # 'id' inside chat(): any six-digit id is accepted there
        ("id", r"^<h5 id='id[0-9]{6}' onload='chat\(\\'id[0-9]{6}"),
        # same grammar with '@id' in chat() instead of 'id': the id must
        # repeat the one in the id attribute (back-reference \1)
        ("@id", r"^<h5 id='(id[0-9]{6})' onload='chat\(\\'\1"),
    )
    for id_symbol, expected_head in scenarios:
        w = Grammar(rules_head + func_rule % id_symbol, esc=esc)
        self.assertRegex(w.generate(), expected_head + expected_tail)
示例2: display
# 需要导入模块: import re [as 别名]
# 或者: from re import sub [as 别名]
def display(self):
    """
    Render the progress string and hand it to ``self._overwrite``.

    Nothing is written when the IO is quiet. If no format has been fixed
    yet, the internal format (or, failing that, the result of
    ``self._determine_best_format()``) is installed first. Every
    ``%name%`` / ``%name:argument%`` placeholder in the format is then
    replaced by whatever ``self._overwrite_callback`` returns for it.
    """
    if self._io.is_quiet():
        return

    if self._format is None:
        chosen_format = self._internal_format or self._determine_best_format()
        self._set_real_format(chosen_format)

    # group 1: placeholder name, group 2: optional argument after ':'
    placeholder = r"(?i)%([a-z\-_]+)(?::([^%]+))?%"
    rendered = re.sub(placeholder, self._overwrite_callback, self._format)
    self._overwrite(rendered)
示例3: test_quo9
# 需要导入模块: import re [as 别名]
# 或者: from re import sub [as 别名]
def test_quo9(self):
    """Check the escaping produced by nested esc() calls in a generated <h5> tag."""
    # right: "<h5 id='id824837' onload='chat(\'id705147\',1,\' width=\\\'2pt\\\'\')'>"
    # wrong: "<h5 id='id824837' onload='chat(\'id705147\',1,\\\' width=\\\'2pt\'\')'>"
    # The output of the inner esc() call has to be escaped once more by the
    # outer esc(func) call.
    root_rule = "root \"<h5 id='\" id \"' onload='\" esc(func) \"'>\"\n"
    id_rule = "id 'id' /[0-9]{6}/\n"
    func_open = "func \"chat('\" "
    func_close = " \"',\" /[0-9]/ \",'\" esc(\" width='2pt'\") \"')\"\n"
    expected_tail = r"\\',[0-9],\\' width=\\\\\\'2pt\\\\\\'\\'\)'>$"

    def escape_quotes(x):
        # Prefix each single quote and backslash with a backslash.
        return re.sub(r"('|\\)", r"\\\1", x)

    def check(id_symbol, expected_head):
        # Build the grammar with the given symbol inside chat() and match
        # one generated sample against the expected pattern.
        grammar_text = root_rule + id_rule + func_open + id_symbol + func_close
        w = Grammar(grammar_text, esc=escape_quotes)
        self.assertRegex(w.generate(), expected_head + expected_tail)

    check("id", r"^<h5 id='id[0-9]{6}' onload='chat\(\\'id[0-9]{6}")
    # same grammar with '@id' in chat() instead of 'id'
    check("@id", r"^<h5 id='(id[0-9]{6})' onload='chat\(\\'\1")
示例4: add_comp_one
# 需要导入模块: import re [as 别名]
# 或者: from re import sub [as 别名]
def add_comp_one(compstr):
    """
    Adds stoichiometries of 1 to compstr that don't have them
    e.g. "BaTiO3" -> "Ba1Ti1O3"; a trailing "x" (as in "Lix") is left alone
    :param compstr: composition as a string
    :return: composition with stoichiometries of 1 added
    """
    # Break the formula in front of every capital letter:
    # "BaTiO3" -> ["Ba", "Ti", "O3"].
    tokens = re.sub(r"([A-Z])", r" \1", compstr).split()
    # Merge neighbouring tokens by whether they are purely alphabetic. The
    # tokens themselves are grouped -- grouping the characters of str(tokens)
    # would leak the list's brackets and quotes into the result.
    runs = ["".join(run) for _, run in groupby(tokens, str.isalpha)]
    pieces = []
    for run in runs:
        for element in re.sub(r"([A-Z])", r" \1", run).split():
            # A piece ending in a letter has no explicit amount; "x" is
            # treated as a variable amount and kept as is.
            if element[-1].isalpha() and element[-1] != "x":
                element += "1"
            pieces.append(element)
    return "".join(pieces)
示例5: add_comp_one
# 需要导入模块: import re [as 别名]
# 或者: from re import sub [as 别名]
def add_comp_one(compstr):
    """
    Adds stoichiometries of 1 to compstr that don't have them
    e.g. "BaTiO3" -> "Ba1Ti1O3"; a trailing "x" (as in "Lix") is left alone
    :param compstr: composition as a string
    :return: composition with stoichiometries of 1 added
    """
    # str.split() already yields plain str tokens, so no array conversion is
    # needed (the former pd.np alias no longer exists in current pandas).
    sample = re.sub(r"([A-Z])", r" \1", compstr).split()
    # Merge neighbouring tokens by whether they are purely alphabetic.
    sample = ["".join(g) for _, g in groupby(sample, str.isalpha)]
    samp_new = []
    for chunk in sample:
        for part in re.sub(r"([A-Z])", r" \1", chunk).split():
            # No trailing number means an implicit amount of 1; a trailing
            # "x" is kept untouched.
            needs_one = part[-1].isalpha() and part[-1] != "x"
            samp_new.append(part + "1" if needs_one else part)
    return "".join(samp_new)
示例6: parse_text
# 需要导入模块: import re [as 别名]
# 或者: from re import sub [as 别名]
def parse_text(self,txt):
    """
    Run ``txt`` through ``self.pipeline`` while passing comment lines through.

    Lines that match ``comment_regex`` (after stripping leading whitespace)
    are copied to the output with the matched part replaced by "# ". All
    other lines are collected into blocks, and each block is sent to
    ``self.pipeline.process`` as one newline-joined string.

    :param txt: input text, lines separated by "\\n"
    :return: concatenation of the processed blocks and the comment lines
    """
    # NOTE(review): err is handed to the pipeline but never inspected here.
    err = udpipe.ProcessingError()
    output_parts = []
    pending = []

    def flush():
        # Process the text lines collected so far, if any.
        if pending:
            output_parts.append(self.pipeline.process("\n".join(pending), err))
            del pending[:]

    for raw_line in txt.split("\n"):
        stripped = raw_line.lstrip()
        if not re.match(comment_regex, stripped):
            # normal text line, keep it for the next pipeline call
            pending.append(raw_line)
            continue
        # comment line: emit the preceding block before the comment itself
        flush()
        output_parts.append(re.sub(comment_regex, "# ", stripped + "\n"))
    flush()
    return "".join(output_parts)
示例7: update_sys_cfg_file
# 需要导入模块: import re [as 别名]
# 或者: from re import sub [as 别名]
def update_sys_cfg_file(uninstall_distro_dir_name):
    """
    Remove the section of an uninstalled distro from syslinux.cfg.

    Everything from the '#start <name>' marker through the '#end <name>'
    marker, plus any whitespace following it, is deleted from
    <config.usb_mount>/multibootusb/syslinux.cfg. If the file does not
    exist, only a log message is written.
    :param uninstall_distro_dir_name: directory name of the distro, as used
        in the '#start' / '#end' markers
    :return: None
    """
    sys_cfg_file = os.path.join(config.usb_mount, "multibootusb", "syslinux.cfg")
    if not os.path.exists(sys_cfg_file):
        gen.log("syslinux.cfg file not found for updating changes.")
        return

    gen.log("Updating syslinux.cfg file...")
    distro = re.escape(uninstall_distro_dir_name)
    # DOTALL lets '.*?' span the lines between the two markers.
    section = re.compile(r'#start ' + distro + '.*?' + '#end ' + distro + r'\s*',
                         flags=re.DOTALL)
    # Context managers make sure both handles are closed, also on errors.
    with open(sys_cfg_file) as cfg:
        string = cfg.read()
    with open(sys_cfg_file, "w") as cfg:
        cfg.write(section.sub('', string))
示例8: update_grub_cfg_file
# 需要导入模块: import re [as 别名]
# 或者: from re import sub [as 别名]
def update_grub_cfg_file(uninstall_distro_dir_name):
    """
    Remove the section of an uninstalled distro from grub.cfg.

    Everything from the '#start <name>' marker through the '#end <name>'
    marker, plus any whitespace following it, is deleted from
    <config.usb_mount>/multibootusb/grub/grub.cfg. If the file does not
    exist, only a log message is written.
    :param uninstall_distro_dir_name: directory name of the distro, as used
        in the '#start' / '#end' markers
    :return: None
    """
    grub_cfg_file = os.path.join(config.usb_mount, "multibootusb",
                                 "grub", "grub.cfg")
    if not os.path.exists(grub_cfg_file):
        gen.log("grub.cfg file not found for updating changes.")
        return

    gen.log("Updating grub.cfg file...")
    distro = re.escape(uninstall_distro_dir_name)
    # DOTALL lets '.*?' span the lines between the two markers.
    section = re.compile(r'#start ' + distro + '.*?' + '#end ' + distro + r'\s*',
                         flags=re.DOTALL)
    # Context managers make sure both handles are closed, also on errors.
    with open(grub_cfg_file) as cfg:
        string = cfg.read()
    with open(grub_cfg_file, "w") as cfg:
        cfg.write(section.sub('', string))
示例9: buildPaginationHeader
# 需要导入模块: import re [as 别名]
# 或者: from re import sub [as 别名]
def buildPaginationHeader(resultCount, resultsPerPage, pageArg, url):
    """Build link header for result pagination.

    :param resultCount: total number of results
    :param resultsPerPage: number of results on one page
    :param pageArg: value of the 'page' request argument; falsy when the
        request did not specify a page (page 1 is assumed then)
    :param url: request URL; when pageArg is given, its 'page=<n>' part is
        rewritten, otherwise '?page=<n>' is appended
    :return: the Link header value with rel=next/prev/first/last entries
    """
    # Ceiling integer division: a final, partially filled page still counts,
    # and the page number stays an int (true division would give 'page=2.5').
    # An empty result set is reported as a single page.
    lastPage = max(1, -(-resultCount // resultsPerPage))

    if pageArg:
        page = int(pageArg)
        page_param = re.compile("page=[0-9]+")
        next_url = page_param.sub("page={}".format(page + 1), url)
        prev_url = page_param.sub("page={}".format(page - 1), url)
        first_url = page_param.sub("page=1", url)
        last_url = page_param.sub("page={}".format(lastPage), url)
    else:
        page = 1
        next_url = url + "?page=2"
        prev_url = ""
        first_url = url + "?page=1"
        last_url = url + "?page={}".format(lastPage)

    if page == 1:
        return "<{}>; rel=next, <{}>; rel=last".format(next_url, last_url)
    if page == lastPage:
        return "<{}>; rel=prev, <{}>; rel=first".format(prev_url, first_url)
    return "<{}>; rel=next, <{}>; rel=prev, <{}>; rel=first, <{}>; rel=last".format(
        next_url, prev_url, first_url, last_url)
示例10: paginate_query
# 需要导入模块: import re [as 别名]
# 或者: from re import sub [as 别名]
def paginate_query(query, results_per_page, get_args):
    """Modify the given query so that it can be paginated.

    Any existing 'LIMIT <n>' / 'OFFSET <n>' clause is removed and a new
    'LIMIT results_per_page OFFSET (page - 1) * results_per_page' is appended,
    so the paginated query returns at most `results_per_page` results.

    :param query: query text
    :param results_per_page: maximum number of results on one page
    :param get_args: mapping of request arguments; 'page' defaults to 1
    :return: the query text with the new LIMIT/OFFSET clause appended
    """
    page = get_args.get('page', 1)

    glogger.info("Paginating query for page {}, {} results per page".format(page, results_per_page))

    # If contains LIMIT or OFFSET, remove them. The keywords are matched
    # case-insensitively so that a lower-case 'limit 10' is not left behind
    # next to the clause appended below.
    glogger.debug("Original query: " + query)
    no_limit_query = re.sub(r"(?:LIMIT|OFFSET)\s+[0-9]+", "", query, flags=re.IGNORECASE)
    glogger.debug("No limit query: " + no_limit_query)

    # Append LIMIT results_per_page OFFSET (page-1)*results_per_page
    offset = (int(page) - 1) * results_per_page
    paginated_query = no_limit_query + " LIMIT {} OFFSET {}".format(results_per_page, offset)
    glogger.debug("Paginated query: " + paginated_query)

    return paginated_query
示例11: removeLines
# 需要导入模块: import re [as 别名]
# 或者: from re import sub [as 别名]
def removeLines(mg):
    """
    Copy the '<mg>_bact+arch_dnaV.tax' file, dropping listed entries.

    The first two tab-separated columns of the source file are read. A row is
    dropped when the digits after a trailing 'ncbid:' in its first column
    (or the whole first column, if it has no such suffix) appear in the first
    column of the removal list. The number of dropped rows is printed.

    :param mg: name used as prefix of the source and destination file names
    """
    removeListFilePath = '/net/metagenomics/projects/PPSmg/data/V35/genome_ncbids.txt'
    #removeListFilePath = '/net/metagenomics/projects/PPSmg/data/V35/genome_accession_silva.txt'
    srcFilePath = str('/net/metagenomics/projects/PPSmg/data/markerGenes/db/' + mg + '_bact+arch_dnaV.tax')
    dstFilePath = str('/net/metagenomics/projects/PPSmg/data/V35/genomesRemoved/markerGenes/db/' + mg + '_bact+arch_dnaV.tax')
    #srcFilePath = str('/net/metagenomics/projects/PPSmg/data/silva/' + mg + '_silva106_ncbitax.bacteria+archaea.tax' )
    #dstFilePath = str('/net/metagenomics/projects/PPSmg/data/V35/genomesRemoved/silva/' + mg + '_silva106_ncbitax.bacteria+archaea.tax' )
    pattern = re.compile(r'.*ncbid:([0-9]+)$')
    #pattern = r'^([^\-]+)\-.*$'
    removeSet = set(csv.getColumnAsList(removeListFilePath, colNum=0, comment='#'))
    col0 = csv.getColumnAsList(srcFilePath, colNum=0, sep='\t', comment='#')
    col1 = csv.getColumnAsList(srcFilePath, colNum=1, sep='\t', comment='#')
    out = csv.OutFileBuffer(dstFilePath)
    removed = 0
    try:
        # Distinct loop names: the lists being iterated are not rebound.
        for first, second in zip(col0, col1):
            if pattern.sub(r'\1', first) not in removeSet:
                out.writeText(str(first + '\t' + second + '\n'))
            else:
                removed += 1
    finally:
        # Close the output buffer even if a row cannot be written.
        out.close()
    # Single-argument print works on Python 2 and 3 with identical output.
    print('%s removeLines %s' % (mg, removed))
示例12: removeSequences
# 需要导入模块: import re [as 别名]
# 或者: from re import sub [as 别名]
def removeSequences(mg):
    """
    Copy the '<mg>_bact+arch_dnaV.noalign.fna' file, dropping listed sequences.

    A sequence is dropped when the digits after a trailing 'ncbid:' in its
    id (or the whole id, if it has no such suffix) appear in the first column
    of the removal list. Kept sequences are written as '>id' followed by the
    sequence on the next line. The number of dropped sequences is printed.

    :param mg: name used as prefix of the source and destination file names
    """
    removeListFilePath = '/net/metagenomics/projects/PPSmg/data/V35/genome_ncbids.txt'
    #removeListFilePath = '/net/metagenomics/projects/PPSmg/data/V35/genome_accession_silva.txt'
    srcFilePath = str('/net/metagenomics/projects/PPSmg/data/markerGenes/db/' + mg + '_bact+arch_dnaV.noalign.fna')
    dstFilePath = str('/net/metagenomics/projects/PPSmg/data/V35/genomesRemoved/markerGenes/db/' + mg + '_bact+arch_dnaV.noalign.fna')
    #srcFilePath = str('/net/metagenomics/projects/PPSmg/data/silva/' + mg + '_silva106_ncbitax.bacteria+archaea.fna' )
    #dstFilePath = str('/net/metagenomics/projects/PPSmg/data/V35/genomesRemoved/silva/' + mg + '_silva106_ncbitax.bacteria+archaea.fna' )
    pattern = re.compile(r'.*ncbid:([0-9]+)$')
    #pattern = r'^([^\-]+)\-.*$'
    removeSet = set(csv.getColumnAsList(removeListFilePath, colNum=0, comment='#'))
    seqIdToSeq = fas.fastaFileToDict(srcFilePath)
    out = csv.OutFileBuffer(dstFilePath)
    removed = 0
    try:
        for seqId in seqIdToSeq:
            if pattern.sub(r'\1', str(seqId)) not in removeSet:
                out.writeText(str('>' + str(seqId) + '\n' + str(seqIdToSeq[seqId]) + '\n'))
            else:
                removed += 1
    finally:
        # Close the output buffer even if a record cannot be written.
        out.close()
    # Single-argument print works on Python 2 and 3 with identical output.
    print('%s removeSequences %s' % (mg, removed))
示例13: parse
# 需要导入模块: import re [as 别名]
# 或者: from re import sub [as 别名]
def parse(self, line):
    """
    Parse one two-column line and append the result to ``self.outBuffer``.

    The first column is split into a name (its leading '<num>_<num>') and a
    tag (the remaining '<num>_<num>_<p|r...><0-2>' part). From the second,
    ';'-separated column the last-but-one entry is taken (after dropping
    'unclassified;' entries) and read as '<clade>(<weight>)'. The written
    entry is 'name<TAB>clade<TAB>weight<TAB>self.source<TAB>tag'; entries
    are separated by newlines.

    Lines that do not have exactly two columns are reported on stdout;
    lines with fewer than two placement entries or a non-numeric clade are
    skipped silently.

    :param line: one line of the parsed file
    :raises ValueError: if the clade is numeric but the weight cannot be
        converted to float
    """
    lineArray = line.split()
    if len(lineArray) != 2:
        # Single-argument print works on Python 2 and 3 with identical output.
        print('_MothurOutFileParser: wrong line %s' % (line,))
        return
    seqId, placements = lineArray
    name = re.sub(r'^([0-9]+_[0-9]+)_[0-9]+_[0-9]+_[pr]+[0-2]$', r'\1', seqId)
    tag = re.sub(r'^[0-9]+_[0-9]+_([0-9]+_[0-9]+_[pr]+[0-2])$', r'\1', seqId)
    placementList = placements.replace('unclassified;', '').split(';')
    if len(placementList) < 2:
        #print '_MothurOutFileParser: skip line', line
        return
    # The column ends with ';', so the last real entry is the last but one.
    placement = placementList[-2]
    try:
        clade = int(re.sub(r'([0-9]+)\(.*', r'\1', placement))
    except ValueError:
        return
    weight = float(re.sub(r'[0-9]+\(([0-9\.]+)\)', r'\1', placement))
    entry = '\t'.join([str(name), str(clade), str(weight), str(self.source), str(tag)])
    # Entries are newline-separated, without a trailing newline.
    if not self.outBuffer.isEmpty():
        entry = '\n' + entry
    self.outBuffer.writeText(entry)
示例14: __init__
# 需要导入模块: import re [as 别名]
# 或者: from re import sub [as 别名]
def __init__(self, line):
tokens = line.split(',')
self._threshold = float(re.sub(r'^([^\t]+)\t[^\t]+\t.*', r'\1', tokens[0]))
tokens[0] = re.sub(r'^[^\t]+\t[^\t]+\t(.*)', r'\1', tokens[0])
self.groupIdCount = 0
self.seqNameToGroupId = dict([])
self.groupIdToSeqNameSet = dict([])
for token in tokens:
names = token.split('\t')
self.groupIdToSeqNameSet[self.groupIdCount] = set([])
for name in names:
#print name
if re.match(r'^[0-9]+_.*$', name):
seqName = re.sub(r'^([0-9]+_[0-9]+)_.*$',r'\1', name)
self.seqNameToGroupId[seqName] = self.groupIdCount
self.groupIdToSeqNameSet[self.groupIdCount].add(seqName)
self.groupIdCount += 1
示例15: _clean_str
# 需要导入模块: import re [as 别名]
# 或者: from re import sub [as 别名]
def _clean_str(string):
"""
Tokenization/string cleaning for all datasets except for SST.
Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
"""
string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
string = re.sub(r"\'s", " \'s", string)
string = re.sub(r"\'ve", " \'ve", string)
string = re.sub(r"n\'t", " n\'t", string)
string = re.sub(r"\'re", " \'re", string)
string = re.sub(r"\'d", " \'d", string)
string = re.sub(r"\'ll", " \'ll", string)
string = re.sub(r",", " , ", string)
string = re.sub(r"!", " ! ", string)
string = re.sub(r"\(", " \( ", string)
string = re.sub(r"\)", " \) ", string)
string = re.sub(r"\?", " \? ", string)
string = re.sub(r"\s{2,}", " ", string)
return string.strip().lower()