本文整理汇总了Python中re.split函数的典型用法代码示例。如果您正苦于以下问题:Python split函数的具体用法?Python split怎么用?Python split使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了split函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse_dmapdumpstring
def parse_dmapdumpstring(dumpstring):
    """Parse the text output of a dmapdump call into {name: value}.

    The dump has a 'scalars:' section of '"name" = value' assignments and an
    'arrays:' section of vectors; scalar values are kept as-is and vector
    values become numpy arrays.
    """
    scandata = {}
    # Everything after 'scalars:' is the payload; 'arrays:' separates the
    # scalar section (scan[0]) from the vector section (scan[1]).
    scan = dumpstring.split('scalars:')[-1].split('arrays:')
    scalars = scan[0].split('\n')
    vectors = re.split(VECTOR_SPLITTER, scan[1])
    for scalar in scalars:
        if scalar == '':
            continue
        assignment = scalar.split('\t')[-1].split(' = ')
        var = assignment[0].lstrip('"').rstrip('"')
        # NOTE(review): eval on dump text is unsafe for untrusted input; kept
        # because the values may be arbitrary Python literals (int/float/str).
        value = eval(assignment[1])
        scandata[var] = value
    for vector in vectors:
        vector = vector.split('=')
        if len(vector) <= 1:
            continue
        var = vector[0].split('"')[1]
        vecvalue = []
        for v in re.split(ELEM_SPLITTER, vector[1]):
            v = v.rstrip(',')
            if v == '':
                continue
            # NOTE(review): 'inf' is also mapped to NaN here, which loses the
            # sign/infinity information — confirm this is intentional.
            if v == 'inf' or v == 'nan' or v == '-nan':
                v = 'float("NaN")'
            try:
                vecvalue.append(eval(v))
            except Exception:
                # BUG FIX: was a bare `except:` with a py2-only print
                # statement; narrowed and made version-neutral.
                print('error parsing vector')
        scandata[var] = np.array(vecvalue)
    return scandata
示例2: __init__
def __init__(self, filename, myopen=open, swapYZ=False):
    """Load an ASCII PLY mesh from `filename` into self.vertices/self.faces.

    myopen: file-opening callable (e.g. gzip.open); swapYZ swaps the Y/Z
    axes (vertex becomes (x, z, -y)). Asserts on malformed headers and on
    empty vertex/face lists.
    """
    super(MeshPLY, self).__init__()
    with myopen(filename, "r") as handle:
        # Header must begin with the magic line and an ASCII format line.
        assert handle.readline().strip() == "ply"
        assert handle.readline().strip().startswith("format ascii")
        element_counts = []
        while True:
            header = handle.readline().strip()
            if header == "end_header":
                break
            parts = re.split("\\s+", header)
            # Record "element <name> <count>" declarations in order.
            if len(parts) >= 3 and parts[0] == 'element':
                element_counts.append((parts[1], int(parts[2])))
        # Need at least vertex and face element declarations.
        assert len(element_counts) >= 2
        for element_name, element_count in element_counts:
            for _ in range(element_count):
                row = re.split("\\s+", handle.readline().strip())
                if element_name == 'vertex':
                    if swapYZ:
                        vertex = V3(float(row[0]), float(row[2]), -float(row[1]))
                    else:
                        vertex = V3(float(row[0]), float(row[1]), float(row[2]))
                    self.vertices.append(vertex)
                elif element_name == 'face':
                    vertex_total = int(row.pop(0))
                    indices = tuple(int(row[j]) for j in range(vertex_total))
                    self.faces.append((0, indices))
    assert self.vertices
    assert self.faces
示例3: get_head_words
def get_head_words(s, nwords, ctype):
    """Extract the head-word chunk from claim text `s`.

    nwords: maximum number of words for the chunk helpers.
    ctype: 'device' (adjective/noun chunk) or 'method' (verb chunk).
    """
    # First limit to before any commas/semicolons; then remove stop phrases.
    # BUG FIX: r';,' matched only the literal two-character sequence ";," —
    # a character class is required to split on either separator.
    s = re.split(r'[;,]', s)[0]
    remove_list = r'(a\splurality\sof\s|at\sleast|composition\sof|the\ssteps\sof|wherein\s*(?:said)?|first|second|third|(?:[a-z]|\d+)?(?:\)|\.))'
    s = re.sub(remove_list, '', s)
    if ctype == 'device':
        # Get first ~ <JJ>*<NN>+ chunk.
        return first_JN_chunk(s, nwords)
    elif ctype == 'method':
        # Try to split around "method of|for|to"; the pattern has a capture
        # group, so re.split returns [before, captured_word, after] — index 2
        # is the text following the phrase.
        msplit1 = re.split(r'method\s(of|for|to)', s)
        if len(msplit1) > 1:
            return first_V_chunk(msplit1[2], nwords)
        msplit2 = re.split(r'method', s)
        if len(msplit2) > 1:
            return first_V_chunk(msplit2[0], nwords)
        # Else, get first VBG + its subject if possible.
        return first_V_chunk(s, nwords)
示例4: scrape_and_look_for_next_link
def scrape_and_look_for_next_link(url):
    """Scrape one results page at `url`, then recurse into the "Next page" link.

    Relies on names defined elsewhere in the file: scraperwiki, lxml,
    BeautifulSoup, scrape_table and nextlink_url.
    """
    html = scraperwiki.scrape(url)
    #print html
    root = lxml.html.fromstring(html)
    soup = BeautifulSoup(html) #using BeautifulSoup to find next page links
    scrape_table(root) #before carrying on scrape the hrefs using the scrape_table function
    #print soup
    items = soup.findAll('a',title="Next page") # findAll "next page" links
    if items: # if there is a next page link continue
        # NOTE(review): assumes the pager's third anchor ([2]) is the "next"
        # postback link — confirm against the scraped site's markup.
        next_link = root.cssselect("div.srch-Page.srch-Page-bg a")
        #print next_link
        if next_link:
            next_link2 = next_link[2].attrib['href']
            #print next_link2
            # Peel the page token out of the href: text before the first ')',
            # then the third '='-separated piece of that, then the part
            # before the first single quote.
            split_link = re.split("\)+",next_link2)
            split_link2 = re.split("\=+",split_link[0])
            split_link3 = re.split("\'+",split_link2[2])
            #print split_link3[0]
            #print split_link2
            #if split_link ==11:
            next_url = nextlink_url+split_link3[0]
            if next_url:
                print next_url
                # Recurse until a page with no "Next page" link is reached.
                scrape_and_look_for_next_link(next_url)
开发者ID:carriercomm,项目名称:scraperwiki-scraper-vault,代码行数:26,代码来源:aqp_nhs_contracts_scraper_resting_findall_function.py
示例5: tokenize
def tokenize(lines):
    """Tokenize source lines into [tokens, strings, functions].

    Comments (#...) are stripped, single-quoted string literals are pulled
    into `strings` and replaced by 'string <index>' placeholders, and the
    ';'-separated chunks alternate between plain token runs (even index)
    and function definitions (odd index, name followed by its tokens).
    """
    token_list = []
    string_table = []
    func_table = {}
    collapsed = ''
    for raw_line in lines:
        # Drop comments and normalise whitespace within the line.
        cleaned = re.sub(r'#.*$', "", raw_line)
        cleaned = re.sub('\n', ' ', cleaned)
        cleaned = re.sub('\t', '', cleaned)
        # After splitting on quotes, odd-indexed pieces are string literals.
        for piece_index, piece in enumerate(re.split('\'', cleaned)):
            if piece_index % 2 == 0:
                collapsed += piece
            else:
                string_table.append(piece)
                collapsed += 'string ' + str(len(string_table) - 1)
    # Odd-indexed ';'-separated chunks define functions; the rest are tokens.
    for chunk_index, chunk in enumerate(re.split(';', collapsed)):
        if chunk != '' and chunk != ' ' and chunk != '\t':
            words = re.split(' ', chunk.strip())
            if chunk_index % 2 != 0:
                func_table[words[0]] = words[1:]
            else:
                token_list += words
    token_list = substitute_tokens(token_list)
    return [token_list, string_table, func_table]
示例6: update_index_html
def update_index_html(dest_dir, sectnum):
    """Rewrite dest_dir/index.html in place: renumber TOC entries and strip
    haiku-theme artifacts.

    dest_dir: directory path (with trailing separator) containing index.html.
    sectnum: starting section counter; incremented at each section div, and
    prefixed (as "<n>.") to every toctree entry title.
    """
    # Process index.html separately from the modules files.
    with open(dest_dir + 'index.html', 'r') as index_html_file:
        index_html = index_html_file.readlines()
    for line_num, line in enumerate(index_html):
        if '</head>' in line:
            # Inject a css rule to remove haiku's orange bullets.
            index_html[line_num] = line.replace('</head>', '<style>\nul li {\n\tbackground: none;\n\tlist-style-type: none;\n}\n</style>\n</head>')
        elif 'class="section"' in line:
            sectnum += 1
        elif 'RegisterBook' in line:
            # Remove registerbook page from TOC.
            index_html[line_num] = ''
        elif 'hide-from-toc' in line:
            # Remove stub chapter title.
            if '<h1>' in index_html[line_num - 1]:
                index_html[line_num - 1] = ''
        elif 'class="toctree-l' in line and 'Gradebook' not in line and 'TODO List' not in line:
            # BUG FIX: re.split's third *positional* argument is maxsplit,
            # not flags — the original passed re.IGNORECASE (== 2) as
            # maxsplit, so case-insensitive matching never happened and the
            # inner split was silently capped at 2 pieces.
            title = re.split('>', re.split('</a>', line, flags=re.IGNORECASE)[0], flags=re.IGNORECASE)[-1]
            new_title = '%s.' % sectnum + title
            index_html[line_num] = line.replace(title, new_title)
    # Write the modified contents back to index.html.
    # BUG FIX: mode was 'wb', but index_html holds text lines — binary mode
    # raises TypeError on Python 3.
    with open(dest_dir + 'index.html', 'w') as index_html_file:
        index_html_file.writelines(index_html)
示例7: verify
def verify(self, data, chans, botops):
    """Verify a configuration, and make changes if needed.

    Asks the user to confirm; on anything but 'y', re-prompts every setting
    in a loop until accepted, then replaces the stored config (delete +
    save_config) and purges this config's channel/user rows from the DB.
    data: previously loaded values (name, nick, realname, ident, ...).
    chans / botops: current channel and bot-operator lists.
    """
    verify = input('Is this configuration correct? [y/n]: ').lower()
    if verify == 'y':
        return
    else:
        verify = ''
        while verify != 'y':
            print('\n')
            name = data[0]
            nick = self.prompt("Nick", data[1])
            # NOTE(review): the prompt labels look swapped — data[2] is
            # prompted as "Ident" but stored as realname, and data[3] as
            # "Realname" but stored as ident. Confirm against the field
            # order expected by self.save_config before changing anything.
            realname = self.prompt("Ident", data[2])
            ident = self.prompt("Realname", data[3])
            chans = self.prompt("Chans", ", ".join(chans))
            botop = self.prompt("Bot operator(s)", ", ".join(botops))
            password = self.prompt("Server password (optional)", hidden=True)
            youtube = self.prompt("YouTube API key (optional)", hidden=True)
            # Accept either "a, b" or "a b" style lists from the user.
            chans = re.split(',? ', chans)
            botop = re.split(',? ', botop)
            self.display((name, nick, realname, ident, password, youtube), chans, botop)
            verify = input('Is this configuration correct? [y/n]: ').lower()
        # Accepted: drop the old config and its related rows, then re-save.
        self.delete(name)
        cursor = self.db_conn.cursor()
        cursor.execute('''DELETE FROM channels WHERE config = ?''', (name,))
        cursor.execute('''DELETE FROM users WHERE config = ?''', (name,))
        self.db_conn.commit()
        cursor.close()
        self.save_config((name, nick, realname, ident, chans, botop, password, youtube))
def extractValues (self, line):
    """Return the cleaned values from `line`.

    The segment after the first ':' is split on commas and each raw piece
    is passed through self.cleanValue.
    """
    segments = re.split(':', line)
    return [self.cleanValue(raw) for raw in re.split(',', segments[1])]
示例9: find_time_interval
def find_time_interval(fits):
    """
    find time interval of the fits file
    input: fits --- fits file name
    output: [tmin, tmax] --- start and stop time in seconds from 1998.1.1
    """
    # Run dmstat on the time column and capture its report in zspace.
    cmd = 'dmstat "' + fits + '[cols time]" centroid=no >' + zspace
    scf.run_ascds(cmd)
    report = scf.read_file(zspace, remove=1)
    found = 0
    for entry in report:
        # The report lines of interest look like "min <value> ..." /
        # "max <value> ..."; the second whitespace-separated field is the time.
        if re.search('min', entry) is not None:
            tmin = int(float(re.split('\s+', entry)[1]))
            found += 1
        elif re.search('max', entry) is not None:
            tmax = int(float(re.split('\s+', entry)[1]))
            found += 1
        if found > 1:
            # Both bounds located; stop scanning.
            break
    return [tmin, tmax]
示例10: epg_list
def epg_list(self):
    """Scan the EPG XML file and cache, per channel, the programme that is
    on air right now, as a formatted label in self.epg ({channel: text})."""
    try:
        now = datetime.datetime.now()
        # Current time as a YYYYMMDDHHMMSS string so it can be compared
        # numerically against the EPG start/stop attributes.
        now = '%04d' % now.year + '%02d' % now.month + '%02d' % now.day + '%02d' % now.hour + '%02d' % now.minute + '%02d' % now.second
        file = open(addonEPG,'r')
        read = file.read()
        file.close()
        programmes = re.compile('(<programme.+?</programme>)').findall(read)
    except:
        # Best effort: a missing/unreadable EPG file just means no listings.
        return
    for programme in programmes:
        try:
            # start/stop are presumably "YYYYMMDDHHMMSS +ZZZZ"; keep only the
            # numeric timestamp before the whitespace — TODO confirm format.
            start = re.compile('start="(.+?)"').findall(programme)[0]
            start = re.split('\s+', start)[0]
            stop = re.compile('stop="(.+?)"').findall(programme)[0]
            stop = re.split('\s+', stop)[0]
            # Skip programmes not currently airing (raise jumps to `pass`).
            if not int(start) <= int(now) <= int(stop): raise Exception()
            channel = common.parseDOM(programme, "programme", ret="channel")[0]
            title = common.parseDOM(programme, "title")[0]
            title = common.replaceHTMLCodes(title).encode('utf-8')
            desc = common.parseDOM(programme, "desc")[0]
            desc = common.replaceHTMLCodes(desc).encode('utf-8')
            # The literal is a Greek label stored in iso-8859-7 and
            # re-encoded to utf-8 for display (Python 2 str semantics).
            epg = "[B][%s] - %s[/B]\n%s" % ('ÔÙÑÁ'.decode('iso-8859-7').encode('utf-8'), title, desc)
            self.epg.update({channel: epg})
        except:
            # Malformed programme entries are skipped silently by design.
            pass
示例11: __init__
def __init__(self, gtf_line):
    """Build a record from the nine tab-separated GTF fields in gtf_line."""
    self.gtf_list = gtf_line
    # Field order is fixed by the GTF spec.
    (self.seqname, self.source, self.feature, self.start, self.end,
     self.score, self.strand, self.frame, self.attribute) = gtf_line
    # Parse the 'key "value"; ...' attribute column into a dict, dropping
    # any piece that does not split into exactly a key and a value.
    raw_pairs = re.split('\s*;\s*', self.attribute.strip().strip(';'))
    pairs = [re.split('\s+', pair.replace('"', '')) for pair in raw_pairs]
    self.attribute = dict(pair for pair in pairs if len(pair) == 2)
    # Convert to 0-based start / end-exclusive coordinates.
    self.start, self.end = int(self.start) - 1, int(self.end)
示例12: ReadCropAttrs
def ReadCropAttrs(cropFile):
    """Read a tab-delimited crop attribute table into nested dicts.

    Returns {field_name: {crop_id: value}} where each value is a float when
    convertible and the raw string otherwise. Line 1 is the quoted header,
    line 2 (units row) is skipped, data starts at line 3; the first column
    of every data row is the integer crop id. Falls back to the default
    table under TXT_DB_DIR when cropFile does not exist.
    """
    if not os.path.exists(cropFile):
        cropFile = TXT_DB_DIR + os.sep + CROP_FILE
    # BUG FIX: the file handle was never closed; use a context manager.
    with open(cropFile) as f:
        lines = f.readlines()
    attrDic = {}
    # BUG FIX: the original filtered with `item is not ''` — an identity
    # test that only works by CPython string interning; use inequality.
    fields = [item.replace('"', '')
              for item in re.split('\t|\n', lines[0]) if item != '']
    n = len(fields)
    for i in range(n):
        attrDic[fields[i]] = {}
    for line in lines[2:]:
        items = [item.replace('"', '')
                 for item in re.split('\t', line) if item != '']
        crop_id = int(items[0])  # renamed from `id` (shadowed the builtin)
        for i in range(n):
            dic = attrDic[fields[i]]
            try:
                dic[crop_id] = float(items[i])
            except ValueError:
                # Non-numeric attribute: keep the raw string.
                dic[crop_id] = items[i]
    return attrDic
示例13: GetRestaurantGrid
def GetRestaurantGrid(d, zip):
    """Open restaurant row `d` of the results grid via an ASP.NET postback
    and save its details (name, address, inspection date, violations).

    d: row index used to build the __EVENTTARGET control id.
    zip: zip code being scraped; becomes part of the saved unique key.
    Relies on module-level br/br1 (mechanize-style browsers — confirm),
    lxml, parseViolations and scraperwiki, none defined in this chunk.
    """
    br.select_form("Form1")
    br.set_all_readonly(False)
    dt = 'dgResults$ctl' + str(d) + '$ctl00'
    # print dt
    # Simulate the ASP.NET postback that opens this grid row's detail view.
    br["__EVENTTARGET"] = dt
    br["__EVENTARGUMENT"] = ''
    request = br.click()
    response1 = br1.open(request)
    # find the window open hidden in the script
    html1 = response1.read()
    # print html1
    root1 = lxml.html.fromstring(html1)
    rest_name = root1.cssselect("span#lblName")[0].text
    rest_address = root1.cssselect("span#lblAddress")[0].text
    cityStateZip = root1.cssselect("span#lblCityStateZip")[0].text
    city = re.split(",", cityStateZip)[0]  # text before the first comma
    rest_inspectionDate = root1.cssselect("span#lblLastInspection")[0].text
    if rest_inspectionDate == " ":
        date = ""
    else:
        # Presumably "<label>: <date>" — keep the part after the first colon.
        date = re.split(":", rest_inspectionDate)[1].strip()
    violations = parseViolations(html1)
    # print violations
    scraperwiki.sqlite.save(unique_keys=["dt"], data={"dt": dt + "_" + zip + "_" + str(datetime.date.today()), "name": rest_name, "address": rest_address, "city": city, "state":"NY", "zip": zip, "inspection_date": date, "violations": violations, "time_scraped":datetime.datetime.now(), "page_id" : dt})
示例14: split_string_with_lines
def split_string_with_lines(string, indentation = "", chars_per_line = 100):
    """Wrap `string` into lines of roughly chars_per_line characters.

    Splits on commas that are *outside* single/double quotes; segments still
    longer than chars_per_line are split again on unquoted spaces, with a
    short (<= 5 chars) trailing word merged into the previous piece.
    Returns (joined_text, line_count); continuation lines are prefixed with
    `indentation`.
    """
    # Expert splitting mode: commas not enclosed in quotes.
    matches = re.split(''',(?=(?:[^'"]|'[^']*'|"[^"]*")*$)''', string)
    # Re-attach the comma to every segment except the last.
    # BUG FIX: the original compared segments by value (s != matches[-1]),
    # so any earlier segment that happened to equal the last one lost its
    # comma; compare by position instead.
    splitted = []
    for k, seg in enumerate(matches):
        splitted.append(seg + ("," if k != len(matches) - 1 else ""))
    res = []
    buf = ""
    for k, s in enumerate(splitted):
        is_last = (k == len(splitted) - 1)
        if len(s) > chars_per_line:
            # Overlong segment: split again on unquoted spaces.
            splitted2 = re.split(''' (?=(?:[^'"]|'[^']*'|"[^"]*")*$)''', s)
            for j, s2 in enumerate(splitted2):
                ext = s2
                # Merge a short final word into the second-to-last piece.
                # BUG FIX: the original tested `s2 == splitted2[-2]` by
                # value, which also fired when splitted2 had one element
                # (splitted2[-2] is splitted2[-1]) and duplicated the text.
                if j == len(splitted2) - 2 and len(splitted2[-1]) <= 5:
                    ext += " " + splitted2[-1]
                buf += ext + (" " if j != len(splitted2) - 1 and ext == s2 else "")
                if len(buf) >= chars_per_line or j == len(splitted2) - 1:
                    res.append(buf)
                    buf = ""
                if ext != s2:
                    # The last word was merged in; nothing left to emit.
                    break
        else:
            buf += s
            if len(buf) >= chars_per_line or is_last:
                res.append(buf)
                buf = ""
    return ("\n%s"%indentation).join( res ), len( res )
示例15: compare_time
def compare_time(start, end):
    """
    <Purpose>
      Manually compares two times.
      Returns True if end time is more recent than (or equal to) start time.
      Returns False otherwise.
    <Arguments>
      start time (e.g. "YYYY-MM-DD hh:mm:ss", optionally with +ZZ:ZZ offset)
      end time
    <Exceptions>
      None
    <Returns>
      Bool
    """
    s = re.split('-|\+|:| ', start)
    e = re.split('-|\+|:| ', end)
    # BUG FIX: the original returned False as soon as *any* start field
    # exceeded the matching end field, comparing each field independently —
    # e.g. 2020-05-01 vs 2021-01-01 was wrongly False because month 05 > 01
    # short-circuited before the year difference could win. It also compared
    # the fields as strings. Compare the first six components
    # (Y, M, D, h, m, s) lexicographically as integers instead; equal
    # timestamps return True, matching the original's behavior.
    return [int(x) for x in s[:6]] <= [int(x) for x in e[:6]]