This page collects typical usage examples of the Python method mechanize.Browser.follow_link. If you have been wondering what exactly Browser.follow_link does and how to use it, the hand-picked code examples below may help. You can also read more about the class the method belongs to, mechanize.Browser.
The following 15 code examples of Browser.follow_link are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
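Before the examples, here is a minimal sketch of the follow_link API, assuming a placeholder URL and link text (not taken from any example below). follow_link accepts either a Link object returned by find_link/links, or the same selector keywords as find_link (text, text_regex, name, url, url_regex, tag, predicate, nr):

import re
from mechanize import Browser, LinkNotFoundError

br = Browser()
br.open("http://example.com/")  # a page must be loaded first
try:
    # follow the first link whose visible text matches the regex
    resp = br.follow_link(text_regex=re.compile("Next"), nr=0)
    print resp.geturl()  # the browser state now points at the linked page
except LinkNotFoundError:
    print "no matching link on the page"

If no link matches the selectors, mechanize raises LinkNotFoundError rather than returning None.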
Example 1: backup_database
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import follow_link [as alias]
# Also needs: import re
def backup_database(url, username, password, database, destination):
    if not url.endswith("/"):
        url += "/"
    br = Browser()
    br.open(url)
    br.select_form(name="login_form")
    br["pma_username"] = username
    br["pma_password"] = password
    login_response = br.submit()
    # Navigate phpMyAdmin: the main page, then the server export page
    resp = br.follow_link(url_regex=re.compile(r"^main\.php.*"))
    resp = br.follow_link(url_regex=re.compile(r"\./server_export\.php.*"))
    br.select_form(name="dump")
    # Select SQL export
    br.find_control(name="what").get(id="radio_plugin_sql").selected = True
    # Select database to export
    br.find_control(name="db_select[]").get(name=database).selected = True
    # Add SQL DROP statements to export
    br.find_control(name="sql_drop").get(id="checkbox_sql_drop").selected = True
    # Send as file
    br.find_control(name="asfile").get("sendit").selected = True
    # Compress file with bzip
    br.find_control(name="compression").get(id="radio_compression_bzip").selected = True
    ret = br.submit()
    # Write in binary mode: the dump arrives bzip2-compressed
    with open(destination, 'wb') as f:
        f.write(ret.read())
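A hypothetical invocation (the phpMyAdmin URL, credentials, and paths are placeholders, not from the original project):

backup_database("http://localhost/phpmyadmin/", "root", "secret",
                "mydb", "/tmp/mydb.sql.bz2")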
Example 2: Guarani
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import follow_link [as alias]
# Also needs a BeautifulSoup import (the code uses the BS3-style
# BeautifulSoup(html) constructor)
class Guarani(object):
    def __init__(self, user, passwd):
        self.br = Browser()
        self.user = user
        self.passwd = passwd
        self._login()

    def _login(self):
        self.br.open("https://guarani.exa.unicen.edu.ar/Guarani3w/")
        self.br.open("https://guarani.exa.unicen.edu.ar/Guarani3w/includes/barra.inc.php")
        self.br.follow_link(text_regex="Iniciar")
        self.br.select_form(nr=0)
        self.br["fUsuario"] = self.user
        self.br["fClave"] = self.passwd
        self.br.submit()

    def _parseNotas(self, html):
        soup = BeautifulSoup(html)
        s_notas = soup.findAll('tr', {'class': 'normal'})
        notas = []
        for s_nota in s_notas:
            materia, nota = [x.text for x in s_nota.findAll('td')[:2]]
            if nota != '':
                notas.append([materia, nota])
        return notas

    def getNotasFinales(self):
        self.br.open("https://guarani.exa.unicen.edu.ar/Guarani3w/operaciones.php")
        self.br.follow_link(url_regex="consultarActProvisoria")
        return self._parseNotas(self.br.response().read())
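Hypothetical usage of the class above (the credentials are placeholders):

g = Guarani("my_user", "my_password")
for materia, nota in g.getNotasFinales():
    print materia, nota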
Example 3: login
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import follow_link [as alias]
def login(url):
    # Use mechanize to get the set name URLs to scrape
    br = Browser()
    br.addheaders = [('User-Agent', ua)]  # ua, EMAIL and PASSWORD are module-level constants
    br.open(url)
    # Select the login form by its id
    for form in br.forms():
        if form.attrs['id'] == 'loginFrm':
            br.form = form
            break
    br["email"] = EMAIL        # replace with email
    br["password"] = PASSWORD  # replace with password
    # Submit the form
    br.submit()
    # Select the population-report search form on the next page
    for form in br.forms():
        if form.attrs['id'] == 'pop_report_form':
            br.form = form
            break
    br['sport_id'] = ['185223']
    br['set_name'] = "T206"
    br.submit(name="search")
    # Follow link to the correct set
    br.follow_link(url="http://www.beckett.com/grading/set_match/3518008")
    return br.response().read()
Example 4: BachBrowser
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import follow_link [as alias]
# Also needs: urlencode, HTTPError, FormNotFoundError and the project's
# servlet_url() helper, imported elsewhere in the original module
class BachBrowser(object):
    def __init__(self):
        self._b = Browser()

    def login(self, user, pin):
        # submit login form
        login_data = urlencode({'yzbks': user, 'jklwd': pin})
        self._b.open(servlet_url('SSOLogin'), login_data)
        # open the content frame
        self._b.follow_link(name='content')
        # ack the "please don't forget to logout" form (if present)
        try:
            self._b.select_form('Main')
            self._b.submit()
        except FormNotFoundError:
            pass
        # store current session id
        self._b.select_form('Main')
        self._session_id = self._b['sessionid']
        # store the url of the "overview" page
        self._root_url = self._b.geturl()

    def logout(self):
        try:
            self._b.open(servlet_url('Logout'))
        except HTTPError, e:
            if e.code != 503:
                raise
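Hypothetical usage (the user and PIN are placeholders; see the import note above):

b = BachBrowser()
b.login("user", "1234")
# ... scrape pages relative to b._root_url here ...
b.logout()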
Example 5: begin_scraper
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import follow_link [as alias]
def begin_scraper():
    br = Browser()
    br.addheaders = [('User-agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_8; rv:16:0) Gecko/20100101 Firefox/16.0')]
    br.set_handle_robots(False)
    br.open("https://wwws.mint.com/login.event")
    assert br.viewing_html()
    # Find the index of the login form
    formcount = 0
    for f in br.forms():
        if str(f.attrs["id"]) == "form-login":
            break
        formcount = formcount + 1
    br.select_form(nr=formcount)
    br["username"] = "[email protected]"  # Put your username here
    br["password"] = getpass()
    #import pdb; pdb.set_trace()
    # Submit the user credentials to log in to Mint
    response = br.submit()
    response = br.follow_link(text="Transactions")
    # Grab the last "Export all N transactions" link on the page
    links_to_transactions = br.links(text_regex=r"Export all \d+ transactions")
    link = ""
    for f in links_to_transactions:
        link = f
    response2 = br.follow_link(link)
    with open("transactions.csv", "w") as text_file:
        text_file.write(response2.read())
Example 6: setup
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import follow_link [as alias]
# Also needs: import re
def setup():
    print "Setting up browser!"
    br = Browser()
    br.open("***URL to FogBugz***")
    br.follow_link(url_regex=re.compile("pgLogon"))
    br.select_form(name="formWithPerson")
    br["sPerson"] = "**********"    # username of account to use
    br["sPassword"] = "*********"   # password of account to use
    br.submit()
    print "Logged in"
    return br
Example 7: main
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import follow_link [as alias]
def main():
    br = Browser()
    br.open("http://www.ec.gc.ca/contracts-contrats/default.asp?lang=En&n=168B9233-11")
    # follow the link whose element text matches the regular expression
    response1 = br.follow_link(text_regex=r"Reports")
    assert br.viewing_html()
    response2 = br.follow_link(text_regex=r"Quarter")
    assert br.viewing_html()
    html = response2.read()
    response2.close()
    parse(html)
Example 8: ParseMagazine
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import follow_link [as alias]
# Also needs BeautifulSoup plus the project's Downloader base class and its
# module-level user/passwd credentials
class ParseMagazine(Downloader):
    ''' Class for parsing trener-on-line.ru '''
    def __init__(self, url):
        Downloader.__init__(self, url)
        self.links = ()
        self.br = Browser()
        self.br.open(self.url)
        self.br.select_form(nr=0)
        self.br['username'] = user
        self.br['passwd'] = passwd
        self.br.submit()
        self.parse_home()

    def parse_home(self):
        self.download(self.url)
        soup = BeautifulSoup(self.content)
        table = soup.find('td', {'class': 'tablenews'})
        self.links = set(a['href'] for a in table.findAll('a') if a['href'].startswith('index.php'))

    def parse_issue(self, url):
        self.br.open(self.url)
        link = self.br.find_link(url=url)
        response = self.br.follow_link(link=link)
        data = response.read()
        soup = BeautifulSoup(data)
        issue = soup.find('table', {'class': 'blog'})
        return issue.renderContents()

    def parse(self):
        with open('index.html', 'w') as f:
            f.write('''<?xml version="1.0" encoding="windows-1251"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html><body>''')
            for link in self.links:
                page = self.parse_issue(link)
                soup = BeautifulSoup(page)
                links = set(a['href'] for a in soup.findAll('a') if a['href'].startswith('http://trener-on-line.ru/index.php'))
                for l in links:
                    print l
                    self.br.open(link)
                    sublink = self.br.find_link(url=l)
                    response = self.br.follow_link(link=sublink)
                    data = response.read()
                    soup = BeautifulSoup(data)
                    issue = soup.find('td', {'class': 'main'})  # note: a TD, not a table
                    f.write(issue.renderContents())
            f.write('</body></html>')
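Hypothetical usage, assuming the Downloader base class and the credentials from the original project are available (the URL is a placeholder):

ParseMagazine("http://trener-on-line.ru/index.php").parse()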
Example 9: searchTitle
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import follow_link [as alias]
# Also needs: import re and a BeautifulSoup import
def searchTitle(rawtitle):
    br = Browser()
    # Ignore robots.txt
    br.set_handle_robots(False)
    # Google demands a user-agent that isn't a robot
    br.addheaders = [('User-agent', 'Firefox')]
    br.open("http://www.google.com")
    br.select_form('f')
    # Turn a dotted file name into space-separated search terms
    s = 'imdb' + ' + ' + ' '.join(re.compile(r'[\.]').split(rawtitle))
    br.form['q'] = s
    br.submit()
    resp = None
    for link in br.links():
        siteMatch = re.compile(r'www\.imdb\.com/title/tt[0-9]*/$').search(link.url)
        if siteMatch:
            resp = br.follow_link(link)
            print link.url
            break
    soup = BeautifulSoup(resp.get_data())
    title = re.sub(' - IMDb', '', soup.find('title').string)
    title = re.sub(r'\([0-9]*\)', '', title)
    return title
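Hypothetical usage (the dotted release-style name is a placeholder):

print searchTitle("Some.Movie.Title.1995.720p")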
Example 10: getRatings
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import follow_link [as alias]
# Also needs: re, BeautifulSoup and the project's MyOpener helper
def getRatings(url):
    # url = 'http://us.imdb.com/M/title-exact?Mighty%20Aphrodite%20(1995)'
    try:
        br = Browser()
        br.set_handle_robots(False)
        br.open(url)
        if re.search(r'/title/tt.*', br.geturl()):
            # The search redirected straight to a title page
            soup = BeautifulSoup(MyOpener().open(url).read())
        else:
            link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
            res = br.follow_link(link)
            soup = BeautifulSoup(res.read())
        # movie_title = soup.find('title').contents[0]
        des = (soup.find('meta', {'name': 'description'})['content']).encode('utf-8')
        rate = soup.find('span', itemprop='ratingValue')
        # print movie_title
        # print des
    except:
        print 'Error no rating'
        rating = str(0)
        des = ""
    else:
        if rate:
            rating = str(rate.contents[0])
        else:
            rating = str(0)
            print 'No rate'
    return rating, des
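Hypothetical usage (the IMDb title URL is a placeholder):

rating, des = getRatings("http://www.imdb.com/title/tt0000000/")
print rating, des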
Example 11: name
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import follow_link [as alias]
# A Django view: also needs re, json, BeautifulSoup and HttpResponse
def name(request, string):
    movie = string.replace("_", "+")
    br = Browser()
    br.open("http://www.imdb.com/find?s=tt&q=" + movie)
    # Follow the first search result that looks like a title page
    link = br.find_link(url_regex=re.compile(r"/title/tt.*"))
    data = br.follow_link(link)
    soup = BeautifulSoup(data.read())
    title = soup.find('h1').contents[0].strip()
    name = title.replace(" ", "")
    rating = soup.find('span', itemprop='ratingValue').contents[0]
    duration = soup.find('time', itemprop='duration').contents[0].strip()
    releaseDate = soup.find('a', title='See more release dates').contents[0]
    director = soup.find('span', itemprop='director').getText()
    actor_all = [actor.contents[1].getText() for actor in soup.findAll('span', itemprop='actors')]
    genres_all = [genre.getText() for genre in soup.findAll('span', itemprop='genre')]
    jsonObject = {}
    jsonObject['Name'] = name
    jsonObject['IMDB Rating'] = rating
    jsonObject['Duration'] = duration
    jsonObject['Actors'] = actor_all
    jsonObject['Director'] = director
    jsonObject['Genres'] = genres_all
    jsonObject['Release Date'] = releaseDate
    movie_details = json.dumps(jsonObject)
    return HttpResponse(movie_details)
Example 12: fetch
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import follow_link [as alias]
# Also needs: import sys
def fetch():
    br = Browser()  # Create a browser
    visited = {}    # renamed from `map` to avoid shadowing the builtin
    # br.open(login_url)             # Open the login page
    # br.select_form(id="signform")  # Find the login form
    # br['username'] = username      # Set the form values
    # br['password'] = password
    # resp = br.submit()             # Submit the form
    br.open('http://www.verycd.com/sto/music/china/')
    nice_links = [l for l in br.links() if 'topics' in l.url]
    if not nice_links:
        return None
    for link in nice_links:
        if link.url in visited:
            continue
        try:
            response = br.follow_link(link)
            visited[link.url] = br.title()
        except Exception, e:
            print >> sys.stderr, e
    return visited
Example 13: downloadBuild
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import follow_link [as alias]
# Also needs: import os
def downloadBuild(build_file, target_directory):
    """Download a build file from the SESI website and place it in the target
    directory.
    """
    print "Attempting to download build: {}".format(build_file)
    user, password = _getSESIAuthInfo()
    browser = Browser()
    browser.set_handle_robots(False)
    browser.open("https://www.sidefx.com/login/?next=/download/daily-builds/")
    browser.select_form(nr=0)
    browser.form['username'] = user
    browser.form['password'] = password
    browser.submit()
    browser.open('http://www.sidefx.com/download/daily-builds/')
    # The download link's visible text is the build file name itself
    resp = browser.follow_link(text=build_file, nr=0)
    url = resp.geturl()
    url += 'get/'
    resp = browser.open(url)
    target_path = os.path.join(target_directory, build_file)
    print "Downloading to {}".format(target_path)
    with open(target_path, 'wb') as handle:
        handle.write(resp.read())
    print "Download complete"
    return target_path
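Hypothetical usage (the build file name is a placeholder for a real daily-build archive name):

path = downloadBuild("houdini-build.tar.gz", "/tmp")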
Example 14: _process
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import follow_link [as alias]
# A method of a larger class: also needs re, urlparse, BeautifulSoup and the
# project's MyOpener helper
def _process(self):
    """Start the work."""
    movie = '+'.join(self.title.split())
    br = Browser()
    url = "%s/find?s=tt&q=%s" % (self.BASE_URL, movie)
    br.open(url)
    if re.search(r'/title/tt.*', br.geturl()):
        # The search redirected straight to a title page
        self.url = "%s://%s%s" % urlparse.urlparse(br.geturl())[:3]
        soup = BeautifulSoup(MyOpener().open(url).read())
    else:
        link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
        res = br.follow_link(link)
        self.url = urlparse.urljoin(self.BASE_URL, link.url)
        soup = BeautifulSoup(res.read())
    try:
        self.title = soup.find('h1').contents[0].strip()
        for span in soup.findAll('span'):
            if span.has_key('itemprop') and span['itemprop'] == 'ratingValue':
                self.rating = span.contents[0]
                break
        self.found = True
    except:
        pass
Example 15: test_reload_read_incomplete
# Required import: from mechanize import Browser [as alias]
# Or: from mechanize.Browser import follow_link [as alias]
def test_reload_read_incomplete(self):
    from mechanize import Browser
    browser = Browser()
    r1 = browser.open(urljoin(self.uri, "bits/mechanize_reload_test.html"))
    # if we don't do anything and go straight to another page, most of the
    # last page's response won't be .read()...
    r2 = browser.open(urljoin(self.uri, "mechanize"))
    self.assert_(len(r1.get_data()) < 4097)  # we only .read() a little bit
    # ...so if we then go back, .follow_link() for a link near the end (a
    # few kb in, past the point that always gets read in HTML files because
    # of HEAD parsing) will only work if it causes a .reload()...
    r3 = browser.back()
    browser.follow_link(text="near the end")
    # ... good, no LinkNotFoundError, so we did reload.
    # we have .read() the whole file
    self.assertEqual(len(r3._seek_wrapper__cache.getvalue()), 4202)