This article collects typical usage examples of the find_link method of Python's mechanize.Browser class. If you have been wondering how Browser.find_link works or how to call it, the curated examples below should help; they are also a good way to get familiar with the enclosing class, mechanize.Browser.
The following shows 15 code examples of Browser.find_link, sorted by popularity by default.
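Before diving into the examples, here is a minimal sketch of how Browser.find_link is typically called (the URL below is a placeholder, and the robots-handling setup mirrors what several of the examples do). find_link accepts filters such as text, text_regex, url, url_regex, and nr (the zero-based index among matching links), and raises mechanize.LinkNotFoundError when no link matches; follow_link accepts either a Link object or the same filter keywords.

import re
import mechanize

br = mechanize.Browser()
br.set_handle_robots(False)     # ignore robots.txt, as several examples below do
br.open('http://example.com/')  # hypothetical page containing links

try:
    # Find the first link whose URL matches the regex (nr=0 is the default).
    link = br.find_link(url_regex=re.compile(r'/title/tt.*'), nr=0)
except mechanize.LinkNotFoundError:
    link = None

if link is not None:
    response = br.follow_link(link)  # follow_link(url_regex=...) works as well
    html = response.read()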
Example 1: ParseMagazine
# Required import: from mechanize import Browser [as alias]
# Alternatively: from mechanize.Browser import find_link [as alias]
class ParseMagazine(Downloader):
    '''Class for parsing trener-on-line.ru'''

    def __init__(self, url):
        Downloader.__init__(self, url)
        self.links = set()
        self.br = Browser()
        self.br.open(self.url)
        self.br.select_form(nr=0)
        self.br['username'] = user
        self.br['passwd'] = passwd
        self.br.submit()
        self.parse_home()

    def parse_home(self):
        self.download(self.url)
        soup = BeautifulSoup(self.content)
        table = soup.find('td', {'class': 'tablenews'})
        self.links = set(link['href'] for link in table.findAll('a')
                         if link['href'].startswith('index.php'))

    def parse_issue(self, url):
        self.br.open(self.url)
        link = self.br.find_link(url=url)
        response = self.br.follow_link(link=link)
        data = response.read()
        soup = BeautifulSoup(data)
        issue = soup.find('table', {'class': 'blog'})
        return issue.renderContents()

    def parse(self):
        with open('index.html', 'w') as f:
            f.write('''<?xml version="1.0" encoding="windows-1251"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html><body>''')
            for link in self.links:
                page = self.parse_issue(link)
                soup = BeautifulSoup(page)
                links = set(link['href'] for link in soup.findAll('a')
                            if link['href'].startswith('http://trener-on-line.ru/index.php'))
                for l in links:
                    print l
                    self.br.open(link)
                    sublink = self.br.find_link(url=l)
                    response = self.br.follow_link(link=sublink)
                    data = response.read()
                    soup = BeautifulSoup(data)
                    issue = soup.find('td', {'class': 'main'})  # TD!!!
                    f.write(issue.renderContents())
            f.write('</body></html>')
Example 2: getRatings
# Required import: from mechanize import Browser [as alias]
# Alternatively: from mechanize.Browser import find_link [as alias]
def getRatings(url):
    # url = 'http://us.imdb.com/M/title-exact?Mighty%20Aphrodite%20(1995)'
    try:
        br = Browser()
        br.set_handle_robots(False)
        br.open(url)
        if re.search(r'/title/tt.*', br.geturl()):
            soup = BeautifulSoup(MyOpener().open(url).read())
        else:
            link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
            res = br.follow_link(link)
            soup = BeautifulSoup(res.read())
        # movie_title = soup.find('title').contents[0]
        des = (soup.find('meta', {'name': 'description'})['content']).encode('utf-8')
        rate = soup.find('span', itemprop='ratingValue')
        # print movie_title
        # print des
    except:
        print 'Error no rating'
        rating = str(0)
        des = ""
    else:
        if rate:
            rating = str(rate.contents[0])
        else:
            rating = str(0)
            print 'No rate'
    return rating, des
Example 3: _process
# Required import: from mechanize import Browser [as alias]
# Alternatively: from mechanize.Browser import find_link [as alias]
def _process(self):
    """Start the work."""
    movie = "+".join(self.title.split())
    br = Browser()
    url = "%s/find?s=tt&q=%s" % (self.BASE_URL, movie)
    br.open(url)
    if re.search(r"/title/tt.*", br.geturl()):
        self.url = "%s://%s%s" % urlparse.urlparse(br.geturl())[:3]
        soup = BeautifulSoup(MyOpener().open(url).read(), "html.parser")
    else:
        link = br.find_link(url_regex=re.compile(r"/title/tt.*"))
        res = br.follow_link(link)
        self.url = urlparse.urljoin(self.BASE_URL, link.url)
        soup = BeautifulSoup(res.read(), "html.parser")
    try:
        self.title = soup.find("h1").contents[0].strip()
        for span in soup.findAll("span"):
            if span.has_attr("itemprop") and span["itemprop"] == "ratingValue":
                self.rating = span.contents[0]
                break
        self.found = True
    except:
        pass
    self.genre = []
    infobar = soup.find("div", {"class": "infobar"})
    r = infobar.find("", {"title": True})["title"]
    self.genrelist = infobar.findAll("a", {"href": True})
    for i in range(len(self.genrelist) - 1):
        self.genrelist[i] = self.genrelist[i].encode("ascii")
        self.genre.append(self.genrelist[i][16:self.genrelist[i].index("?")])
    self.mainGenre = self.genre[0]
Example 4: name
# Required import: from mechanize import Browser [as alias]
# Alternatively: from mechanize.Browser import find_link [as alias]
def name(request, string):
    movie = string.replace("_", "+")
    br = Browser()
    br.open("http://www.imdb.com/find?s=tt&q=" + movie)
    link = br.find_link(url_regex=re.compile(r"/title/tt.*"))
    data = br.follow_link(link)
    soup = BeautifulSoup(data.read())
    title = soup.find('h1').contents[0].strip()
    name = title.replace(" ", "")
    rating = soup.find('span', itemprop='ratingValue').contents[0]
    duration = soup.find('time', itemprop='duration').contents[0].strip()
    releaseDate = soup.find('a', title='See more release dates').contents[0]
    director = soup.find('span', itemprop='director').getText()
    actor_all = []
    actors = soup.findAll('span', itemprop='actors')
    for i in range(len(actors)):
        actor_all.append((actors[i].contents[1]).getText())
    genres_all = []
    genres = soup.findAll('span', itemprop='genre')
    for i in range(len(genres)):
        genres_all.append(genres[i].getText())
    jsonObject = {}
    jsonObject['Name:'] = name
    jsonObject['IMDB Rating:'] = rating
    jsonObject['Duration'] = duration
    jsonObject["Actors: "] = actor_all
    jsonObject['Director:'] = director
    jsonObject['Genres'] = genres_all
    jsonObject['Release Date'] = releaseDate
    movie_details = json.dumps(jsonObject)
    return HttpResponse(movie_details)
Example 5: _process
# Required import: from mechanize import Browser [as alias]
# Alternatively: from mechanize.Browser import find_link [as alias]
def _process(self):
    """Start the work."""
    movie = '+'.join(self.title.split())
    br = Browser()
    url = "%s/find?s=tt&q=%s" % (self.BASE_URL, movie)
    br.open(url)
    if re.search(r'/title/tt.*', br.geturl()):
        self.url = "%s://%s%s" % urlparse.urlparse(br.geturl())[:3]
        soup = BeautifulSoup(MyOpener().open(url).read())
    else:
        link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
        res = br.follow_link(link)
        self.url = urlparse.urljoin(self.BASE_URL, link.url)
        soup = BeautifulSoup(res.read())
    try:
        self.title = soup.find('h1').contents[0].strip()
        for span in soup.findAll('span'):
            if span.has_key('itemprop') and span['itemprop'] == 'ratingValue':
                self.rating = span.contents[0]
                break
        self.found = True
    except:
        pass
Example 6: searchMovie
# Required import: from mechanize import Browser [as alias]
# Alternatively: from mechanize.Browser import find_link [as alias]
def searchMovie(movie):
    movie_search = "+".join(movie.split())
    base_url = "http://www.imdb.com/find?q="
    url = base_url + movie_search + "&s=all"
    title_search = re.compile(r"/title/tt\d+")
    br = Browser()
    br.open(url)
    link = br.find_link(url_regex=re.compile(r"/title/tt.*"))
    res = br.follow_link(link)
    soup = BeautifulSoup(res.read())
    info = {}
    movie_title = getunicode(soup.find("title"))
    movie_title = movie_title.split(" - IMDb")[0]
    print movie_title
    info["title"] = movie_title
    try:
        rate = soup.find("span", itemprop="ratingValue")
        rating = getunicode(rate)
        info["rating"] = rating
    except:
        info["rating"] = "Not available"
    try:
        img = soup.find("img", {"itemprop": "image"})["src"]
    except:
        return 1
    image = getunicode(img)
    info["image"] = image
    try:
        des = soup.find("meta", {"name": "description"})["content"]
    except:
        return 1
    descp = getunicode(des)
    info["description"] = descp
    genre = []
    infobar = soup.find("div", {"class": "infobar"})
    try:
        r = infobar.find("", {"title": True})["title"]
    except:
        return 1
    genrelist = infobar.findAll("a", {"href": True})
    for i in range(len(genrelist) - 1):
        genre.append(getunicode(genrelist[i]))
    gnre = ""
    for gnr in genre:
        gnre = gnre + str(gnr) + ","
    gnre = gnre[:-1]
    info["genre"] = gnre
    release_date = getunicode(genrelist[-1])
    info["date"] = release_date
    return info
Example 7: get_soup
# Required import: from mechanize import Browser [as alias]
# Alternatively: from mechanize.Browser import find_link [as alias]
def get_soup(movie):
    movie_search = '+'.join(movie.split())
    url = "http://www.imdb.com/find?q=" + movie_search + "&s=all"
    br = Browser()
    try:
        br.open(url)
        link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
        res = br.follow_link(link)
    except:
        return "error"
    else:
        soup = BeautifulSoup(res.read())
        return str(soup)
Example 8: main
# Required import: from mechanize import Browser [as alias]
# Alternatively: from mechanize.Browser import find_link [as alias]
def main():
    movie = str(raw_input('Movie Name: '))
    movie_search = '+'.join(movie.split())
    base_url = 'http://www.imdb.com/find?q='
    url = base_url + movie_search + '&s=all'
    title_search = re.compile(r'/title/tt\d+')
    br = Browser()
    br.open(url)
    link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
    res = br.follow_link(link)
    soup = BeautifulSoup(res.read())
    movie_title = getunicode(soup.find('title'))
    rate = soup.find('span', itemprop='ratingValue')
    rating = getunicode(rate)
    actors = []
    actors_soup = soup.findAll('a', itemprop='actors')
    for i in range(len(actors_soup)):
        actors.append(getunicode(actors_soup[i]))
    des = soup.find('meta', {'name': 'description'})['content']
    genre = []
    infobar = soup.find('div', {'class': 'infobar'})
    r = infobar.find('', {'title': True})['title']
    genrelist = infobar.findAll('a', {'href': True})
    for i in range(len(genrelist) - 1):
        genre.append(getunicode(genrelist[i]))
    release_date = getunicode(genrelist[-1])
    print movie_title, rating + '/10.0'
    print 'Release Date:', release_date
    print 'Rated', r
    print ''
    print 'Genre:',
    print ', '.join(genre)
    print '\nActors:',
    print ', '.join(actors)
    print '\nDescription:'
    print des
Example 9: getsoup
# Required import: from mechanize import Browser [as alias]
# Alternatively: from mechanize.Browser import find_link [as alias]
def getsoup(URL, proxy=None):
    br = Browser()
    if proxy is not None:
        br.set_proxies(proxy)
    br.open(URL)
    try:
        title_URL = br.find_link(url_regex=re.compile(r'/title/tt.*'))
    except LinkNotFoundError:
        return None
    try:
        res = br.follow_link(title_URL)
    except URLError:
        return None
    soup = BeautifulSoup(res.read())
    return soup
Example 10: get_rating
# Required import: from mechanize import Browser [as alias]
# Alternatively: from mechanize.Browser import find_link [as alias]
def get_rating(self):
    try:
        print "Checking IMDb rating of " + self.movie_name
        movie_search = '+'.join(self.movie_name.split())
        movie_url = base_url + movie_search + '&s=all'
        print(movie_url)
        br = Browser()
        br.open(movie_url)
        link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
        res = br.follow_link(link)
        soup = BeautifulSoup(res.read(), "lxml")
        movie_title = soup.find('title').contents[0]
        rate = soup.find('span', itemprop='ratingValue')
        if rate is not None:
            self.movie_rating = rate
    except:
        self.movie_rating = '-'
Example 11: get_ratings
# Required import: from mechanize import Browser [as alias]
# Alternatively: from mechanize.Browser import find_link [as alias]
def get_ratings(movies_of_my_genre):
    for movie in movies_of_my_genre:
        try:
            print "Checking IMDb rating of : " + movie.movie_name.replace('\t', '')
            movie_search = '+'.join(movie.movie_name.split())
            movie_url = base_url + movie_search + '&s=all'
            br = Browser()
            br.open(movie_url)
            link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
            res = br.follow_link(link)
            soup = BeautifulSoup(res.read(), "lxml")
            movie_title = soup.find('title').contents[0]
            rate = soup.find('span', itemprop='ratingValue')
            if rate is not None:
                movie.movie_rating = float(rate.contents[0])
            else:
                movie.movie_rating = 0
        except:
            movie.movie_rating = 0
Example 12: _process
# Required import: from mechanize import Browser [as alias]
# Alternatively: from mechanize.Browser import find_link [as alias]
def _process(self):
    movie = '+'.join(self.title.split())
    br = Browser()
    url = "%s/find?s=tt&q=%s" % (self.BASE_URL, movie)
    br.open(url)
    if re.search(r'/title/tt.*', br.geturl()):
        self.url = "%s://%s%s" % urlparse.urlparse(br.geturl())[:3]
        soup = BeautifulSoup(MyOpener().open(url).read())
    else:
        link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
        res = br.follow_link(link)
        self.url = urlparse.urljoin(self.BASE_URL, link.url)
        soup = BeautifulSoup(res.read())
    try:
        self.title = soup.find('h1').contents[0].strip()
        self.rating = soup.find('span', attrs='rating-rating').contents[0]
        self.found = True
    except:
        pass
Example 13: getRating
# Required import: from mechanize import Browser [as alias]
# Alternatively: from mechanize.Browser import find_link [as alias]
def getRating(self):
    self.found = False
    self.BASE_URL = 'http://www.imdb.com'
    self.title = self.arguments.Title
    self.name = self.title
    self.movie = '_'.join(self.title.split())
    br = Browser()
    url = "%s/find?s=tt&q=%s" % (self.BASE_URL, self.movie)
    try:
        br.open(url)
    except:
        self.msg = "internet connection error or movie not found"
        return
    if re.search(r'/title/tt.*', br.geturl()):
        # self.url = "%s://%s%s" % urlparse.urlparse(br.geturl())[:3]
        soup = BeautifulSoup(MyOpener().open(url).read())
    else:
        try:
            self.link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
        except:
            self.msg = "Movie not found"
            return
        res = br.follow_link(self.link)
        # self.url = urlparse.urljoin(self.BASE_URL, self.link.url)
        soup = BeautifulSoup(res.read())
    try:
        self.title = soup.find('h1', {'class': 'header'}).find('span', {'class': 'itemprop'}).contents[0]
        for span in soup.findAll('span'):
            if span.has_key('itemprop') and span['itemprop'] == 'ratingValue':
                self.rating = span.contents[0]
                break
        self.year = soup.find('span', {'class': 'nobr'}).find('a').contents[0]
        self.nusers = soup.find('div', {'class': 'star-box-details'}).find('a').find('span').contents[0]
        self.found = True
    except:
        pass
    if self.found:
        self.msg = "{0} {1}, RATING: {2}/10.0 from {3} people".format(self.title.upper(), self.year, self.rating, self.nusers)
    else:
        self.msg = "Movie Not found"
Example 14: main
# Required import: from mechanize import Browser [as alias]
# Alternatively: from mechanize.Browser import find_link [as alias]
def main():
    ## create a browser object
    ## NWEA has a pretty aggressive robots.txt
    ## here's what we'll do about that: ignore it
    br = Browser()
    # br.set_handle_redirect(False)
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    ## open the login page; the form is called loginForm
    br.open(LOGIN_URL)
    br.select_form(name="loginForm")
    br['username'] = USERNAME
    br['password'] = SECRET
    response = br.submit()  ## submit and store the response
    print 'credentials successful, logged in'
    # print response.read()
    ## once logged in, navigate to the reports page
    br.open(BASE_URL + '/report/home/map')
    ## the CDF file looks like "https://kippteamschools-admin.mapnwea.org/report/download/cdf/7492"
    ## get the matching cdf link and build the full url
    cdf_string = '/report/download/cdf/[0-9]+'
    file_target = br.find_link(url_regex=cdf_string)
    file_loc = BASE_URL + file_target.url
    print 'cdf is located at %s' % (file_loc)
    ## retrieve() fetches the file at that location and saves it to a temp directory
    cdf_zipped = br.retrieve(file_loc)[0]
    print 'temp file is located at %s' % cdf_zipped
    sourceZip = ZipFile(cdf_zipped, 'r')
    print
    print 'beginning unzip'
    for name in sourceZip.namelist():
        print 'extracted %s...' % (name)
        sourceZip.extract(name, UNZIPPED_DEST)
    sourceZip.close()
Example 15: parse_movie_imdb
# Required import: from mechanize import Browser [as alias]
# Alternatively: from mechanize.Browser import find_link [as alias]
def parse_movie_imdb(self, link):
    br = Browser()
    br.open(link)
    link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
    res = br.follow_link(link)
    soup = BeautifulSoup(res.read())
    movie_title = self.getunicode(soup.find('title'))
    rate = soup.find('span', itemprop='ratingValue')
    rating = self.getunicode(rate)
    actors = []
    actors_soup = soup.findAll('a', itemprop='actors')
    for i in range(len(actors_soup)):
        actors.append(self.getunicode(actors_soup[i]))
    des = soup.find('meta', {'name': 'description'})['content']
    genre = []
    infobar = soup.find('div', {'class': 'infobar'})
    r = infobar.find('', {'title': True})['title']
    genrelist = infobar.findAll('a', {'href': True})
    for i in range(len(genrelist) - 1):
        genre.append(self.getunicode(genrelist[i]))
    release_date = self.getunicode(genrelist[-1])
    print movie_title, rating + '/10.0'
    print 'Release Date:', release_date
    print 'Rated', r
    print ''
    print 'Genre:',
    print ', '.join(genre)
    print '\nActors:',
    print ', '.join(actors)
    print '\nDescription:'
    print des