当前位置: 首页>>代码示例>>Python>>正文


Python Browser.find_link方法代码示例

本文整理汇总了Python中mechanize.Browser.find_link方法的典型用法代码示例。如果您正苦于以下问题:Python Browser.find_link方法的具体用法?Python Browser.find_link怎么用?Python Browser.find_link使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在mechanize.Browser的用法示例。


在下文中一共展示了Browser.find_link方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: ParseMagazine

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import find_link [as 别名]
class ParseMagazine(Downloader):
	'''Scraper for traner-on-line.ru magazine issues.

	On construction, logs in through the first form on the page and
	collects the issue links from the front page; parse() then walks
	every issue and writes the article bodies into index.html.
	'''

	def __init__(self, url):
		# NOTE(review): assumes Downloader.__init__ stores the url on
		# self (self.url is read below) -- confirm against the base class.
		Downloader.__init__(self, url)
		self.links = ()
		self.br = Browser()
		self.br.open(self.url)
		# Log in via the first form on the page; `user` and `passwd` are
		# module-level credentials defined elsewhere in this file.
		self.br.select_form(nr=0)
		self.br['username'] = user
		self.br['passwd'] = passwd
		self.br.submit()
		self.parse_home()

	def parse_home(self):
		# Collect the relative issue links ('index.php...') found inside
		# the front page's news table.
		self.download(self.url)
		soup = BeautifulSoup(self.content)
		table = soup.find('td', {'class': 'tablenews'})
		self.links = set(link['href'] for link in table.findAll('a') if link['href'].startswith('index.php'))

	def parse_issue(self, url):
		# Re-open the home page, follow the link whose href equals `url`,
		# and return that issue's article table as raw HTML bytes.
		self.br.open(self.url)
		link = self.br.find_link(url=url)
		response = self.br.follow_link(link=link)
		data = response.read()
		soup = BeautifulSoup(data)
		issue = soup.find('table', {'class': 'blog'})
		return issue.renderContents()

	def parse(self):
		# Write every article body of every issue into one index.html.
		with open('index.html', 'w') as f:
			f.write('''<?xml version="1.0" encoding="windows-1251"?>
					<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
					<html><body>''')
			for link in self.links:
				page = self.parse_issue(link)
				soup = BeautifulSoup(page)
				links = set(link['href'] for link in soup.findAll('a') if link['href'].startswith('http://trener-on-line.ru/index.php'))
				for l in links:
					print l
					# NOTE(review): `link` is a relative href from
					# parse_home -- presumably mechanize resolves it
					# against the current page; confirm it opens correctly.
					self.br.open(link)
					sublink = self.br.find_link(url=l)
					response = self.br.follow_link(link=sublink)
					data = response.read()
					soup = BeautifulSoup(data)
					issue = soup.find('td', {'class': 'main'}) # TD!!!
					f.write(issue.renderContents())
			f.write('</body></html>')
开发者ID:Vostbur,项目名称:traner-on-line-magazine-scraper,代码行数:50,代码来源:scraper.py

示例2: getRatings

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import find_link [as 别名]
def getRatings(url):
    """Return (rating, description) for the IMDb page reached from *url*.

    *url* may be an IMDb search URL or a direct /title/tt... URL.  The
    rating is returned as a string ('0' when missing or on error); the
    description is a UTF-8 encoded byte string ('' on error).
    """
    try:
        br = Browser()
        br.set_handle_robots(False)  # IMDb's robots.txt would block mechanize
        br.open(url)

        if re.search(r'/title/tt.*', br.geturl()):
            # Already redirected to a title page; re-fetch the original
            # URL with MyOpener and parse that response.
            soup = BeautifulSoup(MyOpener().open(url).read())
        else:
            # Search-results page: follow the first /title/tt... link.
            link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
            res = br.follow_link(link)
            soup = BeautifulSoup(res.read())

        des = (soup.find('meta', {'name': 'description'})['content']).encode('utf-8')
        rate = soup.find('span', itemprop='ratingValue')
    except Exception:
        # Fix: was a bare `except:` that also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception.
        print 'Error no rating'
        rating = str(0)
        des = ""
    else:
        if rate:
            rating = str(rate.contents[0])
        else:
            rating = str(0)
            print 'No rate'

    return rating, des
开发者ID:diogoAntunes,项目名称:eadw,代码行数:34,代码来源:soupTools.py

示例3: _process

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import find_link [as 别名]
    def _process(self):
        """Resolve self.title on IMDb and populate url/title/rating/genre.

        Searches IMDb for self.title, follows the first /title/tt... hit
        (unless the search already redirected to a title page), then fills
        self.url, self.title, self.rating, self.found, self.genre,
        self.genrelist and self.mainGenre from the parsed page.
        """
        movie = "+".join(self.title.split())
        br = Browser()
        url = "%s/find?s=tt&q=%s" % (self.BASE_URL, movie)
        br.open(url)

        if re.search(r"/title/tt.*", br.geturl()):
            # Search redirected straight to a title page: keep that URL and
            # re-fetch the original search URL with MyOpener for parsing.
            self.url = "%s://%s%s" % urlparse.urlparse(br.geturl())[:3]
            soup = BeautifulSoup(MyOpener().open(url).read(), "html.parser")
        else:
            # Results page: follow the first link to a title page.
            link = br.find_link(url_regex=re.compile(r"/title/tt.*"))
            res = br.follow_link(link)
            self.url = urlparse.urljoin(self.BASE_URL, link.url)
            soup = BeautifulSoup(res.read(), "html.parser")

        try:
            self.title = soup.find("h1").contents[0].strip()
            # The rating lives in a <span itemprop="ratingValue"> element.
            for span in soup.findAll("span"):
                if span.has_attr("itemprop") and span["itemprop"] == "ratingValue":
                    self.rating = span.contents[0]
                    break
            self.found = True
        except:
            # Best-effort: self.found is simply not set when parsing fails.
            pass

        # NOTE(review): the genre block below is NOT guarded -- if the page
        # has no div.infobar, `infobar` is None and the next line raises.
        self.genre = []
        infobar = soup.find("div", {"class": "infobar"})
        r = infobar.find("", {"title": True})["title"]  # content rating; currently unused
        self.genrelist = infobar.findAll("a", {"href": True})

        # All infobar anchors except the last (presumably the release date)
        # are genres; the slice keeps the text between char 16 and '?'.
        for i in range(len(self.genrelist) - 1):
            self.genrelist[i] = self.genrelist[i].encode("ascii")
            self.genre.append(self.genrelist[i][16 : self.genrelist[i].index("?")])
        self.mainGenre = self.genre[0]
开发者ID:samcmuldroch,项目名称:m158a-node_python_odot,代码行数:37,代码来源:imbd.py

示例4: name

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import find_link [as 别名]
def name(request, string):
    """Django view: return IMDb details for the movie named *string*.

    *string* uses underscores as word separators.  The response body is a
    JSON object carrying the title, rating, duration, actors, director,
    genres and release date of the first IMDb search hit.
    """
    browser = Browser()
    browser.open("http://www.imdb.com/find?s=tt&q=" + string.replace("_", "+"))
    title_link = browser.find_link(url_regex=re.compile(r"/title/tt.*"))
    page = browser.follow_link(title_link)
    soup = BeautifulSoup(page.read())

    heading = soup.find('h1').contents[0].strip()
    details = {}
    details['Name:'] = heading.replace("&nbsp;", "")
    details['IMDB Rating:'] = soup.find('span', itemprop='ratingValue').contents[0]
    details['Duration'] = soup.find('time', itemprop='duration').contents[0].strip()
    # Each actor <span> wraps the name in its second child element.
    details["Actors: "] = [tag.contents[1].getText()
                           for tag in soup.findAll('span', itemprop='actors')]
    details['Director:'] = soup.find('span', itemprop='director').getText()
    details['Genres'] = [tag.getText()
                         for tag in soup.findAll('span', itemprop='genre')]
    details['Release Date'] = soup.find('a', title='See more release dates').contents[0]
    return HttpResponse(json.dumps(details))
开发者ID:manimanjari,项目名称:imdbMovieFinder,代码行数:36,代码来源:views.py

示例5: _process

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import find_link [as 别名]
    def _process(self):
        """Resolve self.title on IMDb and populate self.url/title/rating.

        Searches IMDb for self.title and parses the first matching title
        page; sets self.found = True when title and rating were extracted.
        """
        movie = '+'.join(self.title.split())
        br = Browser()
        url = "%s/find?s=tt&q=%s" % (self.BASE_URL, movie)
        br.open(url)

        if re.search(r'/title/tt.*', br.geturl()):
            # The search redirected straight to a title page; keep that URL
            # and re-fetch the search URL with MyOpener for parsing.
            self.url = "%s://%s%s" % urlparse.urlparse(br.geturl())[:3]
            soup = BeautifulSoup( MyOpener().open(url).read() )
        else:
            # Results page: follow the first /title/tt... link.
            link = br.find_link(url_regex = re.compile(r'/title/tt.*'))
            res = br.follow_link(link)
            self.url = urlparse.urljoin(self.BASE_URL, link.url)
            soup = BeautifulSoup(res.read())

        try:
            self.title = soup.find('h1').contents[0].strip()
            # span.has_key is the BeautifulSoup 3 / Python 2 spelling of
            # attribute membership (has_attr in bs4).
            for span in soup.findAll('span'):
                if span.has_key('itemprop') and span['itemprop'] == 'ratingValue':
                    self.rating = span.contents[0]
                    break
            self.found = True
        except:
            # Best-effort: any parse failure leaves self.found untouched.
            pass

示例6: searchMovie

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import find_link [as 别名]
def searchMovie(movie):
    movie_search = "+".join(movie.split())
    base_url = "http://www.imdb.com/find?q="
    url = base_url + movie_search + "&s=all"
    title_search = re.compile("/title/ttd+")
    br = Browser()
    br.open(url)
    link = br.find_link(url_regex=re.compile(r"/title/tt.*"))
    res = br.follow_link(link)
    soup = BeautifulSoup(res.read())
    info = {}
    movie_title = getunicode(soup.find("title"))
    movie_title = movie_title.split(" - IMDb")[0]
    print movie_title
    info["title"] = movie_title
    try:
        rate = soup.find("span", itemprop="ratingValue")
        rating = getunicode(rate)
        info["rating"] = rating
    except:
        info["rating"] = "Not available"

    try:
        img = soup.find("img", {"itemprop": "image"})["src"]
    except:
        return 1
    image = getunicode(img)
    info["image"] = image
    try:
        des = soup.find("meta", {"name": "description"})["content"]
    except:
        return 1

    descp = getunicode(des)
    info["description"] = descp
    genre = []
    infobar = soup.find("div", {"class": "infobar"})
    try:
        r = infobar.find("", {"title": True})["title"]
    except:
        return 1
    genrelist = infobar.findAll("a", {"href": True})
    for i in range(len(genrelist) - 1):
        genre.append(getunicode(genrelist[i]))
    gnre = ""
    for gnr in genre:
        gnre = gnre + str(gnr) + ","
    gnre = gnre[:-1]
    info["genre"] = gnre
    release_date = getunicode(genrelist[-1])
    info["date"] = release_date
    return info
开发者ID:priyapappachan,项目名称:cdrs,代码行数:54,代码来源:views.py

示例7: get_soup

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import find_link [as 别名]
def get_soup(movie):
    """Fetch the IMDb title page for *movie* and return it as a string.

    Searches IMDb, follows the first /title/tt... hit and returns the
    stringified soup of that page, or the literal string "error" when the
    lookup or redirect fails.
    """
    query = '+'.join(movie.split())
    browser = Browser()
    try:
        browser.open("http://www.imdb.com/find?q=" + query + "&s=all")
        title_link = browser.find_link(url_regex=re.compile(r'/title/tt.*'))
        page = browser.follow_link(title_link)
    except:
        return "error"
    return str(BeautifulSoup(page.read()))
开发者ID:swaraj7,项目名称:movie_rater,代码行数:15,代码来源:movie_rater.py

示例8: main

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import find_link [as 别名]
def main():
	movie = str(raw_input('Movie Name: '))
	movie_search = '+'.join(movie.split())
    
	base_url = 'http://www.imdb.com/find?q='
	url = base_url+movie_search+'&s=all'
    
	title_search = re.compile('/title/tt\d+')
    
	br = Browser()

    
                     
	br.open(url)

	link = br.find_link(url_regex = re.compile(r'/title/tt.*'))
	res = br.follow_link(link)
    
	soup = BeautifulSoup(res.read())
    
	movie_title = getunicode(soup.find('title'))
	rate = soup.find('span',itemprop='ratingValue')
	rating = getunicode(rate)
    
	actors=[]
	actors_soup = soup.findAll('a',itemprop='actors')
	for i in range(len(actors_soup)):
		actors.append(getunicode(actors_soup[i]))
    
	des = soup.find('meta',{'name':'description'})['content']

	genre=[]
	infobar = soup.find('div',{'class':'infobar'})
	r = infobar.find('',{'title':True})['title']
	genrelist = infobar.findAll('a',{'href':True})
	
	for i in range(len(genrelist)-1):
		genre.append(getunicode(genrelist[i]))
	release_date = getunicode(genrelist[-1])

	print movie_title,rating+'/10.0'
	print 'Relase Date:',release_date
	print 'Rated',r
	print ''
	print 'Genre:',
	print ', '.join(genre)
	print '\nActors:',
	print ', '.join(actors)
	print '\nDescription:'
	print des    
开发者ID:shubhanshu-gupta,项目名称:2015lab1,代码行数:52,代码来源:SearchByMovieName.py

示例9: getsoup

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import find_link [as 别名]
def getsoup(URL, proxy = None):
    """Return the soup of the first IMDb title page linked from *URL*.

    Optionally routes requests through *proxy*.  Returns None when no
    /title/tt... link exists on the page or following it fails.
    """
    browser = Browser()
    if proxy is not None:
        browser.set_proxies(proxy)
    browser.open(URL)

    # Guard clause: no title link on the page means nothing to scrape.
    try:
        title_link = browser.find_link(url_regex = re.compile(r'/title/tt.*'))
    except LinkNotFoundError:
        return None

    # Guard clause: the link itself may be unreachable.
    try:
        page = browser.follow_link(title_link)
    except URLError:
        return None

    return BeautifulSoup(page.read())
开发者ID:jayrambhia,项目名称:DeskWid,代码行数:18,代码来源:imdb.py

示例10: get_rating

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import find_link [as 别名]
    def get_rating(self ):
        try:
            print "Checking IMDb rating of "+ self.movie_name
            movie_search = '+'.join(self.movie_name.split())
            movie_url = base_url + movie_search + '&s=all'
            print(movie_url)
            br = Browser()
            br.open(movie_url)
            link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
            res = br.follow_link(link)
            soup = BeautifulSoup(res.read(), "lxml")
            movie_title = soup.find('title').contents[0]
            rate = soup.find('span', itemprop='ratingValue')
            if rate is not None:
                self.movie_rating=rate

        except:
            self.movie_rating='-'
开发者ID:madan96,项目名称:La-Z-Boy,代码行数:20,代码来源:__main__.py

示例11: get_ratings

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import find_link [as 别名]
def get_ratings(movies_of_my_genre):
    for movie in movies_of_my_genre:
        try:
            print "Checking IMDb rating of :   " + movie.movie_name.replace('\t','')
            movie_search = '+'.join(movie.movie_name.split())
            movie_url = base_url + movie_search + '&s=all'
            br = Browser()
            br.open(movie_url)
            link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
            res = br.follow_link(link)
            soup = BeautifulSoup(res.read(), "lxml")
            movie_title = soup.find('title').contents[0]
            rate = soup.find('span', itemprop='ratingValue')
            if rate is not None:
                movie.movie_rating=float(rate.contents[0])
            else:
                movie.movie_rating=0
        except:
            movie.movie_rating = 0
开发者ID:madan96,项目名称:La-Z-Boy,代码行数:21,代码来源:__main__.py

示例12: _process

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import find_link [as 别名]
    def _process(self):
        """Resolve self.title on IMDb and populate self.url/title/rating.

        Searches IMDb for self.title, follows the first /title/tt... result
        when needed, and sets self.found = True on success.
        """
        movie = '+'.join(self.title.split())
        br = Browser()
        url = "%s/find?s=tt&q=%s" % (self.BASE_URL, movie)
        br.open(url)

        if re.search(r'/title/tt.*', br.geturl()):
            # The search redirected straight to a title page; keep that URL
            # and re-fetch the search URL with MyOpener for parsing.
            self.url = "%s://%s%s" % urlparse.urlparse(br.geturl())[:3]
            soup = BeautifulSoup( MyOpener().open(url).read() )
        else:
            # Results page: follow the first /title/tt... link.
            link = br.find_link(url_regex = re.compile(r'/title/tt.*'))
            res = br.follow_link(link)
            self.url = urlparse.urljoin(self.BASE_URL, link.url)
            soup = BeautifulSoup(res.read())

        try:
            self.title = soup.find('h1').contents[0].strip()
            # Rating is read from the 'rating-rating' span of the old IMDb
            # page layout.
            self.rating = soup.find('span',attrs='rating-rating').contents[0]
            self.found = True
        except:
            # Best-effort: any parse failure leaves self.found untouched.
            pass

示例13: getRating

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import find_link [as 别名]
 def getRating(self):
     """Look up self.arguments.Title on IMDb and build a rating summary.

     Sets self.title, self.rating, self.year, self.nusers and self.found,
     and always leaves a human-readable summary or error in self.msg.
     """
     self.found=False
     self.BASE_URL ='http://www.imdb.com'
     self.title = self.arguments.Title
     self.name= self.title
     # The search query here joins words with '_'.
     self.movie = '_'.join(self.title.split())
     br = Browser()
     url = "%s/find?s=tt&q=%s" % (self.BASE_URL, self.movie)
     try:
         br.open(url)
     except:
         self.msg="internet connection error or movie not found"
         return
     if re.search(r'/title/tt.*', br.geturl()):
         # Search redirected straight to a title page; re-fetch the search
         # URL with MyOpener and parse that instead.
         #self.url = "%s://%s%s" % urlparse.urlparse(br.geturl())[:3]
         soup = BeautifulSoup( MyOpener().open(url).read() )
     else:
         # Results page: follow the first /title/tt... link, if any.
         try:
             self.link = br.find_link(url_regex = re.compile(r'/title/tt.*'))
         except:
             self.msg="Movie not found"
             return
         res = br.follow_link(self.link)
         #self.url = urlparse.urljoin(self.BASE_URL, self.link.url)
         soup = BeautifulSoup(res.read())
     try:
         # Title, rating, year and vote count come from the old IMDb layout;
         # has_key is the BeautifulSoup 3 / Python 2 attribute test.
         self.title=soup.find('h1',{'class':'header'}).find('span',{'class':'itemprop'}).contents[0]
         for span in soup.findAll('span'):
             if span.has_key('itemprop') and span['itemprop'] == 'ratingValue':
                 self.rating = span.contents[0]
                 break
         self.year=soup.find('span',{'class':'nobr'}).find('a').contents[0]
         self.nusers=soup.find('div',{'class':'star-box-details'}).find('a').find('span').contents[0]
         self.found=True
     except:
         # Best-effort: self.found stays False when any element is missing.
         pass
     if self.found:
         self.msg="{0} {1}, RATING: {2}/10.0 from {3} people ".format(self.title.upper(),self.year,self.rating,self.nusers)
     else:
         self.msg="Movie Not found"
开发者ID:vinay-nadig,项目名称:Remote_Desktop_Control,代码行数:42,代码来源:movierating.py

示例14: main

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import find_link [as 别名]
def main():
    ## create a browser object
    ## NWEA has a pretty aggressive robots.txt
    ## here's what we'll do about that: ignore it
    br = Browser()
    #br.set_handle_redirect(False)    
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

    ## open the login page, form is called loginForm
    br.open(LOGIN_URL)
    br.select_form(name="loginForm")    
    br['username'] = USERNAME
    br['password'] = SECRET    
    response = br.submit()  ## submit and store response
    print 'credentials successful, logged in'
    #print response.read()

    #once logged in, navigate to reports page
    br.open(BASE_URL + '/report/home/map')

    #CDF file looks like "https://kippteamschools-admin.mapnwea.org/report/download/cdf/7492"
    #get the matching cdf and build the full url
    cdf_string = '/report/download/cdf/[0-9]+'
    file_target = br.find_link(url_regex=cdf_string)
    file_loc =  BASE_URL + file_target.url
    print 'cdf is located at %s' % (file_loc)

    #retrieve will get file at the location and save to a temp directory
    cdf_zipped = br.retrieve(file_loc)[0]
    print 'temp file is located at %s' % cdf_zipped

    sourceZip = ZipFile(cdf_zipped, 'r')
    print
    print 'beginning unzip'
    for name in sourceZip.namelist():
        print 'extracted %s...' % (name)
        sourceZip.extract(name, UNZIPPED_DEST)
    sourceZip.close()
开发者ID:cbini,项目名称:eduextractor,代码行数:41,代码来源:map_loader.py

示例15: parse_movie_imdb

# 需要导入模块: from mechanize import Browser [as 别名]
# 或者: from mechanize.Browser import find_link [as 别名]
    def parse_movie_imdb(self, link):
        br = Browser()
        br.open(link)

        link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
        res = br.follow_link(link)

        soup = BeautifulSoup(res.read())

        movie_title = self.getunicode(soup.find('title'))
        rate = soup.find('span', itemprop='ratingValue')
        rating = self.getunicode(rate)

        actors = []
        actors_soup = soup.findAll('a', itemprop='actors')
        for i in range(len(actors_soup)):
            actors.append(self.getunicode(actors_soup[i]))

        des = soup.find('meta', {'name': 'description'})['content']

        genre = []
        infobar = soup.find('div', {'class': 'infobar'})
        r = infobar.find('', {'title': True})['title']
        genrelist = infobar.findAll('a', {'href': True})

        for i in range(len(genrelist) - 1):
            genre.append(self.getunicode(genrelist[i]))
        release_date = self.getunicode(genrelist[-1])

        print movie_title, rating + '/10.0'
        print 'Relase Date:', release_date
        print 'Rated', r
        print ''
        print 'Genre:',
        print ', '.join(genre)
        print '\nActors:',
        print ', '.join(actors)
        print '\nDescription:'
        print des
开发者ID:obayhan,项目名称:hddiyari_presentation,代码行数:41,代码来源:engine.py


注:本文中的mechanize.Browser.find_link方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。