

Python BeautifulSoup.find Method Code Examples

This article collects typical usage examples of the Python method lib.BeautifulSoup.BeautifulSoup.find. If you have been wondering exactly what BeautifulSoup.find does, how to call it, or how it is used in practice, the curated code examples below should help. You can also explore other usage examples of the enclosing class, lib.BeautifulSoup.BeautifulSoup.


The sections below present 15 code examples of BeautifulSoup.find, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
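Before diving in, here is a minimal sketch of the find call patterns that recur throughout the examples below. It assumes the BeautifulSoup 3-style import used on this page and a small hypothetical HTML string; find returns the first matching Tag, or None when nothing matches.

# Hypothetical demonstration of the common find() call patterns (not from any project below).
import re
from lib.BeautifulSoup import BeautifulSoup

html = '<body><div id="main"><p class="intro">Hello</p></div></body>'
soup = BeautifulSoup(html)

div = soup.find('div', {'id': 'main'})       # tag name plus an attribute dict
p = soup.find('p', 'intro')                  # a bare string second argument matches the CSS class
by_id = soup.find(id=re.compile(r'ma.+'))    # keyword filters accept compiled regexes
print div, p, by_id                          # each is a Tag, or None if not found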

Example 1: parse_response

# Required import: from lib.BeautifulSoup import BeautifulSoup [as alias]
# Or: from lib.BeautifulSoup.BeautifulSoup import find [as alias]
 def parse_response(self):
     soup                = BeautifulSoup(self.response)
     
     head                = soup.find("head")
     
     self.max_points     = int(_get_value_from_soup(head, "meta", "value", {"name": "max-points"}, 0))
     
     if _get_value_from_soup(head, "meta", "value", {"name": "status"}) == "accepted":
         self.is_accepted = True
     
     meta_title          = _get_value_from_soup(head, "meta", "content", {"name": "DC.Title"})
     if meta_title:
         self.meta["title"]  = meta_title
     else:
         title               = soup.find("title")
         if title:
             self.meta["title"]  = title.contents
     
     self.meta["description"] = _get_value_from_soup(head, "meta", "content", {"name": "DC.Description"}, "")
     
     points              = _get_value_from_soup(head, "meta", "value", {"name": "points"})
     if points is not None:
         self.points      = int(points)
         self.is_graded   = True
         self.is_accepted = True
     
     exercise_div        = soup.body.find("div", {"id": "exercise"})
     
     if exercise_div is not None:
         self.content    = exercise_div.renderContents()
     else:
         self.content    = soup.body.renderContents()
Developer: OpenDSA, Project: OpenDSA-devserver, Lines: 34, Source: exercise_page.py
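Example 1 leans on a project-local helper, _get_value_from_soup, whose definition is not included in the excerpt. Judging purely from the call sites above, a plausible sketch (an assumption, not the actual OpenDSA source) would be:

# Hypothetical reconstruction of the helper used in Example 1.
def _get_value_from_soup(soup, tag_name, attribute, attrs, default=None):
    # Find the first tag_name tag matching the attrs dict and return one of
    # its attributes, falling back to the default when the tag or attribute
    # is missing.
    element = soup.find(tag_name, attrs)
    if element is None:
        return default
    return element.get(attribute, default)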

Example 2: render

# Required import: from lib.BeautifulSoup import BeautifulSoup [as alias]
# Or: from lib.BeautifulSoup.BeautifulSoup import find [as alias]
# Note: this snippet also uses urllib2 (import urllib2).
    def render(self):
        # TODO: fix and enable caching
        # content         =  cache.get(self.content_url)
        content = None

        url = self.content_url

        # If the page is not cached, retrieve it
        if content is None:
            opener      = urllib2.build_opener()
            content     = opener.open(url, timeout=5).read()
            
            # Save the page in cache
            # cache.set(self.content_url, content)
        
        soup            = BeautifulSoup(content)

        # TODO: Disabled. Add GET parameter support and enable.
        # Make links absolute, quoted from http://stackoverflow.com/a/4468467:
        #for tag in soup.findAll('a', href=True):
        #    tag['href'] = urlparse.urljoin(self.content_url, tag['href'])
        
        # If there's no element specified, use the BODY. 
        # Otherwise find the element with given id.
        if self.element_id == "":
            html        = soup.find("body").renderContents()
        else:
            html        = str(soup.find(id=self.element_id))
        
        return html
Developer: kahaeia1, Project: a-plus, Lines: 32, Source: models.py

Example 3: parse

# Required import: from lib.BeautifulSoup import BeautifulSoup [as alias]
# Or: from lib.BeautifulSoup.BeautifulSoup import find [as alias]
    def parse(property_id, ratecode='SPGCP'):
        valid_property = False
        hotel_props = {'id': property_id}

        property_url = "%s?propertyID=%s" % (starwood_url, property_id)
        logging.debug("Property URL: %s" % property_url)
        starwood_response = urlfetch.fetch(url=property_url, deadline=10)
        if starwood_response:
            try:
                soup = BeautifulSoup(starwood_response.content).find(attrs={'id': 'propertyHighlight'}).find(attrs={'class': 'propertyContainer'})
            except:
                soup = None

            if soup:
                try:
                    hotel_props['name'] = unicode(soup.find("a", "propertyName").contents[0]).strip()
                    hotel_props['category'] = int(str(soup.find("span", "spgCategory").contents[0]).split()[-1])

                    valid_property = True
                except:
                    pass

                if valid_property:
                    hotel_props['address'] = StarwoodParser.parse_address(soup)
                    #hotel_props['awards'] = StarwoodParser.parse_starwood(soup.find("div", "tabsContentContainer").findAll("div", "tabContent"))
                    hotel_props['image_url'] = str("http://www.starwoodhotels.com%s" % (soup.find("img", "propertyThumbnail")['src']))

        return hotel_props if valid_property else None
Developer: mshafrir, Project: awardr, Lines: 30, Source: parsers.py

Example 4: get_organic_data

# Required import: from lib.BeautifulSoup import BeautifulSoup [as alias]
# Or: from lib.BeautifulSoup.BeautifulSoup import find [as alias]
def get_organic_data(html_data):
    bs = BeautifulSoup(str(html_data))
    div_filter = bs.find('div',{'id':'ires'})
    if div_filter:
        contents = div_filter.findAll('li',{'class':'g'})
        return contents
    return None
Developer: nava45, Project: gpage_crawler, Lines: 9, Source: webpage_splitter.py
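A hypothetical call site for this helper (the URL and the urllib2 fetch are illustrative assumptions, not part of the gpage_crawler source shown here):

# Hypothetical usage of get_organic_data on a fetched results page.
import urllib2

html_data = urllib2.urlopen('http://www.google.com/search?q=beautifulsoup').read()
results = get_organic_data(html_data)
if results:
    print '%d organic results found' % len(results)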

Example 5: handler

# Required import: from lib.BeautifulSoup import BeautifulSoup [as alias]
# Or: from lib.BeautifulSoup.BeautifulSoup import find [as alias]
# Note: this snippet also requires import re.
def handler(sock, url):
    htmlsource=sock.read()
    soup = BeautifulSoup(htmlsource)
    content = soup.find(id=re.compile(r"postmessage_\d+"), name="td")
    if content is None:
        return "failed to read content"
    return unicode(content)
Developer: cylonbrain, Project: FullTextRss, Lines: 9, Source: powerapple.py

Example 6: handler

# Required import: from lib.BeautifulSoup import BeautifulSoup [as alias]
# Or: from lib.BeautifulSoup.BeautifulSoup import find [as alias]
def handler(sock, url):
    htmlsource=sock.read().decode('gb18030','replace').encode('utf-8') 
    soup = BeautifulSoup(htmlsource)
    content = soup.find("td",{"class":"jiawenzhang-type"})
    if content is None:
        return "content not found"
    return unicode(content)
Developer: cylonbrain, Project: FullTextRss, Lines: 9, Source: mitbbs.py

Example 7: fetch_trains

# Required import: from lib.BeautifulSoup import BeautifulSoup [as alias]
# Or: from lib.BeautifulSoup.BeautifulSoup import find [as alias]
def fetch_trains(place_from, place_to, date):  
    key = 'trains_' + place_from + '_' + place_to + '_' + str(date)
    data = memcache.get(key) #@UndefinedVariable
    if data is not None:
        return data
      
    params = {'fromName': place_from,
              'toName': place_to,
              'when': utils.date_serialize(date),
              'search_type': 'suburban'}
    url = 'http://m.rasp.yandex.ru/search?' + urllib.urlencode(params)
    response = urlfetch.fetch(url)
    html = response.content
    soup = BeautifulSoup(html)
    list_node = soup.find("ul", { "class" : "b-holster b-search-result" })
    if list_node is not None:
        regex = re.compile(r'<.*?>')
        b_nodes = list_node.findAll("b")
        result = []
        for b_node in b_nodes:
            data = regex.split(b_node.renderContents())
            try:
                time = [datetime.datetime.strptime(x, '%H:%M').time() for x in data]
                result.append(TrainTiming(time[0], time[1]))
            except:
                pass
        memcache.add(key, result, 60*60)  #@UndefinedVariable
        return result
Developer: bordanton, Project: Youth, Lines: 30, Source: bot.py

Example 8: assert_no_error_message_in_response

# Required import: from lib.BeautifulSoup import BeautifulSoup [as alias]
# Or: from lib.BeautifulSoup.BeautifulSoup import find [as alias]
 def assert_no_error_message_in_response(self, response):
     """Check that response has no error messages."""
     soup = BeautifulSoup(response)
     el = soup.find("p", "alert-error")
     if el:
         self.fail("error message found in response unexpectedly: {}".format(el.contents))
     el = soup.find("label", "alert-error")
     if el:
         self.fail("error message found in response unexpectedly: {}".format(el.contents))
Developer: JElbourne, Project: PubCart, Lines: 11, Source: test_helpers.py
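A hypothetical use of this helper in a test case (the test class, client, and URL are illustrative assumptions, not part of the PubCart source):

# Hypothetical test using the helper above.
class CartPageTest(TestHelpers):
    def test_cart_page_has_no_error_messages(self):
        response = self.client.get('/cart')  # assumed client returning HTML markup
        self.assert_no_error_message_in_response(response)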

Example 9: parse_matchup

# Required import: from lib.BeautifulSoup import BeautifulSoup [as alias]
# Or: from lib.BeautifulSoup.BeautifulSoup import find [as alias]
def parse_matchup(league, team_id, week=None):
	team_info = {}
	
	params = {'mid1': team_id}
	if week:
		params.update({'week': week})
		
	matchup_url = build_url(league_id=league.id, page='matchup', params=params, access_code=league.access_code)
	soup = BeautifulSoup(urlfetch.fetch(matchup_url).content).find('div', attrs={'id': 'yspmain'})
	
	try:
		week = int(soup.find('div', attrs={'id': 'matchupweeksubnav'}).find('li', 'current selected').find('a').contents[0])
	except:
		week = 0
		
	matchup_soup = soup.find('div', attrs={'id': 'matchup'})

	team_name = matchup_soup.find('h2').find('a')['title']
	lineup_soup = matchup_soup.find('table', attrs={'id': 'statTable1'}).find('tbody')
	
	lineup = []
	for lineup_row in lineup_soup.findAll('tr')[:-1]:
		position = str(lineup_row.find('td').contents[0].strip())

		player_cell = lineup_row.find('td', 'player')
			
		try:
			player = player_cell.find('div').find('a').contents[0].strip()
		except:
			player = None
			
		try:
			player_status = player_cell.find('div', 'detail').find('span', 'status').contents[0].strip()
		except:
			player_status = None
			
		opp = str(lineup_row.find('td', 'opp').contents[0].strip()).replace('&nbsp;', '')
		
		projected_points = float(lineup_row.find('td', 'stat wide').contents[0])
		
		lineup.append({'position': position, 'player': player, 'status': player_status, 'projected': projected_points, 'opp': opp if opp != 'Bye' else None})
		
	return {'name': team_name, 'lineup': lineup, 'week': week}
Developer: mshafrir, Project: Rotoist, Lines: 45, Source: yahoo.py

Example 10: parse_page

# Required import: from lib.BeautifulSoup import BeautifulSoup [as alias]
# Or: from lib.BeautifulSoup.BeautifulSoup import find [as alias]
def parse_page(writer, catalogue, page=1):
    print 'Parsing page %s' % page
    
    url = urllib.urlopen(URL % (catalogue, page))
    soup = BeautifulSoup(url)
    
    table = soup.find('table', attrs={'class': 'snippets'})
    for tr in table.findAll('tr'):
        # get name of the page
        name = tr.td.h4.a.string
        
        # get URL of the page
        url = tr.td.h4.a['href'].encode('utf-8')
        
        #get stats info
        stats = '?'
        stats_element = tr.find('p', attrs={'class': 'Stats'})
        if stats_element:
            stats = stats_element.strong.nextSibling.string[1:-11].replace(' ', '')
            if stats == 'wtrakc': 
                stats = '?'
        
        # get price
        price = tr.find('td', attrs={'class': 'Price'}).strong.string[0:-12]
        
        # calculate CPM
        cpm = '?'
        try:
            cpm = (float(price)*30) / int(stats) * 1000
        except:
            cpm = '?'
        
        # write to the file
        row = [name, url, stats, price.replace('.', ','), str(cpm).replace('.', ',')]
        print row
        writer.writerow(row)
    
    # find last page of the catalogue
    anchors = soup.findAll('a', href=re.compile(r'/networks/[0-9]+/websites\?page=[0-9]+'))
    if not anchors:
        return
    
    pages = []
    for anchor in anchors:
        number = re.match(r'/networks/[0-9]+/websites\?page=([0-9]+)', anchor['href']).group(1)
        pages.append(int(number))

    pages.sort()
    last = pages[-1]
    
    # parse next page if exists
    if last > page:
        next = page + 1
        parse_page(writer, catalogue, next)
Developer: kosciak, Project: kosciak-misc, Lines: 56, Source: adtaily2csv.py

Example 11: parse_organic_contents

# Required import: from lib.BeautifulSoup import BeautifulSoup [as alias]
# Or: from lib.BeautifulSoup.BeautifulSoup import find [as alias]
# Note: p (apparently a compiled tag-stripping regex) and ul (apparently urllib)
# are module-level names defined elsewhere in the source file.
def parse_organic_contents(raw_content,organic_pos):
    data_dict = {}
    data_dict['position'] = organic_pos

    b = BeautifulSoup(raw_content)
    rtitle = b.find('a')
    headline = p.sub('',str(rtitle))
    data_dict['title'] = headline

    display_url = parse_display_url(str(raw_content))
    data_dict['display_url'] = display_url

    rhref=b.find('a',href=True)
    url = str(rhref['href'])
    data_dict['url'] = ul.unquote(url)

    rtext=b.findAll('div',{'class':'s'})
    text=p.sub('',str(rtext))
    data_dict['text'] = text.replace(']','').replace('[','')
    return data_dict
Developer: nava45, Project: gpage_crawler, Lines: 22, Source: webpage_parser.py

Example 12: load

# Required import: from lib.BeautifulSoup import BeautifulSoup [as alias]
# Or: from lib.BeautifulSoup.BeautifulSoup import find [as alias]
	def load(self):
		league_soup = BeautifulSoup(urllib2.urlopen(league_url).read())
		if league_soup:
			self.name = League.name(league_soup)
			self.mb = MessageBoard(self)
			
			team_rows = league_soup.find('table', attrs={'id': 'standingstable'}).tbody.findAll('tr')
			teams = [Team(self, team_id) for team_id in xrange(1,2)] # xrange(1, len(team_rows) + 1)]
			for team in teams:
				print "%s, %s, \"%s\" %s\n" % (team.name, team.record, team.smack, team.roster)
			# (excerpt truncated: the source opens a triple-quoted block here that continues beyond this snippet)
Developer: mshafrir, Project: FantaPy, Lines: 13, Source: fantapy.py

Example 13: get_shows

# Required import: from lib.BeautifulSoup import BeautifulSoup [as alias]
# Or: from lib.BeautifulSoup.BeautifulSoup import find [as alias]
def get_shows():
	"""docstring for get_shows"""
	html = retrieve_url(BASE_URL)
	soup = BeautifulSoup(html, fromEncoding="utf-8")
	#print soup
	#print "Autómánia"
	showsHtml = soup.find(id="topnav04-ul").findAll("li")
	shows = []
	for show in showsHtml:
		shows.append({"title" : show.a.string, "url" : show.a['href']})
	return shows
Developer: Xmister, Project: rtlmost-xbmc, Lines: 13, Source: test.py

Example 14: basic_league_info

# Required import: from lib.BeautifulSoup import BeautifulSoup [as alias]
# Or: from lib.BeautifulSoup.BeautifulSoup import find [as alias]
def basic_league_info(league_id, access_code=None):
	league_url = build_url(league_id=league_id, access_code=access_code)
	league_soup = BeautifulSoup(urlfetch.fetch(url=league_url).content).find('div', attrs={'id': 'yspmain'})

	valid_league = league_soup.find('div', attrs={'class': 'errors'}) is None
	
	if valid_league:
		league_name = str(league_soup.find('h1').contents[0].strip())
		try:
			teams_count = len(league_soup.find('table', attrs={'id': 'standingstable'}).find('tbody').findAll('tr'))
		except:
			teams_count = 0
	
		return {'name': league_name, 'teams_count': teams_count, 'errors': False}
		
	else:
		return {'errors': True}
Developer: mshafrir, Project: Rotoist, Lines: 22, Source: yahoo.py

Example 15: getWeatherInfo

# Required import: from lib.BeautifulSoup import BeautifulSoup [as alias]
# Or: from lib.BeautifulSoup.BeautifulSoup import find [as alias]
	def getWeatherInfo(self, my_phone):
		for user in self.users:
			url = self.url + self.province_map[user.province.encode('gbk')] + '/' + self.city_map[user.city.encode('gbk')] + '.html' # build the query URL
			#print url
			page = urllib2.urlopen(url).read().decode('GBK').encode('utf-8')
			soup = BeautifulSoup(page)
			#print page.decode('utf-8').encode('gbk')
			city_body = soup.find('div', {'class': 'w365border city_body'})
			weather_info = city_body.findAll('div', {'class': 'weather_div'})
			self.sendSMS(my_phone, weather_info[1], user) # tomorrow's weather
			self.sendSMS(my_phone, weather_info[2], user) # the day after tomorrow's weather
Developer: zuojie, Project: KKT, Lines: 13, Source: WeatherReport2.py


Note: The lib.BeautifulSoup.BeautifulSoup.find examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers; copyright in the source code remains with the original authors, and distribution and use must follow each project's license. Do not reproduce without permission.