本文整理汇总了Python中lib.BeautifulSoup.BeautifulSoup.find方法的典型用法代码示例。如果您正苦于以下问题：Python BeautifulSoup.find方法的具体用法？Python BeautifulSoup.find怎么用？Python BeautifulSoup.find使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类lib.BeautifulSoup.BeautifulSoup。
示例1: parse_response
# 需要导入模块: from lib.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from lib.BeautifulSoup.BeautifulSoup import find [as 别名]
def parse_response(self):
    """Parse the HTML held in ``self.response`` and populate this object.

    Values are read from ``<meta>`` elements of the document head through
    ``_get_value_from_soup``. Sets ``max_points``, ``meta["title"]``,
    ``meta["description"]`` and ``content``; sets ``is_accepted`` when the
    status meta value is ``"accepted"``; sets ``points``, ``is_graded`` and
    ``is_accepted`` when a points meta value is present.
    """
    soup = BeautifulSoup(self.response)
    head = soup.find("head")

    self.max_points = int(_get_value_from_soup(head, "meta", "value", {"name": "max-points"}, 0))

    if _get_value_from_soup(head, "meta", "value", {"name": "status"}) == "accepted":
        self.is_accepted = True

    meta_title = _get_value_from_soup(head, "meta", "content", {"name": "DC.Title"})
    if meta_title:
        self.meta["title"] = meta_title

    # NOTE(review): as written, a <title> element replaces the DC.Title value
    # and stores ``title.contents`` (a list of child nodes) rather than a
    # string -- confirm the intended precedence and type.
    title = soup.find("title")
    if title:
        self.meta["title"] = title.contents

    self.meta["description"] = _get_value_from_soup(head, "meta", "content", {"name": "DC.Description"}, "")

    points = _get_value_from_soup(head, "meta", "value", {"name": "points"})
    if points is not None:
        # A points value means the submission was graded and accepted.
        self.points = int(points)
        self.is_graded = True
        self.is_accepted = True

    # Prefer the dedicated exercise container; fall back to the whole body.
    # (Previously the body assignment always ran and clobbered the div.)
    body = soup.body
    exercise_div = body.find("div", {"id": "exercise"})
    if exercise_div is not None:
        self.content = exercise_div.renderContents()
    else:
        self.content = body.renderContents()
示例2: render
# 需要导入模块: from lib.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from lib.BeautifulSoup.BeautifulSoup import find [as 别名]
def render(self):
    """Fetch ``self.content_url`` and return the selected HTML fragment.

    Returns the rendered contents of ``<body>`` when ``self.element_id`` is
    the empty string; otherwise returns ``str()`` of whatever
    ``soup.find(id=self.element_id)`` yields.
    """
    # TODO: fix and enable caching
    # content = cache.get(self.content_url)
    content = None
    url = self.content_url

    # If the page is not cached, retrieve it.
    if content is None:
        response = urllib2.build_opener().open(url, timeout=5)
        try:
            content = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()
        # Save the page in cache
        # cache.set(self.content_url, content)

    soup = BeautifulSoup(content)

    # TODO: Disabled. Add GET parameter support and enable.
    # Make links absolute, quoted from http://stackoverflow.com/a/4468467:
    #for tag in soup.findAll('a', href=True):
    #    tag['href'] = urlparse.urljoin(self.content_url, tag['href'])

    # If there's no element specified, use the BODY.
    # Otherwise find the element with given id.
    # (Previously the id lookup always ran and overwrote the BODY result.)
    if self.element_id == "":
        html = soup.find("body").renderContents()
    else:
        html = str(soup.find(id=self.element_id))
    return html
示例3: parse
# 需要导入模块: from lib.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from lib.BeautifulSoup.BeautifulSoup import find [as 别名]
def parse(property_id, ratecode='SPGCP'):
    """Scrape the property page for *property_id* into a dict.

    Returns a dict with the keys ``id``, ``name``, ``category``, ``address``
    and ``image_url``. Returns ``None`` when the fetch yields nothing or the
    expected property markup is not found in the page.

    *ratecode* is kept for interface compatibility; this function does not
    read it.
    """
    hotel_props = {'id': property_id}
    property_url = "%s?propertyID=%s" % (starwood_url, property_id)
    logging.debug("Property URL: %s", property_url)

    starwood_response = urlfetch.fetch(url=property_url, deadline=10)
    if not starwood_response:
        return None

    # Walk down one level at a time: find() returns None for a missing
    # element, and chaining .find() on None would raise AttributeError.
    highlight = BeautifulSoup(starwood_response.content).find(attrs={'id': 'propertyHighlight'})
    if highlight is None:
        return None
    soup = highlight.find(attrs={'class': 'propertyContainer'})
    if soup is None:
        return None

    name_link = soup.find("a", "propertyName")
    category_span = soup.find("span", "spgCategory")
    if name_link is None or category_span is None:
        # Without a name and category the page is not treated as a valid property.
        return None

    hotel_props['name'] = unicode(name_link.contents[0]).strip()
    # The category is taken from the last whitespace-separated token of the span text.
    hotel_props['category'] = int(str(category_span.contents[0]).split()[-1])
    hotel_props['address'] = StarwoodParser.parse_address(soup)
    #hotel_props['awards'] = StarwoodParser.parse_starwood(soup.find("div", "tabsContentContainer").findAll("div", "tabContent"))
    hotel_props['image_url'] = str("http://www.starwoodhotels.com%s" % (soup.find("img", "propertyThumbnail")['src']))
    return hotel_props
示例4: get_organic_data
# 需要导入模块: from lib.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from lib.BeautifulSoup.BeautifulSoup import find [as 别名]
def get_organic_data(html_data):
    """Return the ``<li class="g">`` nodes found inside ``<div id="ires">``.

    *html_data* is converted with ``str()`` before parsing. Returns ``None``
    when the ``ires`` container lookup gives a falsy result.
    """
    container = BeautifulSoup(str(html_data)).find('div', {'id': 'ires'})
    if not container:
        return None
    return container.findAll('li', {'class': 'g'})
示例5: handler
# 需要导入模块: from lib.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from lib.BeautifulSoup.BeautifulSoup import find [as 别名]
def handler(sock, url):
    """Return the first ``<td>`` whose ``id`` matches ``postmessage_<digits>``.

    The matching element is returned as a unicode string; the literal
    ``"failed to read content"`` is returned when nothing matches.
    """
    # NOTE(review): ``htmlsource`` is not defined inside this block and
    # ``sock`` / ``url`` are unused here -- presumably the fetch step lives
    # elsewhere; confirm where ``htmlsource`` comes from.
    soup = BeautifulSoup(htmlsource)
    # Raw string so that ``\d`` reaches the regex engine without relying on
    # Python passing an unrecognised string escape through unchanged.
    content = soup.find(name="td", id=re.compile(r"postmessage_\d+"))
    if content is None:
        return "failed to read content"
    return unicode(content)
示例6: handler
# 需要导入模块: from lib.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from lib.BeautifulSoup.BeautifulSoup import find [as 别名]
def handler(sock, url):
    """Return the first ``<td class="jiawenzhang-type">`` as a unicode string.

    Returns the literal ``"content not found"`` when no such cell exists.
    """
    # NOTE(review): ``htmlsource`` is not defined inside this block and
    # ``sock`` / ``url`` are unused here -- confirm where it is supplied.
    cell = BeautifulSoup(htmlsource).find("td", {"class": "jiawenzhang-type"})
    return "content not found" if cell is None else unicode(cell)
示例7: fetch_trains
# 需要导入模块: from lib.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from lib.BeautifulSoup.BeautifulSoup import find [as 别名]
def fetch_trains(place_from, place_to, date):
    """Return the train timings scraped for a route on *date*.

    The result is looked up in memcache under a key built from the three
    arguments; on a miss the search page is fetched, parsed and the parsed
    list is stored in memcache with an expiry of ``60*60``.

    Returns a list of ``TrainTiming`` objects, or ``None`` when the result
    list element is not present in the page (nothing is cached then).
    """
    key = 'trains_' + place_from + '_' + place_to + '_' + str(date)
    cached = memcache.get(key) #@UndefinedVariable
    if cached is not None:
        return cached

    params = {'fromName': place_from,
              'toName': place_to,
              'when': utils.date_serialize(date),
              'search_type': 'suburban'}
    url = 'http://m.rasp.yandex.ru/search?' + urllib.urlencode(params)
    soup = BeautifulSoup(urlfetch.fetch(url).content)

    list_node = soup.find("ul", { "class" : "b-holster b-search-result" })
    if list_node is None:
        return None

    tag_pattern = re.compile(r'<.*?>')
    result = []
    for b_node in list_node.findAll("b"):
        # Splitting on tags leaves the text pieces of the <b> element.
        pieces = tag_pattern.split(b_node.renderContents())
        try:
            times = [datetime.datetime.strptime(piece, '%H:%M').time() for piece in pieces]
        except ValueError:
            # Not a run of HH:MM values; skip it instead of aborting the page.
            continue
        if len(times) < 2:
            continue
        result.append(TrainTiming(times[0], times[1]))

    memcache.add(key, result, 60*60) #@UndefinedVariable
    return result
示例8: assert_no_error_message_in_response
# 需要导入模块: from lib.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from lib.BeautifulSoup.BeautifulSoup import find [as 别名]
def assert_no_error_message_in_response(self, response):
    """Check that response has no error messages.

    Fails the test when *response* contains a ``<p>`` or ``<label>`` element
    with the ``alert-error`` class, reporting the offending contents.
    """
    soup = BeautifulSoup(response)

    paragraph = soup.find("p", "alert-error")
    if paragraph:
        self.fail("error message found in response unexpectedly: {}".format(paragraph.contents))

    # findAll() returns a list of tags, which has no ``.contents`` attribute;
    # collect each label's contents so the failure message can be built.
    labels = soup.findAll("label", "alert-error")
    if labels:
        self.fail("error message found in response unexpectedly: {}".format(
            [label.contents for label in labels]))
示例9: parse_matchup
# 需要导入模块: from lib.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from lib.BeautifulSoup.BeautifulSoup import find [as 别名]
def parse_matchup(league, team_id, week=None):
    """Scrape a team's matchup page into a dict.

    The URL is built with ``build_url`` from ``league.id``,
    ``league.access_code`` and *team_id*; *week* is added as a query
    parameter when truthy.

    Returns ``{'name': ..., 'lineup': [...], 'week': ...}`` where ``week`` is
    the value read from the page and each lineup entry has the keys
    ``position``, ``player``, ``status``, ``projected`` and ``opp``.
    ``player`` and ``status`` are ``None`` when the row lacks that markup;
    ``opp`` is ``None`` when it is empty or ``'Bye'``.
    """
    params = {'mid1': team_id}
    if week:
        params['week'] = week
    matchup_url = build_url(league_id=league.id, page='matchup', params=params, access_code=league.access_code)
    soup = BeautifulSoup(urlfetch.fetch(matchup_url).content).find('div', attrs={'id': 'yspmain'})

    # The original deliberately disabled the fallback here (``if True: #try:``),
    # so a page without the week navigation still raises.
    week = int(soup.find('div', attrs={'id': 'matchupweeksubnav'}).find('li', 'current selected').find('a').contents[0])

    matchup_soup = soup.find('div', attrs={'id': 'matchup'})
    team_name = matchup_soup.find('h2').find('a')['title']
    lineup_soup = matchup_soup.find('table', attrs={'id': 'statTable1'}).find('tbody')

    lineup = []
    # The last table row is skipped (presumably a totals row -- confirm).
    for lineup_row in lineup_soup.findAll('tr')[:-1]:
        position = str(lineup_row.find('td').contents[0].strip())
        player_cell = lineup_row.find('td', 'player')

        # Rows may lack the player link or status span; record None for
        # those instead of unconditionally discarding the parsed value.
        try:
            player = player_cell.find('div').find('a').contents[0].strip()
        except (AttributeError, IndexError, TypeError):
            player = None
        try:
            player_status = player_cell.find('div', 'detail').find('span', 'status').contents[0].strip()
        except (AttributeError, IndexError, TypeError):
            player_status = None

        opp = str(lineup_row.find('td', 'opp').contents[0].strip()).replace(' ', '')
        projected_points = float(lineup_row.find('td', 'stat wide').contents[0])
        lineup.append({
            'position': position,
            'player': player,
            'status': player_status,
            'projected': projected_points,
            'opp': opp if opp and opp != 'Bye' else None,
        })
    return {'name': team_name, 'lineup': lineup, 'week': week}
示例10: parse_page
# 需要导入模块: from lib.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from lib.BeautifulSoup.BeautifulSoup import find [as 别名]
def parse_page(writer, catalogue, page=1):
    """Parse one catalogue page, print its rows, and recurse to the next page.

    For every row of the ``snippets`` table a list
    ``[name, url, stats, price, cpm]`` is built and printed; ``stats`` and
    ``cpm`` are ``'?'`` when they cannot be determined. When the pagination
    links reference a page number greater than *page*, the function calls
    itself for ``page + 1``.
    """
    print('Parsing page %s' % page)
    response = urllib.urlopen(URL % (catalogue, page))
    try:
        soup = BeautifulSoup(response)
    finally:
        response.close()

    table = soup.find('table', attrs={'class': 'snippets'})
    rows = table.findAll('tr') if table is not None else []
    for tr in rows:
        link = tr.td.h4.a
        # get name of the page
        name = link.string
        # get URL of the page
        page_url = link['href'].encode('utf-8')

        # get stats info
        stats = '?'
        stats_element = tr.find('p', attrs={'class': 'Stats'})
        if stats_element:
            stats = stats_element.strong.nextSibling.string[1:-11].replace(' ', '')
            if stats == 'wtrakc':
                stats = '?'

        # get price
        price = tr.find('td', attrs={'class': 'Price'}).strong.string[0:-12]

        # calculate CPM; it stays '?' when price/stats are not numeric or
        # stats is zero (previously the result was always reset to '?').
        try:
            cpm = (float(price) * 30) / int(stats) * 1000
        except (ValueError, ZeroDivisionError):
            cpm = '?'

        row = [name, page_url, stats, price.replace('.', ','), str(cpm).replace('.', ',')]
        # NOTE(review): the original comment says "write to the file", but
        # ``writer`` is only passed on to the recursive call in the visible
        # code -- confirm whether the row should also be written with it.
        print(row)

    # find last page of the catalogue
    page_link = re.compile(r'/networks/[0-9]+/websites\?page=([0-9]+)')
    pages = []
    for anchor in soup.findAll('a', href=page_link):
        match = page_link.match(anchor['href'])
        if match:
            pages.append(int(match.group(1)))
    if not pages:
        return

    # parse next page if exists
    if max(pages) > page:
        parse_page(writer, catalogue, page + 1)
示例11: parse_organic_contents
# 需要导入模块: from lib.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from lib.BeautifulSoup.BeautifulSoup import find [as 别名]
def parse_organic_contents(raw_content,organic_pos):
    """Collect the fields of one raw result into a dict.

    Returns a dict with the keys ``position`` (*organic_pos*), ``title``,
    ``display_url``, ``url`` and ``text``.
    """
    # NOTE(review): ``p``, ``rhref``, ``text``, ``ul`` and
    # ``parse_display_url`` are not defined in this block; they are
    # presumably module-level names (or assignments lost from this excerpt)
    # -- confirm before relying on this function.
    first_link = BeautifulSoup(raw_content).find('a')
    headline = p.sub('',str(first_link))
    display_url = parse_display_url(str(raw_content))
    target_url = ul.unquote(str(rhref['href']))
    cleaned_text = text.replace(']','').replace('[','')
    return {
        'position': organic_pos,
        'title': headline,
        'display_url': display_url,
        'url': target_url,
        'text': cleaned_text,
    }
示例12: load
# 需要导入模块: from lib.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from lib.BeautifulSoup.BeautifulSoup import find [as 别名]
def load(self):
    """Fetch the league page, set ``name`` and ``mb``, and print team info.

    Does nothing further when the parsed page is falsy.
    """
    # NOTE(review): ``league_url`` is not defined in this block -- confirm
    # where it comes from.
    page_source = urllib2.urlopen(league_url).read()
    league_soup = BeautifulSoup(page_source)
    if not league_soup:
        return

    self.name = League.name(league_soup)
    self.mb = MessageBoard(self)

    standings = league_soup.find('table', attrs={'id': 'standingstable'})
    team_rows = standings.tbody.findAll('tr')
    # Only team id 1 is loaded; the original left the full range commented
    # out: xrange(1, len(team_rows) + 1)
    teams = [Team(self, team_id) for team_id in xrange(1,2)]
    for team in teams:
        summary = "%s, %s, \"%s\" %s\n" % (team.name, team.record, team.smack, team.roster)
        print(summary)
示例13: get_shows
# 需要导入模块: from lib.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from lib.BeautifulSoup.BeautifulSoup import find [as 别名]
def get_shows():
    """Return the shows listed under the element with id ``topnav04-ul``.

    The page at ``BASE_URL`` is fetched with ``retrieve_url`` and parsed as
    UTF-8. Each ``<li>`` yields a dict whose ``title`` is the link's string
    and whose ``url`` is the link's ``href``.
    """
    page = retrieve_url(BASE_URL)
    nav_list = BeautifulSoup(page, fromEncoding="utf-8").find(id="topnav04-ul")
    return [
        {"title" : item.a.string, "url" : item.a['href']}
        for item in nav_list.findAll("li")
    ]
示例14: basic_league_info
# 需要导入模块: from lib.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from lib.BeautifulSoup.BeautifulSoup import find [as 别名]
def basic_league_info(league_id, access_code=None):
    """Return the name and team count of a league, or an error marker.

    Returns ``{'errors': True}`` when the page lacks the ``yspmain``
    container or contains a ``<div class="errors">``. Otherwise returns
    ``{'name': ..., 'teams_count': ..., 'errors': False}``, with
    ``teams_count`` set to ``0`` when the standings table is missing.
    """
    league_url = build_url(league_id=league_id, access_code=access_code)
    league_soup = BeautifulSoup(urlfetch.fetch(url=league_url).content).find('div', attrs={'id': 'yspmain'})

    # Previously the validity flag was overwritten with True unconditionally,
    # so the error branch could never be taken.
    if league_soup is None or league_soup.find('div', attrs={'class': 'errors'}):
        return {'errors': True}

    league_name = str(league_soup.find('h1').contents[0].strip())

    # Count standings rows; fall back to 0 only when the table is absent
    # (previously the count was always reset to 0).
    standings = league_soup.find('table', attrs={'id': 'standingstable'})
    tbody = standings.find('tbody') if standings is not None else None
    teams_count = len(tbody.findAll('tr')) if tbody is not None else 0

    return {'name': league_name, 'teams_count': teams_count, 'errors': False}
示例15: getWeatherInfo
# 需要导入模块: from lib.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from lib.BeautifulSoup.BeautifulSoup import find [as 别名]
def getWeatherInfo(self, my_phone):
    """Fetch each user's city weather page and send two forecasts by SMS.

    For every entry of ``self.users`` the page URL is built from
    ``self.url``, ``self.province_map`` and ``self.city_map``, the page is
    re-encoded from GBK to UTF-8 and parsed, and the ``weather_div``
    elements at index 1 and 2 are passed to ``self.sendSMS``.
    """
    for user in self.users:
        # Build the query URL: <base><province>/<city>.html
        province_url = self.url + self.province_map[user.province.encode('gbk')]
        url = province_url + '/' + self.city_map[user.city.encode('gbk')] + '.html'

        raw_page = urllib2.urlopen(url).read()
        page = raw_page.decode('GBK').encode('utf-8')

        city_body = BeautifulSoup(page).find('div', {'class': 'w365border city_body'})
        weather_info = city_body.findAll('div', {'class': 'weather_div'})

        # Per the original comments: index 1 is tomorrow's weather and
        # index 2 is the day after tomorrow's.
        for day_index in (1, 2):
            self.sendSMS(my_phone, weather_info[day_index], user)