本文整理汇总了Python中resources.tools.webpage函数的典型用法代码示例。如果您正苦于以下问题:Python webpage函数的具体用法?Python webpage怎么用?Python webpage使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了webpage函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: episodes
def episodes(self, id):
    """List episode and extras items for the show identified by ``id``.

    Fetches the show's XML stub, extracts the media URN from its HTML
    comment, builds the real episode-page URL from it, then scrapes the
    "full episodes" and "video extras" sliders for show items.
    Returns xbmcitems.addall()'s result, or [] when nothing was found.
    """
    page = webpage(self.url(id))
    # Stub format, e.g.:
    #   <?xml version="1.0" ...?><!--urn:MEDIA:6120103:home-and-away-s2014a-ep6082-->
    # maps to:
    #   http://tvnz.co.nz/home-and-away/s2014a-ep6082-video-6120103
    m = re.match(r'^.*?<!--urn:MEDIA:(\d+):(.*?)-(s.*?-ep.*?)-->', page.doc)
    if not m:
        return []
    url = "%s/%s/%s-video-%s" % (self.urls['base'], m.group(2), m.group(3),
                                 m.group(1))
    # BUG FIX: removed stray Python 2 debug statement `print url`, which
    # polluted stdout (a problem inside an XBMC plugin).
    page = webpage(url, agent="chrome")
    if page.doc:
        soup = BeautifulSoup(page.doc)
        if soup:
            # Full episodes slider.  BUG FIX: guard against the div being
            # absent — previously .findAll was called on a possible None.
            div = soup.find('div', attrs={'id' : 'slidefullepisodes'})
            if div:
                shows = div.findAll('li', attrs={'class' : re.compile(r'\bshowItem\b')})
                for show in shows:
                    item = self._episode(show)
                    if item:
                        self.xbmcitems.items.append(item)
            # Extras slider is optional and was already guarded.
            div = soup.find('div', attrs={'id' : 'slidevideoextras'})
            if div:
                shows = div.findAll('li', attrs={'class' : re.compile(r'\bshowItem\b')})
                for show in shows:
                    item = self._episode(show)
                    if item:
                        self.xbmcitems.items.append(item)
        return self.xbmcitems.addall()
    return []
示例2: _getMetadata
def _getMetadata(self, section, id):
    """Scrape a video page for plot and thumbnail, and — when the page
    advertises a mediaXML file — the title and stream URL from that XML.

    Returns a dict with keys 'Plot', 'thumbnail' and, when available,
    'Title' and 'url'; empty when the page could not be fetched.
    """
    metadata = {}
    page = webpage("/".join((self.urls['base'], section, self.urls['videos'], id)))
    if page.doc:
        soup = BeautifulSoup(page.doc)
        metadata['Plot'] = soup.find('meta', attrs={"name" : "description"})['content']
        metadata['thumbnail'] = soup.find('link', attrs={'rel' : 'image_src'})['href']
        videosrc = soup.find('link', attrs={'rel' : 'video_src'})['href']
        # The flash video_src link carries the media XML location in its
        # query string.
        xml_match = re.search(r'mediaXML=(.*?)(?:&|$)', videosrc)
        xmlfile = xml_match.group(1) if xml_match else None
        if xmlfile:
            xmlpage = webpage(xmlfile)
            # NOTE(review): sibling methods test xmlpage.doc here; this one
            # tests the page object itself — confirm webpage() truthiness.
            if xmlpage:
                dom = self._xml(xmlpage.doc)
                metadata['Title'] = dom.getElementsByTagName('title')[0].firstChild.nodeValue
                metadata['url'] = dom.getElementsByTagName('media:content')[0].getAttribute('url')
    return metadata
示例3: _geturls
def _geturls(self, id, channel):
    """Scrape an OnDemand video page and build RTMP playback URLs.

    ``id`` is either a comma-separated 4-part id (assembled into a page
    URL from self.urls/self.channels) or, with any other number of parts,
    a complete page URL itself.  Returns {bitrate_kbps: rtmp_url_string}
    where each URL carries swfVfy/swfUrl/pageUrl librtmp parameters.
    """
    urls = dict()
    ids = id.split(",")
    if len(ids) == 4:
        pageUrl = "%s/%s/%s/%s/%s/%s/%s/%s/%s" % (self.channels[channel]['base'], ids[0], self.urls["video1"], ids[1], self.urls["video2"], ids[2], self.urls["video3"], ids[3], self.urls["video4"])
    else:
        # The caller passed a full URL via the id variable.  BUG FIX:
        # pageUrl was previously left unset on this path, raising
        # NameError when building the swfVfy parameters below.
        pageUrl = id
    page = webpage(pageUrl)
    if page.doc:
        videoid = re.search('var video ="/(.*?)/([0-9A-Z\-]+)/(.*?)";', page.doc)
        if videoid:
            # The player SWF location is fixed; it is only needed for the
            # swfVfy handshake.  (The old `if videoplayer:` check on this
            # constant — and its unreachable error branch — were removed.)
            videoplayer = 'http://static.mediaworks.co.nz/video/jw/5.10/df.swf'
            rnd = ""
            auth = re.search('random_num = "([0-9]+)";', page.doc)
            if auth:
                rnd = "?rnd=" + auth.group(1)
            swfverify = ' swfVfy=true swfUrl=%s%s pageUrl=%s' % (videoplayer, rnd, pageUrl)
            # 330K is always offered; 700K used to be conditional on
            # flashvars.sevenHundred but is now always appended.  Higher
            # (1500K) and lower (56K) variants plus the geo-dependent URL
            # schemes were already disabled in the original code and have
            # been dropped rather than kept as dead comments.
            qualities = [330, 700]
            for quality in qualities:
                urls[quality] = '%s/%s/%s/%s/%s/%s_%sK.mp4' % (self.urls["rtmp1"], self.channels[channel]['rtmp'], self.urls["rtmp2"], videoid.group(1), videoid.group(2), urllib.quote(videoid.group(3)), quality) + swfverify
        else:
            sys.stderr.write("_geturls: No videoid")
    else:
        sys.stderr.write("_geturls: No page.doc")
    return urls
示例4: show
def show(self, catid, title, provider):
    """Show video items from a TV-Show-style TV3 webpage.

    ``catid`` is either an absolute URL or a path relative to
    self.urls['base'].  The page layout is either a set of <table>
    elements (one per show, via _itemshow) or a single table of <tr>
    rows with a header row (via _itemtable).
    """
    baseurl = ""
    if catid[:4] != "http":
        baseurl = self.urls["base"]
    geturl = "%s%s" % (baseurl, catid)
    page = webpage(geturl)
    if page.doc:
        div_tag = SoupStrainer('div')
        html_divtag = BeautifulSoup(page.doc, parseOnlyThese = div_tag)
        # The video list lives in the div generated by the MS XSL transform.
        tables = html_divtag.find(attrs={"xmlns:msxsl": "urn:schemas-microsoft-com:xslt"})
        if tables:
            programs = tables.findAll('table')
            if len(programs) > 0:
                # One table per show.  (A dead `count` counter was removed.)
                for soup in programs:
                    self.xbmcitems.items.append(self._itemshow(soup, provider, title))
                self.xbmcitems.addall()
            else:
                programs = tables.findAll('tr')
                if len(programs) > 0:
                    # Row layout: skip the first <tr>, which is the header.
                    for soup in programs[1:]:
                        self.xbmcitems.items.append(self._itemtable(soup, provider, title))
                    self.xbmcitems.addall()
                else:
                    sys.stderr.write("show: Couldn't find any videos in list")
        else:
            sys.stderr.write("show: Couldn't find video list")
    else:
        sys.stderr.write("show: Couldn't get index webpage")
示例5: index
def index(self, type = 'showall', id = ""):
page = webpage('/'.join([self.urls['base'], self.urls['media'], type, id]))
if page.doc:
div_tag = SoupStrainer('div')
html_divtag = BeautifulSoup(page.doc, parseOnlyThese = div_tag)
programmes = html_divtag.findAll(attrs={'class' : 'col gu1 video'})
if len(programmes) > 0:
for program in programmes:
item = tools.xbmcItem()
link = re.search("/media/([a-z]+)/([0-9]+)", program.p.a['href'])
if link:
item.info["Title"] = program.p.span.string
item.info["Thumb"] = "%s%s" % (self.urls['base'], program.p.a.img['src'])
if link.group(1) == "view":
item.info["Title"] += ' ' + program.p.span.next.next.next.next.next.string.strip()[6:].strip()
if self.prefetch:
item.info["FileName"] = self._geturl(link.group(2))
else:
item.playable = True
item.info["FileName"] = "%s?ch=%s&view=%s&info=%s" % (self.base, self.channel, link.group(2), item.infoencode())
else:
item.info["FileName"] = "%s?ch=%s&type=%s&id=%s" % (self.base, self.channel, link.group(1), link.group(2))
self.xbmcitems.items.append(item)
return self.xbmcitems.addall()
else:
sys.stderr.write("index: no programmes")
else:
sys.stderr.write("index: no page.doc")
示例6: sections
def sections(self, section):
    """List the videos in a section's gallery box.

    Scrapes /<section>/<videos>/ for gallery entries and adds one item
    per linked video; items are playable (or prefetched) when the link
    matches the expected /section/videos/NNN/ pattern.
    """
    page = webpage('%s/%s/%s/' % (self.urls['base'], section, self.urls['videos']))
    if page.doc:
        div_tag = SoupStrainer('div')
        html_divtag = BeautifulSoup(page.doc, parseOnlyThese = div_tag)
        gallery = html_divtag.find(attrs = {'class' : 'gallery_box'})
        if gallery:
            videos = gallery.findAll('div')
            if len(videos) > 0:
                for video in videos:
                    # (A duplicated `link = video.find("a")` was removed.)
                    link = video.find("a")
                    if link and link.string:
                        item = tools.xbmcItem(self.channel)
                        item['videoInfo']["Title"] = link.string.strip()
                        image = video.find("img")
                        if image:
                            item['videoInfo']["Thumb"] = image['src']
                        videoid = re.match('/%s/%s/([0-9]+)/' % (section, self.urls['videos']), link['href'])
                        if videoid:
                            if self.prefetch:
                                # NOTE(review): this passes the match object
                                # itself; sibling code passes .group(1) —
                                # confirm what _geturl expects here.
                                item.urls = [self._geturl(section, videoid)]
                            else:
                                item.playable = True
                                # BUG FIX: the query string had been mangled
                                # to "§ion=" (the HTML entity for "&sect")
                                # — restored to "&section=".
                                item['videoInfo']["FileName"] = "%s?ch=%s&section=%s&id=%s" % (self.base, self.channel, section, videoid.group(1))
                            self.xbmcitems.items.append(item)
                return self.xbmcitems.addall()
            else:
                sys.stderr.write("sections: no videos")
        else:
            sys.stderr.write("sections: no gallery_box")
    else:
        sys.stderr.write("sections: no page.doc")
示例7: _getMetadata
def _getMetadata(self, index):
    """Fetch the JW-player playlist XML for *index* and extract metadata.

    Returns a dict with 'id', 'Title', 'Plot', 'Thumb' and 'urls'
    ({height_px: stream_url}); {} when the XML cannot be parsed, and an
    empty dict when the page fetch fails.
    """
    metadata = {}
    page = webpage("%s/playlist/null/%s" % (self.urls['base'], index))
    if page.doc:
        metadata['id'] = index
        dom = self._xml(page.doc)
        if not dom:
            return {}
        metadata['Title'] = dom.getElementsByTagName('title')[0].firstChild.data.strip()
        metadata['Plot'] = dom.getElementsByTagName('description')[0].firstChild.data.strip()
        metadata['Thumb'] = dom.getElementsByTagName('jwplayer:image')[0].firstChild.nodeValue
        # Relative thumbnail paths are rooted at the site base.
        if not metadata['Thumb'].startswith("http://"):
            metadata['Thumb'] = self.urls['base'] + metadata['Thumb']
        variants = {}
        for source in dom.getElementsByTagName('jwplayer:source'):
            stream = source.getAttribute('file')
            if not stream.startswith("http://"):
                stream = self.urls['base'] + stream
            # The label is a resolution tag like "720p" or "540p";
            # strip the trailing letter and key on the number.
            label = source.getAttribute('label')
            variants[int(label[:-1])] = stream
        metadata['urls'] = variants
    return metadata
示例8: show
def show(self, id, search = False):
    """Add directory items for the "Show" nodes of a shows XML feed.

    With ``search`` True, ``id`` is treated as a search phrase and the
    search endpoint is queried instead of the canonical show URL.  Only
    the "episodes" group of each show is listed (extras are skipped).
    """
    if search:
        import urllib
        url = "%s/%s/%s?q=%s" % (self.urls['base'], self.urls['search'], self.urls['page'], urllib.quote_plus(id))
    else:
        url = self.url(id)
    page = webpage(url)
    xml = self._xml(page.doc)
    if not xml:
        return
    for node in xml.getElementsByTagName('Show'):
        # The href encodes the show slug and whether this group holds
        # episodes or extras.
        match = re.search('/content/(.*)_(episodes|extras)_group/ps3_xml_skin.xml', node.attributes["href"].value)
        if match and match.group(2) == "episodes":
            item = tools.xbmcItem()
            info = item.info
            info["FileName"] = "%s?ch=%s&type=singleshow&id=%s%s" % (self.base, self.channel, match.group(1), self.urls['episodes'])
            info["Title"] = node.attributes["title"].value
            info["TVShowTitle"] = info["Title"]
            self.xbmcitems.items.append(item)
    self.xbmcitems.addall()
示例9: _geturls
def _geturls(self, title):
    """Collect download URLs for *title* from the JSON endpoint.

    The first JSON record maps format names to dicts whose ``*_res``
    fields hold URLs and ``*_res_mb`` fields hold sizes in MB.  URLs are
    grouped by total size per (format, bitrate): returns
    {total_size_mb: [url, ...]}.
    """
    page = webpage("%s%s%s" % (self.urls['base'], self.urls['json'], title))
    returnurls = dict()
    if page.doc:
        import json
        video = json.loads(page.doc)[0]
        allurls = dict()
        filesizes = dict()
        for fmt, fields in video.iteritems():
            # Non-dict values are scalar metadata, not format entries.
            if type(fields) is not dict:
                continue
            allurls[fmt] = dict()
            filesizes[fmt] = dict()
            for name, value in fields.iteritems():
                if name[-4:] == '_res':
                    bitrate = name[:-4]
                    allurls[fmt].setdefault(bitrate, list())
                    filesizes[fmt].setdefault(bitrate, 0)
                    allurls[fmt][bitrate].append(fields[bitrate + '_res'])
                    # Size may be null in the feed; only add real numbers.
                    if fields[bitrate + '_res_mb']:
                        filesizes[fmt][bitrate] += fields[bitrate + '_res_mb']
        for fmt, bitrates in allurls.iteritems():
            for bitrate, urllist in bitrates.iteritems():
                total = filesizes[fmt][bitrate]
                returnurls.setdefault(total, list()).extend(urllist)
    return returnurls
示例10: episodes
def episodes(self, id):
    """Add episode (and extras) items for the show with the given id.

    In prefetch mode, items are flushed via add(count) after the episode
    pass and sorted at the end; otherwise everything is added in one go.
    """
    page = webpage(self.url(id))
    if not page.doc:
        return
    xml = self._xml(page.doc)
    if not xml:
        return
    episode_nodes = xml.getElementsByTagName('Episode')
    count = episode_nodes.length
    for node in episode_nodes:
        entry = self._episode(node)
        if entry:
            self.xbmcitems.items.append(entry)
    if self.prefetch:
        self.xbmcitems.add(count)
    for node in xml.getElementsByTagName('Extras'):
        entry = self._episode(node)
        if entry:
            self.xbmcitems.items.append(entry)
    if self.prefetch:
        self.xbmcitems.sort()
    else:
        self.xbmcitems.addall()
示例11: showsindex
def showsindex(self, provider):
    """Create a second-level list of TV Shows from the TV3 Shows page.

    BUG FIX: the original signature `def showsindex(provider)` omitted
    ``self`` yet the body reads self.base / self.xbmcitems, so every call
    raised NameError; callers invoking it as a method are unaffected by
    adding ``self``.
    """
    page = webpage("%s/Shows.aspx" % ("http://www.tv3.co.nz"))
    if page.doc:
        html_divtag = BeautifulSoup(page.doc)
        linksdiv = html_divtag.find('div', attrs = {"id": "pw_8171"})
        if linksdiv:
            links = linksdiv.findAll('a')
            if len(links) > 0:
                for link in links:
                    item = tools.xbmcItem()
                    item.info["Title"] = link.string.strip()
                    catid = link['href']
                    # NOTE: the original "60 Minutes" special case built an
                    # identical URL in both branches, so the condition was
                    # dead and has been collapsed.
                    item.info["FileName"] = "%s?ch=TV3&cat=%s&title=%s&catid=%s" % (self.base, "shows", urllib.quote(item.info["Title"]), urllib.quote(catid))
                    self.xbmcitems.items.append(item)
                self.xbmcitems.addall()
            else:
                sys.stderr.write("showsindex: Couldn't find any videos in list")
        else:
            sys.stderr.write("showsindex: Couldn't find video list")
    else:
        sys.stderr.write("showsindex: Couldn't get index webpage")
示例12: programmes
def programmes(self, type, urlext):
    """List programmes for a channel page, a video slider, or a search.

    ``type`` selects the endpoint: "channel" (site base), "video"
    (slider.php for channel ``urlext``), or "search" (keyword search,
    which is then treated as "video" for item purposes).

    NOTE(review): any other ``type`` leaves ``url`` unbound and raises
    NameError — callers are assumed to pass only the three values above.
    """
    if type == "channel":
        url = self.urls['base']
    elif type == "video":
        url = "%s/assets/php/slider.php?channel=%s" % (self.urls['base'], urlext)
    elif type == "search":
        url = "%s/search?search_keyword=%s" % (self.urls['base'], urlext.replace(" ", "+"))
    page = webpage(url)
    if page.doc:
        if type == "channel" or type == "search":
            div_tag = SoupStrainer('div')
            html_divtag = BeautifulSoup(page.doc, parseOnlyThese = div_tag)
            programmes = html_divtag.findAll(attrs={'class' : 'programmes'})
        elif type == "video":
            div_tag = SoupStrainer('body')
            html_divtag = BeautifulSoup(page.doc, parseOnlyThese = div_tag)
            programmes = html_divtag.findAll(attrs={'class' : 'slider slider-small'})
        # Search results use the same item layout as videos from here on.
        if type == "search":
            type = "video"
        if len(programmes) > 0:
            for program in programmes:
                # Renamed from `list`, which shadowed the builtin.
                items_ul = program.find('ul')
                if items_ul:
                    listitems = items_ul.findAll('li')
                    count = len(listitems)
                    if count > 0:
                        for listitem in listitems:
                            link = listitem.find('a', attrs={'href' : re.compile("^/%s/" % type)})
                            if link.img:
                                if re.search("assets/images/%ss/" % type, link.img["src"]):
                                    item = tools.xbmcItem()
                                    if listitem.p.string:
                                        item.info["Title"] = listitem.p.string.strip()
                                    else:
                                        item.info["Title"] = link.img["alt"]
                                    item.info["Thumb"] = "%s/%s" % (self.urls['base'], link.img["src"])
                                    # The numeric programme/video id is only
                                    # available from the thumbnail filename.
                                    index = re.search("assets/images/%ss/([0-9]*?)-mini.jpg" % type, link.img["src"]).group(1)
                                    item.info["FileName"] = "%s?ch=%s&%s=%s" % (self.base, self.channel, type, urllib.quote(index))
                                    if type == "video":
                                        if self.prefetch:
                                            item.info["FileName"] = self._geturl(index)
                                        else:
                                            item.playable = True
                                    self.xbmcitems.items.append(item)
                        if self.prefetch:
                            self.xbmcitems.add(count)
                        if self.prefetch:
                            self.xbmcitems.sort()
                        else:
                            self.xbmcitems.addall()
                    else:
                        sys.stderr.write("Search returned no results")
        else:
            sys.stderr.write("Couldn't find any programs")
    else:
        sys.stderr.write("Couldn't get page")
示例13: GetSwfUrl
def GetSwfUrl(self, qsData):
    """Resolve the Brightcove bootloader SWF URL for *qsData*.

    Follows the federated_f9 viewer redirect, strips the query string,
    and rewrites the bootloader path into its federatedVideoUI location.
    """
    request = "http://c.brightcove.com/services/viewer/federated_f9?&" + urllib.urlencode(qsData)
    response = webpage(request, agent='chrome')
    redirect = response.redirUrl
    swf_base = redirect.split(u"?", 1)[0]
    return swf_base.replace(u"BrightcoveBootloader.swf", u"federatedVideoUI/BrightcoveBootloader.swf")
示例14: shows
def shows(self, channel):
    """Create a second-level list of TV Shows for *channel* from the A-Z page.

    Only shows whose link host starts with the channel's own prefix
    (e.g. "tv3" in "http://www.tv3...") are listed, filtering out
    cross-channel entries on the shared A-Z page.
    """
    page = webpage("%s/%s/%s" % (self.channels[channel]['base'], self.channels[channel]['ondemand'], "TitleAZ.aspx"))
    if page.doc:
        html_divtag = BeautifulSoup(page.doc)
        showsdiv = html_divtag.findAll('div', attrs = {"class": "grid_2"})
        if len(showsdiv) > 0:
            for show in showsdiv:
                item = tools.xbmcItem()
                title = show.find('p').find('a')
                # Compare the 3 characters of the hostname right after
                # "http://www." against the channel prefix.
                if title and title.string and title['href'][len('http://www.'):len('http://www.') + 3] == channel[0:3].lower():
                    item.info["Title"] = title.string.strip()
                    image = show.find("img")
                    if image:
                        item.info["Thumb"] = image['src']
                    item.info["FileName"] = "%s?ch=TV3&channel=%s&cat=%s&title=%s" % (self.base, channel, "show", urllib.quote(item.info["Title"].replace(" ", "")))
                    self.xbmcitems.items.append(item)
            self.xbmcitems.addall()
        else:
            # BUG FIX: error messages said "showsindex:" (copy-paste from
            # the sibling function) — corrected to this function's name.
            sys.stderr.write("shows: Couldn't find any videos in list")
    else:
        sys.stderr.write("shows: Couldn't get index webpage")
示例15: search
def search(self, query):
    """Search the TV listings for *query* and add matching programme items.

    Hits the programme search endpoint with title/site filters, scrapes
    each showDetailsMain block, and builds one item per show whose URL
    carries a trailing numeric id.
    """
    import urllib
    quoted = urllib.quote_plus(query)
    filters = "&requiredfields=type:programme.site:tv&partialfields=programme-title:%s&fq=programme-title:%s&fq=type:programme&fq=site:tv&num=999" % (quoted, quoted)
    page = webpage("%s/%s/%s?q=%s%s" % (self.urls['base'], self.urls['search'], self.urls['searchpage'], quoted, filters))
    soup = BeautifulSoup(page.doc)
    if soup:
        for show in soup.findAll('ul', attrs={'class' : 'showDetailsMain'}):
            channel = show.find('li', attrs={'class' : "channel"}).contents[0].strip()
            item = tools.xbmcItem(channel, self.channel)
            info = item['videoInfo']
            urlIn = show.a['href']
            if not urlIn.startswith('http://'):
                urlIn = self.urls['base'] + urlIn
            info['urlIn'] = urlIn
            # The show id is the digits after the last hyphen in the URL;
            # skip entries that don't carry one.
            id_match = re.match(r'^.*?-(\d+)', urlIn)
            if not id_match:
                continue
            info["Title"] = show.a.contents[0].strip()
            info["Date"] = show.find('li', attrs={'class' : 'date'}).contents[0].strip()
            info["TVShowTitle"] = info["Title"]
            info["Plot"] = show.find('li', attrs={'class' : 'details'}).contents[0].strip()
            info["FileName"] = "%s?ch=%s&id=%s&type=shows" % (self.base, self.channel, id_match.group(1))
            self.xbmcitems.items.append(item)
    return self.xbmcitems.addall()