本文整理汇总了Python中modules.libraries.client.parseDOM函数的典型用法代码示例。如果您正苦于以下问题：Python parseDOM函数的具体用法？Python parseDOM怎么用？Python parseDOM使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了parseDOM函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: resolve
def resolve(self, url):
    """Resolve a page link to a playable URL via the embedded player iframe.

    Only the path of ``url`` is used. It is requested against
    ``self.link_1``, ``self.link_2`` and ``self.link_3`` in turn, stopping at
    the first response that contains "showvideo". The ``src`` of the iframe
    inside the ``showvideo`` div is unwrapped from optional ``u`` and ``url``
    query parameters and handed to ``resolvers.request``.

    Returns whatever ``resolvers.request`` returns, or None if any step
    fails.
    """
    try:
        path = urlparse.urlparse(url).path

        result = ""
        for base_link in (self.link_1, self.link_2, self.link_3):
            result = client.request(urlparse.urljoin(base_link, path), headers=self.headers)
            # str() keeps the membership test safe for a non-string response
            # (presumably None on a failed request -- TODO confirm).
            if "showvideo" in str(result):
                break

        # When no mirror matched, the last response is still parsed; the
        # resulting error is handled by the outer except below.
        result = result.decode("iso-8859-1").encode("utf-8")

        frame = client.parseDOM(result, "div", attrs={"id": "showvideo"})[0]
        # Lower-case the tag/attribute spelling before reading the iframe src.
        frame = frame.replace("<IFRAME", "<iframe").replace(" SRC=", " src=")
        target = client.parseDOM(frame, "iframe", ret="src")[0]
        target = client.replaceHTMLCodes(target)

        # Unwrap the link from a "u" parameter first, then from "url";
        # a missing parameter simply leaves the link unchanged.
        for key in ("u", "url"):
            try:
                target = urlparse.parse_qs(urlparse.urlparse(target).query)[key][0]
            except (KeyError, IndexError, ValueError):
                pass

        return resolvers.request(target)
    except Exception:
        # Best-effort resolver: any failure means "no link".
        return None
示例2: get_episode
def get_episode(self, url, imdb, tvdb, title, date, season, episode):
    """Look up a show season on the index page and return ``"<id>|<episode>"``.

    ``url`` holds the show name: it is sent as the search keyword and its
    cleaned form is compared with each listed title. ``imdb``, ``tvdb``,
    ``title`` and ``date`` are accepted but not used here.

    Returns a UTF-8 encoded string built from the part of the matching link
    after the first ``v=`` (the whole link if absent), a ``|`` and the
    two-digit episode number. Returns None when ``url`` is None, nothing
    matches, or any step fails.
    """
    if url is None:
        return None

    try:
        query = self.tvbase_link + self.index_link
        post = urllib.urlencode({
            'a': 'retrieve',
            'c': 'result',
            'p': '{"KeyWord":"%s","Page":"1","NextToken":""}' % url,
        })
        page = client.source(query, post=post)
        page = page.decode('iso-8859-1').encode('utf-8')
        rows = client.parseDOM(page, "tr")

        show = cleantitle.tv(url)
        season = '%01d' % int(season)
        episode = '%02d' % int(episode)

        headings = [client.parseDOM(row, "h1")[0] for row in rows]
        links = [
            (client.parseDOM(h, "a", ret="href")[0], client.parseDOM(h, "a")[0])
            for h in headings
        ]

        # Compiled once instead of once per entry.
        season_suffix = re.compile(r'\sSeason(|\s)\d*.+')
        season_number = re.compile(r'\sSeason *(\d*) *')
        # (href, label with the " Season N..." suffix removed, season digits)
        entries = [
            (href, season_suffix.sub('', label), season_number.findall(label)[0])
            for href, label in links
        ]
        entries = [e for e in entries if show == cleantitle.tv(e[1])]
        href = [e[0] for e in entries if season == e[2]][0]

        url = '%s|%s' % (href.split('v=', 1)[-1], episode)
        url = client.replaceHTMLCodes(url)
        return url.encode('utf-8')
    except Exception:
        # Best-effort lookup: any failure means "not found".
        return None
示例3: resolve
def resolve(url):
    """Post the page's hidden form plus captcha answer and return the link.

    The hidden inputs of the form with an empty ``action`` are collected,
    merged with the mapping returned by ``captcha.request`` and POSTed back
    to ``url``. Up to five attempts are made, one second apart. An attempt
    counts as failed when the response still contains "download2" or no
    ``<a target="">`` href is found.

    Returns the first matching href, or None if preparation fails or every
    attempt fails.
    """
    try:
        result = client.request(url, close=False)

        form = client.parseDOM(result, "Form", attrs={"action": ""})
        names = client.parseDOM(form, "input", ret="name", attrs={"type": "hidden"})
        post = {}
        for name in names:
            post[name] = client.parseDOM(form, "input", ret="value", attrs={"name": name})[0]
        post.update(captcha.request(result))
        post = urllib.urlencode(post)

        request = urllib2.Request(url, post)

        for _ in range(5):
            try:
                response = urllib2.urlopen(request, timeout=10)
                # Python 2 responses are not context managers, so close
                # explicitly even when read() raises.
                try:
                    page = response.read()
                finally:
                    response.close()
                if "download2" not in page:
                    return client.parseDOM(page, "a", ret="href", attrs={"target": ""})[0]
            except Exception:
                # Failed attempt: fall through to the delay and retry.
                pass
            time.sleep(1)
        return None
    except Exception:
        # Best-effort resolver: any failure means "no link".
        return None
示例4: get_episode
def get_episode(self, url, imdb, tvdb, title, date, season, episode):
    """Search for ``<show> "SxxEyy"`` and return the matching post's link.

    ``url`` holds the show name. ``imdb``, ``tvdb``, ``title`` and ``date``
    are accepted but not used here. A post matches when the text before its
    ``SxxEyy`` tag cleans to the same value as the show name and the tag
    equals the requested season/episode.

    Returns the post link with ``self.tvbase_link`` removed, UTF-8 encoded,
    or None when ``url`` is None, nothing matches, or any step fails.
    """
    if url is None:
        return None

    try:
        hdlr = 'S%02dE%02d' % (int(season), int(episode))

        query = self.search_link % (urllib.quote_plus('%s "%s"' % (url, hdlr)))
        query = urlparse.urljoin(self.tvbase_link, query)

        page = client.source(query)
        headers = client.parseDOM(page, "header", attrs={"class": "post-title"})

        show = cleantitle.tv(url)

        links = [(client.parseDOM(h, "a", ret="href"), client.parseDOM(h, "a")) for h in headers]
        links = [(hrefs[0], texts[0]) for hrefs, texts in links if len(hrefs) > 0 and len(texts) > 0]

        tag = re.compile(r'(.+?) (S\d*E\d*)')
        parsed = [(href, tag.findall(text)) for href, text in links]
        # (href, show part, SxxEyy part) for posts whose text carries a tag
        parsed = [(href, m[0][0], m[0][1]) for href, m in parsed if len(m) > 0]
        parsed = [p for p in parsed if show == cleantitle.tv(p[1])]
        href = [p[0] for p in parsed if hdlr == p[2]][0]

        url = href.replace(self.tvbase_link, '')
        url = client.replaceHTMLCodes(url)
        return url.encode('utf-8')
    except Exception:
        # Best-effort lookup: any failure means "not found".
        return None
示例5: get_show
def get_show(self, imdb, tvdb, show, show_alt, year):
    """Find a show on the index page and return its link.

    The index at ``self.base_link`` is fetched. If the response does not
    contain the text ``'index show'`` (with the single quotes), a cookie is
    obtained from ``self.sign_link`` and the index is fetched again. An
    entry matches when its "value" span contains ``year`` (or year +/- 1)
    and its name cleans to the same value as ``show`` or ``show_alt``.
    ``imdb`` and ``tvdb`` are accepted but not used here.

    Returns the link reduced to the part after the host when it has a
    ``//host/...`` form, otherwise the link as found, UTF-8 encoded.
    Returns None if nothing matches or any step fails.
    """
    try:
        page = client.source(self.base_link)
        if "'index show'" not in page:
            cookie = client.source(self.sign_link, post=self.key_link, output="cookie")
            page = client.source(self.base_link, cookie=cookie)

        items = client.parseDOM(page, "div", attrs={"class": "index show"})
        entries = [
            (
                client.parseDOM(item, "a", attrs={"class": "name"})[0],
                client.parseDOM(item, "span", attrs={"class": "value"})[0],
                client.parseDOM(item, "a", ret="href")[0],
            )
            for item in items
        ]

        shows = [cleantitle.tv(show), cleantitle.tv(show_alt)]
        years = [str(year), str(int(year) + 1), str(int(year) - 1)]

        entries = [e for e in entries if any(y in e[1] for y in years)]
        href = [e[2] for e in entries if cleantitle.tv(e[0]) in shows][0]

        # Keep only the part after the host when the link is absolute.
        paths = re.findall(r"//.+?(/.+)", href)
        url = paths[0] if paths else href
        url = client.replaceHTMLCodes(url)
        return url.encode("utf-8")
    except Exception:
        # Best-effort lookup: any failure means "not found".
        return None
示例6: resolve
def resolve(url):
    """Submit the page's free-watch form and build an RTMP-style stream string.

    The hidden inputs of the form with an empty ``action`` are POSTed back
    to ``url`` together with ``method_free``. The SWF URL, the "file" value
    and the "p2pkey" value are then extracted from the response.

    Returns a string of the form ``"<file> playpath=<p2pkey> conn=S:<p2pkey>
    pageUrl=<url> swfUrl=<swf> swfVfy=true timeout=20"``, or None if any
    step fails.
    """
    try:
        result = client.request(url)

        form = client.parseDOM(result, "Form", attrs={"action": ""})
        names = client.parseDOM(form, "input", ret="name", attrs={"type": "hidden"})
        post = {}
        for name in names:
            post[name] = client.parseDOM(form, "input", ret="value", attrs={"name": name})[0]
        # Set after the hidden fields so this value always wins.
        post.update({'method_free': 'Watch Free!'})
        post = urllib.urlencode(post)

        result = client.request(url, post=post)
        # Unescape slashes, drop newlines and spaces, and unify quotes so the
        # patterns below only have to deal with one compact spelling.
        result = result.replace('\\/', '/').replace('\n', '').replace('\'', '"').replace(' ', '')

        swf_url = re.compile(r'\.embedSWF\("(.+?)"').findall(result)[0]
        swf_url = urlparse.urljoin(url, swf_url)

        streamer = re.compile(r'flashvars=.+?"file":"(.+?)"').findall(result)[0]
        playpath = re.compile(r'flashvars=.+?p2pkey:"(.+?)"').findall(result)[0]

        return '%s playpath=%s conn=S:%s pageUrl=%s swfUrl=%s swfVfy=true timeout=20' % (
            streamer, playpath, playpath, url, swf_url)
    except Exception:
        # Best-effort resolver: any failure means "no link".
        return None
示例7: resolve
def resolve(self, url):
    """Return the page's video source URL with a cookie header appended.

    The page is fetched. If it does not contain "my_video", a cookie is
    obtained from ``self.sign_link`` and the page is fetched again with it.
    The ``src`` of a ``video/mp4`` ``<source>`` is preferred, and a
    ``video/webm`` one is used when no mp4 source exists.

    Returns ``"<absolute src>|Cookie=video%3Dtrue"``, or None when no source
    is found or any step fails.
    """
    try:
        page = client.request(url)
        if "my_video" not in page:
            cookie = client.request(self.sign_link, post=self.key_link, output="cookie")
            page = client.request(url, cookie=cookie)

        src = None
        # Later entries win: an mp4 source replaces a webm one.
        for mime in ("video/webm", "video/mp4"):
            found = client.parseDOM(page, "source", ret="src", attrs={"type": mime})
            if found:
                src = found[0]

        if src is None:
            return None

        stream = urlparse.urljoin(self.base_link, src)
        return "%s|Cookie=%s" % (stream, urllib.quote_plus("video=true"))
    except Exception:
        # Best-effort resolver: any failure means "no link".
        return None
示例8: get_movie
def get_movie(self, imdb, title, year):
    """Search for a movie and return the link of the matching result.

    Results are gathered from ``<li>`` elements whose class matches
    ``first element.+?`` or ``element.+?`` and from ``entry-header``
    headers. A result matches when its anchor text (tags stripped) cleans to
    the same value as ``title`` and contains ``(year)``, ``(year+1)`` or
    ``(year-1)``. ``imdb`` is accepted but not used here.

    Returns the link reduced to the part after the host when it has a
    ``//host/...`` form, otherwise the link as found, UTF-8 encoded.
    Returns None if nothing matches or any step fails.
    """
    try:
        query = urlparse.urljoin(self.base_link, self.moviesearch_link + urllib.quote_plus(title))
        page = cloudflare.source(query)

        nodes = client.parseDOM(page, "li", attrs={"class": "first element.+?"})
        nodes += client.parseDOM(page, "li", attrs={"class": "element.+?"})
        nodes += client.parseDOM(page, "header", attrs={"class": "entry-header"})

        wanted = cleantitle.movie(title)
        years = ['(%s)' % str(year), '(%s)' % str(int(year) + 1), '(%s)' % str(int(year) - 1)]

        links = [(client.parseDOM(n, "a", ret="href"), client.parseDOM(n, "a")) for n in nodes]
        links = [(hrefs[0], texts[0]) for hrefs, texts in links if len(hrefs) > 0 and len(texts) > 0]
        # Drop any markup nested inside the anchor text.
        links = [(href, re.sub(r'<.+?>', '', text)) for href, text in links]
        links = [l for l in links if wanted == cleantitle.movie(l[1])]
        href = [l[0] for l in links if any(y in l[1] for y in years)][0]

        # Keep only the part after the host when the link is absolute.
        paths = re.findall(r'//.+?(/.+)', href)
        url = paths[0] if paths else href
        url = client.replaceHTMLCodes(url)
        return url.encode('utf-8')
    except Exception:
        # Best-effort lookup: any failure means "not found".
        return None
示例9: get_movie
def get_movie(self, imdb, title, year):
    """Search for a movie and return the path of the matching result.

    Each ``movie_table`` div contributes its first anchor href and the
    ``title`` attribute of its second titled anchor. A result matches when
    that title cleans to the same value as ``title`` and contains
    ``(year)``, ``(year+1)`` or ``(year-1)``. ``imdb`` is accepted but not
    used here.

    Returns the URL path of the matching link (unwrapped from a ``u`` query
    parameter when present), UTF-8 encoded, or None if nothing matches or
    any step fails.
    """
    try:
        query = self.search_link % urllib.quote_plus(title)
        query = urlparse.urljoin(self.base_link, query)

        page = cloudflare.source(query)
        page = page.decode("iso-8859-1").encode("utf-8")
        tables = client.parseDOM(page, "div", attrs={"class": "movie_table"})

        wanted = cleantitle.movie(title)
        years = ["(%s)" % str(year), "(%s)" % str(int(year) + 1), "(%s)" % str(int(year) - 1)]

        # NOTE(review): the title comes from index 1 (second match), not 0;
        # presumably the first titled anchor is the poster -- confirm.
        entries = [
            (client.parseDOM(t, "a", ret="href")[0], client.parseDOM(t, "a", ret="title")[1])
            for t in tables
        ]
        entries = [e for e in entries if wanted == cleantitle.movie(e[1])]
        href = [e[0] for e in entries if any(y in e[1] for y in years)][0]

        url = client.replaceHTMLCodes(href)
        # The link may be wrapped in a redirect carrying the target in "u".
        try:
            url = urlparse.parse_qs(urlparse.urlparse(url).query)["u"][0]
        except (KeyError, IndexError, ValueError):
            pass
        url = urlparse.urlparse(url).path
        return url.encode("utf-8")
    except Exception:
        # Best-effort lookup: any failure means "not found".
        return None
示例10: get_movie
def get_movie(self, imdb, title, year):
    """Search for a movie and return the link of the matching post.

    Posts are the divs whose id matches ``post-.+?``. A post is considered
    only if it has a ``status status-year`` div, has no ``mark-8`` div, and
    its anchor title does not end in ``Season <digits>``. It matches when
    the title cleans to the same value as ``title`` and the year div
    contains ``year``, ``year+1`` or ``year-1``. ``imdb`` is accepted but
    not used here.

    Returns the link reduced to the part after the host when it has a
    ``//host/...`` form, otherwise the link as found, UTF-8 encoded.
    Returns None if nothing matches or any step fails.
    """
    try:
        query = self.search_link % (urllib.quote_plus(title))
        query = urlparse.urljoin(self.base_link, query)

        page = client.source(query)
        posts = client.parseDOM(page, "div", attrs={"id": "post-.+?"})

        wanted = cleantitle.movie(title)
        years = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1)]

        raw = [
            (
                client.parseDOM(p, "a", ret="href")[0],
                client.parseDOM(p, "a", ret="title")[0],
                client.parseDOM(p, "div", attrs={"class": "status status-year"}),
                client.parseDOM(p, "div", attrs={"class": "mark-8"}),
            )
            for p in posts
        ]

        season_suffix = re.compile(r'Season (\d*)$')
        # Condition order matters: the year check short-circuits before the
        # title regex runs, as in the original staged filters.
        entries = [
            (href, name, year_divs[0])
            for href, name, year_divs, marks in raw
            if len(year_divs) > 0
            and len(season_suffix.findall(name)) == 0
            and len(marks) == 0
        ]
        entries = [e for e in entries if wanted == cleantitle.movie(e[1])]
        href = [e[0] for e in entries if any(y in e[2] for y in years)][0]

        # Keep only the part after the host when the link is absolute.
        paths = re.findall(r'//.+?(/.+)', href)
        url = paths[0] if paths else href
        url = client.replaceHTMLCodes(url)
        return url.encode('utf-8')
    except Exception:
        # Best-effort lookup: any failure means "not found".
        return None
示例11: get_movie
def get_movie(self, imdb, title, year):
    """Search for a movie and return the link of the matching post.

    Posts are the divs whose class matches ``home_post_cont.+?``. For each
    post the first anchor href is taken, and the movie name is read from the
    anchor text found inside the HTML-decoded ``title`` attribute of the
    post's image. A post matches when that name cleans to the same value as
    ``title`` and contains ``(year)``, ``(year+1)`` or ``(year-1)``.
    ``imdb`` is accepted but not used here.

    Returns the link reduced to the part after the host when it has a
    ``//host/...`` form, otherwise the link as found, UTF-8 encoded.
    Returns None if nothing matches or any step fails.
    """
    try:
        query = self.search_link % (urllib.quote_plus(title))
        query = urlparse.urljoin(self.base_link, query)

        page = client.source(query)
        posts = client.parseDOM(page, "div", attrs={"class": "home_post_cont.+?"})

        wanted = cleantitle.movie(title)
        years = ['(%s)' % str(year), '(%s)' % str(int(year) + 1), '(%s)' % str(int(year) - 1)]

        entries = [
            (client.parseDOM(p, "a", ret="href")[0], client.parseDOM(p, "img", ret="title")[0])
            for p in posts
        ]
        # The image title apparently holds escaped markup: decode it, then
        # read the anchor text inside.
        entries = [(href, client.replaceHTMLCodes(tip)) for href, tip in entries]
        entries = [(href, client.parseDOM(tip, "a")) for href, tip in entries]
        entries = [(href, names[0]) for href, names in entries if len(names) > 0]
        entries = [e for e in entries if wanted == cleantitle.movie(e[1])]
        href = [e[0] for e in entries if any(y in e[1] for y in years)][0]

        # Keep only the part after the host when the link is absolute.
        paths = re.findall(r'//.+?(/.+)', href)
        url = paths[0] if paths else href
        url = client.replaceHTMLCodes(url)
        return url.encode('utf-8')
    except Exception:
        # Best-effort lookup: any failure means "not found".
        return None
示例12: get_episode
def get_episode(self, url, imdb, tvdb, title, date, season, episode):
    """Search for ``<show> "Season N" "Episode M"`` and return the post path.

    ``url`` holds the show name. ``imdb``, ``tvdb``, ``title`` and ``date``
    are accepted but not used here. A post matches when its anchor text has
    the form ``<show>: Season <n>...Episode <m>`` with the requested season
    and episode numbers and a show part that cleans to the same value as the
    show name.

    Returns the URL path of the matching link (unwrapped from a ``u`` query
    parameter when present), UTF-8 encoded, or None when ``url`` is None,
    nothing matches, or any step fails.
    """
    if url is None:
        return None

    try:
        season = '%01d' % int(season)
        episode = '%01d' % int(episode)

        query = '%s "Season %s" "Episode %s"' % (url, season, episode)
        query = urlparse.urljoin(self.base_link, self.tvsearch_link + urllib.quote_plus(query))

        page = cloudflare.source(query)
        headers = client.parseDOM(page, "header", attrs={"class": "entry-header"})

        show = cleantitle.tv(url)

        links = [(client.parseDOM(h, "a", ret="href"), client.parseDOM(h, "a")) for h in headers]
        links = [(hrefs[0], texts[0]) for hrefs, texts in links if len(hrefs) > 0 and len(texts) > 0]

        label = re.compile(r'(.+?): Season (\d*).+?Episode (\d*)')
        parsed = [(href, label.findall(text)) for href, text in links]
        # (href, show part, season digits, episode digits)
        parsed = [(href, m[0][0], m[0][1], m[0][2]) for href, m in parsed if len(m) > 0]
        # Numbers are re-formatted so "01" and "1" compare equal.
        parsed = [p for p in parsed if season == '%01d' % int(p[2]) and episode == '%01d' % int(p[3])]
        href = [p[0] for p in parsed if show == cleantitle.tv(p[1])][0]

        url = client.replaceHTMLCodes(href)
        # The link may be wrapped in a redirect carrying the target in "u".
        try:
            url = urlparse.parse_qs(urlparse.urlparse(url).query)['u'][0]
        except (KeyError, IndexError, ValueError):
            pass
        url = urlparse.urlparse(url).path
        return url.encode('utf-8')
    except Exception:
        # Best-effort lookup: any failure means "not found".
        return None
示例13: resolve
def resolve(url):
    """Fetch the mobile page, re-post its form and return a stream URL.

    If the page has a ``method="POST"`` form, its hidden inputs (submit
    inputs are counted as hidden) are POSTed back to ``url``. Otherwise the
    page is requested without a body. Up to ten attempts are made, one
    second apart. From the last ``sources: [...]`` block of the response,
    the first ``file: "http..."`` entry containing ".m3u8" is returned, or
    the first entry of any kind when none is m3u8.

    Returns the stream URL, or None if every attempt fails.
    """
    try:
        result = client.request(url, mobile=True, close=False)

        try:
            form = client.parseDOM(result, "Form", attrs={"method": "POST"})[0]
            # Make submit inputs match the type="hidden" lookup below so
            # their name/value pairs are posted too.
            form = form.replace('"submit"', '"hidden"')
            names = client.parseDOM(form, "input", ret="name", attrs={"type": "hidden"})
            post = {}
            for name in names:
                post[name] = client.parseDOM(form, "input", ret="value", attrs={"name": name})[0]
            post = urllib.urlencode(post)
        except Exception:
            # No usable form: fall back to a request without a body.
            post = None

        sources_block = re.compile(r'sources *: *\[.+?\]')
        file_entry = re.compile(r'file *: *"(http.+?)"')

        for _ in range(10):
            try:
                page = client.request(url, post=post, mobile=True, close=False)
                page = page.replace('\n', '')
                block = sources_block.findall(page)[-1]
                # Kept in a separate name: the original rebound `url` to this
                # list, so later retries requested a list instead of the page.
                files = file_entry.findall(block)
                hls = [f for f in files if '.m3u8' in f]
                if hls:
                    return hls[0]
                if files:
                    return files[0]
            except Exception:
                # Failed attempt: fall through to the delay and retry.
                pass
            time.sleep(1)
        return None
    except Exception:
        # Best-effort resolver: any failure means "no link".
        return None
示例14: get_movie
def get_movie(self, imdb, title, year):
    """Search for a movie and return the path of the matching result.

    The search page is fetched through ``cloudflare.source``. When that
    returns None it is fetched again through ``self.agent_link``. Results
    are the ``<li>`` elements whose class matches ``first element.+?`` or
    ``element.+?``. Each contributes its first anchor href and the first
    ``...(dddd)`` text found between tags. A result matches when that text
    cleans to the same value as ``title`` and contains ``(year)``,
    ``(year+1)`` or ``(year-1)``. ``imdb`` is accepted but not used here.

    Returns the URL path of the matching link (unwrapped from a ``u`` query
    parameter when present), UTF-8 encoded, or None if nothing matches or
    any step fails.
    """
    try:
        query = urlparse.urljoin(self.base_link, self.moviesearch_link + urllib.quote_plus(title))

        page = cloudflare.source(query)
        if page is None:
            page = client.source(self.agent_link + urllib.quote_plus(query))

        # Trim spaces next to tags so the ">text<" pattern below matches.
        page = page.replace('> ', '>').replace(' <', '<')

        nodes = client.parseDOM(page, "li", attrs={"class": "first element.+?"})
        nodes += client.parseDOM(page, "li", attrs={"class": "element.+?"})

        wanted = cleantitle.movie(title)
        years = ['(%s)' % str(year), '(%s)' % str(int(year) + 1), '(%s)' % str(int(year) - 1)]

        label = re.compile(r'>(.+?\(\d{4}\))<')
        found = [(client.parseDOM(n, "a", ret="href"), label.findall(n)) for n in nodes]
        found = [(hrefs[0], labels[0]) for hrefs, labels in found if len(hrefs) > 0 and len(labels) > 0]
        # The lazy match can still span earlier tags; keep the text after the last ">".
        found = [(href, text.split('>')[-1]) for href, text in found]
        found = [f for f in found if wanted == cleantitle.movie(f[1])]
        href = [f[0] for f in found if any(y in f[1] for y in years)][0]

        url = client.replaceHTMLCodes(href)
        # The link may be wrapped in a redirect carrying the target in "u".
        try:
            url = urlparse.parse_qs(urlparse.urlparse(url).query)['u'][0]
        except (KeyError, IndexError, ValueError):
            pass
        url = urlparse.urlparse(url).path
        return url.encode('utf-8')
    except Exception:
        # Best-effort lookup: any failure means "not found".
        return None
示例15: get_show
def get_show(self, imdb, tvdb, show, show_alt, year):
    """Locate a show in the site index and return its link.

    Fetches ``self.base_link``. When the response lacks the text
    ``'index show'`` (quotes included), a cookie is requested from
    ``self.sign_link`` and the index is fetched once more with it. Each
    ``index show`` div yields a name, a "value" span and an href. The first
    entry whose value contains ``year``, ``year+1`` or ``year-1`` and whose
    cleaned name equals the cleaned ``show`` or ``show_alt`` is selected.
    ``imdb`` and ``tvdb`` are accepted but not used here.

    Returns the selected link, cut down to the part after the host when it
    has a ``//host/...`` form, UTF-8 encoded. Returns None if nothing is
    selected or any step fails.
    """
    try:
        index = client.source(self.base_link)
        if "'index show'" not in index:
            cookie = client.source(self.sign_link, post=self.key_link, output='cookie')
            index = client.source(self.base_link, cookie=cookie)

        blocks = client.parseDOM(index, "div", attrs={"class": "index show"})
        candidates = [
            (
                client.parseDOM(block, "a", attrs={"class": "name"})[0],
                client.parseDOM(block, "span", attrs={"class": "value"})[0],
                client.parseDOM(block, "a", ret="href")[0],
            )
            for block in blocks
        ]

        names = [cleantitle.tv(show), cleantitle.tv(show_alt)]
        years = [str(year), str(int(year) + 1), str(int(year) - 1)]

        candidates = [c for c in candidates if any(y in c[1] for y in years)]
        link = [c[2] for c in candidates if cleantitle.tv(c[0]) in names][0]

        # Strip scheme and host when present; otherwise keep the link as is.
        tail = re.findall(r'//.+?(/.+)', link)
        url = tail[0] if tail else link
        url = client.replaceHTMLCodes(url)
        return url.encode('utf-8')
    except Exception:
        # Best-effort lookup: any failure means "not found".
        return None