本文整理汇总了Python中core.decoder.Decoder.extract方法的典型用法代码示例。如果您正苦于以下问题:Python Decoder.extract方法的具体用法?Python Decoder.extract怎么用?Python Decoder.extract使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类core.decoder.Decoder的用法示例。
在下文中一共展示了Decoder.extract方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: getChannels
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extract [as 别名]
def getChannels(page):
    """Build the Live9net entry list for ``page``.

    A ``page`` whose string form is "0" selects ``Live9net.MAIN_URL``.
    If the downloaded HTML contains ``ESPN</`` the extracted table is passed
    to ``Live9net.extractElements`` and that result is returned. Otherwise
    the iframe URL found in the page is downloaded and, when it references a
    sawlive script, one "Watch channel" entry is produced with the link
    returned by ``Decoder.extractSawlive``.

    Returns a list, which is empty when the iframe page references no
    sawlive script.
    """
    if str(page) == "0":
        page = Live9net.MAIN_URL
    html = Live9net.getContentFromUrl(page, "", Live9net.cookie, "")

    if "ESPN</" in html:  # it's a list, needs decode
        table = Decoder.extract("ESPN</", "<div>", html)
        channels = Live9net.extractElements(table)
        logger.debug("live9 channels logic done!")
        return channels

    channels = []
    iframeUrl = Decoder.extract('src="', '"></iframe>', html)
    html2 = Live9net.getContentFromUrl(iframeUrl, "", Live9net.cookie, page)

    # The plain sawlive.tv marker is checked before the www3 variant.
    if 'src="http://sawlive.tv/' in html2:
        scriptStart = "http://sawlive"
    elif 'src="http://www3.sawlive' in html2:
        scriptStart = "http://www3.sawlive"
    else:
        return channels

    scriptEnd = '"></script>'
    # The closing marker is stripped from whatever extractWithRegex returns.
    scriptSrc = Decoder.extractWithRegex(scriptStart, scriptEnd, html2).replace(scriptEnd, "")
    finalRtmpUrl = Decoder.extractSawlive(scriptSrc, iframeUrl)
    entry = {"link": finalRtmpUrl, "title": "Watch channel", "permalink": True}
    logger.debug("finished append element!")
    channels.append(entry)
    return channels
示例2: extractFinalRtmpUrl
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extract [as 别名]
def extractFinalRtmpUrl(url, referer):
    """Resolve the final stream URL for a Mamahdcom player page.

    The page at ``url`` is downloaded with ``referer``. Two layouts are
    handled:

    * ``file:'...'`` present: the value is split at its last "/" into an
      rtmp base and a play path, and an rtmp command line (swfUrl, pageUrl,
      flashver, token, ...) is assembled from them.
    * an ``allowtransparency="true" src=`` iframe present: that iframe is
      downloaded with browser-like headers and the ``file:"..."`` value found
      there is returned as-is.

    Returns the resolved URL, or "" when neither layout yields one.
    """
    rtmpUrl = ""
    html = Mamahdcom.getContentFromUrl(url, "", Mamahdcom.cookie, referer)
    if "file:'" in html:
        # Named streamFile to avoid shadowing the ``file`` builtin.
        streamFile = Decoder.extract("file:'", "',", html)
        splitAt = streamFile.rfind("/") + 1  # 0 when there is no "/" at all
        rtmp = streamFile[:splitAt]
        playpath = streamFile[splitAt:]
        swfUrl = ""
        secureToken = "SECURET0KEN#yw%.?()@W!"
        if url.find("hdcast.org") > -1:
            swfUrl = "http://player.hdcast.org/jws/jwplayer.flash.swf"
        rtmpUrl = rtmp + " playPath=" + playpath + " swfUrl=" + swfUrl + " pageUrl=" + url + " flashver=WIN/2019,0,0,226 live=true timeout=14 token=" + secureToken
        logger.debug("built final rtmp link: " + rtmpUrl)
    elif 'allowtransparency="true" src=' in html:
        logger.debug("using second way...")
        # The attribute value is HTML-escaped, so "&amp;" has to be turned
        # back into "&" before the URL is requested. (The previous
        # replace("&", "&") was a no-op.)
        secondIframe = Decoder.extract('allowtransparency="true" src=', ' ', html).replace("&amp;", "&")
        logger.debug("found second way url: " + secondIframe + ", referer: " + url)
        headers = {
            "User-Agent": Downloader.USER_AGENT,
            "Accept-Language": "en-US,en;q=0.8,es-ES;q=0.5,es;q=0.3",
            "Upgrade-Insecure-Requests": "1",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Referer": url
        }
        html2 = Mamahdcom.getContentFromUrl(url=secondIframe, headers=headers)
        logger.debug("html2 is: " + html2)
        if 'file:"' in html2:
            rtmpUrl = Decoder.extract('file:"', '",', html2)
            logger.debug("using m3u8 for: " + rtmpUrl)
    return rtmpUrl
示例3: getChannels
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extract [as 别名]
def getChannels(page='0'):
    """Return Yomvies entries for ``page``.

    When the string form of ``page`` is '0', the channel listing at
    ``Yomvies.CHANNELS_PAGE`` is downloaded and cut into chunks on
    ``{"CodCadenaTv":``; every chunk after the first becomes one entry with
    ``title``, ``thumbnail`` and ``link`` (the stream URL followed by
    ``|Referer=<code>``). For any other ``page`` a single entry is returned
    whose link comes from ``Yomvies.extractTargetVideo(page)``.
    """
    channels = []
    if str(page) != '0':
        link = Yomvies.extractTargetVideo(page)
        channels.append({"title": page, "link": link, "finalLink": True})
        return channels

    page = Yomvies.CHANNELS_PAGE
    logger.debug("loading json data from: "+page)
    bruteJSON = Yomvies.getContentFromUrl(page,"",Yomvies.cookie,Yomvies.MAIN_URL)
    logger.debug("parsing string to json...")
    # The chunk before the first separator holds no channel, so it is skipped.
    for jsonChannel in bruteJSON.split('{"CodCadenaTv":')[1:]:
        codTv = Decoder.extract('"','"',jsonChannel)
        title = Decoder.extract('"Nombre":"','"',jsonChannel)
        thumbnail = Decoder.extract('"Logo":"','"',jsonChannel).replace("\\","")
        m3u8Url = Decoder.extract('"PuntoReproduccion":"','"',jsonChannel).replace("{network}",Yomvies.NETWORK).replace("\\","")
        logger.debug("Appending channel: "+title+", with url: "+m3u8Url+", img: "+thumbnail)
        refererHeader = 'Referer='+codTv
        channels.append({
            "title": title,
            "thumbnail": thumbnail,
            "link": m3u8Url+"|"+refererHeader,
        })
    return channels
示例4: extractAllVideosFromHtml
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extract [as 别名]
def extractAllVideosFromHtml(html):
    """Collect video entries, plus an optional pager entry, from ``html``.

    The results section is cut on the ``yt-lockup-dismissable`` tile marker;
    each piece after the first yields an entry with ``title``, ``page`` and
    ``thumbnail``. ``finalLink`` is set only when the link has no ``&list=``
    part. If the search-pager block is present, one more entry is appended
    with the title and link taken from the end of that block.

    Returns the list of entry dicts.
    """
    videos = []
    tableHtml = Decoder.extract('class="item-section">','<div class="branded-page-box search-pager',html)
    rows = tableHtml.split('<div class="yt-lockup-dismissable yt-uix-tile">')
    # rows[0] is the text before the first tile and is not a video.
    for rowHtml in rows[1:]:
        logger.debug("row html is: "+rowHtml)
        link = "/watch?"+Decoder.extract('href="/watch?', '"', rowHtml)
        title = Decoder.extract(' title="','"', rowHtml)
        if 'youtube.com' not in link:
            link = Youtube.MAIN_URL+link
        logger.debug("link: " + link + ", title is: " + title)
        image = Decoder.extractWithRegex('https://i.ytimg.com/','"',rowHtml).replace('"','')
        video = {"title": title, "page": link}
        if '&list=' not in link:
            video["finalLink"] = True
        video["thumbnail"] = image
        videos.append(video)

    #add next if pagination exists
    pagerMarker = '<div class="branded-page-box search-pager spf-link ">'
    if pagerMarker in html:
        bruteHtmlPaginate = Decoder.rExtract(pagerMarker,'<div class="branded-page-v2-secondary-col">',html)
        pagerTitle = Decoder.rExtract(">","</span></a>",bruteHtmlPaginate)
        pagerTitle = pagerTitle[:len(pagerTitle)-2]  # drop the two trailing characters
        pagerLink = Decoder.rExtract('href="','" class="yt-uix-button', bruteHtmlPaginate)
        if 'youtube.com' not in pagerLink:
            pagerLink = Youtube.MAIN_URL + pagerLink
        pagerEntry = {"title": pagerTitle, "page": pagerLink}
        logger.debug("link: " + pagerLink + ", title is: " + pagerTitle)
        videos.append(pagerEntry)
    return videos
示例5: extractElements
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extract [as 别名]
def extractElements(table):
    """Turn the anchors found in ``table`` into Skylinewebcams entries.

    ``table`` is split on ``<a `` and the first piece is ignored. Pieces
    without an ``<img `` tag are treated as text links and kept when the
    title is non-empty and free of angle brackets, the relative link has no
    "#" and is not empty. Pieces with an image are kept when the link
    contains ".html" and the title is free of angle brackets; those entries
    also carry ``thumbnail`` and ``permaLink``.

    Returns the list of accepted entry dicts.
    """
    accepted = []
    for index, value in enumerate(table.split('<a ')):
        logger.debug("loop: "+str(index))
        if index == 0:
            # Text before the first anchor.
            continue
        logger.debug("processing html: "+value)
        if '<img ' not in value:
            title = Decoder.rExtract(">",'</a></li>',value)
            link = Decoder.extract("href=\"/",'"',value)
            if title == '</a':
                title = Decoder.extract('class="menu-item">','<',value).replace(" ","")
            entry = {"title": title, "link": Skylinewebcamscom.MAIN_URL+link}
            keep = (
                len(title)>0
                and "#" not in link
                and len(entry["link"])>len(Skylinewebcamscom.MAIN_URL)
                and "<" not in title
                and ">" not in title
            )
            if keep:
                logger.debug("append: "+title+", link: "+entry["link"])
                accepted.append(entry)
            else:
                logger.debug("discarted: "+title+", link: "+entry["link"])
        else:
            img = "http://"+Decoder.extract("data-original=\"//",'" ',value)
            title = Decoder.extract("class=\"title\">",'</span>',value)
            link = Decoder.extract("href=\"/",'"',value)
            entry = {
                "title": title,
                "link": Skylinewebcamscom.MAIN_URL+link,
                "thumbnail": img,
                "permaLink": True,
            }
            if ".html" in link and "<" not in title and ">" not in title:
                logger.debug("append: "+title+", link: "+entry["link"]+", img: "+entry["thumbnail"])
                accepted.append(entry)
            else:
                logger.debug("discarted: "+title+", link: "+entry["link"]+", img: "+entry["thumbnail"])
    return accepted
示例6: drawBbcCoUkNew
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extract [as 别名]
def drawBbcCoUkNew(url):
    """Download a BBC article and hand its cleaned text to ``drawNew``.

    Three page layouts are recognised, checked in this order:
    ``property="articleBody"``, ``class="text-wrapper"`` and
    ``<figcaption class="sp-media-asset``. The matching section is extracted,
    known caption/copyright spans and date blocks are removed, HTML is
    stripped with ``Decoder.removeHTML``, a newline is inserted after every
    "." and stray ">" characters are dropped.

    When no layout matches, ``drawNew`` receives an empty text instead of the
    function failing on an unbound ``body``.
    """
    htmlContent = Downloader.getContentFromUrl(url=url)
    body = ""  # stays empty when none of the known layouts match
    if 'property="articleBody"' in htmlContent:
        body = Decoder.extract(
            'property="articleBody"',
            " </div>",
            htmlContent,
        )
        for boilerplate in (
            '<span class="off-screen">Image copyright</span>',
            '<span class="story-image-copyright">AFP</span>',
            '<span class="story-image-copyright">Reuters</span>',
            '<span class="off-screen">Image caption</span>',
            '<span class="off-screen">Media caption</span>',
        ):
            body = body.replace(boilerplate, "")
        captionStart = '<span class="media-caption__text">'
        while captionStart in body:
            line = Decoder.extractWithRegex(captionStart, "</span>", body)
            # Removing an empty or absent span would leave body unchanged
            # and loop forever, so stop as soon as no progress is possible.
            if not line or line not in body:
                break
            body = body.replace(line, "")
    elif 'class="text-wrapper"' in htmlContent:
        # special content
        body = Decoder.extract('class="text-wrapper"', "</p>\n", htmlContent)
        dates = Decoder.extractWithRegex('<div class="date', "</div>", body)
        lastUpdate = Decoder.extractWithRegex('<p class="date ', "</p>", body)
        body = body.replace(dates, "")
        body = body.replace(lastUpdate, "")
    elif '<figcaption class="sp-media-asset' in htmlContent:
        body = Decoder.extract('<figcaption class="sp-media-asset', "</p><div ", htmlContent)
        # NOTE(review): the start marker ends inside the opening tag, so the
        # rest of that tag is cut off here; the nesting of this check under
        # the figcaption branch is assumed - confirm against the caller.
        if ">" in body:
            body = body[body.find(">") + 1 :]
    else:
        logger.debug("no known article layout found for: " + url)
    body = Decoder.removeHTML(body).replace(".", ".\n").replace(">", "")
    logger.debug("body is: " + body)
    drawNew(textContent=(body))
示例7: extractChannel
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extract [as 别名]
def extractChannel(html,referer):
    """Resolve a Cineestrenostv channel element from ``html``.

    Four cases are tried in order:

    1. ``html`` already holds the 650x400 player iframe.
    2. ``html`` mentions ``.php`` while ``referer`` does not: the first
       ``http://....php`` URL is downloaded and searched for a player iframe.
    3. ``html`` holds the 650x500 iframe: it is followed, then the table
       link inside it, until a 650x400 player iframe is found.
    4. ``referer`` contains "php": the referer itself is downloaded again.

    Returns whatever ``Cineestrenostv.extractIframeChannel`` yields for the
    matching case, or an empty dict when nothing matches.
    """
    # NOTE(review): branch nesting was reconstructed from the data flow
    # (each elif tests html/referer, not the inner pages) - confirm.
    iframe400 = '<iframe scrolling="no" marginwidth="0" marginheight="0" frameborder="0" width="650" height="400" src="'
    iframe403 = '<iframe scrolling="no" marginwidth="0" marginheight="0" frameborder="0" allowfullscreen width="653" height="403" src="'
    iframe500 = '<iframe scrolling="no" marginwidth="0" marginheight="0" frameborder="0" width="650" height="500" src="'
    tableLink = '<th scope="col"><a href="/'

    element = {}
    logger.debug('processing html...')
    if iframe400 in html:
        element = Cineestrenostv.extractIframeChannel(html,referer)
    elif '.php' in html and ".php" not in referer:
        logger.debug("proccessing level 1, cookie: "+Cineestrenostv.cookie)
        iframeUrl = Decoder.extractWithRegex('http://','.php',html)
        quoteAt = iframeUrl.find('"')
        if quoteAt>-1:
            # Keep only the part before the first double quote.
            iframeUrl = iframeUrl[0:quoteAt]
        html2 = Cineestrenostv.getContentFromUrl(iframeUrl,"",Cineestrenostv.cookie,referer)
        if iframe400 in html2 or iframe403 in html2:
            element = Cineestrenostv.extractIframeChannel(html2,iframeUrl)
    elif iframe500 in html:
        #same case with different width and height: TODO: change to regex!!
        iframeUrl = Decoder.extract(iframe500,'"></iframe>',html)
        html2 = Cineestrenostv.getContentFromUrl(iframeUrl,"","",referer)
        if tableLink in html2:
            partialLink = Decoder.extract(tableLink,'"><font color="ffffff">',html2)
            completeLink = Cineestrenostv.MAIN_URL+"/"+partialLink
            html3 = Cineestrenostv.getContentFromUrl(completeLink,"",Cineestrenostv.cookie,iframeUrl)
            if iframe400 in html3:
                element = Cineestrenostv.extractIframeChannel(html3,completeLink)
    elif "php" in referer:
        referer = referer.replace("ñ","%C3%B1")
        html2 = Cineestrenostv.getContentFromUrl(referer,"",Cineestrenostv.cookie,referer)
        element = Cineestrenostv.extractIframeChannel(html2,referer)
    return element
示例8: getChannels
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extract [as 别名]
def getChannels(page):
    """Return Tvshowme entries for ``page``.

    * string form '0': the main page is downloaded and its
      "Latest 150 Posts" list is parsed with ``Tvshowme.extractElements``.
    * "search": a keyboard dialog is shown and, once confirmed, the typed
      text is searched on the site and parsed with ``Tvshowme.extractLinks2``.
    * anything else: ``page`` is downloaded and its content block is parsed
      with ``Tvshowme.extractLinks``.

    Returns the parser's result, or an empty list for an unconfirmed search.
    """
    logger.debug("using tvshowme...")
    if str(page) == '0':
        page = Tvshowme.MAIN_URL
        html = Tvshowme.getContentFromUrl(page,"",Tvshowme.cookie,"")
        table = Decoder.extract('<span class="yawp_wim_title">Latest 150 Posts</span> <ul>','</ul>',html)
        return Tvshowme.extractElements(table)

    if page == "search":
        found = []
        #display keyboard, it will wait for result
        keyboard = XBMCUtils.getKeyboard()
        keyboard.doModal()
        # NOTE(review): source indentation was lost; the search is assumed
        # to run only after the dialog is confirmed - verify.
        if keyboard.isConfirmed():
            text = keyboard.getText()
            page = "http://www.tvshow.me/?s="+urllib.quote(text)
            html = Tvshowme.getContentFromUrl(url=page)
            logger.debug(html)
            table = Decoder.extract('<div id="content"', '<h3 class="assistive-text">', html)
            logger.debug("table is: "+table)
            found = Tvshowme.extractLinks2(table)
        return found

    html = Tvshowme.getContentFromUrl(page, "", Tvshowme.cookie, Tvshowme.MAIN_URL)
    logger.debug(html)
    table = Decoder.extract('<div id="content"', '</article>', html)
    return Tvshowme.extractLinks(table)
示例9: getChannels
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extract [as 别名]
def getChannels(page):
    """Return Sports4u entries for ``page``.

    A ``page`` whose string form is '0' loads ``Sports4u.MAIN_URL``; if that
    page holds the ``live-channels-list`` block, it is parsed with
    ``Sports4u.extractElements`` and that result is returned. In every other
    case the page's player iframe is followed and
    ``Cricfreetv.seekIframeScript`` supplies the link of a single
    "Watch streaming" entry.
    """
    isStart = str(page) == '0'
    if isStart:
        page = Sports4u.MAIN_URL
    html = Sports4u.getContentFromUrl(page,"",Sports4u.cookie,"")

    if isStart and 'live-channels-list">' in html: #it's a list, needs decode
        table = Decoder.extract('live-channels-list">','</li><br>',html)
        logger.debug("using menu table: "+table)
        channels = Sports4u.extractElements(table)
        logger.debug("channel list logic done!")
        return channels

    # First isolate the iframe tag, then read its src attribute.
    iframeTag = Decoder.extract('<iframe frameborder="0" marginheight="0" marginwidth="0" height="490" ','"></iframe>',html)
    iframeUrl = Decoder.extract('src="','"',iframeTag)
    logger.debug("iframeUrl is: "+iframeUrl)
    html2 = Sports4u.getContentFromUrl(url=iframeUrl,referer=page)
    logger.debug("html is: "+html2)
    streamLink = Cricfreetv.seekIframeScript(html2,page,iframeUrl)
    logger.debug("Finished file logic, obtained file: "+streamLink)
    return [{"link": streamLink, "title": "Watch streaming"}]
示例10: getChannels
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extract [as 别名]
def getChannels(page):
    """Return Mamahdcom entries for ``page``.

    A ``page`` whose string form is '0' is the menu: ``Mamahdcom.MAIN_URL``
    is downloaded and its channel table is parsed with
    ``Mamahdcom.extractElements``. Any other ``page`` is treated as a
    channel page: its iframe is followed and, when it references
    ``http://hdcast.org``, the script/iframe chain is resolved into one
    "Watch channel" entry.

    The menu test is evaluated once and reused, so an integer ``0`` is
    handled the same way as the string '0' in both the download and the
    parsing step.

    Returns a list of entries (empty when a channel page has no hdcast
    script), or the result of ``Mamahdcom.extractElements`` for the menu.
    """
    x = []
    isMenu = str(page) == '0'
    targetUrl = Mamahdcom.MAIN_URL if isMenu else page
    html = Mamahdcom.getContentFromUrl(targetUrl,"",Mamahdcom.cookie,"")
    if isMenu: #menu
        table = Decoder.extract('<div class="standard row channels">','</div>',html)
        logger.debug("table is: "+table)
        x = Mamahdcom.extractElements(table)
        logger.debug("mamahd channels logic done!")
    else:
        iframeHtml = Decoder.extract("<iframe ","</iframe>",html)
        iframeUrl = Decoder.extract('src="','"',iframeHtml)
        html2 = Mamahdcom.getContentFromUrl(url=iframeUrl,referer=page)
        logger.debug("obtained html from iframe: "+iframeUrl+"; html: "+html2)
        if 'src="http://hdcast.org' in html2:
            logger.debug("found script, launching logic...")
            scriptUrl = Decoder.extract('<script type="text/javascript" src="','"></script>',html2)
            logger.debug("extracting script url... from: "+scriptUrl)
            iframeUrl2 = Mamahdcom.extractScriptIframeUrl(html2,scriptUrl,iframeUrl)
            logger.debug("script url extracted: "+iframeUrl2)
            finalRtmpUrl = Mamahdcom.extractFinalRtmpUrl(iframeUrl2,iframeUrl)
            logger.debug("rtmp extracted is: "+finalRtmpUrl)
            element = {}
            element["link"] = finalRtmpUrl
            element["title"] = "Watch channel"
            element["permaLink"] = True
            logger.debug("finished append element!")
            x.append(element)
    return x
示例11: searchLists
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extract [as 别名]
def searchLists(param):
    """Query the custom search endpoint for ``param`` and return list links.

    The raw response is split on ``{"GsearchResultClass"``; for each piece
    the ``url`` and ``title`` fields are extracted. pastebin.com links that
    are not already raw get ``/raw/`` inserted before their last path
    segment. Only pieces whose link contains "http" are returned, each as a
    dict with ``title`` and ``link``.
    """
    # NOTE(review): the API key, signature and engine id are hard-coded, and
    # ``param`` is appended to the query string without URL-encoding.
    url = (
        "https://www.googleapis.com/customsearch/v1element?"
        "key=AIzaSyCVAXiUzRYsML1Pv6RwSG1gunmMikTzQqY"
        "&rsz=filtered_cse"
        "&num=20"
        "&hl=en"
        "&prettyPrint=false"
        "&source=gcsc"
        "&gss=.com"
        "&sig=8bdfc79787aa2b2b1ac464140255872c"
        "&cx=013305635491195529773:0ufpuq-fpt0"
    )
    url += "&q=" + param + "&sort=date&googlehost=www.google.com&callback=google.search.Search.apiary846"
    results = Pastebin.getContentFromUrl(url)

    # The extracted results section is only logged; the loop below walks the
    # full response.
    jsonString = Decoder.extract(',"results":', "]});", results)
    logger.debug(jsonString)

    found = []
    for jsonResult in results.split('{"GsearchResultClass"'):
        link = Decoder.extract('"url":"', '","', jsonResult)
        if "pastebin.com" in link and "/raw/" not in link:
            lastSlash = link.rfind("/")
            link = link[:lastSlash] + "/raw/" + link[lastSlash + 1 :]
        title = Decoder.extract('"title":"', '","titleNoFormatting"', jsonResult)
        if "http" not in link:
            continue
        logger.debug("appending result: " + title + ", url: " + link)
        found.append({"title": title, "link": link})
    return found
示例12: getWidthAndHeightParams
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extract [as 别名]
def getWidthAndHeightParams(html):
    """Return the ``&vw=..&vh=..`` query suffix found in ``html``.

    The suffix is built from the ``; v_width=`` and ``; v_height=`` values
    when ``; v_width=`` is present; otherwise "" is returned.
    """
    if "; v_width=" not in html:
        return ""
    width = Decoder.extract("; v_width=",";",html)
    height = Decoder.extract("; v_height=",";",html)
    subUrl = "&vw="+width+"&vh="+height
    logger.debug("width-height subUrl now is: "+subUrl)
    return subUrl
示例13: getWidthAndHeightParams
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extract [as 别名]
def getWidthAndHeightParams(html):
    """Return the width/height query suffix found in ``html``.

    Quoted ``; width='..'`` / ``; height='..'`` values take precedence and
    give ``&width=..&height=..``. Failing that, the presence of
    ``; v_height=`` gives ``&vw=..&vh=..`` from the ``v_width`` / ``v_height``
    values. "" is returned when neither marker is present.
    """
    if "; width='" in html:
        width = Decoder.extract("; width='","'",html)
        height = Decoder.extract("; height='","'",html)
        return "&width="+width+"&height="+height
    if "; v_height=" in html:
        width = Decoder.extract("; v_width=",";",html)
        height = Decoder.extract("; v_height=",";",html)
        return "&vw="+width+"&vh="+height
    return ""
示例14: extractScriptIframeUrl
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extract [as 别名]
def extractScriptIframeUrl(html,scriptUrl,referer):
    """Return the iframe URL announced by the script at ``scriptUrl``.

    The script is downloaded with ``referer`` and the value following
    ``src="`` is taken from it. If that URL contains ``?u=`` and ``html``
    declares a ``fid="..."`` value, the fid and the suffix from
    ``Mamahdcom.getWidthAndHeightParams(html)`` are appended.
    """
    logger.debug("extracting script iframe... url: "+scriptUrl)
    scriptContent = Mamahdcom.getContentFromUrl(scriptUrl,"",Mamahdcom.cookie,referer)
    iframeUrl = Decoder.extract('src="',"'",scriptContent)
    logger.debug("brute iframeUrl is: "+iframeUrl)

    fidMarker = '<script type="text/javascript"> fid="'
    if "?u=" in iframeUrl and fidMarker in html:
        fid = Decoder.extract(fidMarker,'"; ',html)
        iframeUrl = iframeUrl+fid+Mamahdcom.getWidthAndHeightParams(html)
    return iframeUrl
示例15: extractElements
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extract [as 别名]
def extractElements(table):
    """Build entries from the ``<li>`` items of ``table``.

    Items without ``<a href=`` are skipped. For the others ``link``,
    ``title`` (from the ``alt`` attribute) and ``thumbnail`` (from ``src``)
    are extracted, and the entry is kept only when the title is not empty.

    Returns the list of kept entry dicts.
    """
    entries = []
    for fieldHtml in table.split('<li>'):
        if "<a href=" not in fieldHtml:
            continue
        entry = {
            "link": Decoder.extract('<a href="','"',fieldHtml),
            "title": Decoder.extract('alt="','">',fieldHtml),
            "thumbnail": Decoder.extract('src="','" ',fieldHtml),
        }
        logger.debug("found title: "+entry["title"]+", link: "+entry["link"]+", thumbnail: "+entry["thumbnail"])
        if len(entry["title"])>0:
            entries.append(entry)
    return entries