本文整理汇总了Python中core.decoder.Decoder.extractWithRegex方法的典型用法代码示例。如果您正苦于以下问题：Python Decoder.extractWithRegex方法的具体用法？Python Decoder.extractWithRegex怎么用？Python Decoder.extractWithRegex使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类core.decoder.Decoder的用法示例。
在下文中一共展示了Decoder.extractWithRegex方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: getChannels
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extractWithRegex [as 别名]
def getChannels(page):
    """Build the list of channel entries for a Live9net page.

    ``page`` is the URL to fetch; when ``str(page)`` is ``"0"`` the
    ``Live9net.MAIN_URL`` page is fetched instead.

    Returns a list of dicts. When the fetched HTML contains ``ESPN</`` the
    list is whatever ``Live9net.extractElements`` produces for that section.
    Otherwise the page's iframe is fetched and, if it references a sawlive
    script, a single entry with the keys ``link``, ``title`` and
    ``permalink`` is returned. In every other case the list is empty.
    """
    if str(page) == "0":
        page = Live9net.MAIN_URL
    html = Live9net.getContentFromUrl(page, "", Live9net.cookie, "")

    if "ESPN</" in html:
        # The page itself is a listing that still has to be decoded.
        channels = Live9net.extractElements(Decoder.extract("ESPN</", "<div>", html))
        logger.debug("live9 channels logic done!")
        return channels

    iframeUrl = Decoder.extract('src="', '"></iframe>', html)
    html2 = Live9net.getContentFromUrl(iframeUrl, "", Live9net.cookie, page)

    # Pick the prefix matching whichever sawlive host the iframe embeds.
    if 'src="http://sawlive.tv/' in html2:
        scriptStart = "http://sawlive"
    elif 'src="http://www3.sawlive' in html2:
        scriptStart = "http://www3.sawlive"
    else:
        return []

    scriptEnd = '"></script>'
    scriptSrc = Decoder.extractWithRegex(scriptStart, scriptEnd, html2).replace(scriptEnd, "")
    entry = {
        "link": Decoder.extractSawlive(scriptSrc, iframeUrl),
        "title": "Watch channel",
        "permalink": True,
    }
    logger.debug("finished append element!")
    return [entry]
示例2: drawBbcCoUkNew
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extractWithRegex [as 别名]
def drawBbcCoUkNew(url):
    """Download a BBC article page, reduce it to text and draw it.

    The page at ``url`` is fetched with ``Downloader.getContentFromUrl``.
    The article body is cut out of the HTML according to whichever of three
    layout markers is present, cleaned with ``Decoder.removeHTML`` and handed
    to ``drawNew``. When none of the markers is found an empty text is drawn
    instead of failing on an unassigned ``body``.
    """
    htmlContent = Downloader.getContentFromUrl(url=url)
    body = ""  # fallback when no known layout marker is present

    if 'property="articleBody"' in htmlContent:
        body = Decoder.extract(
            'property="articleBody"',
            " </div>",
            htmlContent,
        )
        # Drop fixed accessibility / copyright snippets embedded in the body.
        for boilerplate in (
            '<span class="off-screen">Image copyright</span>',
            '<span class="story-image-copyright">AFP</span>',
            '<span class="story-image-copyright">Reuters</span>',
            '<span class="off-screen">Image caption</span>',
            '<span class="off-screen">Media caption</span>',
        ):
            body = body.replace(boilerplate, "")
        captionMarker = '<span class="media-caption__text">'
        while captionMarker in body:
            line = Decoder.extractWithRegex(captionMarker, "</span>", body)
            if not line:
                # An empty match would leave ``body`` untouched and the
                # marker would stay in place forever, so stop here.
                break
            body = body.replace(line, "")
    elif 'class="text-wrapper"' in htmlContent:
        # special content
        body = Decoder.extract('class="text-wrapper"', "</p>\n", htmlContent)
        dates = Decoder.extractWithRegex('<div class="date', "</div>", body)
        lastUpdate = Decoder.extractWithRegex('<p class="date ', "</p>", body)
        body = body.replace(dates, "")
        body = body.replace(lastUpdate, "")
    elif '<figcaption class="sp-media-asset' in htmlContent:
        body = Decoder.extract('<figcaption class="sp-media-asset', "</p><div ", htmlContent)
        if ">" in body:
            # Skip the remainder of the opening tag.
            body = body[body.find(">") + 1:]

    body = Decoder.removeHTML(body).replace(".", ".\n").replace(">", "")
    logger.debug("body is: " + body)
    drawNew(textContent=(body))
示例3: extractNewIframeChannel
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extractWithRegex [as 别名]
def extractNewIframeChannel(html3, iframeUrl2):
    """Follow a telefivegb / verlatelegratis link found in ``html3``.

    The first matching link is fetched (with ``iframeUrl2`` as the fourth
    argument to ``Cineestrenostv.getContentFromUrl``). If the fetched page
    references ``http://www.playerhd1.pw/`` the result of
    ``Cineestrenostv.extractScriptPlayerHd1pw`` is returned; otherwise the
    function calls itself on the fetched page.

    Returns an empty dict when ``html3`` references neither host. Without
    this guard the URL variable would be unassigned and the call would fail,
    which the recursive step can easily trigger.
    """
    element = {}
    if "http://telefivegb.com/" in html3:
        logger.debug("found telefivegb.com link, using that link to...")
        newUrl = Decoder.extractWithRegex('http://telefivegb.com/', '"', html3).replace('"', "")
    elif "http://verlatelegratis.net" in html3:
        logger.debug("found verlatelegratis.net link, using that link to...")
        newUrl = Decoder.extractWithRegex('http://verlatelegratis.net', '"', html3).replace('"', "")
    else:
        # Nothing to follow: stop instead of using an unassigned URL.
        return element

    html4 = Cineestrenostv.getContentFromUrl(newUrl, "", Cineestrenostv.cookie, iframeUrl2)
    if "http://www.playerhd1.pw/" in html4:
        logger.debug("found playerhd1.pw, using that link, continue...")
        element = Cineestrenostv.extractScriptPlayerHd1pw(html4, newUrl)
    else:
        logger.debug("possible redirect to his domains: " + html4 + ", try again..." + newUrl)
        element = Cineestrenostv.extractNewIframeChannel(html4, newUrl)
    return element
示例4: extractAllVideosFromHtml
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extractWithRegex [as 别名]
def extractAllVideosFromHtml(html):
    """Parse a YouTube results page into a list of entry dicts.

    Each result tile yields a dict with ``title``, ``page`` and
    ``thumbnail``; ``finalLink`` is set to ``True`` only when the link has no
    ``&list=`` part. If the pager block is present, one more dict with just
    ``title`` and ``page`` is appended for it.
    """
    videos = []
    tableHtml = Decoder.extract('class="item-section">', '<div class="branded-page-box search-pager', html)
    rows = tableHtml.split('<div class="yt-lockup-dismissable yt-uix-tile">')
    # rows[0] is the text before the first tile marker, so it is skipped.
    for rowHtml in rows[1:]:
        logger.debug("row html is: " + rowHtml)
        link = "/watch?" + Decoder.extract('href="/watch?', '"', rowHtml)
        title = Decoder.extract(' title="', '"', rowHtml)
        if 'youtube.com' not in link:
            link = Youtube.MAIN_URL + link
        logger.debug("link: " + link + ", title is: " + title)
        image = Decoder.extractWithRegex('https://i.ytimg.com/', '"', rowHtml).replace('"', '')
        video = {"title": title, "page": link}
        if '&list=' not in link:
            video["finalLink"] = True
        video["thumbnail"] = image
        videos.append(video)

    # add next if pagination exists
    pagerMarker = '<div class="branded-page-box search-pager spf-link ">'
    if pagerMarker in html:
        pagerHtml = Decoder.rExtract(pagerMarker, '<div class="branded-page-v2-secondary-col">', html)
        # The last two characters of the extracted label are dropped.
        pagerTitle = Decoder.rExtract(">", "</span></a>", pagerHtml)[:-2]
        pagerLink = Decoder.rExtract('href="', '" class="yt-uix-button', pagerHtml)
        if 'youtube.com' not in pagerLink:
            pagerLink = Youtube.MAIN_URL + pagerLink
        pagerEntry = {"title": pagerTitle, "page": pagerLink}
        logger.debug("link: " + pagerLink + ", title is: " + pagerTitle)
        videos.append(pagerEntry)
    return videos
示例5: extractChannel
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extractWithRegex [as 别名]
def extractChannel(html, referer):
    """Resolve a Cineestrenostv page to a channel element.

    Four page shapes are tried in order; the first whose marker matches
    decides the outcome. Returns the dict produced by
    ``Cineestrenostv.extractIframeChannel``, or an empty dict when no shape
    matches or an intermediate page lacks the expected iframe.

    NOTE(review): the branch nesting was reconstructed from a copy that had
    lost its indentation - confirm against the upstream file.
    """
    iframe400 = '<iframe scrolling="no" marginwidth="0" marginheight="0" frameborder="0" width="650" height="400" src="'
    iframe403 = '<iframe scrolling="no" marginwidth="0" marginheight="0" frameborder="0" allowfullscreen width="653" height="403" src="'
    iframe500 = '<iframe scrolling="no" marginwidth="0" marginheight="0" frameborder="0" width="650" height="500" src="'

    element = {}
    logger.debug('processing html...')

    if iframe400 in html:
        return Cineestrenostv.extractIframeChannel(html, referer)

    if '.php' in html and ".php" not in referer:
        logger.debug("proccessing level 1, cookie: " + Cineestrenostv.cookie)
        # Keep only the part of the match before the first double quote.
        iframeUrl = Decoder.extractWithRegex('http://', '.php', html).split('"', 1)[0]
        html2 = Cineestrenostv.getContentFromUrl(iframeUrl, "", Cineestrenostv.cookie, referer)
        if iframe400 in html2 or iframe403 in html2:
            element = Cineestrenostv.extractIframeChannel(html2, iframeUrl)
        return element

    if iframe500 in html:
        # Same case as the first one with a different height.
        # TODO: change to regex!!
        iframeUrl = Decoder.extract(iframe500, '"></iframe>', html)
        html2 = Cineestrenostv.getContentFromUrl(iframeUrl, "", "", referer)
        if '<th scope="col"><a href="/' in html2:
            partialLink = Decoder.extract('<th scope="col"><a href="/', '"><font color="ffffff">', html2)
            completeLink = Cineestrenostv.MAIN_URL + "/" + partialLink
            html3 = Cineestrenostv.getContentFromUrl(completeLink, "", Cineestrenostv.cookie, iframeUrl)
            if iframe400 in html3:
                element = Cineestrenostv.extractIframeChannel(html3, completeLink)
        return element

    if "php" in referer:
        referer = referer.replace("ñ", "%C3%B1")
        html2 = Cineestrenostv.getContentFromUrl(referer, "", Cineestrenostv.cookie, referer)
        element = Cineestrenostv.extractIframeChannel(html2, referer)
    return element
示例6: getChannels
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extractWithRegex [as 别名]
def getChannels(page):
    """Return channel entries for a RedeneobuxCom page.

    With ``page == '0'`` the listing at ``RedeneobuxCom.LIST_PAGE`` is
    parsed: every media block whose link contains ``http`` becomes a dict
    with ``title``, ``link`` and ``thumbnail``.

    For any other ``page`` the page is fetched, the first ``http`` link in
    it is taken and, if it contains ``adf``, decoded and downloaded; each
    ``#EXTINF:`` record whose URL contains ``://`` becomes a dict with
    ``title``, ``link``, an empty ``thumbnail`` and ``finalLink`` set.

    NOTE(review): the extent of the ``'adf'`` branch was reconstructed from a
    copy that had lost its indentation - confirm against the upstream file.
    """
    channels = []

    if page == '0':
        listing = RedeneobuxCom.getContentFromUrl(RedeneobuxCom.LIST_PAGE)
        # The chunk before the first media marker is not an entry.
        for result in listing.split('<div class="media">')[1:]:
            img = Decoder.extract('<img src=\'', "'", result)
            link = Decoder.extract('location.href=\'', "'", result)
            title = Decoder.extract('\' alt=\'', "'", result)
            if "http" in link:
                logger.debug("appending result: " + title + ", url: " + link)
                channels.append({"title": title, "link": link, "thumbnail": img})
        return channels

    content = RedeneobuxCom.getContentFromUrl(url=page, referer=RedeneobuxCom.LIST_PAGE)
    logger.debug("list content is: " + content)
    url = Decoder.extractWithRegex('http', " ", content).replace(" ", "")
    logger.debug("url is: " + url)
    if 'adf' not in url:
        return channels

    listUrl = Decoder.decodeAdfly(url)
    logger.debug("list obtained is: " + listUrl)
    m3uContent = Downloader.getSimpleDownload(listUrl)  # simple urllib2 download
    logger.debug("content: " + m3uContent)
    for lineContent in m3uContent.split('#EXTINF:')[1:]:
        title = Decoder.extract(',', '\n', lineContent)
        # Continue from the first newline, i.e. past the title line.
        remainder = lineContent[lineContent.find("\n"):]
        urlContent = Decoder.extractWithRegex('http://', "\n", remainder).replace('\n', '')
        if "://" in urlContent:
            logger.debug("added: " + title + ", content: " + urlContent)
            channels.append({
                "title": title,
                "link": urlContent,
                "thumbnail": '',
                "finalLink": True,
            })
    return channels
示例7: extractChannel
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extractWithRegex [as 别名]
def extractChannel(html, page="http://www.vipgoal.net/"):
    """Resolve a Vigoal page to a playable element.

    When ``html`` embeds the playerapp1.pw channel script, that script and
    the URL found in it are fetched and the result of
    ``Decoder.decodeBussinessApp`` is returned in a dict with ``title``,
    ``permalink`` and ``link``. Otherwise the work is delegated to
    ``Cineestrenostv.extractIframeChannel(html, page)``.
    """
    legacyMarker = '<script type="text/javascript" src="http://www.playerapp1.pw/channel.php?file='
    if legacyMarker not in html:
        # Unified with cinestrenostv: the same extraction code works here.
        logger.debug('Extracting channel from: ' + page)
        return Cineestrenostv.extractIframeChannel(html, page)

    # old part
    scriptUrl = Decoder.extractWithRegex('http://www.playerapp1.pw/channel.php?file=', '"', html)
    html2 = Vigoal.getContentFromUrl(scriptUrl)
    lastUrl = Decoder.extractWithRegex('http://', '" ', html2).replace('"', "")
    logger.debug("last url: " + lastUrl + ", cookie=" + Vigoal.cookie)
    html3 = Vigoal.getContentFromUrl(lastUrl, "", Vigoal.cookie, lastUrl)
    playerUrl = Decoder.decodeBussinessApp(html3, lastUrl)
    logger.debug("player url is: " + playerUrl)
    return {
        "title": "Watch streaming",
        "permalink": True,
        "link": playerUrl,
    }
示例8: extractElements
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extractWithRegex [as 别名]
def extractElements(table):
    """Collect acestream entries from a newline-separated text block.

    Every line containing ``acestream://`` yields a dict with ``title``
    (the text between ``// `` and ``(``, converted with ``unicode`` using
    ``errors='replace'``) and ``link`` (the ``acestream:`` match with double
    quotes removed). Returns the list of those dicts.
    """
    entries = []
    for value in table.split('\n'):
        if "acestream://" not in value:
            continue
        title = unicode(Decoder.extract("// ", '(', value), errors='replace')
        link = Decoder.extractWithRegex("acestream:", '"', value).replace('"', "")
        logger.debug("append: " + title + ", link: " + link)
        entries.append({"title": title, "link": link})
    return entries
示例9: extractSeasons
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extractWithRegex [as 别名]
def extractSeasons(html, url):
    """Extract the season links of a series page.

    Looks for anchors of the form
    ``<a href='<url>/temporada-N'>...</a>`` in ``html`` and returns one dict
    per anchor with ``permalink`` and ``title``. When the anchor text is a
    tooltip image, ``title`` is taken from its ``original-title`` attribute
    and ``thumbnail`` from its ``src``.
    """
    items = []
    anchorStart = "<a href='" + url + "/temporada-"
    while anchorStart in html:
        aHtml = Decoder.extractWithRegex(anchorStart, "</a>", html)
        if not aHtml:
            # An empty match would leave ``html`` unchanged and the loop
            # would never end, so give up on the rest of the page.
            break
        # Continue scanning after the anchor that was just consumed.
        html = html[html.find(aHtml) + len(aHtml):]

        item = {}
        permalink = Decoder.extractWithRegex(url + "/temporada-", "'", aHtml)
        item["permalink"] = permalink[0:permalink.find("'")]
        item["title"] = Decoder.extract('>', '</a>', aHtml)
        logger.debug("found title: " + item["title"] + ", link: " + item["permalink"])
        if '<img class="tooltip" original-title="Temporada ' in item["title"]:
            title = item["title"]
            item["title"] = Decoder.extract('original-title="', '"', title)
            item["thumbnail"] = Decoder.extract('" src="', '" />', title)
            # Log the thumbnail itself (the permalink was logged before).
            logger.debug("procesed title: " + item["title"] + ", thumbnail: " + item["thumbnail"])
        items.append(item)
    return items
示例10: extractTargetVideo
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extractWithRegex [as 别名]
def extractTargetVideo(page):
    """Build the local proxy URL for a Yomvi stream.

    ``page`` carries the stream URL before the first ``|``; the text after
    its last ``=`` is used to build the referer. The playlist at that URL is
    fetched, the second line of the ``#EXT-`` ... ``.m3u8`` match is appended
    to the URL's directory, and that new URL is fetched once (its response
    is only logged). Returns the ``127.0.0.1:46720`` address wrapping the
    new URL.
    """
    url = page.split('|')[0]
    channel = page[page.rfind("=") + 1:]
    referer = 'http://ver.movistarplus.es/player/?canal=' + channel + Yomvies.CHANNELS_SUBFIX
    logger.debug("yomvi url is: " + url + ", with referer: " + referer)

    html = Yomvies.getContentFromUrl(url=url, referer=referer, launchLocation=True)
    logger.debug("obtained response for yomvi page: " + html)

    baseUrl = url[:url.rfind("/") + 1]
    playlistMatch = Decoder.extractWithRegex("#EXT-", ".m3u8", html)
    newUrl = baseUrl + playlistMatch.split("\n")[1]

    html2 = Yomvies.getContentFromUrl(url=newUrl, referer=url, launchLocation=True)
    logger.debug("obtained second response for yomvi page: " + html2)
    return "http://127.0.0.1:46720?original-request=" + newUrl
示例11: extractScriptLevel3
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extractWithRegex [as 别名]
def extractScriptLevel3(scriptUrl, referer=''):
    """Follow ``scriptUrl`` one level further and decode the player URL.

    Fetches ``scriptUrl``, takes the ``http://`` ... ``%3D"`` match from the
    response without its last character, fetches that address and returns
    the result of ``Decoder.decodeBussinessApp`` for it.
    """
    cookie = Cineestrenostv.cookie
    html4 = Cineestrenostv.getContentFromUrl(scriptUrl, "", cookie, referer)

    # The match ends with the closing quote, which is not part of the URL.
    finalIframeUrl = Decoder.extractWithRegex('http://', '%3D"', html4)[:-1]
    logger.debug("proccessing level 4, cookie: " + Cineestrenostv.cookie)

    finalHtml = Cineestrenostv.getContentFromUrl(finalIframeUrl, "", Cineestrenostv.cookie, referer)
    logger.debug("proccessing level 5, cookie: " + Cineestrenostv.cookie)
    return Decoder.decodeBussinessApp(finalHtml, finalIframeUrl)
示例12: launchScriptLogic
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extractWithRegex [as 别名]
def launchScriptLogic(scriptRegex, html, referer, iframeUrl):
    """Resolve the script referenced in ``html`` to a stream value.

    The ``scriptRegex`` ... ``.js`` match is cut at the first double or
    single quote, turned into a script URL by
    ``Cricfreetv.extractScriptIframeUrl``, fetched, and the response is
    passed to ``Cricfreetv.seekIframeScript``, whose result is returned.
    """
    logger.debug("processing pre level 2... url: " + scriptRegex)
    firstScriptUrl = Decoder.extractWithRegex(scriptRegex, ".js", html)
    # Keep only what precedes the first quote character of either kind.
    for quote in ('"', "'"):
        firstScriptUrl = firstScriptUrl.split(quote, 1)[0]

    scriptUrl = Cricfreetv.extractScriptIframeUrl(html, firstScriptUrl, referer)
    logger.debug("level 2, scriptUrl: " + scriptUrl + ", cookie: " + Cricfreetv.cookie)
    lastIframeHtml = Cricfreetv.getContentFromUrl(scriptUrl, "", Cricfreetv.cookie, iframeUrl)

    streamFile = Cricfreetv.seekIframeScript(lastIframeHtml, iframeUrl, scriptUrl)
    logger.debug("script logic finished!")
    return streamFile
示例13: extractScriptDefaultLogic
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extractWithRegex [as 别名]
def extractScriptDefaultLogic(htmlContent, referer, regex):
    """Build a playable element from the script matched by ``regex``.

    The ``regex`` ... ``"`` match in ``htmlContent`` has its double quotes
    removed and its last character dropped, then goes through
    ``Cineestrenostv.extractScriptLevel3``. Returns a dict with ``title``,
    ``permalink`` and ``link`` (the resolved player URL).
    """
    logger.debug("proccessing level 3, cookie: " + Cineestrenostv.cookie)
    logger.debug("using regex: " + regex)

    matched = Decoder.extractWithRegex(regex, '"', htmlContent).replace('"', "")
    scriptUrl = matched[:-1]
    playerUrl = Cineestrenostv.extractScriptLevel3(scriptUrl, referer)
    logger.debug("DONE! player url is: " + str(playerUrl))
    return {
        "title": "Watch streaming",
        "permalink": True,
        "link": playerUrl,
    }
示例14: extractListVideos
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extractWithRegex [as 别名]
def extractListVideos(html):
    """Parse a YouTube playlist panel into a list of entry dicts.

    Each playlist row yields a dict with ``title`` (stripped), ``page``,
    ``finalLink`` set to ``True`` and ``thumbnail``.
    """
    tableHtml = Decoder.extract(
        '<div class="playlist-videos-container yt-scrollbar-dark yt-scrollbar">',
        '</div><div id="content" class=" content-alignment" role="main">',
        html,
    )
    videos = []
    # The chunk before the first index marker is not a row.
    for rowHtml in tableHtml.split('<span class="index">')[1:]:
        link = "/watch?" + Decoder.extract('href="/watch?', '"', rowHtml)
        title = Decoder.extract('<h4 class="yt-ui-ellipsis yt-ui-ellipsis-2">', '</h4>', rowHtml)
        if 'youtube.com' not in link:
            link = Youtube.MAIN_URL + link
        logger.debug("link: " + link + ", title is: " + title)
        image = Decoder.extractWithRegex('https://i.ytimg.com/', '"', rowHtml).replace('"', '')
        videos.append({
            "title": title.strip(),
            "page": link,
            "finalLink": True,
            "thumbnail": image,
        })
    return videos
示例15: extractIframeValue
# 需要导入模块: from core.decoder import Decoder [as 别名]
# 或者: from core.decoder.Decoder import extractWithRegex [as 别名]
def extractIframeValue(iframe, html, referer):
    """Resolve an iframe reference to a stream value.

    If ``iframe`` does not start with ``http:`` it is re-read from ``html``,
    first from a single-quoted ``<iframe src=`` and then from any
    double-quoted `` src=``. Addresses containing ``filmon.`` go through
    ``Filmoncom.launchScriptLogic``; everything else is fetched and handled
    by ``Decoder.extractSawlive`` when a sawlive embed is present, or by
    ``Cricfreetv.seekIframeScript`` otherwise. Returns that result.
    """
    if not iframe.startswith("http:"):
        # Lower-case attribute only, so ' SRC=' is not matched here.
        iframe = Decoder.extract("<iframe src='", "' ", html).replace("'", "")
    if not iframe.startswith("http:"):
        iframe = Decoder.extract(' src="', '"', html).replace('"', "")
    logger.debug("using iframeUrl: " + iframe)

    if "filmon." in iframe:
        # filmon is handled by its own provider instead of the common logic.
        logger.debug("Detected exceptional filmon.com|tv provider: " + iframe)
        return Filmoncom.launchScriptLogic(iframe, referer)[0]["url"]

    html2 = Cricfreetv.getContentFromUrl(iframe, "", Cricfreetv.cookie, referer)
    sawlivePrefix = "http://www3.sawlive.tv/embed/"
    if sawlivePrefix not in html2:
        return Cricfreetv.seekIframeScript(html2, referer, iframe)

    iframe2 = Decoder.extractWithRegex(sawlivePrefix, '"', html2).replace('"', "")
    logger.debug("detected a sawlive: " + iframe2 + ", from: " + iframe)
    return Decoder.extractSawlive(iframe2, Cricfreetv.cookie, iframe)