本文整理汇总了Python中youtube_dl.utils.unescapeHTML函数的典型用法代码示例。如果您正苦于以下问题：Python unescapeHTML函数的具体用法？Python unescapeHTML怎么用？Python unescapeHTML使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了unescapeHTML函数的11个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse_page
def parse_page(self, url):
    """Scrape video entries from the page at ``url``.

    The page is fetched with ``self._download_webpage`` and searched for
    anchor/image pairs. Returns a list of dicts, one per match, with the
    keys ``url`` (prefixed with ``http://www.canalplus.fr``), ``title``
    (the HTML-unescaped ``alt`` text) and ``thumbnail``.
    """
    html = self._download_webpage(url, 'main')
    matches = re.findall(r'<a href="([^"]+vid=[0-9]+)" onclick="[^"]+">\s*<img src="([^"]+)"\s+alt="([^"]+)"(?:\s+width="\d+"\s+height="\d+"\s+data-frz-src="([^"]+)")?', html)
    entries = []
    for href, img_src, alt_text, lazy_src in matches:
        entries.append({
            'url': 'http://www.canalplus.fr' + href,
            'title': unescapeHTML(alt_text),
            # The data-frz-src group is optional; findall yields '' when it
            # did not match, in which case the plain src is used.
            'thumbnail': lazy_src if lazy_src else img_src,
        })
    return entries
示例2: test_unescape_html
def test_unescape_html(self):
    """Check that unescapeHTML decodes character references.

    NOTE(review): the entity spellings below were reconstructed from their
    rendered characters (the snippet had been HTML-decoded, which left the
    assertions vacuous and the last line unparsable) -- confirm against the
    upstream test.
    """
    # Not a character reference: must pass through unchanged.
    self.assertEqual(unescapeHTML('%20;'), '%20;')
    # Hexadecimal and decimal numeric references for the same character.
    self.assertEqual(unescapeHTML('&#x2F;'), '/')
    self.assertEqual(unescapeHTML('&#47;'), '/')
    self.assertEqual(unescapeHTML('&eacute;'), 'é')
    # Out-of-range code point: expected to be left untouched.
    self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
    # HTML5 entities
    self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
示例3: test_unescape_html
def test_unescape_html(self):
    """Check that unescapeHTML decodes character references.

    NOTE(review): the entity spellings below were reconstructed from their
    rendered characters (the snippet had been HTML-decoded, which left the
    assertions comparing a string with itself) -- confirm against the
    upstream test.
    """
    # Not a character reference: must pass through unchanged.
    self.assertEqual(unescapeHTML("%20;"), "%20;")
    # Hexadecimal and decimal numeric references for the same character.
    self.assertEqual(unescapeHTML("&#x2F;"), "/")
    self.assertEqual(unescapeHTML("&#47;"), "/")
    self.assertEqual(unescapeHTML("&eacute;"), "é")
    # Out-of-range code point: expected to be left untouched.
    self.assertEqual(unescapeHTML("&#2013266066;"), "&#2013266066;")
    # HTML5 entities
    self.assertEqual(unescapeHTML("&period;&apos;"), ".'")
示例4: parse_page
def parse_page(self, url):
    """Scrape video entries from the page at ``url``.

    The page is fetched with ``self._download_webpage``. Link/thumbnail
    pairs and link/title pairs are extracted with two separate regexes and
    combined by position.

    Returns a list of dicts with the keys ``url``, ``title`` (HTML-unescaped)
    and ``thumbnail``.
    """
    webpage = self._download_webpage(url, 'main')
    url_thumb_list = re.findall(r'<a\s+href="([^"]+)"><img\s+width="\d+"\s+height="\d+"\s+src="([^"]+)"', webpage)
    url_title_list = re.findall(r'<h3 class="internet-title"><a href="([^"]+)">([^<]+)<', webpage)
    # zip() pairs the two lists by position and stops at the shorter one, so
    # a page with fewer titles than thumbnails yields the entries that can be
    # paired instead of raising IndexError.
    return [
        {
            'url': link,
            'title': unescapeHTML(title),
            'thumbnail': thumbnail,
        }
        for (link, thumbnail), (_title_link, title)
        in zip(url_thumb_list, url_title_list)
    ]
示例5: parse_page
def parse_page(self, url):
    """Scrape YouTube watch links from the page at ``url``.

    The page is fetched with ``self._download_webpage``; while
    ``self.parse_page_counter`` is 0 it is fetched a second time.

    Returns a list of dicts with the keys ``url``, ``title`` (HTML-unescaped)
    and ``thumbnail`` (built from the video id).
    """
    html = self._download_webpage(url, 'main')
    # returns an incomplete page the first time
    if self.parse_page_counter == 0:
        html = self._download_webpage(url, 'main')
    # NOTE(review): the snippet's indentation was lost, so it is unclear
    # whether this increment belongs inside the `if` above -- confirm.
    self.parse_page_counter += 1
    matches = re.findall(r'<a href="/watch\?v=([^"]+)" class="[^"]+" data-sessionlink="[^"]+" title="([^"]+)"', html)
    return [
        {
            'url': 'https://www.youtube.com/watch?v=' + video_id,
            'title': unescapeHTML(raw_title),
            'thumbnail': 'https://i.ytimg.com/vi/' + video_id + '/mqdefault.jpg',
        }
        for video_id, raw_title in matches
    ]
示例6: parse_page
def parse_page(self, url):
    """Scrape video entries from the page at ``url``.

    The page is fetched with ``self._download_webpage``. Links, thumbnails
    and titles are extracted with three separate regexes and combined by
    index, one entry per title.

    Returns a list of dicts with the keys ``url`` (prefixed with
    ``http://www.tf1.fr``), ``title`` (HTML-unescaped) and ``thumbnail``
    (prefixed with ``http:``).
    """
    html = self._download_webpage(url, 'main')
    print('parse_page')
    links = re.findall(r'<a\s+href="([^"]+)"\s+class="videoLink', html)
    thumbnails = re.findall(r'data-srcset="([^ ]+) 1x', html)
    titles = re.findall(r'<p class="title">([^<]+)</p><p class="stitle">', html)
    # Titles are matched against the trailing len(titles) links.
    link_offset = len(links) - len(titles)
    entries = []
    for index, raw_title in enumerate(titles):
        entries.append({
            'url': 'http://www.tf1.fr' + links[link_offset + index],
            'title': unescapeHTML(raw_title),
            # Only every third data-srcset match is used.
            'thumbnail': 'http:' + thumbnails[index * 3],
        })
    return entries
示例7: test_unescape_html
def test_unescape_html(self):
    """Check that unescapeHTML decodes character references.

    NOTE(review): the entity spellings below were reconstructed from their
    rendered characters (the snippet had been HTML-decoded, which left the
    assertions comparing a string with itself) -- confirm against the
    upstream test.
    """
    # Not a character reference: must pass through unchanged.
    self.assertEqual(unescapeHTML('%20;'), '%20;')
    # Hexadecimal and decimal numeric references for the same character.
    self.assertEqual(unescapeHTML('&#x2F;'), '/')
    self.assertEqual(unescapeHTML('&#47;'), '/')
    self.assertEqual(unescapeHTML('&eacute;'), 'é')
    # Out-of-range code point: expected to be left untouched.
    self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
示例8: test_unescape_html
def test_unescape_html(self):
    """Check that unescapeHTML leaves a non-entity string unchanged."""
    raw = _compat_str('%20;')
    expected = _compat_str('%20;')
    self.assertEqual(unescapeHTML(raw), expected)
示例9: test_unescape_html
def test_unescape_html(self):
    """Check that unescapeHTML decodes a named entity and ignores non-entities.

    NOTE(review): '&eacute;' was reconstructed from its rendered character
    (the snippet had been HTML-decoded, which left the assertion comparing
    'é' with itself) -- confirm against the upstream test.
    """
    # Not a character reference: must pass through unchanged.
    self.assertEqual(unescapeHTML('%20;'), '%20;')
    self.assertEqual(
        unescapeHTML('&eacute;'), 'é')
示例10: test_unescape_html
def test_unescape_html(self):
    """Check that unescapeHTML decodes a named entity and ignores non-entities.

    NOTE(review): "&eacute;" was reconstructed from its rendered character
    (the snippet had been HTML-decoded, which left the assertion comparing
    "é" with itself) -- confirm against the upstream test.
    """
    # Not a character reference: must pass through unchanged.
    self.assertEqual(unescapeHTML("%20;"), "%20;")
    self.assertEqual(unescapeHTML("&eacute;"), "é")
示例11: test_unescape_html
def test_unescape_html(self):
    """Check that unescapeHTML leaves a non-entity string unchanged."""
    actual = unescapeHTML(u"%20;")
    self.assertEqual(actual, u"%20;")