本文整理汇总了 Python 中 HTMLParser.HTMLParseError 异常类的典型用法代码示例。如果您正苦于以下问题：Python HTMLParser.HTMLParseError 的具体用法？Python HTMLParser.HTMLParseError 怎么用？Python HTMLParser.HTMLParseError 使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该异常类所在模块 HTMLParser 的用法示例。
在下文中一共展示了 HTMLParser.HTMLParseError 异常类的 9 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: feed
# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def feed(self, markup):
    """Parse ``markup`` with a new BeautifulSoupHTMLParser attached to ``self.soup``.

    The parser is constructed from the positional and keyword arguments
    stored in ``self.parser_args``.

    Raises:
        HTMLParseError: re-raised unchanged after a RuntimeWarning has been
            issued advising the user to install an external parser.
    """
    args, kwargs = self.parser_args
    parser = BeautifulSoupHTMLParser(*args, **kwargs)
    parser.soup = self.soup
    try:
        parser.feed(markup)
    except HTMLParseError:
        warnings.warn(RuntimeWarning(
            "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
        # Bare ``raise`` re-raises the active exception and keeps the
        # traceback pointing at the original failure inside the parser.
        raise
# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
# string.
#
# XXX This code can be removed once most Python 3 users are on 3.2.3.
示例2: parse_declaration
# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def parse_declaration(self, i):
    """Handle the declaration that begins at index ``i`` of ``self.rawdata``.

    A ``<![CDATA[`` section is handed to ``self._toStringSubclass`` together
    with the ``CData`` class. Anything else is delegated to
    ``HTMLParser.parse_declaration``; if that raises ``HTMLParseError`` the
    rest of the buffer is passed to ``self.handle_data`` as raw data.

    Returns:
        The index computed for the end of the handled text: three past the
        ``]]>`` search result for CDATA, the base parser's result for other
        declarations, or the end of the buffer after a parse error.
    """
    cdata_open = '<![CDATA['
    if self.rawdata[i:i + len(cdata_open)] != cdata_open:
        try:
            return HTMLParser.parse_declaration(self, i)
        except HTMLParseError:
            # Bogus declaration: emit everything from ``i`` onward as data.
            remainder = self.rawdata[i:]
            self.handle_data(remainder)
            return i + len(remainder)

    end = self.rawdata.find(']]>', i)
    if end == -1:
        # Unterminated CDATA section: take everything up to the buffer end.
        end = len(self.rawdata)
    content = self.rawdata[i + len(cdata_open):end]
    self._toStringSubclass(content, CData)
    return end + 3
示例3: http_response
# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def http_response(self, request, response):
    """Copy headers parsed from an HTML document's head into the response's message.

    The response is wrapped with ``response_seek_wrapper`` when it has no
    ``seek`` attribute. If ``is_html`` accepts the content-type headers and
    URL, ``parse_head`` is run over the response and each (name, value) pair
    it yields is added to the message's ``dict`` and ``headers``. Parse
    errors are ignored and the response is returned without extra headers.

    Returns:
        The (possibly wrapped) response, rewound to offset 0 whenever
        head parsing was attempted.
    """
    if not hasattr(response, "seek"):
        response = response_seek_wrapper(response)
    message = response.info()
    url = response.geturl()
    content_types = message.getheaders("content-type")
    if not is_html(content_types, url, self._allow_xhtml):
        return response

    try:
        try:
            head_parser = self.head_parser_class()
            html_headers = parse_head(response, head_parser)
        finally:
            # Always rewind so later consumers read the body from the start.
            response.seek(0)
    except (HTMLParser.HTMLParseError, sgmllib.SGMLParseError):
        # Best effort: an unparsable head just means no extra headers.
        return response

    for name, value in html_headers:
        # add a header
        message.dict[name.lower()] = value
        rendered = name + ": " + value
        for piece in rendered.split("\n"):
            message.headers.append(piece + "\n")
    return response
示例4: feed
# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def feed(self, markup):
    """Parse ``markup`` with a new BeautifulSoupHTMLParser attached to ``self.soup``.

    The parser is constructed from the positional and keyword arguments
    stored in ``self.parser_args``.

    Raises:
        HTMLParseError: re-raised unchanged after a RuntimeWarning has been
            issued advising the user to install an external parser.
    """
    args, kwargs = self.parser_args
    parser = BeautifulSoupHTMLParser(*args, **kwargs)
    parser.soup = self.soup
    try:
        parser.feed(markup)
    except HTMLParseError:
        warnings.warn(RuntimeWarning(
            "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
        # Bare ``raise`` re-raises the active exception and keeps the
        # traceback pointing at the original failure inside the parser.
        raise
示例5: _parse_error
# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def _parse_error(self, source):
    """Assert that feeding ``source`` to a fresh HTMLParser and closing it raises HTMLParseError."""
    def feed_and_close():
        # Both feed() and close() run inside the callable, so an error
        # raised by either one satisfies the assertion.
        html_parser = HTMLParser.HTMLParser()
        html_parser.feed(source)
        html_parser.close()

    self.assertRaises(HTMLParser.HTMLParseError, feed_and_close)
示例6: StripTags
# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def StripTags(str):
    """Returns the string with HTML tags stripped.

    Args:
      str: An html string.

    Returns:
      The html string with all tags stripped. If there was a parse error, returns
      the text successfully parsed so far.
    """
    # Brute force approach to stripping as much HTML as possible. If there is a
    # parsing error, don't strip text before parse error position, and continue
    # trying from there.
    final_text = ''
    while True:
        try:
            strip = _HtmlStripper()
            strip.feed(str)
            strip.close()
            final_text += strip.get_output()
            break
        except HTMLParser.HTMLParseError as e:
            # NOTE(review): HTMLParseError.offset looks like a column within
            # the failing line rather than an index into the whole string;
            # confirm this slicing is right for multi-line input.
            final_text += str[:e.offset]
            # Skip the character at the error position and retry on the rest.
            str = str[e.offset + 1:]
    # The docstring promises a result; without this the function returned None.
    return final_text
示例7: feed
# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def feed(self, data):
    """Feed ``data`` to the base ``HTMLParser.HTMLParser.feed``.

    Raises:
        ParseError: constructed from the underlying ``HTMLParseError`` when
            the base parser fails.
    """
    try:
        HTMLParser.HTMLParser.feed(self, data)
    except HTMLParser.HTMLParseError as exc:
        # ``as`` form is valid on Python 2.6+ and 3; the comma form is 2-only.
        raise ParseError(exc)
示例8: feed
# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def feed(self, data):
    """Feed ``data`` to the base ``HTMLParser.HTMLParser.feed``.

    Raises:
        ParseError: constructed from the underlying ``HTMLParseError`` when
            the base parser fails.
    """
    try:
        HTMLParser.HTMLParser.feed(self, data)
    except HTMLParser.HTMLParseError as exc:
        # ``as`` form is valid on Python 2.6+ and 3; the comma form is 2-only.
        raise ParseError(exc)
示例9: strip_tags
# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def strip_tags(html):
    """Return the data collected by an ``HTMLTagStripper`` fed with ``html``.

    If feeding raises ``HTMLParseError``, an error is logged with the raw
    input and whatever the stripper collected up to that point is returned.
    """
    stripper = HTMLTagStripper()
    try:
        stripper.feed(html)
    except HTMLParseError:
        # Best effort: log and fall through to return the partial result.
        logger = get_logger()
        logger.error('error stripping tags', raw_html=html)
    return stripper.get_data()
# https://djangosnippets.org/snippets/19/