当前位置: 首页>>代码示例>>Python>>正文


Python HTMLParser.HTMLParseError异常类代码示例

本文整理汇总了Python中HTMLParser.HTMLParseError异常类的典型用法代码示例。如果您正苦于以下问题:Python HTMLParser.HTMLParseError的具体用法?Python HTMLParser.HTMLParseError怎么用?Python HTMLParser.HTMLParseError使用的例子?那么恭喜您, 这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该异常类所在模块HTMLParser的用法示例。注意:HTMLParser模块仅存在于Python 2(Python 3中对应模块为html.parser),且HTMLParseError已在Python 3.5中被移除,因此下列示例均为Python 2代码。


在下文中一共展示了HTMLParser.HTMLParseError异常类的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: feed

# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def feed(self, markup):
        """Parse ``markup`` with a freshly built ``BeautifulSoupHTMLParser``.

        A new parser is created from ``self.parser_args`` on every call and
        is given ``self.soup`` as its ``soup`` attribute before the markup
        is fed to it.

        :param markup: the document text passed to ``parser.feed``.
        :raises HTMLParseError: re-raised unchanged, after a
            ``RuntimeWarning`` has been issued, when the parser cannot
            handle the document.
        """
        args, kwargs = self.parser_args
        parser = BeautifulSoupHTMLParser(*args, **kwargs)
        parser.soup = self.soup
        try:
            parser.feed(markup)
        except HTMLParseError:
            warnings.warn(RuntimeWarning(
                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
            # A bare ``raise`` re-raises the active exception and keeps its
            # original traceback; ``raise e`` would restart the traceback
            # here on Python 2.
            raise

# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
# string.
#
# XXX This code can be removed once most Python 3 users are on 3.2.3. 
开发者ID:evait-security,项目名称:weeman,代码行数:18,代码来源:_htmlparser.py

示例2: parse_declaration

# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def parse_declaration(self, i):
        """Handle the ``<!`` construct that starts at index ``i`` of ``self.rawdata``.

        A ``<![CDATA[`` section is handed to ``self._toStringSubclass``
        together with ``CData``. Its content ends at the first ``]]>`` found
        from ``i`` onwards, or at the end of the buffer when there is none.
        Anything else is delegated to ``HTMLParser.parse_declaration``; if
        that raises ``HTMLParseError`` the rest of the buffer from ``i`` is
        passed to ``self.handle_data`` as plain text.

        Returns the index computed for the consumed construct: three past
        the CDATA terminator position, the base-class result, or ``i`` plus
        the length of the text handled as raw data.
        """
        raw = self.rawdata
        if raw[i:i+9] == '<![CDATA[':
            end = raw.find(']]>', i)
            if end == -1:
                # Unterminated section: treat everything left as its content.
                end = len(raw)
            self._toStringSubclass(raw[i+9:end], CData)
            return end + 3
        try:
            return HTMLParser.parse_declaration(self, i)
        except HTMLParseError:
            # Bogus declaration: emit the remainder of the buffer as data.
            remainder = self.rawdata[i:]
            self.handle_data(remainder)
            return i + len(remainder)
开发者ID:pythonanywhere,项目名称:dirigible-spreadsheet,代码行数:21,代码来源:BeautifulSoup.py

示例3: http_response

# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def http_response(self, request, response):
        """Copy headers found in an HTML response's head into its message.

        The response is wrapped with ``response_seek_wrapper`` when it has
        no ``seek`` attribute. If ``is_html`` accepts the content-type
        headers and URL, the head is parsed with ``parse_head`` using a new
        ``self.head_parser_class()`` instance, and the response is rewound
        with ``seek(0)`` whether or not parsing succeeded. Each resulting
        ``(header, value)`` pair is stored under the lower-cased header name
        in ``http_message.dict`` and appended, one line at a time, to
        ``http_message.headers``. ``HTMLParser.HTMLParseError`` and
        ``sgmllib.SGMLParseError`` are ignored.

        ``request`` is not used. Returns the (possibly wrapped) response.
        """
        if not hasattr(response, "seek"):
            response = response_seek_wrapper(response)
        http_message = response.info()
        url = response.geturl()
        ct_hdrs = http_message.getheaders("content-type")
        if not is_html(ct_hdrs, url, self._allow_xhtml):
            return response

        try:
            # Nested so the rewind always happens before the headers below
            # are touched, and before any parse error is swallowed.
            try:
                html_headers = parse_head(response,
                                          self.head_parser_class())
            finally:
                response.seek(0)
        except (HTMLParser.HTMLParseError,
                sgmllib.SGMLParseError):
            return response

        for name, value in html_headers:
            # add a header
            http_message.dict[name.lower()] = value
            header_text = name + ": " + value
            for piece in header_text.split("\n"):
                http_message.headers.append(piece + "\n")
        return response
开发者ID:rajeshmajumdar,项目名称:BruteXSS,代码行数:26,代码来源:_http.py

示例4: feed

# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def feed(self, markup):
        """Parse ``markup`` with a freshly built ``BeautifulSoupHTMLParser``.

        A new parser is created from ``self.parser_args`` on every call and
        is given ``self.soup`` as its ``soup`` attribute before the markup
        is fed to it.

        :param markup: the document text passed to ``parser.feed``.
        :raises HTMLParseError: re-raised unchanged, after a
            ``RuntimeWarning`` has been issued, when the parser cannot
            handle the document.
        """
        args, kwargs = self.parser_args
        parser = BeautifulSoupHTMLParser(*args, **kwargs)
        parser.soup = self.soup
        try:
            parser.feed(markup)
        except HTMLParseError:
            warnings.warn(RuntimeWarning(
                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
            # A bare ``raise`` re-raises the active exception and keeps its
            # original traceback; ``raise e`` would restart the traceback
            # here on Python 2.
            raise
开发者ID:MarcelloLins,项目名称:ServerlessCrawler-VancouverRealState,代码行数:12,代码来源:_htmlparser.py

示例5: _parse_error

# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def _parse_error(self, source):
        """Assert that parsing ``source`` raises ``HTMLParser.HTMLParseError``.

        The check feeds ``source`` to a new ``HTMLParser.HTMLParser`` and
        then closes it; the error may come from either call.
        """
        def feed_and_close():
            # ``source`` is taken from the enclosing call.
            checker = HTMLParser.HTMLParser()
            checker.feed(source)
            checker.close()

        self.assertRaises(HTMLParser.HTMLParseError, feed_and_close)
开发者ID:IronLanguages,项目名称:ironpython2,代码行数:8,代码来源:test_htmlparser.py

示例6: StripTags

# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def StripTags(str):
  """Returns the string with HTML tags stripped.

  Args:
    str: An html string.

  Returns:
    The html string with all tags stripped. If there was a parse error, returns
    the text successfully parsed so far.
  """
  # Brute force approach to stripping as much HTML as possible. If there is a
  # parsing error, don't strip text before parse error position, and continue
  # trying from there.
  #
  # Work on a local copy so the parameter (which shadows the builtin ``str``)
  # is never rebound.
  remaining = str
  final_text = ''
  while True:
    try:
      stripper = _HtmlStripper()
      stripper.feed(remaining)
      stripper.close()
      final_text += stripper.get_output()
      # The accumulated text must be handed back to the caller; without this
      # return the function would always yield None.
      return final_text
    except HTMLParser.HTMLParseError as e:
      # Keep the text before the error untouched, skip one character, and
      # retry on what is left. Each retry shortens ``remaining``, so the loop
      # terminates.
      # NOTE(review): HTMLParseError.offset is documented as an offset within
      # the current line, so for multi-line input this slice may not match
      # the absolute error position - confirm against callers.
      final_text += remaining[:e.offset]
      remaining = remaining[e.offset + 1:]
开发者ID:google,项目名称:closure-linter,代码行数:28,代码来源:htmlutil.py

示例7: feed

# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def feed(self, data):
            """Feed ``data`` to the base ``HTMLParser.HTMLParser`` parser.

            :param data: text passed straight to ``HTMLParser.HTMLParser.feed``.
            :raises ParseError: wraps any ``HTMLParser.HTMLParseError`` raised
                by the base parser, so callers only need to catch one type.
            """
            try:
                HTMLParser.HTMLParser.feed(self, data)
            # ``except ... as`` works on Python 2.6+ and Python 3; the comma
            # form is a syntax error on Python 3.
            except HTMLParser.HTMLParseError as exc:
                raise ParseError(exc)
开发者ID:krintoxi,项目名称:NoobSec-Toolkit,代码行数:7,代码来源:clientform.py

示例8: feed

# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def feed(self, data):
        """Feed ``data`` to the base ``HTMLParser.HTMLParser`` parser.

        :param data: text passed straight to ``HTMLParser.HTMLParser.feed``.
        :raises ParseError: wraps any ``HTMLParser.HTMLParseError`` raised by
            the base parser, so callers only need to catch one type.
        """
        try:
            HTMLParser.HTMLParser.feed(self, data)
        # ``except ... as`` works on Python 2.6+ and Python 3; the comma form
        # is a syntax error on Python 3.
        except HTMLParser.HTMLParseError as exc:
            raise ParseError(exc)
开发者ID:rajeshmajumdar,项目名称:BruteXSS,代码行数:7,代码来源:_form.py

示例9: strip_tags

# 需要导入模块: import HTMLParser [as 别名]
# 或者: from HTMLParser import HTMLParseError [as 别名]
def strip_tags(html):
    """Return the data collected by an ``HTMLTagStripper`` fed with ``html``.

    An ``HTMLParseError`` raised while feeding is logged through
    ``get_logger()`` and otherwise ignored, so whatever the stripper had
    gathered up to that point is still returned.
    """
    stripper = HTMLTagStripper()
    try:
        stripper.feed(html)
    except HTMLParseError:
        # Best effort: report the failure and fall through to the return.
        get_logger().error('error stripping tags', raw_html=html)
    return stripper.get_data()

# https://djangosnippets.org/snippets/19/ 
开发者ID:nylas,项目名称:sync-engine,代码行数:11,代码来源:html.py


注:本文中的HTMLParser.HTMLParseError方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。