This article collects typical usage examples of Python's lxml.etree.ParserError exception. If you are unsure what etree.ParserError does, how to handle it, or what real-world code that catches it looks like, the examples selected below should help. You can also explore the containing module, lxml.etree, for further usage examples.
Below are 9 code examples involving etree.ParserError, ordered by popularity.
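Before the examples, a minimal sketch (assuming nothing beyond lxml itself) of when this exception fires: lxml.html.fromstring() raises etree.ParserError when no tree can be built from the input, e.g. for an empty document.

from lxml import etree, html

try:
    html.fromstring("")  # an empty document cannot produce a tree
except etree.ParserError as exc:
    print("parse failed:", exc)  # parse failed: Document is empty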
Example 1: feed
# Required import: from lxml import etree [as alias]
# Or: from lxml.etree import ParserError [as alias]
# Also used: from io import BytesIO, StringIO
def feed(self, markup):
    if isinstance(markup, bytes):
        markup = BytesIO(markup)
    elif isinstance(markup, unicode):  # Python 2: the unicode type
        markup = StringIO(markup)

    # Call feed() at least once, even if the markup is empty,
    # or the parser won't be initialized.
    data = markup.read(self.CHUNK_SIZE)
    try:
        self.parser = self.parser_for(self.soup.original_encoding)
        self.parser.feed(data)
        while len(data) != 0:
            # Now call feed() on the rest of the data, chunk by chunk.
            data = markup.read(self.CHUNK_SIZE)
            if len(data) != 0:
                self.parser.feed(data)
        self.parser.close()
    except (UnicodeDecodeError, LookupError, etree.ParserError), e:
        # Python 2 except syntax; Example 2 shows the Python 3 form.
        raise ParserRejectedMarkup(str(e))
Example 2: feed
# Required import: from lxml import etree [as alias]
# Or: from lxml.etree import ParserError [as alias]
# Also used: from io import BytesIO, StringIO
def feed(self, markup):
    if isinstance(markup, bytes):
        markup = BytesIO(markup)
    elif isinstance(markup, str):
        markup = StringIO(markup)

    # Call feed() at least once, even if the markup is empty,
    # or the parser won't be initialized.
    data = markup.read(self.CHUNK_SIZE)
    try:
        self.parser = self.parser_for(self.soup.original_encoding)
        self.parser.feed(data)
        while len(data) != 0:
            # Now call feed() on the rest of the data, chunk by chunk.
            data = markup.read(self.CHUNK_SIZE)
            if len(data) != 0:
                self.parser.feed(data)
        self.parser.close()
    except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
        raise ParserRejectedMarkup(str(e))
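As a usage sketch (the markup here is illustrative): feed() is not called directly. BeautifulSoup drives it when the lxml tree builder is selected, and a builder that hits etree.ParserError converts it into ParserRejectedMarkup so bs4 can try another parser or report a uniform error.

from bs4 import BeautifulSoup

soup = BeautifulSoup("<p>hello</p>", "lxml")  # invokes the builder's feed()
print(soup.p.text)  # hello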
Example 3: extract_html_content
# Required import: from lxml import etree [as alias]
# Or: from lxml.etree import ParserError [as alias]
# Also used: from lxml import html
def extract_html_content(self, html_body, fix_html=True):
    """Ingestor implementation."""
    if html_body is None:
        return
    try:
        try:
            doc = html.fromstring(html_body)
        except ValueError:
            # Skip around encoding declarations.
            # https://stackoverflow.com/questions/3402520
            html_body = self.RE_XML_ENCODING.sub('', html_body, count=1)
            doc = html.fromstring(html_body)
    except (ParserError, ParseError, ValueError):
        raise ProcessingException("HTML could not be parsed.")
    self.extract_html_header(doc)
    self.cleaner(doc)
    text = self.extract_html_text(doc)
    self.result.flag(self.result.FLAG_HTML)
    self.result.emit_html_body(html_body, text)
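RE_XML_ENCODING is not defined in the snippet above. The ValueError it works around is lxml's refusal to parse a Unicode string that carries an encoding declaration; a plausible stand-in pattern (an assumption, not the ingestor's actual regex) strips a leading XML declaration:

import re

# Hypothetical pattern: removes a leading '<?xml version="1.0" encoding="utf-8"?>'
RE_XML_ENCODING = re.compile(r'^<\?xml[^>]*\?>\s*', re.IGNORECASE)

body = '<?xml version="1.0" encoding="utf-8"?><html><body>hi</body></html>'
print(RE_XML_ENCODING.sub('', body, count=1))  # declaration removed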
Example 4: ingest
# Required import: from lxml import etree [as alias]
# Or: from lxml.etree import ParserError [as alias]
# Also used: from lxml import html; import os
def ingest(self, file_path):
    """Ingestor implementation."""
    file_size = self.result.size or os.path.getsize(file_path)
    if file_size > self.MAX_SIZE:
        raise ProcessingException("XML file is too large.")
    try:
        doc = etree.parse(file_path)
    except (ParserError, ParseError):
        raise ProcessingException("XML could not be parsed.")
    text = self.extract_html_text(doc.getroot())
    transform = etree.XSLT(self.XSLT)
    html_doc = transform(doc)
    html_body = html.tostring(html_doc, encoding=str, pretty_print=True)
    self.result.flag(self.result.FLAG_HTML)
    self.result.emit_html_body(html_body, text)
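The XSLT class attribute is not shown above. A self-contained sketch of the etree.XSLT transform step, with a stand-in stylesheet and input document (both illustrative, not the ingestor's actual XSLT):

from lxml import etree

XSLT = etree.XML('''<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:template match="/">
    <html><body><xsl:value-of select="/note/body"/></body></html>
  </xsl:template>
</xsl:stylesheet>''')

doc = etree.fromstring('<note><body>hello</body></note>')
transform = etree.XSLT(XSLT)
print(str(transform(doc)))  # an <html> document whose body reads "hello"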
Example 5: _retrieve_html_page
# Required import: from lxml import etree [as alias]
# Or: from lxml.etree import ParserError [as alias]
# Also used: from pyquery import PyQuery as pq
def _retrieve_html_page(self):
    """
    Download the requested player's stats page.

    Download the requested page and strip all of the comment tags before
    returning a PyQuery object which will be used to parse the data.
    Oftentimes, important data is contained in tables which are hidden in
    HTML comments and not accessible via PyQuery.

    Returns
    -------
    PyQuery object
        The requested page is returned as a queriable PyQuery object with
        the comment tags removed.
    """
    url = self._build_url()
    try:
        url_data = pq(url)
    except (HTTPError, ParserError):
        return None
    # For NFL, a 404 page doesn't actually raise a 404 error, so it needs
    # to be manually checked.
    if 'Page Not Found (404 error)' in str(url_data):
        return None
    return pq(utils._remove_html_comment_tags(url_data))
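Examples 5 through 8 share one pattern: passing a URL string to pq() fetches and parses the page in one step, so HTTP failures and unparseable (e.g. empty) responses both need guarding. A condensed sketch, assuming urllib's HTTPError as the fetch error (the exception used in these projects) and a hypothetical URL:

from urllib.error import HTTPError

from lxml.etree import ParserError
from pyquery import PyQuery as pq

def retrieve_page(url):
    try:
        return pq(url)  # fetches the URL and parses the response
    except (HTTPError, ParserError):
        return None

page = retrieve_page('https://example.com/stats')  # hypothetical target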
Example 6: _retrieve_html_page
# Required import: from lxml import etree [as alias]
# Or: from lxml.etree import ParserError [as alias]
# Also used: from pyquery import PyQuery as pq
def _retrieve_html_page(self):
    """
    Download the requested player's stats page.

    Download the requested page and strip all of the comment tags before
    returning a PyQuery object which will be used to parse the data.

    Returns
    -------
    PyQuery object
        The requested page is returned as a queriable PyQuery object with
        the comment tags removed.
    """
    url = self._build_url()
    try:
        url_data = pq(url)
    except (HTTPError, ParserError):
        return None
    return pq(utils._remove_html_comment_tags(url_data))
Example 7: _retrieve_html_page
# Required import: from lxml import etree [as alias]
# Or: from lxml.etree import ParserError [as alias]
# Also used: from pyquery import PyQuery as pq
def _retrieve_html_page(self):
    """
    Download the requested player's stats page.

    Download the requested page and strip all of the comment tags before
    returning a PyQuery object which will be used to parse the data.

    Returns
    -------
    PyQuery object
        The requested page is returned as a queriable PyQuery object with
        the comment tags removed.
    """
    url = PLAYER_URL % self._player_id
    try:
        url_data = pq(url)
    except (HTTPError, ParserError):
        return None
    return pq(utils._remove_html_comment_tags(url_data))
Example 8: _pull_conference_page
# Required import: from lxml import etree [as alias]
# Or: from lxml.etree import ParserError [as alias]
# Also used: from pyquery import PyQuery as pq
def _pull_conference_page(self, conference_abbreviation, year):
    """
    Download the conference page.

    Download the conference page for the requested conference and season
    and create a PyQuery object.

    Parameters
    ----------
    conference_abbreviation : string
        A string of the requested conference's abbreviation, such as
        'big-12'.
    year : string
        A string of the requested year to pull conference information from.
    """
    try:
        return pq(CONFERENCE_URL % (conference_abbreviation, year))
    except (HTTPError, ParserError):
        return None
Example 9: feed
# Required import: from lxml import etree [as alias]
# Or: from lxml.etree import ParserError [as alias]
# Also used: from io import BytesIO, StringIO
def feed(self, markup):
    if isinstance(markup, bytes):
        markup = BytesIO(markup)
    elif isinstance(markup, str):
        markup = StringIO(markup)

    # Call feed() at least once, even if the markup is empty,
    # or the parser won't be initialized.
    data = markup.read(self.CHUNK_SIZE)
    try:
        self.parser = self.parser_for(self.soup.original_encoding)
        self.parser.feed(data)
        while len(data) != 0:
            # Now call feed() on the rest of the data, chunk by chunk.
            data = markup.read(self.CHUNK_SIZE)
            if len(data) != 0:
                self.parser.feed(data)
        self.parser.close()
    except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
        raise ParserRejectedMarkup(e)
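Finally, the chunked feed()/close() loop in Examples 1, 2 and 9 can be reproduced with lxml's own incremental parser; a standalone sketch with an arbitrary chunk size and illustrative input:

from io import StringIO
from lxml import etree

CHUNK_SIZE = 4
parser = etree.XMLParser()
stream = StringIO("<root><a>1</a><a>2</a></root>")

data = stream.read(CHUNK_SIZE)
while data:
    parser.feed(data)
    data = stream.read(CHUNK_SIZE)
root = parser.close()  # returns the root element of the parsed tree
print(etree.tostring(root))  # b'<root><a>1</a><a>2</a></root>'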