當前位置: 首頁>>代碼示例>>Python>>正文


Python etree.ParserError方法代碼示例

本文整理匯總了Python中lxml.etree.ParserError方法的典型用法代碼示例。如果您正苦於以下問題:Python etree.ParserError方法的具體用法?Python etree.ParserError怎麼用?Python etree.ParserError使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在lxml.etree的用法示例。


在下文中一共展示了etree.ParserError方法的9個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: feed

# 需要導入模塊: from lxml import etree [as 別名]
# 或者: from lxml.etree import ParserError [as 別名]
def feed(self, markup):
        """Feed ``markup`` to the parser in chunks of ``self.CHUNK_SIZE``.

        ``markup`` may be a byte string, a text string, or any object that
        already offers a ``read(size)`` method. Byte and text strings are
        wrapped in ``BytesIO`` / ``StringIO`` so they can be read piecewise.

        The parser is created with ``self.parser_for`` using
        ``self.soup.original_encoding`` and stored on ``self.parser``.

        Raises:
            ParserRejectedMarkup: if feeding or closing the parser raises
                ``UnicodeDecodeError``, ``LookupError`` or
                ``etree.ParserError``.
        """
        # ``unicode`` only exists on Python 2; fall back to ``str`` so the
        # text branch keeps working on Python 3 as well.
        try:
            text_type = unicode
        except NameError:
            text_type = str

        if isinstance(markup, bytes):
            markup = BytesIO(markup)
        elif isinstance(markup, text_type):
            markup = StringIO(markup)

        # Call feed() at least once, even if the markup is empty,
        # or the parser won't be initialized.
        data = markup.read(self.CHUNK_SIZE)
        try:
            self.parser = self.parser_for(self.soup.original_encoding)
            self.parser.feed(data)
            while len(data) != 0:
                # Now call feed() on the rest of the data, chunk by chunk.
                data = markup.read(self.CHUNK_SIZE)
                if len(data) != 0:
                    self.parser.feed(data)
            self.parser.close()
        # ``except ... as e`` is valid on Python 2.6+ and Python 3; the old
        # ``except ..., e`` spelling is a SyntaxError on Python 3.
        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
            raise ParserRejectedMarkup(str(e))
開發者ID:MarcelloLins,項目名稱:ServerlessCrawler-VancouverRealState,代碼行數:22,代碼來源:_lxml.py

示例2: feed

# 需要導入模塊: from lxml import etree [as 別名]
# 或者: from lxml.etree import ParserError [as 別名]
def feed(self, markup):
        """Push ``markup`` through the parser, ``self.CHUNK_SIZE`` at a time.

        ``bytes`` and ``str`` inputs are wrapped in ``BytesIO`` / ``StringIO``;
        anything else is used as-is and must provide ``read(size)``.

        The parser comes from ``self.parser_for(self.soup.original_encoding)``
        and is kept on ``self.parser``.

        Raises:
            ParserRejectedMarkup: wraps (as ``str(e)``) any
                ``UnicodeDecodeError``, ``LookupError`` or
                ``etree.ParserError`` raised while parsing.
        """
        if isinstance(markup, bytes):
            stream = BytesIO(markup)
        elif isinstance(markup, str):
            stream = StringIO(markup)
        else:
            stream = markup

        # The first chunk is always fed, even when empty; otherwise the
        # parser would never be initialized.
        chunk = stream.read(self.CHUNK_SIZE)
        try:
            self.parser = self.parser_for(self.soup.original_encoding)
            self.parser.feed(chunk)
            while len(chunk) != 0:
                # Keep pulling chunks until the stream runs dry.
                chunk = stream.read(self.CHUNK_SIZE)
                if len(chunk) == 0:
                    break
                self.parser.feed(chunk)
            self.parser.close()
        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
            raise ParserRejectedMarkup(str(e))
開發者ID:the-ethan-hunt,項目名稱:B.E.N.J.I.,代碼行數:22,代碼來源:_lxml.py

示例3: extract_html_content

# 需要導入模塊: from lxml import etree [as 別名]
# 或者: from lxml.etree import ParserError [as 別名]
def extract_html_content(self, html_body, fix_html=True):
        """Parse an HTML body and emit it together with its extracted text.

        Does nothing when ``html_body`` is ``None``. If the first parse
        attempt raises ``ValueError``, the first match of
        ``self.RE_XML_ENCODING`` is removed from the body and parsing is
        retried once; the stripped body is what gets emitted in that case.

        ``fix_html`` is accepted but not used in this method.

        Raises:
            ProcessingException: if parsing raises ``ParserError``,
                ``ParseError`` or (on the retry) ``ValueError``.
        """
        if html_body is None:
            return None

        markup = html_body
        try:
            try:
                tree = html.fromstring(markup)
            except ValueError:
                # Work around encoding declarations in the markup.
                # https://stackoverflow.com/questions/3402520
                markup = self.RE_XML_ENCODING.sub('', markup, count=1)
                tree = html.fromstring(markup)
        except (ParserError, ParseError, ValueError):
            raise ProcessingException("HTML could not be parsed.")

        self.extract_html_header(tree)
        self.cleaner(tree)
        plain_text = self.extract_html_text(tree)
        result = self.result
        result.flag(result.FLAG_HTML)
        result.emit_html_body(markup, plain_text)
開發者ID:occrp-attic,項目名稱:ingestors,代碼行數:22,代碼來源:html.py

示例4: ingest

# 需要導入模塊: from lxml import etree [as 別名]
# 或者: from lxml.etree import ParserError [as 別名]
def ingest(self, file_path):
        """Parse an XML file, render it via XSLT and emit the HTML result.

        The size check uses ``self.result.size`` when it is truthy and
        otherwise falls back to ``os.path.getsize(file_path)``.

        Raises:
            ProcessingException: if the size exceeds ``self.MAX_SIZE`` or
                ``etree.parse`` raises ``ParserError`` / ``ParseError``.
        """
        size = self.result.size
        if not size:
            size = os.path.getsize(file_path)
        if size > self.MAX_SIZE:
            raise ProcessingException("XML file is too large.")

        try:
            tree = etree.parse(file_path)
        except (ParserError, ParseError):
            raise ProcessingException("XML could not be parsed.")

        # Text is extracted from the parsed root before the XSLT transform
        # is built and applied.
        plain_text = self.extract_html_text(tree.getroot())
        rendered = etree.XSLT(self.XSLT)(tree)
        markup = html.tostring(rendered, encoding=str, pretty_print=True)

        result = self.result
        result.flag(result.FLAG_HTML)
        result.emit_html_body(markup, plain_text)
開發者ID:occrp-attic,項目名稱:ingestors,代碼行數:19,代碼來源:xml.py

示例5: _retrieve_html_page

# 需要導入模塊: from lxml import etree [as 別名]
# 或者: from lxml.etree import ParserError [as 別名]
def _retrieve_html_page(self):
        """
        Fetch the player's stats page with HTML comment tags stripped.

        The URL comes from ``self._build_url()``. The downloaded page is
        passed through ``utils._remove_html_comment_tags`` and wrapped in a
        new PyQuery object, since tables hidden inside HTML comments are
        otherwise not accessible via PyQuery.

        Returns
        -------
        PyQuery object or None
            The page as a queriable PyQuery object with the comment tags
            removed, or None if the download raised ``HTTPError`` or
            ``ParserError``, or the page text contains the
            'Page Not Found (404 error)' marker.
        """
        page_url = self._build_url()
        try:
            page = pq(page_url)
        except (HTTPError, ParserError):
            page = None
        # For NFL, a 404 page doesn't actually raise a 404 error, so the
        # page text has to be inspected for the marker as well.
        if page is None or 'Page Not Found (404 error)' in str(page):
            return None
        uncommented = utils._remove_html_comment_tags(page)
        return pq(uncommented)
開發者ID:roclark,項目名稱:sportsreference,代碼行數:27,代碼來源:roster.py

示例6: _retrieve_html_page

# 需要導入模塊: from lxml import etree [as 別名]
# 或者: from lxml.etree import ParserError [as 別名]
def _retrieve_html_page(self):
        """
        Fetch the player's stats page with HTML comment tags stripped.

        The URL comes from ``self._build_url()``. The downloaded page is
        passed through ``utils._remove_html_comment_tags`` and wrapped in a
        new PyQuery object.

        Returns
        -------
        PyQuery object or None
            The page as a queriable PyQuery object with the comment tags
            removed, or None if the download raised ``HTTPError`` or
            ``ParserError``.
        """
        page_url = self._build_url()
        try:
            page = pq(page_url)
        except (HTTPError, ParserError):
            return None
        else:
            uncommented = utils._remove_html_comment_tags(page)
            return pq(uncommented)
開發者ID:roclark,項目名稱:sportsreference,代碼行數:21,代碼來源:roster.py

示例7: _retrieve_html_page

# 需要導入模塊: from lxml import etree [as 別名]
# 或者: from lxml.etree import ParserError [as 別名]
def _retrieve_html_page(self):
        """
        Fetch the player's stats page with HTML comment tags stripped.

        The URL is built by formatting ``PLAYER_URL`` with
        ``self._player_id``. The downloaded page is passed through
        ``utils._remove_html_comment_tags`` and wrapped in a new PyQuery
        object.

        Returns
        -------
        PyQuery object or None
            The page as a queriable PyQuery object with the comment tags
            removed, or None if the download raised ``HTTPError`` or
            ``ParserError``.
        """
        player_url = PLAYER_URL % self._player_id
        try:
            page = pq(player_url)
        except (HTTPError, ParserError):
            return None
        stripped = utils._remove_html_comment_tags(page)
        return pq(stripped)
開發者ID:roclark,項目名稱:sportsreference,代碼行數:21,代碼來源:roster.py

示例8: _pull_conference_page

# 需要導入模塊: from lxml import etree [as 別名]
# 或者: from lxml.etree import ParserError [as 別名]
def _pull_conference_page(self, conference_abbreviation, year):
        """
        Download a conference page as a PyQuery object.

        The URL is built by formatting ``CONFERENCE_URL`` with the
        conference abbreviation and the year, in that order.

        Parameters
        ----------
        conference_abbreviation : string
            A string of the requested conference's abbreviation, such as
            'big-12'.
        year : string
            A string of the requested year to pull conference information from.

        Returns
        -------
        PyQuery object or None
            The downloaded page, or None if the download raised
            ``HTTPError`` or ``ParserError``.
        """
        page_url = CONFERENCE_URL % (conference_abbreviation, year)
        try:
            page = pq(page_url)
        except (HTTPError, ParserError):
            return None
        return page
開發者ID:roclark,項目名稱:sportsreference,代碼行數:21,代碼來源:conferences.py

示例9: feed

# 需要導入模塊: from lxml import etree [as 別名]
# 或者: from lxml.etree import ParserError [as 別名]
def feed(self, markup):
        """Stream ``markup`` into the parser in ``self.CHUNK_SIZE`` pieces.

        ``bytes`` and ``str`` inputs are wrapped in ``BytesIO`` / ``StringIO``;
        any other object is read from directly via ``read(size)``.

        The parser is obtained from
        ``self.parser_for(self.soup.original_encoding)`` and stored on
        ``self.parser``.

        Raises:
            ParserRejectedMarkup: wraps the original exception object when
                parsing raises ``UnicodeDecodeError``, ``LookupError`` or
                ``etree.ParserError``.
        """
        if isinstance(markup, bytes):
            source = BytesIO(markup)
        elif isinstance(markup, str):
            source = StringIO(markup)
        else:
            source = markup

        # feed() must run at least once, even for empty markup, or the
        # parser won't be initialized.
        piece = source.read(self.CHUNK_SIZE)
        try:
            self.parser = self.parser_for(self.soup.original_encoding)
            self.parser.feed(piece)
            has_more = len(piece) != 0
            while has_more:
                # Feed the remaining data, one chunk per iteration.
                piece = source.read(self.CHUNK_SIZE)
                has_more = len(piece) != 0
                if has_more:
                    self.parser.feed(piece)
            self.parser.close()
        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
            raise ParserRejectedMarkup(e)
開發者ID:Tautulli,項目名稱:Tautulli,代碼行數:22,代碼來源:_lxml.py


注:本文中的lxml.etree.ParserError方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。