Python etree.ParseError方法代码示例

本文整理汇总了Python中lxml.etree.ParseError异常类的典型用法代码示例。如果您正苦于以下问题：Python etree.ParseError的具体用法？Python etree.ParseError怎么用？Python etree.ParseError使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该异常类所在模块lxml.etree的用法示例。

在下文中一共展示了etree.ParseError方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: perform_romeo_query

# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def perform_romeo_query(self, search_terms):
        """
        Query the RoMEO endpoint at ``self.base_url`` and parse its XML reply.

        :param search_terms: query parameters; a copy is made, so the object
            passed in is not modified. When ``self.api_key`` is truthy it is
            added to the copy under the ``'ak'`` key.
        :returns: the object returned by ``ET.parse`` for the response body
        :raises MetadataSourceException: when the HTTP request fails or when
            the response body cannot be parsed as XML
        """
        params = search_terms.copy()
        if self.api_key:
            params['ak'] = self.api_key

        def describe_failure(headline, error):
            # Both failure modes report the same request context.
            return '\n'.join([
                headline,
                'URL was: ' + self.base_url,
                'Parameters were: ' + str(params),
                'Error is: ' + str(error),
            ])

        # Step 1: fetch the document.
        try:
            response = requests.get(self.base_url, params=params, timeout=20)
        except requests.exceptions.RequestException as request_error:
            raise MetadataSourceException(
                describe_failure('Error while querying RoMEO.', request_error))

        # Step 2: parse the body, decoding it as ISO-8859-1.
        try:
            latin1_parser = ET.XMLParser(encoding='ISO-8859-1')
            tree = ET.parse(BytesIO(response.content), latin1_parser)
        except ET.ParseError as parse_error:
            raise MetadataSourceException(
                describe_failure('RoMEO returned an invalid XML response.',
                                 parse_error))

        return tree

开发者ID:dissemin，项目名称:dissemin，代码行数:27，代码来源:romeo.py

示例2: __init__

# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def __init__(self, *a, **k):
        """
        Initialise ``self.ffpath`` and ``self.root`` from the given arguments.

        Keyword arguments:
            ffpath: value stored as ``self.ffpath`` (default ``None``).
            root: value stored as ``self.root`` (default ``None``); not used
                when ``tree`` is supplied.
            tree: object whose ``getroot()`` result becomes ``self.root``.

        Positional arguments may be strings, elements or element trees. The
        first element/tree fills ``self.root`` if it is still ``None``; the
        first string fills ``self.ffpath`` if it is still ``None``.

        When a path is known but no root is, the file at that path is parsed
        and its root element is stored in ``self.root``.

        Raises:
            ValueError: if a positional argument is neither a string, an
                element nor an element tree.
            IOError, et.ParseError: propagated unchanged when the file at
                ``self.ffpath`` cannot be read or parsed.
        """
        # Keyword arguments take precedence over positional ones.
        self.ffpath = k.get('ffpath')
        self.root = k['tree'].getroot() if 'tree' in k else k.get('root')

        if a:
            # Take the concrete classes from sample instances so they can be
            # used with isinstance().
            etype = type(et.Element("a"))
            ettype = type(et.ElementTree())
            for s in a:
                if isinstance(s, (etype, ettype)):
                    if self.root is None:
                        self.root = s.getroot() if isinstance(s, ettype) else s
                elif isinstance(s, str):
                    if self.ffpath is None:
                        self.ffpath = s
                else:
                    raise ValueError("XML\'s initializer only accepts string, ElementTree or Element")

        if self.ffpath is not None and self.root is None:
            try:
                self.root = et.parse(self.ffpath).getroot()
            except (IOError, et.ParseError):
                # TODO Populate tree and save it
                raise

开发者ID:ActiveState，项目名称:code，代码行数:26，代码来源:recipe-578492.py

示例3: test_transform__xml_parse_error

# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def test_transform__xml_parse_error(self):
        """``_transform`` raises ``etree.ParseError`` for a file that is not XML."""
        options = {"managed": False, "api_version": "47.0", "api_names": "Test"}
        task = create_task(ConcreteMetadataSingleEntityTransformTask, options)
        task.entity = "CustomApplication"

        with tempfile.TemporaryDirectory() as workdir:
            task._create_directories(workdir)

            # Place a deliberately malformed file under applications/.
            applications_dir = task.retrieve_dir / "applications"
            applications_dir.mkdir()
            broken_app = applications_dir / "Test.app"
            broken_app.write_text(">>>>>NOT XML<<<<<")

            with pytest.raises(etree.ParseError):
                task._transform()

开发者ID:SFDO-Tooling，项目名称:CumulusCI，代码行数:20，代码来源:test_base.py

示例4: extract_html_content

# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def extract_html_content(self, html_body, fix_html=True):
        """
        Parse an HTML string and emit its body and extracted text.

        Nothing happens when ``html_body`` is ``None``. ``fix_html`` is
        accepted but not read anywhere in this method.

        Raises ``ProcessingException`` when the markup cannot be parsed, even
        after the pattern ``self.RE_XML_ENCODING`` has been removed once.
        """
        if html_body is None:
            return

        try:
            try:
                document = html.fromstring(html_body)
            except ValueError:
                # Work around encoding declarations by removing the first
                # match and retrying; the stripped markup is what gets
                # emitted below.
                # https://stackoverflow.com/questions/3402520
                html_body = self.RE_XML_ENCODING.sub('', html_body, count=1)
                document = html.fromstring(html_body)
        except (ParserError, ParseError, ValueError):
            raise ProcessingException("HTML could not be parsed.")

        # Header extraction runs before the cleaner is applied, and text
        # extraction after it; keep this order.
        self.extract_html_header(document)
        self.cleaner(document)
        plain_text = self.extract_html_text(document)

        self.result.flag(self.result.FLAG_HTML)
        self.result.emit_html_body(html_body, plain_text)

开发者ID:occrp-attic，项目名称:ingestors，代码行数:22，代码来源:html.py

示例5: ingest

# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def ingest(self, file_path):
        """
        Parse an XML file, render it through ``self.XSLT`` and emit the result.

        Raises ``ProcessingException`` when the file is larger than
        ``self.MAX_SIZE`` or cannot be parsed.
        """
        # Use the recorded size when it is truthy, otherwise ask the
        # filesystem.
        size = self.result.size
        if not size:
            size = os.path.getsize(file_path)
        if size > self.MAX_SIZE:
            raise ProcessingException("XML file is too large.")

        try:
            document = etree.parse(file_path)
        except (ParserError, ParseError):
            raise ProcessingException("XML could not be parsed.")

        plain_text = self.extract_html_text(document.getroot())

        stylesheet = etree.XSLT(self.XSLT)
        rendered = stylesheet(document)
        markup = html.tostring(rendered, encoding=str, pretty_print=True)

        self.result.flag(self.result.FLAG_HTML)
        self.result.emit_html_body(markup, plain_text)

开发者ID:occrp-attic，项目名称:ingestors，代码行数:19，代码来源:xml.py

示例6: xml_translate

# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def xml_translate(callback, value):
    """ Translate the text found in the XML string `value` with `callback`.

        A falsy `value` is returned unchanged. If `value` cannot be parsed as
        XML, it is wrapped in a ``<div>``, parsed with the HTML parser, and
        the wrapper is cut off the serialized result again.
    """
    if not value:
        return value

    try:
        tree = parse_xml(value)
        translated = translate_xml_node(tree, callback, parse_xml, serialize_xml)
        return serialize_xml(translated)
    except etree.ParseError:
        # Fallback for translated terms: parse as HTML inside a wrapper tag.
        wrapped = parse_html(u"<div>%s</div>" % value)
        translated = translate_xml_node(wrapped, callback, parse_xml, serialize_xml)
        markup = serialize_xml(translated)
        # Drop the leading "<div>" and the trailing "</div>".
        return markup[len("<div>"):-len("</div>")]

开发者ID:guohuadeng，项目名称:odoo13-x64，代码行数:19，代码来源:translate.py

示例7: _feed_xml

# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def _feed_xml(self, data):
        """
        Pass ``data`` to ``self._parser.feed``.

        An ``etree.ParseError`` is re-raised as ``GvmError``, carrying a
        message that includes the data and the original error as its second
        argument.
        """
        try:
            self._parser.feed(data)
        except etree.ParseError as parse_error:
            message = "Cannot parse XML response. Response data read {0}".format(data)
            raise GvmError(message, parse_error)

开发者ID:greenbone，项目名称:python-gvm，代码行数:11，代码来源:connections.py

示例8: read

# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def read(feed, limit, timeout=10):
    """
    Download ``feed.url`` and turn it into a list of HTML fragments.

    Returns a 2-tuple ``(ok, payload)``:
      * ``(True, body)``  - ``body`` is the result of ``_parse`` with an
        ``<h2>`` heading for the escaped feed title prepended;
      * ``(True, None)``  - ``_parse`` returned something falsy;
      * ``(False, text)`` - one of the handled errors occurred; ``text``
        describes it.
    """
    try:
        with urllib.request.urlopen(feed.url, None, timeout) as response:
            raw = response.read()
        entries = _parse(raw, limit)
        if not entries:
            return True, None
        heading = "<h2>{}</h2>\n".format(escape(feed.title))
        return True, [heading] + entries
    except (ValueError, urllib.error.HTTPError, urllib.error.URLError,
            etree.ParseError, socket.timeout) as err:
        return False, "Error: {}: {}".format(feed.url, err)

开发者ID:lovexiaov，项目名称:python-in-practice，代码行数:14，代码来源:Feed.py

示例9: crawl

# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def crawl(url, thread_id=0):
    """
    Fetch ``url`` and return the de-duplicated list of links found in it.

    Depending on module-level flags, the page's words are queued on ``WORDS``
    (``SAVE_WORDS``) and the page is stored via ``save_page`` (``SAVE_PAGES``).
    Unless ``OVERRIDE_SIZE`` is set, a HEAD request is made first and
    ``SizeError`` is raised when the reported Content-Length exceeds 500 MB.
    """
    global WORDS, OVERRIDE_SIZE, HEADER, SAVE_PAGES, SAVE_WORDS

    if not OVERRIDE_SIZE:
        try:
            # Ask for the document size in bytes before downloading it.
            head_headers = requests.head(url, headers=HEADER).headers
            size_in_bytes = int(head_headers['Content-Length'])
        except KeyError:
            # No Content-Length header was returned; treat the page as tiny.
            size_in_bytes = 1
        if size_in_bytes > 500 * 1024 * 1024:
            # Per the original author's note, the caller's run section is
            # expected to catch this; nothing below runs in that case.
            raise SizeError

    response = requests.get(url, headers=HEADER)

    words = []
    if SAVE_WORDS:
        words = make_words(response)
        for word in words:
            WORDS.put(word)

    try:
        # Resolve links against any <base> tag, then keep only the link
        # part of each 4-tuple yielded by iterlinks.
        resolved = resolve_base_href(response.content)
        found = [target for _element, _attribute, target, _pos in iterlinks(resolved)]
    except etree.ParseError:
        # Content that cannot be parsed yields no links.
        found = []
    unique_links = list(set(found))

    if SAVE_PAGES:
        save_page(url, response)

    # Announce which link was crawled.
    if SAVE_WORDS:
        summary = 'Found {0} links and {1} words on {2}'.format(
            len(unique_links), len(words), url)
    else:
        summary = 'Found {0} links on {1}'.format(len(unique_links), url)
    write_log('CRAWL', summary, worker=thread_id)

    return unique_links

开发者ID:rivermont，项目名称:spidy，代码行数:38，代码来源:crawler.py

示例10: html

# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def html(self):
        """
        Return the parsed HTML tree of this response, computing it only once.

        The result is cached on ``self._html``. ``None`` is returned when the
        content type is listed in ``NON_HTML``, when ``self.raw`` is ``None``
        or empty, or when parsing fails.
        """
        if hasattr(self, '_html'):
            return self._html

        self._html = None
        if self.content_type in NON_HTML:
            return
        if self.raw is None or len(self.raw) == 0:
            return

        try:
            self._html = html.fromstring(self.text)
        except ValueError as value_error:
            # Only the "encoding declaration" complaint has a fallback:
            # parse from the file path instead of from the decoded text.
            if 'encoding declaration' in str(value_error):
                self._html = html.parse(self.file_path.as_posix())
        except (etree.ParserError, etree.ParseError):
            # Unparseable markup: leave the cached value as None.
            pass
        return self._html

开发者ID:alephdata，项目名称:memorious，代码行数:17，代码来源:http.py

示例11: json

# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def json(self):
        """
        Return the JSON content of the file at ``self.file_path``.

        The decoded value is cached on ``self._json``. ``ParseError`` is
        raised when ``self.file_path`` is ``None``.
        """
        if hasattr(self, '_json'):
            return self._json

        if self.file_path is None:
            raise ParseError("Cannot parse failed download.")
        with open(self.file_path, 'r') as handle:
            self._json = json.load(handle)
        return self._json

开发者ID:alephdata，项目名称:memorious，代码行数:9，代码来源:http.py

示例12: feed

# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def feed(self, data):
        """
        Feed ``data`` to the wrapped parser, retrying on undefined entities.

        When the parser reports an undefined entity, the entity name is
        registered in ``self.known_entity`` (mapped to itself) and the call is
        repeated. Every other ``etree.ParseError`` is re-raised.
        """
        self._init_parser()
        try:
            return self._original_parser.feed(data)
        except etree.ParseError as parse_error:
            message = str(parse_error)
            if 'undefined entity' not in message:
                raise parse_error

            log.warning('WARNING {}'.format(message))
            reference = re.search(r'&\w+;', message)
            if reference is None:
                # The message names no "&name;" reference, so nothing can be
                # registered.
                raise parse_error

            # Strip the leading "&" and the trailing ";".
            name = reference.group()[1:-1]
            self.known_entity[name] = name
            return self.feed(data)

开发者ID:Arello-Mobile，项目名称:confluence-publisher，代码行数:17，代码来源:confluence.py

示例13: invoke

# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def invoke(self, action, resource_uri, resource, option_set=None,
           selector_set=None, timeout=None):
        """
        Build a WSMan SOAP envelope, send it and return the parsed reply.

        :param action: value for the wsa:Action header field
        :param resource_uri: value for the wsman:ResourceURI header field
        :param resource: optional Element appended to the s:Body section;
            pass None for an empty body
        :param option_set: a wsman.OptionSet to add to the request
        :param selector_set: a wsman.SelectorSet to add to the request
        :param timeout: overrides the default wsman:OperationTimeout of the
            request, an int in seconds
        :return: the Element parsed from the server's response XML
        :raises WinRMError: when the reply's wsa:RelatesTo value differs from
            the wsa:MessageID that was sent
        """
        soap_ns = NAMESPACES['s']

        # Envelope = header + body (the body optionally wraps the resource).
        envelope = ET.Element("{%s}Envelope" % soap_ns)
        header = self._create_header(action, resource_uri, option_set,
                                     selector_set, timeout)
        envelope.append(header)
        body = ET.SubElement(envelope, "{%s}Body" % soap_ns)
        if resource is not None:
            body.append(resource)

        # Remember our message id so the reply can be matched against it.
        message_id = header.find("wsa:MessageID", namespaces=NAMESPACES).text
        request_xml = ET.tostring(envelope, encoding='utf-8', method='xml')

        try:
            reply = self.transport.send(request_xml)
        except WinRMTransportError as err:
            try:
                # If the error body is a WSManFault, raise that instead.
                raise self._parse_wsman_fault(err.response_text)
            except ET.ParseError:
                # The error body is not XML, so it is not a WSManFault.
                log.error("Failed to parse WSManFault message on WinRM error"
                          " response, raising original WinRMTransportError")
                raise err

        reply_xml = ET.fromstring(reply)
        relates_to = reply_xml.find("s:Header/wsa:RelatesTo",
                                    namespaces=NAMESPACES).text
        if relates_to != message_id:
            raise WinRMError("Received related id does not match related "
                             "expected message id: Sent: %s, Received: %s"
                             % (message_id, relates_to))
        return reply_xml

开发者ID:jborean93，项目名称:pypsrp，代码行数:54，代码来源:wsman.py

示例14: run

# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def run(self, args):
        """
        Format each XML file named in ``args.xml`` and report a summary.

        A single ``-`` argument means the file names come from
        ``_stdin_iter()``. Missing files are skipped. A parse error marks the
        run with ``EXIT_CODE_BAD_CONF_FILE`` and processing continues; any
        other exception sets ``EXIT_CODE_INTERNAL_ERROR`` and stops the loop.

        Returns the exit code; when it is still falsy and at least one file
        was rewritten, ``EXIT_CODE_FORMAT_APPLIED`` is returned.
        """
        xml_formatter = SplunkSimpleXmlFormatter()

        # Should we read a list of files from STDIN?
        read_from_stdin = len(args.xml) == 1 and args.xml[0] == "-"
        paths = _stdin_iter() if read_from_stdin else args.xml

        tally = Counter()
        exit_code = EXIT_CODE_SUCCESS
        for path in paths:
            tally["checked"] += 1
            if not os.path.isfile(path):
                self.stderr.write("Skipping missing file:  {0}\n".format(path))
                tally["missing"] += 1
                continue
            try:
                # The file is formatted in place: source and target match.
                if xml_formatter.format_xml(path, path, args.indent):
                    self.stderr.write("Replaced file {0} with formatted content\n".format(path))
                    tally["changed"] += 1
                else:
                    if not args.quiet:
                        self.stderr.write("Already formatted {0}\n".format(path))
                    tally["no-action"] += 1
                self.stderr.flush()
            except etree.ParseError as parse_error:
                self.stderr.write("Error parsing file {0}:  {1}\n".format(path, parse_error))
                self.stderr.flush()
                tally["error"] += 1
                exit_code = EXIT_CODE_BAD_CONF_FILE
            except Exception as unexpected:  # pragma: no cover
                self.stderr.write("Unhandled top-level exception while parsing {0}.  "
                                  "Aborting.\n{1}\n".format(path, unexpected))
                debug_traceback()
                tally["error"] += 1
                exit_code = EXIT_CODE_INTERNAL_ERROR
                break

        if not exit_code and tally["changed"] > 0:
            exit_code = EXIT_CODE_FORMAT_APPLIED

        # The statistics are always shown.
        self.stdout.write("Completed formatting {0[checked]} files.  rc={1} Breakdown:\n"
                          "   {0[changed]} files were formatted successfully.\n"
                          "   {0[no-action]} files were already formatted.\n"
                          "   {0[error]} files failed.\n".format(tally, exit_code))
        return exit_code

开发者ID:Kintyre，项目名称:ksconf，代码行数:48，代码来源:xmlformat.py

示例15: __init__

# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def __init__(self, url=None, file=None, text=None, **kwargs) -> None:
        """
        Create a feed from either a url or a file; exactly one must be given.

        Users will realistically almost always supply a url; file support
        mainly makes testing easier.

        Args:
            url: (optional) the url where the feed is located
            file: (optional) the file where the feed is located
            text: (optional) pre-retrieved text for the feed, e.g. when
                several feeds were downloaded beforehand; a url or file is
                still required, this only skips the download step
            **kwargs: optional metadata read from the keys ``title``,
                ``description``, ``link``, ``last_build_date`` and
                ``copyright``
        """
        # Exactly one of url/file must be set: at least one of them is None,
        # and they are not the same object (which rules out both being None).
        assert (url is None or file is None) and (url is not file)

        self._url = url
        self._file = file
        self._tree = None
        self._validated = False

        # Copy the optional metadata onto the instance, defaulting to None.
        for field in ('title', 'description', 'link', 'last_build_date',
                      'copyright'):
            setattr(self, '_' + field, kwargs.get(field))

        # A supplied title is taken to mean that all metadata was supplied
        # and that the feed is valid, so no parsing is needed.
        if self._title is not None:
            self._validated = True
            return

        if text:
            # The document content was already provided; it still has to be
            # well-formed XML.
            try:
                self._tree = etree.fromstring(text)
            except etree.ParseError:
                raise FeedParseError(
                    "Unable to parse text as an XML document")
        else:
            # Retrieve the feed and parse it into an XML document.
            self._download_feed()

        # Check that the XML document is a properly structured RSS feed,
        # then fill this object's metadata from it.
        self._validate_feed()
        self._parse_metadata()

开发者ID:xgi，项目名称:castero，代码行数:54，代码来源:feed.py

注：本文中的lxml.etree.ParseError方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。