本文整理汇总了Python中lxml.etree.ParseError异常的典型用法代码示例。如果您正苦于以下问题:Python etree.ParseError的具体用法?Python etree.ParseError怎么用?Python etree.ParseError使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该异常所在模块lxml.etree的用法示例。
在下文中一共展示了etree.ParseError异常的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: perform_romeo_query
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def perform_romeo_query(self, search_terms):
    """
    Query the RoMEO endpoint at ``self.base_url`` and return the parsed XML.

    A copy of ``search_terms`` is used as the query parameters; when
    ``self.api_key`` is set it is added under the ``'ak'`` key. The response
    body is parsed with an ISO-8859-1 XML parser.

    Raises MetadataSourceException when the HTTP request fails or when the
    response body cannot be parsed as XML.
    """
    params = search_terms.copy()
    if self.api_key:
        params['ak'] = self.api_key

    def failure(headline, error):
        # Build the exception lazily so nothing is formatted on success.
        return MetadataSourceException(
            headline + '\n' +
            'URL was: ' + self.base_url + '\n' +
            'Parameters were: ' + str(params) + '\n' +
            'Error is: ' + str(error))

    # Step 1: fetch the raw response.
    try:
        response = requests.get(self.base_url, params=params, timeout=20)
    except requests.exceptions.RequestException as e:
        raise failure('Error while querying RoMEO.', e)

    # Step 2: turn the response bytes into an element tree.
    try:
        latin1_parser = ET.XMLParser(encoding='ISO-8859-1')
        tree = ET.parse(BytesIO(response.content), latin1_parser)
    except ET.ParseError as e:
        raise failure('RoMEO returned an invalid XML response.', e)
    return tree
示例2: __init__
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def __init__(self, *a, **k):
    """
    Initialise the wrapper from keyword and/or positional arguments.

    Keyword arguments:
        ffpath: value later handed to ``et.parse`` (presumably a file
            path -- confirm against callers). Defaults to None.
        root: an already-built root element. Defaults to None.
        tree: an element tree; when given, its ``getroot()`` is used as the
            root and the ``root`` keyword is ignored.

    Positional arguments may be Element / ElementTree objects or strings.
    The first Element/ElementTree fills ``self.root`` if it is still unset;
    the first string fills ``self.ffpath`` if it is still unset.

    Raises:
        ValueError: a positional argument is neither a string, an
            ElementTree nor an Element.
        IOError, et.ParseError: re-raised unchanged when ``ffpath`` cannot
            be read or parsed.
    """
    # Importing names from *a and **k or using defaults
    self.ffpath = k.get('ffpath', None)
    self.root = k.get('root', None) if 'tree' not in k else k['tree'].getroot()
    if a:
        # Concrete runtime types of Element / ElementTree instances.
        etype = type(et.Element("a"))
        ettype = type(et.ElementTree())
        for s in a:
            if isinstance(s, (etype, ettype)):
                # Identity test: None is a singleton (PEP 8).
                if self.root is None:
                    self.root = s.getroot() if isinstance(s, ettype) else s
            elif isinstance(s, str):
                if self.ffpath is None:
                    self.ffpath = s
            else:
                raise ValueError("XML\'s initializer only accepts string, ElementTree or Element")
    if self.ffpath is not None and self.root is None:
        try:
            self.root = et.parse(self.ffpath).getroot()
        except (IOError, et.ParseError):
            # TODO Populate tree and save it
            raise
示例3: test_transform__xml_parse_error
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def test_transform__xml_parse_error(self):
    """_transform() must raise etree.ParseError when the retrieved file is not XML."""
    options = {"managed": False, "api_version": "47.0", "api_names": "Test"}
    task = create_task(ConcreteMetadataSingleEntityTransformTask, options)
    task.entity = "CustomApplication"

    with tempfile.TemporaryDirectory() as tmpdir:
        task._create_directories(tmpdir)

        # Write a deliberately malformed file into the retrieve directory.
        applications_dir = task.retrieve_dir / "applications"
        applications_dir.mkdir()
        broken_file = applications_dir / "Test.app"
        broken_file.write_text(">>>>>NOT XML<<<<<")

        with pytest.raises(etree.ParseError):
            task._transform()
示例4: extract_html_content
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def extract_html_content(self, html_body, fix_html=True):
    """
    Parse an HTML body, clean it and emit its HTML and extracted text.

    Does nothing when ``html_body`` is None. ``fix_html`` is accepted but
    not used in this method. Raises ProcessingException when the body
    cannot be parsed.
    """
    if html_body is None:
        return None

    try:
        try:
            tree = html.fromstring(html_body)
        except ValueError:
            # Ship around encoding declarations.
            # https://stackoverflow.com/questions/3402520
            html_body = self.RE_XML_ENCODING.sub('', html_body, count=1)
            tree = html.fromstring(html_body)
    except (ParserError, ParseError, ValueError):
        raise ProcessingException("HTML could not be parsed.")

    self.extract_html_header(tree)
    self.cleaner(tree)
    extracted_text = self.extract_html_text(tree)

    result = self.result
    result.flag(result.FLAG_HTML)
    result.emit_html_body(html_body, extracted_text)
示例5: ingest
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def ingest(self, file_path):
    """
    Parse an XML file, render it through ``self.XSLT`` and emit the result.

    Raises ProcessingException when the file exceeds ``self.MAX_SIZE`` or
    cannot be parsed as XML.
    """
    # Use the recorded size when available, else ask the file system.
    size_in_bytes = self.result.size or os.path.getsize(file_path)
    if size_in_bytes > self.MAX_SIZE:
        raise ProcessingException("XML file is too large.")

    try:
        document = etree.parse(file_path)
    except (ParserError, ParseError):
        raise ProcessingException("XML could not be parsed.")

    extracted_text = self.extract_html_text(document.getroot())

    rendered = etree.XSLT(self.XSLT)(document)
    rendered_markup = html.tostring(rendered, encoding=str, pretty_print=True)

    result = self.result
    result.flag(result.FLAG_HTML)
    result.emit_html_body(rendered_markup, extracted_text)
示例6: xml_translate
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def xml_translate(callback, value):
    """ Translate the text found in the XML string `value` with `callback`.

        Falsy values are returned unchanged. If `value` is not well-formed
        XML, it is wrapped in a <div>, parsed as HTML, translated, and the
        wrapper is stripped from the serialized result.
    """
    if not value:
        return value

    opening, closing = u"<div>", u"</div>"
    try:
        tree = parse_xml(value)
        translated = translate_xml_node(tree, callback, parse_xml, serialize_xml)
        return serialize_xml(translated)
    except etree.ParseError:
        # fallback for translated terms: use an HTML parser and wrap the term
        tree = parse_html(u"<div>%s</div>" % value)
        translated = translate_xml_node(tree, callback, parse_xml, serialize_xml)
        serialized = serialize_xml(translated)
        # drop the leading <div> and the trailing </div>
        return serialized[len(opening):-len(closing)]
示例7: _feed_xml
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def _feed_xml(self, data):
    """Feed `data` to the internal parser, converting parse errors to GvmError."""
    try:
        self._parser.feed(data)
    except etree.ParseError as e:
        message = (
            "Cannot parse XML response. Response data "
            "read {0}".format(data)
        )
        # The original parse error travels along as the second argument.
        raise GvmError(message, e)
示例8: read
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def read(feed, limit, timeout=10):
    """
    Download `feed.url` and parse it into a list of body fragments.

    Returns a 2-tuple:
      * (True, body)  -- parsing produced content; body is prefixed with an
                         <h2> heading holding the escaped feed title
      * (True, None)  -- the feed was read but produced no content
      * (False, msg)  -- downloading or parsing failed; msg describes why
    """
    handled = (ValueError, urllib.error.HTTPError, urllib.error.URLError,
               etree.ParseError, socket.timeout)
    try:
        with urllib.request.urlopen(feed.url, None, timeout) as response:
            payload = response.read()
        items = _parse(payload, limit)
        if not items:
            return True, None
        heading = "<h2>{}</h2>\n".format(escape(feed.title))
        return True, [heading] + items
    except handled as err:
        return False, "Error: {}: {}".format(feed.url, err)
示例9: crawl
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def crawl(url, thread_id=0):
    """
    Fetch `url` and return the de-duplicated list of links found in it.

    Unless OVERRIDE_SIZE is set, a HEAD request is made first and SizeError
    is raised when the advertised Content-Length exceeds 500 MB. When
    SAVE_WORDS is set, the words of the page are put on the WORDS queue;
    when SAVE_PAGES is set, the page is saved. A 'CRAWL' log entry is
    written for `thread_id` before returning.

    :param url: address of the page to crawl
    :param thread_id: worker identifier used for logging
    :return: list of unique links (empty if the content cannot be parsed)
    :raises SizeError: the document is reported to be larger than 500 MB
    """
    global WORDS, OVERRIDE_SIZE, HEADER, SAVE_PAGES, SAVE_WORDS
    if not OVERRIDE_SIZE:
        try:
            # Attempt to get the size in bytes of the document
            length = int(requests.head(url, headers=HEADER).headers['Content-Length'])
        except (KeyError, ValueError):
            # No Content-Length header, or a non-numeric one: size unknown,
            # so fall through and let the download proceed.
            length = 1
        if length > 524288000:  # If the page is larger than 500 MB
            raise SizeError
    # If the SizeError is raised it will be caught in the except block in the run section,
    # and the following code will not be run.
    page = requests.get(url, headers=HEADER)  # Get page
    word_list = []
    if SAVE_WORDS:
        word_list = make_words(page)
        for word in word_list:
            WORDS.put(word)
    try:
        # Pull out all links after resolving them using any <base> tags found in the document.
        links = [link for element, attribute, link, pos in iterlinks(resolve_base_href(page.content))]
    except etree.ParseError:
        # If the document is not HTML content this will return an empty list.
        links = []
    links = list(set(links))
    if SAVE_PAGES:
        save_page(url, page)
    if SAVE_WORDS:
        # Announce which link was crawled
        write_log('CRAWL', 'Found {0} links and {1} words on {2}'.format(len(links), len(word_list), url),
                  worker=thread_id)
    else:
        # Announce which link was crawled
        write_log('CRAWL', 'Found {0} links on {1}'.format(len(links), url),
                  worker=thread_id)
    return links
示例10: html
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def html(self):
    """
    Return the parsed HTML of this object, computing it on first access.

    The result is stored in ``self._html``. It stays None when the content
    type is listed in NON_HTML, when there is no raw content, or when
    parsing fails.
    """
    if hasattr(self, '_html'):
        return self._html

    self._html = None
    if self.content_type in NON_HTML:
        return None
    if self.raw is None or len(self.raw) == 0:
        return None

    try:
        self._html = html.fromstring(self.text)
    except ValueError as error:
        # Text carrying an encoding declaration is rejected by fromstring;
        # parse the file from disk instead.
        if 'encoding declaration' in str(error):
            self._html = html.parse(self.file_path.as_posix())
    except (etree.ParserError, etree.ParseError):
        pass
    return self._html
示例11: json
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def json(self):
    """
    Return the JSON content of ``self.file_path``, loading it on first access.

    The decoded value is stored in ``self._json``. Raises ParseError when
    there is no file path to read from.
    """
    if hasattr(self, '_json'):
        return self._json

    if self.file_path is None:
        raise ParseError("Cannot parse failed download.")
    with open(self.file_path, 'r') as handle:
        self._json = json.load(handle)
    return self._json
示例12: feed
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def feed(self, data):
    """
    Feed `data` to the wrapped parser, retrying after undefined-entity errors.

    When parsing fails with an 'undefined entity' error that names an
    entity of the form ``&name;``, that name is registered in
    ``self.known_entity`` (mapped to itself) and the data is fed again.
    Any other parse error is re-raised.
    """
    self._init_parser()
    try:
        return self._original_parser.feed(data)
    except etree.ParseError as _err:
        message = str(_err)
        if 'undefined entity' not in message:
            raise _err

        # fix unknown entity
        log.warning('WARNING {}'.format(message))
        found = re.search(r'&\w+;', message)
        if not found:
            raise _err

        # Strip the leading '&' and the trailing ';'.
        name = found.group()[1:-1]
        self.known_entity[name] = name
        return self.feed(data)
示例13: invoke
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def invoke(self, action, resource_uri, resource, option_set=None,
           selector_set=None, timeout=None):
    """
    Build a WSMan SOAP envelope, send it and return the parsed response.

    :param action: The action to run, this relates to the wsa:Action header
        field.
    :param resource_uri: The resource URI that the action relates to, this
        relates to the wsman:ResourceURI header field.
    :param resource: Optional xml.etree.ElementTree Element appended to the
        s:Body section; nothing is appended when it is None.
    :param option_set: a wsman.OptionSet to add to the request
    :param selector_set: a wsman.SelectorSet to add to the request
    :param timeout: Override the default wsman:OperationTimeout value for
        the request, this should be an int in seconds.
    :return: The ET Element of the response XML from the server
    """
    soap_ns = NAMESPACES['s']

    # Assemble <s:Envelope><s:Header/><s:Body/></s:Envelope>.
    envelope = ET.Element("{%s}Envelope" % soap_ns)
    header = self._create_header(action, resource_uri, option_set,
                                 selector_set, timeout)
    envelope.append(header)
    body = ET.SubElement(envelope, "{%s}Body" % soap_ns)
    if resource is not None:
        body.append(resource)

    # Remember the id we send so the reply can be matched against it.
    sent_id = header.find("wsa:MessageID", namespaces=NAMESPACES).text
    payload = ET.tostring(envelope, encoding='utf-8', method='xml')

    try:
        raw_response = self.transport.send(payload)
    except WinRMTransportError as err:
        try:
            # try and parse the XML and get the WSManFault
            raise self._parse_wsman_fault(err.response_text)
        except ET.ParseError:
            # no XML message is present so not a WSManFault error
            log.error("Failed to parse WSManFault message on WinRM error"
                      " response, raising original WinRMTransportError")
            raise err

    response_xml = ET.fromstring(raw_response)
    received_id = response_xml.find("s:Header/wsa:RelatesTo",
                                    namespaces=NAMESPACES).text
    if sent_id != received_id:
        raise WinRMError("Received related id does not match related "
                         "expected message id: Sent: %s, Received: %s"
                         % (sent_id, received_id))
    return response_xml
示例14: run
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def run(self, args):
    """
    Format every XML file named in ``args.xml`` in place and report totals.

    A single ``-`` entry means the file names come from ``_stdin_iter()``.
    Missing files are skipped. A parse error marks the run as failed but
    processing continues; any other exception aborts the loop.

    Returns the exit code: EXIT_CODE_SUCCESS, EXIT_CODE_FORMAT_APPLIED
    (no errors and at least one file changed), EXIT_CODE_BAD_CONF_FILE or
    EXIT_CODE_INTERNAL_ERROR.
    """
    formatter = SplunkSimpleXmlFormatter()

    # Should we read a list of conf files from STDIN?
    read_from_stdin = len(args.xml) == 1 and args.xml[0] == "-"
    paths = _stdin_iter() if read_from_stdin else args.xml

    stats = Counter()
    exit_code = EXIT_CODE_SUCCESS
    err = self.stderr

    for path in paths:
        stats["checked"] += 1
        if not os.path.isfile(path):
            err.write("Skipping missing file: {0}\n".format(path))
            stats["missing"] += 1
            continue
        try:
            # Source and destination are the same file: format in place.
            was_changed = formatter.format_xml(path, path, args.indent)
            if was_changed:
                err.write("Replaced file {0} with formatted content\n".format(path))
                stats["changed"] += 1
            else:
                if not args.quiet:
                    err.write("Already formatted {0}\n".format(path))
                stats["no-action"] += 1
            err.flush()
        except etree.ParseError as e:
            err.write("Error parsing file {0}: {1}\n".format(path, e))
            err.flush()
            stats["error"] += 1
            exit_code = EXIT_CODE_BAD_CONF_FILE
        except Exception as e:  # pragma: no cover
            err.write("Unhandled top-level exception while parsing {0}. "
                      "Aborting.\n{1}\n".format(path, e))
            debug_traceback()
            stats["error"] += 1
            exit_code = EXIT_CODE_INTERNAL_ERROR
            break

    if not exit_code and stats["changed"] > 0:
        exit_code = EXIT_CODE_FORMAT_APPLIED

    # The summary is always printed.
    summary = ("Completed formatting {0[checked]} files. rc={1} Breakdown:\n"
               " {0[changed]} files were formatted successfully.\n"
               " {0[no-action]} files were already formatted.\n"
               " {0[error]} files failed.\n".format(stats, exit_code))
    self.stdout.write(summary)
    return exit_code
示例15: __init__
# 需要导入模块: from lxml import etree [as 别名]
# 或者: from lxml.etree import ParseError [as 别名]
def __init__(self, url=None, file=None, text=None, **kwargs) -> None:
    """
    A feed can be provided as either a url or a file, but exactly one must
    be given. Realistically, users will almost universally use a url to
    retrieve feeds from. However, having support for handling files makes
    testing easier.

    Args:
        url: (optional) the url where the feed is located
        file: (optional) the file where the feed is located
        text: (optional) pre-retrieved text for the feed. Can be useful if
            multiple feeds were downloaded previously; a URL or file is
            still required, providing this field will only skip the
            download step
        **kwargs: optional metadata: 'title', 'description', 'link',
            'last_build_date' and 'copyright'. When 'title' is supplied
            the feed is marked validated without downloading or parsing.

    Raises:
        AssertionError: both or neither of `url` and `file` were given.
        FeedParseError: `text` was given but is not parseable XML.
    """
    # Exactly one of url/file must be provided. This is an explicit check
    # rather than an ``assert`` so it still runs under ``python -O``; the
    # exception type is kept as AssertionError for existing callers.
    both_given = url is not None and file is not None
    if both_given or url is file:
        raise AssertionError(
            "exactly one of 'url' or 'file' must be provided")

    self._url = url
    self._file = file
    self._tree = None
    self._validated = False

    self._title = kwargs.get('title', None)
    self._description = kwargs.get('description', None)
    self._link = kwargs.get('link', None)
    self._last_build_date = kwargs.get('last_build_date', None)
    self._copyright = kwargs.get('copyright', None)

    # assume that if we have been passed the title then we have also been
    # passed everything else and that the feed is valid
    if self._title is None:
        if text:
            # the content of a document was already provided, but we need
            # to ensure it is valid RSS
            try:
                self._tree = etree.fromstring(text)
            except etree.ParseError as err:
                # Keep the parser's own error as the explicit cause.
                raise FeedParseError(
                    "Unable to parse text as an XML document") from err
        else:
            # retrieve the feed and parse to XML document
            self._download_feed()
        # check that the XML document is a properly structured RSS feed
        self._validate_feed()
        # set this object's metadata using rss feed
        self._parse_metadata()
    else:
        self._validated = True