本文整理汇总了 Python 中 lxml.etree.XML.iter 方法的典型用法代码示例。如果您正苦于以下问题:Python XML.iter 方法的具体用法?Python XML.iter 怎么用?Python XML.iter 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 lxml.etree.XML 的用法示例。
在下文中一共展示了XML.iter方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: validateFootnote
# 需要导入模块: from lxml.etree import XML [as 别名]
# 或者: from lxml.etree.XML import iter [as 别名]
def validateFootnote(modelXbrl, footnote):
    """Check the XHTML content of a footnote and report EFM 6.5.34 findings.

    The footnote is copied under a fresh ``<body/>`` element, validated
    against ``edbodyDTD`` and then walked element by element.  Findings
    (DTD errors, javascript in ``href``/``src``, external references,
    unsupported or unreadable graphics files, nested tables) are reported
    through ``modelXbrl.error``; nothing is returned.

    Args:
        modelXbrl: Model that receives the error reports; also handed to
            ``loadDTD``.
        footnote: Footnote element to check.  Its xlink ``label`` attribute
            identifies the footnote in every message.
    """
    loadDTD(modelXbrl)
    xlinkLabel = footnote.get("{http://www.w3.org/1999/xlink}label")
    checkedGraphicsFiles = set()  # only check any graphics file reference once per footnote
    try:
        footnoteHtml = XML("<body/>")
        copyHtml(footnote, footnoteHtml)
        if not edbodyDTD.validate(footnoteHtml):
            modelXbrl.error("EFM.6.05.34.dtdError",
                            _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
                            modelObject=footnote, xlinkLabel=xlinkLabel,
                            error=', '.join(e.message for e in edbodyDTD.error_log.filter_from_errors()))
        for elt in footnoteHtml.iter():
            eltTag = elt.tag
            for attrTag, attrValue in elt.items():
                # Only <a href> and <img src> carry references that need checking.
                if not ((attrTag == "href" and eltTag == "a") or
                        (attrTag == "src" and eltTag == "img")):
                    continue
                if "javascript:" in attrValue:
                    modelXbrl.error("EFM.6.05.34.activeContent",
                                    _("Footnote %(xlinkLabel)s has javascript in '%(attribute)s' for <%(element)s>"),
                                    modelObject=footnote, xlinkLabel=xlinkLabel,
                                    attribute=attrTag, element=eltTag)
                elif attrValue.startswith("http://www.sec.gov/Archives/edgar/data/") and eltTag == "a":
                    pass  # hyperlinks into the SEC EDGAR archive are accepted
                elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                    modelXbrl.error("EFM.6.05.34.externalReference",
                                    _("Footnote %(xlinkLabel)s has an invalid external reference in '%(attribute)s' for <%(element)s>: %(value)s"),
                                    modelObject=footnote, xlinkLabel=xlinkLabel,
                                    attribute=attrTag, element=eltTag, value=attrValue)
                if attrTag == "src" and attrValue not in checkedGraphicsFiles:
                    if attrValue.lower()[-4:] not in ('.jpg', '.gif'):
                        modelXbrl.error("EFM.6.05.34.graphicFileType",
                                        _("Footnote %(xlinkLabel)s references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"),
                                        modelObject=footnote, xlinkLabel=xlinkLabel,
                                        attribute=attrValue, element=eltTag)
                    else:  # test file contents
                        try:
                            # The detected content type must agree with the file extension.
                            if validateGraphicFile(footnote, attrValue) != attrValue.lower()[-3:]:
                                modelXbrl.error("EFM.6.05.34.graphicFileContent",
                                                _("Footnote %(xlinkLabel)s references a graphics file which doesn't have expected content '%(attribute)s' for <%(element)s>"),
                                                modelObject=footnote, xlinkLabel=xlinkLabel,
                                                attribute=attrValue, element=eltTag)
                        except IOError as err:
                            modelXbrl.error("EFM.6.05.34.graphicFileError",
                                            _("Footnote %(xlinkLabel)s references a graphics file which isn't openable '%(attribute)s' for <%(element)s>, error: %(error)s"),
                                            modelObject=footnote, xlinkLabel=xlinkLabel,
                                            attribute=attrValue, element=eltTag, error=err)
                    checkedGraphicsFiles.add(attrValue)
            if eltTag == "table" and any(a is not None for a in elt.iterancestors("table")):
                modelXbrl.error("EFM.6.05.34.nestedTable",
                                _("Footnote %(xlinkLabel)s has nested <table> elements."),
                                modelObject=footnote, xlinkLabel=xlinkLabel)
    except (XMLSyntaxError, UnicodeDecodeError) as err:
        # Report the exception that was actually raised; the DTD error log only
        # holds results of DTD validation and says nothing about this failure.
        modelXbrl.error("EFM.6.05.34",
                        _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
                        modelObject=footnote, xlinkLabel=xlinkLabel,
                        error=err)
示例2: validateFootnote
# 需要导入模块: from lxml.etree import XML [as 别名]
# 或者: from lxml.etree.XML import iter [as 别名]
def validateFootnote(modelXbrl, footnote):
    """Check the XHTML content of a footnote and report EFM 6.5.34 findings.

    The footnote is copied under a fresh ``<body/>`` element, validated
    against ``edbodyDTD`` and then every ``<a href>`` / ``<img src>``
    attribute is inspected for javascript and external references.  All
    findings are reported through ``modelXbrl.error`` under the code
    ``EFM.6.05.34``; nothing is returned.

    Args:
        modelXbrl: Model that receives the error reports; also handed to
            ``loadDTD``.
        footnote: Footnote element to check.  Its xlink ``label`` attribute
            identifies the footnote in every message.
    """
    loadDTD(modelXbrl)
    xlinkLabel = footnote.get("{http://www.w3.org/1999/xlink}label")
    try:
        footnoteHtml = XML("<body/>")
        copyHtml(footnote, footnoteHtml)
        if not edbodyDTD.validate(footnoteHtml):
            modelXbrl.error(
                "EFM.6.05.34",
                _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
                modelObject=footnote,
                xlinkLabel=xlinkLabel,
                error=", ".join(e.message for e in edbodyDTD.error_log.filter_from_errors()),
            )
        for elt in footnoteHtml.iter():
            eltTag = elt.tag
            for attrTag, attrValue in elt.items():
                # Only <a href> and <img src> carry references that need checking.
                if not ((attrTag == "href" and eltTag == "a") or (attrTag == "src" and eltTag == "img")):
                    continue
                if "javascript:" in attrValue:
                    modelXbrl.error(
                        "EFM.6.05.34",
                        _("Footnote %(xlinkLabel)s has javascript in '%(attribute)s' for <%(element)s>"),
                        modelObject=footnote,
                        xlinkLabel=xlinkLabel,
                        attribute=attrTag,
                        element=eltTag,
                    )
                elif attrValue.startswith("http://www.sec.gov/Archives/edgar/data/") and eltTag == "a":
                    pass  # hyperlinks into the SEC EDGAR archive are accepted
                elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                    modelXbrl.error(
                        "EFM.6.05.34",
                        _(
                            "Footnote %(xlinkLabel)s has an invalid external reference in '%(attribute)s' for <%(element)s>: %(value)s"
                        ),
                        modelObject=footnote,
                        xlinkLabel=xlinkLabel,
                        attribute=attrTag,
                        element=eltTag,
                        value=attrValue,
                    )
    except (XMLSyntaxError, UnicodeDecodeError) as err:
        # Report the exception that was actually raised; the DTD error log only
        # holds results of DTD validation and says nothing about this failure.
        modelXbrl.error(
            "EFM.6.05.34",
            _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
            modelObject=footnote,
            xlinkLabel=xlinkLabel,
            error=err,
        )
示例3: validateTextBlockFacts
# 需要导入模块: from lxml.etree import XML [as 别名]
# 或者: from lxml.etree.XML import iter [as 别名]
def validateTextBlockFacts(modelXbrl):
    """Validate the embedded HTML of every text block fact in ``modelXbrl``.

    For each non-nil fact whose concept is a text block and whose value
    matches ``XMLpattern``:

    * every named entity found by ``namedEntityPattern`` must be listed in
      ``xhtmlEntities``;
    * the value, and each CDATA section found by ``CDATApattern``, is wrapped
      in ``<body>``, parsed, validated against ``edbodyDTD`` and walked to
      report javascript, external references, unsupported or unreadable
      graphics files and nested tables.

    All findings are reported through ``modelXbrl.error``; nothing is
    returned.

    Args:
        modelXbrl: Model providing ``facts`` and receiving the error reports;
            also handed to ``loadDTD``.
    """
    loadDTD(modelXbrl)
    checkedGraphicsFiles = set()  # only check any graphics file reference once per fact
    for f1 in modelXbrl.facts:
        # build keys table for 6.5.14
        concept = f1.concept
        if not (f1.xsiNil != "true"
                and concept is not None
                and concept.isTextBlock
                and XMLpattern.match(f1.value)):
            continue
        # test encoded entity tags
        for match in namedEntityPattern.finditer(f1.value):
            entity = match.group()
            if entity not in xhtmlEntities:
                modelXbrl.error(("EFM.6.05.16", "GFM.1.2.15"),
                                _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
                                modelObject=f1, fact=f1.qname, contextID=f1.contextID, entity=entity, error=entity)
        # test html
        for xmltext in [f1.value] + CDATApattern.findall(f1.value):
            # Entities were checked above, so strip them before parsing.
            xmlBodyWithoutEntities = "<body>\n{0}\n</body>\n".format(removeEntities(xmltext))
            try:
                textblockXml = XML(xmlBodyWithoutEntities)
                if not edbodyDTD.validate(textblockXml):
                    errors = edbodyDTD.error_log.filter_from_errors()
                    htmlError = any(e.type_name in ("DTD_INVALID_CHILD", "DTD_UNKNOWN_ATTRIBUTE")
                                    for e in errors)
                    modelXbrl.error("EFM.6.05.16" if htmlError else ("EFM.6.05.15.dtdError", "GFM.1.02.14"),
                                    _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                                    modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                    error=', '.join(e.message for e in errors))
                for elt in textblockXml.iter():
                    eltTag = elt.tag
                    for attrTag, attrValue in elt.items():
                        # Only <a href> and <img src> carry references that need checking.
                        if not ((attrTag == "href" and eltTag == "a") or
                                (attrTag == "src" and eltTag == "img")):
                            continue
                        if "javascript:" in attrValue:
                            modelXbrl.error("EFM.6.05.16.activeContent",
                                            _("Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"),
                                            modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                            attribute=attrTag, element=eltTag)
                        elif attrValue.startswith("http://www.sec.gov/Archives/edgar/data/") and eltTag == "a":
                            pass  # hyperlinks into the SEC EDGAR archive are accepted
                        elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                            modelXbrl.error("EFM.6.05.16.externalReference",
                                            _("Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"),
                                            modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                            attribute=attrTag, element=eltTag)
                        if attrTag == "src" and attrValue not in checkedGraphicsFiles:
                            if attrValue.lower()[-4:] not in ('.jpg', '.gif'):
                                modelXbrl.error("EFM.6.05.16.graphicFileType",
                                                _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"),
                                                modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                                attribute=attrValue, element=eltTag)
                            else:  # test file contents
                                try:
                                    # The detected content type must agree with the file extension.
                                    if validateGraphicFile(f1, attrValue) != attrValue.lower()[-3:]:
                                        modelXbrl.error("EFM.6.05.16.graphicFileContent",
                                                        _("Fact %(fact)s of context %(contextID)s references a graphics file which doesn't have expected content '%(attribute)s' for <%(element)s>"),
                                                        modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                                        attribute=attrValue, element=eltTag)
                                except IOError as err:
                                    modelXbrl.error("EFM.6.05.16.graphicFileError",
                                                    _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't openable '%(attribute)s' for <%(element)s>, error: %(error)s"),
                                                    modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                                    attribute=attrValue, element=eltTag, error=err)
                            checkedGraphicsFiles.add(attrValue)
                    if eltTag == "table" and any(a is not None for a in elt.iterancestors("table")):
                        modelXbrl.error("EFM.6.05.16.nestedTable",
                                        _("Fact %(fact)s of context %(contextID)s has nested <table> elements."),
                                        modelObject=f1, fact=f1.qname, contextID=f1.contextID)
            except (XMLSyntaxError, UnicodeDecodeError) as err:
                modelXbrl.error(("EFM.6.05.15", "GFM.1.02.14"),
                                _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                                modelObject=f1, fact=f1.qname, contextID=f1.contextID, error=err)
            # Forget graphics seen in this text so they are re-checked next time.
            checkedGraphicsFiles.clear()
示例4: validateTextBlockFacts
# 需要导入模块: from lxml.etree import XML [as 别名]
# 或者: from lxml.etree.XML import iter [as 别名]
def validateTextBlockFacts(modelXbrl):
    """Validate the embedded HTML of every text block fact in ``modelXbrl``.

    For each non-nil fact whose concept is a text block and whose value
    matches ``XMLpattern``:

    * every entity found by ``entityPattern`` must be listed in
      ``xhtmlEntities``;
    * the value, and each CDATA section found by ``CDATApattern``, is wrapped
      in ``<body>``, parsed, validated against ``edbodyDTD`` and every
      ``<a href>`` / ``<img src>`` attribute is inspected for javascript and
      external references.

    All findings are reported through ``modelXbrl.error``; nothing is
    returned.

    Args:
        modelXbrl: Model providing ``facts`` and receiving the error reports;
            also handed to ``loadDTD``.
    """
    loadDTD(modelXbrl)
    for f1 in modelXbrl.facts:
        # build keys table for 6.5.14
        concept = f1.concept
        if not (f1.xsiNil != "true" and concept is not None and concept.isTextBlock and XMLpattern.match(f1.value)):
            continue
        # test encoded entity tags
        for match in entityPattern.finditer(f1.value):
            entity = match.group()
            if entity not in xhtmlEntities:
                modelXbrl.error(
                    ("EFM.6.05.16", "GFM.1.2.15"),
                    _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
                    modelObject=f1,
                    fact=f1.qname,
                    contextID=f1.contextID,
                    entity=entity,
                )
        # test html
        for xmltext in [f1.value] + CDATApattern.findall(f1.value):
            try:
                # Entities were checked above, so strip them before parsing.
                textblockXml = XML("<body>\n{0}\n</body>\n".format(removeEntities(xmltext)))
                if not edbodyDTD.validate(textblockXml):
                    errors = edbodyDTD.error_log.filter_from_errors()
                    htmlError = any(e.type_name in ("DTD_INVALID_CHILD", "DTD_UNKNOWN_ATTRIBUTE") for e in errors)
                    modelXbrl.error(
                        "EFM.6.05.16" if htmlError else ("EFM.6.05.15", "GFM.1.02.14"),
                        _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                        modelObject=f1,
                        fact=f1.qname,
                        contextID=f1.contextID,
                        error=", ".join(e.message for e in errors),
                    )
                for elt in textblockXml.iter():
                    eltTag = elt.tag
                    for attrTag, attrValue in elt.items():
                        # Only <a href> and <img src> carry references that need checking.
                        if not ((attrTag == "href" and eltTag == "a") or (attrTag == "src" and eltTag == "img")):
                            continue
                        if "javascript:" in attrValue:
                            # "%(contextID)s" needs its trailing "s": without it the
                            # following characters are parsed as part of the
                            # conversion specifier and the message is corrupted.
                            modelXbrl.error(
                                "EFM.6.05.16",
                                _(
                                    "Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"
                                ),
                                modelObject=f1,
                                fact=f1.qname,
                                contextID=f1.contextID,
                                attribute=attrTag,
                                element=eltTag,
                            )
                        elif attrValue.startswith("http://www.sec.gov/Archives/edgar/data/") and eltTag == "a":
                            pass  # hyperlinks into the SEC EDGAR archive are accepted
                        elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                            modelXbrl.error(
                                "EFM.6.05.16",
                                _(
                                    "Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"
                                ),
                                modelObject=f1,
                                fact=f1.qname,
                                contextID=f1.contextID,
                                attribute=attrTag,
                                element=eltTag,
                            )
            except (XMLSyntaxError, UnicodeDecodeError) as err:
                modelXbrl.error(
                    ("EFM.6.05.15", "GFM.1.02.14"),
                    _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                    modelObject=f1,
                    fact=f1.qname,
                    contextID=f1.contextID,
                    error=err,
                )
示例5: createTargetInstance
# 需要导入模块: from lxml.etree import XML [as 别名]
# 或者: from lxml.etree.XML import iter [as 别名]
def createTargetInstance(modelXbrl, targetUrl, targetDocumentSchemaRefs, filingFiles, baseXmlLang=None, defaultXmlLang=None):
targetInstance = ModelXbrl.create(modelXbrl.modelManager,
newDocumentType=Type.INSTANCE,
url=targetUrl,
schemaRefs=targetDocumentSchemaRefs,
isEntry=True,
discover=False) # don't attempt to load DTS
if baseXmlLang:
targetInstance.modelDocument.xmlRootElement.set("{http://www.w3.org/XML/1998/namespace}lang", baseXmlLang)
if defaultXmlLang is None:
defaultXmlLang = baseXmlLang # allows facts/footnotes to override baseXmlLang
ValidateXbrlDimensions.loadDimensionDefaults(targetInstance) # need dimension defaults
# roleRef and arcroleRef (of each inline document)
for sourceRefs in (modelXbrl.targetRoleRefs, modelXbrl.targetArcroleRefs):
for roleRefElt in sourceRefs.values():
addChild(targetInstance.modelDocument.xmlRootElement, roleRefElt.qname,
attributes=roleRefElt.items())
# contexts
for context in sorted(modelXbrl.contexts.values(), key=lambda c: c.objectIndex): # contexts may come from multiple IXDS files
ignore = targetInstance.createContext(context.entityIdentifier[0],
context.entityIdentifier[1],
'instant' if context.isInstantPeriod else
'duration' if context.isStartEndPeriod
else 'forever',
context.startDatetime,
context.endDatetime,
None,
context.qnameDims, [], [],
id=context.id)
for unit in sorted(modelXbrl.units.values(), key=lambda u: u.objectIndex): # units may come from multiple IXDS files
measures = unit.measures
ignore = targetInstance.createUnit(measures[0], measures[1], id=unit.id)
modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
newFactForOldObjId = {}
def createFacts(facts, parent):
for fact in facts:
if fact.isItem: # HF does not de-duplicate, which is currently-desired behavior
attrs = {"contextRef": fact.contextID}
if fact.id:
attrs["id"] = fact.id
if fact.isNumeric:
attrs["unitRef"] = fact.unitID
if fact.get("decimals"):
attrs["decimals"] = fact.get("decimals")
if fact.get("precision"):
attrs["precision"] = fact.get("precision")
if fact.isNil:
attrs[XbrlConst.qnXsiNil] = "true"
text = None
else:
text = fact.xValue if fact.xValid else fact.textValue
if fact.concept is not None and fact.concept.baseXsdType in ("string", "normalizedString"): # default
xmlLang = fact.xmlLang
if xmlLang is not None and xmlLang != defaultXmlLang:
attrs["{http://www.w3.org/XML/1998/namespace}lang"] = xmlLang
newFact = targetInstance.createFact(fact.qname, attributes=attrs, text=text, parent=parent)
# if fact.isFraction, create numerator and denominator
newFactForOldObjId[fact.objectIndex] = newFact
if filingFiles is not None and fact.concept is not None and fact.concept.isTextBlock:
# check for img and other filing references so that referenced files are included in the zip.
for xmltext in [text] + CDATApattern.findall(text):
try:
for elt in XML("<body>\n{0}\n</body>\n".format(xmltext)).iter():
addLocallyReferencedFile(elt, filingFiles)
except (XMLSyntaxError, UnicodeDecodeError):
pass # TODO: Why ignore UnicodeDecodeError?
elif fact.isTuple:
newTuple = targetInstance.createFact(fact.qname, parent=parent)
newFactForOldObjId[fact.objectIndex] = newTuple
createFacts(fact.modelTupleFacts, newTuple)
createFacts(modelXbrl.facts, None)
modelXbrl.modelManager.showStatus(_("Creating and validating footnotes and relationships"))
HREF = "{http://www.w3.org/1999/xlink}href"
footnoteLinks = defaultdict(list)
footnoteIdCount = {}
for linkKey, linkPrototypes in modelXbrl.baseSets.items():
arcrole, linkrole, linkqname, arcqname = linkKey
if (linkrole and linkqname and arcqname and # fully specified roles
arcrole != "XBRL-footnotes" and
any(lP.modelDocument.type == Type.INLINEXBRL for lP in linkPrototypes)):
for linkPrototype in linkPrototypes:
if linkPrototype not in footnoteLinks[linkrole]:
footnoteLinks[linkrole].append(linkPrototype)
for linkrole in sorted(footnoteLinks.keys()):
for linkPrototype in footnoteLinks[linkrole]:
newLink = addChild(targetInstance.modelDocument.xmlRootElement,
linkPrototype.qname,
attributes=linkPrototype.attributes)
for linkChild in linkPrototype:
attributes = linkChild.attributes
if isinstance(linkChild, LocPrototype):
if HREF not in linkChild.attributes:
linkChild.attributes[HREF] = \
"#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
addChild(newLink, linkChild.qname,
attributes=attributes)
elif isinstance(linkChild, ArcPrototype):
#.........这里部分代码省略.........
示例6: saveTargetDocument
# 需要导入模块: from lxml.etree import XML [as 别名]
# 或者: from lxml.etree.XML import iter [as 别名]
#.........这里部分代码省略.........
newCntx = targetInstance.createContext(context.entityIdentifier[0],
context.entityIdentifier[1],
'instant' if context.isInstantPeriod else
'duration' if context.isStartEndPeriod
else 'forever',
context.startDatetime,
context.endDatetime,
None,
context.qnameDims, [], [],
id=context.id)
for unit in modelXbrl.units.values():
measures = unit.measures
newUnit = targetInstance.createUnit(measures[0], measures[1], id=unit.id)
modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
newFactForOldObjId = {}
def createFacts(facts, parent):
for fact in facts:
if fact.isItem:
attrs = {"contextRef": fact.contextID}
if fact.id:
attrs["id"] = fact.id
if fact.isNumeric:
attrs["unitRef"] = fact.unitID
if fact.get("decimals"):
attrs["decimals"] = fact.get("decimals")
if fact.get("precision"):
attrs["precision"] = fact.get("precision")
if fact.isNil:
attrs[XbrlConst.qnXsiNil] = "true"
text = None
else:
text = fact.xValue if fact.xValid else fact.textValue
newFact = targetInstance.createFact(fact.qname, attributes=attrs, text=text, parent=parent)
newFactForOldObjId[fact.objectIndex] = newFact
if filingFiles and fact.concept is not None and fact.concept.isTextBlock:
# check for img and other filing references
for xmltext in [text] + CDATApattern.findall(text):
try:
for elt in XML("<body>\n{0}\n</body>\n".format(xmltext)):
if elt.tag in ("a", "img") and not isHttpUrl(attrValue) and not os.path.isabs(attrvalue):
for attrTag, attrValue in elt.items():
if attrTag in ("href", "src"):
filingFiles.add(attrValue)
except (XMLSyntaxError, UnicodeDecodeError):
pass
elif fact.isTuple:
newTuple = targetInstance.createFact(fact.qname, parent=parent)
newFactForOldObjId[fact.objectIndex] = newTuple
createFacts(fact.modelTupleFacts, newTuple)
createFacts(modelXbrl.facts, None)
# footnote links
footnoteIdCount = {}
modelXbrl.modelManager.showStatus(_("Creating and validating footnotes & relationships"))
HREF = "{http://www.w3.org/1999/xlink}href"
footnoteLinks = defaultdict(list)
for linkKey, linkPrototypes in modelXbrl.baseSets.items():
arcrole, linkrole, linkqname, arcqname = linkKey
if (linkrole and linkqname and arcqname and # fully specified roles
arcrole != "XBRL-footnotes" and
any(lP.modelDocument.type == Type.INLINEXBRL for lP in linkPrototypes)):
for linkPrototype in linkPrototypes:
if linkPrototype not in footnoteLinks[linkrole]:
footnoteLinks[linkrole].append(linkPrototype)
for linkrole in sorted(footnoteLinks.keys()):
for linkPrototype in footnoteLinks[linkrole]:
newLink = addChild(targetInstance.modelDocument.xmlRootElement,
linkPrototype.qname,
attributes=linkPrototype.attributes)
for linkChild in linkPrototype:
attributes = linkChild.attributes
if isinstance(linkChild, LocPrototype):
if HREF not in linkChild.attributes:
linkChild.attributes[HREF] = \
"#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
addChild(newLink, linkChild.qname,
attributes=attributes)
elif isinstance(linkChild, ArcPrototype):
addChild(newLink, linkChild.qname, attributes=attributes)
elif isinstance(linkChild, ModelInlineFootnote):
idUseCount = footnoteIdCount.get(linkChild.footnoteID, 0) + 1
if idUseCount > 1: # if footnote with id in other links bump the id number
attributes = linkChild.attributes.copy()
attributes["id"] = "{}_{}".format(attributes["id"], idUseCount)
footnoteIdCount[linkChild.footnoteID] = idUseCount
newChild = addChild(newLink, linkChild.qname,
attributes=attributes)
copyIxFootnoteHtml(linkChild, newChild, withText=True)
if filingFiles and linkChild.textValue:
footnoteHtml = XML("<body/>")
copyIxFootnoteHtml(linkChild, footnoteHtml)
for elt in footnoteHtml.iter():
if elt.tag in ("a", "img"):
for attrTag, attrValue in elt.items():
if attrTag in ("href", "src") and not isHttpUrl(attrValue) and not os.path.isabs(attrvalue):
filingFiles.add(attrValue)
targetInstance.saveInstance(overrideFilepath=targetUrl, outputZip=outputZip)
modelXbrl.modelManager.showStatus(_("Saved extracted instance"), 5000)
示例7: validateTextBlockFacts
# 需要导入模块: from lxml.etree import XML [as 别名]
# 或者: from lxml.etree.XML import iter [as 别名]
def validateTextBlockFacts(modelXbrl):
#handler = TextBlockHandler(modelXbrl)
loadDTD(modelXbrl)
checkedGraphicsFiles = set() # only check any graphics file reference once per fact
allowedExternalHrefPattern = modelXbrl.modelManager.disclosureSystem.allowedExternalHrefPattern
if isInlineDTD:
htmlBodyTemplate = "<body><div>\n{0}\n</div></body>\n"
else:
htmlBodyTemplate = "<body>\n{0}\n</body>\n"
_xhtmlNs = "{{{}}}".format(xhtml)
_xhtmlNsLen = len(_xhtmlNs)
for f1 in modelXbrl.facts:
# build keys table for 6.5.14
concept = f1.concept
if f1.xsiNil != "true" and \
concept is not None and \
concept.isTextBlock and \
XMLpattern.match(f1.value):
#handler.fact = f1
# test encoded entity tags
for match in namedEntityPattern.finditer(f1.value):
entity = match.group()
if not entity in xhtmlEntities:
modelXbrl.error(("EFM.6.05.16", "GFM.1.2.15"),
_("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
modelObject=f1, fact=f1.qname, contextID=f1.contextID, entity=entity, error=entity)
# test html
for xmltext in [f1.value] + CDATApattern.findall(f1.value):
'''
try:
xml.sax.parseString(
"<?xml version='1.0' encoding='utf-8' ?>\n<body>\n{0}\n</body>\n".format(
removeEntities(xmltext)).encode('utf-8'),handler,handler)
except (xml.sax.SAXParseException,
xml.sax.SAXException,
UnicodeDecodeError) as err:
# ignore errors which are not errors (e.g., entity codes checked previously
if not err.endswith("undefined entity"):
handler.modelXbrl.error(("EFM.6.05.15", "GFM.1.02.14"),
_("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
modelObject=f1, fact=f1.qname, contextID=f1.contextID, error=err)
'''
xmlBodyWithoutEntities = htmlBodyTemplate.format(removeEntities(xmltext))
try:
textblockXml = XML(xmlBodyWithoutEntities)
if not edbodyDTD.validate( textblockXml ):
errors = edbodyDTD.error_log.filter_from_errors()
htmlError = any(e.type_name in ("DTD_INVALID_CHILD", "DTD_UNKNOWN_ATTRIBUTE")
for e in errors)
modelXbrl.error("EFM.6.05.16" if htmlError else ("EFM.6.05.15.dtdError", "GFM.1.02.14"),
_("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
modelObject=f1, fact=f1.qname, contextID=f1.contextID,
error=', '.join(e.message for e in errors),
messageCodes=("EFM.6.05.16", "EFM.6.05.15.dtdError", "GFM.1.02.14"))
for elt in textblockXml.iter():
eltTag = elt.tag
if isinstance(elt, ModelObject) and elt.namespaceURI == xhtml:
eltTag = elt.localName
elif isinstance(elt, (_ElementTree, _Comment, _ProcessingInstruction)):
continue # comment or other non-parsed element
else:
eltTag = elt.tag
if eltTag.startswith(_xhtmlNs):
eltTag = eltTag[_xhtmlNsLen:]
if isInlineDTD and eltTag in efmBlockedInlineHtmlElements:
modelXbrl.error("EFM.5.02.05.disallowedElement",
_("%(validatedObjectLabel)s has disallowed element <%(element)s>"),
modelObject=elt, validatedObjectLabel=f1.qname,
element=eltTag)
for attrTag, attrValue in elt.items():
if isInlineDTD:
if attrTag in efmBlockedInlineHtmlElementAttributes.get(eltTag,()):
modelXbrl.error("EFM.5.02.05.disallowedAttribute",
_("%(validatedObjectLabel)s has disallowed attribute on element <%(element)s>: %(attribute)s=\"%(value)s\""),
modelObject=elt, validatedObjectLabel=validatedObjectLabel,
element=eltTag, attribute=attrTag, value=attrValue)
if ((attrTag == "href" and eltTag == "a") or
(attrTag == "src" and eltTag == "img")):
if "javascript:" in attrValue:
modelXbrl.error("EFM.6.05.16.activeContent",
_("Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"),
modelObject=f1, fact=f1.qname, contextID=f1.contextID,
attribute=attrTag, element=eltTag)
elif eltTag == "a" and (not allowedExternalHrefPattern or allowedExternalHrefPattern.match(attrValue)):
pass
elif scheme(attrValue) in ("http", "https", "ftp"):
modelXbrl.error("EFM.6.05.16.externalReference",
_("Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"),
modelObject=f1, fact=f1.qname, contextID=f1.contextID,
attribute=attrTag, element=eltTag)
if attrTag == "src" and attrValue not in checkedGraphicsFiles:
if scheme(attrValue) == "data":
modelXbrl.error("EFM.6.05.16.graphicDataUrl",
_("Fact %(fact)s of context %(contextID)s references a graphics data URL which isn't accepted '%(attribute)s' for <%(element)s>"),
modelObject=f1, fact=f1.qname, contextID=f1.contextID,
attribute=attrValue[:32], element=eltTag)
elif attrValue.lower()[-4:] not in ('.jpg', '.gif'):
modelXbrl.error("EFM.6.05.16.graphicFileType",
#.........这里部分代码省略.........