当前位置: 首页>>代码示例>>Python>>正文


Python XML.iter方法代码示例

本文整理汇总了Python中lxml.etree.XML.iter方法的典型用法代码示例。如果您正苦于以下问题:Python XML.iter方法的具体用法?Python XML.iter怎么用?Python XML.iter使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在lxml.etree.XML的用法示例。


在下文中一共展示了XML.iter方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: validateFootnote

# 需要导入模块: from lxml.etree import XML [as 别名]
# 或者: from lxml.etree.XML import iter [as 别名]
def validateFootnote(modelXbrl, footnote):
    #handler = TextBlockHandler(modelXbrl)
    loadDTD(modelXbrl)
    checkedGraphicsFiles = set() # only check any graphics file reference once per footnote
    
    try:
        footnoteHtml = XML("<body/>")
        copyHtml(footnote, footnoteHtml)
        if not edbodyDTD.validate( footnoteHtml ):
            modelXbrl.error("EFM.6.05.34.dtdError",
                _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
                modelObject=footnote, xlinkLabel=footnote.get("{http://www.w3.org/1999/xlink}label"),
                error=', '.join(e.message for e in edbodyDTD.error_log.filter_from_errors()))
        for elt in footnoteHtml.iter():
            eltTag = elt.tag
            for attrTag, attrValue in elt.items():
                if ((attrTag == "href" and eltTag == "a") or 
                    (attrTag == "src" and eltTag == "img")):
                    if "javascript:" in attrValue:
                        modelXbrl.error("EFM.6.05.34.activeContent",
                            _("Footnote %(xlinkLabel)s has javascript in '%(attribute)s' for <%(element)s>"),
                            modelObject=footnote, xlinkLabel=footnote.get("{http://www.w3.org/1999/xlink}label"),
                            attribute=attrTag, element=eltTag)
                    elif attrValue.startswith("http://www.sec.gov/Archives/edgar/data/") and eltTag == "a":
                        pass
                    elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                        modelXbrl.error("EFM.6.05.34.externalReference",
                            _("Footnote %(xlinkLabel)s has an invalid external reference in '%(attribute)s' for <%(element)s>: %(value)s"),
                            modelObject=footnote, xlinkLabel=footnote.get("{http://www.w3.org/1999/xlink}label"),
                            attribute=attrTag, element=eltTag, value=attrValue)
                    if attrTag == "src" and attrValue not in checkedGraphicsFiles:
                        if attrValue.lower()[-4:] not in ('.jpg', '.gif'):
                            modelXbrl.error("EFM.6.05.34.graphicFileType",
                                _("Footnote %(xlinkLabel)s references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"),
                                modelObject=footnote, xlinkLabel=footnote.get("{http://www.w3.org/1999/xlink}label"),
                                attribute=attrValue, element=eltTag)
                        else:   # test file contents
                            try:
                                if validateGraphicFile(footnote, attrValue) != attrValue.lower()[-3:]:
                                    modelXbrl.error("EFM.6.05.34.graphicFileContent",
                                        _("Footnote %(xlinkLabel)s references a graphics file which doesn't have expected content '%(attribute)s' for <%(element)s>"),
                                        modelObject=footnote, xlinkLabel=footnote.get("{http://www.w3.org/1999/xlink}label"),
                                        attribute=attrValue, element=eltTag)
                            except IOError as err:
                                modelXbrl.error("EFM.6.05.34.graphicFileError",
                                    _("Footnote %(xlinkLabel)s references a graphics file which isn't openable '%(attribute)s' for <%(element)s>, error: %(error)s"),
                                    modelObject=footnote, xlinkLabel=footnote.get("{http://www.w3.org/1999/xlink}label"),
                                    attribute=attrValue, element=eltTag, error=err)
                        checkedGraphicsFiles.add(attrValue)
            if eltTag == "table" and any(a is not None for a in elt.iterancestors("table")):
                modelXbrl.error("EFM.6.05.34.nestedTable",
                    _("Footnote %(xlinkLabel)s has nested <table> elements."),
                    modelObject=footnote, xlinkLabel=footnote.get("{http://www.w3.org/1999/xlink}label"))
    except (XMLSyntaxError,
            UnicodeDecodeError) as err:
        #if not err.endswith("undefined entity"):
        modelXbrl.error("EFM.6.05.34",
            _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
            modelObject=footnote, xlinkLabel=footnote.get("{http://www.w3.org/1999/xlink}label"),
            error=edbodyDTD.error_log.filter_from_errors())
开发者ID:andygreener,项目名称:Arelle,代码行数:62,代码来源:ValidateFilingText.py

示例2: validateFootnote

# 需要导入模块: from lxml.etree import XML [as 别名]
# 或者: from lxml.etree.XML import iter [as 别名]
def validateFootnote(modelXbrl, footnote):
    # handler = TextBlockHandler(modelXbrl)
    loadDTD(modelXbrl)

    try:
        footnoteHtml = XML("<body/>")
        copyHtml(footnote, footnoteHtml)
        if not edbodyDTD.validate(footnoteHtml):
            modelXbrl.error(
                "EFM.6.05.34",
                _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
                modelObject=footnote,
                xlinkLabel=footnote.get("{http://www.w3.org/1999/xlink}label"),
                error=", ".join(e.message for e in edbodyDTD.error_log.filter_from_errors()),
            )
        for elt in footnoteHtml.iter():
            eltTag = elt.tag
            for attrTag, attrValue in elt.items():
                if (attrTag == "href" and eltTag == "a") or (attrTag == "src" and eltTag == "img"):
                    if "javascript:" in attrValue:
                        modelXbrl.error(
                            "EFM.6.05.34",
                            _("Footnote %(xlinkLabel)s has javascript in '%(attribute)s' for <%(element)s>"),
                            modelObject=footnote,
                            xlinkLabel=footnote.get("{http://www.w3.org/1999/xlink}label"),
                            attribute=attrTag,
                            element=eltTag,
                        )
                    elif attrValue.startswith("http://www.sec.gov/Archives/edgar/data/") and eltTag == "a":
                        pass
                    elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                        modelXbrl.error(
                            "EFM.6.05.34",
                            _(
                                "Footnote %(xlinkLabel)s has an invalid external reference in '%(attribute)s' for <%(element)s>: %(value)s"
                            ),
                            modelObject=footnote,
                            xlinkLabel=footnote.get("{http://www.w3.org/1999/xlink}label"),
                            attribute=attrTag,
                            element=eltTag,
                            value=attrValue,
                        )
    except (XMLSyntaxError, UnicodeDecodeError) as err:
        # if not err.endswith("undefined entity"):
        modelXbrl.error(
            "EFM.6.05.34",
            _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
            modelObject=footnote,
            xlinkLabel=footnote.get("{http://www.w3.org/1999/xlink}label"),
            error=edbodyDTD.error_log.filter_from_errors(),
        )
开发者ID:rheimbuch,项目名称:Arelle,代码行数:53,代码来源:ValidateFilingText.py

示例3: validateTextBlockFacts

# 需要导入模块: from lxml.etree import XML [as 别名]
# 或者: from lxml.etree.XML import iter [as 别名]
def validateTextBlockFacts(modelXbrl):
    #handler = TextBlockHandler(modelXbrl)
    loadDTD(modelXbrl)
    checkedGraphicsFiles = set() #  only check any graphics file reference once per fact
    
    for f1 in modelXbrl.facts:
        # build keys table for 6.5.14
        concept = f1.concept
        if f1.xsiNil != "true" and \
           concept is not None and \
           concept.isTextBlock and \
           XMLpattern.match(f1.value):
            #handler.fact = f1
            # test encoded entity tags
            for match in namedEntityPattern.finditer(f1.value):
                entity = match.group()
                if not entity in xhtmlEntities:
                    modelXbrl.error(("EFM.6.05.16", "GFM.1.2.15"),
                        _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
                        modelObject=f1, fact=f1.qname, contextID=f1.contextID, entity=entity, error=entity)
            # test html
            for xmltext in [f1.value] + CDATApattern.findall(f1.value):
                '''
                try:
                    xml.sax.parseString(
                        "<?xml version='1.0' encoding='utf-8' ?>\n<body>\n{0}\n</body>\n".format(
                         removeEntities(xmltext)).encode('utf-8'),handler,handler)
                except (xml.sax.SAXParseException,
                        xml.sax.SAXException,
                        UnicodeDecodeError) as err:
                    # ignore errors which are not errors (e.g., entity codes checked previously
                    if not err.endswith("undefined entity"):
                        handler.modelXbrl.error(("EFM.6.05.15", "GFM.1.02.14"),
                            _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                            modelObject=f1, fact=f1.qname, contextID=f1.contextID, error=err)
                '''
                xmlBodyWithoutEntities = "<body>\n{0}\n</body>\n".format(removeEntities(xmltext))
                try:
                    textblockXml = XML(xmlBodyWithoutEntities)
                    if not edbodyDTD.validate( textblockXml ):
                        errors = edbodyDTD.error_log.filter_from_errors()
                        htmlError = any(e.type_name in ("DTD_INVALID_CHILD", "DTD_UNKNOWN_ATTRIBUTE") 
                                        for e in errors)
                        modelXbrl.error("EFM.6.05.16" if htmlError else ("EFM.6.05.15.dtdError", "GFM.1.02.14"),
                            _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                            modelObject=f1, fact=f1.qname, contextID=f1.contextID, 
                            error=', '.join(e.message for e in errors))
                    for elt in textblockXml.iter():
                        eltTag = elt.tag
                        for attrTag, attrValue in elt.items():
                            if ((attrTag == "href" and eltTag == "a") or 
                                (attrTag == "src" and eltTag == "img")):
                                if "javascript:" in attrValue:
                                    modelXbrl.error("EFM.6.05.16.activeContent",
                                        _("Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"),
                                        modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                        attribute=attrTag, element=eltTag)
                                elif attrValue.startswith("http://www.sec.gov/Archives/edgar/data/") and eltTag == "a":
                                    pass
                                elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                                    modelXbrl.error("EFM.6.05.16.externalReference",
                                        _("Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"),
                                        modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                        attribute=attrTag, element=eltTag)
                                if attrTag == "src" and attrValue not in checkedGraphicsFiles:
                                    if attrValue.lower()[-4:] not in ('.jpg', '.gif'):
                                        modelXbrl.error("EFM.6.05.16.graphicFileType",
                                            _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"),
                                            modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                            attribute=attrValue, element=eltTag)
                                    else:   # test file contents
                                        try:
                                            if validateGraphicFile(f1, attrValue) != attrValue.lower()[-3:]:
                                                modelXbrl.error("EFM.6.05.16.graphicFileContent",
                                                    _("Fact %(fact)s of context %(contextID)s references a graphics file which doesn't have expected content '%(attribute)s' for <%(element)s>"),
                                                    modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                                    attribute=attrValue, element=eltTag)
                                        except IOError as err:
                                            modelXbrl.error("EFM.6.05.16.graphicFileError",
                                                _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't openable '%(attribute)s' for <%(element)s>, error: %(error)s"),
                                                modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                                attribute=attrValue, element=eltTag, error=err)
                                    checkedGraphicsFiles.add(attrValue)
                        if eltTag == "table" and any(a is not None for a in elt.iterancestors("table")):
                            modelXbrl.error("EFM.6.05.16.nestedTable",
                                _("Fact %(fact)s of context %(contextID)s has nested <table> elements."),
                                modelObject=f1, fact=f1.qname, contextID=f1.contextID)
                except (XMLSyntaxError,
                        UnicodeDecodeError) as err:
                    #if not err.endswith("undefined entity"):
                    modelXbrl.error(("EFM.6.05.15", "GFM.1.02.14"),
                        _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                        modelObject=f1, fact=f1.qname, contextID=f1.contextID, error=err)
                    
                checkedGraphicsFiles.clear()
开发者ID:andygreener,项目名称:Arelle,代码行数:97,代码来源:ValidateFilingText.py

示例4: validateTextBlockFacts

# 需要导入模块: from lxml.etree import XML [as 别名]
# 或者: from lxml.etree.XML import iter [as 别名]
def validateTextBlockFacts(modelXbrl):
    # handler = TextBlockHandler(modelXbrl)
    loadDTD(modelXbrl)

    for f1 in modelXbrl.facts:
        # build keys table for 6.5.14
        concept = f1.concept
        if f1.xsiNil != "true" and concept is not None and concept.isTextBlock and XMLpattern.match(f1.value):
            # handler.fact = f1
            # test encoded entity tags
            for match in entityPattern.finditer(f1.value):
                entity = match.group()
                if not entity in xhtmlEntities:
                    modelXbrl.error(
                        ("EFM.6.05.16", "GFM.1.2.15"),
                        _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
                        modelObject=f1,
                        fact=f1.qname,
                        contextID=f1.contextID,
                        entity=entity,
                    )
            # test html
            for xmltext in [f1.value] + CDATApattern.findall(f1.value):
                """
                try:
                    xml.sax.parseString(
                        "<?xml version='1.0' encoding='utf-8' ?>\n<body>\n{0}\n</body>\n".format(
                         removeEntities(xmltext)).encode('utf-8'),handler,handler)
                except (xml.sax.SAXParseException,
                        xml.sax.SAXException,
                        UnicodeDecodeError) as err:
                    # ignore errors which are not errors (e.g., entity codes checked previously
                    if not err.endswith("undefined entity"):
                        handler.modelXbrl.error(("EFM.6.05.15", "GFM.1.02.14"),
                            _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                            modelObject=f1, fact=f1.qname, contextID=f1.contextID, error=err)
                """
                try:
                    textblockXml = XML("<body>\n{0}\n</body>\n".format(removeEntities(xmltext)))
                    if not edbodyDTD.validate(textblockXml):
                        errors = edbodyDTD.error_log.filter_from_errors()
                        htmlError = any(e.type_name in ("DTD_INVALID_CHILD", "DTD_UNKNOWN_ATTRIBUTE") for e in errors)
                        modelXbrl.error(
                            "EFM.6.05.16" if htmlError else ("EFM.6.05.15", "GFM.1.02.14"),
                            _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                            modelObject=f1,
                            fact=f1.qname,
                            contextID=f1.contextID,
                            error=", ".join(e.message for e in errors),
                        )
                    for elt in textblockXml.iter():
                        eltTag = elt.tag
                        for attrTag, attrValue in elt.items():
                            if (attrTag == "href" and eltTag == "a") or (attrTag == "src" and eltTag == "img"):
                                if "javascript:" in attrValue:
                                    modelXbrl.error(
                                        "EFM.6.05.16",
                                        _(
                                            "Fact %(fact)s of context %(contextID) has javascript in '%(attribute)s' for <%(element)s>"
                                        ),
                                        modelObject=f1,
                                        fact=f1.qname,
                                        contextID=f1.contextID,
                                        attribute=attrTag,
                                        element=eltTag,
                                    )
                                elif attrValue.startswith("http://www.sec.gov/Archives/edgar/data/") and eltTag == "a":
                                    pass
                                elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                                    modelXbrl.error(
                                        "EFM.6.05.16",
                                        _(
                                            "Fact %(fact)s of context %(contextID) has an invalid external reference in '%(attribute)s' for <%(element)s>"
                                        ),
                                        modelObject=f1,
                                        fact=f1.qname,
                                        contextID=f1.contextID,
                                        attribute=attrTag,
                                        element=eltTag,
                                    )
                except (XMLSyntaxError, UnicodeDecodeError) as err:
                    # if not err.endswith("undefined entity"):
                    modelXbrl.error(
                        ("EFM.6.05.15", "GFM.1.02.14"),
                        _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                        modelObject=f1,
                        fact=f1.qname,
                        contextID=f1.contextID,
                        error=err,
                    )
开发者ID:rheimbuch,项目名称:Arelle,代码行数:92,代码来源:ValidateFilingText.py

示例5: createTargetInstance

# 需要导入模块: from lxml.etree import XML [as 别名]
# 或者: from lxml.etree.XML import iter [as 别名]
def createTargetInstance(modelXbrl, targetUrl, targetDocumentSchemaRefs, filingFiles, baseXmlLang=None, defaultXmlLang=None):
    targetInstance = ModelXbrl.create(modelXbrl.modelManager,
                                      newDocumentType=Type.INSTANCE,
                                      url=targetUrl,
                                      schemaRefs=targetDocumentSchemaRefs,
                                      isEntry=True,
                                      discover=False) # don't attempt to load DTS
    if baseXmlLang:
        targetInstance.modelDocument.xmlRootElement.set("{http://www.w3.org/XML/1998/namespace}lang", baseXmlLang)
        if defaultXmlLang is None:
            defaultXmlLang = baseXmlLang # allows facts/footnotes to override baseXmlLang
    ValidateXbrlDimensions.loadDimensionDefaults(targetInstance) # need dimension defaults 
    # roleRef and arcroleRef (of each inline document)
    for sourceRefs in (modelXbrl.targetRoleRefs, modelXbrl.targetArcroleRefs):
        for roleRefElt in sourceRefs.values():
            addChild(targetInstance.modelDocument.xmlRootElement, roleRefElt.qname,
                     attributes=roleRefElt.items())
    
    # contexts
    for context in sorted(modelXbrl.contexts.values(), key=lambda c: c.objectIndex): # contexts may come from multiple IXDS files
        ignore = targetInstance.createContext(context.entityIdentifier[0],
                                               context.entityIdentifier[1],
                                               'instant' if context.isInstantPeriod else
                                               'duration' if context.isStartEndPeriod
                                               else 'forever',
                                               context.startDatetime,
                                               context.endDatetime,
                                               None,
                                               context.qnameDims, [], [],
                                               id=context.id)
    for unit in sorted(modelXbrl.units.values(), key=lambda u: u.objectIndex): # units may come from multiple IXDS files
        measures = unit.measures
        ignore = targetInstance.createUnit(measures[0], measures[1], id=unit.id)

    modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
    newFactForOldObjId = {}
    def createFacts(facts, parent):
        for fact in facts:
            if fact.isItem: # HF does not de-duplicate, which is currently-desired behavior
                attrs = {"contextRef": fact.contextID}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNumeric:
                    attrs["unitRef"] = fact.unitID
                    if fact.get("decimals"):
                        attrs["decimals"] = fact.get("decimals")
                    if fact.get("precision"):
                        attrs["precision"] = fact.get("precision")
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                    text = None
                else:
                    text = fact.xValue if fact.xValid else fact.textValue
                    if fact.concept is not None and fact.concept.baseXsdType in ("string", "normalizedString"): # default
                        xmlLang = fact.xmlLang
                        if xmlLang is not None and xmlLang != defaultXmlLang:
                            attrs["{http://www.w3.org/XML/1998/namespace}lang"] = xmlLang
                newFact = targetInstance.createFact(fact.qname, attributes=attrs, text=text, parent=parent)
                # if fact.isFraction, create numerator and denominator
                newFactForOldObjId[fact.objectIndex] = newFact
                if filingFiles is not None and fact.concept is not None and fact.concept.isTextBlock:
                    # check for img and other filing references so that referenced files are included in the zip.
                    for xmltext in [text] + CDATApattern.findall(text):
                        try:
                            for elt in XML("<body>\n{0}\n</body>\n".format(xmltext)).iter():
                                addLocallyReferencedFile(elt, filingFiles)
                        except (XMLSyntaxError, UnicodeDecodeError):
                            pass  # TODO: Why ignore UnicodeDecodeError?
            elif fact.isTuple:
                newTuple = targetInstance.createFact(fact.qname, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newTuple
                createFacts(fact.modelTupleFacts, newTuple)
                
    createFacts(modelXbrl.facts, None)
    modelXbrl.modelManager.showStatus(_("Creating and validating footnotes and relationships"))
    HREF = "{http://www.w3.org/1999/xlink}href"
    footnoteLinks = defaultdict(list)
    footnoteIdCount = {}
    for linkKey, linkPrototypes in modelXbrl.baseSets.items():
        arcrole, linkrole, linkqname, arcqname = linkKey
        if (linkrole and linkqname and arcqname and  # fully specified roles
            arcrole != "XBRL-footnotes" and
            any(lP.modelDocument.type == Type.INLINEXBRL for lP in linkPrototypes)):
            for linkPrototype in linkPrototypes:
                if linkPrototype not in footnoteLinks[linkrole]:
                    footnoteLinks[linkrole].append(linkPrototype)
    for linkrole in sorted(footnoteLinks.keys()):
        for linkPrototype in footnoteLinks[linkrole]:
            newLink = addChild(targetInstance.modelDocument.xmlRootElement, 
                               linkPrototype.qname, 
                               attributes=linkPrototype.attributes)
            for linkChild in linkPrototype:
                attributes = linkChild.attributes
                if isinstance(linkChild, LocPrototype):
                    if HREF not in linkChild.attributes:
                        linkChild.attributes[HREF] = \
                        "#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
                    addChild(newLink, linkChild.qname, 
                             attributes=attributes)
                elif isinstance(linkChild, ArcPrototype):
#.........这里部分代码省略.........
开发者ID:Arelle,项目名称:Arelle,代码行数:103,代码来源:inlineXbrlDocumentSet.py

示例6: saveTargetDocument

# 需要导入模块: from lxml.etree import XML [as 别名]
# 或者: from lxml.etree.XML import iter [as 别名]

#.........这里部分代码省略.........
        newCntx = targetInstance.createContext(context.entityIdentifier[0],
                                               context.entityIdentifier[1],
                                               'instant' if context.isInstantPeriod else
                                               'duration' if context.isStartEndPeriod
                                               else 'forever',
                                               context.startDatetime,
                                               context.endDatetime,
                                               None, 
                                               context.qnameDims, [], [],
                                               id=context.id)
    for unit in modelXbrl.units.values():
        measures = unit.measures
        newUnit = targetInstance.createUnit(measures[0], measures[1], id=unit.id)

    modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
    newFactForOldObjId = {}
    def createFacts(facts, parent):
        for fact in facts:
            if fact.isItem:
                attrs = {"contextRef": fact.contextID}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNumeric:
                    attrs["unitRef"] = fact.unitID
                    if fact.get("decimals"):
                        attrs["decimals"] = fact.get("decimals")
                    if fact.get("precision"):
                        attrs["precision"] = fact.get("precision")
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                    text = None
                else:
                    text = fact.xValue if fact.xValid else fact.textValue
                newFact = targetInstance.createFact(fact.qname, attributes=attrs, text=text, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newFact
                if filingFiles and fact.concept is not None and fact.concept.isTextBlock:
                    # check for img and other filing references
                    for xmltext in [text] + CDATApattern.findall(text):
                        try:
                            for elt in XML("<body>\n{0}\n</body>\n".format(xmltext)):
                                if elt.tag in ("a", "img") and not isHttpUrl(attrValue) and not os.path.isabs(attrvalue):
                                    for attrTag, attrValue in elt.items():
                                        if attrTag in ("href", "src"):
                                            filingFiles.add(attrValue)
                        except (XMLSyntaxError, UnicodeDecodeError):
                            pass
            elif fact.isTuple:
                newTuple = targetInstance.createFact(fact.qname, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newTuple
                createFacts(fact.modelTupleFacts, newTuple)
                
    createFacts(modelXbrl.facts, None)
    # footnote links
    footnoteIdCount = {}
    modelXbrl.modelManager.showStatus(_("Creating and validating footnotes & relationships"))
    HREF = "{http://www.w3.org/1999/xlink}href"
    footnoteLinks = defaultdict(list)
    for linkKey, linkPrototypes in modelXbrl.baseSets.items():
        arcrole, linkrole, linkqname, arcqname = linkKey
        if (linkrole and linkqname and arcqname and # fully specified roles
            arcrole != "XBRL-footnotes" and
            any(lP.modelDocument.type == Type.INLINEXBRL for lP in linkPrototypes)):
            for linkPrototype in linkPrototypes:
                if linkPrototype not in footnoteLinks[linkrole]:
                    footnoteLinks[linkrole].append(linkPrototype)
    for linkrole in sorted(footnoteLinks.keys()):
        for linkPrototype in footnoteLinks[linkrole]:
            newLink = addChild(targetInstance.modelDocument.xmlRootElement, 
                               linkPrototype.qname, 
                               attributes=linkPrototype.attributes)
            for linkChild in linkPrototype:
                attributes = linkChild.attributes
                if isinstance(linkChild, LocPrototype):
                    if HREF not in linkChild.attributes:
                        linkChild.attributes[HREF] = \
                        "#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
                    addChild(newLink, linkChild.qname, 
                             attributes=attributes)
                elif isinstance(linkChild, ArcPrototype):
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ModelInlineFootnote):
                    idUseCount = footnoteIdCount.get(linkChild.footnoteID, 0) + 1
                    if idUseCount > 1: # if footnote with id in other links bump the id number
                        attributes = linkChild.attributes.copy()
                        attributes["id"] = "{}_{}".format(attributes["id"], idUseCount)
                    footnoteIdCount[linkChild.footnoteID] = idUseCount
                    newChild = addChild(newLink, linkChild.qname, 
                                        attributes=attributes)
                    copyIxFootnoteHtml(linkChild, newChild, withText=True)
                    if filingFiles and linkChild.textValue:
                        footnoteHtml = XML("<body/>")
                        copyIxFootnoteHtml(linkChild, footnoteHtml)
                        for elt in footnoteHtml.iter():
                            if elt.tag in ("a", "img"):
                                for attrTag, attrValue in elt.items():
                                    if attrTag in ("href", "src") and not isHttpUrl(attrValue) and not os.path.isabs(attrvalue):
                                        filingFiles.add(attrValue)
        
    targetInstance.saveInstance(overrideFilepath=targetUrl, outputZip=outputZip)
    modelXbrl.modelManager.showStatus(_("Saved extracted instance"), 5000)
开发者ID:javascriptgeek,项目名称:Arelle,代码行数:104,代码来源:inlineXbrlDocumentSet.py

示例7: validateTextBlockFacts

# 需要导入模块: from lxml.etree import XML [as 别名]
# 或者: from lxml.etree.XML import iter [as 别名]
def validateTextBlockFacts(modelXbrl):
    #handler = TextBlockHandler(modelXbrl)
    loadDTD(modelXbrl)
    checkedGraphicsFiles = set() #  only check any graphics file reference once per fact
    allowedExternalHrefPattern = modelXbrl.modelManager.disclosureSystem.allowedExternalHrefPattern
    
    if isInlineDTD:
        htmlBodyTemplate = "<body><div>\n{0}\n</div></body>\n"
    else:
        htmlBodyTemplate = "<body>\n{0}\n</body>\n"
    _xhtmlNs = "{{{}}}".format(xhtml)
    _xhtmlNsLen = len(_xhtmlNs)
    
    for f1 in modelXbrl.facts:
        # build keys table for 6.5.14
        concept = f1.concept
        if f1.xsiNil != "true" and \
           concept is not None and \
           concept.isTextBlock and \
           XMLpattern.match(f1.value):
            #handler.fact = f1
            # test encoded entity tags
            for match in namedEntityPattern.finditer(f1.value):
                entity = match.group()
                if not entity in xhtmlEntities:
                    modelXbrl.error(("EFM.6.05.16", "GFM.1.2.15"),
                        _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
                        modelObject=f1, fact=f1.qname, contextID=f1.contextID, entity=entity, error=entity)
            # test html
            for xmltext in [f1.value] + CDATApattern.findall(f1.value):
                '''
                try:
                    xml.sax.parseString(
                        "<?xml version='1.0' encoding='utf-8' ?>\n<body>\n{0}\n</body>\n".format(
                         removeEntities(xmltext)).encode('utf-8'),handler,handler)
                except (xml.sax.SAXParseException,
                        xml.sax.SAXException,
                        UnicodeDecodeError) as err:
                    # ignore errors which are not errors (e.g., entity codes checked previously
                    if not err.endswith("undefined entity"):
                        handler.modelXbrl.error(("EFM.6.05.15", "GFM.1.02.14"),
                            _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                            modelObject=f1, fact=f1.qname, contextID=f1.contextID, error=err)
                '''
                xmlBodyWithoutEntities = htmlBodyTemplate.format(removeEntities(xmltext))
                try:
                    textblockXml = XML(xmlBodyWithoutEntities)
                    if not edbodyDTD.validate( textblockXml ):
                        errors = edbodyDTD.error_log.filter_from_errors()
                        htmlError = any(e.type_name in ("DTD_INVALID_CHILD", "DTD_UNKNOWN_ATTRIBUTE") 
                                        for e in errors)
                        modelXbrl.error("EFM.6.05.16" if htmlError else ("EFM.6.05.15.dtdError", "GFM.1.02.14"),
                            _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                            modelObject=f1, fact=f1.qname, contextID=f1.contextID, 
                            error=', '.join(e.message for e in errors),
                            messageCodes=("EFM.6.05.16", "EFM.6.05.15.dtdError", "GFM.1.02.14"))
                    for elt in textblockXml.iter():
                        eltTag = elt.tag
                        if isinstance(elt, ModelObject) and elt.namespaceURI == xhtml:
                            eltTag = elt.localName
                        elif isinstance(elt, (_ElementTree, _Comment, _ProcessingInstruction)):
                            continue # comment or other non-parsed element
                        else:
                            eltTag = elt.tag
                            if eltTag.startswith(_xhtmlNs):
                                eltTag = eltTag[_xhtmlNsLen:]
                        if isInlineDTD and eltTag in efmBlockedInlineHtmlElements:
                            modelXbrl.error("EFM.5.02.05.disallowedElement",
                                _("%(validatedObjectLabel)s has disallowed element <%(element)s>"),
                                modelObject=elt, validatedObjectLabel=f1.qname,
                                element=eltTag)
                        for attrTag, attrValue in elt.items():
                            if isInlineDTD:
                                if attrTag in efmBlockedInlineHtmlElementAttributes.get(eltTag,()):
                                    modelXbrl.error("EFM.5.02.05.disallowedAttribute",
                                        _("%(validatedObjectLabel)s has disallowed attribute on element <%(element)s>: %(attribute)s=\"%(value)s\""),
                                        modelObject=elt, validatedObjectLabel=validatedObjectLabel,
                                        element=eltTag, attribute=attrTag, value=attrValue)
                            if ((attrTag == "href" and eltTag == "a") or 
                                (attrTag == "src" and eltTag == "img")):
                                if "javascript:" in attrValue:
                                    modelXbrl.error("EFM.6.05.16.activeContent",
                                        _("Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"),
                                        modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                        attribute=attrTag, element=eltTag)
                                elif eltTag == "a" and (not allowedExternalHrefPattern or allowedExternalHrefPattern.match(attrValue)):
                                    pass
                                elif scheme(attrValue) in ("http", "https", "ftp"):
                                    modelXbrl.error("EFM.6.05.16.externalReference",
                                        _("Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"),
                                        modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                        attribute=attrTag, element=eltTag)
                                if attrTag == "src" and attrValue not in checkedGraphicsFiles:
                                    if scheme(attrValue)  == "data":
                                        modelXbrl.error("EFM.6.05.16.graphicDataUrl",
                                            _("Fact %(fact)s of context %(contextID)s references a graphics data URL which isn't accepted '%(attribute)s' for <%(element)s>"),
                                            modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                                            attribute=attrValue[:32], element=eltTag)
                                    elif attrValue.lower()[-4:] not in ('.jpg', '.gif'):
                                        modelXbrl.error("EFM.6.05.16.graphicFileType",
#.........这里部分代码省略.........
开发者ID:Arelle,项目名称:Arelle,代码行数:103,代码来源:ValidateFilingText.py


注:本文中的lxml.etree.XML.iter方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。