当前位置: 首页>>代码示例>>Python>>正文


Python ElementTree.iterparse方法代码示例

本文整理汇总了Python中xml.etree.ElementTree.iterparse方法的典型用法代码示例。如果您正苦于以下问题:Python ElementTree.iterparse方法的具体用法?Python ElementTree.iterparse怎么用?Python ElementTree.iterparse使用的例子?那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在xml.etree.ElementTree的用法示例。


在下文中一共展示了ElementTree.iterparse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _parse_xml

# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def _parse_xml(data, strip_ns=False):
    """Parse an XML document and return its root element.

    :param data: the XML document, as text or as an already-encoded byte
        string.
    :param strip_ns: when true, remove the ``{namespace}`` prefix from the
        tag of every parsed element.
    :return: the root element of the parsed document.
    :raises ValueError: if parsing fails; the message carries the underlying
        error and a short excerpt of the input.
    """
    # Only text needs encoding before it goes into BytesIO.  Byte strings are
    # passed through as they are: bytearray(data, "utf8") on a bytes object
    # raises TypeError on Python 3.
    if isinstance(data, six.text_type):
        data = data.encode("utf8") if six.PY2 else bytearray(data, "utf8")
    try:
        it = ET.iterparse(BytesIO(data))
        # The iterator has to be exhausted even when nothing is stripped,
        # because ``it.root`` is only populated once parsing has finished.
        for _, el in it:
            if strip_ns and '}' in el.tag:
                el.tag = el.tag.split('}', 1)[1]
        return it.root
    except Exception as err:
        snippet = repr(data)
        if len(snippet) > 35:
            snippet = snippet[:35] + " ..."

        raise ValueError("Unable to parse XML: {0} ({1})".format(err, snippet))
开发者ID:streamlink,项目名称:streamlink,代码行数:20,代码来源:__init__.py

示例2: process_species

# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def process_species(self, limit):
        """
        Stream the gzip-compressed XML data file and pass each parsed
        element to ``self.process_xml_table`` for the 'Species_gb' table,
        with ``self._process_species_table_row`` as the row handler.
        :param limit: forwarded unchanged to ``process_xml_table``
        :return: None
        """
        source_path = '/'.join((self.rawdir, self.files['data']['file']))
        with gzip.open(source_path, 'rb') as compressed:
            text_stream = io.TextIOWrapper(compressed, newline="")
            # discard the first line (the xml declaration) before parsing
            text_stream.readline()
            for _, node in ET.iterparse(text_stream):
                self.process_xml_table(
                    node,
                    'Species_gb',
                    self._process_species_table_row,
                    limit,
                )
开发者ID:monarch-initiative,项目名称:dipper,代码行数:18,代码来源:OMIA.py

示例3: main

# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def main(argv):
    file_obj = open(argv[1])
    print "Reading XML file ",

    sys.stdout.flush()
    level = 0
    sim_list = []
    for event, elem in ElementTree.iterparse(file_obj, events=("start", "end")):
        if event == "start":
            level += 1
        if event == "end":
            level -= 1
            if level == 0 and elem.tag == 'FlowMonitor':
                sim = Simulation(elem)
                sim_list.append(sim)
                elem.clear() # won't need this any more
                sys.stdout.write(".")
                sys.stdout.flush()
    print " done."

    for sim in sim_list:
        for flow in sim.flows:
            print "FlowID: %i" % flow.flowId,
	    print flow.paths 
开发者ID:snowzjx,项目名称:ns3-load-balance,代码行数:26,代码来源:path-flowmon-parse-result.py

示例4: handleMatch

# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def handleMatch(self, match: Match[str]) -> Element:
        """Turn the matched inline TeX into an element tree.

        The 'body' group of the match is rendered with ``render_tex``.  If
        rendering yields nothing, a ``span`` with class 'tex-error' holding
        the original ``$$...$$`` source is returned instead.
        """
        body = match.group('body')
        markup = render_tex(body, is_inline=True)
        if markup is None:
            # Rendering failed: fall back to showing the raw source.
            fallback = Element('span')
            fallback.set('class', 'tex-error')
            fallback.text = '$$' + body + '$$'
            return fallback

        # Python-Markdown mangles properly stored XML namespaces when it
        # serializes the tree, so xmlns is hidden under another attribute
        # name while parsing and restored as a plain attribute afterwards.
        assert ' zulip-xmlns="' not in markup
        markup = markup.replace(' xmlns="', ' zulip-xmlns="')
        for _event, node in etree.iterparse(StringIO(markup)):
            hidden_ns = node.attrib.pop('zulip-xmlns', None)
            if hidden_ns is not None:
                node.attrib['xmlns'] = hidden_ns
            # the element seen last is the document root
            root = node
        return root
开发者ID:zulip,项目名称:zulip,代码行数:22,代码来源:__init__.py

示例5: process_metalink

# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def process_metalink(self, ml_file):
        """Collect the download URLs listed in a metalink file.

        Namespaces are stripped from every tag, then the text of the first
        ``resources/url`` entry of each child of the ``files`` element is
        collected.  Children without such an entry are skipped.

        :param ml_file: path of the metalink (XML) file.
        :return: list of URLs, or None when the file lists none.
        """
        print("Processing metalink file: {0}".format(ml_file))

        # Hand the parser the raw bytes so it honours the encoding declared
        # in the document rather than the platform's default text encoding.
        with open(ml_file, 'rb') as ml:
            it = ET.iterparse(ml)
            for _, el in it:
                if '}' in el.tag:
                    el.tag = el.tag.split('}', 1)[1]  # strip all namespaces
            root = it.root

        ml_files = root.find('files')
        if ml_files is None:
            # No <files> section at all: nothing to download.
            return None

        dl_urls = []
        for dl in ml_files:
            url = dl.find('resources/url')
            if url is not None:
                dl_urls.append(url.text)

        return dl_urls or None
    
    # Get download urls from a csv file 
开发者ID:jonas-eberle,项目名称:esa_sentinel,代码行数:25,代码来源:asf_template.py

示例6: parse

# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def parse(filename):
    """Assemble a DFXMLObject from the events of ``iterparse(filename)``.

    Volume objects are appended to the document when they start; file
    objects are appended, when they end, to whichever object is the current
    container (the enclosing volume, or the document itself).
    """
    document = None
    container = None
    for event, obj in iterparse(filename):
        if event == "start":
            if isinstance(obj, DFXMLObject):
                document = container = obj
            elif isinstance(obj, VolumeObject):
                document.append(obj)
                container = obj
            continue
        if event != "end":
            continue
        if isinstance(obj, DFXMLObject):
            if document is None:
                document = obj
            container = obj
        # Deliberately a separate test from the one above, not an elif.
        if isinstance(obj, VolumeObject):
            # leaving a volume: later files belong to the document again
            container = document
        elif isinstance(obj, FileObject):
            container.append(obj)
    return document
开发者ID:kieranjol,项目名称:IFIscripts,代码行数:24,代码来源:Objects.py

示例7: process_stream_iterparse

# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def process_stream_iterparse(self, stream, heading=None):
        """Append the text of every ``si`` element in *stream* to the
        workbook's shared-string list.

        *heading*, when given, is written to the log at verbosity 2 or
        above.  Each processed element is cleared as soon as its text has
        been extracted.
        """
        if self.verbosity >= 2 and heading is not None:
            fprintf(self.logfile, "\n=== %s ===\n", heading)
        wanted_tag = U_SSML12 + 'si'
        shared_strings = self.bk._sharedstrings
        seen = 0
        for _, node in ET.iterparse(stream):
            if node.tag == wanted_tag:
                if self.verbosity >= 3:
                    fprintf(self.logfile, "element #%d\n", seen)
                    self.dump_elem(node)
                shared_strings.append(get_text_from_si_or_is(self, node))
                # drop the element's children now that its text is stored
                node.clear()
                seen += 1
        if self.verbosity >= 2:
            self.dumpout('Entries in SST: %d', len(shared_strings))
        if self.verbosity >= 3:
            position = 0
            for entry in shared_strings:
                fprintf(self.logfile, "SST x=%d s=%r\n", position, entry)
                position += 1
开发者ID:alexfeng,项目名称:InternationalizationScript-iOS,代码行数:22,代码来源:xlsx.py

示例8: __init__

# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def __init__(self, filename):
        """Load the rpm package entries from a ``primary`` metadata XML file.

        ``package_count`` is taken from the ``packages`` attribute of the
        metadata element; ``packages`` receives one dict per package element
        whose ``type`` is "rpm".
        """
        self.package_count = 0
        self.packages = []
        metadata_tag = "{%s}metadata" % NS["primary"]
        package_tag = "{%s}package" % NS["primary"]
        root = None
        for event, elem in eT.iterparse(filename, events=("start", "end")):
            if event == "start":
                if elem.tag == metadata_tag:
                    root = elem
                    self.package_count = int(elem.get("packages"))
                continue
            # Only "end" events get here.
            if elem.tag != package_tag:
                continue
            if elem.get("type") != "rpm":
                continue
            evr = elem.find("primary:version", NS)
            package = {
                "name": text_strip(elem.find("primary:name", NS)),
                "epoch": evr.get("epoch"),
                "ver": evr.get("ver"),
                "rel": evr.get("rel"),
                "arch": text_strip(elem.find("primary:arch", NS)),
                "summary": text_strip(elem.find("primary:summary", NS)),
                "description": text_strip(elem.find("primary:description", NS)),
                "srpm": elem.find("primary:format", NS).find("rpm:sourcerpm", NS).text,
            }
            self.packages.append(package)
            # Empty the tree after each stored package so it does not
            # accumulate the whole document.
            root.clear()
开发者ID:RedHatInsights,项目名称:vmaas,代码行数:25,代码来源:primary.py

示例9: parse_and_remove

# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def parse_and_remove(filename, path):
    """Incrementally parse an XML document, reporting elements as they close.

    For every closing element a ``(tag, text)`` tuple is yielded.  When the
    chain of tags from the document root down to the closing element equals
    *path*, the element itself is yielded as well and is then detached from
    its parent.

    :param filename: file name or file object accepted by ``iterparse``.
    :param path: '/'-separated tag names, starting with the root tag.
    """
    path_parts = path.split('/')
    tag_stack = []
    elem_stack = []
    for event, elem in iterparse(filename, ('start', 'end')):
        # The previous test, ``event == 'start' in elem.tag``, was a chained
        # comparison: it pushed only elements whose tag contained "start",
        # so the stacks stayed empty and the path never matched.
        if event == 'start':
            tag_stack.append(elem.tag)
            elem_stack.append(elem)
        elif event == 'end':
            yield elem.tag, elem.text

            if tag_stack == path_parts:
                yield elem
                # The root element has no parent on the stack to detach from.
                if len(elem_stack) > 1:
                    elem_stack[-2].remove(elem)
            # Every "end" follows its own "start", so the stacks are never
            # empty at this point.
            tag_stack.pop()
            elem_stack.pop()
开发者ID:estnltk,项目名称:estnltk,代码行数:24,代码来源:parser.py

示例10: process_stream_iterparse

# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def process_stream_iterparse(self, stream, heading=None):
        """Collect the text of each ``si`` element found in *stream* into
        ``self.bk._sharedstrings``.

        At verbosity 2 or above the optional *heading* and the final entry
        count are logged; at verbosity 3 or above each element and each
        stored string is dumped too.
        """
        if self.verbosity >= 2 and heading is not None:
            fprintf(self.logfile, "\n=== %s ===\n", heading)
        si_tag = U_SSML12 + 'si'
        sst = self.bk._sharedstrings
        # Lazily filtered, so each element is still handled (and cleared)
        # before the parser advances to the next one.
        si_elements = (
            parsed for _, parsed in ET.iterparse(stream) if parsed.tag == si_tag
        )
        for index, si_elem in enumerate(si_elements):
            if self.verbosity >= 3:
                fprintf(self.logfile, "element #%d\n", index)
                self.dump_elem(si_elem)
            text = get_text_from_si_or_is(self, si_elem)
            sst.append(text)
            si_elem.clear()  # release the element's children
        if self.verbosity >= 2:
            self.dumpout('Entries in SST: %d', len(sst))
        if self.verbosity >= 3:
            for slot, value in enumerate(sst):
                fprintf(self.logfile, "SST x=%d s=%r\n", slot, value)
开发者ID:eirannejad,项目名称:pyRevit,代码行数:22,代码来源:xlsx.py

示例11: articles

# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def articles():
    """Yield an ``Article`` for each usable page in "articles.xml.bz2".

    A page is skipped when it has no title, when its title contains ':',
    or when it has no revision text.  Articles are numbered from 0 in the
    order they are yielded.
    """
    mediawiki_tag = '{http://www.mediawiki.org/xml/export-0.10/}mediawiki'
    page_tag = '{http://www.mediawiki.org/xml/export-0.10/}page'
    title_tag = '{http://www.mediawiki.org/xml/export-0.10/}title'
    revision_tag = '{http://www.mediawiki.org/xml/export-0.10/}revision'
    text_tag = '{http://www.mediawiki.org/xml/export-0.10/}text'

    n = 0
    with bz2.BZ2File("articles.xml.bz2", 'r') as infile:
        for event, elem in iterparse(infile, events=("start", "end")):
            if event == 'start':
                if elem.tag == mediawiki_tag:
                    root = elem
                continue
            if event != 'end':
                continue

            if elem.tag == page_tag:
                title_elem = elem.find(title_tag)
                title = None if title_elem is None else title_elem.text
                if title is None or ':' in title:
                    continue
                revision = elem.find(revision_tag)
                text_elem = None if revision is None else revision.find(text_tag)
                text = None if text_elem is None else text_elem.text
                if text is None:
                    continue

                yield Article(n, title, text)
                n += 1
            # Runs after every end event that was not skipped above, so the
            # tree never holds more than the element currently being built.
            root.clear()
开发者ID:jorendorff,项目名称:tinysearch,代码行数:27,代码来源:create-corpus.py

示例12: isa

# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def isa(filename):
    """Return ``Reader`` when *filename* is a SENTINEL-1 SLC manifest.

    The file qualifies when its platform family name is 'SENTINEL-1' and its
    product type is 'SLC'.  Any failure while reading or querying the file
    is treated as "not a match" and None is returned.
    """
    family_path = ('./metadataSection/metadataObject[@ID="platform"]/' +
                   'metadataWrap/xmlData/safe:platform/safe:familyName')
    product_path = ('./metadataSection/metadataObject[@ID="generalProductInformation"]/' +
                    'metadataWrap/xmlData/s1sarl1:standAloneProductInformation/' +
                    's1sarl1:productType')
    try:
        # First pass: collect the prefix -> URI namespace declarations.
        namespaces = dict(
            declaration
            for _, declaration in ET.iterparse(filename, events=['start-ns'])
        )
        # Second pass: load the whole document for the lookups below.
        document_root = ET.parse(filename).getroot()
        if document_root.find(family_path, namespaces).text == 'SENTINEL-1':
            if document_root.find(product_path, namespaces).text == 'SLC':
                return Reader
    except Exception:
        pass
    return None
开发者ID:ngageoint,项目名称:sarpy,代码行数:18,代码来源:sentinel.py

示例13: iterparse

# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def iterparse(source, tag):
        """Yield each outermost *tag* element found below the document root.

        Nested elements with the same tag are not yielded separately; only
        the element that closes at nesting depth zero is.  The root element
        is cleared after each yielded element has been consumed.
        """
        events = iter(ElementTree.iterparse(source, events=('start', 'end')))
        # The first event is the root's "start"; keep the root for clearing.
        root = next(events)[1]
        nesting = 0
        for kind, node in events:
            if node.tag != tag:
                continue
            if kind == 'start':
                nesting += 1
                continue
            nesting -= 1
            if nesting != 0:
                continue
            yield node
            root.clear()
开发者ID:josiah-wolf-oberholtzer,项目名称:discograph,代码行数:19,代码来源:Bootstrapper.py

示例14: parse_impl

# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def parse_impl(self):
        """
        Walk ``self.handle`` as a stream of parse events instead of loading
        the whole document first.

        The text of the first closing ``h1`` seen while ``self.user`` is
        unset becomes the user; every opening ``div`` whose class attribute
        contains "thread" is parsed into a thread and saved.
        """

        # str() keeps the encoding name from being unicode under Python 2,
        # which the parser does not accept.
        xml_parser = XMLParser(encoding=str('UTF-8'))
        events = ET.iterparse(self.handle, events=("start", "end"), parser=xml_parser)
        for pos, element in events:
            tag, class_attr = _tag_and_class_attr(element)
            if tag == "h1" and pos == "end":
                if not self.user:
                    self.user = element.text.strip()
                continue
            if tag == "div" and "thread" in class_attr and pos == "start":
                # parse_thread is given the same event iterator as this loop
                participants = self.parse_participants(element)
                self.save_thread(self.parse_thread(participants, events, True))
开发者ID:ownaginatious,项目名称:fbchat-archive-parser,代码行数:22,代码来源:parser.py

示例15: create_xml_tree

# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def create_xml_tree(filepath):
    """
    Parse an XML file and return its root with all namespaces removed.

    ElementTree by default prefixes tag and attribute names with their
    namespace, but the namespaces used in the LIDC-IDRI dataset are not
    consistent, so they are stripped from both here.
    Solution based on https://stackoverflow.com/questions/13412496/python-elementtree-module-how-to-ignore-the-namespace-of-xml-files-to-locate-ma

    Use this instead of ET.fromstring(xml).

    :param filepath: file name or file object accepted by ``ET.iterparse``.
    :return: the root element of the parsed tree.
    """
    it = ET.iterparse(filepath)
    for _, el in it:
        if '}' in el.tag:
            el.tag = el.tag.split('}', 1)[1]  # strip all namespaces
        # Iterate over a snapshot of the names: renaming keys while looping
        # over the live dict raises RuntimeError on current Python versions.
        for at in list(el.attrib):
            if '}' in at:
                el.attrib[at.split('}', 1)[1]] = el.attrib.pop(at)
    return it.root
开发者ID:MIC-DKFZ,项目名称:LIDC-IDRI-processing,代码行数:22,代码来源:lidcXmlHelper.py


注:本文中的xml.etree.ElementTree.iterparse方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。