本文整理汇总了Python中xml.etree.ElementTree.iterparse方法的典型用法代码示例。如果您正苦于以下问题:Python ElementTree.iterparse方法的具体用法?Python ElementTree.iterparse怎么用?Python ElementTree.iterparse使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类xml.etree.ElementTree的用法示例。
在下文中一共展示了ElementTree.iterparse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _parse_xml
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def _parse_xml(data, strip_ns=False):
    """Parse an in-memory XML document and return its root element.

    :param data: the XML document as text or as bytes. Text is converted
        to UTF-8 bytes before parsing; bytes are parsed as given.
    :param strip_ns: when true, remove the ``{namespace}`` prefix from
        every element tag while parsing.
    :return: the root element of the parsed document.
    :raises ValueError: if parsing fails for any reason. The message
        contains the underlying error and ``repr(data)`` truncated to
        35 characters.
    """
    # Only text needs converting. bytearray(data, "utf8") raises TypeError
    # when data is already bytes, so byte input is passed through untouched.
    if isinstance(data, six.text_type):
        data = data.encode("utf8") if six.PY2 else bytearray(data, "utf8")
    try:
        it = ET.iterparse(BytesIO(data))
        for _, el in it:
            if '}' in el.tag and strip_ns:
                # strip all namespaces
                el.tag = el.tag.split('}', 1)[1]
        return it.root
    except Exception as err:
        # Keep the error message short: show only the start of the input.
        snippet = repr(data)
        if len(snippet) > 35:
            snippet = snippet[:35] + " ..."
        raise ValueError("Unable to parse XML: {0} ({1})".format(err, snippet))
示例2: process_species
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def process_species(self, limit):
    """
    Stream the gzipped XML data file and pass every parsed element on
    to the table processor.

    The file path is ``self.rawdir`` joined with
    ``self.files['data']['file']``. Each element produced by the parser
    is handed to ``self.process_xml_table`` together with the table name
    ``'Species_gb'`` and the ``self._process_species_table_row`` callback.

    :param limit: forwarded unchanged to ``self.process_xml_table``
    :return: None
    """
    species_path = '/'.join((self.rawdir, self.files['data']['file']))
    with gzip.open(species_path, 'rb') as raw_stream:
        text_stream = io.TextIOWrapper(raw_stream, newline="")
        # Drop the first line (the xml declaration) before parsing.
        text_stream.readline()
        parsed = ET.iterparse(text_stream)
        for _event, element in parsed:
            # Species ids are == NCBITaxon ids
            self.process_xml_table(
                element,
                'Species_gb',
                self._process_species_table_row,
                limit,
            )
示例3: main
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def main(argv):
    """Read a FlowMonitor XML file and print the paths of every flow.

    A ``Simulation`` is built from each ``FlowMonitor`` element that
    closes at nesting level 0. A dot is written to stdout per simulation
    as a progress indicator. Afterwards one line is written per flow,
    holding its ``flowId`` and ``paths``.

    :param argv: argument list; ``argv[1]`` is the path of the XML file.
    """
    # sys.stdout.write is used instead of the print statement so the
    # function runs under both Python 2 and Python 3.
    sys.stdout.write("Reading XML file ")
    sys.stdout.flush()
    level = 0
    sim_list = []
    # The context manager closes the input file even if parsing fails.
    with open(argv[1]) as file_obj:
        for event, elem in ElementTree.iterparse(file_obj, events=("start", "end")):
            if event == "start":
                level += 1
            if event == "end":
                level -= 1
                if level == 0 and elem.tag == 'FlowMonitor':
                    sim_list.append(Simulation(elem))
                    elem.clear()  # won't need this any more
                    sys.stdout.write(".")
                    sys.stdout.flush()
    sys.stdout.write(" done.\n")
    for sim in sim_list:
        for flow in sim.flows:
            sys.stdout.write("FlowID: %i %s\n" % (flow.flowId, flow.paths))
示例4: handleMatch
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def handleMatch(self, match: Match[str]) -> Element:
    """Turn the TeX captured by ``match`` into an element.

    The ``body`` group is rendered inline with ``render_tex``. If that
    returns ``None``, a ``span`` of class ``tex-error`` containing the
    original ``$$...$$`` source is returned. Otherwise the rendered
    markup is parsed and the last element reported by the parser is
    returned.
    """
    body = match.group('body')
    markup = render_tex(body, is_inline=True)
    if markup is None:
        # Something went wrong while rendering
        fallback = Element('span')
        fallback.set('class', 'tex-error')
        fallback.text = '$$' + body + '$$'
        return fallback

    # Python-Markdown needs an ElementTree object, but one carrying real
    # XML namespaces gets mangled when it is serialized. So xmlns is
    # smuggled through the parser under another name and then restored
    # as an ordinary attribute.
    assert ' zulip-xmlns="' not in markup
    markup = markup.replace(' xmlns="', ' zulip-xmlns="')
    for _event, node in etree.iterparse(StringIO(markup)):
        attributes = node.attrib
        if 'zulip-xmlns' in attributes:
            attributes['xmlns'] = attributes.pop('zulip-xmlns')
        root = node
    return root
示例5: process_metalink
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def process_metalink(self, ml_file):
    """Extract the download URLs listed in a metalink file.

    :param ml_file: path of the metalink XML file to read.
    :return: a list with the text of ``resources/url`` for every child
        of the ``files`` element, or ``None`` when that list would be
        empty or the document has no ``files`` element.
    """
    print("Processing metalink file: {0}".format(ml_file))
    with open(ml_file, 'r') as ml:
        xml = ml.read()
    # Hack to remove annoying namespace
    it = ET.iterparse(StringIO(xml))
    for _, el in it:
        if '}' in el.tag:
            el.tag = el.tag.split('}', 1)[1]  # strip all namespaces
    root = it.root
    ml_files = root.find('files')
    if ml_files is None:
        # No <files> element: report "no URLs" instead of failing with a
        # TypeError when iterating over None.
        return None
    dl_urls = [dl.find('resources').find('url').text for dl in ml_files]
    return dl_urls or None
# Get download urls from a csv file
示例6: parse
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def parse(filename):
    """Returns a DFXMLObject populated from the contents of the (string) filename argument."""
    # NOTE(review): the nesting below was reconstructed from a flattened
    # listing; in particular confirm that the VolumeObject/FileObject checks
    # in the "end" branch are siblings of the DFXMLObject check.
    # NOTE(review): `iterparse` here yields objects that are tested against
    # DFXMLObject / VolumeObject / FileObject, so it is presumably a
    # project-level iterparse rather than ElementTree's - confirm.
    retval = None    # the object that is eventually returned
    appender = None  # the container that FileObjects are currently appended to
    for (event, obj) in iterparse(filename):
        if event == "start":
            if isinstance(obj, DFXMLObject):
                retval = obj
                appender = obj
            elif isinstance(obj, VolumeObject):
                # A volume starts: attach it to the result and route the
                # following files into it.
                retval.append(obj)
                appender = obj
        elif event == "end":
            if isinstance(obj, DFXMLObject):
                # Only adopt the object here if no "start" event set it.
                if retval is None:
                    retval = obj
                    appender = obj
            if isinstance(obj, VolumeObject):
                # The volume is finished: later files go to the result itself.
                appender = retval
            elif isinstance(obj, FileObject):
                appender.append(obj)
    return retval
示例7: process_stream_iterparse
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def process_stream_iterparse(self, stream, heading=None):
    """Collect the shared strings found in ``stream``.

    Every parsed element whose tag equals ``U_SSML12 + 'si'`` is
    converted with ``get_text_from_si_or_is`` and the result is appended
    to ``self.bk._sharedstrings``; the element is then cleared.
    Diagnostics are written according to ``self.verbosity``.

    :param stream: XML source handed to ``ET.iterparse``.
    :param heading: optional title logged first when verbosity >= 2.
    """
    if heading is not None and self.verbosity >= 2:
        fprintf(self.logfile, "\n=== %s ===\n", heading)
    shared_strings = self.bk._sharedstrings
    wanted_tag = U_SSML12 + 'si'
    index = -1
    for _event, node in ET.iterparse(stream):
        if node.tag == wanted_tag:
            index += 1
            if self.verbosity >= 3:
                fprintf(self.logfile, "element #%d\n", index)
                self.dump_elem(node)
            shared_strings.append(get_text_from_si_or_is(self, node))
            node.clear()  # destroy all child elements
    if self.verbosity >= 2:
        self.dumpout('Entries in SST: %d', len(shared_strings))
    if self.verbosity >= 3:
        for position, value in enumerate(shared_strings):
            fprintf(self.logfile, "SST x=%d s=%r\n", position, value)
示例8: __init__
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def __init__(self, filename):
    """Load package metadata from ``filename``.

    ``self.package_count`` is taken from the ``packages`` attribute of
    the ``metadata`` element. ``self.packages`` receives one dict per
    ``package`` element whose ``type`` is ``rpm``, with the keys name,
    epoch, ver, rel, arch, summary, description and srpm.

    :param filename: source passed to ``eT.iterparse``.
    """
    self.package_count = 0
    self.packages = []
    metadata_tag = "{%s}metadata" % NS["primary"]
    package_tag = "{%s}package" % NS["primary"]
    tree_root = None
    for event, node in eT.iterparse(filename, events=("start", "end")):
        if event == "start" and node.tag == metadata_tag:
            tree_root = node
            self.package_count = int(node.get("packages"))
        elif event == "end" and node.tag == package_tag:
            if node.get("type") == "rpm":
                name = text_strip(node.find("primary:name", NS))
                version = node.find("primary:version", NS)
                self.packages.append({
                    "name": name,
                    "epoch": version.get("epoch"),
                    "ver": version.get("ver"),
                    "rel": version.get("rel"),
                    "arch": text_strip(node.find("primary:arch", NS)),
                    "summary": text_strip(node.find("primary:summary", NS)),
                    "description": text_strip(node.find("primary:description", NS)),
                    "srpm": node.find("primary:format", NS).find("rpm:sourcerpm", NS).text,
                })
            # Clear the XML tree continuously
            tree_root.clear()
示例9: parse_and_remove
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def parse_and_remove(filename, path):
    """Incrementally parse an XML document, yielding and pruning matches.

    For every element that finishes parsing, a ``(tag, text)`` tuple is
    yielded. When the stack of currently open tags equals ``path`` split
    on ``'/'`` (the root tag included), the element itself is yielded as
    well and then detached from its parent, so processed elements do not
    pile up in the tree.

    :param filename: file name or file object accepted by ``iterparse``.
    :param path: slash-separated tag path starting at the root,
        e.g. ``'root/row'``.
    """
    path_parts = path.split('/')
    doc = iterparse(filename, ('start', 'end'))
    tag_stack = []
    elem_stack = []
    for event, elem in doc:
        # A plain equality test is required here: ``event == 'start' in
        # elem.tag`` is a chained comparison that also demands the tag to
        # contain the text 'start', which left the stacks empty.
        if event == 'start':
            tag_stack.append(elem.tag)
            elem_stack.append(elem)
        elif event == 'end':
            yield elem.tag, elem.text
            if tag_stack == path_parts:
                yield elem
                # The root element has no parent to be detached from.
                if len(elem_stack) > 1:
                    elem_stack[-2].remove(elem)
            # Every 'end' has a matching 'start', so the stacks are never
            # empty at this point.
            tag_stack.pop()
            elem_stack.pop()
示例10: process_stream_iterparse
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def process_stream_iterparse(self, stream, heading=None):
    """Fill the shared-string table from the elements of ``stream``.

    Elements whose tag is ``U_SSML12 + 'si'`` are converted with
    ``get_text_from_si_or_is``; each result is appended to
    ``self.bk._sharedstrings`` and the element is cleared. What gets
    logged depends on ``self.verbosity``.

    :param stream: XML source handed to ``ET.iterparse``.
    :param heading: optional title logged first when verbosity >= 2.
    """
    if self.verbosity >= 2 and heading is not None:
        fprintf(self.logfile, "\n=== %s ===\n", heading)
    target = U_SSML12 + 'si'
    table = self.bk._sharedstrings
    seen = 0  # number of matching elements handled so far
    for _, item in ET.iterparse(stream):
        if item.tag != target:
            continue
        if self.verbosity >= 3:
            fprintf(self.logfile, "element #%d\n", seen)
            self.dump_elem(item)
        seen += 1
        text = get_text_from_si_or_is(self, item)
        table.append(text)
        item.clear()  # destroy all child elements
    if self.verbosity >= 2:
        self.dumpout('Entries in SST: %d', len(table))
    if self.verbosity >= 3:
        for slot, entry in enumerate(table):
            fprintf(self.logfile, "SST x=%d s=%r\n", slot, entry)
示例11: articles
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def articles():
    """Yield an ``Article`` for each usable page in ``articles.xml.bz2``.

    A page is skipped when it has no title element, no title text, a
    title containing ``':'``, no revision, no text element or no text.
    Each ``Article`` is built from a running index starting at 0, the
    title and the text. After a page has been yielded the ``mediawiki``
    root element is cleared.
    """
    ns = '{http://www.mediawiki.org/xml/export-0.10/}'
    root_tag = ns + 'mediawiki'
    page_tag = ns + 'page'
    title_tag = ns + 'title'
    revision_tag = ns + 'revision'
    text_tag = ns + 'text'

    count = 0
    with bz2.BZ2File("articles.xml.bz2", 'r') as dump:
        for event, node in iterparse(dump, events=("start", "end")):
            if event == 'start':
                # Remember the document root so it can be emptied later.
                if node.tag == root_tag:
                    root = node
                continue
            if event != 'end' or node.tag != page_tag:
                continue

            title_node = node.find(title_tag)
            if title_node is None:
                continue
            title = title_node.text
            if title is None or ':' in title:
                continue
            revision_node = node.find(revision_tag)
            if revision_node is None:
                continue
            text_node = revision_node.find(text_tag)
            if text_node is None:
                continue
            body = text_node.text
            if body is None:
                continue

            yield Article(count, title, body)
            count += 1
            root.clear()
示例12: isa
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def isa(filename):
    """Return ``Reader`` if ``filename`` is a matching manifest.safe file.

    The file is read twice: once to collect its namespace prefix
    declarations and once to build the tree. ``Reader`` is returned only
    when the platform family name is ``'SENTINEL-1'`` and the product
    type is ``'SLC'``. In every other case, including any exception
    raised while reading or searching, ``None`` is returned.
    """
    family_name_path = ('./metadataSection/metadataObject[@ID="platform"]/'
                        'metadataWrap/xmlData/safe:platform/safe:familyName')
    product_type_path = ('./metadataSection/metadataObject[@ID="generalProductInformation"]/'
                         'metadataWrap/xmlData/s1sarl1:standAloneProductInformation/'
                         's1sarl1:productType')
    try:
        # 'start-ns' events carry (prefix, uri) pairs.
        ns = {
            prefix: uri
            for _, (prefix, uri) in ET.iterparse(filename, events=['start-ns'])
        }
        # Parse everything else
        root_node = ET.parse(filename).getroot()
        if root_node.find(family_name_path, ns).text != 'SENTINEL-1':
            return None
        if root_node.find(product_type_path, ns).text != 'SLC':
            return None
        return Reader
    except Exception:
        # Best-effort format check: any failure means "not this format".
        return None
示例13: iterparse
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def iterparse(source, tag):
    """Yield each outermost ``tag`` element of ``source`` once it is complete.

    Nested elements carrying the same tag are tracked with a depth
    counter, so only the element that brings the counter back to zero
    is yielded. When the generator is resumed after a yield, the
    document root is cleared.

    :param source: file name or file object for ``ElementTree.iterparse``.
    :param tag: the element tag to look for.
    """
    events = iter(ElementTree.iterparse(source, events=('start', 'end')))
    # The very first event is the 'start' of the document root.
    _, root = next(events)
    nesting = 0
    for kind, node in events:
        if node.tag != tag:
            continue
        if kind == 'start':
            nesting += 1
            continue
        nesting -= 1
        if nesting == 0:
            yield node
            root.clear()
示例14: parse_impl
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def parse_impl(self):
    """
    Walk ``self.handle`` as a stream of start/end events instead of
    loading the whole document first.

    The stripped text of the first closed ``h1`` is stored in
    ``self.user`` while that attribute is still empty. Each opening
    ``div`` whose class attribute contains "thread" is turned into a
    thread via ``parse_participants`` and ``parse_thread`` and passed to
    ``save_thread``.
    """
    # Cast to str to ensure not unicode under Python 2, as the parser
    # doesn't like that.
    utf8_parser = XMLParser(encoding=str('UTF-8'))
    events = ET.iterparse(self.handle, events=("start", "end"), parser=utf8_parser)
    for pos, element in events:
        tag, class_attr = _tag_and_class_attr(element)
        if pos == "end":
            if tag == "h1" and not self.user:
                self.user = element.text.strip()
            continue
        if pos == "start" and tag == "div" and "thread" in class_attr:
            # parse_thread receives the same iterator as this loop.
            members = self.parse_participants(element)
            self.save_thread(self.parse_thread(members, events, True))
示例15: create_xml_tree
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def create_xml_tree(filepath):
    """
    Parse an XML file and return its root with all namespaces removed.

    ElementTree prefixes tag and attribute names with ``{namespace}``.
    This function strips that prefix from every element tag and every
    attribute name, so elements can be located by their plain names.
    Solution based on https://stackoverflow.com/questions/13412496/python-elementtree-module-how-to-ignore-the-namespace-of-xml-files-to-locate-ma

    :param filepath: file name or file object accepted by ``ET.iterparse``.
    :return: the root element of the parsed, namespace-free tree.
    """
    it = ET.iterparse(filepath)
    for _, el in it:
        if '}' in el.tag:
            el.tag = el.tag.split('}', 1)[1]  # strip all namespaces
        # Iterate over a snapshot of the keys: the loop adds and removes
        # entries, and changing a dict while iterating over it directly
        # raises RuntimeError on Python 3.
        for at in list(el.attrib):  # strip namespaces of attributes too
            if '}' in at:
                newat = at.split('}', 1)[1]
                el.attrib[newat] = el.attrib[at]
                del el.attrib[at]
    return it.root