本文整理汇总了Python中xml.etree.ElementTree.iterparse方法的典型用法代码示例。如果您正苦于以下问题:Python ElementTree.iterparse方法的具体用法?Python ElementTree.iterparse怎么用?Python ElementTree.iterparse使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块xml.etree.ElementTree的用法示例。
在下文中一共展示了ElementTree.iterparse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: ensure_elementtree_imported
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def ensure_elementtree_imported(verbosity, logfile):
    """Bind the module-level ``ET`` to the first importable ElementTree flavour.

    Does nothing when ``ET`` is already set. Otherwise the candidate
    implementations are tried in a fixed order and the first one that imports
    becomes the global ``ET``. The global ``ET_has_iterparse`` is set to
    ``True`` when ``ET`` has an ``iterparse`` attribute and calling it on an
    empty byte stream does not raise ``NotImplementedError``; in every other
    case it is left untouched.

    Args:
        verbosity: when truthy, a summary of the chosen implementation is
            printed to ``logfile``.
        logfile: file-like object receiving the summary line.

    Raises:
        Exception: if none of the candidate implementations can be imported.
    """
    global ET, ET_has_iterparse
    if ET is not None:
        return

    if "IronPython" in sys.version:
        import xml.etree.ElementTree as ET
        # 2.7.2.1: fails later with
        # NotImplementedError: iterparse is not supported on IronPython. (CP #31923)
    else:
        # Each fallback is attempted only when the previous import failed.
        try:
            import xml.etree.cElementTree as ET
        except ImportError:
            try:
                import cElementTree as ET
            except ImportError:
                try:
                    import lxml.etree as ET
                except ImportError:
                    try:
                        import xml.etree.ElementTree as ET
                    except ImportError:
                        try:
                            import elementtree.ElementTree as ET
                        except ImportError:
                            raise Exception("Failed to import an ElementTree implementation")

    if hasattr(ET, 'iterparse'):
        # Having the attribute is not enough: probe it with an empty stream.
        probe = BYTES_IO(b'')
        try:
            ET.iterparse(probe)
        except NotImplementedError:
            pass
        else:
            ET_has_iterparse = True

    if not verbosity:
        return

    # Collect every module attribute whose name spells "version" once case
    # and underscores are ignored.
    version_items = []
    for attr_name in ET.__dict__.keys():
        if attr_name.lower().replace('_', '') == 'version':
            version_items.append((attr_name, getattr(ET, attr_name)))
    etree_version = repr(version_items)
    print(ET.__file__, ET.__name__, etree_version, ET_has_iterparse, file=logfile)
示例2: process_map
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def process_map(filename):
    """Count normalised ``cuisine`` values found in an XML file.

    Every ``<tag k="cuisine" v="...">`` element contributes one count. The
    value is lower-cased and a few known spelling variants are folded onto a
    canonical name. Values containing ``;`` or ``,`` are left out of the
    result.

    Args:
        filename: path (or file object) accepted by ``ET.iterparse``.

    Returns:
        dict mapping each cuisine name to the number of times it occurred.
    """
    # Known variants -> canonical spelling (keys are already lower-case).
    synonyms = {
        'coffe_shop': 'coffee',
        'coffee_shop': 'coffee',
        'steak_house': 'steak',
        'steaks': 'steak',
        'mexcian_food': 'mexican',
    }

    counts = {}
    for _, element in ET.iterparse(filename):
        # Compare with ==: `in ("tag")` is a substring test against the
        # string "tag" and would also accept elements named "a", "t" or "g".
        if element.tag != "tag" or element.attrib.get('k') != "cuisine":
            continue
        raw = element.attrib['v'].lower()
        cuisine = synonyms.get(raw, raw)
        counts[cuisine] = counts.get(cuisine, 0) + 1

    # Multi-valued entries ("a;b" or "a,b") are excluded from the result.
    return {
        name: total
        for name, total in counts.items()
        if ";" not in name and "," not in name
    }
示例3: _read
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def _read(self):
    """Return an ``iterparse`` iterator over ``self.f`` with start/end events.

    When the first attempt raises ``FileNotFoundError``, ``self._create()``
    is called and the parse is attempted a second time.
    """
    wanted_events = ('start', 'end')
    try:
        return ET.iterparse(self.f, wanted_events)
    except FileNotFoundError:
        self._create()
    return ET.iterparse(self.f, wanted_events)
示例4: process
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def process(fn, options):
    """Parse ``fn`` with ``ET.iterparse`` and hand the events to ``process_stream``.

    An ``options.output_dir`` of ``'-'`` means STDOUT, signalled by passing
    ``None`` as the output directory; any other value delegates to
    ``make_output_directory``. File names ending in ``.gz`` are read through
    ``gzip.GzipFile``; everything else is passed to ``iterparse`` by name.
    """
    # None stands for STDOUT.
    outdir = None if options.output_dir == '-' else make_output_directory(fn, options)

    if fn.endswith('.gz'):
        with gzip.GzipFile(fn) as stream:
            process_stream(ET.iterparse(stream), fn, outdir, options)
        return

    process_stream(ET.iterparse(fn), fn, outdir, options)
示例5: _GetSheetRows
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def _GetSheetRows(self, filename):
    """Parses the contents of the first sheet of an XLSX document.

    Args:
        filename (str): The file path of the XLSX document to parse.

    Returns:
        list[list[str]]: A list of lists representing the rows of the first
            sheet.

    Raises:
        ValueError: if the sheet cannot be found, or a string cannot be read.
    """
    # The archive and its members are opened as context managers so their
    # handles are released even when parsing raises.
    with zipfile.ZipFile(filename) as zip_file:
        member_names = zip_file.namelist()

        # Fail if we can't find the expected first sheet.
        if self._SHEET1 not in member_names:
            raise ValueError(
                'Unable to locate expected sheet: {0:s}'.format(self._SHEET1))

        # Generate a reference table of shared strings if available.
        strings = []
        if self._SHARED_STRINGS in member_names:
            with zip_file.open(self._SHARED_STRINGS) as zip_file_object:
                for _, element in ElementTree.iterparse(zip_file_object):
                    if element.tag.endswith(self._SHARED_STRING_TAG):
                        strings.append(element.text)

        row = []
        rows = []
        value = ''
        with zip_file.open(self._SHEET1) as zip_file_object:
            for _, element in ElementTree.iterparse(zip_file_object):
                # Remember the most recent value text; it is consumed when
                # the enclosing column element ends.
                if (element.tag.endswith(self._VALUE_STRING_TAG) or
                        element.tag.endswith(self._SHARED_STRING_TAG)):
                    value = element.text

                if element.tag.endswith(self._COLUMN_TAG):
                    # Grab value from shared string reference table if type
                    # shared string.
                    if (strings and element.attrib.get(self._TYPE_ATTRIBUTE) ==
                            self._SHARED_STRING_TYPE):
                        try:
                            value = strings[int(value)]
                        except (IndexError, TypeError, ValueError):
                            # TypeError covers an empty value element, whose
                            # text is None.
                            raise ValueError(
                                'Unable to successfully dereference shared string.')
                    row.append(value)

                # If we see the end tag of the row, record row in rows and
                # reset.
                if element.tag.endswith(self._ROW_TAG):
                    rows.append(row)
                    row = []

    return rows
示例6: getDataFromExternal
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def getDataFromExternal(self, date, progress_callback=None):
    """Open the configured XMLTV source and pass it to ``self.parseXMLTV``.

    For ``XMLTVSource.TYPE_LOCAL_FILE`` the data comes from
    ``FileWrapper(self.xmltvFile)``; otherwise ``self.xmltvUrl`` is downloaded
    in full and wrapped in an in-memory buffer.

    Args:
        date: not used by this method.
        progress_callback: forwarded unchanged to ``parseXMLTV``.

    Returns:
        Whatever ``self.parseXMLTV`` returns.
    """
    # Both branches request the same events so parseXMLTV always receives the
    # same kind of stream.
    # NOTE(review): parseXMLTV is not visible here - confirm it expects
    # ("start", "end") pairs for URL sources as well.
    events = ("start", "end")

    if self.xmltvType == XMLTVSource.TYPE_LOCAL_FILE:
        f = FileWrapper(self.xmltvFile)
        context = ElementTree.iterparse(f, events=events)
        size = f.size
    else:
        u = urllib2.urlopen(self.xmltvUrl, timeout=30)
        try:
            xml = u.read()
        finally:
            # Release the connection even when the download fails part-way.
            u.close()
        f = StringIO.StringIO(xml)
        context = ElementTree.iterparse(f, events=events)
        size = len(xml)

    return self.parseXMLTV(context, f, size, self.logoFolder, progress_callback)
示例7: __iter__
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def __iter__(self):
    """Yield one parsed group per ``protein_group`` element of ``self.protxml``.

    Namespace declarations seen while parsing are merged into ``self.nsmap``.
    When a ``proteinprophet_details`` element ends, its parsed form is stored
    in ``self.distribution``. With ``self.is_debug`` set, every yielded group
    is pretty-printed to ``<protxml>.dump`` and the distribution to
    ``<protxml>.distribution.dump``.

    Yields:
        The value returned by ``parse_protein_group`` for each group element.
    """
    if self.is_debug:
        fname = self.protxml + '.dump'
        logging.debug('Dumping protxml reads into ' + fname)
        self.debug_file = open(fname, 'w')
        self.debug_file.write('{\n')

    try:
        for event, elem in etree.iterparse(self.protxml, events=('end', 'start-ns')):
            if event == 'start-ns':
                # For 'start-ns' events elem is a (prefix, uri) pair, so a
                # one-element set of that pair updates a single mapping entry.
                self.nsmap.update({elem})
            if event == 'end':
                if elem.tag == parse.fixtag('', 'protein_group', self.nsmap):
                    group = parse_protein_group(elem, self.nsmap)
                    yield group
                    if self.is_debug:
                        pprint(group, stream=self.debug_file)
                        self.debug_file.write(',\n')
                    # Free the subtree once it has been consumed.
                    elem.clear()
                elif elem.tag == parse.fixtag('', 'proteinprophet_details', self.nsmap):
                    self.distribution = parse_protein_probabilities(elem, self.nsmap)
                    if self.is_debug:
                        fname = self.protxml + '.distribution.dump'
                        # Close the dump file right after writing it.
                        with open(fname, 'w') as distribution_file:
                            pprint(self.distribution, distribution_file)
                    elem.clear()
        if self.is_debug:
            self.debug_file.write('}\n')
    finally:
        # Runs on normal completion, on a parse error, and when the consumer
        # abandons the generator early, so the debug file never stays open.
        if self.is_debug:
            self.debug_file.close()
示例8: count_tags
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def count_tags(filename):
    """Count how many times each element tag occurs in an XML file.

    Args:
        filename: path (or file object) accepted by ``ET.iterparse``.

    Returns:
        dict mapping every tag name to its number of occurrences.
    """
    tags = {}
    for _, elem in ET.iterparse(filename):
        # dict.get replaces the Python-2-only has_key() and makes the first
        # occurrence count as 1 rather than 0.
        tags[elem.tag] = tags.get(elem.tag, 0) + 1
    return tags
示例9: __init__
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def __init__(self, file):
    """Start an incremental parse of ``file`` and remember its root element.

    The first event is consumed immediately; the element it carries is kept
    as ``self.root`` and the partly consumed iterator as ``self.context``.
    """
    # Request both event kinds so the very first item is the root's 'start'.
    event_stream = iter(ElementTree.iterparse(file, events=('start', 'end')))
    _first_event, document_root = next(event_stream)
    self.root, self.context = document_root, event_stream
示例10: getDataFromExternal
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def getDataFromExternal(self, date, progress_callback=None):
    """Parse ``self.xmltvFile`` and return the result of ``self.parseXMLTV``.

    The file is wrapped in ``FileWrapper`` and read with start/end events.
    ``date`` is not used by this method; ``progress_callback`` is forwarded
    unchanged.
    """
    source = FileWrapper(self.xmltvFile)
    event_stream = ElementTree.iterparse(source, events=("start", "end"))
    total_size = source.size
    return self.parseXMLTV(
        event_stream, source, total_size, self.logoFolder, progress_callback)
示例11: reports
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def reports(request):
    """Stream-parse the XML carried by ``request`` and answer with an empty response.

    Each parsed element is currently discarded; the per-element handling is
    still a placeholder.
    """
    for _event, _element in ET.iterparse(request):
        # do stuff to parse this element
        # save a models.Report
        continue
    # return list of file hashes we need uploaded
    return HttpResponse()
示例12: jsonify
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def jsonify(file_in, pretty=False):
    """Convert the ``node`` and ``way`` elements of an XML file to JSON lines.

    Street-name tags are rewritten through ``update_name`` and
    ``update_direction`` before the element is passed to ``shape_element``.
    Every truthy shaped element is written as one JSON document to
    ``<file_in>.json`` and collected in the returned list.

    Args:
        file_in: path of the XML file; the output path is derived from it.
        pretty: when true, JSON documents are written with ``indent=2``.

    Returns:
        list of the shaped elements that were written.
    """
    file_out = "{0}.json".format(file_in)
    data = []
    with codecs.open(file_out, "w") as fo:
        # Use 'end' events: on 'start' an element's children are not
        # guaranteed to be parsed yet, so its <tag> children could be missed.
        for _, element in ET.iterparse(file_in, events=("end",)):
            if element.tag not in ("node", "way"):
                continue

            for tag in element.iter("tag"):
                if not is_street_name(tag):
                    continue
                if street_type_re.search(tag.attrib['v']):
                    street_name = update_name(tag.attrib['v'], mapping)
                    if directions_re.search(street_name):
                        street_name = update_direction(street_name, direction_mapping)
                    tag.set('v', street_name)

            el = shape_element(element)
            if el:
                data.append(el)
                if pretty:
                    fo.write(json.dumps(el, indent=2) + "\n")
                else:
                    fo.write(json.dumps(el) + "\n")

            # Only fully processed node/way elements are cleared; clearing
            # their children earlier would wipe data that is still needed.
            element.clear()
    return data
开发者ID:dent424,项目名称:data-wrangle-openstreetmaps-data,代码行数:29,代码来源:xml_analysis_and_json_conversion.py
示例13: UniprotIterator
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def UniprotIterator(handle, alphabet=Alphabet.ProteinAlphabet(), return_raw_comments=False):
    """Generate parsed records from UniProt XML, one ``entry`` element at a time.

    Each completed ``entry`` element is handed to ``Parser`` and the result of
    its ``parse()`` call is yielded; the element is cleared afterwards to
    release memory. This generator can be used in Bio.SeqIO.

    Args:
        handle: an object with a ``read`` attribute, or a ``str`` holding the
            XML text (it is wrapped in ``StringIO``).
        alphabet: forwarded to ``Parser``. Nucleotide alphabets, including a
            gapped wrapper around one, are rejected.
        return_raw_comments: forwarded to ``Parser``; when true, comment
            fields are returned as complete XML to allow further processing.

    Raises:
        ValueError: if ``alphabet`` is (or wraps) a nucleotide alphabet.
        Exception: if ``handle`` is neither readable nor a string.
        MissingExternalDependencyError: if no ElementTree module is available.
    """
    nucleotide = Alphabet.NucleotideAlphabet
    if isinstance(alphabet, nucleotide):
        raise ValueError("Wrong alphabet %r" % alphabet)
    if isinstance(alphabet, Alphabet.Gapped) and isinstance(alphabet.alphabet, nucleotide):
        raise ValueError("Wrong alphabet %r" % alphabet)

    if not hasattr(handle, "read"):
        if not isinstance(handle, str):
            raise Exception('An XML-containing handler or an XML string must be passed')
        handle = StringIO(handle)

    if ElementTree is None:
        from Bio import MissingExternalDependencyError
        raise MissingExternalDependencyError(
            "No ElementTree module was found. "
            "Use Python 2.5+, lxml or elementtree if you "
            "want to use Bio.SeqIO.UniprotIO.")

    for event, elem in ElementTree.iterparse(handle, events=("start", "end")):
        # Only a finished <entry> element is complete enough to parse.
        if event != "end" or elem.tag != NS + "entry":
            continue
        yield Parser(elem, alphabet=alphabet, return_raw_comments=return_raw_comments).parse()
        elem.clear()
示例14: main
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def main():
    """Convert the ``row`` elements of an XML file to newline-delimited JSON.

    Command-line arguments:
        --in   path of the XML input file (required).
        --out  path of the JSON output file; defaults to the input path with
               its extension replaced by ``.json``.

    Every ``row`` element that carries a ``_uuid`` attribute becomes one JSON
    object, appended to the output file on its own line. The object holds the
    row's ``_address`` as ``source_uri``, the ``latitude``/``longitude``
    attributes of a ``location_1`` child, and the text of every other child
    keyed by its tag.
    """
    import os

    parser = argparse.ArgumentParser(description='Get file paths for input/output')
    parser.add_argument('--in', dest='input_file', required=True)
    parser.add_argument('--out', dest='output_file')
    args = parser.parse_args()

    input_file = args.input_file
    # Always bind output_file: use --out when given, otherwise derive it from
    # the input path. splitext also copes with names that have no extension.
    output_file = args.output_file or os.path.splitext(input_file)[0] + ".json"

    # convert XML to json
    with open(output_file, 'a') as f:
        # parse input XML file
        for event, elem in Et.iterparse(input_file):
            if elem.tag == "row" and '_uuid' in elem.attrib:
                this_dict = dict()
                # Add _address
                this_dict['source_uri'] = elem.attrib['_address']
                for child in elem.findall('*'):
                    if child.tag == "location_1":
                        if 'latitude' in child.attrib:
                            this_dict['latitude'] = child.attrib['latitude']
                        if 'longitude' in child.attrib:
                            this_dict['longitude'] = child.attrib['longitude']
                    else:
                        this_dict[child.tag] = child.text
                # Write record to file as JSON, one object per line
                json.dump(this_dict, f)
                f.write('\n')
                # this helps reduce mem usage but more can be done
                # (see http://effbot.org/zone/element-iterparse.htm)
                elem.clear()
示例15: parse
# 需要导入模块: from xml.etree import ElementTree [as 别名]
# 或者: from xml.etree.ElementTree import iterparse [as 别名]
def parse(self, xml_file, from_string=False):
    """Import .nessus file.

    Args:
        xml_file: path or file object for ``ET.iterparse``; when
            ``from_string`` is true, the XML text itself.
        from_string: wrap ``xml_file`` in ``StringIO`` before parsing.

    The ``name`` attribute of the ``Report`` element is stored in
    ``self.name``. Each ``ReportHost`` element is wrapped in a ``ReportHost``
    object, appended to ``self.targets`` when truthy, and used to widen
    ``self.scan_start`` / ``self.scan_end``.
    """
    # Parse XML file
    if from_string:
        xml_file = StringIO(xml_file)

    # Iterate through each host scanned and create objects for each
    for event, elem in ET.iterparse(xml_file):
        if event == "end":
            if elem.tag == "Report":
                # Grab the report name from the Report element
                self.name = elem.attrib.get("name")
                continue
            if elem.tag != "ReportHost":
                # Only process ReportHost elements
                continue

        host = ReportHost(elem)
        if not host:
            continue
        self.targets.append(host)

        # Update Report dates
        # NOTE(review): the nesting of these checks was reconstructed from
        # flattened source - confirm against the upstream file.
        if not self.scan_start and host.get("host_start"):
            self.scan_start = host.host_start
        if not self.scan_end:
            self.scan_end = host.host_end
        if host.get("host_start") and host.host_start < self.scan_start:
            self.scan_start = host.host_start
        if host.host_end > self.scan_end:
            self.scan_end = host.host_end