本文整理汇总了Python中xml.etree.cElementTree.iterparse方法的典型用法代码示例。如果您正苦于以下问题：Python cElementTree.iterparse方法的具体用法？Python cElementTree.iterparse怎么用？Python cElementTree.iterparse使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块xml.etree.cElementTree的用法示例。
在下文中一共展示了cElementTree.iterparse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _find_elements
# 需要导入模块: from xml.etree import cElementTree [as 别名]
# 或者: from xml.etree.cElementTree import iterparse [as 别名]
def _find_elements(self, result, elements):
"""Find interesting elements from XML.
This function tries to only look for specified elements
without parsing the entire XML. The specified elements is better
located near the beginning.
Args:
result: response XML.
elements: a set of interesting element tags.
Returns:
A dict from element tag to element value.
"""
element_mapping = {}
result = StringIO.StringIO(result)
for _, e in ET.iterparse(result, events=('end',)):
if not elements:
break
if e.tag in elements:
element_mapping[e.tag] = e.text
elements.remove(e.tag)
return element_mapping
示例2: main
# 需要导入模块: from xml.etree import cElementTree [as 别名]
# 或者: from xml.etree.cElementTree import iterparse [as 别名]
def main(argv):
    """Parse the XML file named by ``argv[1]`` and print every flow's paths.

    Each ``FlowMonitor`` element that closes at the root level is passed to
    ``Simulation`` and a progress dot is written. After parsing, the flow
    ID and paths of every flow of every collected simulation are printed.

    Args:
        argv: argument list; ``argv[1]`` is the path of the XML file.
    """
    # sys.stdout.write and single-argument print() calls behave the same on
    # Python 2 and Python 3.
    sys.stdout.write("Reading XML file ")
    sys.stdout.flush()
    level = 0
    sim_list = []
    # The context manager closes the file even when parsing fails (the
    # handle used to be leaked). Binary mode lets the XML parser apply the
    # encoding declared in the document itself.
    with open(argv[1], "rb") as file_obj:
        for event, elem in ElementTree.iterparse(file_obj, events=("start", "end")):
            if event == "start":
                level += 1
            elif event == "end":
                level -= 1
                if level == 0 and elem.tag == 'FlowMonitor':
                    sim_list.append(Simulation(elem))
                    elem.clear()  # won't need this any more
                    sys.stdout.write(".")
                    sys.stdout.flush()
    print(" done.")
    for sim in sim_list:
        for flow in sim.flows:
            print("FlowID: %i %s" % (flow.flowId, flow.paths))
示例3: xml_namespace
# 需要导入模块: from xml.etree import cElementTree [as 别名]
# 或者: from xml.etree.cElementTree import iterparse [as 别名]
def xml_namespace(file):
    """
    Collect the namespace declarations of an xml file as a dict.

    Every ``start-ns`` event of the document is recorded; the empty prefix
    is stored under the key ``'default'``. When a prefix is declared more
    than once, the last declaration wins.

    :param str file: xml file path
    :return dict: mapping of namespace prefix to namespace URI
    """
    namespaces = {}
    for kind, payload in ET.iterparse(file, ("start", "start-ns", "end-ns")):
        if kind != "start-ns":
            continue
        # For "start-ns" events the payload is a (prefix, uri) pair.
        prefix, uri = payload
        key = 'default' if prefix == '' else prefix
        namespaces[key] = uri
    return namespaces
示例4: iterparse
# 需要导入模块: from xml.etree import cElementTree [as 别名]
# 或者: from xml.etree.cElementTree import iterparse [as 别名]
def iterparse(source, tag):
    """Yield each outermost ``tag`` element of ``source`` as soon as it closes.

    Elements named ``tag`` that are nested inside another ``tag`` element
    are not yielded separately. After the consumer resumes the generator,
    the root element is cleared so already-processed children are dropped.

    Args:
        source: anything accepted by ``ElementTree.iterparse``.
        tag: the element tag to look for.
    """
    events = iter(ElementTree.iterparse(source, events=('start', 'end')))
    # The very first event is the start of the root element.
    root = next(events)[1]
    nesting = 0
    for kind, node in events:
        if node.tag != tag:
            continue
        nesting += 1 if kind == 'start' else -1
        if nesting == 0:
            yield node
            root.clear()
示例5: _count_nodes
# 需要导入模块: from xml.etree import cElementTree [as 别名]
# 或者: from xml.etree.cElementTree import iterparse [as 别名]
def _count_nodes(file_path):
"""
Count the total number of nodes (both split and leaf nodes)
in the model identified by file_path.
Parameters
----------
file_path : str
The path to the filename where the model has been saved
Returns
-------
tuple(n_trees, n_nodes) : tuple(int, int)
The total number of trees and nodes (both split and leaf nodes)
in the model identified by file_path.
"""
# get an iterable
# NOTE: it seems like there is a bug inside lxmx since selecting only
# terminal tags with events=("end",) some tags are skipped...
context = etree.iterparse(file_path, events=("start", "end"))
# get the root element
_, root = next(context)
n_nodes = 0
n_trees = 0
for event, elem in context:
if event != "end":
continue
if elem.tag == 'Tree':
n_trees += 1
elif elem.tag == 'SplitFeatures' or elem.tag == 'LeafOutputs':
n_nodes += len(elem.text.split(" "))
elem.clear() # discard the element
root.clear() # remove root reference to the child
return n_trees, n_nodes
示例6: _count_nodes
# 需要导入模块: from xml.etree import cElementTree [as 别名]
# 或者: from xml.etree.cElementTree import iterparse [as 别名]
def _count_nodes(file_path):
"""
Count the total number of nodes (both split and leaf nodes)
in the model identified by file_path.
Parameters
----------
file_path : str
The path to the filename where the model has been saved
Returns
-------
tuple(n_trees, n_nodes) : tuple(int, int)
The total number of trees and nodes (both split and leaf nodes)
in the model identified by file_path.
"""
# get an iterable
context = etree.iterparse(file_path, events=("end",))
# get the root element
_, root = next(context)
n_nodes = 0
n_trees = 0
for _, elem in context:
if elem.tag == 'tree':
n_trees += 1
elif elem.tag == 'feature' or elem.tag == 'output':
n_nodes += 1
elem.clear() # discard the element
root.clear() # remove root reference to the child
return n_trees, n_nodes
示例7: read_mir
# 需要导入模块: from xml.etree import cElementTree [as 别名]
# 或者: from xml.etree.cElementTree import iterparse [as 别名]
def read_mir(xml_file, quiet=False):
    """Gather decoded AppCompatCache rows from the registry items of a MIR XML file.

    Every ``RegistryItem`` element whose lower-cased ``Path`` contains one
    of the AppCompatCache key names has its base64 ``Value`` decoded and
    passed to ``read_cache``; rows not seen before are collected.

    Args:
        xml_file: source handed to ``et.iterparse``.
        quiet: forwarded unchanged to ``read_cache``.

    Returns:
        ``(error, None)`` when reading fails with an AttributeError,
        TypeError or IOError.
    """
    # NOTE(review): the visible code has no return statement for the
    # success path, so the collected rows are not handed back; this listing
    # looks truncated -- confirm against the full source.
    out_list = []
    error = ""
    cache_keys = (
        'control\\session manager\\appcompatcache\\appcompatcache',
        'control\\session manager\\appcompatibility\\appcompatcache',
    )
    # Open the MIR output file.
    try:
        for (_, reg_item) in et.iterparse(xml_file, events=('end',)):
            if reg_item.tag != 'RegistryItem':
                continue
            path_name = reg_item.find("Path").text
            if not path_name:
                print("[-] Error XML missing Path")
                print(et.tostring(reg_item))
                reg_item.clear()
                continue
            path_name = path_name.lower()
            # Check to see that we have the right registry value.
            if any(key in path_name for key in cache_keys):
                # Decode the base64 value data and parse it.
                bin_data = binascii.a2b_base64(reg_item.find('Value').text)
                tmp_list = read_cache(bin_data, quiet)
                if tmp_list:
                    for row in tmp_list:
                        if g_verbose:
                            row.append(path_name)
                        if row not in out_list:
                            out_list.append(row)
            reg_item.clear()
    except (AttributeError, TypeError, IOError) as err:
        error = "[-] Error reading MIR XML: %s" % str(err)
        print(error)
        return (error, None)
示例8: extract_hostnames
# 需要导入模块: from xml.etree import cElementTree [as 别名]
# 或者: from xml.etree.cElementTree import iterparse [as 别名]
def extract_hostnames(file):
    """Return the unique, non-empty host names found in an XML or CSV file.

    The file type is determined by ``detect_type``. For XML, every
    ``record`` element that has an ``address`` attribute contributes the
    attribute selected by its ``type`` (``name``, ``target``, ``mname`` or
    ``exchange``). For CSV, the first row is skipped and the second column
    of every following row is used. Only the part of each value before the
    first dot is kept.

    Args:
        file: path of the file to read.

    Returns:
        A list of distinct host names in arbitrary order.
    """
    host_names = []
    hostname_pattern = re.compile("(^[^.]*)")
    # Record-type pattern -> attribute holding the host name. The order
    # matters: the first matching pattern wins.
    record_sources = (
        (r'PTR|^[A]$|AAAA', 'name'),   # A, AAAA and PTR records
        (r'NS', 'target'),             # NS records
        (r'SOA', 'mname'),             # SOA records
        (r'MX', 'exchange'),           # MX records
        (r'SRV', 'target'),            # SRV records
    )
    file_type = detect_type(file)
    if file_type == "xml":
        for event, elem in cElementTree.iterparse(file):
            # Only records that carry an IP address are of interest.
            if elem.tag != "record" or "address" not in elem.attrib:
                continue
            record_type = elem.attrib['type']
            for type_pattern, attribute in record_sources:
                if re.search(type_pattern, record_type):
                    host_names.append(
                        hostname_pattern.search(elem.attrib[attribute]).group(1))
                    break
    elif file_type == "csv":
        # The context manager closes the file, which used to be leaked.
        with open(file, 'r') as handle:
            reader = csv.reader(handle, delimiter=',')
            # Skip the header row; the default keeps an empty file from
            # raising StopIteration.
            next(reader, None)
            for row in reader:
                host_names.append(hostname_pattern.search(row[1]).group(1))
    # Deduplicate and drop empty values. A list is returned on both
    # Python 2 and Python 3 (filter() is lazy on Python 3).
    return [name for name in set(host_names) if name]
示例9: main
# 需要导入模块: from xml.etree import cElementTree [as 别名]
# 或者: from xml.etree.cElementTree import iterparse [as 别名]
def main(argv):
    """Parse the XML file named by ``argv[1]`` and print per-flow statistics.

    Each ``FlowMonitor`` element that closes at the root level is passed to
    ``Simulation`` and a progress dot is written. After parsing, the five
    tuple, TX/RX bitrate, mean delay and packet loss ratio of every flow of
    every collected simulation are printed.

    Args:
        argv: argument list; ``argv[1]`` is the path of the XML file.
    """
    # sys.stdout.write and single-argument print() calls behave the same on
    # Python 2 and Python 3.
    sys.stdout.write("Reading XML file ")
    sys.stdout.flush()
    level = 0
    sim_list = []
    # The context manager closes the file even when parsing fails (the
    # handle used to be leaked). Binary mode lets the XML parser apply the
    # encoding declared in the document itself.
    with open(argv[1], "rb") as file_obj:
        for event, elem in ElementTree.iterparse(file_obj, events=("start", "end")):
            if event == "start":
                level += 1
            elif event == "end":
                level -= 1
                if level == 0 and elem.tag == 'FlowMonitor':
                    sim_list.append(Simulation(elem))
                    elem.clear()  # won't need this any more
                    sys.stdout.write(".")
                    sys.stdout.flush()
    print(" done.")
    protocol_names = {6: 'TCP', 17: 'UDP'}
    for sim in sim_list:
        for flow in sim.flows:
            t = flow.fiveTuple
            # Fall back to the raw protocol number so that a flow with any
            # other protocol does not abort the report with a KeyError.
            proto = protocol_names.get(t.protocol, str(t.protocol))
            print("FlowID: %i (%s %s/%s --> %s/%i)" %
                  (flow.flowId, proto, t.sourceAddress, t.sourcePort,
                   t.destinationAddress, t.destinationPort))
            print("\tTX bitrate: %.2f kbit/s" % (flow.txBitrate*1e-3,))
            print("\tRX bitrate: %.2f kbit/s" % (flow.rxBitrate*1e-3,))
            print("\tMean Delay: %.2f ms" % (flow.delayMean*1e3,))
            print("\tPacket Loss Ratio: %.2f %%" % (flow.packetLossRatio*100))
示例10: _poll
# 需要导入模块: from xml.etree import cElementTree [as 别名]
# 或者: from xml.etree.cElementTree import iterparse [as 别名]
def _poll(self, url):
    """Download the feed at ``url``, parse it as XML and send the resulting events.

    The request carries every header from ``self.http_headers``. When the
    download fails, the error is logged and ``idiokit.stop(False)`` is
    called. Bytes in front of the first ``<`` are skipped before parsing.
    A ``ParseError`` during parsing is logged.
    """
    request = urllib2.Request(url)
    for header_name, header_value in self.http_headers:
        request.add_header(header_name, header_value)

    try:
        self.log.info('Downloading feed from: "%s"', url)
        _, fileobj = yield utils.fetch_url(request)
    except utils.FetchUrlFailed as e:
        self.log.error('Failed to download feed "%s": %r', url, e)
        idiokit.stop(False)
    self.log.info("Finished downloading the feed.")

    # Consume one byte at a time until the first "<" (or the end of the
    # data) is reached, then step back so the parser sees the "<" itself.
    while True:
        byte = fileobj.read(1)
        if not byte or byte == "<":
            break
    if byte == "<":
        fileobj.seek(-1, 1)

    try:
        for _, elem in etree.iterparse(fileobj):
            for event in self._parse(elem, url):
                if not event:
                    continue
                yield idiokit.send(event)
    except ParseError as e:
        self.log.error('Invalid format on feed: "%s", "%r"', url, e)
示例11: poll
# 需要导入模块: from xml.etree import cElementTree [as 别名]
# 或者: from xml.etree.cElementTree import iterparse [as 别名]
def poll(self):
    """Download the feed when its ETag has changed and handle every ``entry``.

    A HEAD request is made first; when its ``etag`` equals the stored one
    the poll is skipped. Otherwise the data is downloaded, wrapped in a
    ``BZ2Reader`` and streamed through ``iterparse``. Each closed ``entry``
    element is passed to ``self._handle_entry``. The new ETag is stored
    only when parsing finishes without a ``SyntaxError``.

    Raises:
        bot.PollSkipped: when there is no new data, the download fails, or
            the report contains a syntax error.
    """
    url = self.feed_url % self.application_key

    try:
        self.log.info("Checking if {0!r} has new data".format(url))
        headers, _ = yield utils.fetch_url(HeadRequest(url))

        etag = headers.get("etag", None)
        if etag is not None and self._etag == etag:
            raise bot.PollSkipped("no new data detected (ETag stayed the same)")

        self.log.info("Downloading data from {0!r}".format(url))
        _, fileobj = yield utils.fetch_url(url)
    except utils.FetchUrlFailed as error:
        raise bot.PollSkipped("failed to download {0!r} ({1})".format(url, error))

    self.log.info("Downloaded data from {0!r}".format(url))

    reader = BZ2Reader(fileobj)
    open_entries = 0
    sites = {}
    try:
        for event, element in etree.iterparse(reader, events=("start", "end")):
            is_entry = element.tag == "entry"
            if event == "start":
                if is_entry:
                    open_entries += 1
            elif event == "end":
                if is_entry:
                    yield self._handle_entry(element, sites)
                    open_entries -= 1
                # Elements are cleared only once no entry is open.
                if open_entries == 0:
                    element.clear()
    except SyntaxError as error:
        raise bot.PollSkipped("syntax error in report {0!r} ({1})".format(url, error))
    else:
        self._etag = etag
示例12: iter_queries
# 需要导入模块: from xml.etree import cElementTree [as 别名]
# 或者: from xml.etree.cElementTree import iterparse [as 别名]
def iter_queries(self):
    "Yield (qid, docid, start, end, name) tuples"
    # Stream the queries file and convert every closed QUERY_ELEM element
    # with self._query; all other elements are ignored.
    for _, node in iterparse(self.queries_file):
        if node.tag != QUERY_ELEM:
            continue
        yield self._query(node)
示例13: interproDB
# 需要导入模块: from xml.etree import cElementTree [as 别名]
# 或者: from xml.etree.cElementTree import iterparse [as 别名]
def interproDB(info, force=False, args={}):
    """Download the InterPro mapping files when needed and record their metadata.

    The files are (re)downloaded when ``interpro.xml`` is missing from
    ``FUNDB``, when ``force`` is true, or when ``args.update`` is set and
    ``check4newDB`` reports a newer database. After a download the version,
    release date and entry count are read from the ``release`` element of
    the XML and stored in ``info['interpro']``. The recorded values are
    logged in every case.

    Args:
        info: dict of database metadata; ``info['interpro']`` is written
            after a download and read for logging.
        force: download even if the XML file already exists.
        args: object whose ``update`` and ``wget`` attributes are read.
    """
    # NOTE(review): the ``{}`` default is kept for interface compatibility,
    # but a plain dict has no ``wget`` attribute, so callers presumably
    # always pass a parsed-arguments object -- confirm against callers.
    iprXML = os.path.join(FUNDB, 'interpro.xml')
    iprTSV = os.path.join(FUNDB, 'interpro.tsv')
    if os.path.isfile(iprXML) and args.update and not force:
        if check4newDB('interpro', info):
            force = True
    if not os.path.isfile(iprXML) or force:
        lib.log.info('Downloading InterProScan Mapping file')
        # Remove stale copies before downloading.
        for x in [iprXML, iprTSV, iprXML+'.gz']:
            if os.path.isfile(x):
                os.remove(x)
        if args.wget:
            wget(DBURL.get('interpro'), iprXML+'.gz')
            wget(DBURL.get('interpro-tsv'), iprTSV)
        else:
            download(DBURL.get('interpro'), iprXML+'.gz')
            download(DBURL.get('interpro-tsv'), iprTSV)
        # The checksum is taken from the compressed download.
        md5 = calcmd5(iprXML+'.gz')
        subprocess.call(['gunzip', '-f', 'interpro.xml.gz'],
                        cwd=os.path.join(FUNDB))
        num_records = ''
        version = ''
        iprdate = ''
        for event, elem in cElementTree.iterparse(iprXML):
            if elem.tag == 'release':
                # Element.getchildren() was removed in Python 3.9;
                # iterating the element yields the same direct children.
                for child in elem:
                    if child.attrib['dbname'] == 'INTERPRO':
                        num_records = int(child.attrib['entry_count'])
                        version = child.attrib['version']
                        iprdate = child.attrib['file_date']
        # The release date is tried with a two-digit year first and a
        # four-digit year second.
        try:
            iprdate = datetime.datetime.strptime(
                iprdate, "%d-%b-%y").strftime("%Y-%m-%d")
        except ValueError:
            iprdate = datetime.datetime.strptime(
                iprdate, "%d-%b-%Y").strftime("%Y-%m-%d")
        info['interpro'] = ('xml', iprXML, version, iprdate, num_records, md5)
    # Only three of the six stored fields are needed for the log line; the
    # others are discarded instead of shadowing the ``type`` builtin.
    _, _, version, date, records, _ = info.get('interpro')
    lib.log.info('InterProScan XML: version={:} date={:} records={:,}'.format(
        version, date, records))
示例14: books
# 需要导入模块: from xml.etree import cElementTree [as 别名]
# 或者: from xml.etree.cElementTree import iterparse [as 别名]
def books(file):
    """Print one comma-separated summary line per ``book`` and ``chapter`` element.

    For a book the title, publisher, number of chapters, page count and
    author are printed; for a chapter the chapter number, chapter title and
    page count. Lines appear in the order in which the elements close, so
    the chapters of a book are printed before the book itself.

    Args:
        file: path or file object accepted by ``iterparse``.
    """
    # iterparse() reports only "end" events unless told otherwise, so the
    # former ``event == 'start'`` branch for the <root> element could never
    # run; it only assigned an unused local and has been removed.
    for _, elem in iterparse(file):
        if elem.tag == 'book':
            print('{0}, {1}, {2}, {3}, {4}'.
                  format(elem.findtext('title'), elem.findtext('publisher'),
                         elem.findtext('numberOfChapters'),
                         elem.findtext('pageCount'), elem.findtext('author')))
        elif elem.tag == 'chapter':
            print('{0}, {1}, {2}'.
                  format(elem.findtext('chapterNumber'),
                         elem.findtext('chapterTitle'),
                         elem.findtext('pageCount')))
开发者ID:PacktPublishing,项目名称:Learning-Python-Networking-Second-Edition,代码行数:13,代码来源:books_iterate_xml.py
示例15: _untar_and_parse_lhe_file
# 需要导入模块: from xml.etree import cElementTree [as 别名]
# 或者: from xml.etree.cElementTree import iterparse [as 别名]
def _untar_and_parse_lhe_file(filename, tags=None):
# Untar event file
new_filename, extension = os.path.splitext(filename)
if extension == ".gz":
if not os.path.exists(new_filename):
call_command("gunzip -c {} > {}".format(filename, new_filename))
filename = new_filename
for event, elem in ET.iterparse(filename):
if tags and elem.tag not in tags:
continue
else:
yield elem
elem.clear()