本文整理汇总了Python中stetl.util.Util.stripNamespaces方法的典型用法代码示例。如果您正苦于以下问题:Python Util.stripNamespaces方法的具体用法?Python Util.stripNamespaces怎么用?Python Util.stripNamespaces使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类stetl.util.Util的用法示例。
在下文中一共展示了Util.stripNamespaces方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: process_xml
# 需要导入模块: from stetl.util import Util [as 别名]
# 或者: from stetl.util.Util import stripNamespaces [as 别名]
def process_xml(self, packet):
    """
    Drain the current parse context, forwarding every matching element.

    Events are pulled from ``self.context`` until it is exhausted or raises
    ``etree.XMLSyntaxError``. For each "end" event whose tag name (with any
    ``{namespace}`` prefix removed) is listed in ``self.element_tags``, the
    element is placed in ``packet.data`` and, when ``self.next`` is set,
    the packet is handed to ``self.next.process``.

    :param packet: the packet to fill and pass on
    :return: the packet; marked end-of-doc once the context is exhausted
    """
    # NOTE(review): block nesting was reconstructed from a listing that had
    # lost its indentation -- confirm against the upstream source.
    while self.context is not None:
        try:
            event, node = next(self.context)
        except (etree.XMLSyntaxError, StopIteration):
            # workaround for etree.XMLSyntaxError https://bugs.launchpad.net/lxml/+bug/1185701
            self.context = None

        if self.context is None:
            # The document is finished (or unparsable): flag it and stop.
            # TODO: is this still useful for a non-input component?
            packet.set_end_of_doc()
            log.info("End of doc: %s elem_count=%d" % (self.cur_file_path, self.elem_count))
            return packet

        # Drop the namespace from the tag: exactly two pieces means
        # "{namespace}name", anything else is taken as a plain tag.
        pieces = node.tag.split('}')
        local_name = pieces[1] if len(pieces) == 2 else pieces[0]

        # Only completed elements of a requested tag are of interest.
        if local_name not in self.element_tags or event != "end":
            continue

        packet.data = deepcopy(node)
        self.elem_count += 1
        if self.strip_namespaces:
            packet.data = Util.stripNamespaces(node).getroot()

        # Clear the element which has been read. Don't clear the root
        # document, since the last element hasn't been processed yet.
        node.clear()

        if self.next:
            # Hand the data over to the next component in the chain.
            packet.format = self._output_format
            packet = self.next.process(packet)

    return packet
示例2: read
# 需要导入模块: from stetl.util import Util [as 别名]
# 或者: from stetl.util.Util import stripNamespaces [as 别名]
def read(self, packet):
    """
    Advance the XML parse by one event and return the packet.

    When no parse context is active, the next path is popped from
    ``self.file_list``, opened, and a new ``etree.iterparse`` context is
    started; its first event supplies ``self.root``. One further event is
    then read. If it is the "end" of an element whose tag name (with any
    ``{namespace}`` prefix removed) is in ``self.element_tags``, the element
    is removed from ``self.root`` and placed in ``packet.data``
    (namespace-stripped when ``self.strip_namespaces`` is set). For any
    other event ``packet.data`` stays ``None``.

    :param packet: the packet to fill
    :return: the packet; marked end-of-doc when the current file is
             exhausted, and also end-of-stream when no files remain
    """
    # NOTE(review): block nesting was reconstructed from a listing that had
    # lost its indentation -- confirm against the upstream source.
    event = None
    packet.data = None

    if self.context is None:
        if not self.file_list:
            # No more files left, all done
            log.info("No more files left")
            return packet

        # Files available: pop next file
        self.cur_file_path = self.file_list.pop(0)
        # Binary mode: the XML parser needs the raw bytes so it can apply
        # the encoding declared in the document itself.
        fd = open(self.cur_file_path, 'rb')
        # Keep the handle so it can be closed once this document is done.
        self._cur_fd = fd
        self.elem_count = 0
        log.info("file opened : %s" % self.cur_file_path)
        self.context = iter(etree.iterparse(fd, events=("start", "end")))
        # Builtin next() instead of the Python 2-only .next() method.
        event, self.root = next(self.context)

    try:
        event, elem = next(self.context)
    except (etree.XMLSyntaxError, StopIteration):
        # workaround for etree.XMLSyntaxError https://bugs.launchpad.net/lxml/+bug/1185701
        self.context = None

    if self.context is None:
        # The parser is finished with this file: release the handle.
        fd = getattr(self, '_cur_fd', None)
        if fd is not None:
            fd.close()
            self._cur_fd = None

        # Always end of doc
        packet.set_end_of_doc()
        log.info("End of doc: %s elem_count=%d" % (self.cur_file_path, self.elem_count))

        # Maybe end of stream (all docs done)
        if not self.file_list:
            # No more files left: end of stream
            packet.set_end_of_stream()
            log.info("End of stream")

        return packet

    # Drop the namespace from the tag: exactly two pieces means
    # "{namespace}name", anything else is taken as a plain tag.
    parts = elem.tag.split('}')
    tag = parts[1] if len(parts) == 2 else parts[0]

    # Only the "end" event of a requested tag yields data; "start" events
    # are ignored because the element is not complete yet.
    if tag in self.element_tags and event == "end":
        packet.data = elem
        self.elem_count += 1
        # Detach the element from the root so the tree does not keep growing.
        self.root.remove(elem)

        if self.strip_namespaces:
            packet.data = Util.stripNamespaces(elem).getroot()

    return packet