本文整理汇总了Python中nlmmanipulate.NlmManipulate.append_safe方法的典型用法代码示例。如果您正苦于以下问题：Python NlmManipulate.append_safe方法的具体用法？Python NlmManipulate.append_safe怎么用？Python NlmManipulate.append_safe使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nlmmanipulate.NlmManipulate的用法示例。
在下文中一共展示了NlmManipulate.append_safe方法的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: replace_in_text
# 需要导入模块: from nlmmanipulate import NlmManipulate [as 别名]
# 或者: from nlmmanipulate.NlmManipulate import append_safe [as 别名]
def replace_in_text(self, id, element, replace_text, ref_type):
    """Wrap the first occurrence of ``replace_text`` in a new ``<xref>`` child.

    The element's text is cut at the first occurrence of ``replace_text``:
    everything before it stays as ``element.text``, the matched text becomes
    the text of a new ``xref`` element, and everything after it becomes that
    ``xref``'s tail.

    :param id: value written (via ``unicode``) to the xref's ``rid`` attribute
    :param element: element whose ``text`` is split; receives the new xref
    :param replace_text: the substring to turn into a cross-reference
    :param ref_type: value written to the xref's ``ref-type`` attribute
    """
    # partition() cuts at the first occurrence only; when the text does not
    # contain replace_text, `leading` is the whole text and `trailing` is ''.
    leading, _, trailing = element.text.partition(replace_text)
    element.text = leading

    xref = etree.Element('xref')
    xref.set('rid', unicode(id))
    xref.set('ref-type', ref_type)
    xref.text = replace_text
    xref.tail = trailing

    # Attachment is delegated to the project's append_safe helper.
    NlmManipulate.append_safe(element, xref, self)
示例2: run_graphics_sibling
# 需要导入模块: from nlmmanipulate import NlmManipulate [as 别名]
# 或者: from nlmmanipulate.NlmManipulate import append_safe [as 别名]
#.........这里部分代码省略.........
if graphic_regex_colon.match(text):
use_next = True
separator = ':'
elif graphic_regex_dot.match(text):
use_next = True
separator = '.'
if not use_next:
if pprev is not None and pprev.tag == 'p':
text = manipulate.get_stripped_text(pprev)
if graphic_regex_colon.match(text):
use_previous = True
separator = ':'
elif graphic_regex_dot.match(text):
use_previous = True
separator = '.'
if not use_next or use_previous:
# see if the title in this section potentially contains text we can match
parent = graphic.getparent()
while parent is not None and not parent.tag.endswith('sec'):
parent = parent.getparent()
if parent is not None:
titles = parent.xpath('title')
else:
titles = []
if len(titles) > 0:
p = titles[0]
text = manipulate.get_stripped_text(p)
if graphic_regex_colon.match(text):
use_next = True
separator = ':'
elif graphic_regex_dot.match(text):
use_next = True
separator = '.'
if use_next or use_previous:
if use_next:
text = manipulate.get_stripped_text(p)
else:
text = manipulate.get_stripped_text(pprev)
p = pprev
# likely this is a table identifier
split_title = text.split(separator)
title = split_title[0].strip()
caption = (''.join(split_title[1:])).strip()
self.debug.print_debug(self, u'Handling title and caption for "{0}"'.format(title))
title_element = None
# use an existing title element if one exists
try:
title_element = graphic.xpath('label')[0]
except:
title_element = etree.Element('label')
graphic.insert(0, title_element)
title_element.text = title
caption_element = etree.Element('caption')
new_p = etree.Element('p')
new_p.text = caption
NlmManipulate.append_safe(caption_element, new_p, self)
NlmManipulate.append_safe(graphic, caption_element, self)
if p.tag.endswith('title'):
new_title = etree.Element('title')
new_title.text = ''
p.addnext(new_title)
p.getparent().remove(p)
else:
p.getparent().remove(p)
if graphic.tail:
graphic.tail = graphic.tail.replace(title + separator, '')
graphic.tail = graphic.tail.replace(caption + separator, '')
graphic.tail = graphic.tail.replace(caption, '')
if not 'id' in graphic.attrib:
graphic.attrib['id'] = u'ID{0}'.format(unicode(uuid.uuid4()))
graphic_titles.append(title)
graphic_ids.append(graphic.attrib['id'])
paragraphs = tree.xpath('//p')
self.link(graphic_ids, graphic_titles, paragraphs, 'fig')
tree.write(self.gv.nlm_file_path)
tree.write(self.gv.nlm_temp_file_path)
示例3: run_tables
# 需要导入模块: from nlmmanipulate import NlmManipulate [as 别名]
# 或者: from nlmmanipulate.NlmManipulate import append_safe [as 别名]
#.........这里部分代码省略.........
if table_regex_colon.match(text):
use_next = True
separator = ':'
used_title = True
elif table_regex_dot.match(text):
use_next = True
separator = '.'
used_title = True
if use_next or use_previous:
if use_next:
text = manipulate.get_stripped_text(p)
else:
text = manipulate.get_stripped_text(pprev)
p = pprev
# likely this is a table identifier
split_title = text.split(separator)
title = split_title[0]
caption = (''.join(split_title[1:])).strip()
# strip all formatting from caption for ease of parsing
# TODO: preserve formatting (far harder)
new_p = etree.Element('p')
new_p.text = caption
if p.tag.endswith('title'):
new_title = etree.Element('title')
new_title.text = ''
old_title = new_title
p.addnext(new_title)
p.getparent().remove(p)
else:
p.getparent().remove(p)
p = new_p
self.debug.print_debug(self, u'Handling title and caption for "{0}"'.format(title))
title_element = None
# use an existing title element if one exists
try:
title_element = table.xpath('label')[0]
except:
title_element = etree.Element('label')
table.insert(0, title_element)
title_element.text = title
caption_element = etree.Element('caption')
NlmManipulate.append_safe(caption_element, p, self)
table.insert(1, caption_element)
if not 'id' in table.attrib:
table.attrib['id'] = u'ID{0}'.format(unicode(uuid.uuid4()))
table_titles.append(title)
table_ids.append(table.attrib['id'])
if used_title:
# if we took the title out, then we should move the parent into its previous sibling and then
# strip tags
old_title.tag = 'REMOVE'
etree.strip_elements(tree, 'REMOVE')
section = table.getparent()
previous = section.getprevious()
while previous is not None and not previous.tag.endswith('sec'):
previous = previous.getprevious()
if previous is not None:
previous.append(section)
section.tag = 'REMOVE'
etree.strip_tags(tree, 'REMOVE')
self.debug.print_debug(self, u'Moved table and siblings to previous section')
else:
previous = section.getparent()
if previous is not None and previous.tag.endswith('sec'):
previous.append(section)
section.tag = 'REMOVE'
etree.strip_tags(tree, 'REMOVE')
self.debug.print_debug(self, u'Moved table and siblings to parent section')
paragraphs = tree.xpath('//p')
self.link(table_ids, table_titles, paragraphs, 'table')
tree.write(self.gv.nlm_file_path)
tree.write(self.gv.nlm_temp_file_path)
示例4: run_tables
# 需要导入模块: from nlmmanipulate import NlmManipulate [as 别名]
# 或者: from nlmmanipulate.NlmManipulate import append_safe [as 别名]
#.........这里部分代码省略.........
title = split_title[0].strip()
caption = (''.join(split_title[1:])).strip()
# strip all formatting from caption for ease of parsing
# TODO: preserve formatting (far harder)
new_p = etree.Element('p')
new_p.text = caption
if p.tag.endswith('title'):
new_title = etree.Element('title')
new_title.text = ''
old_title = new_title
p.addnext(new_title)
p.getparent().remove(p)
else:
p.getparent().remove(p)
p = new_p
self.debug.print_debug(self, u'Handling title and caption for "{0}"'.format(title))
title_element = None
# use an existing title element if one exists
try:
title_element = table.xpath('label')[0]
except:
title_element = etree.Element('label')
table.insert(0, title_element)
title_element.text = title
caption_element = etree.Element('caption')
NlmManipulate.append_safe(caption_element, p, self)
table.insert(1, caption_element)
if not 'id' in table.attrib:
table.attrib['id'] = u'ID{0}'.format(uuid.uuid4())
table_titles.append(title)
table_ids.append(table.attrib['id'])
if used_title:
# if we took the title out, then we should move the parent into its previous sibling and then
# strip tags
old_title.tag = 'REMOVE'
etree.strip_elements(tree, 'REMOVE')
section = table.getparent()
previous = section.getprevious()
while previous is not None and not previous.tag.endswith('sec'):
previous = previous.getprevious()
if previous is not None:
previous.append(section)
section.tag = 'REMOVE'
etree.strip_tags(tree, 'REMOVE')
self.debug.print_debug(self, u'Moved table and siblings to previous section')
else:
previous = section.getparent()
示例5: run_graphics
# 需要导入模块: from nlmmanipulate import NlmManipulate [as 别名]
# 或者: from nlmmanipulate.NlmManipulate import append_safe [as 别名]
def run_graphics(self):
    """Label and caption graphics whose enclosing paragraph carries the caption.

    For every ``graphic`` element in the loaded DOM tree whose parent is a
    ``p`` element, the paragraph's stripped text is tested against two
    patterns: "<text><number>:<text>" and "<text><number>.<text>". On a match
    the text is cut at the first separator into a title and a caption; the
    title goes into the graphic's ``label`` child (created if absent) and the
    caption into a new ``caption/p`` child. The graphic is given an ``id``
    if it has none, and the collected ids/titles are passed to ``self.link``
    with the reference type ``"fig"``. The tree is then written to both the
    NLM file path and the NLM temp file path, and ``run_graphics_sibling``
    is invoked.
    """
    # images are hard to handle because Word/OO puts them in different places
    # for instance, the caption can come before or after;
    # <p>Figure 1: Martin Eve at the pub<graphic xlink:href="media/image1.jpeg" position="float"
    # orientation="portrait" xlink:type="simple"/>
    self.debug.print_debug(self, u"Attempting to classify captions for graphics objects [plain]")

    manipulate = NlmManipulate(self.gv)
    tree = manipulate.load_dom_tree()

    graphic_titles = []
    graphic_ids = []

    # Raw strings: same patterns as before, without invalid string escapes.
    graphic_regex_dot = re.compile(r"^.+?\s*\d+\..+")
    graphic_regex_colon = re.compile(r"^.+?\s*\d+\:.+")

    for graphic in tree.xpath("//graphic"):
        # The caption text is looked for in the paragraph that contains the graphic.
        p = graphic.getparent()
        if p is None or p.tag != "p":
            continue

        text = manipulate.get_stripped_text(p)

        # The colon form takes precedence over the dot form.
        if graphic_regex_colon.match(text):
            separator = ":"
        elif graphic_regex_dot.match(text):
            separator = "."
        else:
            continue

        # likely this is a figure identifier
        # Cut at the FIRST separator only, so that any further separators
        # (e.g. sentence-ending periods) are preserved inside the caption.
        raw_title, _, raw_caption = text.partition(separator)
        title = raw_title.strip()
        caption = raw_caption.strip()

        self.debug.print_debug(self, u'Handling title and caption for "{0}"'.format(title))

        # use an existing label element if one exists, otherwise create one
        existing_labels = graphic.xpath("label")
        if existing_labels:
            title_element = existing_labels[0]
        else:
            title_element = etree.Element("label")
            graphic.insert(0, title_element)

        title_element.text = title

        caption_element = etree.Element("caption")
        new_p = etree.Element("p")
        new_p.text = caption

        NlmManipulate.append_safe(caption_element, new_p, self)
        NlmManipulate.append_safe(graphic, caption_element, self)

        # Remove the text that was moved into label/caption from the tail
        # text following the graphic.
        if graphic.tail:
            graphic.tail = graphic.tail.replace(title + separator, "")
            graphic.tail = graphic.tail.replace(caption + separator, "")
            graphic.tail = graphic.tail.replace(caption, "")

        if "id" not in graphic.attrib:
            graphic.attrib["id"] = u"ID{0}".format(unicode(uuid.uuid4()))

        graphic_titles.append(title)
        graphic_ids.append(graphic.attrib["id"])

    paragraphs = tree.xpath("//p")

    self.link(graphic_ids, graphic_titles, paragraphs, "fig")

    tree.write(self.gv.nlm_file_path)
    tree.write(self.gv.nlm_temp_file_path)

    self.run_graphics_sibling()