当前位置: 首页>>代码示例>>Python>>正文


Python NlmManipulate.append_safe方法代码示例

本文整理汇总了Python中nlmmanipulate.NlmManipulate.append_safe方法的典型用法代码示例。如果您正苦于以下问题：Python NlmManipulate.append_safe方法的具体用法是什么？Python NlmManipulate.append_safe怎么用？有哪些Python NlmManipulate.append_safe的使用例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nlmmanipulate.NlmManipulate的用法示例。


在下文中一共展示了NlmManipulate.append_safe方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: replace_in_text

# 需要导入模块: from nlmmanipulate import NlmManipulate [as 别名]
# 或者: from nlmmanipulate.NlmManipulate import append_safe [as 别名]
    def replace_in_text(self, id, element, replace_text, ref_type):
        """Wrap the first occurrence of ``replace_text`` in ``element.text`` in an ``<xref>``.

        The element's text is cut at the first occurrence of ``replace_text``:
        whatever precedes it stays as ``element.text``, the matched text becomes
        the text of a new ``xref`` child, and whatever follows becomes that
        child's tail. The child is attached through ``NlmManipulate.append_safe``.

        :param id: value stored (as unicode) in the ``rid`` attribute
        :param element: element whose ``text`` is split
        :param replace_text: the substring to turn into a cross-reference
        :param ref_type: value stored in the ``ref-type`` attribute
        """
        # At most one split, so this yields one or two pieces.
        pieces = element.text.split(replace_text, 1)
        leading = pieces[0]
        trailing = ''.join(pieces[1:])

        element.text = leading

        xref = etree.Element('xref')
        xref.set('rid', unicode(id))
        xref.set('ref-type', ref_type)
        xref.text = replace_text
        xref.tail = trailing

        NlmManipulate.append_safe(element, xref, self)
开发者ID:sebastianmng,项目名称:meTypeset,代码行数:13,代码来源:captionclassifier.py

示例2: run_graphics_sibling

# 需要导入模块: from nlmmanipulate import NlmManipulate [as 别名]
# 或者: from nlmmanipulate.NlmManipulate import append_safe [as 别名]

#.........这里部分代码省略.........
                if graphic_regex_colon.match(text):
                    use_next = True
                    separator = ':'
                elif graphic_regex_dot.match(text):
                    use_next = True
                    separator = '.'

            if not use_next:
                if pprev is not None and pprev.tag == 'p':
                    text = manipulate.get_stripped_text(pprev)

                    if graphic_regex_colon.match(text):
                        use_previous = True
                        separator = ':'
                    elif graphic_regex_dot.match(text):
                        use_previous = True
                        separator = '.'

            if not use_next or use_previous:
                # see if the title in this section potentially contains text we can match
                parent = graphic.getparent()

                while parent is not None and not parent.tag.endswith('sec'):
                    parent = parent.getparent()
                    if parent is not None:
                      titles = parent.xpath('title')
                else:
                    titles = []

                if len(titles) > 0:
                    p = titles[0]

                    text = manipulate.get_stripped_text(p)

                    if graphic_regex_colon.match(text):
                        use_next = True
                        separator = ':'
                    elif graphic_regex_dot.match(text):
                        use_next = True
                        separator = '.'

            if use_next or use_previous:

                if use_next:
                    text = manipulate.get_stripped_text(p)
                else:
                    text = manipulate.get_stripped_text(pprev)
                    p = pprev

                # likely this is a table identifier
                split_title = text.split(separator)

                title = split_title[0].strip()
                caption = (''.join(split_title[1:])).strip()

                self.debug.print_debug(self, u'Handling title and caption for "{0}"'.format(title))

                title_element = None

                # use an existing title element if one exists
                try:
                    title_element = graphic.xpath('label')[0]
                except:
                    title_element = etree.Element('label')
                    graphic.insert(0, title_element)

                title_element.text = title

                caption_element = etree.Element('caption')
                new_p = etree.Element('p')
                new_p.text = caption

                NlmManipulate.append_safe(caption_element, new_p, self)
                NlmManipulate.append_safe(graphic, caption_element, self)

                if p.tag.endswith('title'):
                    new_title = etree.Element('title')
                    new_title.text = ''
                    p.addnext(new_title)
                    p.getparent().remove(p)
                else:
                    p.getparent().remove(p)

                if graphic.tail:
                    graphic.tail = graphic.tail.replace(title + separator, '')
                    graphic.tail = graphic.tail.replace(caption + separator, '')
                    graphic.tail = graphic.tail.replace(caption, '')

                if not 'id' in graphic.attrib:
                    graphic.attrib['id'] = u'ID{0}'.format(unicode(uuid.uuid4()))

                graphic_titles.append(title)
                graphic_ids.append(graphic.attrib['id'])

        paragraphs = tree.xpath('//p')

        self.link(graphic_ids, graphic_titles, paragraphs, 'fig')

        tree.write(self.gv.nlm_file_path)
        tree.write(self.gv.nlm_temp_file_path)
开发者ID:sebastianmng,项目名称:meTypeset,代码行数:104,代码来源:captionclassifier.py

示例3: run_tables

# 需要导入模块: from nlmmanipulate import NlmManipulate [as 别名]
# 或者: from nlmmanipulate.NlmManipulate import append_safe [as 别名]

#.........这里部分代码省略.........
                    if table_regex_colon.match(text):
                        use_next = True
                        separator = ':'
                        used_title = True
                    elif table_regex_dot.match(text):
                        use_next = True
                        separator = '.'
                        used_title = True

            if use_next or use_previous:

                if use_next:
                    text = manipulate.get_stripped_text(p)
                else:
                    text = manipulate.get_stripped_text(pprev)
                    p = pprev

                # likely this is a table identifier
                split_title = text.split(separator)

                title = split_title[0]
                caption = (''.join(split_title[1:])).strip()

                # strip all formatting from caption for ease of parsing
                # TODO: preserve formatting (far harder)
                new_p = etree.Element('p')
                new_p.text = caption

                if p.tag.endswith('title'):
                    new_title = etree.Element('title')
                    new_title.text = ''
                    old_title = new_title
                    p.addnext(new_title)
                    p.getparent().remove(p)
                else:
                    p.getparent().remove(p)

                p = new_p

                self.debug.print_debug(self, u'Handling title and caption for "{0}"'.format(title))

                title_element = None

                # use an existing title element if one exists
                try:
                    title_element = table.xpath('label')[0]
                except:
                    title_element = etree.Element('label')
                    table.insert(0, title_element)

                title_element.text = title

                caption_element = etree.Element('caption')
                NlmManipulate.append_safe(caption_element, p, self)
                table.insert(1, caption_element)

                if not 'id' in table.attrib:
                    table.attrib['id'] = u'ID{0}'.format(unicode(uuid.uuid4()))

                table_titles.append(title)
                table_ids.append(table.attrib['id'])

                if used_title:
                    # if we took the title out, then we should move the parent into its previous sibling and then
                    # strip tags
                    old_title.tag = 'REMOVE'

                    etree.strip_elements(tree, 'REMOVE')

                    section = table.getparent()

                    previous = section.getprevious()

                    while previous is not None and not previous.tag.endswith('sec'):
                        previous = previous.getprevious()

                    if previous is not None:
                        previous.append(section)
                        section.tag = 'REMOVE'

                        etree.strip_tags(tree, 'REMOVE')

                        self.debug.print_debug(self, u'Moved table and siblings to previous section')
                    else:
                        previous = section.getparent()

                        if previous is not None and previous.tag.endswith('sec'):
                            previous.append(section)
                            section.tag = 'REMOVE'

                            etree.strip_tags(tree, 'REMOVE')

                            self.debug.print_debug(self, u'Moved table and siblings to parent section')

        paragraphs = tree.xpath('//p')

        self.link(table_ids, table_titles, paragraphs, 'table')

        tree.write(self.gv.nlm_file_path)
        tree.write(self.gv.nlm_temp_file_path)
开发者ID:sebastianmng,项目名称:meTypeset,代码行数:104,代码来源:captionclassifier.py

示例4: run_tables

# 需要导入模块: from nlmmanipulate import NlmManipulate [as 别名]
# 或者: from nlmmanipulate.NlmManipulate import append_safe [as 别名]

#.........这里部分代码省略.........
                title = split_title[0].strip()
                caption = (''.join(split_title[1:])).strip()

                # strip all formatting from caption for ease of parsing
                # TODO: preserve formatting (far harder)
                new_p = etree.Element('p')
                new_p.text = caption

                if p.tag.endswith('title'):
                    new_title = etree.Element('title')
                    new_title.text = ''
                    old_title = new_title
                    p.addnext(new_title)
                    p.getparent().remove(p)
                else:
                    p.getparent().remove(p)

                p = new_p

                self.debug.print_debug(self, u'Handling title and caption for "{0}"'.format(title))

                title_element = None

                # use an existing title element if one exists
                try:
                    title_element = table.xpath('label')[0]
                except:
                    title_element = etree.Element('label')
                    table.insert(0, title_element)

                title_element.text = title

                caption_element = etree.Element('caption')
                NlmManipulate.append_safe(caption_element, p, self)
                table.insert(1, caption_element)

                if not 'id' in table.attrib:
                    table.attrib['id'] = u'ID{0}'.format(uuid.uuid4())

                table_titles.append(title)
                table_ids.append(table.attrib['id'])

                if used_title:
                    # if we took the title out, then we should move the parent into its previous sibling and then
                    # strip tags
                    old_title.tag = 'REMOVE'

                    etree.strip_elements(tree, 'REMOVE')

                    section = table.getparent()

                    previous = section.getprevious()

                    while previous is not None and not previous.tag.endswith('sec'):
                        previous = previous.getprevious()

                    if previous is not None:
                        previous.append(section)
                        section.tag = 'REMOVE'

                        etree.strip_tags(tree, 'REMOVE')

                        self.debug.print_debug(self, u'Moved table and siblings to previous section')
                    else:
                        previous = section.getparent()
开发者ID:MartinPaulEve,项目名称:meTypeset,代码行数:69,代码来源:captionclassifier.py

示例5: run_graphics

# 需要导入模块: from nlmmanipulate import NlmManipulate [as 别名]
# 或者: from nlmmanipulate.NlmManipulate import append_safe [as 别名]
    def run_graphics(self):
        """Classify captions for graphics that sit inside a captioned paragraph.

        Loads the DOM tree through ``NlmManipulate.load_dom_tree`` and visits
        every ``graphic`` element. When the graphic's parent is a ``p`` whose
        stripped text looks like ``<name> <number>:<caption>`` or
        ``<name> <number>.<caption>``, the part before the first separator is
        written to the graphic's ``label`` child (created if absent) and the
        remainder is added as a ``caption/p`` child. Graphics without an ``id``
        attribute are given a generated one.

        Afterwards the collected ids and titles are passed to ``self.link`` with
        ref type ``"fig"``, the tree is written to ``self.gv.nlm_file_path`` and
        ``self.gv.nlm_temp_file_path``, and ``self.run_graphics_sibling()`` is
        called.
        """
        # images are hard to handle because Word/OO puts them in different places
        # for instance, the caption can come before or after;
        # <p>Figure 1: Martin Eve at the pub<graphic xlink:href="media/image1.jpeg" position="float"
        # orientation="portrait" xlink:type="simple"/>

        self.debug.print_debug(self, u"Attempting to classify captions for graphics objects [plain]")

        manipulate = NlmManipulate(self.gv)

        tree = manipulate.load_dom_tree()

        graphic_titles = []
        graphic_ids = []
        # Raw strings so the regex escapes are not read as string escapes.
        graphic_regex_dot = re.compile(r"^.+?\s*\d+\..+")
        graphic_regex_colon = re.compile(r"^.+?\s*\d+\:.+")

        for graphic in tree.xpath("//graphic"):
            # The caption text is looked for in the paragraph enclosing the graphic.
            parent = graphic.getparent()

            if parent is None or parent.tag != "p":
                continue

            text = manipulate.get_stripped_text(parent)

            # The colon form takes precedence over the dot form.
            if graphic_regex_colon.match(text):
                separator = ":"
            elif graphic_regex_dot.match(text):
                separator = "."
            else:
                continue

            # Likely a figure identifier. Cut only at the FIRST separator so that
            # later colons/full stops inside the caption are preserved.
            title, _, caption = text.partition(separator)
            title = title.strip()
            caption = caption.strip()

            self.debug.print_debug(self, u'Handling title and caption for "{0}"'.format(title))

            # use an existing label element if one exists, otherwise create one
            existing_labels = graphic.xpath("label")

            if existing_labels:
                title_element = existing_labels[0]
            else:
                title_element = etree.Element("label")
                graphic.insert(0, title_element)

            title_element.text = title

            caption_element = etree.Element("caption")
            new_p = etree.Element("p")
            new_p.text = caption

            NlmManipulate.append_safe(caption_element, new_p, self)
            NlmManipulate.append_safe(graphic, caption_element, self)

            # Remove the title/caption text from the text trailing the graphic.
            if graphic.tail:
                graphic.tail = graphic.tail.replace(title + separator, "")
                graphic.tail = graphic.tail.replace(caption + separator, "")
                graphic.tail = graphic.tail.replace(caption, "")

            if "id" not in graphic.attrib:
                graphic.attrib["id"] = u"ID{0}".format(unicode(uuid.uuid4()))

            graphic_titles.append(title)
            graphic_ids.append(graphic.attrib["id"])

        paragraphs = tree.xpath("//p")

        self.link(graphic_ids, graphic_titles, paragraphs, "fig")

        tree.write(self.gv.nlm_file_path)
        tree.write(self.gv.nlm_temp_file_path)

        self.run_graphics_sibling()
开发者ID:withanage,项目名称:meTypeset,代码行数:87,代码来源:captionclassifier.py


注:本文中的nlmmanipulate.NlmManipulate.append_safe方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。