本文整理汇总了 Python 中 nlmmanipulate.NlmManipulate.find_text 方法的典型用法代码示例。如果您正苦于以下问题：Python NlmManipulate.find_text 方法的具体用法？Python NlmManipulate.find_text 怎么用？Python NlmManipulate.find_text 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 nlmmanipulate.NlmManipulate 的用法示例。
在下文中一共展示了 NlmManipulate.find_text 方法的 1 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: run
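# Required import: from nlmmanipulate import NlmManipulate [as alias]
# Note: find_text is a method of the NlmManipulate class, so it cannot be imported
# on its own; call it on an instance, e.g. NlmManipulate(gv).find_text(element, text).
# The snippet below also relies on `re` and `etree` (presumably lxml.etree) being imported.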
def run(self, interactive):
if interactive:
self.run_prompt()
return
manipulate = NlmManipulate(self.gv)
tree = manipulate.load_dom_tree()
# pre-cleanup: remove all empty ext-links as these break the linker
items_to_clean = tree.xpath('//ext-link')
count = 0
for item in items_to_clean:
if '{http://www.w3.org/1999/xlink}href' in item.attrib and \
item.attrib['{http://www.w3.org/1999/xlink}href'] == '':
count += 1
item.tag = 'REMOVE'
etree.strip_tags(item.getparent(), 'REMOVE')
if count > 0:
manipulate.save_tree(tree)
self.debug.print_debug(self, u'Removed {0} blank ext-link tags'.format(count))
ref_items = tree.xpath('//back/ref-list/ref')
self.clean_ref_items(tree, ref_items, manipulate)
# handle numbered reference items
references_and_numbers = {}
for ref in ref_items:
text = manipulate.get_stripped_text(ref)
ref_match = re.compile('^(?P<number>\d+)\.*')
result = ref_match.match(text)
if result:
references_and_numbers[result.group('number')] = ref
parsed = self.process_ibid_authors(ref_items)
if parsed > 0:
manipulate.save_tree(tree)
self.debug.print_debug(self, u'Replace {0} instances of "---." at start of references'.format(parsed))
to_link = []
to_stub = []
square_bracket_count = {}
for p in tree.xpath('//sec//p[not(mml:math)] | //td',
namespaces={'mml': 'http://www.w3.org/1998/Math/MathML'}):
text = manipulate.get_stripped_text(p)
reference_test = re.compile('\((?P<text>[^%]+?)\)')
matches = reference_test.finditer(text)
# exclude any square brackets with numbers inside
sub_match = re.compile('\[(?P<square>\d*[,\-;\d\s]*)\]')
smatch = sub_match.search(text)
if smatch:
smatches = sub_match.finditer(text)
for smatch in smatches:
self.debug.print_debug(self, u'Handling references in square '
u'brackets: [{0}] '.format(smatch.group('square')))
for item in re.split(';|,', smatch.group('square')):
if '-' in item:
parent, tail = manipulate.find_text(p, item)
if parent is not None:
new_string = ''
try:
split_range = item.strip().split('-')
for no in range(int(split_range[0]), int(split_range[1]) + 1):
new_string += str(no) + ','
except:
self.debug.print_debug(self, u'Unable to parse reference '
u'number in range {0}'.format(item))
break
if new_string.endswith(',') and not item.endswith(','):
new_string = new_string[0:len(new_string) - 1]
if tail and new_string != '':
parent.tail = parent.tail.replace(item, new_string)
elif not tail and new_string != '':
parent.text = parent.text.replace(item, new_string)
try:
split_range = item.strip().split('-')
for no in range(int(split_range[0]), int(split_range[1]) + 1):
self.debug.print_debug(self, u'Parsing reference '
u'number in range {0}'.format(str(no)))
#.........这里部分代码省略.........