本文整理汇总了Python中calibre.ebooks.BeautifulSoup.Tag.append方法的典型用法代码示例。如果您正苦于以下问题：Python Tag.append方法的具体用法？Python Tag.append怎么用？Python Tag.append使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类calibre.ebooks.BeautifulSoup.Tag的用法示例。
在下文中一共展示了Tag.append方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: postprocess_html
# 需要导入模块: from calibre.ebooks.BeautifulSoup import Tag [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.Tag import append [as 别名]
def postprocess_html(self, soup, first_fetch):
    """Reduce the parsed page to a single content block inside ``<body>``.

    Two page layouts are handled:

    * Pages containing ``<div id="transcript">`` (interviews): every
      ``qa`` div is detached, its ``<a class="question_link">`` is replaced
      by a ``<strong>`` holding the same string, the heading of the first
      ``presentation_full`` div is cleaned, the remaining ``<div>``
      elements in that block are removed, and the ``qa`` divs are
      appended back to it.
    * All other pages: the first ``text_info`` div is detached and
      stripped of the ``<div>`` elements it contains, then appended to
      ``<div id="content">`` after that block's own ``<div>`` elements
      have been removed.

    Finally every child of ``<body>`` is removed and the assembled block
    is appended as its only child.

    :param soup: parsed document; it is modified in place.
    :param first_fetch: not used by this method.
    :return: the same ``soup`` object.
    :raises StopIteration: if ``find_by_class`` yields no matching element.
    """
    # Drop the <em> inside the author line, when both exist.
    author_general = soup.find('span', {'class': 'author_general'})
    if author_general is not None and author_general.em is not None:
        author_general.em.extract()

    # the complete content
    full_div = None
    transcript_div = soup.find('div', {'id': 'transcript'})
    if transcript_div:  # that's an interview
        # get all <div class="qa" />
        qa_div_list = list(find_by_class(transcript_div, 'div', 'qa'))
        for qa_div in qa_div_list:
            qa_div.extract()
            # replace all <a class="question_link">...</a> with <strong>...</strong>
            question_link = qa_div.find('a', {'class': 'question_link'})
            if question_link is None:
                # Nothing to rewrite in this block; keep it as it is.
                continue
            question_strong = Tag(soup, 'strong')
            question_strong.append(question_link.string)
            question_link.replaceWith(question_strong)

        full_div = next(find_by_class(soup.find('div', {'id': 'content'}),
                                      'div', 'presentation_full'))
        # clean the <h1 />
        full_div.h1.span.extract()
        title_div = full_div.h1.div
        title_div.replaceWith(title_div.string)
        # clear the presentation area
        for div in full_div.findAll('div'):
            div.extract()
        # add qa list back to presentation area
        for qa_div in qa_div_list:
            full_div.append(qa_div)
    else:
        # text only without title
        text_div = next(find_by_class(soup, 'div', 'text_info'))
        text_div.extract()
        for other in text_div.findAll('div'):
            other.extract()
        # full_div contains title
        full_div = soup.find('div', {'id': 'content'})
        for other in full_div.findAll('div'):
            other.extract()
        full_div.append(text_div)

    # keep full_div in <body /> only
    full_div.extract()
    # Iterate over a snapshot: extract() removes the node from the body's
    # child list, and removing while iterating that same list would skip
    # every second sibling and leave it in the output.
    for other in list(soup.body):
        other.extract()
    soup.body.append(full_div)
    return soup
示例2: construct
# 需要导入模块: from calibre.ebooks.BeautifulSoup import Tag [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.Tag import append [as 别名]
def construct(self, book_notes):
    '''
    Render a collection of notes as HTML.

    Returns None when book_notes is empty (or None). Otherwise returns a
    BeautifulSoup document whose <div class="book_notes"> receives, for
    each note in order, a <div class="book_note"> wrapping a
    <p class="book_note"> that carries the note and the style string
    obtained from self._get_note_style().
    '''
    if not book_notes:
        return None

    document = BeautifulSoup('''<div class="{0}"></div>'''.format('book_notes'))
    for entry in book_notes:
        wrapper = Tag(document, 'div', [('class', "book_note")])
        # The style is looked up once per note, after the wrapper is built.
        paragraph_attrs = [('class', "book_note"),
                           ('style', "{0}".format(self._get_note_style()))]
        paragraph = Tag(document, 'p', paragraph_attrs)
        paragraph.append(entry)
        wrapper.append(paragraph)
        document.div.append(wrapper)
    return document
示例3: merge_annotations
# 需要导入模块: from calibre.ebooks.BeautifulSoup import Tag [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.Tag import append [as 别名]
#.........这里部分代码省略.........
# Extract old user_annotations
ouas = old_soup.find('div', 'user_annotations')
if ouas:
ouas.extract()
# Capture existing annotations
parent.opts.db.capture_content(ouas, cid, TRANSIENT_DB)
# Regurgitate old_soup with current CSS
regurgitated_soup = BeautifulSoup(parent.opts.db.rerender_to_html(TRANSIENT_DB, cid))
# Find new annotations
uas = new_soup.findAll('div', 'annotation')
new_hashes = set([ua['hash'] for ua in uas])
updates = list(new_hashes.difference(old_hashes))
if len(updates) and ouas is not None:
# Append new to regurgitated
dtc = len(regurgitated_soup.div)
for new_annotation_id in updates:
new_annotation = new_soup.find('div', {'hash': new_annotation_id})
regurgitated_soup.div.insert(dtc, new_annotation)
dtc += 1
if old_soup:
merged_soup = unicode(old_soup) + unicode(sort_merged_annotations(regurgitated_soup))
else:
merged_soup = unicode(sort_merged_annotations(regurgitated_soup))
else:
if old_soup:
merged_soup = unicode(old_soup) + unicode(new_soup)
else:
merged_soup = unicode(new_soup)
return merged_soup
else:
'''
Newer technique: Use timestamps to merge annotations
'''
timestamps = {}
# Get the timestamps and hashes of the stored annotations
suas = old_soup.findAll('div', 'annotation')
for sua in suas:
#print("sua: %s" % sua.prettify())
timestamp = sua.find('td', 'timestamp')['uts']
timestamps[timestamp] = {'stored_hash': sua['hash']}
# Rerender stored annotations
ouas = old_soup.find('div', 'user_annotations')
if ouas:
ouas.extract()
# Capture existing annotations
parent.opts.db.capture_content(ouas, cid, TRANSIENT_DB)
# Regurgitate annotations with current CSS
rerendered_annotations = parent.opts.db.rerender_to_html(TRANSIENT_DB, cid)
regurgitated_soup = BeautifulSoup(rerendered_annotations)
# Add device annotation timestamps and hashes
duas = new_soup.findAll('div', 'annotation')
for dua in duas:
timestamp = dua.find('td', 'timestamp')['uts']
if timestamp in timestamps:
timestamps[timestamp]['device_hash'] = dua['hash']
else:
timestamps[timestamp] = {'device_hash': dua['hash']}
merged_annotations = Tag(BeautifulSoup(), 'div',
[('class', "user_annotations"), ('style','margin:0')])
for ts in sorted(timestamps):
if 'stored_hash' in timestamps[ts] and not 'device_hash' in timestamps[ts]:
# Stored only - add from regurgitated_soup
annotation = regurgitated_soup.find('div', {'hash': timestamps[ts]['stored_hash']})
elif not 'stored_hash' in timestamps[ts] and 'device_hash' in timestamps[ts]:
# Device only - add from new_soup
annotation = new_soup.find('div', {'hash': timestamps[ts]['device_hash']})
elif timestamps[ts]['stored_hash'] == timestamps[ts]['device_hash']:
# Stored matches device - add from regurgitated_soup, as user may have modified
annotation = regurgitated_soup.find('div', {'hash': timestamps[ts]['stored_hash']})
elif timestamps[ts]['stored_hash'] != timestamps[ts]['device_hash']:
# Device has been updated since initial capture - add from new_soup
annotation = new_soup.find('div', {'hash': timestamps[ts]['device_hash']})
else:
continue
merged_annotations.append(annotation)
merged_annotations = sort_merged_annotations(merged_annotations)
# Update new_soup with merged_annotations
new_soup_uas = new_soup.find('div', 'user_annotations')
new_soup_uas.replaceWith(merged_annotations)
return unicode(new_soup)