本文整理汇总了Python中calibre.ebooks.BeautifulSoup.BeautifulSoup.find方法的典型用法代码示例。如果您正苦于以下问题：Python BeautifulSoup.find方法的具体用法？Python BeautifulSoup.find怎么用？Python BeautifulSoup.find使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类calibre.ebooks.BeautifulSoup.BeautifulSoup的用法示例。
在下文中一共展示了BeautifulSoup.find方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: extract_calibre_cover
# 需要导入模块: from calibre.ebooks.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.BeautifulSoup import find [as 别名]
def extract_calibre_cover(raw, base, log):
    """Return the bytes of the image referenced by a simple cover page.

    ``raw`` is only treated as a cover page when it contains none of the
    tags h1-h6, p, span, font or br.  Two layouts are then recognised, in
    this order:

    1. the document holds exactly one ``<img>`` whose ``alt`` is ``cover``;
    2. the ``<body>`` has no text and exactly one ``<img>`` with a ``src``.

    :param raw: markup of the candidate cover page.
    :param base: directory against which the image ``src`` is resolved.
    :param log: not used by this function; kept for interface compatibility.
    :return: contents of the image file, or ``None`` when no cover image is
        found or the referenced file does not exist.
    """
    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    def read_image(src):
        # ``src`` is "/"-separated; rebuild it as a native path below base.
        path = os.path.join(base, *src.split("/"))
        if not os.path.exists(path):
            return None
        # The context manager closes the handle instead of leaking it.
        with open(path, "rb") as stream:
            return stream.read()

    soup = BeautifulSoup(raw)
    matches = soup.find(name=["h1", "h2", "h3", "h4", "h5", "h6", "p", "span", "font", "br"])
    if matches is not None:
        # Text-bearing markup is present, so neither layout can apply.
        return None

    images = soup.findAll("img")
    if len(images) == 1 and images[0].get("alt", "") == "cover":
        # .get() avoids a KeyError when the lone <img> carries no src.
        src = images[0].get("src")
        if src:
            data = read_image(src)
            if data is not None:
                return data

    # Look for a simple cover, i.e. a body with no text and only one <img> tag
    body = soup.find("body")
    if body is None:
        return None
    text = u"".join(map(unicode, body.findAll(text=True)))
    if text.strip():
        # Body has text, abort
        return None
    images = body.findAll("img", src=True)
    if len(images) == 1:
        return read_image(images[0]["src"])
    return None
示例2: extract_calibre_cover
# 需要导入模块: from calibre.ebooks.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.BeautifulSoup import find [as 别名]
def extract_calibre_cover(raw, base, log):
    '''
    Return the bytes of the image referenced by a simple cover page.

    ``raw`` is only treated as a cover page when it contains none of the
    tags h1-h6, p, span, font or br.  Two layouts are then recognised, in
    this order:

    1. the document holds exactly one ``<img>`` whose ``alt`` is ``cover``;
    2. the ``<body>`` has no text and exactly one ``<img>`` with a ``src``.

    :param raw: markup of the candidate cover page.
    :param base: directory against which the image ``src`` is resolved.
    :param log: not used by this function; kept for interface compatibility.
    :return: contents of the image file, or ``None`` when no cover image is
        found or the referenced file does not exist.
    '''
    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    def load(src):
        # ``src`` is '/'-separated; rebuild it as a native path below base.
        path = os.path.join(base, *src.split('/'))
        if not os.path.exists(path):
            return None
        # Close the handle deterministically rather than leaking it.
        with open(path, 'rb') as f:
            return f.read()

    soup = BeautifulSoup(raw)
    matches = soup.find(name=['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'span',
                              'font', 'br'])
    if matches is not None:
        # Text-bearing markup is present, so neither layout can apply.
        return None

    images = soup.findAll('img')
    if len(images) == 1 and images[0].get('alt', '') == 'cover':
        # .get() avoids a KeyError when the lone <img> carries no src.
        src = images[0].get('src')
        if src:
            data = load(src)
            if data is not None:
                return data

    # Look for a simple cover, i.e. a body with no text and only one <img> tag
    body = soup.find('body')
    if body is None:
        return None
    text = u''.join(map(unicode, body.findAll(text=True)))
    if text.strip():
        # Body has text, abort
        return None
    images = body.findAll('img', src=True)
    if len(images) == 1:
        return load(images[0]['src'])
    return None
示例3: get_soup
# 需要导入模块: from calibre.ebooks.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.BeautifulSoup import find [as 别名]
def get_soup(self, src, url=None):
    """Parse ``src`` into a soup and apply the configured tag filters.

    Steps, in order: decode ``src`` with ``xml_to_unicode``, run
    ``preprocess_raw_html``, apply ``preprocess_regexps`` plus a
    comment-stripping regexp, parse, give ``prepreprocess_html_ext`` a chance
    to substitute replacement markup, then apply ``keep_only_tags``,
    ``remove_tags_after``, ``remove_tags_before`` and ``remove_tags``.

    :param src: raw markup to parse.
    :param url: passed through to ``preprocess_raw_html``.
    :return: whatever ``preprocess_html_ext`` returns for the filtered soup.

    Side effect: a ``keep_only_tags`` given as a single dict is replaced on
    ``self`` by a one-element list.
    """
    massage_rules = list(self.preprocess_regexps)
    # Remove comments as they can leave detritus when extracting tags leaves
    # multiple nested comments
    massage_rules.append((re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''))

    def massage(text):
        # Apply every (pattern, replacement) rule in order.
        for pat, repl in massage_rules:
            text = pat.sub(repl, text)
        return text

    usrc = xml_to_unicode(src, self.verbose, strip_encoding_pats=True)[0]
    usrc = self.preprocess_raw_html(usrc, url)
    soup = BeautifulSoup(massage(usrc))

    replace = self.prepreprocess_html_ext(soup)
    if replace is not None:
        replace = xml_to_unicode(replace, self.verbose, strip_encoding_pats=True)[0]
        soup = BeautifulSoup(massage(replace))

    if self.keep_only_tags:
        body = soup.new_tag('body')
        try:
            if isinstance(self.keep_only_tags, dict):
                self.keep_only_tags = [self.keep_only_tags]
            for spec in self.keep_only_tags:
                for tag in soup.find('body').findAll(**spec):
                    body.insert(len(body.contents), tag)
            soup.find('body').replaceWith(body)
        except AttributeError:  # soup has no body element
            pass

    def remove_beyond(tag, direction):
        # Walk from ``tag`` up to (but not including) <body>; at each level
        # drop every sibling lying on the ``direction`` side.
        while tag is not None and getattr(tag, 'name', None) != 'body':
            after = getattr(tag, direction)
            while after is not None:
                # Fetch the following sibling from ``after`` itself *before*
                # detaching it, so each node is extracted exactly once.
                following = getattr(after, direction)
                after.extract()
                after = following
            tag = tag.parent

    def as_spec_list(specs):
        # A lone dict is shorthand for a one-element list of specs.
        return [specs] if isinstance(specs, dict) else specs

    if self.remove_tags_after is not None:
        for spec in as_spec_list(self.remove_tags_after):
            remove_beyond(soup.find(**spec), 'nextSibling')

    if self.remove_tags_before is not None:
        for spec in as_spec_list(self.remove_tags_before):
            remove_beyond(soup.find(**spec), 'previousSibling')

    for kwds in self.remove_tags:
        for tag in soup.findAll(**kwds):
            tag.extract()
    return self.preprocess_html_ext(soup)
示例4: generate_html
# 需要导入模块: from calibre.ebooks.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.BeautifulSoup import find [as 别名]
def generate_html(comments):
    """Fill the jacket template and strip header items that have no value.

    Every name other than ``comments`` (``mi``, ``title``, ``series``,
    ``output_profile`` ...) is read from the enclosing scope, which is not
    part of this block.

    :param comments: value substituted for ``{comments}`` in the template.
    :return: the rendered markup as unicode, passed through
        ``strip_encoding_declarations``.
    """
    args = dict(xmlns=XHTML_NS,
                title_str=title_str,
                css=css,
                title=title,
                author=author,
                publisher=publisher,
                pubdate_label=_('Published'), pubdate=pubdate,
                series_label=_('Series'), series=series,
                rating_label=_('Rating'), rating=rating,
                tags_label=_('Tags'), tags=tags,
                comments=comments,
                footer=''
                )
    for key in mi.custom_field_keys():
        try:
            display_name, val = mi.format_field_extended(key)[:2]
            field_key = key.replace('#', '_')
            args[field_key] = escape(val)
            args[field_key + '_label'] = escape(display_name)
        except Exception:
            # Best effort: a custom column that cannot be formatted is left
            # out of args.  ``Exception`` (not a bare except) so that
            # KeyboardInterrupt/SystemExit still propagate.
            pass

    # Used in the comment describing use of custom columns in templates
    args['_genre_label'] = args.get('_genre_label', '{_genre_label}')
    args['_genre'] = args.get('_genre', '{_genre}')

    generated_html = P('jacket/template.xhtml',
                       data=True).decode('utf-8').format(**args)

    # Post-process the generated html to strip out empty header items
    soup = BeautifulSoup(generated_html)
    for value, css_class in (
            (series, 'cbj_series'),
            (rating, 'cbj_rating'),
            (tags, 'cbj_tags'),
            (pubdate, 'cbj_pubdata'),
    ):
        if not value:
            node = soup.find(attrs={'class': css_class})
            if node is not None:
                node.extract()
    if output_profile.short_name != 'kindle':
        hr_tag = soup.find('hr', attrs={'class': 'cbj_kindle_banner_hr'})
        if hr_tag is not None:
            hr_tag.extract()

    return strip_encoding_declarations(
        soup.renderContents('utf-8').decode('utf-8'))
示例5: get_soup
# 需要导入模块: from calibre.ebooks.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.BeautifulSoup import find [as 别名]
def get_soup(self, src, url=None):
    """Parse ``src`` into a soup and apply the configured tag filters.

    The markup is decoded with ``xml_to_unicode``, passed through
    ``preprocess_raw_html`` and parsed with a ``markupMassage`` list made of
    ``BeautifulSoup.MARKUP_MASSAGE``, ``preprocess_regexps``, a DOCTYPE
    stripper and a comment stripper.  ``prepreprocess_html_ext`` may supply
    replacement markup; afterwards ``keep_only_tags``, ``remove_tags_after``,
    ``remove_tags_before`` and ``remove_tags`` are applied.

    :param src: raw markup to parse.
    :param url: passed through to ``preprocess_raw_html``.
    :return: whatever ``preprocess_html_ext`` returns for the filtered soup.

    Side effect: a ``keep_only_tags`` given as a single dict is replaced on
    ``self`` by a one-element list.
    """
    nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
    nmassage.extend(self.preprocess_regexps)
    # Some websites have buggy doctype declarations that mess up beautifulsoup
    nmassage.append((re.compile(r'<!DOCTYPE .+?>', re.DOTALL|re.IGNORECASE), lambda m: ''))
    # Remove comments as they can leave detritus when extracting tags leaves
    # multiple nested comments
    nmassage.append((re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''))

    usrc = xml_to_unicode(src, self.verbose, strip_encoding_pats=True)[0]
    usrc = self.preprocess_raw_html(usrc, url)
    soup = BeautifulSoup(usrc, markupMassage=nmassage)

    replace = self.prepreprocess_html_ext(soup)
    if replace is not None:
        decoded = xml_to_unicode(replace, self.verbose, strip_encoding_pats=True)[0]
        soup = BeautifulSoup(decoded, markupMassage=nmassage)

    if self.keep_only_tags:
        body = Tag(soup, 'body')
        try:
            if isinstance(self.keep_only_tags, dict):
                self.keep_only_tags = [self.keep_only_tags]
            for spec in self.keep_only_tags:
                for tag in soup.find('body').findAll(**spec):
                    body.insert(len(body.contents), tag)
            soup.find('body').replaceWith(body)
        except AttributeError:  # soup has no body element
            pass

    def remove_beyond(tag, direction):
        # Walk from ``tag`` up to (but not including) <body>; at each level
        # drop every sibling lying on the ``direction`` side.
        while tag is not None and getattr(tag, 'name', None) != 'body':
            after = getattr(tag, direction)
            while after is not None:
                # Fetch the following sibling from ``after`` itself *before*
                # detaching it, so each node is extracted exactly once.
                following = getattr(after, direction)
                after.extract()
                after = following
            tag = tag.parent

    def as_spec_list(specs):
        # A lone dict is shorthand for a one-element list of specs.
        return [specs] if isinstance(specs, dict) else specs

    if self.remove_tags_after is not None:
        for spec in as_spec_list(self.remove_tags_after):
            remove_beyond(soup.find(**spec), 'nextSibling')

    if self.remove_tags_before is not None:
        for spec in as_spec_list(self.remove_tags_before):
            remove_beyond(soup.find(**spec), 'previousSibling')

    for kwds in self.remove_tags:
        for tag in soup.findAll(**kwds):
            tag.extract()
    return self.preprocess_html_ext(soup)
示例6: existing_annotations
# 需要导入模块: from calibre.ebooks.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.BeautifulSoup import find [as 别名]
def existing_annotations(parent, field, return_all=False):
    """
    Return the ids of books whose ``field`` contains a user_annotations div.

    :param parent: object exposing the library as ``parent.opts.gui.current_db``.
    :param field: ``"Comments"`` or the name of a custom field; a falsy value
        means "no active field" and yields an empty list.
    :param return_all: when False, stop at the first annotated book, so the
        result has at most one id; when True, scan every record.
    :return: list of matching book ids (possibly empty).
    """
    # import calibre_plugins.marvin_manager.config as cfg
    _log_location(field)
    annotation_map = []
    if not field:
        _log("no active field")
        return annotation_map

    db = parent.opts.gui.current_db
    id_index = db.FIELD_MAP["id"]
    for record in db.data.iterall():
        mi = db.get_metadata(record[id_index], index_is_id=True)
        if field == "Comments":
            markup = mi.comments
            if not markup:
                continue
        else:
            markup = mi.get_user_metadata(field, False)["#value#"]
            # An unset custom field holds None; there is nothing to parse.
            if markup is None:
                continue

        if BeautifulSoup(markup).find("div", "user_annotations") is not None:
            annotation_map.append(mi.id)
            if not return_all:
                break

    if return_all:
        _log("Identified %d annotated books of %d total books" % (len(annotation_map), len(db.data)))
    # NOTE(review): the original indentation was lost; this line is assumed to
    # run whether or not return_all is set -- confirm against upstream.
    _log("annotation_map: %s" % repr(annotation_map))
    return annotation_map
示例7: save_soup
# 需要导入模块: from calibre.ebooks.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.BeautifulSoup import find [as 别名]
def save_soup(soup, target):
    """Write ``soup`` to ``target`` as UTF-8 with a matching charset <meta>.

    Every ``<meta>`` whose ``content`` mentions a charset is replaced by a
    UTF-8 Content-Type meta; when there is none, the meta is inserted at the
    top of ``<head>`` (if the document has one).  ``src``/``href`` values on
    img, link and a tags that are absolute paths to existing files are
    rewritten relative to the directory of ``target``.

    :param soup: parsed document; it is modified in place.
    :param target: path of the file to write.
    """
    charset_soup = BeautifulSoup('<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />')
    charset_meta = charset_soup.find('meta')

    replaced = False
    for meta in soup.findAll('meta', content=True):
        if 'charset' in meta.get('content', '').lower():
            meta.replaceWith(charset_meta)
            replaced = True
    if not replaced:
        head = soup.find('head')
        if head is not None:
            head.insert(0, charset_meta)

    selfdir = os.path.dirname(target)
    for tag in soup.findAll(['img', 'link', 'a']):
        for key in ('src', 'href'):
            path = tag.get(key, None)
            # isabs() is a pure string test, so run it before touching the
            # filesystem; isfile() already implies that the path exists.
            if path and os.path.isabs(path) and os.path.isfile(path):
                tag[key] = unicode_path(relpath(path, selfdir).replace(os.sep, '/'))

    html = unicode(soup)
    with open(target, 'wb') as f:
        f.write(html.encode('utf-8'))
示例8: existing_annotations
# 需要导入模块: from calibre.ebooks.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.BeautifulSoup import find [as 别名]
def existing_annotations(parent, field, return_all=False):
    '''
    Return the ids of books whose ``field`` contains a user_annotations div.

    :param parent: object exposing the library as ``parent.opts.gui.current_db``.
    :param field: ``'Comments'`` or the name of a custom field; a falsy value
        yields an empty list.
    :param return_all: when False, stop at the first annotated book, so the
        result has at most one id; when True, scan every record.
    :return: list of matching book ids (possibly empty).
    '''
    # Kept from the original although cfg is not referenced below; the import
    # itself may matter to the plugin.
    import calibre_plugins.annotations.config as cfg

    annotation_map = []
    if not field:
        return annotation_map

    db = parent.opts.gui.current_db
    id_index = db.FIELD_MAP['id']
    for record in db.data.iterall():
        mi = db.get_metadata(record[id_index], index_is_id=True)
        if field == 'Comments':
            markup = mi.comments
            if not markup:
                continue
        else:
            markup = mi.get_user_metadata(field, False)['#value#']
            # An unset custom field holds None; there is nothing to parse.
            if markup is None:
                continue

        if BeautifulSoup(markup).find('div', 'user_annotations') is not None:
            annotation_map.append(mi.id)
            if not return_all:
                break

    if return_all:
        _log_location("Identified %d annotated books of %d total books" %
                      (len(annotation_map), len(db.data)))
    return annotation_map
示例9: get_series
# 需要导入模块: from calibre.ebooks.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.BeautifulSoup import find [as 别名]
def get_series(title, authors, timeout=60):
    """Look up series information for a book on the remote search page.

    The title (minus a leading article or ignorable first character) and the
    first author's surname are formatted into ``URL``; the result page is
    scraped for a ``seriessearch`` block.

    :param title: book title.
    :param authors: sequence of author names; only the first is used.
    :param timeout: network timeout in seconds.
    :return: a ``Metadata`` object; ``series`` and ``series_index`` are set
        only when they could be extracted.
    :raises Exception: with a "server busy" message when the request times
        out; other ``URLError`` instances are re-raised unchanged.
    """
    mi = Metadata(title, authors)
    if title and title[0] in _ignore_starts:
        title = title[1:]
    title = re.sub(r'^(A|The|An)\s+', '', title).strip()
    if not title:
        return mi
    if isinstance(title, unicode):
        title = title.encode('utf-8')
    title = urllib.quote_plus(title)

    # Without any author there is nothing to search for (the original
    # raised IndexError here).
    if not authors:
        return mi
    author = authors[0].strip()
    if not author:
        return mi
    # "Surname, First" -> text before the comma; otherwise the last word.
    if ',' in author:
        author = author.split(',')[0]
    else:
        author = author.split()[-1]

    url = URL.format(author, title)
    br = browser()
    try:
        raw = br.open_novisit(url, timeout=timeout).read()
    except URLError as e:
        if isinstance(e.reason, socket.timeout):
            raise Exception('KDL Server busy, try again later')
        raise
    if 'see the full results' not in raw:
        return mi

    raw = xml_to_unicode(raw)[0]
    soup = BeautifulSoup(raw)
    searcharea = soup.find('div', attrs={'class': 'searcharea'})
    if searcharea is None:
        return mi
    ss = searcharea.find('div', attrs={'class': 'seriessearch'})
    if ss is None:
        return mi
    a = ss.find('a', href=True)
    if a is None:
        return mi

    # The series name travels in the link's query string.
    query = a['href'].partition('?')[-1]
    series = urlparse.parse_qs(query).get('SeriesName', [])
    if not series:
        return mi
    series = re.sub(r' series$', '', series[0]).strip()
    if series:
        mi.series = series

    # The node following the series block is expected to start with
    # "<index>."; it may be missing or be a bare text node without
    # ``contents``, so look the attribute up defensively.
    contents = getattr(ss.nextSibling, 'contents', None)
    if contents:
        index_text = unicode(contents[0]).partition('.')[0].strip()
        try:
            mi.series_index = int(index_text)
        except ValueError:
            # Not a number: leave series_index untouched.
            pass
    return mi
示例10: find_all_annotated_books
# 需要导入模块: from calibre.ebooks.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.BeautifulSoup import find [as 别名]
def find_all_annotated_books(self):
    '''
    Append to self.annotation_map the id of every book returned by the
    'formats:EPUB' search whose self.field value contains a
    user_annotations div.
    '''
    self._log_location("field: {0}".format(self.field))

    for cid in self.cdb.search_getting_ids('formats:EPUB', ''):
        mi = self.cdb.get_metadata(cid, index_is_id=True)
        field_data = mi.get_user_metadata(self.field, False)
        if field_data['#value#'] is None:
            # Field is unset for this book
            continue
        parsed = BeautifulSoup(field_data['#value#'])
        if parsed.find('div', 'user_annotations') is None:
            continue
        self.annotation_map.append(mi.id)
示例11: _remove_old_style
# 需要导入模块: from calibre.ebooks.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.BeautifulSoup import find [as 别名]
def _remove_old_style(self, html):
    '''
    Parse html, pick one random linked <td> inside the 'vocabulary' div,
    remember it (self.td) and its original href (self.oh), point the link
    at self._finalize(), then drop the first <style> found in <head>.
    Returns the modified soup.
    '''
    def is_linked_cell(tag):
        # A <td> that contains an <a>
        return tag.name == 'td' and tag.a

    unstyled_soup = BeautifulSoup(html)
    head = unstyled_soup.find("head")

    vocabulary = unstyled_soup.body.find('div', {'class': 'vocabulary'})
    candidates = vocabulary.findAll(is_linked_cell)
    pick = random.randrange(len(candidates))
    self.td = candidates[pick]
    self.oh = self.td.a['href']
    # _finalize() runs after self.td / self.oh are set, as in the original
    self.td.a['href'] = self._finalize()

    stale_style = head.find('style')
    if stale_style:
        stale_style.extract()
    return unstyled_soup
示例12: _inject_css
# 需要导入模块: from calibre.ebooks.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.BeautifulSoup import find [as 别名]
def _inject_css(self, html):
    '''
    Stick a <style> element into html.

    The CSS comes from self.prefs['injected_css'].  When that preference is
    missing or empty, html is returned untouched.  Injection is best effort:
    if parsing or insertion fails (for instance the markup has no <head>),
    the original html is returned.

    :param html: markup to decorate.
    :return: result of renderContents() on the styled soup, or the
        unmodified html.
    '''
    css = self.prefs.get('injected_css', None)
    if not css:
        return html

    try:
        styled_soup = BeautifulSoup(html)
        head = styled_soup.find("head")
        style_tag = Tag(styled_soup, 'style')
        style_tag['type'] = "text/css"
        style_tag.insert(0, css)
        head.insert(0, style_tag)
        return styled_soup.renderContents()
    except Exception:
        # Deliberate fallback.  ``Exception`` rather than a bare except so
        # that KeyboardInterrupt/SystemExit are not swallowed.
        return html
示例13: move_annotations
# 需要导入模块: from calibre.ebooks.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.BeautifulSoup import find [as 别名]
def move_annotations(
parent, annotation_map, old_destination_field, new_destination_field, window_title="Moving annotations"
):
"""
Move annotations from old_destination_field to new_destination_field
annotation_map precalculated in thread in config.py

parent: supplies the library (parent.opts.gui.current_db) and the annotations
store (parent.opts.db); also used as the ProgressBar parent
annotation_map: iterable of book ids to process; its length sizes the
progress bar
old_destination_field / new_destination_field: "Comments" or a custom field
name starting with "#"
window_title: title of the progress dialog

NOTE(review): this listing is truncated after the custom -> Comments branch,
so any remaining branches, cleanup and the return value are not visible here.
"""
import calibre_plugins.marvin_manager.config as cfg
_log_location(annotation_map)
_log(" %s -> %s" % (old_destination_field, new_destination_field))
db = parent.opts.gui.current_db
# Not referenced in the visible part of this function
id = db.FIELD_MAP["id"]
# Show progress
pb = ProgressBar(parent=parent, window_title=window_title)
total_books = len(annotation_map)
pb.set_maximum(total_books)
pb.set_value(1)
pb.set_label("{:^100}".format("Moving annotations for %d books" % total_books))
pb.show()
# Name handed to capture_content / rerender_to_html below
transient_db = "transient"
# Prepare a new COMMENTS_DIVIDER
comments_divider = '<div class="comments_divider"><p style="text-align:center;margin:1em 0 1em 0">{0}</p></div>'.format(
cfg.plugin_prefs.get(
"COMMENTS_DIVIDER", "· · • · ✦ · • · ·"
)
)
for cid in annotation_map:
mi = db.get_metadata(cid, index_is_id=True)
# Comments -> custom
if old_destination_field == "Comments" and new_destination_field.startswith("#"):
if mi.comments:
old_soup = BeautifulSoup(mi.comments)
uas = old_soup.find("div", "user_annotations")
if uas:
# Remove user_annotations from Comments
uas.extract()
# Remove comments_divider from Comments
cd = old_soup.find("div", "comments_divider")
if cd:
cd.extract()
# Save stripped Comments
mi.comments = unicode(old_soup)
# Capture content
parent.opts.db.capture_content(uas, cid, transient_db)
# Regurgitate content with current CSS style
new_soup = parent.opts.db.rerender_to_html(transient_db, cid)
# Add user_annotations to destination
um = mi.metadata_for_field(new_destination_field)
um["#value#"] = unicode(new_soup)
mi.set_user_metadata(new_destination_field, um)
# Update the record with stripped Comments, populated custom field
db.set_metadata(
cid, mi, set_title=False, set_authors=False, commit=True, force_changes=True, notify=True
)
pb.increment()
# custom -> Comments
elif old_destination_field.startswith("#") and new_destination_field == "Comments":
# A custom field whose value is None has nothing to move
if mi.get_user_metadata(old_destination_field, False)["#value#"] is not None:
old_soup = BeautifulSoup(mi.get_user_metadata(old_destination_field, False)["#value#"])
uas = old_soup.find("div", "user_annotations")
if uas:
# Remove user_annotations from custom field
uas.extract()
# Capture content
parent.opts.db.capture_content(uas, cid, transient_db)
# Regurgitate content with current CSS style
new_soup = parent.opts.db.rerender_to_html(transient_db, cid)
# Save stripped custom field data
um = mi.metadata_for_field(old_destination_field)
um["#value#"] = unicode(old_soup)
mi.set_user_metadata(old_destination_field, um)
# Add user_annotations to Comments
# Existing Comments are kept; the divider separates them from the annotations
if mi.comments is None:
mi.comments = unicode(new_soup)
else:
mi.comments = mi.comments + unicode(comments_divider) + unicode(new_soup)
# Update the record with stripped custom field, updated Comments
db.set_metadata(
cid, mi, set_title=False, set_authors=False, commit=True, force_changes=True, notify=True
)
pb.increment()
#.........这里部分代码省略.........
示例14: generate_html
# 需要导入模块: from calibre.ebooks.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.BeautifulSoup import find [as 别名]
def generate_html(comments):
    """Format the jacket template and strip header items that have no value.

    Custom columns are exposed to the template under their key with '#'
    replaced by '_' (plus a matching '<key>_label' entry); how the value is
    rendered depends on the column's datatype.  All names other than
    ``comments`` come from the enclosing scope, which is outside this block.

    :param comments: value substituted for ``{comments}`` in the template.
    :return: rendered markup as unicode, passed through
        ``strip_encoding_declarations``.
    """
    args = {
        'xmlns': XHTML_NS,
        'title_str': title_str,
        'css': css,
        'title': title,
        'author': author,
        'publisher': publisher,
        'pubdate_label': _('Published'),
        'pubdate': pubdate,
        'series_label': _('Series'),
        'series': series,
        'rating_label': _('Rating'),
        'rating': rating,
        'tags_label': _('Tags'),
        'tags': tags,
        'comments': comments,
        'footer': '',
        'searchable_tags': ' '.join(escape(t)+'ttt' for t in tags.tags_list),
    }

    for key in mi.custom_field_keys():
        field_meta = mi.get_user_metadata(key, False) or {}
        try:
            display_name, val = mi.format_field_extended(key)[:2]
            dkey = key.replace('#', '_')
            datatype = field_meta.get('datatype')
            if datatype == 'series':
                rendered = Series(mi.get(key), mi.get(key + '_index'))
            elif datatype == 'rating':
                rendered = rating_to_stars(
                    mi.get(key),
                    field_meta.get('display', {}).get('allow_half_stars', False))
            elif datatype == 'comments':
                text = val or ''
                interpret_as = field_meta.get('display', {}).get('interpret_as') or 'html'
                if interpret_as == 'long-text':
                    rendered = '<pre style="white-space:pre-wrap">%s</pre>' % escape(text)
                elif interpret_as == 'short-text':
                    rendered = '<span>%s</span>' % escape(text)
                elif interpret_as == 'markdown':
                    rendered = markdown(text)
                else:
                    rendered = comments_to_html(text)
            else:
                rendered = escape(val)
            args[dkey] = rendered
            args[dkey + '_label'] = escape(display_name)
        except Exception:
            # if the val (custom column contents) is None, don't add to args
            pass

    # Debugging aid, switched off
    if False:
        print("Custom column values available in jacket template:")
        for name in args.keys():
            if name.startswith('_') and not name.endswith('_label'):
                print(" %s: %s" % ('#' + name[1:], args[name]))

    # Used in the comment describing use of custom columns in templates
    # Don't change this unless you also change it in template.xhtml
    args['_genre_label'] = args.get('_genre_label', '{_genre_label}')
    args['_genre'] = args.get('_genre', '{_genre}')

    generated_html = SafeFormatter().format(template, **args)

    # Post-process the generated html to strip out empty header items
    soup = BeautifulSoup(generated_html)
    for value, css_class in (
            (series, 'cbj_series'),
            (rating, 'cbj_rating'),
            (tags, 'cbj_tags'),
            (pubdate, 'cbj_pubdata'),
    ):
        if not value:
            empty_item = soup.find(attrs={'class': css_class})
            if empty_item is not None:
                empty_item.extract()
    if output_profile.short_name != 'kindle':
        banner_rule = soup.find('hr', attrs={'class': 'cbj_kindle_banner_hr'})
        if banner_rule is not None:
            banner_rule.extract()

    rendered_bytes = soup.renderContents('utf-8')
    return strip_encoding_declarations(rendered_bytes.decode('utf-8'))
示例15: generate_html
# 需要导入模块: from calibre.ebooks.BeautifulSoup import BeautifulSoup [as 别名]
# 或者: from calibre.ebooks.BeautifulSoup.BeautifulSoup import find [as 别名]
def generate_html(comments):
    """Format the jacket template and strip header items that have no value.

    Custom columns are exposed to the template under their key with '#'
    replaced by '_' (plus a matching '<key>_label' entry).  All names other
    than ``comments`` come from the enclosing scope, which is outside this
    block.

    :param comments: value substituted for ``{comments}`` in the template.
    :return: rendered markup as unicode, passed through
        ``strip_encoding_declarations``.
    """
    args = dict(xmlns=XHTML_NS,
                title_str=title_str,
                css=css,
                title=title,
                author=author,
                publisher=publisher,
                pubdate_label=_('Published'), pubdate=pubdate,
                series_label=_('Series'), series=series,
                rating_label=_('Rating'), rating=rating,
                tags_label=_('Tags'), tags=tags,
                comments=comments,
                footer='',
                searchable_tags=' '.join(escape(t)+'ttt' for t in tags.tags_list),
                )
    for key in mi.custom_field_keys():
        try:
            display_name, val = mi.format_field_extended(key)[:2]
            field_key = key.replace('#', '_')
            args[field_key] = escape(val)
            args[field_key + '_label'] = escape(display_name)
        except Exception:
            # if the val (custom column contents) is None, don't add to args.
            # ``Exception`` (not a bare except) so that
            # KeyboardInterrupt/SystemExit still propagate.
            pass

    # Debugging aid, switched off
    if False:
        print("Custom column values available in jacket template:")
        for key in args.keys():
            if key.startswith('_') and not key.endswith('_label'):
                print(" %s: %s" % ('#' + key[1:], args[key]))

    # Used in the comment describing use of custom columns in templates
    # Don't change this unless you also change it in template.xhtml
    args['_genre_label'] = args.get('_genre_label', '{_genre_label}')
    args['_genre'] = args.get('_genre', '{_genre}')

    formatter = SafeFormatter()
    generated_html = formatter.format(template, **args)

    # Post-process the generated html to strip out empty header items
    soup = BeautifulSoup(generated_html)
    for value, css_class in (
            (series, 'cbj_series'),
            (rating, 'cbj_rating'),
            (tags, 'cbj_tags'),
            (pubdate, 'cbj_pubdata'),
    ):
        if not value:
            node = soup.find(attrs={'class': css_class})
            if node is not None:
                node.extract()
    if output_profile.short_name != 'kindle':
        hr_tag = soup.find('hr', attrs={'class': 'cbj_kindle_banner_hr'})
        if hr_tag is not None:
            hr_tag.extract()

    return strip_encoding_declarations(
        soup.renderContents('utf-8').decode('utf-8'))