本文整理汇总了 Python 中 calibre.ebooks.conversion.utils.HeuristicProcessor.fix_nbsp_indents 方法的典型用法代码示例。如果您正苦于以下问题:Python HeuristicProcessor.fix_nbsp_indents 方法的具体用法?Python HeuristicProcessor.fix_nbsp_indents 怎么用?Python HeuristicProcessor.fix_nbsp_indents 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 calibre.ebooks.conversion.utils.HeuristicProcessor 的用法示例。
在下文中一共展示了HeuristicProcessor.fix_nbsp_indents方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: smarten_punctuation
# 需要导入模块: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 别名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 别名]
def smarten_punctuation(self):
    """Smarten punctuation in every file listed by ``get_html_names``.

    Each file's raw markup is fetched and UTF-8 encoded, passed through
    ``HeuristicProcessor.fix_nbsp_indents`` and ``smartyPants``, and then has
    its ellipses and dashes normalised. The file is marked dirty afterwards,
    and the cache is flushed once every file has been handled.
    """
    preprocessor = HeuristicProcessor(log=self.log)
    for name in self.get_html_names():
        self.log.info("Smartening punctuation for file {0}".format(name))
        html = self.get_raw(name)
        html = html.encode("UTF-8")
        # Fix non-breaking space indents
        html = preprocessor.fix_nbsp_indents(html)
        # Smarten punctuation
        html = smartyPants(html)
        # Collapse a spaced-out three-dot run that follows a word character.
        # NOTE(review): the original heading said "Ellipsis to HTML entity"
        # but the replacement is a literal character - confirm which one is
        # intended.
        html = re.sub(
            r'(?u)(?<=\w)\s?(\.\s+?){2}\.', '…', html,
            flags=re.UNICODE | re.MULTILINE)
        # Pad the dash characters that are already in the text BEFORE turning
        # ASCII hyphen runs into dashes. Doing it the other way round pads the
        # freshly inserted dashes a second time, which doubles the spaces and
        # stops the comment repair below from ever matching.
        html = html.replace(u"\x97", ' – ')
        html = html.replace(u"\u2013", ' – ')
        html = html.replace(u"\u2014", ' — ')
        # Longest hyphen run first so '---' is not consumed as '--' + '-'
        html = html.replace('---', ' – ')
        html = html.replace('--', ' — ')
        # Fix comment nodes that got mangled by the '--' replacement
        html = html.replace(u'<! — ', u'<!-- ')
        html = html.replace(u' — >', u' -->')
        # Remove Unicode replacement characters
        html = html.replace(u"\uFFFD", "")
        # NOTE(review): nothing visible here hands the processed ``html`` back
        # to the container before the file is marked dirty - confirm that
        # ``dirty``/``flush_cache`` really persist this text.
        self.dirty(name)
    self.flush_cache()
示例2: smarten_punctuation
# 需要导入模块: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 别名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 别名]
def smarten_punctuation(self):
    """Convert standard punctuation to "smart" punctuation.

    Walks every name from ``html_names``, skips files whose raw text is
    ``None``, runs ``fix_nbsp_indents`` and ``smartyPants`` on the rest, then
    normalises ellipses and dashes. Each processed file is marked dirty and
    the cache is flushed once at the end.
    """
    preprocessor = HeuristicProcessor(log=self.log)
    for name in self.html_names():
        self.log.info("Smartening punctuation for file {0}".format(name))
        html = self.get_raw(name, force_unicode=True)
        if html is None:
            continue
        # Fix non-breaking space indents
        html = preprocessor.fix_nbsp_indents(html)
        # Smarten punctuation
        html = smartyPants(html)
        # Ellipsis pattern (defined elsewhere as ELLIPSIS_RE) to one character
        html = ELLIPSIS_RE.sub("…", html)
        # Pad the dash characters that are already in the text BEFORE turning
        # ASCII hyphen runs into dashes. Doing it the other way round pads the
        # freshly inserted dashes a second time, which doubles the spaces and
        # stops the comment repair below from ever matching.
        html = html.replace("\x97", " – ")
        html = html.replace("\u2013", " – ")
        html = html.replace("\u2014", " — ")
        # Longest hyphen run first so "---" is not consumed as "--" + "-"
        html = html.replace("---", " – ")
        html = html.replace("--", " — ")
        # Fix comment nodes that got mangled by the "--" replacement
        html = html.replace("<! — ", "<!-- ")
        html = html.replace(" — >", " -->")
        # NOTE(review): nothing visible here hands the processed ``html`` back
        # to the container before the file is marked dirty - confirm that
        # ``dirty``/``flush_cache`` really persist this text.
        self.dirty(name)
    self.flush_cache()
示例3: smarten_punctuation
# 需要导入模块: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 别名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 别名]
def smarten_punctuation(self):
    """Smarten punctuation in every file listed by ``get_html_names``.

    Each file's raw markup is fetched and UTF-8 encoded, passed through
    ``HeuristicProcessor.fix_nbsp_indents`` and ``smartyPants``, has its
    ellipses and dashes normalised, and is then stored back with
    ``self.set(name, html)``.
    """
    preprocessor = HeuristicProcessor(log=self.log)
    for name in self.get_html_names():
        html = self.get_raw(name)
        html = html.encode("UTF-8")
        # Fix non-breaking space indents
        html = preprocessor.fix_nbsp_indents(html)
        # Smarten punctuation
        html = smartyPants(html)
        # Collapse a spaced-out three-dot run that follows a word character
        html = re.sub(r'(?u)(?<=\w)\s?(\.\s+?){2}\.', '…', html)
        # Pad the dash characters that are already in the text BEFORE turning
        # ASCII hyphen runs into dashes; the reverse order pads the freshly
        # inserted dashes a second time and leaves doubled spaces behind.
        html = html.replace(u"\x97", ' – ')
        html = html.replace(u"\u2013", ' – ')
        html = html.replace(u"\u2014", ' — ')
        # Longest hyphen run first so '---' is not consumed as '--' + '-'
        html = html.replace('---', ' – ')
        html = html.replace('--', ' — ')
        # The '--' replacement also rewrites the '<!--' and '-->' comment
        # delimiters; put them back so comments stay comments.
        html = html.replace(u'<! — ', u'<!-- ')
        html = html.replace(u' — >', u' -->')
        # Any remaining unspaced three-dot run
        html = html.replace(u"...", "…")
        # Remove Unicode replacement characters
        html = html.replace(u"\uFFFD", "")
        self.set(name, html)
示例4: smarten_punctuation
# 需要导入模块: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 别名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 别名]
def smarten_punctuation(html, log=None):
    """Return ``html`` after nbsp-indent fixing and smartyPants processing.

    The ``<!--`` and ``-->`` delimiters are swapped for unique placeholder
    tokens before processing and swapped back afterwards, so neither
    processing step sees the literal delimiters. The final text is passed
    through ``substitute_entites`` and returned.

    ``log`` is forwarded to ``HeuristicProcessor``.
    """
    from calibre.utils.smartypants import smartyPants
    from calibre.ebooks.chardet import substitute_entites
    from calibre.ebooks.conversion.utils import HeuristicProcessor
    from uuid import uuid4

    heuristics = HeuristicProcessor(log=log)

    # One random token per delimiter, so they can be told apart on restore
    open_token = 'calibre-smartypants-' + str(uuid4())
    close_token = 'calibre-smartypants-' + str(uuid4())

    masked = html.replace('<!--', open_token).replace('-->', close_token)
    smartened = smartyPants(heuristics.fix_nbsp_indents(masked))
    restored = smartened.replace(open_token, '<!--').replace(close_token, '-->')
    return substitute_entites(restored)
示例5: smarten_punctuation
# 需要导入模块: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 别名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 别名]
def smarten_punctuation(html, log):
    """Return ``html`` with smartened punctuation, ellipses and em-dashes.

    The ``<!--`` and ``-->`` delimiters are replaced by unique placeholder
    tokens while ``fix_nbsp_indents`` and ``smartyPants`` run, then restored.
    Afterwards three-dot runs and whitespace-delimited double hyphens are
    rewritten, and the text is passed through ``substitute_entites``.

    ``log`` is forwarded to ``HeuristicProcessor``.
    """
    from calibre.utils.smartypants import smartyPants
    from calibre.ebooks.chardet import substitute_entites
    from calibre.ebooks.conversion.utils import HeuristicProcessor
    from uuid import uuid4

    nbsp_fixer = HeuristicProcessor(log=log)

    # (delimiter, placeholder) pairs; a fresh random token for each delimiter
    placeholders = (
        ('<!--', 'calibre-smartypants-'+str(uuid4())),
        ('-->', 'calibre-smartypants-'+str(uuid4())),
    )
    for delimiter, token in placeholders:
        html = html.replace(delimiter, token)

    html = smartyPants(nbsp_fixer.fix_nbsp_indents(html))

    for delimiter, token in placeholders:
        html = html.replace(token, delimiter)

    # A three-dot run (dots optionally separated by one whitespace character)
    # that follows a word character becomes a single ellipsis character
    html = re.sub(r'(?u)(?<=\w)\s?(\.\s?){2}\.', '…', html)
    # A double hyphen with whitespace on both sides becomes U+2014; the
    # surrounding whitespace is consumed by the match
    html = re.sub(r'\s--\s', u'\u2014', html)
    return substitute_entites(html)
示例6: smarten_punctuation
# 需要导入模块: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 别名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 别名]
def smarten_punctuation(html, log):
    """Smarten the punctuation of ``html`` and return the result.

    Steps, in order: swap the ``<!--`` / ``-->`` delimiters for unique
    placeholder tokens, run ``fix_nbsp_indents`` and ``smartyPants``, swap the
    delimiters back, rewrite three-dot runs and whitespace-delimited double
    hyphens, and finally apply ``substitute_entites``.

    ``log`` is forwarded to ``HeuristicProcessor``.
    """
    from calibre.utils.smartypants import smartyPants
    from calibre.ebooks.chardet import substitute_entites
    from calibre.ebooks.conversion.utils import HeuristicProcessor
    from uuid import uuid4

    def _new_token():
        # Random, so the token cannot collide with real document text
        return "calibre-smartypants-" + str(uuid4())

    processor = HeuristicProcessor(log=log)
    comment_start = _new_token()
    comment_end = _new_token()

    text = html.replace("<!--", comment_start)
    text = text.replace("-->", comment_end)

    text = processor.fix_nbsp_indents(text)
    text = smartyPants(text)

    text = text.replace(comment_start, "<!--")
    text = text.replace(comment_end, "-->")

    # Three dots after a word character (each optionally followed by one
    # whitespace character) collapse into one ellipsis character
    text = re.sub(r"(?u)(?<=\w)\s?(\.\s?){2}\.", "…", text)
    # Whitespace, two hyphens, whitespace -> a bare U+2014
    text = re.sub(r"\s--\s", u"\u2014", text)
    return substitute_entites(text)