当前位置: 首页>>代码示例>>Python>>正文


Python HeuristicProcessor.fix_nbsp_indents方法代码示例

本文整理汇总了Python中calibre.ebooks.conversion.utils.HeuristicProcessor.fix_nbsp_indents方法的典型用法代码示例。如果您正苦于以下问题:Python HeuristicProcessor.fix_nbsp_indents方法的具体用法?Python HeuristicProcessor.fix_nbsp_indents怎么用?Python HeuristicProcessor.fix_nbsp_indents使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在calibre.ebooks.conversion.utils.HeuristicProcessor的用法示例。


在下文中一共展示了HeuristicProcessor.fix_nbsp_indents方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: smarten_punctuation

# 需要导入模块: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 别名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 别名]
    def smarten_punctuation(self):
        preprocessor = HeuristicProcessor(log=self.log)

        for name in self.get_html_names():
            self.log.info("Smartening punctuation for file {0}".format(name))
            html = self.get_raw(name)
            html = html.encode("UTF-8")

            # Fix non-breaking space indents
            html = preprocessor.fix_nbsp_indents(html)
            # Smarten punctuation
            html = smartyPants(html)
            # Ellipsis to HTML entity
            html = re.sub(ur'(?u)(?<=\w)\s?(\.\s+?){2}\.', '&hellip;', html, flags=re.UNICODE | re.MULTILINE)
            # Double-dash and unicode char code to em-dash
            html = string.replace(html, '---', ' &#x2013; ')
            html = string.replace(html, u"\x97", ' &#x2013; ')
            html = string.replace(html, '--', ' &#x2014; ')
            html = string.replace(html, u"\u2014", ' &#x2014; ')
            html = string.replace(html, u"\u2013", ' &#x2013; ')

            # Fix comment nodes that got mangled
            html = string.replace(html, u'<! &#x2014; ', u'<!-- ')
            html = string.replace(html, u' &#x2014; >', u' -->')

            # Remove Unicode replacement characters
            html = string.replace(html, u"\uFFFD", "")

            self.dirty(name)
        self.flush_cache()
开发者ID:d-faure,项目名称:calibre-kobo-driver,代码行数:32,代码来源:container.py

示例2: smarten_punctuation

# 需要导入模块: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 别名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 别名]
    def smarten_punctuation(self):
        """Convert standard punctuation to "smart" punctuation.

        Every name from ``self.html_names()`` is fetched with
        ``force_unicode=True``; names whose raw content is ``None`` are
        skipped. The markup then goes through
        ``HeuristicProcessor.fix_nbsp_indents``, ``smartyPants`` and
        ``ELLIPSIS_RE``, followed by an ordered list of literal
        replacements. Each processed file is marked dirty, and the cache is
        flushed once after the loop.

        NOTE(review): the rewritten markup is kept only in a local variable;
        nothing visible here stores it before ``self.dirty(name)`` --
        confirm that this is intended.
        """
        # Applied strictly in this order: '---' has to be consumed before
        # '--', and the comment delimiters can only be repaired after the
        # '--' pass has damaged them.
        substitutions = (
            # Dash sequences and unicode dash characters to numeric entities
            ("---", " &#x2013; "),
            ("\x97", " &#x2013; "),
            ("\u2013", " &#x2013; "),
            ("--", " &#x2014; "),
            ("\u2014", " &#x2014; "),
            # Fix comment nodes that got mangled
            ("<! &#x2014; ", "<!-- "),
            (" &#x2014; >", " -->"),
        )
        heuristics = HeuristicProcessor(log=self.log)

        for name in self.html_names():
            self.log.info("Smartening punctuation for file {0}".format(name))
            html = self.get_raw(name, force_unicode=True)
            if html is None:
                continue

            # Non-breaking space indents first, then smart punctuation
            html = smartyPants(heuristics.fix_nbsp_indents(html))

            # Ellipsis to HTML entity
            html = ELLIPSIS_RE.sub("&hellip;", html)

            for needle, replacement in substitutions:
                html = string.replace(html, needle, replacement)

            self.dirty(name)
        self.flush_cache()
开发者ID:jgoguen,项目名称:calibre-kobo-driver,代码行数:34,代码来源:container.py

示例3: smarten_punctuation

# 需要导入模块: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 别名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 别名]
	def smarten_punctuation(self):
		"""Smarten punctuation in every HTML file and store the result.

		For each name from ``self.get_html_names()`` the raw markup is
		fetched, UTF-8 encoded, passed through
		``HeuristicProcessor.fix_nbsp_indents`` and ``smartyPants``, has its
		ellipses and dash sequences converted to entities, and is written
		back with ``self.set(name, html)``.

		The '--' replacement also rewrites the ``<!--`` and ``-->``
		delimiters of HTML comments, so those delimiters are restored
		afterwards; otherwise comments would be stored in a corrupted form.

		NOTE(review): the markup is UTF-8 encoded before replacements that
		use unicode literals -- confirm this behaves as intended for
		non-ASCII content.
		"""
		preprocessor = HeuristicProcessor(log = self.log)

		for name in self.get_html_names():
			html = self.get_raw(name)
			html = html.encode("UTF-8")

			# Fix non-breaking space indents
			html = preprocessor.fix_nbsp_indents(html)
			# Smarten punctuation
			html = smartyPants(html)
			# Ellipsis to HTML entity
			html = re.sub(r'(?u)(?<=\w)\s?(\.\s+?){2}\.', '&hellip;', html)
			# Dash sequences and unicode dash characters to numeric entities;
			# '---' must be handled before '--'
			html = string.replace(html, '---', ' &#x2013; ')
			html = string.replace(html, u"\x97", ' &#x2013; ')
			html = string.replace(html, '--', ' &#x2014; ')
			html = string.replace(html, u"\u2014", ' &#x2014; ')
			html = string.replace(html, u"\u2013", ' &#x2013; ')

			# Fix comment nodes that got mangled by the '--' replacement
			html = string.replace(html, u'<! &#x2014; ', u'<!-- ')
			html = string.replace(html, u' &#x2014; >', u' -->')

			# Remaining literal three-dot runs to the ellipsis entity
			html = string.replace(html, u"...", "&#x2026;")

			# Remove Unicode replacement characters
			html = string.replace(html, u"\uFFFD", "")

			self.set(name, html)
开发者ID:richp1964,项目名称:calibre-kobo-driver,代码行数:27,代码来源:container.py

示例4: smarten_punctuation

# 需要导入模块: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 别名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 别名]
def smarten_punctuation(html, log=None):
    """Return *html* with smart punctuation applied and entities substituted.

    HTML comment delimiters are swapped for unique placeholder tokens while
    ``fix_nbsp_indents`` and ``smartyPants`` run, and swapped back
    afterwards -- presumably so those passes leave the comment markers
    alone.

    :param html: markup to process.
    :param log: forwarded to ``HeuristicProcessor`` as its ``log`` argument.
    :return: the result of ``substitute_entites`` on the processed markup.
    """
    from calibre.utils.smartypants import smartyPants
    from calibre.ebooks.chardet import substitute_entites
    from calibre.ebooks.conversion.utils import HeuristicProcessor
    from uuid import uuid4

    indent_fixer = HeuristicProcessor(log=log)

    # One random token per delimiter so each can be restored unambiguously.
    open_token = 'calibre-smartypants-'+str(uuid4())
    close_token = 'calibre-smartypants-'+str(uuid4())

    shielded = html.replace('<!--', open_token).replace('-->', close_token)
    smartened = smartyPants(indent_fixer.fix_nbsp_indents(shielded))
    restored = smartened.replace(open_token, '<!--').replace(close_token, '-->')
    return substitute_entites(restored)
开发者ID:JimmXinu,项目名称:calibre,代码行数:17,代码来源:preprocess.py

示例5: smarten_punctuation

# 需要导入模块: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 别名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 别名]
def smarten_punctuation(html, log):
    """Return *html* with smart punctuation, ellipsis entities and em-dashes.

    HTML comment delimiters are swapped for unique placeholder tokens while
    ``fix_nbsp_indents`` and ``smartyPants`` run, and swapped back
    afterwards -- presumably so those passes leave the comment markers
    alone. Ellipses and whitespace-delimited double dashes are then
    rewritten before ``substitute_entites`` produces the return value.

    :param html: markup to process.
    :param log: forwarded to ``HeuristicProcessor`` as its ``log`` argument.
    :return: the result of ``substitute_entites`` on the processed markup.
    """
    from calibre.utils.smartypants import smartyPants
    from calibre.ebooks.chardet import substitute_entites
    from calibre.ebooks.conversion.utils import HeuristicProcessor
    from uuid import uuid4

    indent_fixer = HeuristicProcessor(log=log)

    # One random token per delimiter so each can be restored unambiguously.
    open_token = 'calibre-smartypants-'+str(uuid4())
    close_token = 'calibre-smartypants-'+str(uuid4())

    shielded = html.replace('<!--', open_token).replace('-->', close_token)
    smartened = smartyPants(indent_fixer.fix_nbsp_indents(shielded))
    restored = smartened.replace(open_token, '<!--').replace(close_token, '-->')

    # convert ellipsis to entities to prevent wrapping
    with_ellipsis = re.sub(r'(?u)(?<=\w)\s?(\.\s?){2}\.', '&hellip;', restored)
    # convert double dashes (with whitespace on both sides) to em-dash
    with_dashes = re.sub(r'\s--\s', u'\u2014', with_ellipsis)
    return substitute_entites(with_dashes)
开发者ID:2014gwang,项目名称:KindleEar,代码行数:21,代码来源:preprocess.py

示例6: smarten_punctuation

# 需要导入模块: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 别名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 别名]
def smarten_punctuation(html, log):
    """Return *html* with smart punctuation, ellipsis entities and em-dashes.

    HTML comment delimiters are swapped for unique placeholder tokens while
    ``fix_nbsp_indents`` and ``smartyPants`` run, and swapped back
    afterwards -- presumably so those passes leave the comment markers
    alone. Ellipses and whitespace-delimited double dashes are then
    rewritten before ``substitute_entites`` produces the return value.

    :param html: markup to process.
    :param log: forwarded to ``HeuristicProcessor`` as its ``log`` argument.
    :return: the result of ``substitute_entites`` on the processed markup.
    """
    from calibre.utils.smartypants import smartyPants
    from calibre.ebooks.chardet import substitute_entites
    from calibre.ebooks.conversion.utils import HeuristicProcessor

    preprocessor = HeuristicProcessor(log=log)
    from uuid import uuid4

    # (delimiter, placeholder) pairs, kept in a fixed order because the two
    # delimiters can overlap in the input (e.g. "<!-->").
    placeholders = (
        ("<!--", "calibre-smartypants-" + str(uuid4())),
        ("-->", "calibre-smartypants-" + str(uuid4())),
    )

    for delimiter, token in placeholders:
        html = html.replace(delimiter, token)

    html = smartyPants(preprocessor.fix_nbsp_indents(html))

    for delimiter, token in placeholders:
        html = html.replace(token, delimiter)

    regex_passes = (
        # convert ellipsis to entities to prevent wrapping
        (r"(?u)(?<=\w)\s?(\.\s?){2}\.", "&hellip;"),
        # convert double dashes to em-dash
        (r"\s--\s", u"\u2014"),
    )
    for pattern, replacement in regex_passes:
        html = re.sub(pattern, replacement, html)

    return substitute_entites(html)
开发者ID:nozuono,项目名称:calibre-webserver,代码行数:23,代码来源:preprocess.py


注:本文中的calibre.ebooks.conversion.utils.HeuristicProcessor.fix_nbsp_indents方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。