當前位置: 首頁>>代碼示例>>Python>>正文


Python HeuristicProcessor.fix_nbsp_indents方法代碼示例

本文整理匯總了Python中calibre.ebooks.conversion.utils.HeuristicProcessor.fix_nbsp_indents方法的典型用法代碼示例。如果您正苦於以下問題:Python HeuristicProcessor.fix_nbsp_indents方法的具體用法?Python HeuristicProcessor.fix_nbsp_indents怎麼用?Python HeuristicProcessor.fix_nbsp_indents使用的例子?那麼,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在calibre.ebooks.conversion.utils.HeuristicProcessor的用法示例。


在下文中一共展示了HeuristicProcessor.fix_nbsp_indents方法的6個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: smarten_punctuation

# 需要導入模塊: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 別名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 別名]
    def smarten_punctuation(self):
        preprocessor = HeuristicProcessor(log=self.log)

        for name in self.get_html_names():
            self.log.info("Smartening punctuation for file {0}".format(name))
            html = self.get_raw(name)
            html = html.encode("UTF-8")

            # Fix non-breaking space indents
            html = preprocessor.fix_nbsp_indents(html)
            # Smarten punctuation
            html = smartyPants(html)
            # Ellipsis to HTML entity
            html = re.sub(ur'(?u)(?<=\w)\s?(\.\s+?){2}\.', '&hellip;', html, flags=re.UNICODE | re.MULTILINE)
            # Double-dash and unicode char code to em-dash
            html = string.replace(html, '---', ' &#x2013; ')
            html = string.replace(html, u"\x97", ' &#x2013; ')
            html = string.replace(html, '--', ' &#x2014; ')
            html = string.replace(html, u"\u2014", ' &#x2014; ')
            html = string.replace(html, u"\u2013", ' &#x2013; ')

            # Fix comment nodes that got mangled
            html = string.replace(html, u'<! &#x2014; ', u'<!-- ')
            html = string.replace(html, u' &#x2014; >', u' -->')

            # Remove Unicode replacement characters
            html = string.replace(html, u"\uFFFD", "")

            self.dirty(name)
        self.flush_cache()
開發者ID:d-faure,項目名稱:calibre-kobo-driver,代碼行數:32,代碼來源:container.py

示例2: smarten_punctuation

# 需要導入模塊: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 別名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 別名]
    def smarten_punctuation(self):
        """Convert standard punctuation to "smart" punctuation.

        Every name from ``self.html_names()`` is read with
        ``force_unicode=True``; names whose content comes back as ``None``
        are skipped. The remaining content goes through
        ``fix_nbsp_indents``, ``smartyPants``, the module-level
        ``ELLIPSIS_RE`` substitution and an ordered list of literal
        replacements. Each processed name is marked dirty and the cache is
        flushed once at the end.

        NOTE(review): the transformed text is not passed to any call visible
        in this method -- only ``self.dirty(name)`` is invoked. Confirm that
        this is intended.
        """
        # Ordered (needle, replacement) pairs. '---' must precede '--', and
        # the last two entries restore comment delimiters that the '--'
        # substitution has mangled.
        dash_fixups = (
            ("---", " &#x2013; "),
            ("\x97", " &#x2013; "),
            ("\u2013", " &#x2013; "),
            ("--", " &#x2014; "),
            ("\u2014", " &#x2014; "),
            ("<! &#x2014; ", "<!-- "),
            (" &#x2014; >", " -->"),
        )
        heuristics = HeuristicProcessor(log=self.log)

        for name in self.html_names():
            self.log.info("Smartening punctuation for file {0}".format(name))
            markup = self.get_raw(name, force_unicode=True)
            if markup is None:
                # Nothing to process for this name.
                continue

            # Non-breaking-space indents, then smart punctuation.
            markup = heuristics.fix_nbsp_indents(markup)
            markup = smartyPants(markup)

            # Ellipsis to HTML entity.
            markup = ELLIPSIS_RE.sub("&hellip;", markup)

            for needle, replacement in dash_fixups:
                markup = string.replace(markup, needle, replacement)

            self.dirty(name)
        self.flush_cache()
開發者ID:jgoguen,項目名稱:calibre-kobo-driver,代碼行數:34,代碼來源:container.py

示例3: smarten_punctuation

# 需要導入模塊: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 別名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 別名]
	def smarten_punctuation(self):
		"""Smarten the punctuation of every HTML file and store the result.

		For each name from ``self.get_html_names()`` the raw content is
		encoded as UTF-8, run through ``fix_nbsp_indents`` and
		``smartyPants``, has spaced ellipses converted to ``&hellip;``, and
		then receives an ordered series of literal substitutions. The
		result is written back with ``self.set(name, ...)``.
		"""
		# Ordered (needle, replacement) pairs; '---' must be handled before
		# '--' because the latter is a substring of the former.
		substitutions = (
			('---', ' &#x2013; '),
			(u"\x97", ' &#x2013; '),
			('--', ' &#x2014; '),
			(u"\u2014", ' &#x2014; '),
			(u"\u2013", ' &#x2013; '),
			(u"...", "&#x2026;"),
			# Drop Unicode replacement characters entirely.
			(u"\uFFFD", ""),
		)
		heuristics = HeuristicProcessor(log = self.log)

		for name in self.get_html_names():
			markup = self.get_raw(name).encode("UTF-8")

			# Non-breaking-space indents first, then smart punctuation.
			markup = smartyPants(heuristics.fix_nbsp_indents(markup))

			# Spaced ellipsis following a word character -> HTML entity.
			markup = re.sub(r'(?u)(?<=\w)\s?(\.\s+?){2}\.', '&hellip;', markup)

			for needle, replacement in substitutions:
				markup = string.replace(markup, needle, replacement)

			self.set(name, markup)
開發者ID:richp1964,項目名稱:calibre-kobo-driver,代碼行數:27,代碼來源:container.py

示例4: smarten_punctuation

# 需要導入模塊: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 別名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 別名]
def smarten_punctuation(html, log=None):
    """Return *html* with nbsp indents fixed and punctuation smartened.

    HTML comment delimiters are swapped for unique placeholder tokens
    before processing and swapped back afterwards, so the intermediate
    steps never see ``<!--`` or ``-->``. The final text is passed through
    ``substitute_entites`` and that call's result is returned.

    :param html: the markup to process; must provide ``replace``.
    :param log: forwarded to ``HeuristicProcessor`` as its ``log`` argument.
    """
    from calibre.utils.smartypants import smartyPants
    from calibre.ebooks.chardet import substitute_entites
    from calibre.ebooks.conversion.utils import HeuristicProcessor
    from uuid import uuid4

    heuristics = HeuristicProcessor(log=log)

    # Random per-call tokens standing in for the comment delimiters.
    comment_open = 'calibre-smartypants-'+str(uuid4())
    comment_close = 'calibre-smartypants-'+str(uuid4())

    shielded = html.replace('<!--', comment_open).replace('-->', comment_close)
    smartened = smartyPants(heuristics.fix_nbsp_indents(shielded))
    restored = smartened.replace(comment_open, '<!--').replace(comment_close, '-->')
    return substitute_entites(restored)
開發者ID:JimmXinu,項目名稱:calibre,代碼行數:17,代碼來源:preprocess.py

示例5: smarten_punctuation

# 需要導入模塊: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 別名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 別名]
def smarten_punctuation(html, log):
    """Return *html* with nbsp indents fixed and punctuation smartened.

    HTML comment delimiters are replaced by unique placeholder tokens while
    ``fix_nbsp_indents`` and ``smartyPants`` run, then restored. Afterwards
    ellipses become ``&hellip;`` and whitespace-delimited double dashes
    become an em dash. The final text is passed through
    ``substitute_entites`` and that call's result is returned.

    :param html: the markup to process; must provide ``replace``.
    :param log: forwarded to ``HeuristicProcessor`` as its ``log`` argument.
    """
    from calibre.utils.smartypants import smartyPants
    from calibre.ebooks.chardet import substitute_entites
    from calibre.ebooks.conversion.utils import HeuristicProcessor
    from uuid import uuid4

    heuristics = HeuristicProcessor(log=log)

    # Random per-call tokens standing in for the comment delimiters.
    comment_open = 'calibre-smartypants-'+str(uuid4())
    comment_close = 'calibre-smartypants-'+str(uuid4())

    shielded = html.replace('<!--', comment_open).replace('-->', comment_close)
    smartened = smartyPants(heuristics.fix_nbsp_indents(shielded))
    restored = smartened.replace(comment_open, '<!--').replace(comment_close, '-->')

    # Three dots (optionally whitespace-separated) after a word character
    # become an entity, to prevent wrapping.
    with_ellipsis = re.sub(r'(?u)(?<=\w)\s?(\.\s?){2}\.', '&hellip;', restored)
    # A double dash with whitespace on both sides becomes an em dash; the
    # surrounding whitespace is consumed by the match.
    with_dashes = re.sub(r'\s--\s', u'\u2014', with_ellipsis)
    return substitute_entites(with_dashes)
開發者ID:2014gwang,項目名稱:KindleEar,代碼行數:21,代碼來源:preprocess.py

示例6: smarten_punctuation

# 需要導入模塊: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 別名]
# 或者: from calibre.ebooks.conversion.utils.HeuristicProcessor import fix_nbsp_indents [as 別名]
def smarten_punctuation(html, log):
    """Return *html* with nbsp indents fixed and punctuation smartened.

    Processing order:

    1. swap ``<!--`` / ``-->`` for unique placeholder tokens,
    2. run ``HeuristicProcessor.fix_nbsp_indents`` and ``smartyPants``,
    3. swap the placeholders back,
    4. turn ellipses into ``&hellip;`` and whitespace-delimited double
       dashes into an em dash,
    5. pass the text through ``substitute_entites`` and return the result.

    :param html: the markup to process; must provide ``replace``.
    :param log: forwarded to ``HeuristicProcessor`` as its ``log`` argument.
    """
    from calibre.utils.smartypants import smartyPants
    from calibre.ebooks.chardet import substitute_entites
    from calibre.ebooks.conversion.utils import HeuristicProcessor
    from uuid import uuid4

    heuristics = HeuristicProcessor(log=log)

    # Random per-call tokens standing in for the comment delimiters.
    token_prefix = "calibre-smartypants-"
    comment_open = token_prefix + str(uuid4())
    comment_close = token_prefix + str(uuid4())

    text = html.replace("<!--", comment_open)
    text = text.replace("-->", comment_close)
    text = smartyPants(heuristics.fix_nbsp_indents(text))
    text = text.replace(comment_open, "<!--")
    text = text.replace(comment_close, "-->")

    # Applied in order: ellipsis to entity (prevents wrapping), then a
    # whitespace-delimited double dash to an em dash.
    for pattern, replacement in (
        (r"(?u)(?<=\w)\s?(\.\s?){2}\.", "&hellip;"),
        (r"\s--\s", u"\u2014"),
    ):
        text = re.sub(pattern, replacement, text)

    return substitute_entites(text)
開發者ID:nozuono,項目名稱:calibre-webserver,代碼行數:23,代碼來源:preprocess.py


注:本文中的calibre.ebooks.conversion.utils.HeuristicProcessor.fix_nbsp_indents方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。