Python HeuristicProcessor.fix_nbsp_indents方法代碼示例

本文整理匯總了Python中calibre.ebooks.conversion.utils.HeuristicProcessor.fix_nbsp_indents方法的典型用法代碼示例。如果您正苦於以下問題：Python HeuristicProcessor.fix_nbsp_indents方法的具體用法？Python HeuristicProcessor.fix_nbsp_indents怎麼用？Python HeuristicProcessor.fix_nbsp_indents使用的例子？那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類calibre.ebooks.conversion.utils.HeuristicProcessor的用法示例。

在下文中一共展示了HeuristicProcessor.fix_nbsp_indents方法的6個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: smarten_punctuation

# 需要導入模塊: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 別名]
# 注意: fix_nbsp_indents 是 HeuristicProcessor 類的實例方法，無法用 from ... import 單獨導入；請先建立實例再調用，例如 HeuristicProcessor(log=log).fix_nbsp_indents(html)
    def smarten_punctuation(self):
        preprocessor = HeuristicProcessor(log=self.log)

        for name in self.get_html_names():
            self.log.info("Smartening punctuation for file {0}".format(name))
            html = self.get_raw(name)
            html = html.encode("UTF-8")

            # Fix non-breaking space indents
            html = preprocessor.fix_nbsp_indents(html)
            # Smarten punctuation
            html = smartyPants(html)
            # Ellipsis to HTML entity
            html = re.sub(ur'(?u)(?<=\w)\s?(\.\s+?){2}\.', '&hellip;', html, flags=re.UNICODE | re.MULTILINE)
            # Double-dash and unicode char code to em-dash
            html = string.replace(html, '---', ' &#x2013; ')
            html = string.replace(html, u"\x97", ' &#x2013; ')
            html = string.replace(html, '--', ' &#x2014; ')
            html = string.replace(html, u"\u2014", ' &#x2014; ')
            html = string.replace(html, u"\u2013", ' &#x2013; ')

            # Fix comment nodes that got mangled
            html = string.replace(html, u'<! &#x2014; ', u'<!-- ')
            html = string.replace(html, u' &#x2014; >', u' -->')

            # Remove Unicode replacement characters
            html = string.replace(html, u"\uFFFD", "")

            self.dirty(name)
        self.flush_cache()

開發者ID:d-faure，項目名稱:calibre-kobo-driver，代碼行數:32，代碼來源:container.py

示例2: smarten_punctuation

# 需要導入模塊: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 別名]
# 注意: fix_nbsp_indents 是 HeuristicProcessor 類的實例方法，無法用 from ... import 單獨導入；請先建立實例再調用，例如 HeuristicProcessor(log=log).fix_nbsp_indents(html)
    def smarten_punctuation(self):
        """Convert standard punctuation to "smart" punctuation.

        For each name returned by ``self.html_names()`` the raw markup is
        fetched with ``force_unicode=True`` (files for which ``get_raw``
        returns ``None`` are skipped), passed through
        ``HeuristicProcessor.fix_nbsp_indents`` and ``smartyPants``, has
        ``ELLIPSIS_RE`` matches replaced by ``&hellip;``, and then has dash
        sequences and dash code points replaced by spaced
        ``&#x2013;``/``&#x2014;`` entities. Comment delimiters damaged by the
        ``--`` replacement are restored afterwards. Each processed file is
        marked dirty and the cache is flushed once after the loop.
        """
        preprocessor = HeuristicProcessor(log=self.log)

        # (old, new) pairs applied strictly in this order: "---" has to be
        # consumed before "--", and the comment repairs only make sense
        # after the "--" replacement has run.
        ordered_replacements = (
            # Triple/double dashes and dash code points to spaced entities
            ("---", " &#x2013; "),
            ("\x97", " &#x2013; "),
            ("\u2013", " &#x2013; "),
            ("--", " &#x2014; "),
            ("\u2014", " &#x2014; "),
            # Fix comment nodes that got mangled by the "--" replacement
            ("<! &#x2014; ", "<!-- "),
            (" &#x2014; >", " -->"),
        )

        for name in self.html_names():
            self.log.info("Smartening punctuation for file {0}".format(name))
            html = self.get_raw(name, force_unicode=True)
            if html is None:
                continue

            # Fix non-breaking space indents
            html = preprocessor.fix_nbsp_indents(html)

            # Smarten punctuation
            html = smartyPants(html)

            # Ellipsis to HTML entity
            html = ELLIPSIS_RE.sub("&hellip;", html)

            # The str.replace method replaces the deprecated string.replace()
            # module function (removed in Python 3); the result is the same.
            for old, new in ordered_replacements:
                html = html.replace(old, new)

            # NOTE(review): the transformed `html` is not passed to anything
            # in this block; only dirty(name) is called. Confirm whether a
            # write-back of `html` is missing here.
            self.dirty(name)
        self.flush_cache()

開發者ID:jgoguen，項目名稱:calibre-kobo-driver，代碼行數:34，代碼來源:container.py

示例3: smarten_punctuation

# 需要導入模塊: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 別名]
# 注意: fix_nbsp_indents 是 HeuristicProcessor 類的實例方法，無法用 from ... import 單獨導入；請先建立實例再調用，例如 HeuristicProcessor(log=log).fix_nbsp_indents(html)
	def smarten_punctuation(self):
		"""Typographically clean up the raw HTML of every HTML file.

		For each name returned by ``self.get_html_names()`` the raw markup is
		UTF-8 encoded, passed through ``HeuristicProcessor.fix_nbsp_indents``
		and ``smartyPants``, and then post-processed: spaced-dot ellipses
		become ``&hellip;``, dash sequences and dash code points become
		spaced ``&#x2013;``/``&#x2014;`` entities, any remaining ``...``
		becomes ``&#x2026;``, and U+FFFD characters are removed. The result
		is stored with ``self.set(name, html)``.
		"""
		preprocessor = HeuristicProcessor(log=self.log)

		# (old, new) pairs applied strictly in this order: '---' has to be
		# consumed before '--'.
		ordered_replacements = (
			# Triple/double dashes and dash code points to spaced entities
			('---', ' &#x2013; '),
			(u"\x97", ' &#x2013; '),
			('--', ' &#x2014; '),
			(u"\u2014", ' &#x2014; '),
			(u"\u2013", ' &#x2013; '),
			# Three literal dots left over after the regex pass below
			(u"...", "&#x2026;"),
			# Remove Unicode replacement characters
			(u"\uFFFD", ""),
		)

		for name in self.get_html_names():
			html = self.get_raw(name)
			html = html.encode("UTF-8")

			# Fix non-breaking space indents
			html = preprocessor.fix_nbsp_indents(html)
			# Smarten punctuation
			html = smartyPants(html)
			# Ellipsis to HTML entity
			html = re.sub(r'(?u)(?<=\w)\s?(\.\s+?){2}\.', '&hellip;', html)

			# The str.replace method replaces the deprecated string.replace()
			# module function (removed in Python 3); the result is the same.
			for old, new in ordered_replacements:
				html = html.replace(old, new)

			self.set(name, html)

開發者ID:richp1964，項目名稱:calibre-kobo-driver，代碼行數:27，代碼來源:container.py

示例4: smarten_punctuation

# 需要導入模塊: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 別名]
# 注意: fix_nbsp_indents 是 HeuristicProcessor 類的實例方法，無法用 from ... import 單獨導入；請先建立實例再調用，例如 HeuristicProcessor(log=log).fix_nbsp_indents(html)
def smarten_punctuation(html, log=None):
    """Run nbsp-indent fixing and smartyPants over *html*.

    The comment delimiters ``<!--`` and ``-->`` are swapped for unique
    placeholder tokens before ``fix_nbsp_indents`` and ``smartyPants`` run
    and are swapped back afterwards, so neither call sees the delimiters
    themselves.

    :param html: markup to process; must support ``.replace``.
    :param log: passed to ``HeuristicProcessor`` as its ``log`` argument.
    :return: the result of ``substitute_entites`` applied to the processed
        markup.
    """
    from calibre.utils.smartypants import smartyPants
    from calibre.ebooks.chardet import substitute_entites
    from calibre.ebooks.conversion.utils import HeuristicProcessor
    from uuid import uuid4

    heuristics = HeuristicProcessor(log=log)

    # A fresh UUID per token keeps the two placeholders distinct.
    open_token = 'calibre-smartypants-' + str(uuid4())
    close_token = 'calibre-smartypants-' + str(uuid4())

    shielded = html.replace('<!--', open_token).replace('-->', close_token)
    smartened = smartyPants(heuristics.fix_nbsp_indents(shielded))
    restored = smartened.replace(open_token, '<!--').replace(close_token, '-->')
    return substitute_entites(restored)

開發者ID:JimmXinu，項目名稱:calibre，代碼行數:17，代碼來源:preprocess.py

示例5: smarten_punctuation

# 需要導入模塊: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 別名]
# 注意: fix_nbsp_indents 是 HeuristicProcessor 類的實例方法，無法用 from ... import 單獨導入；請先建立實例再調用，例如 HeuristicProcessor(log=log).fix_nbsp_indents(html)
def smarten_punctuation(html, log):
    """Smarten the punctuation of *html* and convert ellipses and dashes.

    ``<!--`` and ``-->`` are replaced by unique placeholder tokens while
    ``fix_nbsp_indents`` and ``smartyPants`` run, then put back. After
    that, ellipses following a word character become ``&hellip;`` and a
    whitespace-delimited ``--`` becomes U+2014.

    :param html: markup to process; must support ``.replace``.
    :param log: passed to ``HeuristicProcessor`` as its ``log`` argument.
    :return: the result of ``substitute_entites`` applied to the processed
        markup.
    """
    from calibre.utils.smartypants import smartyPants
    from calibre.ebooks.chardet import substitute_entites
    from calibre.ebooks.conversion.utils import HeuristicProcessor
    from uuid import uuid4

    nbsp_fixer = HeuristicProcessor(log=log)

    token_prefix = 'calibre-smartypants-'
    comment_open = token_prefix + str(uuid4())
    comment_close = token_prefix + str(uuid4())

    # Take the comment delimiters out of the markup for the two passes below.
    text = html.replace('<!--', comment_open)
    text = text.replace('-->', comment_close)

    text = nbsp_fixer.fix_nbsp_indents(text)
    text = smartyPants(text)

    # Put the real comment delimiters back.
    text = text.replace(comment_open, '<!--')
    text = text.replace(comment_close, '-->')

    # convert ellipsis to entities to prevent wrapping
    with_ellipsis = re.sub(r'(?u)(?<=\w)\s?(\.\s?){2}\.', '&hellip;', text)
    # convert double dashes to em-dash
    with_dashes = re.sub(r'\s--\s', u'\u2014', with_ellipsis)
    return substitute_entites(with_dashes)

開發者ID:2014gwang，項目名稱:KindleEar，代碼行數:21，代碼來源:preprocess.py

示例6: smarten_punctuation

# 需要導入模塊: from calibre.ebooks.conversion.utils import HeuristicProcessor [as 別名]
# 注意: fix_nbsp_indents 是 HeuristicProcessor 類的實例方法，無法用 from ... import 單獨導入；請先建立實例再調用，例如 HeuristicProcessor(log=log).fix_nbsp_indents(html)
def smarten_punctuation(html, log):
    """Smarten the punctuation of *html* and convert ellipses and dashes.

    Comment delimiters are replaced by unique placeholder tokens for the
    duration of the ``fix_nbsp_indents`` and ``smartyPants`` passes and
    restored afterwards. Ellipses following a word character are then
    replaced with ``&hellip;`` and a whitespace-delimited ``--`` with
    U+2014.

    :param html: markup to process; must support ``.replace``.
    :param log: passed to ``HeuristicProcessor`` as its ``log`` argument.
    :return: the result of ``substitute_entites`` applied to the processed
        markup.
    """
    from calibre.utils.smartypants import smartyPants
    from calibre.ebooks.chardet import substitute_entites
    from calibre.ebooks.conversion.utils import HeuristicProcessor
    from uuid import uuid4

    processor = HeuristicProcessor(log=log)

    # (delimiter, placeholder) pairs; each placeholder gets its own UUID.
    placeholders = (
        ("<!--", "calibre-smartypants-" + str(uuid4())),
        ("-->", "calibre-smartypants-" + str(uuid4())),
    )

    for delimiter, token in placeholders:
        html = html.replace(delimiter, token)

    html = smartyPants(processor.fix_nbsp_indents(html))

    for delimiter, token in placeholders:
        html = html.replace(token, delimiter)

    # convert ellipsis to entities to prevent wrapping, then
    # convert double dashes to em-dash
    html = re.sub(
        r"\s--\s",
        u"\u2014",
        re.sub(r"(?u)(?<=\w)\s?(\.\s?){2}\.", "&hellip;", html),
    )
    return substitute_entites(html)

開發者ID:nozuono，項目名稱:calibre-webserver，代碼行數:23，代碼來源:preprocess.py

注：本文中的calibre.ebooks.conversion.utils.HeuristicProcessor.fix_nbsp_indents方法示例由純淨天空整理自GitHub/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。