本文整理汇总了Python中bs4.dammit.UnicodeDammit类的典型用法代码示例。如果您正苦于以下问题:Python dammit.UnicodeDammit类的具体用法?Python dammit.UnicodeDammit怎么用?Python dammit.UnicodeDammit使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块bs4.dammit的用法示例。
在下文中一共展示了dammit.UnicodeDammit类的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: prepare_markup
# 需要导入模块: from bs4 import dammit [as 别名]
# 或者: from bs4.dammit import UnicodeDammit [as 别名]
def prepare_markup(self, markup, user_specified_encoding=None,
                   document_declared_encoding=None, exclude_encodings=None):
    """Yield the markup together with what was learned about its encoding.

    Markup that is already a ``unicode`` object is yielded unchanged.
    Anything else is handed to ``UnicodeDammit`` in HTML mode, with the
    user-specified and document-declared encodings (in that order) as the
    encodings to try and ``exclude_encodings`` forwarded as given.

    :return: A generator yielding exactly one 4-tuple (markup, original
     encoding, encoding declared within markup, whether any characters
     had to be replaced with REPLACEMENT CHARACTER).
    """
    if not isinstance(markup, unicode):
        candidates = [user_specified_encoding, document_declared_encoding]
        converted = UnicodeDammit(
            markup, candidates, is_html=True,
            exclude_encodings=exclude_encodings)
        yield (
            converted.markup,
            converted.original_encoding,
            converted.declared_html_encoding,
            converted.contains_replacement_characters,
        )
    else:
        # Already text: no encoding information, nothing was replaced.
        yield (markup, None, None, False)
示例2: prepare_markup
# 需要导入模块: from bs4 import dammit [as 别名]
# 或者: from bs4.dammit import UnicodeDammit [as 别名]
def prepare_markup(self, markup, user_specified_encoding=None,
                   document_declared_encoding=None):
    """Yield the markup together with what was learned about its encoding.

    ``unicode`` input is passed straight through. Other input is run
    through ``UnicodeDammit`` in HTML mode, trying the user-specified
    encoding first and the document-declared encoding second.

    :return: A generator yielding exactly one 4-tuple (markup, original
     encoding, encoding declared within markup, whether any characters
     had to be replaced with REPLACEMENT CHARACTER).
    """
    already_text = isinstance(markup, unicode)
    if already_text:
        result = (markup, None, None, False)
    else:
        guesses = [user_specified_encoding, document_declared_encoding]
        detector = UnicodeDammit(markup, guesses, is_html=True)
        result = (
            detector.markup,
            detector.original_encoding,
            detector.declared_html_encoding,
            detector.contains_replacement_characters,
        )
    yield result
示例3: prepare_markup
# 需要导入模块: from bs4 import dammit [as 别名]
# 或者: from bs4.dammit import UnicodeDammit [as 别名]
def prepare_markup(self, markup, user_specified_encoding=None,
                   document_declared_encoding=None, exclude_encodings=None):
    """Yield the markup together with what was learned about its encoding.

    A ``str`` is yielded unchanged with no encoding information. Any
    other input is handed to ``UnicodeDammit`` in HTML mode, with the
    user-specified and document-declared encodings (in that order) as the
    encodings to try and ``exclude_encodings`` forwarded as given.

    :return: A generator yielding exactly one 4-tuple (markup, original
     encoding, encoding declared within markup, whether any characters
     had to be replaced with REPLACEMENT CHARACTER).
    """
    if isinstance(markup, str):
        # Already text: nothing to detect and nothing was replaced.
        outcome = (markup, None, None, False)
    else:
        encodings_to_try = [
            user_specified_encoding,
            document_declared_encoding,
        ]
        converter = UnicodeDammit(
            markup, encodings_to_try, is_html=True,
            exclude_encodings=exclude_encodings)
        outcome = (
            converter.markup,
            converter.original_encoding,
            converter.declared_html_encoding,
            converter.contains_replacement_characters,
        )
    yield outcome
示例4: prepare_markup
# 需要导入模块: from bs4 import dammit [as 别名]
# 或者: from bs4.dammit import UnicodeDammit [as 别名]
def prepare_markup(self, markup, user_specified_encoding=None,
                   document_declared_encoding=None):
    """Return the markup together with what was learned about its encoding.

    ``unicode`` input is returned unchanged. Other input is run through
    ``UnicodeDammit`` in HTML mode, trying the user-specified encoding
    first and the document-declared encoding second.

    :return: A 4-tuple (markup, original encoding, encoding
     declared within markup, whether any characters had to be
     replaced with REPLACEMENT CHARACTER).
    """
    if not isinstance(markup, unicode):
        candidates = [user_specified_encoding, document_declared_encoding]
        converted = UnicodeDammit(markup, candidates, is_html=True)
        return (
            converted.markup,
            converted.original_encoding,
            converted.declared_html_encoding,
            converted.contains_replacement_characters,
        )
    # Already text: no encoding information, nothing was replaced.
    return markup, None, None, False
示例5: prepare_markup
# 需要导入模块: from bs4 import dammit [as 别名]
# 或者: from bs4.dammit import UnicodeDammit [as 别名]
def prepare_markup(self, markup, user_specified_encoding=None,
                   document_declared_encoding=None):
    """Return the markup together with what was learned about its encoding.

    ``unicode`` input is returned unchanged. Other input is run through
    ``UnicodeDammit`` in HTML mode, trying the user-specified encoding
    first and the document-declared encoding second.

    :return: A 4-tuple (markup, original encoding, encoding
     declared within markup, whether any characters had to be
     replaced with REPLACEMENT CHARACTER).
    """
    if isinstance(markup, unicode):
        # Already text: no encoding information, nothing was replaced.
        return markup, None, None, False

    try_encodings = [user_specified_encoding, document_declared_encoding]
    dammit = UnicodeDammit(markup, try_encodings, is_html=True)
    return (dammit.markup, dammit.original_encoding,
            dammit.declared_html_encoding,
            dammit.contains_replacement_characters)
示例6: test_smart_quote_substitution
# 需要导入模块: from bs4 import dammit [as 别名]
# 或者: from bs4.dammit import UnicodeDammit [as 别名]
def test_smart_quote_substitution(self):
    """Check that decoded MS smart quotes become named HTML entities.

    The bytes are decoded by ``UnicodeDammit`` and the resulting text is
    passed to ``self.sub.substitute_html``.
    NOTE(review): ``self.sub`` is set up outside this snippet, presumably
    an ``EntitySubstitution`` -- confirm against the test fixture.
    """
    # MS smart quotes are a common source of frustration, so we
    # give them a special test.
    quotes = b"\x91\x92foo\x93\x94"
    dammit = UnicodeDammit(quotes)
    # The expected value must spell the entities out literally; raw curly
    # quotes here would compare against un-substituted text.
    self.assertEqual(self.sub.substitute_html(dammit.markup),
                     "&lsquo;&rsquo;foo&ldquo;&rdquo;")
示例7: test_smart_quotes_to_unicode
# 需要导入模块: from bs4 import dammit [as 别名]
# 或者: from bs4.dammit import UnicodeDammit [as 别名]
def test_smart_quotes_to_unicode(self):
    """Check that smart-quote bytes decode to their Unicode characters.

    With no ``smart_quotes_to`` argument, the bytes 0x91-0x94 are expected
    to come out as U+2018, U+2019, U+201C and U+201D.
    """
    converted = UnicodeDammit(b"<foo>\x91\x92\x93\x94</foo>")
    expected = "<foo>\u2018\u2019\u201c\u201d</foo>"
    self.assertEqual(converted.unicode_markup, expected)
示例8: test_smart_quotes_to_xml_entities
# 需要导入模块: from bs4 import dammit [as 别名]
# 或者: from bs4.dammit import UnicodeDammit [as 别名]
def test_smart_quotes_to_xml_entities(self):
    """Check that ``smart_quotes_to="xml"`` yields numeric entities.

    The smart-quote bytes 0x91-0x94 are expected to be replaced by
    hexadecimal character references rather than by the quote
    characters themselves.
    """
    markup = b"<foo>\x91\x92\x93\x94</foo>"
    dammit = UnicodeDammit(markup, smart_quotes_to="xml")
    # The entities are written out literally; raw curly quotes here would
    # make this test identical to the plain-Unicode case.
    self.assertEqual(
        dammit.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")
示例9: test_smart_quotes_to_html_entities
# 需要导入模块: from bs4 import dammit [as 别名]
# 或者: from bs4.dammit import UnicodeDammit [as 别名]
def test_smart_quotes_to_html_entities(self):
    """Check that ``smart_quotes_to="html"`` yields named HTML entities.

    The smart-quote bytes 0x91-0x94 are expected to be replaced by
    ``&lsquo;``, ``&rsquo;``, ``&ldquo;`` and ``&rdquo;``.
    """
    markup = b"<foo>\x91\x92\x93\x94</foo>"
    dammit = UnicodeDammit(markup, smart_quotes_to="html")
    # The entities are written out literally; raw curly quotes here would
    # make this test identical to the plain-Unicode case.
    self.assertEqual(
        dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
示例10: test_detect_utf8
# 需要导入模块: from bs4 import dammit [as 别名]
# 或者: from bs4.dammit import UnicodeDammit [as 别名]
def test_detect_utf8(self):
    """Check that a two-byte UTF-8 sequence is decoded and identified.

    The bytes 0xC3 0xA9 should decode to U+00E9, and the reported
    original encoding should be 'utf-8'.
    """
    detected = UnicodeDammit(b"\xc3\xa9")
    self.assertEqual(detected.unicode_markup, '\xe9')
    self.assertEqual(detected.original_encoding, 'utf-8')