本文整理汇总了Python中codecs.BOM_UTF32_BE属性的典型用法代码示例。如果您正苦于以下问题:Python codecs.BOM_UTF32_BE属性的具体用法?Python codecs.BOM_UTF32_BE怎么用?Python codecs.BOM_UTF32_BE使用的例子?那么, 这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在类codecs
的用法示例。
在下文中一共展示了codecs.BOM_UTF32_BE属性的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _detect_encoding
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF32_BE [as 别名]
def _detect_encoding(self, fileid):
if isinstance(fileid, PathPointer):
s = fileid.open().readline()
else:
with open(fileid, 'rb') as infile:
s = infile.readline()
if s.startswith(codecs.BOM_UTF16_BE):
return 'utf-16-be'
if s.startswith(codecs.BOM_UTF16_LE):
return 'utf-16-le'
if s.startswith(codecs.BOM_UTF32_BE):
return 'utf-32-be'
if s.startswith(codecs.BOM_UTF32_LE):
return 'utf-32-le'
if s.startswith(codecs.BOM_UTF8):
return 'utf-8'
m = re.match(br'\s*<\?xml\b.*\bencoding="([^"]+)"', s)
if m:
return m.group(1).decode()
m = re.match(br"\s*<\?xml\b.*\bencoding='([^']+)'", s)
if m:
return m.group(1).decode()
# No encoding found -- what should the default be?
return 'utf-8'
示例2: test_xml_encodings
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF32_BE [as 别名]
def test_xml_encodings(self):
base = TESTDATA[plistlib.FMT_XML]
for xml_encoding, encoding, bom in [
(b'utf-8', 'utf-8', codecs.BOM_UTF8),
(b'utf-16', 'utf-16-le', codecs.BOM_UTF16_LE),
(b'utf-16', 'utf-16-be', codecs.BOM_UTF16_BE),
# Expat does not support UTF-32
#(b'utf-32', 'utf-32-le', codecs.BOM_UTF32_LE),
#(b'utf-32', 'utf-32-be', codecs.BOM_UTF32_BE),
]:
pl = self._create(fmt=plistlib.FMT_XML)
with self.subTest(encoding=encoding):
data = base.replace(b'UTF-8', xml_encoding)
data = bom + data.decode('utf-8').encode(encoding)
pl2 = plistlib.loads(data)
self.assertEqual(dict(pl), dict(pl2))
示例3: guess_json_utf
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF32_BE [as 别名]
def guess_json_utf(data):
"""
:rtype: str
"""
# JSON always starts with two ASCII characters, so detection is as
# easy as counting the nulls and from their location and count
# determine the encoding. Also detect a BOM, if present.
sample = data[:4]
if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
return 'utf-32' # BOM included
if sample[:3] == codecs.BOM_UTF8:
return 'utf-8-sig' # BOM included, MS style (discouraged)
if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
return 'utf-16' # BOM included
nullcount = sample.count(_null)
if nullcount == 0:
return 'utf-8'
if nullcount == 2:
if sample[::2] == _null2: # 1st and 3rd are null
return 'utf-16-be'
if sample[1::2] == _null2: # 2nd and 4th are null
return 'utf-16-le'
# Did not detect 2 valid UTF-16 ascii-range characters
if nullcount == 3:
if sample[:3] == _null3:
return 'utf-32-be'
if sample[1:] == _null3:
return 'utf-32-le'
# Did not detect a valid UTF-32 ascii-range character
return None
示例4: detect_encoding
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF32_BE [as 别名]
def detect_encoding(data):
"""Detect which UTF codec was used to encode the given bytes.
The latest JSON standard (:rfc:`8259`) suggests that only UTF-8 is
accepted. Older documents allowed 8, 16, or 32. 16 and 32 can be big
or little endian. Some editors or libraries may prepend a BOM.
:param data: Bytes in unknown UTF encoding.
:return: UTF encoding name
"""
head = data[:4]
if head[:3] == codecs.BOM_UTF8:
return 'utf-8-sig'
if b'\x00' not in head:
return 'utf-8'
if head in (codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE):
return 'utf-32'
if head[:2] in (codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE):
return 'utf-16'
if len(head) == 4:
if head[:3] == b'\x00\x00\x00':
return 'utf-32-be'
if head[::2] == b'\x00\x00':
return 'utf-16-be'
if head[1:] == b'\x00\x00\x00':
return 'utf-32-le'
if head[1::2] == b'\x00\x00':
return 'utf-16-le'
if len(head) == 2:
return 'utf-16-be' if head.startswith(b'\x00') else 'utf-16-le'
return 'utf-8'
示例5: detectBOM
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF32_BE [as 别名]
def detectBOM(self):
"""Attempts to detect at BOM at the start of the stream. If
an encoding can be determined from the BOM return the name of the
encoding otherwise return None"""
bomDict = {
codecs.BOM_UTF8: 'utf-8',
codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
}
# Go to beginning of file and read in 4 bytes
string = self.rawStream.read(4)
assert isinstance(string, bytes)
# Try detecting the BOM using bytes from the string
encoding = bomDict.get(string[:3]) # UTF-8
seek = 3
if not encoding:
# Need to detect UTF-32 before UTF-16
encoding = bomDict.get(string) # UTF-32
seek = 4
if not encoding:
encoding = bomDict.get(string[:2]) # UTF-16
seek = 2
# Set the read position past the BOM if one was found, otherwise
# set it to the start of the stream
if encoding:
self.rawStream.seek(seek)
return lookupEncoding(encoding)
else:
self.rawStream.seek(0)
return None
示例6: detectBOM
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF32_BE [as 别名]
def detectBOM(self):
"""Attempts to detect at BOM at the start of the stream. If
an encoding can be determined from the BOM return the name of the
encoding otherwise return None"""
bomDict = {
codecs.BOM_UTF8: 'utf-8',
codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be',
codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be'
}
# Go to beginning of file and read in 4 bytes
string = self.rawStream.read(4)
assert isinstance(string, bytes)
# Try detecting the BOM using bytes from the string
encoding = bomDict.get(string[:3]) # UTF-8
seek = 3
if not encoding:
# Need to detect UTF-32 before UTF-16
encoding = bomDict.get(string) # UTF-32
seek = 4
if not encoding:
encoding = bomDict.get(string[:2]) # UTF-16
seek = 2
# Set the read position past the BOM if one was found, otherwise
# set it to the start of the stream
self.rawStream.seek(encoding and seek or 0)
return encoding
示例7: encode_endian
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF32_BE [as 别名]
def encode_endian(text, encoding, errors="strict", le=True):
"""Like text.encode(encoding) but always returns little endian/big endian
BOMs instead of the system one.
Args:
text (text)
encoding (str)
errors (str)
le (boolean): if little endian
Returns:
bytes
Raises:
UnicodeEncodeError
LookupError
"""
encoding = codecs.lookup(encoding).name
if encoding == "utf-16":
if le:
return codecs.BOM_UTF16_LE + text.encode("utf-16-le", errors)
else:
return codecs.BOM_UTF16_BE + text.encode("utf-16-be", errors)
elif encoding == "utf-32":
if le:
return codecs.BOM_UTF32_LE + text.encode("utf-32-le", errors)
else:
return codecs.BOM_UTF32_BE + text.encode("utf-32-be", errors)
else:
return text.encode(encoding, errors)