本文整理汇总了Python中codecs.BOM_UTF16属性的典型用法代码示例。如果您正苦于以下问题:Python codecs.BOM_UTF16属性的具体用法?Python codecs.BOM_UTF16怎么用?Python codecs.BOM_UTF16使用的例子?那么,这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在模块codecs的用法示例。
在下文中一共展示了codecs.BOM_UTF16属性的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: detectFileEncodingToRead
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF16 [as 别名]
def detectFileEncodingToRead(fName, text=None):
    """Detects the encoding to be used for reading a file.

    The candidates are tried in order and the first hit wins:
    a BOM at the start of the data, the encoding returned by
    getFileEncoding(fName), the encoding found in the data by
    getCodingFromBytes(), the loaded project's 'encoding' property,
    the IDE 'encoding' setting and finally DEFAULT_ENCODING.

    fName -- path to the file
    text -- the leading bytes of the file; if None then the first 1024
            bytes are read from fName
    Returns an encoding name. The BOM cases are reported as 'bom-utf-8',
    'bom-utf-16' and 'bom-utf-32'.
    """
    if text is None:
        with open(fName, 'rb') as diskfile:
            text = diskfile.read(1024)

    # Step 1: check for BOM
    if text.startswith(BOM_UTF8):
        return 'bom-utf-8'
    # UTF-32 must be tested before UTF-16: the little-endian UTF-32 BOM
    # (FF FE 00 00) begins with the little-endian UTF-16 BOM (FF FE), so
    # the opposite order reports UTF-32 files as UTF-16.
    if text.startswith(BOM_UTF32):
        return 'bom-utf-32'
    if text.startswith(BOM_UTF16):
        return 'bom-utf-16'

    # Step 2: check if it was a user assigned encoding
    userAssignedEncoding = getFileEncoding(fName)
    if userAssignedEncoding:
        return userAssignedEncoding

    # Step 3: extract encoding from the file
    encFromFile = getCodingFromBytes(text)
    if encFromFile:
        return encFromFile

    # Step 4: check the project default encoding
    project = GlobalData().project
    if project.isLoaded():
        projectEncoding = project.props['encoding']
        if projectEncoding:
            return projectEncoding

    # Step 5: checks the IDE encoding
    ideEncoding = Settings()['encoding']
    if ideEncoding:
        return ideEncoding

    # Step 6: default
    return DEFAULT_ENCODING
示例2: writeEncodedFile
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF16 [as 别名]
def writeEncodedFile(fName, content, encoding):
    """Writes the content into a file taking care of the encoding.

    fName -- path of the file to be (over)written
    content -- the text to be saved
    encoding -- the encoding name; it is normalized with
                getNormalizedEncoding() and a 'bom_' prefix in the
                normalized name requests a leading byte order mark
    Raises Exception if the content cannot be encoded or the file cannot
    be written.
    """
    normEnc = getNormalizedEncoding(encoding)
    try:
        if normEnc.startswith('bom_'):
            enc = normEnc[4:]
            if enc == 'utf_8':
                encContent = BOM_UTF8 + content.encode(enc)
            else:
                # The 'utf_16' and 'utf_32' codecs emit a BOM on their own,
                # so an unconditional prefix would produce two BOMs in a
                # row. Add the mark only if the codec did not.
                bom = BOM_UTF16 if enc == 'utf_16' else BOM_UTF32
                encContent = content.encode(enc)
                if not encContent.startswith(bom):
                    encContent = bom + encContent
        else:
            encContent = content.encode(normEnc)

        # Workaround for empty files: if there is no visible content and
        # the file is saved then the editor reports precisely \n which is
        # saved on disk and then detected as octet-stream. If there are
        # more than one \n then the file is detected as plain text.
        # The octet stream files are not openable in Codimension
        if encContent == b'\n':
            encContent = b''
    except (UnicodeError, LookupError) as exc:
        raise Exception('Error encoding the buffer content with ' + encoding +
                        ': ' + str(exc)) from exc

    try:
        with open(fName, 'wb') as diskfile:
            diskfile.write(encContent)
    except Exception as exc:
        raise Exception('Error writing encoded buffer content into ' +
                        fName + ': ' + str(exc)) from exc
示例3: decode
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF16 [as 别名]
def decode(str, errors='strict'):
    """
    Decode a byte string into text, honouring a leading BOM

    Data starting with a UTF-8 BOM is decoded as UTF-8 without the mark,
    data starting with a UTF-16 BOM (either byte order) is decoded as
    UTF-16, anything else is decoded as UTF-8. If that fails the data is
    decoded as cp1251 and, as the last resort, an empty string is returned.

    :param bytes str: input bytes (the name shadows the builtin; it is kept
        so that keyword callers continue to work)
    :param str errors: error level passed to the codecs
    :return: str
    """
    # Work on an alias: inside this function the name ``str`` is the input
    # data, not the builtin type, so it must never be called.
    data = str
    try:
        if data.startswith(codecs.BOM_UTF8):
            output, _ = codecs.utf_8_decode(
                data[len(codecs.BOM_UTF8):], errors)
        elif data.startswith((codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)):
            # The 'utf-16' codec consumes the BOM and picks the byte order
            output = data.decode('utf-16', errors)
        elif len(data) < len(codecs.BOM_UTF8) and \
                codecs.BOM_UTF8.startswith(data):
            # not enough data to decide if this is a BOM
            # => try again on the next call
            output = ''
        else:
            # no BOM present
            output, _ = codecs.utf_8_decode(data, errors)
        return output
    except Exception:
        # seems, its getting not a content (images, file, etc)
        try:
            return data.decode('cp1251')
        except Exception:
            return ''
示例4: combine_logs
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF16 [as 别名]
def combine_logs(combinedlog, iterable, mode="wb"):
    """Write all chunks of ``iterable`` into a single log file.

    :param combinedlog: path of the file to write
    :param iterable: chunks to write, in order; with the default binary
        mode they have to be bytes
    :param mode: mode passed to ``io.open``

    When the module level ENCODING is 'utf-16' a UTF-16 byte order mark is
    written before the chunks.
    """
    # The context manager closes the file even if one of the writes raises
    with io.open(combinedlog, mode) as fp:
        if ENCODING == 'utf-16':
            import codecs
            fp.write(codecs.BOM_UTF16)
        for chunk in iterable:
            fp.write(chunk)
示例5: test_utf_16_encode
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF16 [as 别名]
def test_utf_16_encode(self):
    # codecs.utf_16_encode returns (encoded bytes, number of characters
    # consumed) and prefixes the bytes with a BOM.
    # On little-endian systems, UTF-16 encodes in UTF-16-LE prefixed with BOM
    data, num_processed = codecs.utf_16_encode("abc")
    self.assertEqual(data, codecs.BOM_UTF16 + b'a\0b\0c\0')
    self.assertEqual(num_processed, 3)

    # Only text is accepted as the input
    self.assertRaises(TypeError, codecs.utf_16_encode, b"abc")
    self.assertRaises(TypeError, codecs.utf_16_encode, None)
    self.assertRaises(TypeError, codecs.utf_16_encode, None, None)

    # An empty string still produces the BOM.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(codecs.utf_16_encode("", None), (codecs.BOM_UTF16, 0))
示例6: test_utf_16
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF16 [as 别名]
def test_utf_16(self):
    # Round trip of four raw bytes through the utf_16 codec with the
    # surrogateescape error handler.
    raw = b'\xda\xdb\xdc\xdd'

    text = raw.decode("utf_16", "surrogateescape")
    self.assertEqual(text, '\U001069dc')

    # Encoding puts a BOM in front of the original bytes
    expected = codecs.BOM_UTF16 + raw
    self.assertEqual(text.encode("utf_16", "surrogateescape"), expected)
示例7: decode
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF16 [as 别名]
def decode(text):
    """
    Function to decode a text.

    The encoding is picked from a leading BOM, then from get_coding(text);
    if neither works UTF-8 is tried and finally Latin-1.

    @param text text to decode (bytes)
    @return decoded text and encoding. The encoding is one of 'utf-8-bom',
        'utf-16', 'utf-32', the value returned by get_coding(),
        'utf-8-guessed' or 'latin-1-guessed'
    """
    try:
        if text.startswith(BOM_UTF8):
            # UTF-8 with BOM
            return to_text_string(text[len(BOM_UTF8):], 'utf-8'), 'utf-8-bom'
        elif text.startswith(BOM_UTF32):
            # UTF-32 with BOM. It has to be tested before UTF-16 because the
            # little-endian UTF-32 BOM (FF FE 00 00) begins with the
            # little-endian UTF-16 BOM (FF FE).
            return to_text_string(text[len(BOM_UTF32):], 'utf-32'), 'utf-32'
        elif text.startswith(BOM_UTF16):
            # UTF-16 with BOM
            return to_text_string(text[len(BOM_UTF16):], 'utf-16'), 'utf-16'
        coding = get_coding(text)
        if coding:
            return to_text_string(text, coding), coding
    except (UnicodeError, LookupError):
        # Fall through to the guesses below
        pass
    # Assume UTF-8
    try:
        return to_text_string(text, 'utf-8'), 'utf-8-guessed'
    except (UnicodeError, LookupError):
        pass
    # Assume Latin-1 (behaviour before 3.7.1)
    return to_text_string(text, "latin-1"), 'latin-1-guessed'
示例8: is_text_file
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF16 [as 别名]
def is_text_file(filename):
    """
    Test if the given path is a text-like file.

    A file is considered text if it starts with a UTF-8/16/32 BOM, or if
    its whole content is valid UTF-8 without NUL characters. A file which
    cannot be opened is reported as not text.

    Adapted from: http://stackoverflow.com/a/3002505

    Original Authors: Trent Mick <TrentM@ActiveState.com>
                      Jorge Orpinel <jorge@orpinel.com>
    """
    # Local import: the rest of the function only relies on the BOM_*
    # names already available at the module level.
    from codecs import getincrementaldecoder

    CHUNKSIZE = 1024
    try:
        fid = open(filename, 'rb')
    except Exception:
        return False

    with fid:
        try:
            chunk = fid.read(CHUNKSIZE)
            # check for a UTF BOM
            if chunk.startswith((BOM_UTF8, BOM_UTF16, BOM_UTF32)):
                return True

            # An incremental decoder keeps the tail of a multi-byte
            # character which is split between two chunks; decoding each
            # chunk on its own would reject such valid UTF-8 files.
            decoder = getincrementaldecoder('utf-8')()
            while True:
                # End of file is judged on the number of bytes read, not
                # on the number of decoded characters.
                last = len(chunk) < CHUNKSIZE
                decoded = decoder.decode(chunk, final=last)
                if '\0' in decoded:  # found null byte
                    return False
                if last:
                    break  # done
                chunk = fid.read(CHUNKSIZE)
        except UnicodeDecodeError:
            return False
        except Exception:
            # Best effort: any other problem while reading does not
            # disqualify the file
            pass
    return True
示例9: export
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF16 [as 别名]
def export(self):
    # Builds a tab separated export encoded as UTF-16 with a single BOM at
    # the very beginning. Python 2 code (cStringIO, unicode).
    # NOTE(review): the indentation of the original was lost; the data rows
    # are assumed to be written only when self.rows is truthy -- confirm.
    import csv

    line_buffer = cStringIO.StringIO()
    result = cStringIO.StringIO()
    writer = csv.writer(line_buffer, delimiter='\t')

    def flush_line():
        # Re-encode what csv has just written from UTF-8 to UTF-16, drop
        # the 2-byte BOM the codec puts in front of every chunk and reset
        # the line buffer for the next row.
        utf16 = line_buffer.getvalue().decode("utf8").encode("utf-16")
        result.write(utf16[2:])
        line_buffer.truncate(0)

    if self.rows:
        import codecs
        result.write(codecs.BOM_UTF16)

        header = [unicode(col).encode("utf8") for col in self.rows.colnames]
        writer.writerow(header)
        flush_line()

        for row in self.represented():
            cells = [str(col).decode('utf8').encode("utf-8") for col in row]
            writer.writerow(cells)
            flush_line()

    return str(result.getvalue())
示例10: detectEncodingOnClearExplicit
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF16 [as 别名]
def detectEncodingOnClearExplicit(fName, content):
    """Provides the reading encoding as a file would be read.

    fName -- path to the file; its first 1024 bytes are checked for a BOM
    content -- the buffer text; it is passed to getCodingFromText()
    Returns 'bom-utf-8', 'bom-utf-16' or 'bom-utf-32' if the file starts
    with a BOM, otherwise the first valid encoding of: the one found in
    the content, the loaded project's 'encoding' property, the IDE
    'encoding' setting. If none of them applies or an error occurs then
    DEFAULT_ENCODING is returned.
    """
    # The function is used in case the user reset the explicit encoding
    # so the current encoding needs to be set as if the file would be
    # read again
    try:
        with open(fName, 'rb') as diskfile:
            text = diskfile.read(1024)

        if text.startswith(BOM_UTF8):
            return 'bom-utf-8'
        # UTF-32 must be tested before UTF-16: the little-endian UTF-32 BOM
        # (FF FE 00 00) begins with the little-endian UTF-16 BOM (FF FE)
        if text.startswith(BOM_UTF32):
            return 'bom-utf-32'
        if text.startswith(BOM_UTF16):
            return 'bom-utf-16'

        # The function is called when an explicit encoding is reset so
        # there is no need to check for it

        encFromBuffer = getCodingFromText(content)
        if encFromBuffer and isValidEncoding(encFromBuffer):
            return encFromBuffer

        project = GlobalData().project
        if project.isLoaded():
            projectEncoding = project.props['encoding']
            if projectEncoding and isValidEncoding(projectEncoding):
                return projectEncoding

        ideEncoding = Settings()['encoding']
        if ideEncoding and isValidEncoding(ideEncoding):
            return ideEncoding

        return DEFAULT_ENCODING
    except Exception as exc:
        # Best effort: any failure ends up with the default encoding
        logging.warning('Error while guessing encoding for reading %s: %s',
                        fName, str(exc))
        logging.warning('The default encoding %s will be used',
                        DEFAULT_ENCODING)
        return DEFAULT_ENCODING