本文整理汇总了 Python 中 codecs.BOM_UTF8 属性的典型用法代码示例。如果您正在寻找以下问题的答案:Python codecs.BOM_UTF8 属性的具体用法?怎么用?有哪些使用例子?那么这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在模块 codecs 的用法示例。
在下文中一共展示了codecs.BOM_UTF8属性的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _buffer_decode
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF8 [as 别名]
def _buffer_decode(self, input, errors, final):
    """Decode *input* as UTF-8, stripping one leading UTF-8 BOM.

    On the first call only: while the buffered bytes could still turn
    out to be a BOM, report nothing consumed and wait for more data;
    once three bytes are available, skip the BOM (if present) and
    account for its length in the consumed count.
    """
    if not self.first:
        return codecs.utf_8_decode(input, errors, final)
    if len(input) < 3:
        if codecs.BOM_UTF8.startswith(input):
            # Could still be the start of a BOM -- ask for more bytes
            # on the next call before deciding.
            return ("", 0)
        # Definitely not a BOM; decode plainly from now on.
        self.first = 0
        return codecs.utf_8_decode(input, errors, final)
    self.first = 0
    if input[:3] == codecs.BOM_UTF8:
        decoded, used = codecs.utf_8_decode(input[3:], errors, final)
        # Report the three BOM bytes as consumed as well.
        return (decoded, used + 3)
    return codecs.utf_8_decode(input, errors, final)
示例2: _detect_encoding
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF8 [as 别名]
def _detect_encoding(self, fileid):
    """Guess the encoding of the file named by *fileid*.

    Looks at the first raw line for a Unicode BOM, then for the
    ``encoding`` attribute of an XML declaration; defaults to UTF-8.

    :param fileid: a PathPointer or a filesystem path.
    :return: an encoding name string.
    """
    # Grab the first raw line, whichever kind of handle we were given.
    if isinstance(fileid, PathPointer):
        s = fileid.open().readline()
    else:
        with open(fileid, 'rb') as infile:
            s = infile.readline()
    # BUGFIX: test the 4-byte UTF-32 BOMs before the 2-byte UTF-16 ones.
    # The UTF-32-LE BOM (ff fe 00 00) begins with the UTF-16-LE BOM
    # (ff fe), so the original order misreported UTF-32-LE files as
    # 'utf-16-le'.
    if s.startswith(codecs.BOM_UTF32_BE):
        return 'utf-32-be'
    if s.startswith(codecs.BOM_UTF32_LE):
        return 'utf-32-le'
    if s.startswith(codecs.BOM_UTF16_BE):
        return 'utf-16-be'
    if s.startswith(codecs.BOM_UTF16_LE):
        return 'utf-16-le'
    if s.startswith(codecs.BOM_UTF8):
        return 'utf-8'
    # Fall back to the encoding declared in an XML prolog, double- or
    # single-quoted.
    m = re.match(br'\s*<\?xml\b.*\bencoding="([^"]+)"', s)
    if m:
        return m.group(1).decode()
    m = re.match(br"\s*<\?xml\b.*\bencoding='([^']+)'", s)
    if m:
        return m.group(1).decode()
    # No encoding found -- what should the default be?
    return 'utf-8'
示例3: _load_gitignore_file
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF8 [as 别名]
def _load_gitignore_file(source_location):
    """Load ignore rules from the .gitignore in *source_location*.

    reference: https://git-scm.com/docs/gitignore

    :param source_location: directory expected to contain a .gitignore.
    :return: ``(rules, count)`` with later file lines first (they have
             higher priority), or ``(None, 0)`` when no .gitignore exists.
    """
    git_ignore_file = os.path.join(source_location, ".gitignore")
    if not os.path.exists(git_ignore_file):
        return None, 0
    # BUGFIX: both file handles were previously left open (no close);
    # use context managers so they are always released.
    with open(git_ignore_file, "rb") as raw:
        # Sniff for a UTF-8 BOM so it gets stripped while reading.
        header = raw.read(len(codecs.BOM_UTF8))
    encoding = "utf-8-sig" if header.startswith(codecs.BOM_UTF8) else "utf-8"
    ignore_list = []
    with open(git_ignore_file, 'r', encoding=encoding) as infile:
        for line in infile:
            rule = line.rstrip()
            # skip empty line and comment
            if not rule or rule.startswith('#'):
                continue
            ignore_list.append(IgnoreRule(rule))
    # The ignore rule at the end has higher priority: reverse once
    # instead of the original O(n^2) per-line list prepending.
    ignore_list.reverse()
    return ignore_list, len(ignore_list)
示例4: test_stream_bom
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF8 [as 别名]
def test_stream_bom(self):
    """A BOM at the start of a utf-8-sig stream must be stripped,
    regardless of the chunk size used to read it.

    BUGFIX: ported from Python 2 -- the original concatenated
    ``codecs.BOM_UTF8`` (bytes) with a str literal, used the removed
    ``StringIO`` module, and concatenated ``range()`` (now lazy) with a
    list; all of those raise TypeError/ImportError on Python 3.
    """
    import io
    unistring = "ABC\u00A1\u2200XYZ"
    bytestring = codecs.BOM_UTF8 + b"ABC\xC2\xA1\xE2\x88\x80XYZ"
    reader = codecs.getreader("utf-8-sig")
    # Exercise unbounded reads plus a spread of chunk sizes.
    for sizehint in [None] + list(range(1, 11)) + [64, 128, 256, 512, 1024]:
        istream = reader(io.BytesIO(bytestring))
        ostream = io.StringIO()
        while 1:
            if sizehint is not None:
                data = istream.read(sizehint)
            else:
                data = istream.read()
            if not data:
                break
            ostream.write(data)
        got = ostream.getvalue()
        self.assertEqual(got, unistring)
示例5: test_all
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF8 [as 别名]
def test_all(self):
    """``codecs.__all__`` must match the documented public API exactly,
    and every advertised name must resolve on the module."""
    expected_api = (
        "encode", "decode",
        "register", "CodecInfo", "Codec", "IncrementalEncoder",
        "IncrementalDecoder", "StreamReader", "StreamWriter", "lookup",
        "getencoder", "getdecoder", "getincrementalencoder",
        "getincrementaldecoder", "getreader", "getwriter",
        "register_error", "lookup_error",
        "strict_errors", "replace_errors", "ignore_errors",
        "xmlcharrefreplace_errors", "backslashreplace_errors",
        "open", "EncodedFile",
        "iterencode", "iterdecode",
        "BOM", "BOM_BE", "BOM_LE",
        "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_BE", "BOM_UTF16_LE",
        "BOM_UTF32", "BOM_UTF32_BE", "BOM_UTF32_LE",
        "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE", # Undocumented
        "StreamReaderWriter", "StreamRecoder",
    )
    self.assertEqual(sorted(expected_api), sorted(codecs.__all__))
    # Accessing each name raises AttributeError if it is missing.
    # (Loop variable renamed: the original reused and shadowed `api`.)
    for name in codecs.__all__:
        getattr(codecs, name)
示例6: load_file
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF8 [as 别名]
def load_file(self):
    """Parse the wallet JSON file at ``self.__wallet_path``.

    A leading UTF-8 BOM is tolerated and stripped before decoding.

    :return: a WalletData built from the parsed dictionary.
    :raises SDKException: when a mandatory wallet field is missing.
    """
    with open(self.__wallet_path, 'rb') as f:
        raw = f.read()
    # Drop a UTF-8 BOM if the producing tool wrote one.
    if raw.startswith(codecs.BOM_UTF8):
        raw = raw[len(codecs.BOM_UTF8):]
    wallet_dict = json.loads(raw.decode('utf-8'))
    # Optional fields fall back to benign defaults.
    create_time = wallet_dict.get('createTime', '')
    default_id = wallet_dict.get('defaultOntid', '')
    default_address = wallet_dict.get('defaultAccountAddress', '')
    identities = wallet_dict.get('identities', list())
    try:
        scrypt_dict = wallet_dict['scrypt']
        scrypt_obj = Scrypt(scrypt_dict.get('n', 16384),
                            scrypt_dict.get('r', 8),
                            scrypt_dict.get('p', 8),
                            scrypt_dict.get('dk_len', 64))
        # name/version/accounts are mandatory -- a missing key lands in
        # the except clause below.
        return WalletData(wallet_dict['name'], wallet_dict['version'],
                          create_time, default_id, default_address,
                          scrypt_obj, identities, wallet_dict['accounts'])
    except KeyError as e:
        raise SDKException(ErrorCode.param_err(f'wallet file format error: {e}.'))
示例7: guess_json_utf
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF8 [as 别名]
def guess_json_utf(data):
    """Guess which UTF codec encodes the JSON byte string *data*.

    JSON always starts with two ASCII characters, so detection is as
    easy as counting the nulls and from their location and count
    determine the encoding. Also detect a BOM, if present.

    :param data: raw JSON bytes.
    :return: a codec name, or None when no guess can be made.
    """
    sample = data[:4]
    # BUGFIX: the original compared against codecs.BOM32_BE, which is a
    # legacy alias for the *UTF-16* big-endian BOM, so a UTF-32-BE BOM
    # was never recognised here. Use BOM_UTF32_BE (as the sibling copy
    # of this function in this file already does).
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included
    nullcount = sample.count(_null)
    if nullcount == 0:
        return 'utf-8'
    if nullcount == 2:
        if sample[::2] == _null2:   # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None
示例8: guess_json_utf
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF8 [as 别名]
def guess_json_utf(data):
    """
    :rtype: str
    """
    # JSON opens with two ASCII characters, so the encoding follows
    # from where NUL bytes sit in the first four bytes; an explicit
    # BOM, when present, is honoured first.
    head = data[:4]
    # UTF-32 BOMs before UTF-16: the UTF-32-LE BOM begins with the
    # UTF-16-LE one.
    if head in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if head[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if head[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included
    nulls = head.count(_null)
    if nulls == 0:
        return 'utf-8'
    if nulls == 2:
        # Two NULs in alternating positions means UTF-16 ASCII text.
        if head[::2] == _null2:     # 1st and 3rd are null
            return 'utf-16-be'
        if head[1::2] == _null2:    # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nulls == 3:
        # Three NULs around one ASCII byte means UTF-32.
        if head[:3] == _null3:
            return 'utf-32-be'
        if head[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None
示例9: test_bom
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF8 [as 别名]
def test_bom(self):
    """Refactoring a source file must preserve its UTF-8 BOM."""
    source_path = os.path.join(TEST_DATA_DIR, "bom.py")
    refactored = self.check_file_refactoring(source_path)
    self.assertTrue(refactored.startswith(codecs.BOM_UTF8))
示例10: file_has_bom
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF8 [as 别名]
def file_has_bom(project_path, directories_to_skip, *args, **kwargs):
    """Return the name of the first file under *project_path* that
    starts with a UTF-8 BOM.

    Directories listed in *directories_to_skip* are not descended into.
    Returns None implicitly when no such file exists.
    """
    bom = codecs.BOM_UTF8
    for root, dirs, filenames in os.walk(project_path):
        # Prune skipped directories in place so os.walk honours it.
        dirs[:] = [entry for entry in dirs if entry not in directories_to_skip]
        for name in filenames:
            with open(os.path.join(root, name), 'rb') as file_handle:
                # Only the first three bytes matter, not the whole file.
                prefix = file_handle.read(3)
            if prefix.startswith(bom):
                return name
示例11: test_strip_bom
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF8 [as 别名]
def test_strip_bom(self):
    """json.load and json.loads must accept documents carrying a
    leading UTF-8 BOM, in both bytes and str form."""
    content = u"\u3053\u3093\u306b\u3061\u308f"
    json_doc = codecs.BOM_UTF8 + b(json.dumps(content))
    # Binary file-like input through json.load ...
    self.assertEqual(json.load(BytesIO(json_doc)), content)
    # ... and both bytes and decoded-str documents through json.loads.
    for doc in (json_doc, json_doc.decode('utf8')):
        self.assertEqual(json.loads(doc), content)
示例12: detect_encoding
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF8 [as 别名]
def detect_encoding(data):
    """Detect which UTF codec was used to encode the given bytes.

    The latest JSON standard (:rfc:`8259`) suggests that only UTF-8 is
    accepted. Older documents allowed 8, 16, or 32. 16 and 32 can be big
    or little endian. Some editors or libraries may prepend a BOM.

    :param data: Bytes in unknown UTF encoding.
    :return: UTF encoding name
    """
    prefix = data[:4]
    # An explicit UTF-8 BOM wins outright.
    if prefix[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'
    # A NUL-free prefix can only be UTF-8.
    if b'\x00' not in prefix:
        return 'utf-8'
    # UTF-32 BOMs must be tested before UTF-16 (shared 2-byte prefix).
    if prefix in (codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE):
        return 'utf-32'
    if prefix[:2] in (codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE):
        return 'utf-16'
    # No BOM: infer width/endianness from where the NULs fall around
    # the leading ASCII character(s).
    if len(prefix) == 4:
        if prefix[:3] == b'\x00\x00\x00':
            return 'utf-32-be'
        if prefix[::2] == b'\x00\x00':
            return 'utf-16-be'
        if prefix[1:] == b'\x00\x00\x00':
            return 'utf-32-le'
        if prefix[1::2] == b'\x00\x00':
            return 'utf-16-le'
    if len(prefix) == 2:
        # One ASCII character only: the NUL's side decides endianness.
        return 'utf-16-be' if prefix[0:1] == b'\x00' else 'utf-16-le'
    return 'utf-8'
示例13: detectBOM
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF8 [as 别名]
def detectBOM(self):
    """Look for a BOM at the start of the raw stream.

    When a BOM identifies an encoding, position the stream just past
    the BOM and return the looked-up encoding; otherwise rewind to the
    start and return None.
    """
    bomDict = {
        codecs.BOM_UTF8: 'utf-8',
        codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
        codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
    }
    # Four bytes is enough for the longest (UTF-32) BOM.
    prefix = self.rawStream.read(4)
    assert isinstance(prefix, bytes)
    # Try the three BOM widths; UTF-32 must be tested before UTF-16
    # because the UTF-32 BOMs begin with the UTF-16 ones.
    encoding = bomDict.get(prefix[:3])      # UTF-8
    seek = 3
    if not encoding:
        encoding = bomDict.get(prefix)      # UTF-32
        seek = 4
    if not encoding:
        encoding = bomDict.get(prefix[:2])  # UTF-16
        seek = 2
    if not encoding:
        # No BOM found: rewind so nothing is consumed.
        self.rawStream.seek(0)
        return None
    # Skip just the BOM bytes and report the codec.
    self.rawStream.seek(seek)
    return lookupEncoding(encoding)
示例14: guess_json_utf
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF8 [as 别名]
def guess_json_utf(data):
"""
:rtype: str
"""
# JSON always starts with two ASCII characters, so detection is as
# easy as counting the nulls and from their location and count
# determine the encoding. Also detect a BOM, if present.
sample = data[:4]
if sample in (codecs.BOM_UTF32_LE, codecs.BOM32_BE):
return 'utf-32' # BOM included
if sample[:3] == codecs.BOM_UTF8:
return 'utf-8-sig' # BOM included, MS style (discouraged)
if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
return 'utf-16' # BOM included
nullcount = sample.count(_null)
if nullcount == 0:
return 'utf-8'
if nullcount == 2:
if sample[::2] == _null2: # 1st and 3rd are null
return 'utf-16-be'
if sample[1::2] == _null2: # 2nd and 4th are null
return 'utf-16-le'
# Did not detect 2 valid UTF-16 ascii-range characters
if nullcount == 3:
if sample[:3] == _null3:
return 'utf-32-be'
if sample[1:] == _null3:
return 'utf-32-le'
# Did not detect a valid UTF-32 ascii-range character
return None
示例15: detectFileEncodingToRead
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import BOM_UTF8 [as 别名]
def detectFileEncodingToRead(fName, text=None):
    """Pick the encoding to use when reading *fName*.

    Checked in order: a BOM in the file head, a user-assigned encoding,
    a coding cookie inside the file, the project default, the IDE-wide
    setting, and finally DEFAULT_ENCODING.
    """
    if text is None:
        # Only the head of the file is needed for BOM/cookie sniffing.
        with open(fName, 'rb') as diskfile:
            text = diskfile.read(1024)
    # Step 1: BOM markers map to the pseudo 'bom-*' encodings.
    # NOTE(review): on little-endian builds BOM_UTF32 begins with
    # BOM_UTF16, so a UTF-32 BOM matches 'bom-utf-16' first -- check
    # order preserved from the original; confirm whether intended.
    for bom, bomEncoding in ((BOM_UTF8, 'bom-utf-8'),
                             (BOM_UTF16, 'bom-utf-16'),
                             (BOM_UTF32, 'bom-utf-32')):
        if text.startswith(bom):
            return bomEncoding
    # Step 2: a user assigned encoding overrides detection.
    userAssignedEncoding = getFileEncoding(fName)
    if userAssignedEncoding:
        return userAssignedEncoding
    # Step 3: a coding cookie extracted from the file itself.
    encFromFile = getCodingFromBytes(text)
    if encFromFile:
        return encFromFile
    # Step 4: the project default encoding, when a project is loaded.
    project = GlobalData().project
    if project.isLoaded():
        projectEncoding = project.props['encoding']
        if projectEncoding:
            return projectEncoding
    # Step 5: the IDE-wide encoding setting.
    ideEncoding = Settings()['encoding']
    if ideEncoding:
        return ideEncoding
    # Step 6: the hard default.
    return DEFAULT_ENCODING