當前位置: 首頁>>代碼示例>>Python>>正文


Python codecs.BOM_UTF32_BE屬性代碼示例

本文整理匯總了Python中codecs.BOM_UTF32_BE屬性的典型用法代碼示例。如果您正苦於以下問題:Python codecs.BOM_UTF32_BE屬性的具體用法?Python codecs.BOM_UTF32_BE怎麽用?Python codecs.BOM_UTF32_BE使用的例子?那麽, 這裏精選的屬性代碼示例或許可以為您提供幫助。您也可以進一步了解該屬性所在codecs的用法示例。


在下文中一共展示了codecs.BOM_UTF32_BE屬性的7個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: _detect_encoding

# 需要導入模塊: import codecs [as 別名]
# 或者: from codecs import BOM_UTF32_BE [as 別名]
def _detect_encoding(self, fileid):
        if isinstance(fileid, PathPointer):
            s = fileid.open().readline()
        else:
            with open(fileid, 'rb') as infile:
                s = infile.readline()
        if s.startswith(codecs.BOM_UTF16_BE):
            return 'utf-16-be'
        if s.startswith(codecs.BOM_UTF16_LE):
            return 'utf-16-le'
        if s.startswith(codecs.BOM_UTF32_BE):
            return 'utf-32-be'
        if s.startswith(codecs.BOM_UTF32_LE):
            return 'utf-32-le'
        if s.startswith(codecs.BOM_UTF8):
            return 'utf-8'
        m = re.match(br'\s*<\?xml\b.*\bencoding="([^"]+)"', s)
        if m:
            return m.group(1).decode()
        m = re.match(br"\s*<\?xml\b.*\bencoding='([^']+)'", s)
        if m:
            return m.group(1).decode()
        # No encoding found -- what should the default be?
        return 'utf-8' 
開發者ID:rafasashi,項目名稱:razzy-spinner,代碼行數:26,代碼來源:xmldocs.py

示例2: test_xml_encodings

# 需要導入模塊: import codecs [as 別名]
# 或者: from codecs import BOM_UTF32_BE [as 別名]
def test_xml_encodings(self):
        base = TESTDATA[plistlib.FMT_XML]

        for xml_encoding, encoding, bom in [
                    (b'utf-8', 'utf-8', codecs.BOM_UTF8),
                    (b'utf-16', 'utf-16-le', codecs.BOM_UTF16_LE),
                    (b'utf-16', 'utf-16-be', codecs.BOM_UTF16_BE),
                    # Expat does not support UTF-32
                    #(b'utf-32', 'utf-32-le', codecs.BOM_UTF32_LE),
                    #(b'utf-32', 'utf-32-be', codecs.BOM_UTF32_BE),
                ]:

            pl = self._create(fmt=plistlib.FMT_XML)
            with self.subTest(encoding=encoding):
                data = base.replace(b'UTF-8', xml_encoding)
                data = bom + data.decode('utf-8').encode(encoding)
                pl2 = plistlib.loads(data)
                self.assertEqual(dict(pl), dict(pl2)) 
開發者ID:Microvellum,項目名稱:Fluid-Designer,代碼行數:20,代碼來源:test_plistlib.py

示例3: guess_json_utf

# 需要導入模塊: import codecs [as 別名]
# 或者: from codecs import BOM_UTF32_BE [as 別名]
def guess_json_utf(data):
    """
    :rtype: str
    """
    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls and from their location and count
    # determine the encoding. Also detect a BOM, if present.
    sample = data[:4]
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included
    nullcount = sample.count(_null)
    if nullcount == 0:
        return 'utf-8'
    if nullcount == 2:
        if sample[::2] == _null2:   # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None 
開發者ID:danielecook,項目名稱:gist-alfred,代碼行數:32,代碼來源:utils.py

示例4: detect_encoding

# 需要導入模塊: import codecs [as 別名]
# 或者: from codecs import BOM_UTF32_BE [as 別名]
def detect_encoding(data):
    """Detect which UTF codec was used to encode the given bytes.

    The latest JSON standard (:rfc:`8259`) suggests that only UTF-8 is
    accepted. Older documents allowed 8, 16, or 32. 16 and 32 can be big
    or little endian. Some editors or libraries may prepend a BOM.

    :param data: Bytes in unknown UTF encoding.
    :return: UTF encoding name
    """
    head = data[:4]

    if head[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'

    if b'\x00' not in head:
        return 'utf-8'

    if head in (codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE):
        return 'utf-32'

    if head[:2] in (codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE):
        return 'utf-16'

    if len(head) == 4:
        if head[:3] == b'\x00\x00\x00':
            return 'utf-32-be'

        if head[::2] == b'\x00\x00':
            return 'utf-16-be'

        if head[1:] == b'\x00\x00\x00':
            return 'utf-32-le'

        if head[1::2] == b'\x00\x00':
            return 'utf-16-le'

    if len(head) == 2:
        return 'utf-16-be' if head.startswith(b'\x00') else 'utf-16-le'

    return 'utf-8' 
開發者ID:Frank-qlu,項目名稱:recruit,代碼行數:43,代碼來源:__init__.py

示例5: detectBOM

# 需要導入模塊: import codecs [as 別名]
# 或者: from codecs import BOM_UTF32_BE [as 別名]
def detectBOM(self):
        """Attempts to detect at BOM at the start of the stream. If
        an encoding can be determined from the BOM return the name of the
        encoding otherwise return None"""
        bomDict = {
            codecs.BOM_UTF8: 'utf-8',
            codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
            codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
        }

        # Go to beginning of file and read in 4 bytes
        string = self.rawStream.read(4)
        assert isinstance(string, bytes)

        # Try detecting the BOM using bytes from the string
        encoding = bomDict.get(string[:3])         # UTF-8
        seek = 3
        if not encoding:
            # Need to detect UTF-32 before UTF-16
            encoding = bomDict.get(string)         # UTF-32
            seek = 4
            if not encoding:
                encoding = bomDict.get(string[:2])  # UTF-16
                seek = 2

        # Set the read position past the BOM if one was found, otherwise
        # set it to the start of the stream
        if encoding:
            self.rawStream.seek(seek)
            return lookupEncoding(encoding)
        else:
            self.rawStream.seek(0)
            return None 
開發者ID:Frank-qlu,項目名稱:recruit,代碼行數:35,代碼來源:_inputstream.py

示例6: detectBOM

# 需要導入模塊: import codecs [as 別名]
# 或者: from codecs import BOM_UTF32_BE [as 別名]
def detectBOM(self):
        """Attempts to detect at BOM at the start of the stream. If
        an encoding can be determined from the BOM return the name of the
        encoding otherwise return None"""
        bomDict = {
            codecs.BOM_UTF8: 'utf-8',
            codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be',
            codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be'
        }

        # Go to beginning of file and read in 4 bytes
        string = self.rawStream.read(4)
        assert isinstance(string, bytes)

        # Try detecting the BOM using bytes from the string
        encoding = bomDict.get(string[:3])         # UTF-8
        seek = 3
        if not encoding:
            # Need to detect UTF-32 before UTF-16
            encoding = bomDict.get(string)         # UTF-32
            seek = 4
            if not encoding:
                encoding = bomDict.get(string[:2])  # UTF-16
                seek = 2

        # Set the read position past the BOM if one was found, otherwise
        # set it to the start of the stream
        self.rawStream.seek(encoding and seek or 0)

        return encoding 
開發者ID:jpush,項目名稱:jbox,代碼行數:32,代碼來源:inputstream.py

示例7: encode_endian

# 需要導入模塊: import codecs [as 別名]
# 或者: from codecs import BOM_UTF32_BE [as 別名]
def encode_endian(text, encoding, errors="strict", le=True):
    """Like text.encode(encoding) but always returns little endian/big endian
    BOMs instead of the system one.

    Args:
        text (text)
        encoding (str)
        errors (str)
        le (boolean): if little endian
    Returns:
        bytes
    Raises:
        UnicodeEncodeError
        LookupError
    """

    encoding = codecs.lookup(encoding).name

    if encoding == "utf-16":
        if le:
            return codecs.BOM_UTF16_LE + text.encode("utf-16-le", errors)
        else:
            return codecs.BOM_UTF16_BE + text.encode("utf-16-be", errors)
    elif encoding == "utf-32":
        if le:
            return codecs.BOM_UTF32_LE + text.encode("utf-32-le", errors)
        else:
            return codecs.BOM_UTF32_BE + text.encode("utf-32-be", errors)
    else:
        return text.encode(encoding, errors) 
開發者ID:bugatsinho,項目名稱:bugatsinho.github.io,代碼行數:32,代碼來源:_util.py


注:本文中的codecs.BOM_UTF32_BE屬性示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。