當前位置: 首頁>>代碼示例>>Python>>正文


Python universaldetector.UniversalDetector類代碼示例

本文整理匯總了Python中charade.universaldetector.UniversalDetector的典型用法代碼示例。如果您正苦於以下問題:Python UniversalDetector類的具體用法?Python UniversalDetector怎麼用?Python UniversalDetector使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。


在下文中一共展示了UniversalDetector類的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: decode_raw_line

 def decode_raw_line(line):
     """Decode a raw byte string to text, guessing the encoding if needed.

     Strategy, in order:

     1. Strict UTF-8.
     2. If ``charadeLoaded`` is true, the encoding guessed by
        ``UniversalDetector``, decoded strictly.
     3. UTF-8 with undecodable bytes replaced.

     :param line: the raw bytes to decode.
     :returns: the decoded text; this function never raises on bad bytes.
     """
     # Fast path: most input is valid UTF-8.
     try:
         return line.decode('utf8', 'strict')
     except UnicodeError:
         pass

     # NOTE(review): charadeLoaded is defined outside this block; presumably
     # it records whether the charade import succeeded -- confirm.
     if charadeLoaded:
         detector = UniversalDetector()
         detector.feed(line)
         detector.close()
         guessed = detector.result['encoding']
         if guessed:
             try:
                 return line.decode(guessed, 'strict')
             except (UnicodeError, LookupError):
                 # UnicodeError: the guess was wrong for these bytes.
                 # LookupError: the detector named a codec Python does not
                 # ship; fall through to the lossy decode instead of
                 # crashing.
                 pass

     # Last resort: keep what is valid UTF-8 and replace the rest.
     return line.decode('utf8', 'replace')
開發者ID:Hoaas,項目名稱:Limnoria,代碼行數:27,代碼來源:str.py

示例2: runTest

 def runTest(self):
     """Check that the detector reports ``self.encoding`` for ``self.file_name``.

     The file is fed to a ``UniversalDetector`` line by line, stopping as
     soon as the detector reports it is done.  The detected encoding is
     compared case-insensitively (lower-cased) with ``self.encoding``.
     """
     u = UniversalDetector()
     # The context manager closes the file even if feeding raises.
     with open(self.file_name, 'rb') as stream:
         for line in stream:
             u.feed(line)
             if u.done:
                 break
     u.close()
     detected = u.result['encoding']
     # When nothing was detected the result is falsy; compare it as-is so
     # the test fails with the message below rather than erroring on
     # ``None.lower()``.
     normalized = detected.lower() if detected else detected
     self.assertEqual(normalized, self.encoding,
                      "Expected %s, but got %r in %s" %
                      (self.encoding, detected,
                       self.file_name))
開發者ID:byroot,項目名稱:charade,代碼行數:11,代碼來源:test.py

示例3: description_of

def description_of(path):
    """Return a string describing the probable encoding of a file.

    :param path: path of the file to inspect; it is opened in binary mode.
    :returns: ``'<path>: <encoding> with confidence <confidence>'`` when the
        detector produced an encoding, otherwise ``'<path>: no result'``.
    """
    u = UniversalDetector()
    # The context manager closes the file even if the detector raises.
    with open(path, 'rb') as stream:
        for line in stream:
            u.feed(line)
    u.close()
    result = u.result
    if result['encoding']:
        return '%s: %s with confidence %s' % (path,
                                              result['encoding'],
                                              result['confidence'])
    return '%s: no result' % path
開發者ID:byroot,項目名稱:charade,代碼行數:13,代碼來源:chardetect.py

示例4: _read

    def _read(self):
        """Called by _select() when we can read data.

        Receives up to 1024 bytes from ``self.conn``, appends them to
        ``self.inbuffer`` and splits the buffer on newlines.  The trailing
        (possibly incomplete) piece stays in ``self.inbuffer``; every
        complete line is decoded (on Python 3), parsed with
        ``drivers.parseMsg`` and, if it yields a message, fed to
        ``self.irc``.  Socket and SSL errors other than timeouts are passed
        to ``self._handleSocketError`` and end the call early.
        """
        try:
            self.inbuffer += self.conn.recv(1024)
            self.eagains = 0 # If we successfully recv'ed, we can reset this.
            lines = self.inbuffer.split(b'\n')
            # The last piece has no terminating newline yet; keep it for the
            # next read.
            self.inbuffer = lines.pop()
            for line in lines:
                if sys.version_info[0] >= 3:
                    # First, try to decode using utf-8.
                    try:
                        line = line.decode('utf8', 'strict')
                    except UnicodeError:
                        # If charade is loaded, ask it to guess the encoding.
                        guessed = None
                        if charadeLoaded:
                            detector = UniversalDetector()
                            detector.feed(line)
                            detector.close()
                            guessed = detector.result['encoding']
                        decoded = None
                        if guessed:
                            try:
                                decoded = line.decode(guessed, 'strict')
                            except (UnicodeError, LookupError):
                                # Wrong guess, or a codec name Python does
                                # not know: fall back below instead of
                                # letting the error escape _read().
                                pass
                        if decoded is None:
                            # No usable guess: decode as utf-8 and replace
                            # the offending characters.
                            decoded = line.decode('utf8', 'replace')
                        line = decoded

                msg = drivers.parseMsg(line)
                if msg is not None and self.irc is not None:
                    self.irc.feedMsg(msg)
        except socket.timeout:
            pass
        except SSLError as e:
            # A read timeout on an SSL socket is reported as an SSLError
            # with this message; treat it like socket.timeout.
            if e.args[0] == 'The read operation timed out':
                pass
            else:
                self._handleSocketError(e)
                return
        except socket.error as e:
            self._handleSocketError(e)
            return
        if self.irc and not self.irc.zombie:
            self._sendIfMsgs()
開發者ID:Poorchop,項目名稱:Limnoria,代碼行數:51,代碼來源:Socket.py

示例5: detectEncoding

    def detectEncoding(self, parseMeta=True, chardet=True):
        """Determine the encoding of the stream and how certain that is.

        Sources are tried in order until one yields an encoding:

        1. ``self.detectBOM()`` (confidence ``"certain"``),
        2. ``self.detectEncodingMeta()`` when *parseMeta* is true,
        3. a ``UniversalDetector`` from charade or chardet when *chardet*
           is true and one of those packages can be imported,
        4. ``self.defaultEncoding``.

        Every source other than the BOM gives confidence ``"tentative"``.

        :param parseMeta: whether to consult ``self.detectEncodingMeta()``.
        :param chardet: whether to try statistical detection.
        :returns: an ``(encoding, confidence)`` tuple, with ``iso-8859-1``
            replaced by ``windows-1252``.
        """
        # First look for a BOM
        # This will also read past the BOM if present
        encoding = self.detectBOM()
        confidence = "certain"
        # If there is no BOM need to look for meta elements with encoding
        # information
        if encoding is None and parseMeta:
            encoding = self.detectEncodingMeta()
            confidence = "tentative"
        # Guess with chardet, if available
        if encoding is None and chardet:
            confidence = "tentative"
            try:
                try:
                    from charade.universaldetector import UniversalDetector
                except ImportError:
                    from chardet.universaldetector import UniversalDetector
                detector = UniversalDetector()
                # Feed the detector chunk by chunk; the chunks themselves are
                # not needed afterwards, so they are not kept.
                while not detector.done:
                    chunk = self.rawStream.read(self.numBytesChardet)
                    assert isinstance(chunk, bytes)
                    if not chunk:
                        break
                    detector.feed(chunk)
                detector.close()
                encoding = detector.result['encoding']
                # Rewind to the start of the stream after the reads above.
                self.rawStream.seek(0)
            except ImportError:
                # Neither detector package is installed; fall through to the
                # default encoding.
                pass
        # If all else fails use the default encoding
        if encoding is None:
            confidence = "tentative"
            encoding = self.defaultEncoding

        # Substitute for equivalent encodings:
        encodingSub = {"iso-8859-1": "windows-1252"}

        if encoding.lower() in encodingSub:
            encoding = encodingSub[encoding.lower()]

        return encoding, confidence
開發者ID:13lcp2000,項目名稱:recetario4-4,代碼行數:44,代碼來源:inputstream.py


注:本文中的charade.universaldetector.UniversalDetector類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。