当前位置: 首页>>代码示例>>Python>>正文


Python universaldetector.UniversalDetector类代码示例

本文整理汇总了Python中charade.universaldetector.UniversalDetector的典型用法代码示例。如果您正苦于以下问题：Python UniversalDetector类的具体用法？Python UniversalDetector怎么用？Python UniversalDetector使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了UniversalDetector类的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: decode_raw_line

 def decode_raw_line(line):
     """Decode a raw line of bytes into text.

     Strict UTF-8 is tried first. If that fails and charade is loaded, the
     encoding guessed by ``UniversalDetector`` is tried in strict mode. When
     there is no guess, or the guess cannot be used, the line is decoded as
     UTF-8 with the offending bytes replaced.

     :param line: the undecoded line (must provide a bytes-style ``decode``).
     :returns: the decoded text.
     """
     # First, try to decode using utf-8.
     try:
         return line.decode('utf8', 'strict')
     except UnicodeError:
         pass

     # If that failed and charade is loaded, try to guess the encoding.
     guessed = None
     if charadeLoaded:
         u = UniversalDetector()
         u.feed(line)
         u.close()
         guessed = u.result['encoding']

     if guessed:
         try:
             return line.decode(guessed, 'strict')
         except (UnicodeError, LookupError):
             # LookupError: the detector named an encoding for which Python
             # has no codec; treat it like any other failed guess.
             pass

     # Give up on guessing: fall back to utf-8 and replace the offending
     # characters.
     return line.decode('utf8', 'replace')
开发者ID:Hoaas,项目名称:Limnoria,代码行数:27,代码来源:str.py

示例2: runTest

 def runTest(self):
     """Check that the encoding detected for ``self.file_name`` matches
     ``self.encoding``.

     The file is fed to a ``UniversalDetector`` line by line until the
     detector reports it is done or the file is exhausted. The detected
     encoding is lower-cased before being compared.
     """
     u = UniversalDetector()
     # Use a context manager so the file is closed even if feeding fails.
     with open(self.file_name, 'rb') as f:
         for line in f:
             u.feed(line)
             if u.done:
                 break
     u.close()
     detected = u.result['encoding']
     # A missing result must surface as a test failure carrying the message
     # below, not as an AttributeError from calling .lower() on None.
     normalized = detected.lower() if detected else detected
     self.assertEqual(normalized, self.encoding,
                      "Expected %s, but got %r in %s" %
                      (self.encoding, detected,
                       self.file_name))
开发者ID:byroot,项目名称:charade,代码行数:11,代码来源:test.py

示例3: description_of

def description_of(path):
    """Return a string describing the probable encoding of a file.

    :param path: path of the file to inspect; it is opened in binary mode.
    :returns: ``'<path>: <encoding> with confidence <confidence>'`` when the
        detector reports an encoding, otherwise ``'<path>: no result'``.
    """
    u = UniversalDetector()
    # Use a context manager so the file handle is released promptly instead
    # of being left for the garbage collector.
    with open(path, 'rb') as f:
        for line in f:
            u.feed(line)
    u.close()
    result = u.result
    if result['encoding']:
        return '%s: %s with confidence %s' % (path,
                                              result['encoding'],
                                              result['confidence'])
    return '%s: no result' % path
开发者ID:byroot,项目名称:charade,代码行数:13,代码来源:chardetect.py

示例4: _read

    def _read(self):
        """Called by _select() when we can read data.

        Receives up to 1024 bytes from ``self.conn``, appends them to
        ``self.inbuffer`` and splits the buffer on newlines. The trailing
        (possibly incomplete) piece is kept in ``self.inbuffer``. On
        Python 3 each complete line is decoded to text before being parsed;
        parsed messages are fed to ``self.irc``.

        Socket timeouts and the SSL read-timeout error are ignored; other
        SSL and socket errors are passed to ``self._handleSocketError`` and
        end the call.
        """
        try:
            self.inbuffer += self.conn.recv(1024)
            self.eagains = 0 # If we successfully recv'ed, we can reset this.
            lines = self.inbuffer.split(b'\n')
            # The last element is whatever followed the final newline.
            self.inbuffer = lines.pop()
            for line in lines:
                if sys.version_info[0] >= 3:
                    # First, try to decode using utf-8.
                    try:
                        line = line.decode('utf8', 'strict')
                    except UnicodeError:
                        # If charade is loaded, try to guess the encoding.
                        guessed = None
                        if charadeLoaded:
                            u = UniversalDetector()
                            u.feed(line)
                            u.close()
                            guessed = u.result['encoding']
                        decoded = None
                        if guessed:
                            try:
                                decoded = line.decode(guessed, 'strict')
                            except (UnicodeError, LookupError):
                                # LookupError: Python has no codec for the
                                # guessed name. Without catching it here the
                                # error would escape this method, since only
                                # socket/SSL errors are handled below.
                                pass
                        if decoded is None:
                            # No usable guess: fall back to utf-8 and
                            # replace the offending characters.
                            decoded = line.decode('utf8', 'replace')
                        line = decoded

                msg = drivers.parseMsg(line)
                if msg is not None and self.irc is not None:
                    self.irc.feedMsg(msg)
        except socket.timeout:
            pass
        except SSLError as e:
            if e.args[0] == 'The read operation timed out':
                pass
            else:
                self._handleSocketError(e)
                return
        except socket.error as e:
            self._handleSocketError(e)
            return
        if self.irc and not self.irc.zombie:
            self._sendIfMsgs()
开发者ID:Poorchop,项目名称:Limnoria,代码行数:51,代码来源:Socket.py

示例5: detectEncoding

    def detectEncoding(self, parseMeta=True, chardet=True):
        """Work out the encoding to use and how confident we are in it.

        Sources are consulted in order until one yields an encoding:
        ``self.detectBOM()``, then ``self.detectEncodingMeta()`` (only if
        ``parseMeta``), then charade/chardet detection over
        ``self.rawStream`` (only if ``chardet`` and one of the two packages
        can be imported), and finally ``self.defaultEncoding``.

        :param parseMeta: whether to call ``self.detectEncodingMeta()``.
        :param chardet: whether to try charade/chardet detection.
        :returns: ``(encoding, confidence)``; confidence is ``"certain"``
            only when the encoding came from ``self.detectBOM()``, otherwise
            ``"tentative"``.
        """
        # First look for a BOM
        # This will also read past the BOM if present
        encoding = self.detectBOM()
        confidence = "certain"
        # If there is no BOM need to look for meta elements with encoding
        # information
        if encoding is None and parseMeta:
            encoding = self.detectEncodingMeta()
            confidence = "tentative"
        # Guess with chardet, if available
        if encoding is None and chardet:
            confidence = "tentative"
            try:
                try:
                    from charade.universaldetector import UniversalDetector
                except ImportError:
                    from chardet.universaldetector import UniversalDetector
            except ImportError:
                # Neither detector package is installed; skip this step.
                pass
            else:
                detector = UniversalDetector()
                while not detector.done:
                    chunk = self.rawStream.read(self.numBytesChardet)
                    assert isinstance(chunk, bytes)
                    if not chunk:
                        break
                    # Chunks are fed straight to the detector; nothing else
                    # needs them, so they are not kept.
                    detector.feed(chunk)
                detector.close()
                encoding = detector.result['encoding']
                # Return to the start of the stream after reading from it
                # for detection.
                self.rawStream.seek(0)
        # If all else fails use the default encoding
        if encoding is None:
            confidence = "tentative"
            encoding = self.defaultEncoding

        # Substitute for equivalent encodings:
        encodingSub = {"iso-8859-1": "windows-1252"}

        if encoding.lower() in encodingSub:
            encoding = encodingSub[encoding.lower()]

        return encoding, confidence
开发者ID:13lcp2000,项目名称:recetario4-4,代码行数:44,代码来源:inputstream.py


注:本文中的charade.universaldetector.UniversalDetector类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。