本文整理汇总了Python中charade.universaldetector.UniversalDetector类的典型用法代码示例。如果您正苦于以下问题:Python UniversalDetector类的具体用法?Python UniversalDetector怎么用?Python UniversalDetector使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了UniversalDetector类的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: decode_raw_line
def decode_raw_line(line):
    """Decode a raw byte string into text without failing on bad bytes.

    Strict UTF-8 is tried first.  If that fails and the module-level
    ``charadeLoaded`` flag is true, the encoding guessed by
    ``UniversalDetector`` is tried strictly.  When no usable guess is
    available, the line is decoded as UTF-8 with the offending bytes
    replaced.

    Args:
        line: The undecoded bytes of a single line.

    Returns:
        The decoded text.
    """
    # First, try to decode using utf-8.
    try:
        return line.decode('utf8', 'strict')
    except UnicodeError:
        pass

    # If this fails and charade is loaded, try to guess the correct encoding.
    if charadeLoaded:
        detector = UniversalDetector()
        detector.feed(line)
        detector.close()
        guessed = detector.result['encoding']
        if guessed:
            try:
                return line.decode(guessed, 'strict')
            except (UnicodeError, LookupError):
                # UnicodeError: the guess was wrong for these bytes.
                # LookupError: the detector named a codec that this Python
                # does not provide.  Either way, fall through and replace.
                pass

    # No usable guess: fall back to utf-8 and replace offending characters.
    return line.decode('utf8', 'replace')
示例2: runTest
def runTest(self):
    """Check that the detected encoding of ``self.file_name`` is ``self.encoding``.

    The file is fed to a ``UniversalDetector`` line by line until the
    detector reports it is done or the file ends.  The lower-cased detected
    encoding is then compared with ``self.encoding`` via ``assertEqual``.
    """
    detector = UniversalDetector()
    # The context manager closes the file even if feeding raises.
    with open(self.file_name, 'rb') as stream:
        for line in stream:
            detector.feed(line)
            if detector.done:
                break
    detector.close()
    detected = detector.result['encoding']
    # Compare '' when no encoding was reported, so the case shows up as an
    # assertion failure with the message below rather than an AttributeError
    # from calling .lower() on None.
    self.assertEqual((detected or '').lower(), self.encoding,
                     "Expected %s, but got %r in %s" %
                     (self.encoding, detected, self.file_name))
示例3: description_of
def description_of(path):
    """Return a string describing the probable encoding of a file.

    Args:
        path: Path of the file to inspect; it is opened in binary mode.

    Returns:
        ``'<path>: <encoding> with confidence <confidence>'`` when the
        detector reports an encoding, otherwise ``'<path>: no result'``.
    """
    detector = UniversalDetector()
    # The context manager closes the file even if feeding raises.
    with open(path, 'rb') as stream:
        for line in stream:
            detector.feed(line)
            # Stop reading once the detector reports it has made up its mind.
            if detector.done:
                break
    detector.close()
    result = detector.result
    if result['encoding']:
        return '%s: %s with confidence %s' % (path,
                                              result['encoding'],
                                              result['confidence'])
    return '%s: no result' % path
示例4: _read
def _read(self):
    """Called by _select() when we can read data.

    Receives up to 1024 bytes from ``self.conn``, appends them to
    ``self.inbuffer`` and splits the buffer on newline bytes.  The trailing
    piece is kept in ``self.inbuffer``; every other line is passed to
    ``drivers.parseMsg`` and any resulting message is fed to ``self.irc``.

    On Python 3 each line is decoded first: strict UTF-8, then (if
    ``charadeLoaded``) the encoding guessed by ``UniversalDetector``, and
    finally UTF-8 with offending bytes replaced.  On Python 2 the line is
    passed on undecoded.

    Socket timeouts are ignored; other socket/SSL errors are passed to
    ``self._handleSocketError`` and end the call.  Otherwise, if the IRC
    object exists and is not a zombie, ``self._sendIfMsgs()`` is called.
    """
    try:
        self.inbuffer += self.conn.recv(1024)
        self.eagains = 0  # If we successfully recv'ed, we can reset this.
        lines = self.inbuffer.split(b'\n')
        # The last piece has no terminating newline yet; keep it buffered.
        self.inbuffer = lines.pop()
        for line in lines:
            if sys.version_info[0] >= 3:
                # First, try to decode using utf-8.
                try:
                    line = line.decode('utf8', 'strict')
                except UnicodeError:
                    raw = line
                    line = None
                    # If charade is loaded, try to guess the correct encoding.
                    if charadeLoaded:
                        detector = UniversalDetector()
                        detector.feed(raw)
                        detector.close()
                        guessed = detector.result['encoding']
                        if guessed:
                            try:
                                line = raw.decode(guessed, 'strict')
                            except (UnicodeError, LookupError):
                                # Wrong guess, or a codec name this Python
                                # does not provide: fall back below instead
                                # of aborting the whole read.
                                pass
                    if line is None:
                        # No usable guess: decode as utf-8 and replace any
                        # offending characters.
                        line = raw.decode('utf8', 'replace')
            msg = drivers.parseMsg(line)
            if msg is not None and self.irc is not None:
                self.irc.feedMsg(msg)
    except socket.timeout:
        pass
    except SSLError as e:
        # An SSL read timeout is treated like a plain socket timeout.
        if e.args[0] == 'The read operation timed out':
            pass
        else:
            self._handleSocketError(e)
            return
    except socket.error as e:
        self._handleSocketError(e)
        return
    if self.irc and not self.irc.zombie:
        self._sendIfMsgs()
示例5: detectEncoding
def detectEncoding(self, parseMeta=True, chardet=True):
    """Work out the encoding of the stream and how confident the guess is.

    Sources are tried in order: a byte-order mark (``self.detectBOM()``),
    meta elements (``self.detectEncodingMeta()``, if ``parseMeta``), the
    charade/chardet detector (if ``chardet`` and one of them can be
    imported), and finally ``self.defaultEncoding``.

    Args:
        parseMeta: Whether to look for encoding information in meta elements.
        chardet: Whether to try statistical detection with charade/chardet.

    Returns:
        An ``(encoding, confidence)`` tuple.  ``confidence`` is ``"certain"``
        only when the BOM yielded the encoding, otherwise ``"tentative"``.
    """
    # First look for a BOM
    # This will also read past the BOM if present
    encoding = self.detectBOM()
    confidence = "certain"
    # If there is no BOM need to look for meta elements with encoding
    # information
    if encoding is None and parseMeta:
        encoding = self.detectEncodingMeta()
        confidence = "tentative"
    # Guess with chardet, if available
    if encoding is None and chardet:
        confidence = "tentative"
        try:
            try:
                from charade.universaldetector import UniversalDetector
            except ImportError:
                from chardet.universaldetector import UniversalDetector
        except ImportError:
            # Neither detector library is installed; use the default below.
            pass
        else:
            detector = UniversalDetector()
            # Feed fixed-size chunks until the detector is done or the
            # stream is exhausted.  Chunks are not retained afterwards.
            while not detector.done:
                chunk = self.rawStream.read(self.numBytesChardet)
                assert isinstance(chunk, bytes)
                if not chunk:
                    break
                detector.feed(chunk)
            detector.close()
            encoding = detector.result['encoding']
            # Rewind so the bytes consumed for detection can be read again.
            self.rawStream.seek(0)
    # If all else fails use the default encoding
    if encoding is None:
        confidence = "tentative"
        encoding = self.defaultEncoding

    # Substitute for equivalent encodings:
    encodingSub = {"iso-8859-1": "windows-1252"}

    if encoding.lower() in encodingSub:
        encoding = encodingSub[encoding.lower()]

    return encoding, confidence