本文整理匯總了Python中future.backports.email.errors.UndecodableBytesDefect類的典型用法代碼示例。如果您正苦於以下問題：Python errors.UndecodableBytesDefect類的具體用法？Python errors.UndecodableBytesDefect怎麼用？Python errors.UndecodableBytesDefect使用的例子？那麼，這裡精選的代碼示例或許可以為您提供幫助。您也可以進一步了解該類所在模塊future.backports.email.errors的用法示例。
在下文中一共展示了errors.UndecodableBytesDefect類的3個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: decode
# 需要導入模塊: from future.backports.email import errors [as 別名]
# 或者: from future.backports.email.errors import UndecodableBytesDefect [as 別名]
def decode(ew):
    """Decode an encoded word; return a (string, charset, lang, defects) tuple.

    An RFC 2047 encoded word, with the RFC 2231 language extension, has the
    form:

        =?charset*lang?cte?encoded_string?=

    where '*lang' may be omitted but the other parts may not be.

    This function expects exactly such a string: it does not check the syntax
    and may raise errors if the string is not well formed (for example a
    ValueError when the word does not split into exactly five '?'-separated
    fields, or a lookup error for a cte that has no registered decoder).

    The encoded_string is decoded first from its Content Transfer Encoding
    and then from the resulting bytes into unicode using the specified
    charset.  Two recoverable problems are reported through the returned
    defects list instead of being raised:

    * The bytes do not decode with the charset: an UndecodableBytesDefect is
      added and the bytes are decoded again with the 'surrogateescape' error
      handler, so the undecodable octets are preserved as lone surrogates.
    * The charset is unknown, or its name cannot even be encoded for the
      codec lookup: the bytes are decoded as ASCII with 'surrogateescape'
      and, unless the charset is 'unknown-8bit', a CharsetError is added.

    The specified charset and language are returned unchanged.  The language
    is the empty string when no '*lang' suffix is present.
    """
    _, charset, cte, cte_string, _ = str(ew).split('?')
    charset, _, lang = charset.partition('*')
    cte = cte.lower()
    # Recover the original bytes and do CTE decoding.
    bstring = cte_string.encode('ascii', 'surrogateescape')
    bstring, defects = _cte_decoders[cte](bstring)
    # Turn the CTE decoded bytes into unicode.
    try:
        string = bstring.decode(charset)
    except (LookupError, UnicodeEncodeError):
        # LookupError: no codec is registered under this name.
        # UnicodeEncodeError: the charset *name* itself could not be encoded
        # for the codec lookup (e.g. it carries surrogate-escaped bytes).
        # This clause must precede the UnicodeError one below, because
        # retrying with the same unusable name would only raise again.
        string = bstring.decode('ascii', 'surrogateescape')
        if charset.lower() != 'unknown-8bit':
            defects.append(errors.CharsetError("Unknown charset {} "
                "in encoded word; decoded as unknown bytes".format(charset)))
    except UnicodeError:
        # The codec exists but rejected some of the bytes.
        defects.append(errors.UndecodableBytesDefect("Encoded word "
            "contains bytes not decodable using {} charset".format(charset)))
        string = bstring.decode(charset, 'surrogateescape')
    return string, charset, lang, defects
示例2: _fold
# 需要導入模塊: from future.backports.email import errors [as 別名]
# 或者: from future.backports.email.errors import UndecodableBytesDefect [as 別名]
def _fold(self, folded):
    """Add each of this token list's parts to *folded*, folding as needed.

    For every part in ``self.parts`` the text to emit is ``str(part)``, or
    the result of ``part.cte_encode(...)`` when that text is not pure ASCII.
    The part is then placed by trying, in order:

    1. ``folded.append_if_fits(part, text)``;
    2. after removing leading folding whitespace from the part and storing
       whitespace in ``folded.stickyspace``, ``folded.append_if_fits(part)``;
    3. recursing into ``part._fold(folded)`` when the part reports
       ``has_fws``;
    4. otherwise appending the text unconditionally and starting a new line.

    Parameters:
        folded: the fold accumulator.  This method uses its ``policy`` and
            ``stickyspace`` attributes and its ``append_if_fits``,
            ``append`` and ``newline`` methods.

    Returns None; the result is accumulated in *folded*.  Parts may be
    mutated (``pop_leading_fws`` / ``pop``).
    """
    for part in self.parts:
        tstr = str(part)
        try:
            tstr.encode('us-ascii')
        except UnicodeEncodeError:
            # The text is not pure ASCII, so emit its CTE-encoded form.
            if any(isinstance(x, errors.UndecodableBytesDefect)
                   for x in part.all_defects):
                charset = 'unknown-8bit'
            else:
                # XXX: this should be a policy setting
                charset = 'utf-8'
            tstr = part.cte_encode(charset, folded.policy)
        if folded.append_if_fits(part, tstr):
            continue
        # Peel off the leading whitespace if any and make it sticky, to
        # avoid infinite recursion.
        ws = part.pop_leading_fws()
        if ws is not None:
            # NOTE(review): ws is only tested against None; the sticky space
            # is taken from a further part.pop(0).  Confirm against
            # pop_leading_fws whether str(ws) was intended here.
            folded.stickyspace = str(part.pop(0))
            if folded.append_if_fits(part):
                continue
        if part.has_fws:
            part._fold(folded)
            continue
        # There are no fold points in this one; it is too long for a single
        # line and can't be split...we just have to put it on its own line.
        folded.append(tstr)
        folded.newline()
示例3: params
# 需要導入模塊: from future.backports.email import errors [as 別名]
# 或者: from future.backports.email.errors import UndecodableBytesDefect [as 別名]
def params(self):
    """Yield (name, value) pairs for the parameters found in this token list.

    Tokens whose ``token_type`` does not end in 'parameter', or whose first
    child is not an 'attribute' token, are skipped.  The remaining tokens are
    grouped by stripped attribute name; within a group the pieces are ordered
    by ``section_number`` and their values concatenated.

    Names are yielded in the order in which each name was first seen.

    Defects are appended to the individual parameter tokens rather than
    raised:

    * InvalidHeaderDefect when a piece's section number does not match its
      position in the sorted group.
    * UndecodableBytesDefect when an extended value still contains
      surrogates after charset decoding.
    """
    # The RFC specifically states that the ordering of parameters is not
    # guaranteed and may be reordered by the transport layer.  So we have
    # to assume the RFC 2231 pieces can come in any order.  However, we
    # output them in the order that we first see a given name, which gives
    # us a stable __str__.
    pieces_by_name = OrderedDict()
    for token in self:
        if not token.token_type.endswith('parameter'):
            continue
        if token[0].token_type != 'attribute':
            continue
        name = token[0].value.strip()
        pieces_by_name.setdefault(name, []).append(
            (token.section_number, token))
    for name, parts in pieces_by_name.items():
        # Sort on the section number alone.  Sorting the bare tuples would
        # fall through to comparing the token objects whenever two pieces
        # share a section number; with a key the (stable) sort keeps such
        # duplicates in appearance order instead.
        parts = sorted(parts, key=lambda piece: piece[0])
        # XXX: there might be more recovery we could do here if, for
        # example, this is really a case of a duplicate attribute name.
        value_parts = []
        # The charset of the lowest-numbered piece is used for every piece.
        charset = parts[0][1].charset
        for i, (section_number, param) in enumerate(parts):
            if section_number != i:
                param.defects.append(errors.InvalidHeaderDefect(
                    "inconsistent multipart parameter numbering"))
            value = param.param_value
            if param.extended:
                try:
                    value = unquote_to_bytes(value)
                except UnicodeEncodeError:
                    # source had surrogate escaped bytes.  What we do now
                    # is a bit of an open question.  I'm not sure this is
                    # the best choice, but it is what the old algorithm did
                    value = unquote(value, encoding='latin-1')
                else:
                    try:
                        value = value.decode(charset, 'surrogateescape')
                    except LookupError:
                        # XXX: there should really be a custom defect for
                        # unknown character set to make it easy to find,
                        # because otherwise unknown charset is a silent
                        # failure.
                        value = value.decode('us-ascii', 'surrogateescape')
                    if utils._has_surrogates(value):
                        param.defects.append(errors.UndecodableBytesDefect())
            value_parts.append(value)
        value = ''.join(value_parts)
        yield name, value