本文整理汇总了Python中urllib.parse.unquote_to_bytes方法的典型用法代码示例。如果您正苦于以下问题:Python parse.unquote_to_bytes方法的具体用法?Python parse.unquote_to_bytes怎么用?Python parse.unquote_to_bytes使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块urllib.parse的用法示例。
在下文中一共展示了parse.unquote_to_bytes方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: compat_urllib_parse_unquote_to_bytes
# 需要导入模块: from urllib import parse [as 别名]
# 或者: from urllib.parse import unquote_to_bytes [as 别名]
def compat_urllib_parse_unquote_to_bytes(string):
    """Decode the %xx escapes in *string* and return the result as bytes.

    unquote_to_bytes('abc%20def') -> b'abc def'.

    Text input is encoded as UTF-8 first; that only matters when it holds
    unescaped non-ASCII characters, which a URI should not. A '%' whose
    next two characters are not a key of ``compat_urllib_parse._hextochr``
    is left in the output unchanged.
    """
    if not string:
        # Attribute access only: makes a falsy argument that is not
        # string-like (e.g. None) raise AttributeError instead of passing.
        string.split
        return b''
    if isinstance(string, compat_str):
        string = string.encode('utf-8')
    segments = string.split(b'%')
    if len(segments) < 2:
        # No '%' at all, nothing to decode.
        return string
    hex_table = compat_urllib_parse._hextochr
    pieces = [segments[0]]
    for segment in segments[1:]:
        try:
            decoded = hex_table[segment[:2]]
        except KeyError:
            # Not a recognised escape: put the '%' back and keep the text.
            pieces.extend((b'%', segment))
        else:
            pieces.extend((decoded, segment[2:]))
    return b''.join(pieces)
示例2: compat_urllib_parse_unquote
# 需要导入模块: from urllib import parse [as 别名]
# 或者: from urllib.parse import unquote_to_bytes [as 别名]
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
    """Return *string* with its %xx escapes replaced by the decoded text.

    *encoding* and *errors* are handed to ``bytes.decode()`` for the
    percent-encoded sequences; passing None for either selects the
    defaults ('utf-8' and 'replace'), so invalid sequences become a
    placeholder character by default.

    unquote('abc%20def') -> 'abc def'.
    """
    if '%' not in string:
        # Attribute access only: rejects arguments that are not string-like.
        string.split
        return string
    encoding = 'utf-8' if encoding is None else encoding
    errors = 'replace' if errors is None else errors
    # The pieces are consumed pairwise: odd indexes are percent-decoded,
    # even indexes are copied through untouched.
    # NOTE(review): presumably _asciire captures the runs that may contain
    # escapes -- confirm against its definition.
    chunks = _asciire.split(string)
    out = [chunks[0]]
    for idx in range(1, len(chunks), 2):
        raw = compat_urllib_parse_unquote_to_bytes(chunks[idx])
        out.extend((raw.decode(encoding, errors), chunks[idx + 1]))
    return ''.join(out)
示例3: url_unescape
# 需要导入模块: from urllib import parse [as 别名]
# 或者: from urllib.parse import unquote_to_bytes [as 别名]
def url_unescape(value, encoding='utf-8', plus=True):
    """Decode a percent-encoded URL value.

    :param value: the escaped value to decode
    :param encoding: encoding passed to the unquote function; when None
        the result of ``unquote_to_bytes`` is returned instead
    :param plus: when true (the default) '+' is treated as a space
    :return: the unescaped value
    """
    if encoding is not None:
        decoder = urllib_parser.unquote_plus if plus else urllib_parser.unquote
        return decoder(to_basestring(value), encoding=encoding)
    if plus:
        # unquote_to_bytes has no _plus variant, so translate '+' by hand.
        value = to_basestring(value).replace('+', ' ')
    return urllib_parser.unquote_to_bytes(value)
示例4: uri_to_iri
# 需要导入模块: from urllib import parse [as 别名]
# 或者: from urllib.parse import unquote_to_bytes [as 别名]
def uri_to_iri(uri):
    """
    Convert a Uniform Resource Identifier (URI) into an Internationalized
    Resource Identifier (IRI), following section 3.2 of RFC 3987.

    The URI is coerced to bytes, percent-decoded, passed through
    ``repercent_broken_unicode`` and finally decoded as UTF-8, so the
    result is text (for example '/I%20%E2%99%A5%20Django/' has its
    escapes decoded). None is returned unchanged.
    """
    if uri is None:
        return uri
    raw = force_bytes(uri)
    if six.PY3:
        unescaped = unquote_to_bytes(raw)
    else:
        unescaped = unquote(raw)
    return repercent_broken_unicode(unescaped).decode('utf-8')
示例5: unquote_bytes_to_wsgi
# 需要导入模块: from urllib import parse [as 别名]
# 或者: from urllib.parse import unquote_to_bytes [as 别名]
def unquote_bytes_to_wsgi(bytestring):
    """Percent-decode *bytestring* and return the bytes decoded as latin-1."""
    decoded = unquote_to_bytes(bytestring)
    return decoded.decode('latin-1')
示例6: _get_path
# 需要导入模块: from urllib import parse [as 别名]
# 或者: from urllib.parse import unquote_to_bytes [as 别名]
def _get_path(self, parsed):
path = parsed.path
# If there are parameters, add them
if parsed.params:
path += ";" + parsed.params
path = unquote_to_bytes(path)
# Replace the behavior where non-ASCII values in the WSGI environ are
# arbitrarily decoded with ISO-8859-1.
# Refs comment in `get_bytes_from_wsgi()`.
return path.decode('iso-8859-1')
示例7: url_unescape
# 需要导入模块: from urllib import parse [as 别名]
# 或者: from urllib.parse import unquote_to_bytes [as 别名]
def url_unescape(value, encoding='utf-8', plus=True):
    """Decodes the given value from a URL.

    The argument may be either a byte or unicode string.

    If encoding is None, the result will be a byte string. Otherwise,
    the result is a unicode string in the specified encoding.

    If ``plus`` is true (the default), plus signs will be interpreted
    as spaces (literal plus signs must be represented as "%2B"). This
    is appropriate for query strings and form-encoded values but not
    for the path component of a URL. Note that this default is the
    reverse of Python's urllib module.

    .. versionadded:: 3.1
       The ``plus`` argument
    """
    wants_bytes = encoding is None
    if not wants_bytes:
        if plus:
            text_unquote = urllib_parse.unquote_plus
        else:
            text_unquote = urllib_parse.unquote
        return text_unquote(to_basestring(value), encoding=encoding)
    if plus:
        # unquote_to_bytes doesn't have a _plus variant
        value = to_basestring(value).replace('+', ' ')
    return urllib_parse.unquote_to_bytes(value)
示例8: unquote_plus_to_bytes
# 需要导入模块: from urllib import parse [as 别名]
# 或者: from urllib.parse import unquote_to_bytes [as 别名]
def unquote_plus_to_bytes(s):
    """Percent-decode *s* to bytes, treating every '+' as a space.

    Accepts text or bytes; the '+' replacement uses literals of the
    matching type.
    """
    plus, space = ('+', ' ') if isinstance(s, str) else (b'+', b' ')
    return unquote_to_bytes(s.replace(plus, space))
示例9: data_open
# 需要导入模块: from urllib import parse [as 别名]
# 或者: from urllib.parse import unquote_to_bytes [as 别名]
def data_open(self, req):
    """Open a ``data:`` URL (RFC 2397) and return it as a response object.

    Any POSTed data is ignored. Syntax handled:

        dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        mediatype := [ type "/" subtype ] *( ";" parameter )
        data      := *urlchar
        parameter := attribute "=" value

    The payload is percent-decoded, base64-decoded when the media type
    ends in ';base64', and wrapped together with Content-type and
    Content-length headers.
    """
    url = req.get_full_url()
    _scheme, remainder = url.split(':', 1)
    mediatype, payload = remainder.split(',', 1)

    # Even base64 encoded data URLs might be quoted, so always unquote.
    data = compat_urllib_parse_unquote_to_bytes(payload)
    if mediatype.endswith(';base64'):
        data = binascii.a2b_base64(data)
        # Drop the 7-character ';base64' suffix.
        mediatype = mediatype[:-7]

    mediatype = mediatype or 'text/plain;charset=US-ASCII'

    header_text = 'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data))
    headers = email.message_from_string(header_text)
    return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
示例10: _unquotepath
# 需要导入模块: from urllib import parse [as 别名]
# 或者: from urllib.parse import unquote_to_bytes [as 别名]
def _unquotepath(path):
    """Unquote *path* while keeping encoded '/' and '?' percent-escaped.

    The '%' of every '%2f'/'%2F'/'%3f'/'%3F' is itself escaped first (and
    the hex digits upper-cased), so those sequences come out of the
    unquoting step as literal '%2F' / '%3F'.
    """
    for hexcode in ('2f', '2F', '3f', '3F'):
        protected = '%25' + hexcode.upper()
        path = path.replace('%' + hexcode, protected)
    # Python 2: '%a3' becomes '\xa3', which is what we want.
    # Python 3: the standard unquote() loses non-UTF-8 percent-escaped
    # characters ('%a3' turns into U+FFFD REPLACEMENT CHARACTER), so
    # unquote_to_bytes() is used to get the raw bytes instead.
    unquoter = unquote if six.PY2 else unquote_to_bytes
    return unquoter(path)
示例11: parse_options_header
# 需要导入模块: from urllib import parse [as 别名]
# 或者: from urllib.parse import unquote_to_bytes [as 别名]
def parse_options_header(value, multiple=False):
    """Parse a ``Content-Type`` like header into a tuple with the content
    type and the options:

    >>> parse_options_header('text/html; charset=utf8')
    ('text/html', {'charset': 'utf8'})

    This should not be used to parse ``Cache-Control`` like headers that use
    a slightly different format. For these headers use the
    :func:`parse_dict_header` function.

    .. versionadded:: 0.5

    :param value: the header to parse.
    :param multiple: Whether try to parse and return multiple MIME types
    :return: (mimetype, options) or (mimetype, options, mimetype, options, ...)
             if multiple=True
    """
    if not value:
        return '', {}

    parsed = []
    # Newlines separate entries just like commas; the leading comma lets
    # every entry (including the first) be matched the same way.
    remaining = "," + value.replace("\n", ",")
    while remaining:
        mime_match = _option_header_start_mime_type.match(remaining)
        if not mime_match:
            break
        parsed.append(mime_match.group(1))  # mimetype
        tail = mime_match.group(2)

        # Consume the options that follow this mimetype, one piece at a time.
        options = {}
        while tail:
            piece = _option_header_piece_re.match(tail)
            if not piece:
                break
            name, charset, _, raw_value = piece.groups()
            name = unquote_header_value(name)
            if raw_value is not None:
                raw_value = unquote_header_value(raw_value, name == 'filename')
                if charset is not None:
                    # An explicit encoding means the value is percent-encoded.
                    raw_value = _unquote(raw_value).decode(charset)
            options[name] = raw_value
            tail = tail[piece.end():]
        parsed.append(options)

        if multiple is False:
            return tuple(parsed)
        # Whatever the option parser could not consume is the next entry.
        remaining = tail

    return tuple(parsed) if parsed else ('', {})
示例12: routeevent
# 需要导入模块: from urllib import parse [as 别名]
# 或者: from urllib.parse import unquote_to_bytes [as 别名]
def routeevent(self, path, routinemethod, container = None, host = None, vhost = None, method = [b'GET', b'HEAD']):
    '''
    Route specified path to a routine factory

    :param path: path to match, can be a regular expression (bytes; it is
                 compiled with a trailing ``$`` and matched from the start
                 of the unquoted request path)
    :param routinemethod: factory function routinemethod(event), event is the HttpRequestEvent
    :param container: routine container. If None, default to self for bound method, or event.connection if not
    :param host: if specified, only response to request to specified host
    :param vhost: if specified, only response to request to specified vhost.
                  If not specified, response to dispatcher default vhost.
    :param method: if specified, response to specified methods
    '''
    path_re = re.compile(path + b'$')
    route_vhost = self.vhost if vhost is None else vhost
    if container is None:
        # A bound method brings its own container; otherwise stay None and
        # fall back to the event's connection at dispatch time.
        bound_container = getattr(routinemethod, '__self__', None)
    else:
        bound_container = container

    def ismatch(event):
        # Check vhost
        if route_vhost is not None and getattr(event.createby, 'vhost', '') != route_vhost:
            return False
        # Parse the path first; the RFC says absolute URLs must be accepted
        parts = urlsplit(event.path)
        if parts.path[:1] != b'/':
            # For security reasons, ignore unrecognized paths
            return False
        if parts.netloc and host is not None and host != parts.netloc:
            # Maybe a proxy request, ignore it
            return False
        if getattr(event.createby, 'unquoteplus', True):
            decode = unquote_plus_to_bytes
        else:
            decode = unquote_to_bytes
        realpath = decode(parts.path)
        matched = path_re.match(realpath)
        if matched is None:
            return False
        # Record the parse results on the event for the handler to use
        event.realpath = realpath
        event.querystring = parts.query
        event.path_match = matched
        return True

    def func(event, scheduler):
        try:
            if event.canignore:
                # Already processed
                return
            event.canignore = True
            target = event.connection if bound_container is None else bound_container
            target.subroutine(routinemethod(event), False)
        except Exception:
            # Errors raised while starting the routine are deliberately ignored
            pass

    for http_method in method:
        self.registerHandler(
            HttpRequestEvent.createMatcher(host, None, http_method, _ismatch = ismatch),
            func)
示例13: decode_object
# 需要导入模块: from urllib import parse [as 别名]
# 或者: from urllib.parse import unquote_to_bytes [as 别名]
def decode_object(obj):
    """Turn a tagged single-key mapping back into the object it encodes.

    A mapping is treated as tagged when it has exactly one key of the form
    ``'<vlcpjsonencode/CLASSNAME>'``. Anything else is returned unchanged.

    * ``urlencoded-bytes``: the value is percent-decoded to bytes.
    * ``namedstruct.NamedStruct``: a NamedStruct (or an EmbeddedStruct when
      the value has a 'target' entry) is rebuilt through ``__setstate__``
      from the base64 'data' and the 'type' entries.
    * any other dotted name: the class is looked up in an already imported
      module and its ``jsondecode`` is called with the value.

    Raises ValueError when the class name has no module part, the module
    is not loaded, the attribute does not exist, or the class has no
    ``jsondecode``.
    """
    if len(obj) != 1:
        return obj
    try:
        key = str(next(iter(obj.keys())))
    except Exception:
        return obj
    payload = obj[key]
    if not (key.startswith('<vlcpjsonencode/') and key.endswith('>')):
        return obj

    # Strip the 16-character '<vlcpjsonencode/' prefix and the closing '>'.
    classname = key[16:-1]
    if classname == 'urlencoded-bytes':
        return unquote_to_bytes(payload)

    if classname == 'namedstruct.NamedStruct':
        if 'target' in payload:
            target = payload['target']
            struct = EmbeddedStruct.__new__(EmbeddedStruct)
        else:
            struct = NamedStruct.__new__(NamedStruct)
            # A plain NamedStruct is its own target.
            target = struct
        struct.__setstate__((base64.b64decode(payload['data']), payload['type'], target))
        return struct

    package, dot, cname = classname.rpartition('.')
    if not dot:
        raise ValueError(repr(classname) + ' is not a valid type')
    try:
        # Only modules that are already imported are consulted; nothing is
        # imported on behalf of the incoming data.
        cls = getattr(sys.modules[package], cname)
    except KeyError:
        raise ValueError(repr(classname) + ' is forbidden because it is not loaded')
    except AttributeError:
        raise ValueError(repr(classname) + ' is not defined')
    if not hasattr(cls, 'jsondecode'):
        raise ValueError(repr(classname) + ' is not JSON serializable')
    return cls.jsondecode(payload)
示例14: parse_data_uri
# 需要导入模块: from urllib import parse [as 别名]
# 或者: from urllib.parse import unquote_to_bytes [as 别名]
def parse_data_uri(uri):
    """
    Parse a data: URI, returning a 3-tuple of media type, dictionary of media
    type parameters, and data.

    *uri* may be bytes or text; text is passed through ``safe_url_string``
    and encoded as ASCII first. The media type defaults to "text/plain",
    and when no media type is present the 'charset' parameter defaults to
    "US-ASCII". The data part is base64-decoded when the URI carries the
    ";base64" marker.

    Raises ValueError if there is no scheme separator, the scheme is not
    ``data``, the ',' before the data is missing, or the text between the
    parameters and the ',' is anything other than ";base64".
    """
    if not isinstance(uri, bytes):
        uri = safe_url_string(uri).encode('ascii')

    try:
        scheme, uri = uri.split(b':', 1)
    except ValueError:
        raise ValueError("invalid URI")
    if scheme.lower() != b'data':
        raise ValueError("not a data URI")

    # RFC 3986 section 2.1 allows percent encoding to escape characters that
    # would be interpreted as delimiters, implying that actual delimiters
    # should not be percent-encoded.
    # Decoding before parsing will allow malformed URIs with percent-encoded
    # delimiters, but it makes parsing easier and should not affect
    # well-formed URIs, as the delimiters used in this URI scheme are not
    # allowed, percent-encoded or not, in tokens.
    if six.PY2:
        uri = unquote(uri)
    else:
        uri = unquote_to_bytes(uri)

    media_type = "text/plain"
    media_type_params = {}

    m = _mediatype_pattern.match(uri)
    if m:
        media_type = m.group().decode()
        uri = uri[m.end():]
    else:
        media_type_params['charset'] = "US-ASCII"

    # Consume the ";attribute=value" parameters one at a time.
    while True:
        m = _mediatype_parameter_pattern.match(uri)
        if not m:
            break
        attribute, value, value_quoted = m.groups()
        if value_quoted:
            # Drop the backslash from quoted-pair escapes. The replacement
            # must be bytes as well: the re module does not allow mixing a
            # bytes pattern with a str replacement.
            value = re.sub(br'\\(.)', br'\1', value_quoted)
        media_type_params[attribute.decode()] = value.decode()
        uri = uri[m.end():]

    try:
        is_base64, data = uri.split(b',', 1)
    except ValueError:
        raise ValueError("invalid data URI")
    if is_base64:
        if is_base64 != b";base64":
            raise ValueError("invalid data URI")
        data = base64.b64decode(data)

    return _ParseDataURIResult(media_type, media_type_params, data)
示例15: parse_qsl_to_bytes
# 需要导入模块: from urllib import parse [as 别名]
# 或者: from urllib.parse import unquote_to_bytes [as 别名]
def parse_qsl_to_bytes(qs, keep_blank_values=False):
    """Parse a query given as a string argument.

    Data are returned as a list of name, value pairs as bytes.

    Arguments:

    qs: percent-encoded query string to be parsed (text, or ASCII bytes)

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were not included.
    """
    # This code follows Python 3's parse_qsl()
    # (at https://hg.python.org/cpython/rev/c38ac7ab8d9a)
    # with the unquote(s, encoding, errors) calls replaced by
    # unquote_to_bytes(s).
    #
    # _coerce_args() is used only to decode a bytes query to text. Its
    # result coercer is deliberately not applied: for bytes input it would
    # call .encode() on values that are already bytes and fail, and for
    # text input it is a no-op.
    qs, _ = _coerce_args(qs)
    fields = [field for chunk in qs.split('&') for field in chunk.split(';')]
    result = []
    for field in fields:
        if not field:
            continue
        # A control name without '=' yields an empty value, which is then
        # kept or dropped by the same blank-value rule as 'name='.
        name, _sep, value = field.partition('=')
        if value or keep_blank_values:
            result.append((
                unquote_to_bytes(name.replace('+', ' ')),
                unquote_to_bytes(value.replace('+', ' ')),
            ))
    return result