本文整理汇总了Python中codecs.html方法的典型用法代码示例。如果您正苦于以下问题:Python codecs.html方法的具体用法?Python codecs.html怎么用?Python codecs.html使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类codecs的用法示例。
在下文中一共展示了codecs.html方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_special_case_params
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import html [as 别名]
def get_special_case_params():
    """Return one randomly selected argument list from a fixed table of special cases.

    Every entry is a list of strings: two file paths, optionally followed by
    command-line style flags and their values. The table covers missing
    files, ASCII and UTF-8 inputs, explicit encodings, custom delimiters and
    the ``html`` mode.

    :return: one of the argument lists below, picked uniformly at random.
    """
    # The windows and unix specific tests should be tested on both unix and Windows to detect crashes.
    special_cases = [
        [u"noexist", u"noexist"],
        [u"tests/ascii/ex1", u"noexist"],
        [u"noexist", u"tests/ascii/ex1"],
        [u"tests/ascii/ex1", u"tests/ascii/ex1", "--outfile", "/dev/null"],
        [u"tests/ascii/ex1", u"tests/ascii/ex2"],
        [u"tests/utf_8/ex3", u"tests/utf_8/ex4"],
        [u"tests/utf_8/ex3", u"tests/utf_8/ex4", u"--oldfile-encoding", u"\"utf-8\"", u"--newfile-encoding", u"\"utf-8\""],
        [u"tests/utf_8/ex3", u"tests/utf_8/ex4", u"--oldfile-encoding", u"\"utf-8\"", u"--newfile-encoding", u"\"utf-8\"", u"--output-encoding", u"\"utf-8\""],
        [u"tests/ascii/ex5", u"tests/ascii/ex6"],
        [u"tests/ascii/ex7", u"tests/ascii/ex8"],
        [u"tests/ascii/a.json", u"tests/ascii/b.json"],
        [u"tests/ascii/a.json", u"tests/ascii/b.json", u"--push-delimiters", u"\"{\"", u"\"[\"", u"--pop-delimiters", u"\"}\"", u"\"]\"", u"--include-delimiters"],
        [u"tests/utf_8/fancy1", u"tests/utf_8/fancy2", u"--delimiters", u"日本国", u"--include-delimiters", u"--parameters-encoding", u"\"utf-8\"", u"--output-encoding", u"\"utf-8\"", u"--newfile-encoding", u"\"utf-8\"", u"--oldfile-encoding", u"\"utf-8\""],
        [u"tests/utf_8/fancy1", u"tests/utf_8/fancy2", u"--delimiters", u"\"\\u65e5\\u672c\\u56fd\"", u"--include-delimiters", u"--parameters-encoding", u"\"utf-8\"", u"--output-encoding", u"\"utf-8\"", u"--newfile-encoding", u"\"utf-8\"", u"--oldfile-encoding", u"\"utf-8\""],
        [u"tests/utf_8/this-is-encoded-in-utf-8", u"tests/utf_16/this-is-encoded-in-utf-16", u"--output-encoding", u"\"utf-8\"", u"--newfile-encoding", u"\"utf-16\"", u"--oldfile-encoding", u"\"utf-8\"", u"--enable-mark"],
        [u"tests/ascii/a.html", u"tests/ascii/b.html", u"-m", u"html"],
    ]
    # random.choice states the intent directly and avoids the manual
    # "len(...) - 1" index arithmetic.
    return random.choice(special_cases)
示例2: normalize_codec_name
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import html [as 别名]
def normalize_codec_name(chardet_name):
    """
    Translate a codec name reported by chardet into Python's canonical codec name.

    :param chardet_name: codec name as reported by chardet
    :return: the canonical name from ``codecs.lookup``, except that ``'gb2312'``
        is widened to ``'gb18030'``.
        See: https://docs.python.org/3.7/library/codecs.html#standard-encodings
    :raises LookupError: if Python has no codec matching the normalized name.
    """
    candidate = chardet_name.lower()
    candidate = candidate.replace('iso-', 'iso')
    candidate = candidate.replace('-', '_')
    canonical = codecs.lookup(candidate).name
    # chardet reports every GB-family encoding as 'gb2312', so decoding fails
    # when the text contains characters outside that set. gb18030 is a larger
    # character set than gb2312, which makes decoding more tolerant.
    return 'gb18030' if canonical == 'gb2312' else canonical
示例3: process_command
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import html [as 别名]
def process_command(self, cmd):
    """Run a single dot command.

    The command line is tokenised with `shlex.split
    <http://docs.python.org/library/shlex.html#shlex.split>`__, which
    is roughly the method Unix/POSIX shells use. The first token
    must begin with a dot; the rest of that token selects the
    ``command_<name>`` method on this object, which is called with
    the remaining tokens as a list.

    Raises ``self.Error`` when no matching ``command_<name>`` exists.
    """
    if self.echo:
        self.write(self.stderr, cmd+"\n")
    if sys.version_info >= (3, 0):
        parts = shlex.split(cmd)
    else:
        # shlex is broken with unicode on Python 2, so split the UTF-8
        # bytes and decode each token afterwards.
        parts = [piece.decode("utf8") for piece in shlex.split(cmd.encode("utf8"))]
    assert parts[0][0] == "."
    name = parts[0][1:]
    handler = getattr(self, "command_" + name, None)
    if not handler:
        raise self.Error("Unknown command \"%s\". Enter \".help\" for help" % (name,))
    handler(parts[1:])
###
### Commands start here
###
示例4: pop_output
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import html [as 别名]
def pop_output(self):
    """Restore the most recently pushed output settings.

    Output is governed by many settings such as nullvalue, mode
    (list/tcl/html/insert etc), column widths and header. To change
    some of them temporarily, call :meth:`push_output`, adjust the
    settings, and then call this method to put the old ones back.

    A simple example is a command like .dump: push the current
    output, switch the mode to insert so SQL inserts are printed,
    then pop to return to the previous settings.

    The bottom entry of the stack is never removed; when it is the
    only entry it is re-applied in place.
    """
    stack = self._output_stack
    # first item should always be present
    assert len(stack)
    settings = stack[0] if len(stack) == 1 else stack.pop()
    for name, value in settings.items():
        setattr(self, name, value)
示例5: error_handler
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import html [as 别名]
def error_handler(error):
    """Error handler for surrogateescape decoding.

    Should be used with an ASCII-compatible encoding (e.g., 'latin-1' or 'utf-8').
    Replaces any invalid byte sequences with surrogate code points: each
    offending byte ``b`` (always >= 128) becomes the code point ``0xdc00 + b``.

    As specified in
    https://docs.python.org/2/library/codecs.html#codecs.register_error.

    :param error: the exception passed in by the codec machinery.
    :return: ``(replacement_text, position_to_resume_decoding_at)``.
    :raises: ``error`` itself when it is not a ``UnicodeDecodeError`` or when
        the offending range contains an ASCII byte (< 128).
    """
    # We can't use this with UnicodeEncodeError; the UTF-8 encoder doesn't raise
    # an error for surrogates. Instead, use encode.
    if not isinstance(error, UnicodeDecodeError):
        raise error
    try:
        to_char = unichr  # Python 2: chr() cannot produce code points above 255
    except NameError:
        to_char = chr  # Python 3
    escaped = []
    # bytearray yields integers on both Python 2 and Python 3. Indexing the
    # bytes object directly gives a 1-char str on Python 2 but an int on
    # Python 3, where ord() would then fail.
    for byte in bytearray(error.object[error.start:error.end]):
        if byte < 128:
            raise error
        escaped.append(to_char(0xdc00 + byte))
    return ''.join(escaped), error.end
示例6: _jbackslashreplace_error_handler
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import html [as 别名]
def _jbackslashreplace_error_handler(err):
    """
    Encoding error handler that substitutes Java-compliant Unicode escape sequences
    for the characters that could not be encoded.

    :param err: An `:exc:UnicodeEncodeError` instance; any other exception is re-raised.
    :return: ``(replacement, resume_position)`` as described in
        https://docs.python.org/2/library/codecs.html?highlight=codecs#codecs.register_error
    """
    if isinstance(err, UnicodeEncodeError):
        offending = err.object[err.start:err.end]
        return _escape_non_ascii(offending), err.end
    raise err
示例7: encode
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import html [as 别名]
def encode(input: str, errors: str = "strict") -> typing.Tuple[bytes, int]:
    """
    Encode a string with the GSM 7-bit tables and return the bytes together with their length.

    Each character is looked up in ``GSM7BitCodec.gsm_basic_charset_map`` first and
    then in ``GSM7BitCodec.gsm_extension_map``; extension characters are emitted as
    the escape code 27 followed by their code. Characters found in neither map are
    delegated to ``GSM7BitCodec._handle_encode_error``.

    Parameters:
        input: the string to encode
        errors: same meaning as the errors argument to pythons' `encode <https://docs.python.org/3/library/codecs.html#codecs.encode>`_ method

    NOTE(review): the second item returned is the number of *bytes produced*. The
    codecs documentation describes it as the length of input consumed; confirm
    whether any caller relies on the current value before changing it.
    """
    # for the types of this method,
    # see: https://github.com/python/typeshed/blob/f7d240f06e5608a20b2daac4e96fe085c0577239/stdlib/2and3/codecs.pyi#L21-L22
    pieces = []
    for position, char in enumerate(input):
        basic_code = GSM7BitCodec.gsm_basic_charset_map.get(char)
        if basic_code is not None:
            pieces.append(chr(basic_code))
            continue
        extended_code = GSM7BitCodec.gsm_extension_map.get(char)
        if extended_code is None:
            pieces.append(GSM7BitCodec._handle_encode_error(char, errors, position, input))
        else:
            pieces.append(chr(27) + chr(extended_code))
    # Encoding the joined text as latin-1 is equivalent to
    # import six; six.b('someString')
    # see:
    # https://github.com/benjaminp/six/blob/68112f3193c7d4bef5ad86ed1b6ed528edd9093d/six.py#L625
    obj_bytes = "".join(pieces).encode("latin-1")
    return obj_bytes, len(obj_bytes)
示例8: decode
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import html [as 别名]
def decode(input: bytes, errors: str = "strict") -> typing.Tuple[str, int]:
    """
    Decode GSM 7-bit bytes into a string and return it together with its length.

    A byte equal to 27 is an escape: the byte after it is looked up in
    ``GSM7BitCodec.gsm_extension``. Every other byte is looked up in
    ``GSM7BitCodec.gsm_basic_charset``. Lookups that raise ``IndexError`` are
    delegated to ``GSM7BitCodec._handle_decode_error``.

    Parameters:
        input: the bytes to decode
        errors: same meaning as the errors argument to pythons' `encode <https://docs.python.org/3/library/codecs.html#codecs.encode>`_ method

    Note: ``position`` counts loop iterations, so after an escape pair it is
    smaller than the byte offset into ``input``.
    """
    res = iter(input)
    result = []
    for position, c in enumerate(res):
        try:
            if c == 27:
                # A trailing escape byte has nothing after it. A bare
                # next(res) would raise StopIteration straight out of this
                # function and bypass the `errors` handling, so report it
                # through the same IndexError path as any other bad byte.
                following = next(res, None)
                if following is None:
                    raise IndexError("escape byte is not followed by an extension code")
                c = following
                result.append(GSM7BitCodec.gsm_extension[c])
            else:
                result.append(GSM7BitCodec.gsm_basic_charset[c])
        except IndexError as indexErrorException:
            result.append(
                GSM7BitCodec._handle_decode_error(
                    c, errors, position, input, indexErrorException
                )
            )
    obj = "".join(result)
    return (obj, len(obj))
示例9: register_codecs
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import html [as 别名]
def register_codecs(custom_codecs: typing.Union[None, typing.Dict[str, codecs.CodecInfo]] = None):
    """
    Register a codec search function that serves both custom codecs and naz's inbuilt ones.

    A custom codec that shares its encoding name with an inbuilt one takes precedence.
    Users should never have to use this directly,
    instead; use `naz.Client(custom_codecs={"my_encoding": codecs.CodecInfo(name="my_encoding", encode=..., decode=...)})`

    Parameters:
        custom_codecs: a mapping of encoding name to the custom codec to register.
    """
    overrides = {} if custom_codecs is None else custom_codecs

    # Note: Search function registration is not currently reversible,
    # which may cause problems in some cases, such as unit testing or module reloading.
    # https://docs.python.org/3.7/library/codecs.html#codecs.register
    #
    # Note: Encodings are first looked up in the registry's cache, so calling
    # `register_codecs` a second time with different codecs may have no effect:
    # codecs.lookup(encoding) keeps returning the codecs cached from the first call.
    # There doesn't appear to be a way to clear the codec cache at runtime.
    # see: https://docs.python.org/3/library/codecs.html#codecs.lookup
    def _codec_search_function(_encoding):
        """
        Consult the custom codecs before the inbuilt ones, so that an
        overridden inbuilt codec is the implementation that gets chosen
        and cached.
        """
        return overrides.get(_encoding) or _INBUILT_CODECS.get(_encoding)

    codecs.register(_codec_search_function)
示例10: usage
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import html [as 别名]
def usage(self):
    """Return the usage message, which ends with a newline."""
    # The literal starts with a newline purely for layout; lstrip() removes it.
    return """
Usage: program [OPTIONS] FILENAME [SQL|CMD] [SQL|CMD]...
FILENAME is the name of a SQLite database. A new database is
created if the file does not exist.
OPTIONS include:
-init filename read/process named file
-echo print commands before execution
-[no]header turn headers on or off
-bail stop after hitting an error
-interactive force interactive I/O
-batch force batch I/O
-column set output mode to 'column'
-csv set output mode to 'csv'
-html set output mode to 'html'
-line set output mode to 'line'
-list set output mode to 'list'
-python set output mode to 'python'
-separator 'x' set output field separator (|)
-nullvalue 'text' set text string for NULL values
-version show SQLite version
-encoding 'name' the encoding to use for files
opened via .import, .read & .output
-nocolour disables colour output to screen
""".lstrip()
###
### Value formatting routines. They take a value and return a
### text formatting of them. Mostly used by the various output's
### but also by random other pieces of code.
###
示例11: find_encodings
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import html [as 别名]
def find_encodings(enc=None, system=False):
    """Find functions for encoding translations for a specific codec.

    :param str enc: The codec to find translation functions for. It is
                    lowercased and then passed through
                    ``encodings.normalize_encoding``. Defaults to
                    ``'utf-8'`` when empty or ``None``.
    :param bool system: If True, ignore ``enc`` and use the encoding of the
                        system's stdin, falling back to ``'ascii'`` when
                        stdin does not report one.
    :raises: :exc:LookupError if the normalized codec, ``enc``, cannot be
             found in Python's encoding translation map.
    :returns: the ``codecs.CodecInfo`` returned by ``codecs.lookup``.
    """
    if not enc:
        enc = 'utf-8'

    if system:
        # The original test was inverted ("is None"): it copied a missing
        # encoding (None) into ``enc`` and crashed on ``.lower()``, and it
        # discarded a valid stdin encoding in favour of 'ascii'.
        stdin_encoding = getattr(sys.stdin, 'encoding', None)
        if stdin_encoding is not None:
            enc = stdin_encoding
            log.debug("Obtained encoding from stdin: %s" % enc)
        else:
            enc = 'ascii'

    ## have to have lowercase to work, see
    ## http://docs.python.org/dev/library/codecs.html#standard-encodings
    enc = enc.lower()
    codec_alias = encodings.normalize_encoding(enc)

    # NOTE(review): this registers the stdlib search function again on every
    # call; it looks redundant, but it is kept to preserve behaviour.
    codecs.register(encodings.search_function)
    return codecs.lookup(codec_alias)
示例12: b
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import html [as 别名]
def b(x):
    """Return ``x`` encoded with the default codec from ``find_encodings()``.

    ``bytes`` input is first decoded with that same codec and then
    re-encoded; any other input is encoded directly.

    See http://python3porting.com/problems.html#nicer-solutions
    """
    coder = find_encodings()
    text = x.decode(coder.name) if isinstance(x, bytes) else x
    # Codec encode functions return (output, length); only the output is needed.
    return coder.encode(text)[0]
示例13: open_
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import html [as 别名]
def open_(filename, mode='r', encoding=None):
    """Open a text file with encoding and optional gzip compression.

    Files whose name ends in ``.gz`` are opened through :mod:`gzip`; all
    other files are opened directly.

    Note that on legacy Python any encoding other than ``None`` or opening
    GZipped files will return an unpicklable file-like object.

    Parameters
    ----------
    filename : str
        The filename to read.
    mode : str, optional
        The mode with which to open the file. Defaults to `r`.
    encoding : str, optional
        The encoding to use (see the codecs documentation_ for supported
        values). Defaults to ``None``.

    .. _documentation:
       https://docs.python.org/3/library/codecs.html#standard-encodings

    """
    if filename.endswith('.gz'):
        if not six.PY2:
            # NOTE(review): ``gzip.open`` is documented to reject ``encoding``
            # in binary mode, and ``'r'`` is a binary mode for gzip, so a
            # non-None encoding with the default mode looks like it would
            # fail here. Confirm the intended behaviour.
            return io.BufferedReader(gzip.open(filename, mode,
                                               encoding=encoding))
        buffered = io.BufferedReader(gzip.open(filename, mode))
        return codecs.getreader(encoding)(buffered) if encoding else buffered
    if not six.PY2:
        return open(filename, mode, encoding=encoding)
    if encoding:
        return codecs.open(filename, mode, encoding=encoding)
    return open(filename, mode)
示例14: encode
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import html [as 别名]
def encode(self, input, errors='strict'):
    """Encode ``input`` with ``self.base_encoding``, using ``self.error`` for unencodable parts.

    Only ``errors='strict'`` is supported. Returns ``(encoded_bytes,
    len(input))``. ``self.error`` receives the ``UnicodeEncodeError`` and must
    return ``(replacement_bytes, position)``, where ``position`` is the index
    in the remaining text at which encoding resumes.
    """
    assert errors == 'strict'
    # A single codecs.encode(input, self.base_encoding, self.name) call could
    # have been enough, with the registered error handler substituting our
    # extended byte sequences for unencodable characters.
    #
    # However, there seems to be a design bug in Python (probably intentional):
    # the error handler for encoding is supposed to return a **Unicode**
    # character, which then has to be encodable itself.
    #
    # So this does what codecs.encode() should have done: the error handler
    # returns bytes() that are appended to the output directly.
    #
    # This seems to have been fixed in Python 3.3. We should try using that and
    # use this fallback only if that failed.
    # https://docs.python.org/3.3/library/codecs.html#codecs.register_error
    total = len(input)
    encoded = b''
    remaining = input
    while remaining:
        try:
            encoded += codecs.encode(remaining, self.base_encoding)
        except UnicodeEncodeError as exc:
            # Keep the prefix that did encode, then let the handler supply
            # the bytes for the offending part and say where to resume.
            encoded += codecs.encode(remaining[:exc.start], self.base_encoding)
            substitute, resume_at = self.error(exc)
            encoded += substitute
            remaining = remaining[resume_at:]
        else:
            remaining = ''  # everything was converted
    return encoded, total
示例15: decodePage
# 需要导入模块: import codecs [as 别名]
# 或者: from codecs import html [as 别名]
def decodePage(page, contentEncoding, contentType):
"""
Decode compressed/charset HTTP response
"""
if not page or (conf.nullConnection and len(page) < 2):
return getUnicode(page)
if isinstance(contentEncoding, basestring) and contentEncoding:
contentEncoding = contentEncoding.lower()
else:
contentEncoding = ""
if isinstance(contentType, basestring) and contentType:
contentType = contentType.lower()
else:
contentType = ""
if contentEncoding in ("gzip", "x-gzip", "deflate"):
if not kb.pageCompress:
return None
try:
if contentEncoding == "deflate":
data = StringIO.StringIO(zlib.decompress(page, -15)) # Reference: http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
else:
data = gzip.GzipFile("", "rb", 9, StringIO.StringIO(page))
size = struct.unpack("<l", page[-4:])[0] # Reference: http://pydoc.org/get.cgi/usr/local/lib/python2.5/gzip.py
if size > MAX_CONNECTION_TOTAL_SIZE:
raise Exception("size too large")
page = data.read()
except Exception, msg:
if "<html" not in page: # in some cases, invalid "Content-Encoding" appears for plain HTML (should be ignored)
errMsg = "detected invalid data for declared content "
errMsg += "encoding '%s' ('%s')" % (contentEncoding, msg)
singleTimeLogMessage(errMsg, logging.ERROR)
warnMsg = "turning off page compression"
singleTimeWarnMessage(warnMsg)
kb.pageCompress = False
raise SqlmapCompressionException