本文整理汇总了Python中six.unichr函数的典型用法代码示例。如果您正苦于以下问题：Python unichr函数的具体用法？Python unichr怎么用？Python unichr使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了unichr函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test___init__
def test___init__(self):
    """Check IronicException.__str__() for a message with non-ASCII characters."""
    # Three non-ASCII code points: U+00E9, U+0BF2 and U+0F84.
    message = six.unichr(233) + six.unichr(0x0bf2) + six.unichr(3972)

    # UTF-8 encoding of the message above; compared as bytes on Python 2
    # and as decoded text on Python 3.
    utf8_bytes = b'\xc3\xa9\xe0\xaf\xb2\xe0\xbe\x84'
    expected = utf8_bytes.decode('utf-8') if six.PY3 else utf8_bytes

    exc = exception.IronicException(message)
    self.assertEqual(expected, exc.__str__())
示例2: test_dash_issue
def test_dash_issue(self):
    """Check how the dash character is handled with and without entity fixing."""
    # NOTE(review): the dash literals in this test may originally have been
    # numeric character references (the comments below talk about
    # &#[128-160]; entities) -- confirm against the upstream test.
    html = '<strong>—</strong>'
    self.server.response['get.data'] = html

    def strong_text(grab_obj):
        # Re-run the XPath query on every call, as each assertion does.
        return grab_obj.doc.select('//strong/text()').text()

    # By default &#[128-160]; are fixed
    grab = build_grab()
    grab.go(self.server.get_url())
    self.assertFalse(strong_text(grab) == six.unichr(151))
    self.assertTrue(strong_text(grab) == six.unichr(8212))

    # With the fix-behaviour disabled the raw code point is kept.
    grab.setup(fix_special_entities=False)
    grab.go(self.server.get_url())
    self.assertTrue(strong_text(grab) == six.unichr(151))
    self.assertFalse(strong_text(grab) == six.unichr(8212))

    # Explicitly use unicode_body func on a fresh Grab instance.
    grab = build_grab()
    grab.go(self.server.get_url())
    self.assertTrue('—' in grab.doc.unicode_body())
示例3: gen_utf8
def gen_utf8(file_bytes):
    """Decode ``file_bytes`` as UTF-8 and tally the characters it contains.

    Returns a tuple ``(file_dict, rows_dict)``:

    * ``file_dict`` maps each decoded character to its occurrence count.
    * ``rows_dict`` has a ``True`` entry keyed by the first character of the
      16-code-point row of every character above U+007F.

    Exits the process with status 2 if the bytes are not valid UTF-8 (in
    strict mode) and with status 1 on any other decoding failure.
    """
    global args

    # Strict decoding unless the error flag on ``args`` is set.
    errors = 'replace' if args.errors else 'strict'
    try:
        file_utf8 = file_bytes.decode('utf-8', errors)
    except UnicodeDecodeError:
        # NOTE(review): ``source`` is not defined in this function; presumably
        # a module-level name -- confirm.
        print("file {0} is not valid utf-8, try analysing file as bytes using flag -b or enable error replacement with flag -e\n".format(source))
        sys.exit(2)
    except:
        traceback.print_exc()
        sys.exit(1)

    file_dict = {}
    rows_dict = {}
    ascii_limit = six.unichr(0x7f)
    for c in file_utf8:
        file_dict[c] = file_dict.get(c, 0) + 1
        if c > ascii_limit:
            # Round the code point down to a multiple of 16 (start of its row).
            row_start = (ord(c) // 16) * 16
            rows_dict[six.unichr(row_start)] = True
    return (file_dict, rows_dict)
示例4: setUp
def setUp(self):
    """Collect the UTF-8 test sequences into ``self._TEST_SEQUENCES``."""
    # These tests verify the UTF-8 decoder/validator on the various test cases from
    # http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
    vs = []
    for k in _create_utf8_test_sequences():
        vs.extend(k[1])

    # All Unicode code points. The bound should be 0x10ffff, but a non-wide
    # Python build is limited to 16 bits. Surrogate code points are filtered
    # out because they are disallowed in UTF-8.
    # NOTE(review): range() stops before 0xffff, so U+FFFF itself is not
    # added -- confirm whether that is intended.
    vs.extend(
        (True, six.unichr(i).encode("utf-8"))
        for i in range(0, 0xffff)
        if i < 0xD800 or i > 0xDFFF
    )

    # FIXME: UnicodeEncodeError: 'utf-8' codec can't encode character '\ud800'
    # in position 0: surrogates not allowed
    if False:
        # 5.1 Single UTF-16 surrogates
        for i in range(0xD800, 0xDBFF):  # high-surrogate
            vs.append((False, six.unichr(i).encode("utf-8")))
        for i in range(0xDC00, 0xDFFF):  # low-surrogate
            vs.append((False, six.unichr(i).encode("utf-8")))

        # 5.2 Paired UTF-16 surrogates, in both orders
        for i in range(0xD800, 0xDBFF):  # high-surrogate
            for j in range(0xDC00, 0xDFFF):  # low-surrogate
                ss1 = six.unichr(i).encode("utf-8")
                ss2 = six.unichr(j).encode("utf-8")
                vs.append((False, ss1 + ss2))
                vs.append((False, ss2 + ss1))

    self._TEST_SEQUENCES = vs
示例5: test_render_response_utf8
def test_render_response_utf8(self):
    """Check that _render_response() keeps non-ASCII text in the <utf8> element."""
    req = apirequest.APIRequest("FakeAction", "FakeVersion", {})

    # U+A000 + 'abcd' + U+07B4
    payload = six.unichr(40960) + u'abcd' + six.unichr(1972)
    rendered = req._render_response({'utf8': payload}, 'uuid')
    data = rendered.decode()

    # NOTE(review): the expected literal below may originally have used
    # numeric character references -- confirm against the upstream test.
    self.assertIn('<utf8>ꀀabcd޴</utf8>', data)
示例6: _init_cache
def _init_cache():
    '''Populate the module-level wide->narrow and narrow->wide character maps.

    Every named code point below 65536 is paired with a counterpart found by
    name: a 'FULLWIDTH X' character is paired with 'X', and any other
    character 'Y' is paired with 'HALFWIDTH Y' when such a name exists.
    '''
    global _wide_to_narrow
    global _narrow_to_wide
    _wide_to_narrow = {}
    _narrow_to_wide = {}

    fullwidth_prefix = 'FULLWIDTH '
    for code in range(0, 65536):
        wide_ch = six.unichr(code)
        name = unicodedata.name(wide_ch, None)
        if name is None:
            # Code point has no Unicode name.
            continue

        if name.upper().startswith(fullwidth_prefix):
            half_name = name[len(fullwidth_prefix):]
        else:
            half_name = 'HALFWIDTH {}'.format(name)

        try:
            half_ch = unicodedata.lookup(half_name)
        except KeyError:
            # No character carries the derived name; nothing to map.
            continue

        _wide_to_narrow[wide_ch] = half_ch
        _narrow_to_wide[half_ch] = wide_ch

    logger.debug('Mapped %d characters from wide<->narrow',
                 len(_wide_to_narrow))
示例7: _mouse_handler
def _mouse_handler(self, cli, mouse_event):
    """
    Handle mouse events in a pane. A click in a non-active pane will select
    it, one in an active pane, will send the mouse event to the application
    running inside it.
    """
    process = self.process
    x = mouse_event.position.x
    # The containing Window translates coordinates to the absolute position
    # of the whole screen, but in this case, we need the relative
    # coordinates of the visible area.
    y = mouse_event.position.y - self.process.screen.line_offset

    if not self.has_focus(cli):
        # Focus this process when the mouse has been clicked.
        if mouse_event.event_type == MouseEventTypes.MOUSE_UP:
            self.set_focus(cli)
        return

    # Already focussed: forward the event to the application, using the
    # mouse protocol it requested (if any).
    screen = process.screen
    event_type = mouse_event.event_type

    if screen.sgr_mouse_support_enabled:
        # Xterm SGR mode: (event code, final character) per event type.
        sgr_codes = {
            MouseEventTypes.MOUSE_DOWN: ('0', 'M'),
            MouseEventTypes.MOUSE_UP: ('0', 'm'),
            MouseEventTypes.SCROLL_UP: ('64', 'M'),
            MouseEventTypes.SCROLL_DOWN: ('65', 'M'),
        }
        ev, m = sgr_codes.get(event_type)
        self.process.write_input(
            '\x1b[<%s;%s;%s%s' % (ev, x + 1, y + 1, m))
        return

    # Event codes shared by the urxvt mode and the old fallback mode.
    legacy_codes = {
        MouseEventTypes.MOUSE_DOWN: 32,
        MouseEventTypes.MOUSE_UP: 35,
        MouseEventTypes.SCROLL_UP: 96,
        MouseEventTypes.SCROLL_DOWN: 97,
    }

    if screen.urxvt_mouse_support_enabled:
        # Urxvt mode.
        ev = legacy_codes.get(event_type)
        self.process.write_input(
            '\x1b[%s;%s;%sM' % (ev, x + 1, y + 1))
    elif screen.mouse_support_enabled:
        # Fall back to old mode, which is only used for x and y below 96.
        if x < 96 and y < 96:
            ev = legacy_codes.get(event_type)
            self.process.write_input('\x1b[M%s%s%s' % (
                six.unichr(ev),
                six.unichr(x + 33),
                six.unichr(y + 33)))
示例8: test_reading_utf8_without_flag
def test_reading_utf8_without_flag(self):
    """Read a UTF-8 record under three different MARCReader option sets."""
    data_path = 'test/utf8_without_leader_flag.dat'

    def first_title(**reader_options):
        # Open the fixture, read its first record, check the record type and
        # return subfield 'a' of field 240.
        with open(data_path, 'rb') as fh:
            record = next(MARCReader(fh, **reader_options))
            self.assertEqual(type(record), Record)
            return record['240']['a']

    # Raw bytes: nothing is decoded.
    utitle = first_title(to_unicode=False)
    self.assertEqual(type(utitle), binary_type)
    self.assertEqual(utitle,
                     b'De la solitude a\xcc\x80 la communaute\xcc\x81.')

    # unless you force utf-8 characters will get lost and
    # warnings will appear in the terminal
    utitle = first_title(to_unicode=True, hide_utf8_warnings=True)
    self.assertEqual(type(utitle), text_type)
    self.assertEqual(utitle, 'De la solitude a la communaute .')

    # force reading as utf-8
    utitle = first_title(to_unicode=True, force_utf8=True,
                         hide_utf8_warnings=True)
    self.assertEqual(type(utitle), text_type)
    self.assertEqual(utitle, u'De la solitude a' + unichr(0x0300) +
                     ' la communaute' + unichr(0x0301) + '.')
示例9: _build_illegal_xml_regex
def _build_illegal_xml_regex():
    """Build a compiled regex matching any single illegal XML character.

    The pattern is a unicode character class and is meant to be applied to
    unicode strings.
    """
    # Inclusive (low, high) code point ranges that are always checked.
    illegal_chars_u = [
        (0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x1F), (0x7F, 0x84),
        (0x86, 0x9F), (0xFDD0, 0xFDDF), (0xFFFE, 0xFFFF)]

    # Wide builds can also represent the last two code points of each
    # supplementary plane (0x1FFFE-0x1FFFF up to 0x10FFFE-0x10FFFF).
    if sys.maxunicode >= 0x10000:
        illegal_chars_u.extend(
            ((plane << 16) | 0xFFFE, (plane << 16) | 0xFFFF)
            for plane in range(0x01, 0x11))

    # Turn every pair into a "low-high" range expression and join them into
    # the body of one character class.
    char_class = six.u('').join(
        "%s-%s" % (six.unichr(low), six.unichr(high))
        for (low, high) in illegal_chars_u)

    return re.compile(six.u('[%s]') % char_class)
示例10: decompress
def decompress(compressed):
    """Decompress a sequence of output ks to a string.

    Args:
        compressed: Iterable of codes. Codes in the initial dictionary are
            single unicode characters; codes added during decompression are
            integers starting at 0x10000. The iterable is only read, never
            modified.

    Returns:
        The decompressed unicode string; empty if ``compressed`` is empty.

    Raises:
        ValueError: If a code is neither in the dictionary nor the next code
            about to be defined.
    """
    result = io.StringIO()

    # Walk the input through an iterator rather than pop(0), so the caller's
    # list is left untouched and an empty input yields an empty string.
    codes = iter(compressed)
    try:
        w = next(codes)
    except StopIteration:
        return result.getvalue()

    # Build the dictionary: every single character maps to itself.
    dict_size = 0x10000
    dictionary = {six.unichr(i): six.unichr(i) for i in range(dict_size)}

    result.write(w)
    for k in codes:
        if k in dictionary:
            entry = dictionary[k]
        elif k == dict_size:
            # The code refers to the entry that is being defined right now.
            entry = w + w[0]
        else:
            raise ValueError('Bad compressed k: %s' % k)
        result.write(entry)
        # Add w+entry[0] to the dictionary.
        dictionary[dict_size] = w + entry[0]
        dict_size += 1
        w = entry
    return result.getvalue()
示例11: _fuzz
def _fuzz(word, fuzziness=0.2):
"""Fuzz a word with noise.
Parameters
----------
word : str
A word to fuzz
fuzziness : float
How fuzzy to make the word
Returns
-------
str
A fuzzed word
"""
while True:
new_word = []
for ch in word:
if random() > fuzziness: # noqa: S311
new_word.append(ch)
else:
if random() > 0.5: # noqa: S311
new_word.append(choice(printable)) # noqa: S311
elif random() > 0.8: # noqa: S311
new_word.append(unichr(randint(0, 0x10FFFF))) # noqa: S311
else:
new_word.append(unichr(randint(0, 0xFFFF))) # noqa: S311
if random() > 0.5: # noqa: S311
new_word.append(ch)
new_word = ''.join(new_word)
if new_word != word:
return new_word
示例12: setUp
def setUp(self):
    """Load the 2D test cube and prepare a 1D cube with a unicode attribute."""
    self.cube_2d = iris.load_cube(
        tests.get_data_path(('PP', 'simple_pp', 'global.pp')))

    # Generate the unicode cube up here now it's used in two tests.
    cube = iris.tests.stock.simple_1d()
    cube.attributes['source'] = (
        six.unichr(40960) + u'abcd' + six.unichr(1972))
    self.unicode_cube = cube
示例13: set_unichr
def set_unichr(x):
    """Return the unicode character for ``x``.

    Strings are parsed as hexadecimal code points, integers are used as-is
    and anything else is converted with ``int()`` first.
    """
    if isinstance(x, string_types):
        code_point = int(x, 16)
    elif isinstance(x, integer_types):
        code_point = x
    else:
        code_point = int(x)
    return unichr(code_point)
示例14: _next_code_point
def _next_code_point(val, val_iter, yield_char=False, to_int=lambda x: x):
    """Provides the next *code point* in the given Unicode sequence.

    This generator function yields complete character code points, never incomplete surrogates. When a low surrogate is
    found without following a high surrogate, this function raises ``ValueError`` for having encountered an unpaired
    low surrogate. When the provided iterator ends on a high surrogate, this function yields ``None``. This is the
    **only** case in which this function yields ``None``. When this occurs, the user may append additional data to the
    input unicode sequence and resume iterating through another ``next`` on this generator. When this function receives
    ``next`` after yielding ``None``, it *reinitializes the unicode iterator*. This means that this feature can only
    be used for values that contain an ``__iter__`` implementation that remains at the current position in the data
    when called (e.g. :class:`BufferQueue`). At this point, there are only two possible outcomes:

        * If next code point is a valid low surrogate, this function yields the combined code point represented by the
          surrogate pair.
        * Otherwise, this function raises ``ValueError`` for having encountered an unpaired high surrogate.

    If ``val_iter`` is already exhausted when the generator starts, the generator finishes without yielding anything.

    Args:
        val (unicode|BufferQueue): A unicode sequence or unicode BufferQueue over which to iterate.
        val_iter (Iterator[unicode|BufferQueue]): The unicode sequence iterator over ``val`` from which to generate the
            next integer code point in the range ``0x0`` to ``0x10FFFF``.
        yield_char (Optional[bool]): If True **and** the character code point resulted from a surrogate pair, this
            function will yield a :class:`CodePoint` representing the character code point and containing the original
            unicode character. This is useful when the original unicode character will be needed again because UCS2
            Python builds will error when trying to convert code points greater than 0xFFFF back into their
            unicode character representations. This avoids requiring the user to mathematically re-derive the
            surrogate pair in order to successfully convert the code point back to a unicode character.
        to_int (Optional[callable]): A function to call on each element of val_iter to convert that element to an int.

    Raises:
        ValueError: On an unpaired low surrogate or an unpaired high surrogate.
    """
    try:
        high = next(val_iter)
    except StopIteration:
        # Since PEP 479 (Python 3.7+) a StopIteration escaping a generator body
        # is converted to RuntimeError; end the generator explicitly instead,
        # which is what the leaked exception did on older interpreters.
        return
    low = None
    code_point = to_int(high)
    if _LOW_SURROGATE_START <= code_point <= _LOW_SURROGATE_END:
        raise ValueError('Unpaired low surrogate in Unicode sequence: %d' % code_point)
    elif _HIGH_SURROGATE_START <= code_point <= _HIGH_SURROGATE_END:
        def combine_surrogates():
            # Reads the low half from the *current* val_iter (the closure sees
            # the re-created iterator after a resume) and merges it with the
            # high surrogate still held in code_point.
            low_surrogate = next(val_iter)
            low_code_point = to_int(low_surrogate)
            if low_code_point < _LOW_SURROGATE_START or low_code_point > _LOW_SURROGATE_END:
                raise ValueError('Unpaired high surrogate: %d' % code_point)
            # Decode the surrogates
            real_code_point = _NON_BMP_OFFSET
            real_code_point += (code_point - _HIGH_SURROGATE_START) << 10
            real_code_point += (low_code_point - _LOW_SURROGATE_START)
            return real_code_point, low_surrogate
        try:
            code_point, low = combine_surrogates()
        except StopIteration:
            # Input ended on a high surrogate: signal it and wait for more data.
            yield None
            val_iter = iter(val)  # More data has appeared in val.
            # NOTE(review): if the new iterator is still empty, next() raises
            # StopIteration here, which Python 3.7+ reports as RuntimeError --
            # confirm the intended behaviour for that case.
            code_point, low = combine_surrogates()
    if yield_char and low is not None:
        out = CodePoint(code_point)
        if isinstance(val, six.text_type):
            # Iterating over a text type returns text types.
            out.char = high + low
        else:
            out.char = six.unichr(high) + six.unichr(low)
    else:
        out = code_point
    yield out
示例15: format_bar
def format_bar(cnt):
    """Render ``cnt`` as a bar of block characters scaled by ``width / max_count``.

    The bar consists of full blocks (U+2588) for the integer part of the
    scaled value, followed by one extra block character chosen from the
    U+2588..U+258F range for a non-zero remainder, measured in eighths.
    """
    scaled = cnt*width/max_count
    whole_blocks = int(floor(scaled))
    remainder_eighths = int(ceil((scaled-whole_blocks)*8))

    bar = whole_blocks*six.unichr(0x2588)
    if not remainder_eighths:
        return bar
    # More eighths select a code point closer to U+2588.
    return bar + six.unichr(0x2588+(8-remainder_eighths))