本文整理汇总了 Python 中 six.unichr 方法的典型用法代码示例。如果您正苦于以下问题:Python six.unichr 方法的具体用法?Python six.unichr 怎么用?Python six.unichr 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 six 的用法示例。
在下文中一共展示了six.unichr方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _unescape_token
# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def _unescape_token(escaped_token):
    """Undo the escaping applied by _escape_token().

    Args:
        escaped_token: a unicode string

    Returns:
        token: a unicode string
    """
    def _decode(found):
        digits = found.group(1)
        if digits is not None:
            try:
                return six.unichr(int(digits))
            except (ValueError, OverflowError):
                # Stand-in glyph used when the number is not a usable
                # code point.
                return u"\u3013"
        # No numeric group: the match is one of the two literal escapes.
        if found.group(0) == u"\\u":
            return u"_"
        return u"\\"

    # A single trailing underscore is dropped before substitution.
    if escaped_token.endswith("_"):
        body = escaped_token[:-1]
    else:
        body = escaped_token
    return _UNESCAPE_REGEX.sub(_decode, body)
示例2: getRaw
# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def getRaw(self, idx):
    """Build the string stored at position ``idx`` of the string table.

    Returns None when ``idx`` is negative, when ``m_stringOffsets`` is an
    empty list, or when ``idx`` is beyond the last offset. Otherwise the
    value read with ``getShort`` at the string's offset is used as the
    character count, and one character is read per step of 2 after it.
    """
    if idx < 0:
        return None
    offsets = self.m_stringOffsets
    if offsets == [] or idx >= len(offsets):
        return None

    position = offsets[idx].get_value()
    remaining = self.getShort(self.m_strings, position)
    pieces = []
    for _ in range(remaining):
        position += 2
        # Convert through unichr because the apk might contain a
        # non-ASCII label.
        char = six.unichr(self.getShort(self.m_strings, position))
        # FIXME: every "&" character is dropped from the result.
        if char != "&":
            pieces.append(char)
    return "".join(pieces)
示例3: handle_charref
# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def handle_charref(self, name):
    """
    Called when a numeric char ref like '&#8212;' or '&#x2014;' is found.

    `name` is the body of the reference, i.e. what follows '&#' and
    precedes ';' (e.g. `8212` or `x2014`). The '#' is prepended again
    before the reference is handed to `_handle_ref`.

    A hexadecimal body may start with either 'x' or 'X'. When the body
    cannot be converted to a character, `_handle_ref` receives an empty
    string as the character.
    """
    try:
        # Accept both hex markers; an uppercase 'X' body would otherwise be
        # parsed as decimal, fail, and be replaced by an empty string.
        if name.startswith(('x', 'X')):
            codepoint = int(name[1:], 16)
        else:
            codepoint = int(name)
        char = six.unichr(codepoint)
    except (ValueError, OverflowError):
        # Malformed digits or a code point outside the supported range.
        char = ''
    self._handle_ref('#' + name, char)
示例4: _unescape_token
# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def _unescape_token(escaped_token):
    """Reverse _escape_token() for a single token.

    Args:
        escaped_token: a unicode string

    Returns:
        token: a unicode string
    """
    fallback = u"\u3013"  # Unicode for undefined character.

    def _replace(hit):
        number = hit.group(1)
        if number is None:
            # Literal escapes: "\u" stands for "_", anything else for "\".
            if hit.group(0) == u"\\u":
                return u"_"
            return u"\\"
        try:
            code = int(number)
            return six.unichr(code)
        except (ValueError, OverflowError):
            return fallback

    has_marker = escaped_token.endswith("_")
    payload = escaped_token[:-1] if has_marker else escaped_token
    return _UNESCAPE_REGEX.sub(_replace, payload)
示例5: join_phonemes
# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def join_phonemes(*args):
    """Joins a Hangul letter from Korean phonemes.

    Accepts either separate arguments ``(onset, nucleus[, coda])`` or one
    sequence holding those values. When the coda is omitted, ``CODAS[0]``
    is used.

    Returns:
        The character ``unichr(FIRST_HANGUL_OFFSET + offset)``, where
        ``offset`` is computed from the index of each phoneme in
        ``ONSETS``, ``NUCLEUSES`` and ``CODAS``.

    Raises:
        TypeError: if the normalized arguments do not unpack into exactly
            three phonemes.
    """
    # Normalize arguments as onset, nucleus, coda.
    if len(args) == 1:
        # sequence of (onset, nucleus[, coda])
        args = args[0]
    if len(args) == 2:
        # Build a new tuple instead of using `+=`: in-place addition would
        # extend a list passed by the caller and fails for other sequence
        # types.
        args = tuple(args) + (CODAS[0],)
    try:
        onset, nucleus, coda = args
    except ValueError:
        raise TypeError('join_phonemes() takes at most 3 arguments')
    offset = (
        (ONSETS.index(onset) * NUM_NUCLEUSES + NUCLEUSES.index(nucleus)) *
        NUM_CODAS + CODAS.index(coda)
    )
    return unichr(FIRST_HANGUL_OFFSET + offset)
示例6: test_import_file_path_utf8_encoded_ascii_latin_nbsp
# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def test_import_file_path_utf8_encoded_ascii_latin_nbsp():
    """Import a file whose UTF-8 encoded path contains code point 160."""
    _, folder = helper.create_working_folder()
    _, folder_destination = helper.create_working_folder()

    unicode_origin = u''.join(
        [text_type(folder), u'/unicode', six_unichr(160), u'filename.txt']
    )
    # Pass the importer an encoded (UTF-8) path rather than a unicode one.
    origin = unicode_origin.encode('utf-8')
    shutil.copyfile(helper.get_file('valid.txt'), origin)

    helper.reset_dbs()
    dest_path = elodie.import_file(
        origin, folder_destination, False, False, False
    )
    helper.restore_dbs()

    # Clean up the working folders before checking the result.
    shutil.rmtree(folder)
    shutil.rmtree(folder_destination)

    expected = helper.path_tz_fix(os.path.join(
        '2016-04-Apr',
        'London',
        u'2016-04-07_11-15-26-unicode\xa0filename-sample-title.txt',
    ))
    assert expected in dest_path, dest_path
示例7: test_import_file_path_unicode_latin_nbsp
# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def test_import_file_path_unicode_latin_nbsp():
    """Import a file whose unicode path contains code point 160."""
    _, folder = helper.create_working_folder()
    _, folder_destination = helper.create_working_folder()

    # The path stays a unicode string; it is not encoded before use.
    origin = u''.join(
        [text_type(folder), u'/unicode', six_unichr(160), u'filename.txt']
    )
    shutil.copyfile(helper.get_file('valid.txt'), origin)

    helper.reset_dbs()
    dest_path = elodie.import_file(
        origin, folder_destination, False, False, False
    )
    helper.restore_dbs()

    # Clean up the working folders before checking the result.
    shutil.rmtree(folder)
    shutil.rmtree(folder_destination)

    expected = helper.path_tz_fix(os.path.join(
        '2016-04-Apr',
        'London',
        u'2016-04-07_11-15-26-unicode\xa0filename-sample-title.txt',
    ))
    assert expected in dest_path, dest_path
示例8: _narrow_unichr
# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def _narrow_unichr(code_point):
    """Return the unicode character for a code point without breaking on narrow builds.

    The built-in unichr function fails for ordinals above 0xFFFF on narrow
    (UCS2) builds, where such ordinals would have to be rebuilt as surrogate
    pairs. When the code point carries the text it was originally read from
    and that text is longer than one unit, that text is returned instead.

    Args:
        code_point (int|CodePoint): An int, or a subclass of int that stores
            the unicode character for its code point in an attribute named
            'char'.
    """
    try:
        original = code_point.char
        spans_several_units = len(original) > 1
    except AttributeError:
        # Plain ints have no 'char' attribute; convert them directly.
        spans_several_units = False
    if spans_several_units:
        return original
    return six.unichr(code_point)
示例9: test_exception_converted
# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def test_exception_converted(self):
    """validate_integer raises InvalidInput for each rejected input below."""
    # Each entry is (positional arguments, keyword arguments).
    rejected = (
        (("im-not-an-int", "not-an-int"), {}),
        ((3.14, "Pie"), {}),
        (("299", "Sparta no-show"), {"min_value": 300, "max_value": 300}),
        ((55, "doing 55 in a 54"), {"max_value": 54}),
        ((six.unichr(129), "UnicodeError"), {"max_value": 1000}),
    )
    for positional, keywords in rejected:
        self.assertRaises(exception.InvalidInput,
                          utils.validate_integer,
                          *positional, **keywords)
示例10: _unescape_token
# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def _unescape_token(escaped_token):
    """Invert _escape_token().

    Args:
        escaped_token: a unicode string

    Returns:
        token: a unicode string
    """
    # Replacement text for the literal escapes; any other literal match
    # becomes a backslash.
    literal_escapes = {u"\\u": u"_"}

    def _expand(mo):
        ordinal = mo.group(1)
        if ordinal is None:
            return literal_escapes.get(mo.group(0), u"\\")
        try:
            return six.unichr(int(ordinal))
        except (ValueError, OverflowError):
            # Numbers that are not usable code points are dropped.
            return ""

    stripped = escaped_token
    if stripped.endswith("_"):
        stripped = stripped[:-1]
    return _UNESCAPE_REGEX.sub(_expand, stripped)
示例11: test_unicode_combining_characters
# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def test_unicode_combining_characters(self):
    # The same visible character can be encoded in more than one way: an
    # accented letter may be one code point with the accent built in, or
    # the plain letter followed by a combining code point. See
    # https://docs.python.org/2/library/unicodedata.html.
    # Both forms of a capital C with a cedilla are built here: the single
    # composite code point, and C followed by the combining cedilla.
    precomposed = six.unichr(0xC7)
    decomposed = six.unichr(0x43) + six.unichr(0x0327)
    forms = (precomposed, decomposed)
    # Every form must be found when searching text written in every form.
    for haystack in forms:
        searcher = TextSearcher(haystack)
        for needle in forms:
            self.assertIsNotNone(searcher.search(needle))
示例12: ntou
# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def ntou(n, encoding='ISO-8859-1'):
    """Return the native string as Unicode with the given encoding.

    Args:
        n: the native string; it is checked with ``assert_native`` first.
        encoding: the codec used to decode ``n``, or the special value
            ``'escape'`` (see below).

    Returns:
        The decoded unicode string. With ``encoding='escape'``, ``n`` is
        decoded as ISO-8859-1 and every ``\\uXXXX`` sequence with four
        hexadecimal digits is replaced by the character it names.
    """
    assert_native(n)
    # In Python 2, the native string type is bytes.
    # First, check for the special encoding 'escape'. The test suite uses
    # this to signal that it wants to pass a string with embedded \uXXXX
    # escapes, but without having to prefix it with u'' for Python 2,
    # but no prefix for Python 3.
    if encoding == 'escape':
        return re.sub(
            # Match hexadecimal digits only: a wider letter class would
            # accept text such as "\uzzzz" and make int(..., 16) raise
            # ValueError.
            r'\\u([0-9a-fA-F]{4})',
            lambda m: six.unichr(int(m.group(1), 16)),
            n.decode('ISO-8859-1'),
        )
    # Assume it's already in the given encoding, which for ISO-8859-1
    # is almost always what was intended.
    return n.decode(encoding)
示例13: compose
# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def compose(chosung, joongsung, jongsung=u''):
    """Compose a Hangul letter from the given chosung, joongsung and jongsung.

    @param chosung
    @param joongsung
    @param jongsung the terminal Hangul letter. Optional; None is treated
        the same as the empty default.

    Raises NotHangulException when any of the index lookups fails.
    """
    final = u'' if jongsung is None else jongsung
    try:
        cho_idx = CHO.index(chosung)
        joong_idx = JOONG.index(joongsung)
        jong_idx = JONG.index(final)
    except Exception:
        raise NotHangulException('No valid Hangul character index')
    # Same sum as cho * NUM_JOONG * NUM_JONG + joong * NUM_JONG + jong,
    # written in nested form.
    syllable_number = (cho_idx * NUM_JOONG + joong_idx) * NUM_JONG + jong_idx
    return unichr(0xAC00 + syllable_number)
示例14: property_chars
# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def property_chars(self, prefix):
    """Return all characters whose Unicode category starts with ``prefix``.

    Every code point from 0 through ``sys.maxunicode`` inclusive is
    examined, and the matching characters are joined in code point order.
    """
    # range() excludes its stop value, so add 1 to include the last code
    # point; each character is built once and reused for the category test.
    chars = (six.unichr(x) for x in range(sys.maxunicode + 1))
    return "".join(c for c in chars
                   if unicodedata.category(c).startswith(prefix))
示例15: test_invertibility_on_random_strings
# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def test_invertibility_on_random_strings(self):
    """Encode then decode 1000 random 10-character strings and compare."""
    for _trial in range(1000):
        # Each character is drawn from code points 0..65535 inclusive.
        chars = [six.unichr(random.randint(0, 65535)) for _pos in range(10)]
        original = u"".join(chars)
        round_tripped = tokenizer.decode(tokenizer.encode(original))
        self.assertEqual(original, round_tripped)