本文整理汇总了Python中unicodedata.lookup方法的典型用法代码示例。如果您正苦于以下问题：Python unicodedata.lookup方法的具体用法？Python unicodedata.lookup怎么用？Python unicodedata.lookup使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块unicodedata的用法示例。
在下文中一共展示了unicodedata.lookup方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: replace_unicode
# 需要导入模块: import unicodedata [as 别名]
# 或者: from unicodedata import lookup [as 别名]
def replace_unicode(self, m):
    """Translate one matched escape sequence into the text it stands for.

    ``m`` is a regex match object.  Its named groups are checked in the
    order below and the first non-empty one selects the translation:

    * ``fesc``    -- the matched text is returned as is.
    * ``format``  -- replaced by a single space.
    * ``special`` -- the matched text is looked up in
      ``BACK_SLASH_TRANSLATION`` (defined outside this block).
    * ``char``    -- everything after the first two characters is parsed
      as a hexadecimal code point.
    * ``oct``     -- everything after the first character is parsed as an
      octal code point.
    * ``name``    -- ``esc[3:-1]`` is resolved with
      ``unicodedata.lookup``.

    When a ``char`` or ``name`` escape cannot be decoded, or when none of
    the groups matched, the matched text is kept unchanged.  Any NUL
    character in the result is replaced by a newline before returning.
    """
    groups = m.groupdict()
    esc = m.group(0)
    # Fall back to the raw text so `value` is always bound, even if the
    # pattern matched through a group this method does not know about.
    value = esc
    if groups.get('fesc'):
        value = esc
    elif groups.get('format'):
        value = ' '
    elif groups.get('special'):
        value = BACK_SLASH_TRANSLATION[esc]
    elif groups.get('char'):
        try:
            value = chr(int(esc[2:], 16))
        except (ValueError, OverflowError):
            # Not valid hex, or outside the range chr() accepts.
            value = esc
    elif groups.get('oct'):
        value = chr(int(esc[1:], 8))
    elif groups.get('name'):
        try:
            value = unicodedata.lookup(esc[3:-1])
        except KeyError:
            # Unknown character name: leave the escape untouched.
            value = esc
    return value.replace('\x00', '\n')
示例2: start_unichar
# 需要导入模块: import unicodedata [as 别名]
# 或者: from unicodedata import lookup [as 别名]
def start_unichar(self, attr):
    """Handle a ``<unichar/>`` tag by emitting the character it names.

    ``attr`` maps attribute names to their string values.  Exactly one of
    two attributes is expected:

    * ``name`` -- a Unicode character name, resolved with
      ``unicodedata.lookup``.
    * ``code`` -- an expression that evaluates to the code point.

    Problems (both attributes present, unknown name, bad code, or any
    other attribute) are reported through ``self._syntax_error``.  After
    an unknown name or a bad code a NUL character is emitted instead.
    The resulting text, if any, goes to ``self.handle_data`` and the tag
    is then pushed as self-closing.
    """
    if 'name' in attr:
        if 'code' in attr:
            self._syntax_error('<unichar/> invalid with both name and code attributes')
        try:
            v = unicodedata.lookup(attr['name'])
        except KeyError:
            self._syntax_error('<unichar/> invalid name attribute\n"%s"' % ascii(attr['name']))
            v = '\0'
    elif 'code' in attr:
        try:
            # SECURITY NOTE(review): eval() runs arbitrary code taken from
            # the markup.  It is kept so that existing documents using
            # expressions keep working, but it must not be fed untrusted
            # input; consider a restricted parser such as int(text, 0).
            v = int(eval(attr['code']))
            v = chr(v) if isPy3 else unichr(v)
        except Exception:
            # Deliberately not a bare except: KeyboardInterrupt and
            # SystemExit must still propagate.
            self._syntax_error('<unichar/> invalid code attribute %s' % ascii(attr['code']))
            v = '\0'
    else:
        v = None
        if attr:
            # Neither name nor code: complain about the first attribute.
            self._syntax_error('<unichar/> invalid attribute %s' % list(attr.keys())[0])
    if v is not None:
        self.handle_data(v)
    self._push('unichar',_selfClosingTag='unichar')
示例3: test_aliases
# 需要导入模块: import unicodedata [as 别名]
# 或者: from unicodedata import lookup [as 别名]
def test_aliases(self):
# Check that the aliases defined in the NameAliases.txt file work.
# This should be updated when new aliases are added or the file
# should be downloaded and parsed instead. See #12753.
aliases = [
('LATIN CAPITAL LETTER GHA', 0x01A2),
('LATIN SMALL LETTER GHA', 0x01A3),
('KANNADA LETTER LLLA', 0x0CDE),
('LAO LETTER FO FON', 0x0E9D),
('LAO LETTER FO FAY', 0x0E9F),
('LAO LETTER RO', 0x0EA3),
('LAO LETTER LO', 0x0EA5),
('TIBETAN MARK BKA- SHOG GI MGO RGYAN', 0x0FD0),
('YI SYLLABLE ITERATION MARK', 0xA015),
('PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET', 0xFE18),
('BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS', 0x1D0C5)
]
for alias, codepoint in aliases:
self.checkletter(alias, chr(codepoint))
name = unicodedata.name(chr(codepoint))
self.assertNotEqual(name, alias)
self.assertEqual(unicodedata.lookup(alias),
unicodedata.lookup(name))
with self.assertRaises(KeyError):
unicodedata.ucd_3_2_0.lookup(alias)
示例4: greek_letter_name_to_unicode
# 需要导入模块: import unicodedata [as 别名]
# 或者: from unicodedata import lookup [as 别名]
def greek_letter_name_to_unicode(letter):
    # type: (str) -> str
    """
    Return the Greek letter called ``letter`` as a Unicode character.

    The case of the result follows the case of the argument: an
    all-lowercase name gives the small letter, anything else gives the
    capital letter.

    The Unicode database spells the eleventh letter "LAMDA"; the common
    spelling "lambda" is accepted as well and mapped to it.

    Examples
    --------
    Lamda -> Λ
    lambda -> λ
    Omega -> Ω
    omega -> ω

    Raises
    ------
    KeyError
        If the Unicode database has no Greek letter with that name.
    """
    name = letter.upper()
    if name == "LAMBDA":
        # unicodedata only knows the official (misspelled) "LAMDA".
        name = "LAMDA"
    case = "SMALL" if letter == letter.lower() else "CAPITAL"
    return unicodedata.lookup(
        "GREEK {case} LETTER {name}".format(case=case, name=name)
    )
示例5: test_named_sequences_full
# 需要导入模块: import unicodedata [as 别名]
# 或者: from unicodedata import lookup [as 别名]
def test_named_sequences_full(self):
    """Check every entry of the downloaded NamedSequences.txt file.

    The file matching ``unicodedata.unidata_version`` is fetched; if that
    fails with ``OSError`` or ``HTTPException`` the test is skipped.
    Blank lines and lines starting with ``#`` are ignored.  Every other
    line is split at ``;`` into a sequence name and a list of hexadecimal
    code points, and the test then checks that:

    * ``unicodedata.lookup`` maps the name to exactly that string,
    * ``self.checkletter`` raises ``SyntaxError`` for the name, and
    * the 3.2.0 database raises ``KeyError`` for the name.
    """
    # Check all the named sequences
    url = "http://www.pythontest.net/unicode/%s/NamedSequences.txt" % (
        unicodedata.unidata_version,
    )
    try:
        testdata = support.open_urlresource(
            url, encoding="utf-8", check=check_version
        )
    except (OSError, HTTPException):
        self.skipTest("Could not retrieve " + url)
    self.addCleanup(testdata.close)
    for raw_line in testdata:
        entry = raw_line.strip()
        if entry and not entry.startswith('#'):
            seqname, hex_codes = entry.split(';')
            expected = ''.join(
                [chr(int(code, 16)) for code in hex_codes.split()]
            )
            self.assertEqual(unicodedata.lookup(seqname), expected)
            with self.assertRaises(SyntaxError):
                self.checkletter(seqname, None)
            with self.assertRaises(KeyError):
                unicodedata.ucd_3_2_0.lookup(seqname)
示例6: start_unichar
# 需要导入模块: import unicodedata [as 别名]
# 或者: from unicodedata import lookup [as 别名]
def start_unichar(self, attr):
    """Handle a ``<unichar/>`` tag by emitting the character it names.

    ``attr`` maps attribute names to their string values.  Exactly one of
    two attributes is expected:

    * ``name`` -- a Unicode character name, resolved with
      ``unicodedata.lookup``.
    * ``code`` -- an expression that evaluates to the code point.

    The character is handed to ``self.handle_data`` encoded as UTF-8.
    Problems (both attributes present, unknown name, bad code, or any
    other attribute) are reported through ``self._syntax_error``; after
    an unknown name or a bad code a NUL character is emitted instead.
    Finally the tag is pushed as self-closing.
    """
    if 'name' in attr:
        if 'code' in attr:
            self._syntax_error('<unichar/> invalid with both name and code attributes')
        try:
            v = unicodedata.lookup(attr['name']).encode('utf8')
        except KeyError:
            # The message must use attr['name']; a bare `name` is not
            # defined in this scope and would raise NameError here.
            self._syntax_error('<unichar/> invalid name attribute\n"%s"' % attr['name'])
            v = '\0'
    elif 'code' in attr:
        try:
            # SECURITY NOTE(review): eval() runs arbitrary code taken from
            # the markup.  It is kept so that existing documents using
            # expressions keep working, but it must not be fed untrusted
            # input.
            v = unichr(int(eval(attr['code']))).encode('utf8')
        except Exception:
            # Deliberately not a bare except: KeyboardInterrupt and
            # SystemExit must still propagate.
            self._syntax_error('<unichar/> invalid code attribute %s' % attr['code'])
            v = '\0'
    else:
        v = None
        if attr:
            # list() keeps this working when keys() returns a view.
            self._syntax_error('<unichar/> invalid attribute %s' % list(attr.keys())[0])
    if v is not None:
        self.handle_data(v)
    self._push(_selfClosingTag='unichar')
示例7: _greekletters
# 需要导入模块: import unicodedata [as 别名]
# 或者: from unicodedata import lookup [as 别名]
def _greekletters(letterlist):
    """Register text macros for the given Greek letter names.

    For each name in ``letterlist`` two ``MacroTextSpec`` entries are
    appended to ``_latex_specs_base['macros']``: one under the name as
    given, mapped to the small letter, and one under the name with its
    first character upper-cased, mapped to the capital letter.

    "lambda" is looked up under Unicode's spelling "LAMDA".  For epsilon
    and phi the small form uses the "LUNATE EPSILON SYMBOL" and
    "PHI SYMBOL" characters instead of the plain small letters.
    """
    # Small forms that are not simply "GREEK SMALL LETTER <NAME>".
    small_overrides = {
        'EPSILON': "GREEK LUNATE EPSILON SYMBOL",
        'PHI': "GREEK PHI SYMBOL",
    }
    for letter in letterlist:
        upper_name = letter.upper()
        if upper_name == 'LAMBDA':
            upper_name = 'LAMDA'
        small_name = small_overrides.get(
            upper_name, "GREEK SMALL LETTER " + upper_name
        )
        _latex_specs_base['macros'].append(
            MacroTextSpec(letter, unicodedata.lookup(small_name))
        )
        _latex_specs_base['macros'].append(
            MacroTextSpec(
                letter[0].upper() + letter[1:],
                unicodedata.lookup("GREEK CAPITAL LETTER " + upper_name),
            )
        )
示例8: test_unicode_whitespace
# 需要导入模块: import unicodedata [as 别名]
# 或者: from unicodedata import lookup [as 别名]
def test_unicode_whitespace(self):
# Test for http://bugs.jython.org/issue2226
ws_re = re.compile(r'\s', re.UNICODE)
not_ws_re = re.compile(r'\S', re.UNICODE)
separator_categories = set(['Zl', 'Zp', 'Zs'])
separators = {chr(c) for c in [28, 29, 30, 31]}
special = set([
unicodedata.lookup('MONGOLIAN VOWEL SEPARATOR'),
u'\u0085', # NEXT LINE (NEL)
])
cpython_whitespace = set(' \t\n\r\f\v') | separators | special
for i in xrange(0xFFFF): # could test to sys.maxunicode, but does not appear to be necessary
if i >= 0xD800 and i <= 0xDFFF:
continue
c = unichr(i)
if c in cpython_whitespace or category(c) in separator_categories:
self.assertRegexpMatches(c, ws_re)
self.assertNotRegexpMatches(c, not_ws_re)
else:
self.assertNotRegexpMatches(c, ws_re)
self.assertRegexpMatches(c, not_ws_re)
示例9: _greekletters
# 需要导入模块: import unicodedata [as 别名]
# 或者: from unicodedata import lookup [as 别名]
def _greekletters(letterlist):
    """Add macro entries for the given Greek letter names.

    For each name in ``letterlist`` two ``(macro name, character)``
    tuples are appended to ``_default_macro_list``: the name as given
    paired with the small letter, and the name with its first character
    upper-cased paired with the capital letter.

    "lambda" is looked up under Unicode's spelling "LAMDA".  For epsilon
    and phi the small form uses the "LUNATE EPSILON SYMBOL" and
    "PHI SYMBOL" characters instead of the plain small letters.
    """
    for letter in letterlist:
        upper_name = letter.upper()
        if upper_name == 'LAMBDA':
            upper_name = 'LAMDA'

        # Pick the Unicode name of the lower-case form.
        if upper_name == 'PHI':
            small_char_name = "GREEK PHI SYMBOL"
        elif upper_name == 'EPSILON':
            small_char_name = "GREEK LUNATE EPSILON SYMBOL"
        else:
            small_char_name = "GREEK SMALL LETTER " + upper_name

        small_entry = (letter, unicodedata.lookup(small_char_name))
        _default_macro_list.append(small_entry)

        capital_entry = (
            letter[0].upper() + letter[1:],
            unicodedata.lookup("GREEK CAPITAL LETTER " + upper_name),
        )
        _default_macro_list.append(capital_entry)
示例10: test_unicode
# 需要导入模块: import unicodedata [as 别名]
# 或者: from unicodedata import lookup [as 别名]
def test_unicode(self, sparse):
    """Check that ``get_dummies`` copes with non-ASCII values.

    Three values ('e' and twice 'e with acute') are encoded with the
    prefix 'letter'.  The expected frame has one uint8 column per
    distinct value; when ``sparse`` is true each expected column is
    converted with ``pd.SparseArray`` (fill value 0) before comparing.
    """
    # See GH 6885 - get_dummies chokes on unicode values
    import unicodedata
    plain_e = 'e'
    e_acute = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE')
    values = [plain_e, e_acute, e_acute]
    result = get_dummies(values, prefix='letter', sparse=sparse)

    columns = {'letter_e': [1, 0, 0]}
    columns[u('letter_%s') % e_acute] = [0, 1, 1]
    expected = DataFrame(columns, dtype=np.uint8)
    if sparse:
        expected = expected.apply(pd.SparseArray, fill_value=0)
    assert_frame_equal(result, expected)
示例11: test_ascii_letters
# 需要导入模块: import unicodedata [as 别名]
# 或者: from unicodedata import lookup [as 别名]
def test_ascii_letters(self):
import unicodedata
for char in "".join(map(chr, xrange(ord("a"), ord("z")))):
name = "LATIN SMALL LETTER %s" % char.upper()
code = unicodedata.lookup(name)
self.assertEqual(unicodedata.name(code), name)
示例12: test_bmp_characters
# 需要导入模块: import unicodedata [as 别名]
# 或者: from unicodedata import lookup [as 别名]
def test_bmp_characters(self):
import unicodedata
count = 0
for code in xrange(0x10000):
char = unichr(code)
name = unicodedata.name(char, None)
if name is not None:
self.assertEqual(unicodedata.lookup(name), char)
count += 1
示例13: test_errors
# 需要导入模块: import unicodedata [as 别名]
# 或者: from unicodedata import lookup [as 别名]
def test_errors(self):
import unicodedata
self.assertRaises(TypeError, unicodedata.name)
self.assertRaises(TypeError, unicodedata.name, u'xx')
self.assertRaises(TypeError, unicodedata.lookup)
self.assertRaises(KeyError, unicodedata.lookup, u'unknown')
示例14: test_unicode
# 需要导入模块: import unicodedata [as 别名]
# 或者: from unicodedata import lookup [as 别名]
def test_unicode(self, sparse):
    """Check that ``get_dummies`` copes with non-ASCII values.

    Three values ('e' and twice 'e with acute') are encoded with the
    prefix 'letter' and the given ``sparse`` flag.  The result must equal
    a uint8 frame with one indicator column per distinct value.
    """
    # See GH 6885 - get_dummies chokes on unicode values
    import unicodedata
    accented = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE')
    letters = ['e', accented, accented]
    actual = get_dummies(letters, prefix='letter', sparse=sparse)

    indicator_columns = {'letter_e': [1, 0, 0]}
    indicator_columns[u('letter_%s') % accented] = [0, 1, 1]
    wanted = DataFrame(indicator_columns, dtype=np.uint8)
    assert_frame_equal(actual, wanted)
示例15: U
# 需要导入模块: import unicodedata [as 别名]
# 或者: from unicodedata import lookup [as 别名]
def U(name):
    """Return the Unicode character called ``name``, or None if unknown.

    When the name is not in the Unicode database, a line describing the
    miss is appended to the module-level ``unicode_warnings`` string.
    """
    global unicode_warnings
    try:
        return unicodedata.lookup(name)
    except KeyError:
        unicode_warnings += 'No \'%s\' in unicodedata\n' % name
        return None