本文整理匯總了Python中unicodedata.lookup方法的典型用法代碼示例。如果您正苦於以下問題:Python unicodedata.lookup方法的具體用法?Python unicodedata.lookup怎麼用?Python unicodedata.lookup使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊unicodedata的用法示例。
在下文中一共展示了unicodedata.lookup方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: replace_unicode
# 需要導入模塊: import unicodedata [as 別名]
# 或者: from unicodedata import lookup [as 別名]
def replace_unicode(self, m):
    """Replace a matched escape sequence with the text it stands for.

    ``m`` is a regex match object; its named groups say which kind of
    escape was matched.  The first populated group, in this order, wins:

    * ``fesc``    -- the matched text is kept as is.
    * ``format``  -- replaced by a single space.
    * ``special`` -- translated through ``BACK_SLASH_TRANSLATION``.
    * ``char``    -- everything after the first two characters is parsed
      as a hexadecimal code point.
    * ``oct``     -- everything after the first character is parsed as an
      octal code point.
    * ``name``    -- the text between the first three characters and the
      last one is resolved with ``unicodedata.lookup``.

    Returns the replacement string with every NUL character turned into a
    newline.  Escapes that cannot be decoded, and matches in which none of
    the groups above is populated, are returned unchanged.
    """
    groups = m.groupdict()
    esc = m.group(0)
    # Fall back to the raw match so ``value`` is always bound, even when
    # the pattern matched through a group this method does not know about.
    value = esc
    if groups.get('fesc'):
        value = esc
    elif groups.get('format'):
        value = ' '
    elif groups.get('special'):
        value = BACK_SLASH_TRANSLATION[esc]
    elif groups.get('char'):
        try:
            value = chr(int(esc[2:], 16))
        except (ValueError, OverflowError):
            # Not hexadecimal, or outside the range chr() accepts:
            # keep the escape verbatim.
            pass
    elif groups.get('oct'):
        value = chr(int(esc[1:], 8))
    elif groups.get('name'):
        try:
            value = unicodedata.lookup(esc[3:-1])
        except (KeyError, ValueError):
            # Unknown character name (KeyError) or a name that cannot be
            # encoded for the lookup (ValueError): keep the escape verbatim.
            pass
    return value.replace('\x00', '\n')
示例2: start_unichar
# 需要導入模塊: import unicodedata [as 別名]
# 或者: from unicodedata import lookup [as 別名]
def start_unichar(self, attr):
    """Handle a ``<unichar/>`` tag given its attribute mapping ``attr``.

    The character is selected either by Unicode name (``name`` attribute)
    or by numeric code (``code`` attribute).  Problems are reported through
    ``self._syntax_error``; when the name or code is invalid a NUL
    character is emitted in place of the requested one.  The resulting
    character, if any, is passed to ``self.handle_data`` and the tag is
    then registered through ``self._push`` as self-closing.
    """
    if 'name' in attr:
        if 'code' in attr:
            self._syntax_error('<unichar/> invalid with both name and code attributes')
        try:
            v = unicodedata.lookup(attr['name'])
        except KeyError:
            self._syntax_error('<unichar/> invalid name attribute\n"%s"' % ascii(attr['name']))
            v = '\0'
    elif 'code' in attr:
        try:
            # SECURITY NOTE(review): eval() executes whatever expression the
            # attribute contains.  If markup can come from an untrusted
            # source this should become a strict integer parser.
            v = int(eval(attr['code']))
            v = chr(v) if isPy3 else unichr(v)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so SystemExit and
            # KeyboardInterrupt are not reported as a markup error.
            self._syntax_error('<unichar/> invalid code attribute %s' % ascii(attr['code']))
            v = '\0'
    else:
        v = None
        if attr:
            # Neither ``name`` nor ``code``: complain about the first
            # attribute that was supplied.
            self._syntax_error('<unichar/> invalid attribute %s' % list(attr.keys())[0])

    if v is not None:
        self.handle_data(v)
    self._push('unichar', _selfClosingTag='unichar')
示例3: test_aliases
# 需要導入模塊: import unicodedata [as 別名]
# 或者: from unicodedata import lookup [as 別名]
def test_aliases(self):
    """Check that name aliases resolve through ``unicodedata.lookup``.

    The table mirrors aliases defined in the NameAliases.txt file.  It
    should be updated when new aliases are added, or the file should be
    downloaded and parsed instead.  See #12753.
    """
    alias_table = (
        ('LATIN CAPITAL LETTER GHA', 0x01A2),
        ('LATIN SMALL LETTER GHA', 0x01A3),
        ('KANNADA LETTER LLLA', 0x0CDE),
        ('LAO LETTER FO FON', 0x0E9D),
        ('LAO LETTER FO FAY', 0x0E9F),
        ('LAO LETTER RO', 0x0EA3),
        ('LAO LETTER LO', 0x0EA5),
        ('TIBETAN MARK BKA- SHOG GI MGO RGYAN', 0x0FD0),
        ('YI SYLLABLE ITERATION MARK', 0xA015),
        ('PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET', 0xFE18),
        ('BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS', 0x1D0C5),
    )
    for alias_name, code in alias_table:
        char = chr(code)
        self.checkletter(alias_name, char)

        # The alias must differ from the primary name yet resolve to the
        # very same character.
        primary_name = unicodedata.name(char)
        self.assertNotEqual(primary_name, alias_name)
        via_alias = unicodedata.lookup(alias_name)
        via_primary = unicodedata.lookup(primary_name)
        self.assertEqual(via_alias, via_primary)

        # The 3.2.0 database is expected to reject the alias.
        with self.assertRaises(KeyError):
            unicodedata.ucd_3_2_0.lookup(alias_name)
示例4: greek_letter_name_to_unicode
# 需要導入模塊: import unicodedata [as 別名]
# 或者: from unicodedata import lookup [as 別名]
def greek_letter_name_to_unicode(letter):
    # type: (str) -> str
    """
    Return a greek letter name as a Unicode character.

    The case of the result follows the case of ``letter``: an all-lowercase
    name yields the small letter, anything else yields the capital letter.
    The Unicode database spells the eleventh letter "LAMDA"; the common
    spelling "lambda" is accepted as well and mapped onto it.

    Raises ``KeyError`` when no Greek letter with that name exists.

    Examples
    --------
    Lamda -> Λ (Unicodedata library uses "lamda" for "lambda" :S!)
    Lambda -> Λ
    lambda -> λ
    Omega -> Ω
    omega -> ω
    """
    case = "SMALL" if letter == letter.lower() else "CAPITAL"
    name = letter.upper()
    if name == "LAMBDA":
        # Translate the everyday spelling into the one Unicode uses.
        name = "LAMDA"
    return unicodedata.lookup(
        "GREEK {case} LETTER {name}".format(case=case, name=name)
    )
示例5: test_named_sequences_full
# 需要導入模塊: import unicodedata [as 別名]
# 或者: from unicodedata import lookup [as 別名]
def test_named_sequences_full(self):
    """Check every entry of NamedSequences.txt against ``unicodedata``.

    The data file for the running ``unicodedata.unidata_version`` is
    fetched through ``support.open_urlresource``; the test is skipped when
    it cannot be retrieved.  For each named sequence, ``lookup`` must
    return the listed code points, ``self.checkletter`` must raise
    ``SyntaxError`` and the 3.2.0 database must raise ``KeyError``.
    """
    url = "http://www.pythontest.net/unicode/%s/NamedSequences.txt" % (
        unicodedata.unidata_version,
    )
    try:
        testdata = support.open_urlresource(
            url, encoding="utf-8", check=check_version
        )
    except (OSError, HTTPException):
        self.skipTest("Could not retrieve " + url)
    self.addCleanup(testdata.close)

    for raw_line in testdata:
        entry = raw_line.strip()
        # Only lines that are neither blank nor comments carry data.
        if entry and not entry.startswith('#'):
            seqname, hex_fields = entry.split(';')
            expected = ''.join(
                chr(int(field, 16)) for field in hex_fields.split()
            )
            self.assertEqual(unicodedata.lookup(seqname), expected)
            with self.assertRaises(SyntaxError):
                self.checkletter(seqname, None)
            with self.assertRaises(KeyError):
                unicodedata.ucd_3_2_0.lookup(seqname)
示例6: start_unichar
# 需要導入模塊: import unicodedata [as 別名]
# 或者: from unicodedata import lookup [as 別名]
def start_unichar(self, attr):
    """Handle a ``<unichar/>`` tag given its attribute mapping ``attr``.

    The character is selected either by Unicode name (``name`` attribute)
    or by numeric code (``code`` attribute) and is emitted UTF-8 encoded.
    Problems are reported through ``self._syntax_error``; when the name or
    code is invalid a NUL character is emitted instead.  The result, if
    any, is passed to ``self.handle_data`` and the tag is then registered
    through ``self._push`` as self-closing.
    """
    if 'name' in attr:
        if 'code' in attr:
            self._syntax_error('<unichar/> invalid with both name and code attributes')
        try:
            v = unicodedata.lookup(attr['name']).encode('utf8')
        except KeyError:
            # Report the offending attribute value; the previous code
            # referenced an undefined variable here and raised NameError.
            self._syntax_error('<unichar/> invalid name attribute\n"%s"' % attr['name'])
            v = '\0'
    elif 'code' in attr:
        try:
            # SECURITY NOTE(review): eval() executes whatever expression the
            # attribute contains.  If markup can come from an untrusted
            # source this should become a strict integer parser.
            v = unichr(int(eval(attr['code']))).encode('utf8')
        except Exception:
            # ``Exception`` rather than a bare ``except`` so SystemExit and
            # KeyboardInterrupt are not reported as a markup error.
            self._syntax_error('<unichar/> invalid code attribute %s' % attr['code'])
            v = '\0'
    else:
        v = None
        if attr:
            # list(...) keeps this working where keys() is a view rather
            # than an indexable list.
            self._syntax_error('<unichar/> invalid attribute %s' % list(attr.keys())[0])

    if v is not None:
        self.handle_data(v)
    self._push(_selfClosingTag='unichar')
示例7: _greekletters
# 需要導入模塊: import unicodedata [as 別名]
# 或者: from unicodedata import lookup [as 別名]
def _greekletters(letterlist):
    """Register a small and a capital macro for every Greek letter name.

    For each name in ``letterlist`` two ``MacroTextSpec`` objects are
    appended to ``_latex_specs_base['macros']``: one under the name as
    given, mapped to the small letter, and one under the name with its
    first character upper-cased, mapped to the capital letter.
    """
    # Small forms whose Unicode name is not "GREEK SMALL LETTER <NAME>".
    small_name_overrides = {
        'EPSILON': "GREEK LUNATE EPSILON SYMBOL",
        'PHI': "GREEK PHI SYMBOL",
    }
    for letter in letterlist:
        unicode_name = letter.upper()
        if unicode_name == 'LAMBDA':
            # The Unicode database spells this letter "LAMDA".
            unicode_name = 'LAMDA'

        small_name = small_name_overrides.get(
            unicode_name, "GREEK SMALL LETTER " + unicode_name
        )
        small_spec = MacroTextSpec(letter, unicodedata.lookup(small_name))
        _latex_specs_base['macros'].append(small_spec)

        capital_macro = letter[0].upper() + letter[1:]
        capital_char = unicodedata.lookup("GREEK CAPITAL LETTER " + unicode_name)
        _latex_specs_base['macros'].append(
            MacroTextSpec(capital_macro, capital_char)
        )
示例8: test_unicode_whitespace
# 需要導入模塊: import unicodedata [as 別名]
# 或者: from unicodedata import lookup [as 別名]
def test_unicode_whitespace(self):
    """Check which characters the ``\\s`` and ``\\S`` regex classes match.

    Test for http://bugs.jython.org/issue2226.  Every code point examined
    must match exactly one of the two patterns; which one depends on
    whether the character is in the expected whitespace set or belongs to
    a separator category.
    """
    whitespace_pattern = re.compile(r'\s', re.UNICODE)
    non_whitespace_pattern = re.compile(r'\S', re.UNICODE)

    separator_categories = {'Zl', 'Zp', 'Zs'}
    separators = set(chr(code) for code in (28, 29, 30, 31))
    special = {
        unicodedata.lookup('MONGOLIAN VOWEL SEPARATOR'),
        u'\u0085',  # NEXT LINE (NEL)
    }
    cpython_whitespace = set(' \t\n\r\f\v') | separators | special

    # Could test to sys.maxunicode, but does not appear to be necessary.
    for code_point in xrange(0xFFFF):
        if 0xD800 <= code_point <= 0xDFFF:
            # The surrogate range is skipped.
            continue
        char = unichr(code_point)
        expect_whitespace = (
            char in cpython_whitespace
            or category(char) in separator_categories
        )
        if not expect_whitespace:
            self.assertNotRegexpMatches(char, whitespace_pattern)
            self.assertRegexpMatches(char, non_whitespace_pattern)
        else:
            self.assertRegexpMatches(char, whitespace_pattern)
            self.assertNotRegexpMatches(char, non_whitespace_pattern)
示例9: _greekletters
# 需要導入模塊: import unicodedata [as 別名]
# 或者: from unicodedata import lookup [as 別名]
def _greekletters(letterlist):
    """Add a small and a capital macro entry for every Greek letter name.

    For each name in ``letterlist`` two ``(macro_name, character)`` tuples
    are appended to ``_default_macro_list``: the name as given paired with
    the small letter, then the name with its first character upper-cased
    paired with the capital letter.
    """
    for macro_name in letterlist:
        greek_name = macro_name.upper()
        if greek_name == 'LAMBDA':
            # The Unicode database spells this letter "LAMDA".
            greek_name = 'LAMDA'

        # Epsilon and phi use a dedicated symbol as their small form.
        if greek_name == 'PHI':
            small_char_name = "GREEK PHI SYMBOL"
        elif greek_name == 'EPSILON':
            small_char_name = "GREEK LUNATE EPSILON SYMBOL"
        else:
            small_char_name = "GREEK SMALL LETTER " + greek_name

        small_entry = (macro_name, unicodedata.lookup(small_char_name))
        _default_macro_list.append(small_entry)

        capital_entry = (
            macro_name[0].upper() + macro_name[1:],
            unicodedata.lookup("GREEK CAPITAL LETTER " + greek_name),
        )
        _default_macro_list.append(capital_entry)
示例10: test_unicode
# 需要導入模塊: import unicodedata [as 別名]
# 或者: from unicodedata import lookup [as 別名]
def test_unicode(self, sparse):
    """Regression test for GH 6885: get_dummies chokes on unicode values.

    Encodes a list holding a plain "e" and two accented ones and compares
    the result with a hand-built ``uint8`` frame, which is converted
    column-wise to ``pd.SparseArray`` when ``sparse`` is true.
    """
    import unicodedata

    plain_e = 'e'
    accented_e = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE')
    values = [plain_e, accented_e, accented_e]
    result = get_dummies(values, prefix='letter', sparse=sparse)

    accented_column = u('letter_%s') % accented_e
    expected = DataFrame(
        {'letter_e': [1, 0, 0], accented_column: [0, 1, 1]},
        dtype=np.uint8,
    )
    if sparse:
        expected = expected.apply(pd.SparseArray, fill_value=0)
    assert_frame_equal(result, expected)
示例11: test_ascii_letters
# 需要導入模塊: import unicodedata [as 別名]
# 或者: from unicodedata import lookup [as 別名]
def test_ascii_letters(self):
    """Round-trip every lowercase ASCII letter through lookup() and name().

    For each letter from "a" to "z" the name "LATIN SMALL LETTER <X>" is
    resolved with ``unicodedata.lookup`` and the resulting character must
    map back to the same name through ``unicodedata.name``.
    """
    import unicodedata
    # The stop value is exclusive, so one is added to make sure "z" itself
    # is covered (it used to be skipped).
    for code_point in xrange(ord("a"), ord("z") + 1):
        char = chr(code_point)
        name = "LATIN SMALL LETTER %s" % char.upper()
        code = unicodedata.lookup(name)
        self.assertEqual(unicodedata.name(code), name)
示例12: test_bmp_characters
# 需要導入模塊: import unicodedata [as 別名]
# 或者: from unicodedata import lookup [as 別名]
def test_bmp_characters(self):
    """Check that lookup() inverts name() for code points below 0x10000.

    Code points for which ``unicodedata.name`` returns no name are
    skipped.  At least one named character must be seen, otherwise the
    loop would have verified nothing.
    """
    import unicodedata
    count = 0
    for code in xrange(0x10000):
        char = unichr(code)
        name = unicodedata.name(char, None)
        if name is not None:
            self.assertEqual(unicodedata.lookup(name), char)
            count += 1
    # The counter was previously never inspected; use it to guard against
    # a vacuous pass in which no character had a name at all.
    self.assertTrue(count > 0)
示例13: test_errors
# 需要導入模塊: import unicodedata [as 別名]
# 或者: from unicodedata import lookup [as 別名]
def test_errors(self):
    """Check the exceptions raised by name() and lookup() on bad calls.

    Calling either function without arguments, or ``name`` with a
    two-character string, must raise ``TypeError``; looking up an unknown
    name must raise ``KeyError``.
    """
    import unicodedata

    # (expected exception, callable, positional arguments)
    cases = (
        (TypeError, unicodedata.name, ()),
        (TypeError, unicodedata.name, (u'xx',)),
        (TypeError, unicodedata.lookup, ()),
        (KeyError, unicodedata.lookup, (u'unknown',)),
    )
    for expected_error, func, args in cases:
        self.assertRaises(expected_error, func, *args)
示例14: test_unicode
# 需要導入模塊: import unicodedata [as 別名]
# 或者: from unicodedata import lookup [as 別名]
def test_unicode(self, sparse):
    """Regression test for GH 6885: get_dummies chokes on unicode values.

    ``sparse`` is only forwarded to ``get_dummies``; the expected frame is
    built as a plain ``uint8`` ``DataFrame`` in either case.
    """
    import unicodedata

    e_acute = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE')
    letters = ['e', e_acute, e_acute]
    actual = get_dummies(letters, prefix='letter', sparse=sparse)

    # One indicator column per distinct letter, in this order.
    columns = {'letter_e': [1, 0, 0]}
    columns[u('letter_%s') % e_acute] = [0, 1, 1]
    wanted = DataFrame(columns, dtype=np.uint8)

    assert_frame_equal(actual, wanted)
示例15: U
# 需要導入模塊: import unicodedata [as 別名]
# 或者: from unicodedata import lookup [as 別名]
def U(name):
    """Return the Unicode character called ``name``, or None if unknown.

    When ``unicodedata`` has no character of that name, a one-line note is
    appended to the module-level ``unicode_warnings`` string.
    """
    global unicode_warnings
    try:
        return unicodedata.lookup(name)
    except KeyError:
        unicode_warnings += "No '%s' in unicodedata\n" % name
        return None