

Python unicodedata.decomposition Method Code Examples

This article collects typical usage examples of the unicodedata.decomposition method in Python. If you are wondering what exactly unicodedata.decomposition does, how to call it, or where to find concrete examples, the curated code samples below should help. You can also explore further usage examples from the unicodedata module.


Six code examples of the unicodedata.decomposition method are shown below, sorted by popularity by default.
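Before the examples, here is a minimal sketch of what the method returns (assuming CPython's bundled Unicode database): decomposition() yields the character's decomposition mapping as a string of space-separated hexadecimal code points, optionally prefixed with a tag such as &lt;compat&gt;, or an empty string if the character has no decomposition.

import unicodedata

# Canonical decomposition: U+00E9 'é' splits into 'e' plus a combining acute accent
print(unicodedata.decomposition("\u00e9"))  # '0065 0301'

# Compatibility decomposition carries a tag: the 'fi' ligature U+FB01
print(unicodedata.decomposition("\ufb01"))  # '<compat> 0066 0069'

# Characters with no decomposition yield the empty string
print(unicodedata.decomposition("\u4e2d"))  # ''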

Example 1: test_ipy2_gh357

# Required import: import unicodedata [as alias]
# Or: from unicodedata import decomposition [as alias]
def test_ipy2_gh357(self):
        """https://github.com/IronLanguages/ironpython2/issues/357"""

        import unicodedata

        if is_cli:
            self.assertEqual(unicodedata.name(u'\u4e2d'), '<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>')
        else:
            self.assertEqual(unicodedata.name(u'\u4e2d'), 'CJK UNIFIED IDEOGRAPH-4E2D')

        self.assertRaises(ValueError, unicodedata.decimal, u'\u4e2d')
        self.assertEqual(unicodedata.decimal(u'\u4e2d', 0), 0)
        self.assertRaises(ValueError, unicodedata.digit, u'\u4e2d')
        self.assertEqual(unicodedata.digit(u'\u4e2d', 0), 0)
        self.assertRaises(ValueError, unicodedata.numeric, u'\u4e2d')
        self.assertEqual(unicodedata.numeric(u'\u4e2d', 0), 0)
        self.assertEqual(unicodedata.category(u'\u4e2d'), 'Lo')
        self.assertEqual(unicodedata.bidirectional(u'\u4e2d'), 'L')
        self.assertEqual(unicodedata.combining(u'\u4e2d'), 0)
        self.assertEqual(unicodedata.east_asian_width(u'\u4e2d'), 'W')
        self.assertEqual(unicodedata.mirrored(u'\u4e2d'), 0)
        self.assertEqual(unicodedata.decomposition(u'\u4e2d'), '') 
Developer: IronLanguages, Project: ironpython2, Lines: 24, Source: test_regressions.py

Example 2: test_urlsplit_normalization

# Required import: import unicodedata [as alias]
# Or: from unicodedata import decomposition [as alias]
def test_urlsplit_normalization(self):
        # Certain characters should never occur in the netloc,
        # including under normalization.
        # Ensure that ALL of them are detected and cause an error
        illegal_chars = '/:#?@'
        hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
        denorm_chars = [
            c for c in map(chr, range(128, sys.maxunicode))
            if (hex_chars & set(unicodedata.decomposition(c).split()))
            and c not in illegal_chars
        ]
        # Sanity check that we found at least one such character
        self.assertIn('\u2100', denorm_chars)
        self.assertIn('\uFF03', denorm_chars)

        for scheme in ["http", "https", "ftp"]:
            for c in denorm_chars:
                url = "{}://netloc{}false.netloc/path".format(scheme, c)
                with self.subTest(url=url, char='{:04X}'.format(ord(c))):
                    with self.assertRaises(ValueError):
                        urllib.parse.urlsplit(url) 
Developer: bkerler, Project: android_universal, Lines: 23, Source: test_urlparse.py
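For context, the two sanity-check characters above are flagged because their decomposition mappings contain the code points of '/' (002F) and '#' (0023). A quick check, assuming the standard CPython Unicode database:

import unicodedata

# U+2100 ACCOUNT OF decomposes to 'a/c', so its mapping contains 002F ('/')
print(unicodedata.decomposition("\u2100"))  # '<compat> 0061 002F 0063'

# U+FF03 FULLWIDTH NUMBER SIGN decomposes to '#' (0023)
print(unicodedata.decomposition("\uff03"))  # '<wide> 0023'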

Example 3: test_urlsplit_normalization

# Required import: import unicodedata [as alias]
# Or: from unicodedata import decomposition [as alias]
def test_urlsplit_normalization(self):
        # Certain characters should never occur in the netloc,
        # including under normalization.
        # Ensure that ALL of them are detected and cause an error
        illegal_chars = '/:#?@'
        hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
        maxunicode = 0xffff if sys.implementation.name == "ironpython" else sys.maxunicode # https://github.com/IronLanguages/ironpython3/issues/252
        denorm_chars = [
            c for c in map(chr, range(128, maxunicode))
            if (hex_chars & set(unicodedata.decomposition(c).split()))
            and c not in illegal_chars
        ]
        # Sanity check that we found at least one such character
        self.assertIn('\u2100', denorm_chars)
        self.assertIn('\uFF03', denorm_chars)

        # https://github.com/IronLanguages/ironpython3/issues/614
        is_mono = False
        mono_issue_chars = ("\ufe13", "\ufe16", "\ufe5f")
        if sys.implementation.name == "ironpython":
            import clr
            is_mono = clr.IsMono

        for scheme in ["http", "https", "ftp"]:
            for c in denorm_chars:
                url = "{}://netloc{}false.netloc/path".format(scheme, c)
                with self.subTest(url=url, char='{:04X}'.format(ord(c))):
                    if is_mono and c in mono_issue_chars:
                        urllib.parse.urlsplit(url) # ensure we fail if this ever gets fixed
                        continue
                    with self.assertRaises(ValueError):
                        urllib.parse.urlsplit(url) 
Developer: IronLanguages, Project: ironpython3, Lines: 34, Source: test_urlparse.py

Example 4: clean_unicode

# Required import: import unicodedata [as alias]
# Or: from unicodedata import decomposition [as alias]
def clean_unicode(s):
    s = s.replace("\u00ad", "")  # soft hyphen
    s = s.replace("\u2010", "-")  # hyphen

    # Some sources encode an i with an accent above using dotless i,
    # which must be converted to normal i
    s = list(s)
    for i in range(len(s) - 1):
        # bug: we should only be looking for accents above, not
        # below
        if s[i] == "ı" and unicodedata.category(s[i + 1]) == "Mn":
            s[i] = "i"
    s = "".join(s)

    # Selectively apply compatibility decomposition.
    # This converts, e.g., the ligature ﬁ (U+FB01) to fi and ： (U+FF1A) to :, but not ² to 2.
    # Unsure: … to ...
    # More classes could be added here.
    def decompose(c):
        d = unicodedata.decomposition(c)
        if d and d.split(None, 1)[0] in ["<compat>", "<wide>", "<narrow>", "<noBreak>"]:
            return unicodedata.normalize("NFKD", c)
        else:
            return c

    s = "".join(map(decompose, s))

    # Convert combining characters when possible
    s = unicodedata.normalize("NFC", s)

    return s 
Developer: acl-org, Project: acl-anthology, Lines: 33, Source: normalize_anth.py
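A hypothetical usage sketch of the clean_unicode helper above; the input string is made up for illustration and assumes the function (and its unicodedata import) is in scope:

# Soft hyphen is dropped, U+2010 becomes '-', and the 'fi' ligature is decomposed
raw = "co\u00adoperate \u2010 \ufb01nal"
print(clean_unicode(raw))  # 'cooperate - final'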

Example 5: __missing__

# Required import: import unicodedata [as alias]
# Or: from unicodedata import decomposition [as alias]
def __missing__(self, key):
        ch = self.get(key)
        if ch is not None:
            return ch
        de = unicodedata.decomposition(chr(key))
        if de:
            try:
                ch = int(de.split(None, 1)[0], 16)
            except (IndexError, ValueError):
                ch = key
        else:
            ch = key
        self[key] = ch
        return ch 
Developer: PacktPublishing, Project: Modern-Python-Standard-Library-Cookbook, Lines: 16, Source: text_07.py
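A minimal sketch of how such a __missing__ hook can be used together with str.translate(); the class name and the driver line here are illustrative, not taken from the original recipe:

import unicodedata

class DecompositionMap(dict):
    # Fall back to the first code point of the character's decomposition,
    # caching the result so each code point is computed only once.
    def __missing__(self, key):
        de = unicodedata.decomposition(chr(key))
        try:
            ch = int(de.split(None, 1)[0], 16) if de else key
        except ValueError:
            ch = key  # tagged decompositions such as '<compat> ...' fall through
        self[key] = ch
        return ch

# str.translate() looks characters up by code point, invoking __missing__
print("héllo wörld".translate(DecompositionMap()))  # 'hello world'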

Example 6: test_urlsplit_normalization

# Required import: import unicodedata [as alias]
# Or: from unicodedata import decomposition [as alias]
def test_urlsplit_normalization(self):
        # Certain characters should never occur in the netloc,
        # including under normalization.
        # Ensure that ALL of them are detected and cause an error
        illegal_chars = u'/:#?@'
        hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
        denorm_chars = [
            c for c in map(unichr, range(128, sys.maxunicode))
            if (hex_chars & set(unicodedata.decomposition(c).split()))
            and c not in illegal_chars
        ]
        # Sanity check that we found at least one such character
        self.assertIn(u'\u2100', denorm_chars)
        self.assertIn(u'\uFF03', denorm_chars)

        # https://github.com/IronLanguages/ironpython3/issues/614
        is_mono = False
        mono_issue_chars = (u"\ufe13", u"\ufe16", u"\ufe5f")
        if sys.platform == "cli":
            import clr
            is_mono = clr.IsMono

        # bpo-36742: Verify port separators are ignored when they
        # existed prior to decomposition
        urlparse.urlsplit(u'http://\u30d5\u309a:80')
        with self.assertRaises(ValueError):
            urlparse.urlsplit(u'http://\u30d5\u309a\ufe1380')
            if is_mono: raise ValueError
        if is_mono: urlparse.urlsplit(u'http://\u30d5\u309a\ufe1380') # ensure we fail if this ever gets fixed

        for scheme in [u"http", u"https", u"ftp"]:
            for netloc in [u"netloc{}false.netloc", u"n{}user@netloc"]:
                for c in denorm_chars:
                    url = u"{}://{}/path".format(scheme, netloc.format(c))
                    if test_support.verbose:
                        print "Checking %r" % url
                    if is_mono and c in mono_issue_chars:
                        urlparse.urlsplit(url) # ensure we fail if this ever gets fixed
                        continue
                    with self.assertRaises(ValueError):
                        urlparse.urlsplit(url)

        # check error message: invalid netloc must be formatted with repr()
        # to get an ASCII error message
        with self.assertRaises(ValueError) as cm:
            urlparse.urlsplit(u'http://example.com\uFF03@bing.com')
        self.assertEqual(str(cm.exception),
                         "netloc u'example.com\\uff03@bing.com' contains invalid characters "
                         "under NFKC normalization")
        self.assertIsInstance(cm.exception.args[0], str) 
Developer: IronLanguages, Project: ironpython2, Lines: 52, Source: test_urlparse.py


Note: The unicodedata.decomposition examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors. For distribution and use, please follow the license of the corresponding project; do not reproduce without permission.