

Python unicodedata.decomposition method: code examples

This article collects typical usage examples of Python's unicodedata.decomposition method. If you are wondering how to call unicodedata.decomposition, what it returns, or how it is used in real code, the selected examples below may help. unicodedata.decomposition(chr) returns the character's decomposition mapping as a string of hexadecimal code points, optionally prefixed with a formatting tag such as <compat> or <wide>, or an empty string if the character has no decomposition. You can also explore further usage examples from the unicodedata module.


The following presents 6 code examples of the unicodedata.decomposition method, sorted by popularity by default.
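Before diving into the examples, here is a minimal sketch (not taken from any of the projects below) of the three typical return values of unicodedata.decomposition: a canonical decomposition, a tagged compatibility decomposition, and the empty string for characters that do not decompose.

import unicodedata

# Canonical decomposition: 'é' (U+00E9) decomposes into 'e' plus a combining acute accent.
print(unicodedata.decomposition('\u00e9'))  # '0065 0301'

# Compatibility decompositions carry a tag such as <compat> or <wide>.
print(unicodedata.decomposition('\ufb01'))  # '<compat> 0066 0069' (the 'fi' ligature)
print(unicodedata.decomposition('\uff03'))  # '<wide> 0023' (fullwidth '#')

# Characters with no decomposition return an empty string.
print(unicodedata.decomposition('\u4e2d'))  # ''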

Example 1: test_ipy2_gh357

# Required module: import unicodedata [as alias]
# Or: from unicodedata import decomposition [as alias]
def test_ipy2_gh357(self):
        """https://github.com/IronLanguages/ironpython2/issues/357"""

        import unicodedata

        if is_cli:
            self.assertEqual(unicodedata.name(u'\u4e2d'), '<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>')
        else:
            self.assertEqual(unicodedata.name(u'\u4e2d'), 'CJK UNIFIED IDEOGRAPH-4E2D')

        self.assertRaises(ValueError, unicodedata.decimal, u'\u4e2d')
        self.assertEqual(unicodedata.decimal(u'\u4e2d', 0), 0)
        self.assertRaises(ValueError, unicodedata.digit, u'\u4e2d')
        self.assertEqual(unicodedata.digit(u'\u4e2d', 0), 0)
        self.assertRaises(ValueError, unicodedata.numeric, u'\u4e2d')
        self.assertEqual(unicodedata.numeric(u'\u4e2d', 0), 0)
        self.assertEqual(unicodedata.category(u'\u4e2d'), 'Lo')
        self.assertEqual(unicodedata.bidirectional(u'\u4e2d'), 'L')
        self.assertEqual(unicodedata.combining(u'\u4e2d'), 0)
        self.assertEqual(unicodedata.east_asian_width(u'\u4e2d'), 'W')
        self.assertEqual(unicodedata.mirrored(u'\u4e2d'), 0)
        self.assertEqual(unicodedata.decomposition(u'\u4e2d'), '') 
Developer: IronLanguages, Project: ironpython2, Lines of code: 24, Source file: test_regressions.py

Example 2: test_urlsplit_normalization

# Required module: import unicodedata [as alias]
# Or: from unicodedata import decomposition [as alias]
def test_urlsplit_normalization(self):
        # Certain characters should never occur in the netloc,
        # including under normalization.
        # Ensure that ALL of them are detected and cause an error
        illegal_chars = '/:#?@'
        hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
        denorm_chars = [
            c for c in map(chr, range(128, sys.maxunicode))
            if (hex_chars & set(unicodedata.decomposition(c).split()))
            and c not in illegal_chars
        ]
        # Sanity check that we found at least one such character
        self.assertIn('\u2100', denorm_chars)
        self.assertIn('\uFF03', denorm_chars)

        for scheme in ["http", "https", "ftp"]:
            for c in denorm_chars:
                url = "{}://netloc{}false.netloc/path".format(scheme, c)
                with self.subTest(url=url, char='{:04X}'.format(ord(c))):
                    with self.assertRaises(ValueError):
                        urllib.parse.urlsplit(url) 
Developer: bkerler, Project: android_universal, Lines of code: 23, Source file: test_urlparse.py
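To see why the filter in this test flags a character such as U+FF03 (fullwidth '#'), a quick check outside the test suite (a sketch, not part of the project) shows that its decomposition contains the code point of an illegal netloc character:

import unicodedata

illegal_chars = '/:#?@'
hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}  # {'002F', '003A', '0023', '003F', '0040'}

decomp = unicodedata.decomposition('\uff03')  # '<wide> 0023'
print(hex_chars & set(decomp.split()))        # {'0023'} -> non-empty, so U+FF03 is treated as a denormalized '#'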

Example 3: test_urlsplit_normalization

# Required module: import unicodedata [as alias]
# Or: from unicodedata import decomposition [as alias]
def test_urlsplit_normalization(self):
        # Certain characters should never occur in the netloc,
        # including under normalization.
        # Ensure that ALL of them are detected and cause an error
        illegal_chars = '/:#?@'
        hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
        maxunicode = 0xffff if sys.implementation.name == "ironpython" else sys.maxunicode # https://github.com/IronLanguages/ironpython3/issues/252
        denorm_chars = [
            c for c in map(chr, range(128, maxunicode))
            if (hex_chars & set(unicodedata.decomposition(c).split()))
            and c not in illegal_chars
        ]
        # Sanity check that we found at least one such character
        self.assertIn('\u2100', denorm_chars)
        self.assertIn('\uFF03', denorm_chars)

        # https://github.com/IronLanguages/ironpython3/issues/614
        is_mono = False
        mono_issue_chars = ("\ufe13", "\ufe16", "\ufe5f")
        if sys.implementation.name == "ironpython":
            import clr
            is_mono = clr.IsMono

        for scheme in ["http", "https", "ftp"]:
            for c in denorm_chars:
                url = "{}://netloc{}false.netloc/path".format(scheme, c)
                with self.subTest(url=url, char='{:04X}'.format(ord(c))):
                    if is_mono and c in mono_issue_chars:
                        urllib.parse.urlsplit(url) # ensure we fail if this ever gets fixed
                        continue
                    with self.assertRaises(ValueError):
                        urllib.parse.urlsplit(url) 
Developer: IronLanguages, Project: ironpython3, Lines of code: 34, Source file: test_urlparse.py

Example 4: clean_unicode

# Required module: import unicodedata [as alias]
# Or: from unicodedata import decomposition [as alias]
def clean_unicode(s):
    s = s.replace("\u00ad", "")  # soft hyphen
    s = s.replace("\u2010", "-")  # hyphen

    # Some sources encode an i with an accent above using dotless i,
    # which must be converted to normal i
    s = list(s)
    for i in range(len(s) - 1):
        # bug: we should only be looking for accents above, not
        # below
        if s[i] == "ı" and unicodedata.category(s[i + 1]) == "Mn":
            s[i] = "i"
    s = "".join(s)

    # Selectively apply compatibility decomposition.
    # This converts, e.g., ﬁ to fi and ： to :, but not ² to 2.
    # Unsure: … to ...
    # More classes could be added here.
    def decompose(c):
        d = unicodedata.decomposition(c)
        if d and d.split(None, 1)[0] in ["<compat>", "<wide>", "<narrow>", "<noBreak>"]:
            return unicodedata.normalize("NFKD", c)
        else:
            return c

    s = "".join(map(decompose, s))

    # Convert combining characters when possible
    s = unicodedata.normalize("NFC", s)

    return s 
Developer: acl-org, Project: acl-anthology, Lines of code: 33, Source file: normalize_anth.py
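As a quick usage sketch (assuming clean_unicode from the example above is in scope; the sample string is made up for illustration):

s = "e\u0301\ufb01nal\u00ad \u00b2"  # combining acute accent, 'fi' ligature, soft hyphen, superscript two
print(clean_unicode(s))              # 'éfinal ²' -- ligature expanded, accent composed, soft hyphen dropped, '²' left alone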

Example 5: __missing__

# Required module: import unicodedata [as alias]
# Or: from unicodedata import decomposition [as alias]
def __missing__(self, key):
        ch = self.get(key)
        if ch is not None:
            return ch
        de = unicodedata.decomposition(chr(key))
        if de:
            try:
                ch = int(de.split(None, 1)[0], 16)
            except (IndexError, ValueError):
                ch = key
        else:
            ch = key
        self[key] = ch
        return ch 
Developer: PacktPublishing, Project: Modern-Python-Standard-Library-Cookbook, Lines of code: 16, Source file: text_07.py
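The snippet above shows only the __missing__ method. As a hedged sketch of how such a mapping can be wired up (the class name DecompositionTable is hypothetical, not taken from the book), it can back str.translate so that each character is replaced by the first code point of its decomposition:

import unicodedata

class DecompositionTable(dict):
    # Hypothetical container; only __missing__ mirrors the example above.
    def __missing__(self, key):
        ch = self.get(key)
        if ch is not None:
            return ch
        de = unicodedata.decomposition(chr(key))
        if de:
            try:
                # Take the first field; tags like '<compat>' fail int() and fall back to the key.
                ch = int(de.split(None, 1)[0], 16)
            except (IndexError, ValueError):
                ch = key
        else:
            ch = key
        self[key] = ch
        return ch

table = DecompositionTable()
print('crème brûlée'.translate(table))  # 'creme brulee' -- accented letters replaced by their base code points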

Example 6: test_urlsplit_normalization

# Required module: import unicodedata [as alias]
# Or: from unicodedata import decomposition [as alias]
def test_urlsplit_normalization(self):
        # Certain characters should never occur in the netloc,
        # including under normalization.
        # Ensure that ALL of them are detected and cause an error
        illegal_chars = u'/:#?@'
        hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
        denorm_chars = [
            c for c in map(unichr, range(128, sys.maxunicode))
            if (hex_chars & set(unicodedata.decomposition(c).split()))
            and c not in illegal_chars
        ]
        # Sanity check that we found at least one such character
        self.assertIn(u'\u2100', denorm_chars)
        self.assertIn(u'\uFF03', denorm_chars)

        # https://github.com/IronLanguages/ironpython3/issues/614
        is_mono = False
        mono_issue_chars = (u"\ufe13", u"\ufe16", u"\ufe5f")
        if sys.platform == "cli":
            import clr
            is_mono = clr.IsMono

        # bpo-36742: Verify port separators are ignored when they
        # existed prior to decomposition
        urlparse.urlsplit(u'http://\u30d5\u309a:80')
        with self.assertRaises(ValueError):
            urlparse.urlsplit(u'http://\u30d5\u309a\ufe1380')
            if is_mono: raise ValueError
        if is_mono: urlparse.urlsplit(u'http://\u30d5\u309a\ufe1380') # ensure we fail if this ever gets fixed

        for scheme in [u"http", u"https", u"ftp"]:
            for netloc in [u"netloc{}false.netloc", u"n{}user@netloc"]:
                for c in denorm_chars:
                    url = u"{}://{}/path".format(scheme, netloc.format(c))
                    if test_support.verbose:
                        print "Checking %r" % url
                    if is_mono and c in mono_issue_chars:
                        urlparse.urlsplit(url) # ensure we fail if this ever gets fixed
                        continue
                    with self.assertRaises(ValueError):
                        urlparse.urlsplit(url)

        # check error message: invalid netloc must be formatted with repr()
        # to get an ASCII error message
        with self.assertRaises(ValueError) as cm:
            urlparse.urlsplit(u'http://example.com\uFF03@bing.com')
        self.assertEqual(str(cm.exception),
                         "netloc u'example.com\\uff03@bing.com' contains invalid characters "
                         "under NFKC normalization")
        self.assertIsInstance(cm.exception.args[0], str) 
Developer: IronLanguages, Project: ironpython2, Lines of code: 52, Source file: test_urlparse.py


Note: The unicodedata.decomposition examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by their authors; copyright of the source code remains with the original authors, and distribution and use should follow each project's License. Do not reproduce without permission.