当前位置: 首页>>代码示例>>Python>>正文


Python six.unichr方法代码示例

本文整理汇总了 Python 中 six.unichr 方法的典型用法代码示例。如果您正苦于以下问题：Python six.unichr 方法的具体用法？Python six.unichr 怎么用？Python six.unichr 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 six 的用法示例。


在下文中一共展示了six.unichr方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _unescape_token

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def _unescape_token(escaped_token):
  """Undo the escaping performed by _escape_token().

  Args:
    escaped_token: a unicode string

  Returns:
    token: a unicode string
  """

  def _replace(found):
    # When group 1 is absent the match is one of the two literal escapes;
    # otherwise group 1 is parsed below as a decimal code point.
    digits = found.group(1)
    if digits is None:
      if found.group(0) == u"\\u":
        return u"_"
      return u"\\"

    try:
      code_point = int(digits)
      return six.unichr(code_point)
    except (ValueError, OverflowError):
      return u"\u3013"  # Unicode for undefined character.

  # A single trailing underscore, when present, is dropped before unescaping.
  if escaped_token.endswith("_"):
    body = escaped_token[:-1]
  else:
    body = escaped_token
  return _UNESCAPE_REGEX.sub(_replace, body)
开发者ID:akzaidi,项目名称:fine-lm,代码行数:23,代码来源:text_encoder.py

示例2: getRaw

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def getRaw(self, idx):
    """Return the string stored at index ``idx``.

    Returns None when ``idx`` is negative, when there are no string
    offsets, or when ``idx`` is past the last offset.
    """
    if idx < 0:
        return None
    offsets = self.m_stringOffsets
    if offsets == [] or idx >= len(offsets):
        return None

    position = offsets[idx].get_value()
    # The value read at the string's offset is the number of characters.
    remaining = self.getShort(self.m_strings, position)

    pieces = []
    for _ in range(remaining):
        position += 2
        # get the unicode character as the apk might contain non-ASCII label
        char = six.unichr(self.getShort(self.m_strings, position))

        # FIXME (carried over from the original): "&" characters are dropped
        if char != "&":
            pieces.append(char)

    return "".join(pieces)
开发者ID:AirtestProject,项目名称:Airtest,代码行数:23,代码来源:stringblock.py

示例3: handle_charref

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def handle_charref(self, name):
    """
    Called when a numeric char ref like '&#8212;' or '&#x2014;' is found

    `name` is the char ref without the leading '&#' and the trailing
    semicolon (e.g. `8212` or `x2014`); the '#' is put back before the
    reference is handed to `_handle_ref`.

    A reference that cannot be converted to a character is handed on with
    an empty string as its replacement.
    """
    try:
        # The hexadecimal marker may be written in either case
        # ('&#x2014;' or '&#X2014;').
        if name.startswith(('x', 'X')):
            codepoint = int(name[1:], 16)
        else:
            codepoint = int(name)
        char = six.unichr(codepoint)
    except (ValueError, OverflowError):
        # Not a number, or outside the range unichr accepts.
        char = ''
    self._handle_ref('#' + name, char)
开发者ID:honmaple,项目名称:maple-blog,代码行数:18,代码来源:markup.py

示例4: _unescape_token

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def _unescape_token(escaped_token):
    """Undo the escaping performed by _escape_token().

    Args:
        escaped_token: a unicode string

    Returns:
        token: a unicode string
    """

    def _replace(found):
        # When group 1 is absent the match is one of the two literal
        # escapes; otherwise group 1 is parsed below as a decimal code point.
        digits = found.group(1)
        if digits is None:
            if found.group(0) == u"\\u":
                return u"_"
            return u"\\"

        try:
            code_point = int(digits)
            return six.unichr(code_point)
        except (ValueError, OverflowError):
            return u"\u3013"    # Unicode for undefined character.

    # A single trailing underscore, when present, is dropped first.
    if escaped_token.endswith("_"):
        body = escaped_token[:-1]
    else:
        body = escaped_token
    return _UNESCAPE_REGEX.sub(_replace, body)
开发者ID:at16k,项目名称:at16k,代码行数:24,代码来源:text_encoder.py

示例5: join_phonemes

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def join_phonemes(*args):
    """Joins a Hangul letter from Korean phonemes.

    Accepts either separate phonemes (onset, nucleus and an optional coda)
    or a single sequence holding them. Raises TypeError when the phonemes
    cannot be unpacked into exactly three values.
    """
    # A lone argument is taken to be the (onset, nucleus[, coda]) sequence.
    phonemes = args[0] if len(args) == 1 else args
    if len(phonemes) == 2:
        # No coda supplied: pad with the first entry of CODAS.
        phonemes += (CODAS[0],)
    try:
        onset, nucleus, coda = phonemes
    except ValueError:
        raise TypeError('join_phonemes() takes at most 3 arguments')
    onset_index = ONSETS.index(onset)
    nucleus_index = NUCLEUSES.index(nucleus)
    coda_index = CODAS.index(coda)
    syllable_index = (
        (onset_index * NUM_NUCLEUSES + nucleus_index) * NUM_CODAS + coda_index
    )
    return unichr(FIRST_HANGUL_OFFSET + syllable_index)
开发者ID:what-studio,项目名称:tossi,代码行数:19,代码来源:hangul.py

示例6: test_import_file_path_utf8_encoded_ascii_latin_nbsp

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def test_import_file_path_utf8_encoded_ascii_latin_nbsp():
    """Import a file whose UTF-8 encoded path contains a no-break space."""
    source_root, source_dir = helper.create_working_folder()
    target_root, target_dir = helper.create_working_folder()

    # six_unichr(160) is U+00A0, which appears as \xa0 in the expected name.
    origin_text = u''.join([
        text_type(source_dir),
        u'/unicode',
        six_unichr(160),
        u'filename.txt',
    ])
    # Encode the unicode path to UTF-8 bytes before handing it over.
    origin_bytes = origin_text.encode('utf-8')

    shutil.copyfile(helper.get_file('valid.txt'), origin_bytes)

    helper.reset_dbs()
    dest_path = elodie.import_file(origin_bytes, target_dir, False, False, False)
    helper.restore_dbs()

    for working_dir in (source_dir, target_dir):
        shutil.rmtree(working_dir)

    expected_tail = helper.path_tz_fix(
        os.path.join(
            '2016-04-Apr',
            'London',
            u'2016-04-07_11-15-26-unicode\xa0filename-sample-title.txt',
        )
    )
    assert expected_tail in dest_path, dest_path
开发者ID:jmathai,项目名称:elodie,代码行数:20,代码来源:elodie_test.py

示例7: test_import_file_path_unicode_latin_nbsp

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def test_import_file_path_unicode_latin_nbsp():
    """Import a file whose unicode path contains a no-break space."""
    source_root, source_dir = helper.create_working_folder()
    target_root, target_dir = helper.create_working_folder()

    # six_unichr(160) is U+00A0, which appears as \xa0 in the expected name.
    origin_path = u''.join([
        text_type(source_dir),
        u'/unicode',
        six_unichr(160),
        u'filename.txt',
    ])

    shutil.copyfile(helper.get_file('valid.txt'), origin_path)

    helper.reset_dbs()
    dest_path = elodie.import_file(origin_path, target_dir, False, False, False)
    helper.restore_dbs()

    for working_dir in (source_dir, target_dir):
        shutil.rmtree(working_dir)

    expected_tail = helper.path_tz_fix(
        os.path.join(
            '2016-04-Apr',
            'London',
            u'2016-04-07_11-15-26-unicode\xa0filename-sample-title.txt',
        )
    )
    assert expected_tail in dest_path, dest_path
开发者ID:jmathai,项目名称:elodie,代码行数:18,代码来源:elodie_test.py

示例8: _narrow_unichr

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def _narrow_unichr(code_point):
    """Retrieves the unicode character representing any given code point, in a way that won't break on narrow builds.

    This is necessary because the built-in unichr function will fail for ordinals above 0xFFFF on narrow builds (UCS2);
    ordinals above 0xFFFF would require recalculating and combining surrogate pairs. This avoids that by retrieving the
    unicode character that was initially read.

    Args:
        code_point (int|CodePoint): An int or a subclass of int that contains the unicode character representing its
            code point in an attribute named 'char'.
    """
    try:
        if len(code_point.char) > 1:
            return code_point.char
    except AttributeError:
        pass
    return six.unichr(code_point) 
开发者ID:amzn,项目名称:ion-python,代码行数:19,代码来源:reader.py

示例9: test_exception_converted

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def test_exception_converted(self):
    """Each invalid argument set must make validate_integer raise InvalidInput."""
    # (positional arguments, keyword arguments) for utils.validate_integer
    invalid_calls = [
        (("im-not-an-int", "not-an-int"), {}),
        ((3.14, "Pie"), {}),
        (("299", "Sparta no-show"), {"min_value": 300, "max_value": 300}),
        ((55, "doing 55 in a 54"), {"max_value": 54}),
        ((six.unichr(129), "UnicodeError"), {"max_value": 1000}),
    ]
    for positional, keywords in invalid_calls:
        self.assertRaises(exception.InvalidInput,
                          utils.validate_integer,
                          *positional, **keywords)
开发者ID:openstack,项目名称:masakari,代码行数:21,代码来源:test_utils.py

示例10: _unescape_token

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def _unescape_token(escaped_token):
  """Undo the escaping performed by _escape_token().

  Args:
    escaped_token: a unicode string

  Returns:
    token: a unicode string
  """

  def _replace(found):
    # When group 1 is absent the match is one of the two literal escapes;
    # otherwise group 1 is parsed below as a decimal code point.
    digits = found.group(1)
    if digits is None:
      if found.group(0) == u"\\u":
        return u"_"
      return u"\\"

    try:
      code_point = int(digits)
      return six.unichr(code_point)
    except (ValueError, OverflowError):
      # Code points that cannot be converted are dropped from the output.
      return ""

  # A single trailing underscore, when present, is dropped before unescaping.
  if escaped_token.endswith("_"):
    body = escaped_token[:-1]
  else:
    body = escaped_token
  return _UNESCAPE_REGEX.sub(_replace, body)
开发者ID:eBay,项目名称:Sequence-Semantic-Embedding,代码行数:23,代码来源:text_encoder.py

示例11: test_unicode_combining_characters

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def test_unicode_combining_characters(self):
    """Both spellings of the same accented letter must find each other."""
    # An accented character can be written as one code point with the accent
    # baked in, or as the plain letter followed by a combining code point.
    # See https://docs.python.org/2/library/unicodedata.html.
    # Here: capital C with cedilla, first precomposed, then as C followed by
    # the combining cedilla.
    precomposed = six.unichr(0xC7)
    decomposed = six.unichr(0x43) + six.unichr(0x0327)
    spellings = (precomposed, decomposed)

    # Every spelling is used as the searched text and as the search target.
    for haystack in spellings:
        searcher = TextSearcher(haystack)
        for needle in spellings:
            self.assertIsNotNone(searcher.search(needle))
开发者ID:digidotcom,项目名称:python-streamexpect,代码行数:18,代码来源:test_streamexpect.py

示例12: ntou

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def ntou(n, encoding='ISO-8859-1'):
    """Return the native string as Unicode with the given encoding."""
    assert_native(n)
    # In Python 2, the native string type is bytes.
    if encoding != 'escape':
        # Assume it's already in the given encoding, which for ISO-8859-1
        # is almost always what was intended.
        return n.decode(encoding)

    # 'escape' is a special encoding: the test suite uses it to pass a string
    # with embedded \uXXXX escapes without needing a u'' prefix on Python 2
    # and no prefix on Python 3.
    def _expand(found):
        return six.unichr(int(found.group(1), 16))

    # NOTE(review): the pattern also admits the non-hex letters g-z, for
    # which int(..., 16) raises ValueError.
    return re.sub(r'\\u([0-9a-zA-Z]{4})', _expand, n.decode('ISO-8859-1'))
开发者ID:cherrypy,项目名称:cheroot,代码行数:19,代码来源:_compat.py

示例13: compose

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def compose(chosung, joongsung, jongsung=u''):
    """Return the Hangul letter composed of the given chosung, joongsung and jongsung.
    @param chosung
    @param joongsung
    @param jongsung the terminal Hangul letter. Optional; None is treated like the empty default.
    @raise NotHangulException when any of the three index lookups fails."""

    if jongsung is None:
        jongsung = u''

    try:
        indices = (
            CHO.index(chosung),
            JOONG.index(joongsung),
            JONG.index(jongsung),
        )
    except Exception:
        raise NotHangulException('No valid Hangul character index')

    chosung_index, joongsung_index, jongsung_index = indices
    # Offset of the syllable counted from 0xAC00.
    syllable_offset = (chosung_index * NUM_JOONG + joongsung_index) * NUM_JONG + jongsung_index
    return unichr(0xAC00 + syllable_offset)
开发者ID:bluedisk,项目名称:hangul-toolkit,代码行数:18,代码来源:letter.py

示例14: property_chars

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def property_chars(self, prefix):
    """Return every character whose Unicode category starts with `prefix`.

    Args:
      prefix: leading part of a category name as reported by
        `unicodedata.category`.

    Returns:
      One string holding all matching characters in code point order.
    """
    # sys.maxunicode is itself the largest code point, so the range has to
    # include it; each character is built only once.
    candidates = (six.unichr(x) for x in range(sys.maxunicode + 1))
    return "".join(char for char in candidates
                   if unicodedata.category(char).startswith(prefix))
开发者ID:akzaidi,项目名称:fine-lm,代码行数:5,代码来源:bleu_hook.py

示例15: test_invertibility_on_random_strings

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def test_invertibility_on_random_strings(self):
    """Encoding then decoding a random 10-character string must return it unchanged."""
    for _ in range(1000):
        # Ten characters drawn from code points 0..65535 inclusive.
        random_chars = (
            six.unichr(random.randint(0, 65535)) for _unused in range(10)
        )
        original = u"".join(random_chars)
        round_tripped = tokenizer.decode(tokenizer.encode(original))
        self.assertEqual(original, round_tripped)
开发者ID:akzaidi,项目名称:fine-lm,代码行数:6,代码来源:tokenizer_test.py


注:本文中的six.unichr方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。