Python six.unichr方法代码示例

本文整理汇总了Python中six.unichr方法的典型用法代码示例。如果您正苦于以下问题：Python six.unichr方法的具体用法？Python six.unichr怎么用？Python six.unichr使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块six的用法示例。

在下文中一共展示了six.unichr方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _unescape_token

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def _unescape_token(escaped_token):
  """Undo the escaping applied by _escape_token().

  A single trailing "_" is removed, then every match of _UNESCAPE_REGEX is
  substituted: a match whose first group is set is parsed as a base-10 code
  point and turned into that character; a match without it becomes "_" (when
  the matched text is a backslash followed by "u") or a backslash.

  Args:
    escaped_token: a unicode string

  Returns:
    token: a unicode string
  """
  if escaped_token.endswith("_"):
    body = escaped_token[:-1]
  else:
    body = escaped_token

  def _replace(found):
    digits = found.group(1)
    if digits is None:
      if found.group(0) == u"\\u":
        return u"_"
      return u"\\"
    try:
      return six.unichr(int(digits))
    except (ValueError, OverflowError):
      # Code point could not be converted to a character.
      return u"\u3013"  # Unicode for undefined character.

  return _UNESCAPE_REGEX.sub(_replace, body)

开发者ID:akzaidi，项目名称:fine-lm，代码行数:23，代码来源:text_encoder.py

示例2: getRaw

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def getRaw(self, idx):
        """Return the string stored at position ``idx`` of the string block.

        The entry's start offset is taken from ``self.m_stringOffsets``; the
        short read there via ``self.getShort`` is the character count, and
        each character is then read as one short at successive 2-byte steps.

        Args:
            idx: index into ``self.m_stringOffsets``.

        Returns:
            The decoded string with every "&" character dropped, or None when
            ``idx`` is negative, out of range, or there are no offsets.
        """
        offsets = self.m_stringOffsets
        if idx < 0 or offsets == [] or idx >= len(offsets):
            return None

        position = offsets[idx].get_value()
        char_count = self.getShort(self.m_strings, position)

        pieces = []
        for _ in range(char_count):
            position += 2
            # get the unicode character as the apk might contain non-ASCII label
            char = six.unichr(self.getShort(self.m_strings, position))

            # FIXME: "&" characters are discarded instead of being kept.
            if char != "&":
                pieces.append(char)

        return "".join(pieces)

开发者ID:AirtestProject，项目名称:Airtest，代码行数:23，代码来源:stringblock.py

示例3: handle_charref

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def handle_charref(self, name):
        """
        Called when a numeric char ref like '&#8212;' or '&#x2014;' is found

        `name` is the char ref without the leading '&#' and the trailing
        semicolon (e.g. `8212` or `x2014`). The hexadecimal marker is accepted
        in either case (`x` or `X`).

        The resolved character, or an empty string when the reference cannot
        be converted, is forwarded to `self._handle_ref` together with the
        reference text `'#' + name`.
        """
        try:
            # '&#X2014;' is as valid as '&#x2014;', so accept both markers.
            if name.startswith(('x', 'X')):
                codepoint = int(name[1:], 16)
            else:
                codepoint = int(name)
            char = six.unichr(codepoint)
        except (ValueError, OverflowError):
            # Not a number, or outside the range of valid code points.
            char = ''
        self._handle_ref('#' + name, char)

开发者ID:honmaple，项目名称:maple-blog，代码行数:18，代码来源:markup.py

示例4: _unescape_token

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def _unescape_token(escaped_token):
    """Reverse the transformation performed by _escape_token().

    One trailing "_" is stripped if present; afterwards each match of
    _UNESCAPE_REGEX is replaced. A match with its first group set is read as
    a base-10 code point and converted to that character; any other match
    yields "_" (for a backslash followed by "u") or a single backslash.

    Args:
        escaped_token: a unicode string

    Returns:
        token: a unicode string
    """
    if escaped_token.endswith("_"):
        trimmed = escaped_token[:-1]
    else:
        trimmed = escaped_token

    def _substitute(hit):
        code_point_text = hit.group(1)
        if code_point_text is None:
            if hit.group(0) == u"\\u":
                return u"_"
            return u"\\"
        try:
            return six.unichr(int(code_point_text))
        except (ValueError, OverflowError):
            # Conversion failed; substitute a placeholder.
            return u"\u3013"    # Unicode for undefined character.

    return _UNESCAPE_REGEX.sub(_substitute, trimmed)

开发者ID:at16k，项目名称:at16k，代码行数:24，代码来源:text_encoder.py

示例5: join_phonemes

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def join_phonemes(*args):
    """Joins a Hangul letter from Korean phonemes.

    Accepts either separate arguments ``onset, nucleus[, coda]`` or one
    sequence holding them.  When only two phonemes are given, ``CODAS[0]``
    is appended as the coda.

    Raises:
        TypeError: if the phonemes do not unpack into exactly three values.
    """
    # A lone argument is itself the (onset, nucleus[, coda]) sequence.
    if len(args) == 1:
        args = args[0]
    # Only onset and nucleus supplied: fill in the default coda.
    if len(args) == 2:
        args += (CODAS[0],)
    try:
        onset, nucleus, coda = args
    except ValueError:
        raise TypeError('join_phonemes() takes at most 3 arguments')
    onset_index = ONSETS.index(onset)
    nucleus_index = NUCLEUSES.index(nucleus)
    syllable_index = onset_index * NUM_NUCLEUSES + nucleus_index
    offset = syllable_index * NUM_CODAS + CODAS.index(coda)
    return unichr(FIRST_HANGUL_OFFSET + offset)

开发者ID:what-studio，项目名称:tossi，代码行数:19，代码来源:hangul.py

示例6: test_import_file_path_utf8_encoded_ascii_latin_nbsp

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def test_import_file_path_utf8_encoded_ascii_latin_nbsp():
    """Import a file whose UTF-8 encoded byte path contains chr(160).

    A copy of 'valid.txt' is placed at a path containing the character with
    code point 160, the path is handed to ``elodie.import_file`` as UTF-8
    bytes, and the returned destination path is checked for the expected
    folder and file name.
    """
    _, folder = helper.create_working_folder()
    _, folder_destination = helper.create_working_folder()

    try:
        origin = text_type(folder)+u'/unicode'+six_unichr(160)+u'filename.txt'
        # encode the unicode string to UTF-8 bytes
        origin = origin.encode('utf-8')

        shutil.copyfile(helper.get_file('valid.txt'), origin)

        helper.reset_dbs()
        try:
            dest_path = elodie.import_file(origin, folder_destination, False, False, False)
        finally:
            # Put the databases back even when the import raises.
            helper.restore_dbs()
    finally:
        # Always remove the working folders, including on failure.
        shutil.rmtree(folder)
        shutil.rmtree(folder_destination)

    assert helper.path_tz_fix(os.path.join('2016-04-Apr','London',u'2016-04-07_11-15-26-unicode\xa0filename-sample-title.txt')) in dest_path, dest_path

开发者ID:jmathai，项目名称:elodie，代码行数:20，代码来源:elodie_test.py

示例7: test_import_file_path_unicode_latin_nbsp

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def test_import_file_path_unicode_latin_nbsp():
    """Import a file whose unicode path contains chr(160).

    A copy of 'valid.txt' is placed at a path containing the character with
    code point 160, the unicode path is handed to ``elodie.import_file``, and
    the returned destination path is checked for the expected folder and
    file name.
    """
    _, folder = helper.create_working_folder()
    _, folder_destination = helper.create_working_folder()

    try:
        origin = text_type(folder)+u'/unicode'+six_unichr(160)+u'filename.txt'

        shutil.copyfile(helper.get_file('valid.txt'), origin)

        helper.reset_dbs()
        try:
            dest_path = elodie.import_file(origin, folder_destination, False, False, False)
        finally:
            # Put the databases back even when the import raises.
            helper.restore_dbs()
    finally:
        # Always remove the working folders, including on failure.
        shutil.rmtree(folder)
        shutil.rmtree(folder_destination)

    assert helper.path_tz_fix(os.path.join('2016-04-Apr','London',u'2016-04-07_11-15-26-unicode\xa0filename-sample-title.txt')) in dest_path, dest_path

开发者ID:jmathai，项目名称:elodie，代码行数:18，代码来源:elodie_test.py

示例8: _narrow_unichr

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def _narrow_unichr(code_point):
    """Retrieves the unicode character representing any given code point, in a way that won't break on narrow builds.

    This is necessary because the built-in unichr function will fail for ordinals above 0xFFFF on narrow builds (UCS2);
    ordinals above 0xFFFF would require recalculating and combining surrogate pairs. This avoids that by retrieving the
    unicode character that was initially read.

    Args:
        code_point (int|CodePoint): An int or a subclass of int that contains the unicode character representing its
            code point in an attribute named 'char'.
    """
    try:
        if len(code_point.char) > 1:
            return code_point.char
    except AttributeError:
        pass
    return six.unichr(code_point)

开发者ID:amzn，项目名称:ion-python，代码行数:19，代码来源:reader.py

示例9: test_exception_converted

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def test_exception_converted(self):
        """Check that validate_integer raises InvalidInput for each bad call."""
        # Each entry: (positional args, keyword args) for utils.validate_integer.
        invalid_calls = (
            (("im-not-an-int", "not-an-int"), {}),
            ((3.14, "Pie"), {}),
            (("299", "Sparta no-show"), {'min_value': 300, 'max_value': 300}),
            ((55, "doing 55 in a 54"), {'max_value': 54}),
            ((six.unichr(129), "UnicodeError"), {'max_value': 1000}),
        )
        for call_args, call_kwargs in invalid_calls:
            self.assertRaises(exception.InvalidInput,
                              utils.validate_integer,
                              *call_args, **call_kwargs)

开发者ID:openstack，项目名称:masakari，代码行数:21，代码来源:test_utils.py

示例10: _unescape_token

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def _unescape_token(escaped_token):
  """Undo the escaping applied by _escape_token().

  A single trailing "_" is removed, then every match of _UNESCAPE_REGEX is
  substituted: a match whose first group is set is parsed as a base-10 code
  point and turned into that character (or dropped, if the conversion
  fails); a match without it becomes "_" (when the matched text is a
  backslash followed by "u") or a backslash.

  Args:
    escaped_token: a unicode string

  Returns:
    token: a unicode string
  """
  if escaped_token.endswith("_"):
    body = escaped_token[:-1]
  else:
    body = escaped_token

  def _replace(found):
    digits = found.group(1)
    if digits is None:
      if found.group(0) == u"\\u":
        return u"_"
      return u"\\"
    try:
      return six.unichr(int(digits))
    except (ValueError, OverflowError):
      # Unconvertible code points are removed from the output.
      return ""

  return _UNESCAPE_REGEX.sub(_replace, body)

开发者ID:eBay，项目名称:Sequence-Semantic-Embedding，代码行数:23，代码来源:text_encoder.py

示例11: test_unicode_combining_characters

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def test_unicode_combining_characters(self):
        """Search composite and combining spellings of one character against each other."""
        # Some unicode characters can be represented in multiple ways - for
        # example, an accented character may be a single code point (with the
        # accent baked in), or it may be the "normal" letter with a combining
        # code point. See https://docs.python.org/2/library/unicodedata.html.
        # The points below are for a capital C with a cedilla, first as a
        # composite character, second as a pairing of C and the cedilla
        # combining character.
        composite = six.unichr(0xC7)
        combining = six.unichr(0x43) + six.unichr(0x0327)
        spellings = (composite, combining)

        # Every spelling used as the searcher's text is searched for in every
        # spelling.
        for text in spellings:
            searcher = TextSearcher(text)
            for target in spellings:
                self.assertIsNotNone(searcher.search(target))

开发者ID:digidotcom，项目名称:python-streamexpect，代码行数:18，代码来源:test_streamexpect.py

示例12: ntou

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def ntou(n, encoding='ISO-8859-1'):
        """Return the native string as Unicode with the given encoding.

        Args:
            n: a native string; it is checked with ``assert_native``.
            encoding: codec used to decode ``n``. The special value
                ``'escape'`` decodes as ISO-8859-1 and then expands each
                embedded backslash-u escape followed by exactly four
                hexadecimal digits into the character with that code point.

        Returns:
            The decoded Unicode string.
        """
        assert_native(n)
        # In Python 2, the native string type is bytes.
        # First, check for the special encoding 'escape'. The test suite uses
        # this to signal that it wants to pass a string with embedded \uXXXX
        # escapes, but without having to prefix it with u'' for Python 2,
        # but no prefix for Python 3.
        if encoding == 'escape':
            return re.sub(
                # Match hexadecimal digits only: a wider class such as a-z
                # would let non-hex text through to int(..., 16), which
                # raises ValueError.
                r'\\u([0-9a-fA-F]{4})',
                lambda m: six.unichr(int(m.group(1), 16)),
                n.decode('ISO-8859-1'),
            )
        # Assume it's already in the given encoding, which for ISO-8859-1
        # is almost always what was intended.
        return n.decode(encoding)

开发者ID:cherrypy，项目名称:cheroot，代码行数:19，代码来源:_compat.py

示例13: compose

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def compose(chosung, joongsung, jongsung=u''):
    """Compose one Hangul letter from the given chosung, joongsung, and jongsung.

    Each part is looked up in its table (CHO, JOONG, JONG) and the three
    positions are combined into a code point starting at 0xAC00.

    @param chosung
    @param joongsung
    @param jongsung the terminal Hangul letter. Optional; None is treated the same as the empty default.
    @raise NotHangulException if any of the lookups fails."""

    if jongsung is None:
        jongsung = u''

    try:
        indices = (
            CHO.index(chosung),
            JOONG.index(joongsung),
            JONG.index(jongsung),
        )
    except Exception:
        raise NotHangulException('No valid Hangul character index')

    cho_pos, joong_pos, jong_pos = indices
    return unichr(0xAC00 + cho_pos * NUM_JOONG * NUM_JONG + joong_pos * NUM_JONG + jong_pos)

开发者ID:bluedisk，项目名称:hangul-toolkit，代码行数:18，代码来源:letter.py

示例14: property_chars

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def property_chars(self, prefix):
    """Return all characters whose Unicode category starts with ``prefix``.

    Args:
      prefix: leading part of a category code as returned by
        ``unicodedata.category``.

    Returns:
      A string of every matching character in code point order, covering
      code points 0 through ``sys.maxunicode`` inclusive.
    """
    # range() excludes its stop value, so add 1 to include sys.maxunicode.
    # Each character is built once and reused for the category lookup.
    chars = (six.unichr(x) for x in range(sys.maxunicode + 1))
    return "".join(c for c in chars
                   if unicodedata.category(c).startswith(prefix))

开发者ID:akzaidi，项目名称:fine-lm，代码行数:5，代码来源:bleu_hook.py

示例15: test_invertibility_on_random_strings

# 需要导入模块: import six [as 别名]
# 或者: from six import unichr [as 别名]
def test_invertibility_on_random_strings(self):
    """Round-trip 1000 random 10-character strings through tokenizer.encode/decode."""
    for _ in range(1000):
      # Ten code points drawn from 0..65535, joined into one unicode string.
      code_points = [random.randint(0, 65535) for _unused in range(10)]
      s = u"".join(map(six.unichr, code_points))
      round_tripped = tokenizer.decode(tokenizer.encode(s))
      self.assertEqual(s, round_tripped)

开发者ID:akzaidi，项目名称:fine-lm，代码行数:6，代码来源:tokenizer_test.py

注：本文中的six.unichr方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。