当前位置: 首页>>代码示例>>Python>>正文


Python string_ops.unicode_transcode函数代码示例

本文整理汇总了Python中tensorflow.python.ops.string_ops.unicode_transcode函数的典型用法代码示例。如果您正苦于以下问题:Python unicode_transcode函数的具体用法?Python unicode_transcode怎么用?Python unicode_transcode使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了unicode_transcode函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_transcode_utf8_simple

  def test_transcode_utf8_simple(self):
    """Checks that ASCII-only byte strings are unchanged by transcoding.

    The same 2x2 batch of ASCII-only strings is transcoded to UTF-8 from
    three declared input encodings (UTF-8, ISO-8859-1 and US-ASCII). In every
    case the evaluated output is expected to equal the input.
    """
    strings = [[b"a", b"abc"], [b"ABC", b"DEF"]]

    with self.cached_session():
      # ASCII bytes are encoded identically in all three input encodings and
      # in UTF-8, so each transcode must be a no-op.
      for input_encoding in ("UTF-8", "ISO-8859-1", "US-ASCII"):
        outputs = string_ops.unicode_transcode(
            strings,
            input_encoding=input_encoding,
            output_encoding="UTF-8",
            errors="replace",
            replacement_char=ord(" "),
            replace_control_characters=False)
        values = self.evaluate(outputs)
        self.assertAllEqual(values, strings)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:33,代码来源:unicode_transcode_op_test.py

示例2: test_invalid_encoding_causes_errors

  def test_invalid_encoding_causes_errors(self):
    """Checks that unknown encoding names are rejected.

    An unknown `input_encoding` is expected to surface as an op error when
    the result is evaluated. An unknown `output_encoding` is expected to
    raise a `ValueError` at some point while building or evaluating the op.
    """
    strings = [[b"a", b"abc"], [b"ABC", b"DEF"]]

    with self.cached_session():
      outputs = string_ops.unicode_transcode(
          strings,
          input_encoding="invalid",
          output_encoding="UTF-8",
          errors="replace",
          replacement_char=ord(" "),
          replace_control_characters=False)
      with self.assertRaisesOpError(
          "Could not create converter for input encoding: invalid"):
        self.evaluate(outputs)

    # NOTE(review): the expected message suggests the ValueError comes from
    # attribute validation when the op is constructed, in which case the
    # evaluate() call below is never reached -- confirm.
    with self.assertRaisesRegex(ValueError, "Op passed string 'invalid'"):
      with self.cached_session():
        outputs = string_ops.unicode_transcode(
            strings,
            input_encoding="UTF-8",
            output_encoding="invalid",
            errors="replace",
            replacement_char=ord(" "),
            replace_control_characters=False)
        self.evaluate(outputs)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:25,代码来源:unicode_transcode_op_test.py

示例3: test_cjk_encodings

  def test_cjk_encodings(self):
    """Checks transcoding from Japanese, Chinese and Korean encodings.

    Byte strings in shift_jis, gb18030, big5 and euc_kr are transcoded to
    UTF-8 by the op. The results are compared against what Python's own
    codecs produce for the same bytes.
    """
    strings_ja = [
        b"\x5c\x5c",  # Yen sign
        b"\x8f\x70",  # kanji character "waza"
        b"\x83\x4f",  # katakana character "gu"
    ]
    strings_zh_cn = [b"\xca\xf5"]  # simplified "shu4"
    strings_zh_tw = [b"\xb3\x4e"]  # traditional "shu4"
    strings_ko = [b"\xc7\xd1\xb9\xce"]  # hangul "hanmin"

    # Each case pairs an input encoding with byte strings in that encoding.
    cases = [
        ("shift_jis", strings_ja),
        ("gb18030", strings_zh_cn),
        ("big5", strings_zh_tw),
        ("euc_kr", strings_ko),
    ]

    # Reference results computed with Python's codecs.
    expected = [
        [s.decode(encoding).encode("UTF-8") for s in strings]
        for encoding, strings in cases
    ]

    with self.cached_session():
      outputs = [
          string_ops.unicode_transcode(
              strings,
              input_encoding=encoding,
              output_encoding="UTF-8",
              replacement_char=ord(" "),
              replace_control_characters=False)
          for encoding, strings in cases
      ]

      # Evaluate all four ops in a single call, as the sibling tests do via
      # self.evaluate().
      results = self.evaluate(outputs)

      for result, want in zip(results, expected):
        self.assertAllEqual(result, want)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:53,代码来源:unicode_transcode_op_test.py

示例4: test_transcode_utf8_with_bom

  def test_transcode_utf8_with_bom(self):
    """Checks that a leading UTF-8 byte order mark is kept when transcoding.

    The input starts with the bytes EF BB BF (U+FEFF encoded as UTF-8).
    It is transcoded to UTF-8, where it must come back unchanged, and to
    UTF-16-BE, where it must match Python's own conversion of the same text.
    """
    bom_string = b"\xef\xbb\xbfabcdefg"
    with self.cached_session():
      outputs = string_ops.unicode_transcode(
          bom_string, input_encoding="UTF-8", output_encoding="UTF-8")
      values = self.evaluate(outputs)
      self.assertAllEqual(values, bom_string)  # BOM preserved

      outputs = string_ops.unicode_transcode(
          bom_string, input_encoding="UTF-8", output_encoding="UTF-16-BE")
      values = self.evaluate(outputs)
      # Python's codec is the reference for the UTF-16-BE result.
      utf16expected = bom_string.decode("UTF-8").encode("UTF-16-BE")
      self.assertAllEqual(values, utf16expected)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:13,代码来源:unicode_transcode_op_test.py

示例5: test_transcode_utf8_with_replacement_char

  def test_transcode_utf8_with_replacement_char(self):
    """Checks that a valid U+FFFD already in the input is left alone.

    The input is "a" followed by EF BF BD, the UTF-8 encoding of U+FFFD.
    With errors="strict" it must pass through without an error. With
    errors="replace" and replacement_char "?", it must not be replaced.
    """
    strings = [b"a\xef\xbf\xbd"]
    with self.cached_session():
      outputs = string_ops.unicode_transcode(
          strings, input_encoding="UTF-8", output_encoding="UTF-8",
          errors="strict")
      values = self.evaluate(outputs)
      self.assertAllEqual(values, strings)

      outputs = string_ops.unicode_transcode(
          strings, input_encoding="UTF-8", output_encoding="UTF-8",
          errors="replace", replacement_char=ord("?"))
      values = self.evaluate(outputs)
      # The existing U+FFFD is legitimate input, so "?" must not appear.
      self.assertAllEqual(values, strings)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:14,代码来源:unicode_transcode_op_test.py

示例6: test_transcode_bad_utf8_termination_with_defaults

 def test_transcode_bad_utf8_termination_with_defaults(self):
   """Checks default handling of a UTF-8 sequence cut off at end of input.

   The input ends with F0, a lead byte with no continuation bytes after it.
   With no error-handling arguments given, the truncated sequence is
   expected to come out as EF BF BD (U+FFFD encoded as UTF-8).
   """
   bad_string = b"a\xf0"
   with self.cached_session():
     outputs = string_ops.unicode_transcode(
         bad_string, input_encoding="UTF-8", output_encoding="UTF-8")
     values = self.evaluate(outputs)
     self.assertAllEqual(values, b"a\xef\xbf\xbd")   # 0xFFFD
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:7,代码来源:unicode_transcode_op_test.py

示例7: test_transcode_bad_utf8_with_defaults

 def test_transcode_bad_utf8_with_defaults(self):
   """Checks default handling of an invalid byte in UTF-8 input.

   The input is a NUL byte followed by FF, which is never valid in UTF-8.
   With no error-handling arguments given, the NUL is expected to be kept
   and FF to come out as EF BF BD (U+FFFD encoded as UTF-8).
   """
   bad_string = b"\x00\xff"
   with self.cached_session():
     outputs = string_ops.unicode_transcode(
         bad_string, input_encoding="UTF-8", output_encoding="UTF-8")
     values = self.evaluate(outputs)
     self.assertAllEqual(values, b"\x00\xef\xbf\xbd")
开发者ID:abhinav-upadhyay,项目名称:tensorflow,代码行数:7,代码来源:unicode_transcode_op_test.py

示例8: test_transcode_bad_utf8_with_space_replacement

 def test_transcode_bad_utf8_with_space_replacement(self):
   """Checks that an invalid byte is replaced by a custom character.

   The input is a NUL byte followed by the invalid byte FF. With
   replacement_char set to a space, the NUL is expected to be kept and FF
   to come out as a single space.
   """
   bad_string = b"\x00\xff"
   with self.cached_session():
     outputs = string_ops.unicode_transcode(
         bad_string, input_encoding="UTF-8", output_encoding="UTF-8",
         replacement_char=ord(" "))
     values = self.evaluate(outputs)
     self.assertAllEqual(values, b"\x00 ")
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:8,代码来源:unicode_transcode_op_test.py

示例9: test_transcode_bad_utf8_with_elision_of_malformatting

 def test_transcode_bad_utf8_with_elision_of_malformatting(self):
   """Checks that errors="ignore" drops an invalid byte from the output.

   The input is a NUL byte followed by the invalid byte FF. The NUL is
   expected to be kept and FF to be removed with nothing in its place.
   """
   bad_string = b"\x00\xff"
   with self.cached_session():
     outputs = string_ops.unicode_transcode(
         bad_string,
         input_encoding="UTF-8",
         output_encoding="UTF-8",
         errors="ignore")
     values = self.evaluate(outputs)
     self.assertAllEqual(values, b"\x00")
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:10,代码来源:unicode_transcode_op_test.py

示例10: test_transcode_bad_utf8_with_elision_including_control_chars

 def test_transcode_bad_utf8_with_elision_including_control_chars(self):
   """Checks errors="ignore" combined with replace_control_characters=True.

   The input is a NUL byte followed by the invalid byte FF. Both are
   expected to be dropped, leaving an empty string.
   """
   bad_string = b"\x00\xff"
   with self.cached_session():
     outputs = string_ops.unicode_transcode(
         bad_string,
         input_encoding="UTF-8",
         output_encoding="UTF-8",
         errors="ignore",
         replace_control_characters=True)
     values = self.evaluate(outputs)
     self.assertAllEqual(values, b"")
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:11,代码来源:unicode_transcode_op_test.py

示例11: test_transcode_bad_utf8_start_with_strict_errors

 def test_transcode_bad_utf8_start_with_strict_errors(self):
   """Checks that errors="strict" turns invalid input into an op error.

   The input starts with FF, which is never valid in UTF-8. Evaluating the
   op is expected to fail with an "Invalid formatting on input string"
   error.
   """
   bad_string = b"\xffabcd"
   with self.cached_session():
     outputs = string_ops.unicode_transcode(
         bad_string,
         input_encoding="UTF-8",
         output_encoding="UTF-8",
         errors="strict")
     # The failure is expected during evaluation, so only that call sits
     # inside the assertion context.
     with self.assertRaisesOpError(
         "Invalid formatting on input string"):
       self.evaluate(outputs)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:11,代码来源:unicode_transcode_op_test.py

示例12: test_transcode_utf16_le_be_with_bom

  def test_transcode_utf16_le_be_with_bom(self):
    """Checks UTF-16 input that starts with a byte order mark.

    Three cases are covered: big-endian bytes read as UTF-16-BE, the same
    bytes misread as UTF-16-LE, and little-endian bytes read as UTF-16-LE.
    When the declared byte order matches, the BOM is expected to be kept as
    EF BB BF in the UTF-8 output.
    """
    be_bom_string = b"\xfe\xff\x00\x61"  # Big-endian BOM with 'a' encoded
    le_bom_string = b"\xff\xfe\x61\x00"  # Little-endian BOM with 'a' encoded

    with self.cached_session():
      outputs = string_ops.unicode_transcode(
          be_bom_string, input_encoding="UTF-16-BE", output_encoding="UTF-8")
      values = self.evaluate(outputs)
      # BOM is preserved in output
      self.assertAllEqual(values, b"\xef\xbb\xbfa")

      outputs = string_ops.unicode_transcode(
          be_bom_string, input_encoding="UTF-16-LE", output_encoding="UTF-8")
      values = self.evaluate(outputs)
      # Mangled BOM and value from the (incorrect) LE reading: the byte pairs
      # decode as U+FFFE and U+6100.
      self.assertAllEqual(values, b"\xef\xbf\xbe\xe6\x84\x80")

      outputs = string_ops.unicode_transcode(
          le_bom_string, input_encoding="UTF-16-LE", output_encoding="UTF-8")
      values = self.evaluate(outputs)
      self.assertAllEqual(values, b"\xef\xbb\xbfa")
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:20,代码来源:unicode_transcode_op_test.py

示例13: test_transcode_ascii_with_shift_chars

 def test_transcode_ascii_with_shift_chars(self):
   """Checks that bytes 0x0E and 0x0F pass through US-ASCII input unchanged.

   These are the Shift Out and Shift In control characters. With
   replace_control_characters=False, the UTF-8 output is expected to equal
   the input.
   """
   strings = [b"\x0e\x0e", b"\x0f\x0f"]
   with self.cached_session():
     outputs = string_ops.unicode_transcode(
         strings,
         input_encoding="US-ASCII",
         output_encoding="UTF-8",
         replacement_char=ord(" "),
         replace_control_characters=False)
     values = self.evaluate(outputs)
     self.assertAllEqual(values, strings)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:11,代码来源:unicode_transcode_op_test.py

示例14: test_transcode_bad_utf8_with_some_good

 def test_transcode_bad_utf8_with_some_good(self):
   """Checks that only the invalid byte is replaced in mostly valid input.

   The input has a single invalid FF byte between runs of ASCII text. With
   errors="replace" and a space as replacement_char, FF is expected to
   become one space and the surrounding text to be unchanged.
   """
   bad_string = b"abc\xffabcdefg"
   with self.cached_session():
     outputs = string_ops.unicode_transcode(
         bad_string,
         input_encoding="UTF-8",
         output_encoding="UTF-8",
         errors="replace",
         replacement_char=ord(" "),
         replace_control_characters=False)
     values = self.evaluate(outputs)
     self.assertAllEqual(values, b"abc abcdefg")
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:12,代码来源:unicode_transcode_op_test.py

示例15: test_transcode_utf8_to_utf32

 def test_transcode_utf8_to_utf32(self):
   """Checks transcoding of valid UTF-8 input to UTF-32-BE.

   The inputs include a 3-byte and a 4-byte UTF-8 sequence. The expected
   output is computed with Python's own codecs.
   """
   strings = [b"ab\xe2\x82\xac", b"\xf0\x90\x90\xb7"]
   # Python's codec is the reference for the UTF-32-BE result.
   expected = [s.decode("UTF-8").encode("UTF-32-BE") for s in strings]
   with self.cached_session():
     outputs = string_ops.unicode_transcode(
         strings,
         input_encoding="UTF-8",
         output_encoding="UTF-32-BE",
         replacement_char=ord(" "),
         replace_control_characters=False)
     values = self.evaluate(outputs)
     self.assertAllEqual(values, expected)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:12,代码来源:unicode_transcode_op_test.py


注:本文中的tensorflow.python.ops.string_ops.unicode_transcode函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。