本文整理汇总了Python中tensorflow.python.ops.string_ops.unicode_transcode函数的典型用法代码示例。如果您正苦于以下问题：Python unicode_transcode函数的具体用法？Python unicode_transcode怎么用？Python unicode_transcode使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了unicode_transcode函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_transcode_utf8_simple
def test_transcode_utf8_simple(self):
    """Check that plain ASCII byte strings survive transcoding unchanged.

    The same 2x2 batch is transcoded to UTF-8 from three input encodings
    (UTF-8, ISO-8859-1 and US-ASCII); every result is asserted equal to
    the original batch.
    """
    strings = [[b"a", b"abc"], [b"ABC", b"DEF"]]
    source_encodings = ("UTF-8", "ISO-8859-1", "US-ASCII")

    with self.cached_session():
        for source_encoding in source_encodings:
            transcoded = string_ops.unicode_transcode(
                strings,
                input_encoding=source_encoding,
                output_encoding="UTF-8",
                errors="replace",
                replacement_char=ord(" "),
                replace_control_characters=False)
            self.assertAllEqual(self.evaluate(transcoded), strings)
示例2: test_invalid_encoding_causes_errors
def test_invalid_encoding_causes_errors(self):
    """Check that an unknown input or output encoding is rejected.

    An invalid ``input_encoding`` is expected to surface as an op error
    when the result is evaluated. An invalid ``output_encoding`` is
    expected to raise ``ValueError`` mentioning the offending string.
    """
    strings = [[b"a", b"abc"], [b"ABC", b"DEF"]]

    with self.cached_session():
        outputs = string_ops.unicode_transcode(
            strings,
            input_encoding="invalid",
            output_encoding="UTF-8",
            errors="replace",
            replacement_char=ord(" "),
            replace_control_characters=False)
        with self.assertRaisesOpError(
                "Could not create converter for input encoding: invalid"):
            self.evaluate(outputs)

    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which unittest.TestCase no longer provides as of Python 3.12.
    with self.assertRaisesRegex(ValueError, "Op passed string 'invalid'"):
        with self.cached_session():
            outputs = string_ops.unicode_transcode(
                strings,
                input_encoding="UTF-8",
                output_encoding="invalid",
                errors="replace",
                replacement_char=ord(" "),
                replace_control_characters=False)
            self.evaluate(outputs)
示例3: test_cjk_encodings
def test_cjk_encodings(self):
    """Transcode CJK legacy encodings to UTF-8 and compare with Python.

    For each (encoding, byte strings) case the expected UTF-8 bytes are
    produced with Python's own codecs (``bytes.decode`` followed by
    ``str.encode``) and compared with the op's output.
    """
    # Each case: (input encoding, byte strings written in that encoding).
    cases = [
        ("shift_jis", [
            b"\x5c\x5c",  # Yen sign
            b"\x8f\x70",  # kanji character "waza"
            b"\x83\x4f",  # katakana character "gu"
        ]),
        ("gb18030", [b"\xca\xf5"]),  # simplified "shu4"
        ("big5", [b"\xb3\x4e"]),  # traditional "shu4"
        ("euc_kr", [b"\xc7\xd1\xb9\xce"]),  # hangul "hanmin"
    ]
    expected = [
        [raw.decode(encoding).encode("UTF-8") for raw in raw_strings]
        for encoding, raw_strings in cases
    ]

    with self.cached_session():
        outputs = [
            string_ops.unicode_transcode(
                raw_strings,
                input_encoding=encoding,
                output_encoding="UTF-8",
                replacement_char=ord(" "),
                replace_control_characters=False)
            for encoding, raw_strings in cases
        ]
        # Use self.evaluate (as the sibling tests do) rather than calling
        # sess.run directly; all outputs are still fetched in one call.
        results = self.evaluate(outputs)
        for result, want in zip(results, expected):
            self.assertAllEqual(result, want)
示例4: test_transcode_utf8_with_bom
def test_transcode_utf8_with_bom(self):
    """Check how a leading UTF-8 byte order mark is carried through.

    Transcoding UTF-8 to UTF-8 must return the input bytes including the
    BOM; transcoding to UTF-16-BE must match what Python's codecs produce
    for the same string.
    """
    bom_string = b"\xef\xbb\xbfabcdefg"
    utf16expected = bom_string.decode("UTF-8").encode("UTF-16-BE")

    with self.cached_session():
        as_utf8 = string_ops.unicode_transcode(
            bom_string, input_encoding="UTF-8", output_encoding="UTF-8")
        # BOM preserved
        self.assertAllEqual(self.evaluate(as_utf8), b"\xef\xbb\xbfabcdefg")

        as_utf16 = string_ops.unicode_transcode(
            bom_string, input_encoding="UTF-8", output_encoding="UTF-16-BE")
        self.assertAllEqual(self.evaluate(as_utf16), utf16expected)
示例5: test_transcode_utf8_with_replacement_char
def test_transcode_utf8_with_replacement_char(self):
    """Check that input already containing U+FFFD bytes passes through.

    The input ends with the bytes EF BF BD. It is transcoded UTF-8 to
    UTF-8 once with errors="strict" and once with errors="replace" and
    "?" as the replacement character; both outputs must equal the input.
    """
    strings = [b"a\xef\xbf\xbd"]
    expected = [b"a\xef\xbf\xbd"]

    with self.cached_session():
        strict_result = string_ops.unicode_transcode(
            strings,
            input_encoding="UTF-8",
            output_encoding="UTF-8",
            errors="strict")
        self.assertAllEqual(self.evaluate(strict_result), expected)

        replace_result = string_ops.unicode_transcode(
            strings,
            input_encoding="UTF-8",
            output_encoding="UTF-8",
            errors="replace",
            replacement_char=ord("?"))
        self.assertAllEqual(self.evaluate(replace_result), expected)
示例6: test_transcode_bad_utf8_termination_with_defaults
def test_transcode_bad_utf8_termination_with_defaults(self):
    """Check default handling of a stray 0xF0 byte at the end of the input.

    With no error-handling arguments given, the trailing byte is expected
    to come out as the UTF-8 encoding of U+FFFD.
    """
    bad_string = b"a\xf0"
    expected = b"a\xef\xbf\xbd"  # 0xFFFD

    with self.cached_session():
        transcoded = string_ops.unicode_transcode(
            bad_string, input_encoding="UTF-8", output_encoding="UTF-8")
        self.assertAllEqual(self.evaluate(transcoded), expected)
示例7: test_transcode_bad_utf8_with_defaults
def test_transcode_bad_utf8_with_defaults(self):
    """Check default handling of an invalid 0xFF byte after a NUL byte.

    With no error-handling arguments given, the NUL byte is expected to
    be kept and the 0xFF byte to come out as the UTF-8 encoding of U+FFFD.
    """
    bad_string = b"\x00\xff"
    with self.cached_session():
        outputs = string_ops.unicode_transcode(
            bad_string, input_encoding="UTF-8", output_encoding="UTF-8")
        # Use self.evaluate, as the sibling tests do, instead of calling
        # sess.run on the session object directly.
        values = self.evaluate(outputs)
        self.assertAllEqual(values, b"\x00\xef\xbf\xbd")
示例8: test_transcode_bad_utf8_with_space_replacement
def test_transcode_bad_utf8_with_space_replacement(self):
    """Check that an invalid byte is replaced by the requested character.

    With replacement_char set to a space, the 0xFF byte is expected to
    become b" " while the preceding NUL byte is kept.
    """
    bad_string = b"\x00\xff"
    expected = b"\x00 "

    with self.cached_session():
        transcoded = string_ops.unicode_transcode(
            bad_string,
            input_encoding="UTF-8",
            output_encoding="UTF-8",
            replacement_char=ord(" "))
        self.assertAllEqual(self.evaluate(transcoded), expected)
示例9: test_transcode_bad_utf8_with_elision_of_malformatting
def test_transcode_bad_utf8_with_elision_of_malformatting(self):
    """Check that errors="ignore" drops an invalid byte from the output.

    Only the 0xFF byte is expected to disappear; the NUL byte before it
    must remain.
    """
    bad_string = b"\x00\xff"
    expected = b"\x00"

    with self.cached_session():
        transcoded = string_ops.unicode_transcode(
            bad_string,
            input_encoding="UTF-8",
            output_encoding="UTF-8",
            errors="ignore")
        self.assertAllEqual(self.evaluate(transcoded), expected)
示例10: test_transcode_bad_utf8_with_elision_including_control_chars
def test_transcode_bad_utf8_with_elision_including_control_chars(self):
    """Check errors="ignore" combined with replace_control_characters=True.

    Both the NUL byte and the invalid 0xFF byte are expected to be
    dropped, leaving an empty byte string.
    """
    bad_string = b"\x00\xff"
    expected = b""

    with self.cached_session():
        transcoded = string_ops.unicode_transcode(
            bad_string,
            input_encoding="UTF-8",
            output_encoding="UTF-8",
            errors="ignore",
            replace_control_characters=True)
        self.assertAllEqual(self.evaluate(transcoded), expected)
示例11: test_transcode_bad_utf8_start_with_strict_errors
def test_transcode_bad_utf8_start_with_strict_errors(self):
    """Check that errors="strict" fails on input starting with a bad byte.

    Evaluating the op on a string that begins with 0xFF is expected to
    raise an op error mentioning invalid formatting.
    """
    bad_string = b"\xffabcd"
    expected_message = "Invalid formatting on input string"

    with self.cached_session():
        transcoded = string_ops.unicode_transcode(
            bad_string,
            input_encoding="UTF-8",
            output_encoding="UTF-8",
            errors="strict")
        with self.assertRaisesOpError(expected_message):
            self.evaluate(transcoded)
示例12: test_transcode_utf16_le_be_with_bom
def test_transcode_utf16_le_be_with_bom(self):
    """Check UTF-16 to UTF-8 transcoding of inputs that start with a BOM.

    Covers a big-endian input read as UTF-16-BE, the same input read with
    the wrong (little-endian) byte order, and a little-endian input read
    as UTF-16-LE.
    """
    big_endian = b"\xfe\xff\x00\x61"  # Big-endian BOM with 'a' encoded
    little_endian = b"\xff\xfe\x61\x00"  # Little-endian BOM with 'a' encoded

    # Each case: (input bytes, input encoding, expected UTF-8 output).
    cases = [
        # BOM is preserved in output
        (big_endian, "UTF-16-BE", b"\xef\xbb\xbfa"),
        # mangled BOM and value from (incorrect) LE encoding
        (big_endian, "UTF-16-LE", b"\xef\xbf\xbe\xe6\x84\x80"),
        (little_endian, "UTF-16-LE", b"\xef\xbb\xbfa"),
    ]

    with self.cached_session():
        for bom_string, source_encoding, expected in cases:
            transcoded = string_ops.unicode_transcode(
                bom_string,
                input_encoding=source_encoding,
                output_encoding="UTF-8")
            self.assertAllEqual(self.evaluate(transcoded), expected)
示例13: test_transcode_ascii_with_shift_chars
def test_transcode_ascii_with_shift_chars(self):
    """Check that 0x0E and 0x0F bytes pass through US-ASCII transcoding.

    With replace_control_characters=False the output must equal the
    input byte strings.
    """
    strings = [b"\x0e\x0e", b"\x0f\x0f"]

    with self.cached_session():
        transcoded = string_ops.unicode_transcode(
            strings,
            input_encoding="US-ASCII",
            output_encoding="UTF-8",
            replacement_char=ord(" "),
            replace_control_characters=False)
        self.assertAllEqual(self.evaluate(transcoded), strings)
示例14: test_transcode_bad_utf8_with_some_good
def test_transcode_bad_utf8_with_some_good(self):
    """Check replacement of one invalid byte surrounded by valid text.

    The 0xFF byte in the middle of the input is expected to become a
    space while the ASCII text on either side is left untouched.
    """
    bad_string = b"abc\xffabcdefg"
    expected = b"abc abcdefg"

    with self.cached_session():
        transcoded = string_ops.unicode_transcode(
            bad_string,
            input_encoding="UTF-8",
            output_encoding="UTF-8",
            errors="replace",
            replacement_char=ord(" "),
            replace_control_characters=False)
        self.assertAllEqual(self.evaluate(transcoded), expected)
示例15: test_transcode_utf8_to_utf32
def test_transcode_utf8_to_utf32(self):
    """Check UTF-8 to UTF-32-BE transcoding against Python's codecs.

    The expected bytes are obtained by decoding each input as UTF-8 and
    re-encoding it as UTF-32-BE in Python.
    """
    strings = [b"ab\xe2\x82\xac", b"\xf0\x90\x90\xb7"]
    expected = []
    for utf8_bytes in strings:
        expected.append(utf8_bytes.decode("UTF-8").encode("UTF-32-BE"))

    with self.cached_session():
        transcoded = string_ops.unicode_transcode(
            strings,
            input_encoding="UTF-8",
            output_encoding="UTF-32-BE",
            replacement_char=ord(" "),
            replace_control_characters=False)
        self.assertAllEqual(self.evaluate(transcoded), expected)