本文整理汇总了PHP中unicode_to_utf8函数的典型用法代码示例。如果您正苦于以下问题:PHP unicode_to_utf8函数的具体用法?PHP unicode_to_utf8怎么用?PHP unicode_to_utf8使用的例子?那么,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了unicode_to_utf8函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: gbk_to_utf8
/**
 * Convert a GBK encoded byte string to UTF-8.
 *
 * The lookup table is read from CODETABLEDIR . 'gb-unicode.table' the first
 * time it is needed and kept in the global $CODETABLE. Bytes up to 0x80 are
 * copied through unchanged; any byte above 0x80 is treated as the first byte
 * of a two-byte sequence that is looked up in the table.
 *
 * @param string $gbstr GBK encoded input
 * @return string the converted string
 */
function gbk_to_utf8($gbstr)
{
    global $CODETABLE;
    if (empty($CODETABLE)) {
        $filename = CODETABLEDIR . 'gb-unicode.table';
        $fp = fopen($filename, 'rb');
        // fopen() already raises a warning on failure; do not hand `false` to fgets()/fclose()
        if ($fp !== false) {
            while (($l = fgets($fp, 15)) !== false) {
                $CODETABLE[hexdec(substr($l, 0, 6))] = substr($l, 7, 6);
            }
            fclose($fp);
        }
    }
    $gbstr = (string) $gbstr;
    $length = strlen($gbstr);
    $ret = '';
    // Walk the input by offset. Looping on the truthiness of the remaining
    // string would stop early when the remainder is exactly "0".
    $pos = 0;
    while ($pos < $length) {
        if (ord($gbstr[$pos]) > 0x80) {
            $thisW = substr($gbstr, $pos, 2);
            $pos += 2;
            $utf8 = '';
            // Unknown sequences have no table entry; the resulting notice is suppressed
            @($utf8 = unicode_to_utf8(hexdec($CODETABLE[hexdec(bin2hex($thisW)) - 0x8080])));
            if ($utf8 != '') {
                // The result is consumed three characters at a time, each group being
                // passed to chr() — presumably unicode_to_utf8() returns the byte
                // values as 3-digit decimal numbers; confirm against its definition.
                for ($i = 0; $i < strlen($utf8); $i += 3) {
                    $ret .= chr(substr($utf8, $i, 3));
                }
            }
        } else {
            $ret .= $gbstr[$pos];
            $pos++;
        }
    }
    return $ret;
}
示例2: convToUTF8
/**
 * Convert a list of decimal Unicode values into one concatenated string.
 *
 * Spaces are removed from the input, the remainder is split on commas and
 * every piece is cast with intval() before being passed to
 * unicode_to_utf8(). The individual results are joined in input order.
 *
 * @param string $strDecVal [comma separated list of] decimal character value(s)
 * @return string
 */
function convToUTF8($strDecVal)
{
    $compact = str_replace(' ', '', $strDecVal);
    $pieces = array();
    foreach (explode(',', $compact) as $decimal) {
        $pieces[] = unicode_to_utf8(intval($decimal));
    }
    return implode('', $pieces);
}
示例3: decode
/**
 * Decode a prefixed, Punycode-style encoded string.
 *
 * The parameters ($base, $tmin, $tmax, $initial_bias, $initial_n, $prefix,
 * $delim) are read from globals, and the digit decoding and bias adaptation
 * are delegated to decode_digit() and adapt(), both defined elsewhere. The
 * loop structure follows the decoding procedure of RFC 3492.
 *
 * @param string $text encoded input
 * @return string $text unchanged when it does not start with $prefix,
 *                otherwise the decoded pieces joined together
 */
function decode($text)
{
    global $base, $tmin, $tmax, $initial_bias, $initial_n, $prefix, $delim;
    if (substr($text, 0, strlen($prefix)) != $prefix) {
        return $text;
    }
    // Remove only the leading prefix; a second occurrence further into the
    // string is part of the payload and must be kept.
    $text = (string) substr($text, strlen($prefix));
    $n = $initial_n;
    $i = 0;
    $bias = $initial_bias;
    $output = array();
    // Everything before the last delimiter is copied through literally
    $delim_pos = strrpos($text, $delim);
    if ($delim_pos !== false) {
        for ($j = 0; $j < $delim_pos; $j++) {
            $output[] = $text[$j];
        }
        $text = (string) substr($text, $delim_pos + 1);
    }
    while ($text !== '') {
        $oldi = $i;
        $w = 1;
        for ($k = $base; ; $k += $base) {
            if ($text === '') {
                // Input ended in the middle of a variable-length integer:
                // stop and return what has been decoded so far instead of
                // reading past the end of the string.
                break 2;
            }
            $digit = decode_digit($text[0]);
            $text = (string) substr($text, 1);
            $i += $digit * $w;
            if ($k <= $bias + $tmin) {
                $t = $tmin;
            } elseif ($k >= $bias + $tmax) {
                $t = $tmax;
            } else {
                $t = $k - $bias;
            }
            if ($digit < $t) {
                break;
            }
            $w *= $base - $t;
        }
        $size = count($output) + 1;
        $bias = adapt($i - $oldi, $size, $oldi == 0);
        // Keep $n an integer; floor() alone would hand a float to unicode_to_utf8()
        $n += (int) floor($i / $size);
        $i %= $size;
        // Insert the new character at position $i
        array_splice($output, $i, 0, array(unicode_to_utf8($n)));
        $i++;
    }
    return implode('', $output);
}
示例4: utf8_keepalphanum
/**
 * Remove every character whose code point is not listed in the global
 * $UTF8_ALPHA_CHARS array.
 *
 * The string is turned into code points with utf8_to_unicode(), unlisted
 * entries are unset (the remaining keys are not renumbered) and the result is
 * passed to unicode_to_utf8().
 *
 * @param string $string input string
 * @return mixed whatever unicode_to_utf8() returns for the filtered array
 */
function utf8_keepalphanum($string)
{
    global $UTF8_ALPHA_CHARS;
    $codepoints = utf8_to_unicode($string);
    $total = count($codepoints);
    $idx = 0;
    while ($idx < $total) {
        $isListed = in_array($codepoints[$idx], $UTF8_ALPHA_CHARS);
        if (!$isListed) {
            unset($codepoints[$idx]);
        }
        ++$idx;
    }
    return unicode_to_utf8($codepoints);
}
示例5: utf8_keepalphanum
/**
 * Remove every character that is not in the built-in whitelist.
 *
 * The whitelist covers a-z A-Z 0-9 . _ - and space, extended latin chars,
 * Cyrillic and Greek. The string is turned into code points with
 * utf8_to_unicode(), unlisted entries are unset (the remaining keys are not
 * renumbered) and the result is passed to unicode_to_utf8().
 *
 * @param string $string input string
 * @return mixed whatever unicode_to_utf8() returns for the filtered array
 */
function utf8_keepalphanum($string)
{
    // Code point => index map, built once, so that each membership test is a
    // hash lookup instead of a linear scan over the whole whitelist.
    static $allowed = null;
    if ($allowed === null) {
        $allowed = array_flip(array(
            // A-Z
            0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d,
            0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a,
            // a-z
            0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
            0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a,
            // 0-9 . - _ space
            0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2e, 0x2d, 0x5f, 0x20,
            // extended latin
            0xc1, 0xe1, 0x106, 0x107, 0xc9, 0xe9, 0xcd, 0xed, 0x139, 0x13a, 0x143, 0x144,
            0xd3, 0xf3, 0x154, 0x155, 0x15a, 0x15b, 0xda, 0xfa, 0xdd, 0xfd, 0x179, 0x17a,
            0x10f, 0x13d, 0x13e, 0x165, 0x102, 0x103, 0x11e, 0x11f, 0x16c, 0x16d, 0x10c, 0x10d,
            0x10e, 0x11a, 0x11b, 0x147, 0x148, 0x158, 0x159, 0x160, 0x161, 0x164, 0x17d, 0x17e,
            0xc7, 0xe7, 0x122, 0x123, 0x136, 0x137, 0x13b, 0x13c, 0x145, 0x146, 0x156, 0x157,
            0x15e, 0x15f, 0x162, 0x163, 0xc2, 0xe2, 0x108, 0x109, 0xca, 0xea, 0x11c, 0x11d,
            0x124, 0x125, 0xce, 0xee, 0x134, 0x135, 0xd4, 0xf4, 0x15c, 0x15d, 0xdb, 0xfb,
            0x174, 0x175, 0x176, 0x177, 0xc4, 0xe4, 0xcb, 0xeb, 0xcf, 0xef, 0xd6, 0xf6,
            0xdc, 0xfc, 0x178, 0xff, 0x10a, 0x10b, 0x116, 0x117, 0x120, 0x121, 0x130, 0x131,
            0x17b, 0x17c, 0x150, 0x151, 0x170, 0x171, 0xc0, 0xe0, 0xc8, 0xe8, 0xcc, 0xec,
            0xd2, 0xf2, 0xd9, 0xf9, 0x1a0, 0x1a1, 0x1af, 0x1b0, 0x100, 0x101, 0x112, 0x113,
            0x12a, 0x12b, 0x14c, 0x14d, 0x16a, 0x16b, 0x104, 0x105, 0x118, 0x119, 0x12e, 0x12f,
            0x172, 0x173, 0xc5, 0xe5, 0x16e, 0x16f, 0x110, 0x111, 0x126, 0x127, 0x141, 0x142,
            0xd8, 0xf8, 0xc3, 0xe3, 0xd1, 0xf1, 0xd5, 0xf5, 0xc6, 0xe6, 0x152, 0x153,
            0xd0, 0xf0, 0xde, 0xfe, 0xdf, 0x17f,
            // Greek
            0x391, 0x392, 0x393, 0x394, 0x395, 0x396, 0x397, 0x398, 0x399, 0x39a, 0x39b, 0x39c,
            0x39d, 0x39e, 0x39f, 0x3a0, 0x3a1, 0x3a3, 0x3a4, 0x3a5, 0x3a6, 0x3a7, 0x3a8, 0x3a9,
            0x386, 0x388, 0x389, 0x38a, 0x38c, 0x38e, 0x38f, 0x3aa, 0x3ab,
            0x3b1, 0x3b2, 0x3b3, 0x3b4, 0x3b5, 0x3b6, 0x3b7, 0x3b8, 0x3b9, 0x3ba, 0x3bb, 0x3bc,
            0x3bd, 0x3be, 0x3bf, 0x3c0, 0x3c1, 0x3c3, 0x3c2, 0x3c4, 0x3c5, 0x3c6, 0x3c7, 0x3c8,
            0x3c9, 0x3ac, 0x3ad, 0x3ae, 0x3af, 0x3cc, 0x3cd, 0x3ce, 0x3ca, 0x3cb, 0x390, 0x3b0,
            // Cyrillic
            0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x401, 0x416, 0x417, 0x406, 0x419, 0x41a,
            0x41b, 0x41c, 0x41d, 0x41e, 0x41f, 0x420, 0x421, 0x422, 0x423, 0x40e, 0x424, 0x425,
            0x426, 0x427, 0x428, 0x42b, 0x42c, 0x42d, 0x42e, 0x42f,
            0x430, 0x431, 0x432, 0x433, 0x434, 0x435, 0x451, 0x436, 0x437, 0x456, 0x439, 0x43a,
            0x43b, 0x43c, 0x43d, 0x43e, 0x43f, 0x440, 0x441, 0x442, 0x443, 0x45e, 0x444, 0x445,
            0x446, 0x447, 0x448, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f,
            0x418, 0x429, 0x42a, 0x438, 0x449, 0x44a,
            0x403, 0x405, 0x408, 0x409, 0x40a, 0x40c, 0x40f, 0x453, 0x455, 0x458, 0x459, 0x45a,
            0x45c, 0x45f, 0x402, 0x40b, 0x452, 0x45b, 0x490, 0x404, 0x407, 0x491, 0x454, 0x457,
            0x4e8, 0x4ae, 0x4e9, 0x4af,
        ));
    }
    $chars = utf8_to_unicode($string);
    foreach ($chars as $key => $codepoint) {
        if (!isset($allowed[$codepoint])) {
            unset($chars[$key]);
        }
    }
    return unicode_to_utf8($chars);
}
示例6: utf8_strtoupper
/**
 * Uppercase a string using a fixed code point mapping table.
 *
 * The input is converted to code points with utf8_to_unicode(); every code
 * point that has an entry in the table is replaced by its mapped value and
 * the array is handed to unicode_to_utf8().
 *
 * @param string $string input string
 * @return mixed false when utf8_to_unicode() yields an empty/falsy result,
 *               otherwise whatever unicode_to_utf8() returns
 */
function utf8_strtoupper($string)
{
    // lower case code point => upper case code point
    static $upperOf = array(
        0x61 => 0x41, 0x3c6 => 0x3a6, 0x163 => 0x162, 0xe5 => 0xc5, 0x62 => 0x42, 0x13a => 0x139,
        0xe1 => 0xc1, 0x142 => 0x141, 0x3cd => 0x38e, 0x101 => 0x100, 0x491 => 0x490, 0x3b4 => 0x394,
        0x15b => 0x15a, 0x64 => 0x44, 0x3b3 => 0x393, 0xf4 => 0xd4, 0x44a => 0x42a, 0x439 => 0x419,
        0x113 => 0x112, 0x43c => 0x41c, 0x15f => 0x15e, 0x144 => 0x143, 0xee => 0xce, 0x45e => 0x40e,
        0x44f => 0x42f, 0x3ba => 0x39a, 0x155 => 0x154, 0x69 => 0x49, 0x73 => 0x53, 0x1e1f => 0x1e1e,
        0x135 => 0x134, 0x447 => 0x427, 0x3c0 => 0x3a0, 0x438 => 0x418, 0xf3 => 0xd3, 0x440 => 0x420,
        0x454 => 0x404, 0x435 => 0x415, 0x449 => 0x429, 0x14b => 0x14a, 0x431 => 0x411, 0x459 => 0x409,
        0x1e03 => 0x1e02, 0xf6 => 0xd6, 0xf9 => 0xd9, 0x6e => 0x4e, 0x451 => 0x401, 0x3c4 => 0x3a4,
        0x443 => 0x423, 0x15d => 0x15c, 0x453 => 0x403, 0x3c8 => 0x3a8, 0x159 => 0x158, 0x67 => 0x47,
        0xe4 => 0xc4, 0x3ac => 0x386, 0x3ae => 0x389, 0x167 => 0x166, 0x3be => 0x39e, 0x165 => 0x164,
        0x117 => 0x116, 0x109 => 0x108, 0x76 => 0x56, 0xfe => 0xde, 0x157 => 0x156, 0xfa => 0xda,
        0x1e61 => 0x1e60, 0x1e83 => 0x1e82, 0xe2 => 0xc2, 0x119 => 0x118, 0x146 => 0x145, 0x70 => 0x50,
        0x151 => 0x150, 0x44e => 0x42e, 0x129 => 0x128, 0x3c7 => 0x3a7, 0x13e => 0x13d, 0x442 => 0x422,
        0x7a => 0x5a, 0x448 => 0x428, 0x3c1 => 0x3a1, 0x1e81 => 0x1e80, 0x16d => 0x16c, 0xf5 => 0xd5,
        0x75 => 0x55, 0x177 => 0x176, 0xfc => 0xdc, 0x1e57 => 0x1e56, 0x3c3 => 0x3a3, 0x43a => 0x41a,
        0x6d => 0x4d, 0x16b => 0x16a, 0x171 => 0x170, 0x444 => 0x424, 0xec => 0xcc, 0x169 => 0x168,
        0x3bf => 0x39f, 0x6b => 0x4b, 0xf2 => 0xd2, 0xe0 => 0xc0, 0x434 => 0x414, 0x3c9 => 0x3a9,
        0x1e6b => 0x1e6a, 0xe3 => 0xc3, 0x44d => 0x42d, 0x436 => 0x416, 0x1a1 => 0x1a0, 0x10d => 0x10c,
        0x11d => 0x11c, 0xf0 => 0xd0, 0x13c => 0x13b, 0x45f => 0x40f, 0x45a => 0x40a, 0xe8 => 0xc8,
        0x3c5 => 0x3a5, 0x66 => 0x46, 0xfd => 0xdd, 0x63 => 0x43, 0x21b => 0x21a, 0xea => 0xca,
        0x3b9 => 0x399, 0x17a => 0x179, 0xef => 0xcf, 0x1b0 => 0x1af, 0x65 => 0x45, 0x3bb => 0x39b,
        0x3b8 => 0x398, 0x3bc => 0x39c, 0x45c => 0x40c, 0x43f => 0x41f, 0x44c => 0x42c, 0xfe => 0xde,
        0xf0 => 0xd0, 0x1ef3 => 0x1ef2, 0x68 => 0x48, 0xeb => 0xcb, 0x111 => 0x110, 0x433 => 0x413,
        0x12f => 0x12e, 0xe6 => 0xc6, 0x78 => 0x58, 0x161 => 0x160, 0x16f => 0x16e, 0x3b1 => 0x391,
        0x457 => 0x407, 0x173 => 0x172, 0xff => 0x178, 0x6f => 0x4f, 0x43b => 0x41b, 0x3b5 => 0x395,
        0x445 => 0x425, 0x121 => 0x120, 0x17e => 0x17d, 0x17c => 0x17b, 0x3b6 => 0x396, 0x3b2 => 0x392,
        0x3ad => 0x388, 0x1e85 => 0x1e84, 0x175 => 0x174, 0x71 => 0x51, 0x437 => 0x417, 0x1e0b => 0x1e0a,
        0x148 => 0x147, 0x105 => 0x104, 0x458 => 0x408, 0x14d => 0x14c, 0xed => 0xcd, 0x79 => 0x59,
        0x10b => 0x10a, 0x3ce => 0x38f, 0x72 => 0x52, 0x430 => 0x410, 0x455 => 0x405, 0x452 => 0x402,
        0x127 => 0x126, 0x137 => 0x136, 0x12b => 0x12a, 0x3af => 0x38a, 0x44b => 0x42b, 0x6c => 0x4c,
        0x3b7 => 0x397, 0x125 => 0x124, 0x219 => 0x218, 0xfb => 0xdb, 0x11f => 0x11e, 0x43e => 0x41e,
        0x1e41 => 0x1e40, 0x3bd => 0x39d, 0x107 => 0x106, 0x3cb => 0x3ab, 0x446 => 0x426, 0xfe => 0xde,
        0xe7 => 0xc7, 0x3ca => 0x3aa, 0x441 => 0x421, 0x432 => 0x412, 0x10f => 0x10e, 0xf8 => 0xd8,
        0x77 => 0x57, 0x11b => 0x11a, 0x74 => 0x54, 0x6a => 0x4a, 0x45b => 0x40b, 0x456 => 0x406,
        0x103 => 0x102, 0x3bb => 0x39b, 0xf1 => 0xd1, 0x43d => 0x41d, 0x3cc => 0x38c, 0xe9 => 0xc9,
        0xf0 => 0xd0, 0x457 => 0x407, 0x123 => 0x122
    );
    $codepoints = utf8_to_unicode($string);
    if (!$codepoints) {
        return false;
    }
    for ($idx = 0, $total = count($codepoints); $idx < $total; $idx++) {
        $current = $codepoints[$idx];
        if (isset($upperOf[$current])) {
            $codepoints[$idx] = $upperOf[$current];
        }
    }
    return unicode_to_utf8($codepoints);
}
示例7: utf16be_to_utf8
/**
 * UTF-16BE to UTF-8 conversion.
 *
 * The input is unpacked into unsigned 16-bit big-endian values and that array
 * is passed straight to unicode_to_utf8(). Each 16-bit unit is forwarded
 * as-is, so surrogate pairs are not combined here (effectively UCS-2).
 *
 * @param string $str UTF-16BE encoded bytes (taken by reference, not modified)
 * @return mixed whatever unicode_to_utf8() returns for the unpacked values
 */
protected function utf16be_to_utf8(&$str)
{
    return unicode_to_utf8(unpack('n*', $str));
}
示例8: unicode_to_ansi
/**
 * Convert a string made of 2-byte units to an ANSI string.
 *
 * An empty input, or one whose byte length is odd, yields ''. Otherwise the
 * string goes through unicode_to_utf8() and then Utf8ToWin().
 *
 * @param string $string input bytes — presumably UTF-16LE, judging by the
 *                       alternative noted below; confirm against the helpers
 * @return string
 */
function unicode_to_ansi($string)
{
    $byteCount = strlen($string);
    // reject empty input and input that is not a whole number of 2-byte units
    if (!$byteCount || $byteCount % 2 != 0) {
        return '';
    }
    // alternative noted by the original author:
    //return mb_convert_encoding($string, "cp1251", "UTF-16LE");
    return Utf8ToWin(unicode_to_utf8($string));
}
示例9: utf8_stripspecials
/**
 * Strip (or replace) special, non-alphanumeric characters in a UTF-8 string.
 *
 * The characters to strip come from the global $UTF8_SPECIAL_CHARS list of
 * code points, minus any that appear in $keep. The control characters 0x00
 * to 0x19 are always part of the stripped set; they are added directly in the
 * regular expression. Any special character used inside $repl must also be
 * listed in $keep.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param string $string The UTF8 string to strip of special chars
 * @param string $repl   Replace special with this string
 * @param string $keep   Special chars to keep (in UTF8)
 * @return string|null result of preg_replace()
 */
function utf8_stripspecials($string, $repl = '', $keep = '')
{
    global $UTF8_SPECIAL_CHARS;
    $strip = $UTF8_SPECIAL_CHARS;
    if ($keep != '') {
        $strip = array_diff($strip, utf8_to_unicode($keep));
    }
    // escape the characters so they are safe inside the character class
    $class = preg_quote(unicode_to_utf8($strip), '/');
    $pattern = '/[\\x00-\\x19' . $class . ']/u';
    return preg_replace($pattern, $repl, $string);
}
示例10: gbk_to_utf8
/**
 * Convert a GBK encoded byte string to UTF-8.
 *
 * The lookup table is read from
 * EXTENSION_DIR . 'encoding' . DIRECTORY_SEPARATOR . 'gb-unicode.table'.
 * Bytes up to 0x80 are copied through unchanged; any byte above 0x80 is
 * treated as the first byte of a two-byte sequence that is looked up in the
 * table.
 *
 * @param string $gbstr GBK encoded input
 * @return string the converted string
 */
function gbk_to_utf8($gbstr)
{
    // Parse the table file once and reuse it on later calls
    static $cachedTable = null;
    $CODETABLE = $cachedTable;
    if ($CODETABLE === null) {
        $CODETABLE = array();
        $filename = EXTENSION_DIR . 'encoding' . DIRECTORY_SEPARATOR . 'gb-unicode.table';
        $fp = fopen($filename, 'rb');
        // fopen() already raises a warning on failure; do not hand `false` to
        // fgets()/fclose(), and do not cache so a later call can retry.
        if ($fp !== false) {
            while (($l = fgets($fp, 15)) !== false) {
                $CODETABLE[hexdec(substr($l, 0, 6))] = substr($l, 7, 6);
            }
            fclose($fp);
            $cachedTable = $CODETABLE;
        }
    }
    $gbstr = (string) $gbstr;
    $length = strlen($gbstr);
    $ret = '';
    // Walk the input by offset. Looping on the truthiness of the remaining
    // string would stop early when the remainder is exactly "0".
    $pos = 0;
    while ($pos < $length) {
        if (ord($gbstr[$pos]) > 0x80) {
            $thisW = substr($gbstr, $pos, 2);
            $pos += 2;
            $utf8 = '';
            // Unknown sequences have no table entry; the resulting notice is suppressed
            @($utf8 = unicode_to_utf8(hexdec($CODETABLE[hexdec(bin2hex($thisW)) - 0x8080])));
            if ($utf8 != '') {
                // The result is consumed three characters at a time, each group being
                // passed to chr() — presumably unicode_to_utf8() returns the byte
                // values as 3-digit decimal numbers; confirm against its definition.
                for ($i = 0; $i < strlen($utf8); $i += 3) {
                    $ret .= chr(substr($utf8, $i, 3));
                }
            }
        } else {
            $ret .= $gbstr[$pos];
            $pos++;
        }
    }
    return $ret;
}
示例11: decode
/**
 * Decode a 'safe' encoded ASCII file name back to UTF-8.
 *
 * The name is lower-cased, converted to code points by
 * self::safe_to_unicode() and the code points are passed to
 * unicode_to_utf8().
 *
 * Decoding process as described by the original author (the implementation
 * lives in safe_to_unicode(), which is not part of this block):
 * - split the string into substrings at any pre or post indicator character
 * - for a substring that does not start with a pre_indicator character:
 *   skip the post_indicator character if the previous substring was
 *   converted, copy the code points of the remaining characters to the
 *   output and clear the converted flag
 * - for a substring that starts with a pre_indicator character:
 *   if its length is 1, copy the post_indicator character to the output;
 *   otherwise skip the pre-indicator, convert the rest from base36 to
 *   base10, add 0x20 for non-printable ASCII characters and append the
 *   code point to the output
 *
 * @param string $filename a 'safe' encoded ASCII string,
 * @return string decoded utf8 representation of $filename
 *
 * @author Christopher Smith <chris@jalakai.co.uk>
 */
public function decode($filename)
{
    $lowered = strtolower($filename);
    $codepoints = self::safe_to_unicode($lowered);
    return unicode_to_utf8($codepoints);
}
示例12: while
//$i = 0;
while (strpos($buffer, '$') !== false) {
//echo $i++ . ": strpos=". (string)strpos($buffer, '$') ."\nbuffer: $buffer\n";
$replaced = false;
foreach ($translation as $symbol => $character) {
$sym_pos = strpos($buffer, $symbol);
if ($sym_pos !== false) {
$sym_length = strlen($symbol);
$piece1 = substr($buffer, 0, $sym_pos);
if ($character['switch']) {
// the character after the special charater needs to come before it
$partnerchar = utf8_encode($buffer[$sym_pos + $sym_length]);
$piece2 = unicode_to_utf8(array_merge(utf8_to_unicode($partnerchar), $character['unicode']));
$piece3start = $sym_pos + $sym_length + 1;
} else {
$piece2 = unicode_to_utf8($character['unicode']);
$piece3start = $sym_pos + $sym_length;
}
$piece2 = utf8_decode(UtfNormal::NFKC($piece2));
// strip out any ? characters, which are characters not existing in ISO-8859-1
$piece2 = str_replace('?', '', $piece2);
$piece3 = substr($buffer, $piece3start);
$buffer = $piece1 . $piece2 . $piece3;
$replaced = true;
continue;
}
}
if (!$replaced) {
// we've encountered some character that we have no translation for
echo "unable to find a translation to transform this buffer, the untranslatable code will be stripped out:\n{$buffer}\n";
$pieces = preg_split('/\\$\\d*/', $buffer, 2);
示例13: define
<?php
/**
 * Download the Unicode EmojiSources.txt table and store it as a JSON file.
 *
 * Every data line has the form
 *   <code point(s)>;<docomo>;<kddi>;<softbank>
 * and becomes one entry with the keys unicode, utf8hex, sjis_docomo,
 * sjis_kddi and sjis_softbank. unicode_to_utf8() is not defined in this
 * script and is expected to be provided elsewhere.
 */
define('UNICODE_EMOJI_PATH', 'http://unicode.org/Public/UNIDATA/EmojiSources.txt');
define('JSON_WRITE_PATH', 'EmojiSources.json');
$pattern = '/^([0-9A-F\\s]+);([0-9A-F]+)?;([0-9A-F]+)?;([0-9A-F]+)?$/m';
$emojiList = array();
$contents = file_get_contents(UNICODE_EMOJI_PATH);
if ($contents === false) {
    // Report a failed download as such instead of as a parse error
    printf("[ERROR] Failed to read file. [%s]\n", UNICODE_EMOJI_PATH);
} elseif (preg_match_all($pattern, $contents, $matches)) {
    foreach ($matches[1] as $i => $rawSequence) {
        $unicode = trim($rawSequence);
        // Convert every code point of the sequence, however many there are
        $utf8hex = '';
        foreach (explode(' ', $unicode) as $codepoint) {
            if ($codepoint === '') {
                continue;
            }
            $utf8hex .= unicode_to_utf8($codepoint);
        }
        $emojiList[] = array(
            'unicode' => $unicode,
            'utf8hex' => $utf8hex,
            'sjis_docomo' => $matches[2][$i],
            'sjis_kddi' => $matches[3][$i],
            'sjis_softbank' => $matches[4][$i],
        );
    }
    $jsonData = json_encode($emojiList);
    // Only announce success when the JSON was really encoded and written
    if ($jsonData === false || file_put_contents(JSON_WRITE_PATH, $jsonData) === false) {
        printf("[ERROR] Failed to write file. [%s]\n", JSON_WRITE_PATH);
    } else {
        printf("Created a file. [%s]\n", JSON_WRITE_PATH);
    }
} else {
    printf("[ERROR] Failed parse file. [%s]\n", UNICODE_EMOJI_PATH);
}
示例14: utf8_hangul_getSearchRule
/**
 * Build a regular expression fragment for searching Hangul text.
 *
 * The string is converted to code points with utf8_to_unicode(). Code points
 * in 0xac00-0xd7a3 or 0x3130-0x318f are decomposed with hangul_to_jamo() and
 * expanded into a range of syllables; every other code point is appended to
 * the rule as-is.
 *
 * NOTE(review): literal characters are appended without preg_quote(), so
 * regex metacharacters in $str end up unescaped in the rule — confirm that
 * callers escape the input.
 *
 * @param string $str         search string
 * @param mixed  $lastchar    when truthy and $str has more than one code point,
 *                            only the last code point is expanded; the others
 *                            are copied into the rule unchanged
 * @param bool   $use_unicode true: emit a "[start-end]" class made of the
 *                            start/end syllables themselves; false: emit
 *                            byte-wise "\xNN" escapes built from bytes 0..2 of
 *                            the start/end syllables
 * @return string the generated rule
 */
function utf8_hangul_getSearchRule($str, $lastchar = 1, $use_unicode = true)
{
$rule = '';
$val = utf8_to_unicode($str);
$len = sizeof($val);
if ($lastchar and $len > 1) {
// make a regex using with the last char
$last = array_pop($val);
$rule = unicode_to_utf8($val);
$val = array($last);
$len = sizeof($val);
}
for ($i = 0; $i < $len; $i++) {
$ch = $val[$i];
$wch = array();
$ustart = array();
$uend = array();
// `and` binds tighter than `or`: (0xac00..0xd7a3) or (0x3130..0x318f)
if ($ch >= 0xac00 and $ch <= 0xd7a3 or $ch >= 0x3130 and $ch <= 0x318f) {
$wch = hangul_to_jamo(array($ch));
} else {
// anything else is copied into the rule literally
$rule .= unicode_to_utf8(array($ch));
continue;
}
$wlen = sizeof($wch);
// closing part appended after the range; ')' once a group has been opened below
$ket = '';
if ($wlen >= 3) {
// three or more jamo, e.g. 종각 => 종(각|가[가-깋])
// alternatives: the character itself, or the character without its third jamo
$mrule = array();
$mrule[] = unicode_to_utf8(array($ch));
$save = $wch[2];
unset($wch[2]);
$tmp = jamo_to_syllable($wch);
$mrule[] = unicode_to_utf8($tmp);
// continue with the removed third jamo, converted and decomposed again
$save = hangul_jongseong_to_cjamo($save);
$wch = hangul_to_jamo($save);
$wlen = sizeof($wch);
$rule .= '(' . implode('|', $mrule);
$ket = ')';
if ($wlen > 1) {
// more than one jamo came back: close the group without adding a range
$rule .= ')';
continue;
}
}
if ($wlen == 1) {
// a single jamo in 0x1100-0x1112: range from (jamo + 0x1161) to (jamo + 0x1175 + 0x11c2)
if ($wch[0] >= 0x1100 and $wch[0] <= 0x1112) {
$wch[1] = 0x1161;
$start = jamo_to_syllable($wch);
$ustart = unicode_to_utf8($start);
$wch[1] = 0x1175;
$wch[2] = 0x11c2;
$end = jamo_to_syllable($wch);
$uend = unicode_to_utf8($end);
} else {
$rule .= unicode_to_utf8($wch) . $ket;
continue;
}
} else {
if ($wlen == 2) {
// two jamo, first in 0x1100-0x1112: range from the pair itself to (pair + 0x11c2)
if ($wch[0] >= 0x1100 and $wch[0] <= 0x1112) {
$start = jamo_to_syllable($wch);
$ustart = unicode_to_utf8($start);
$wch[2] = 0x11c2;
$end = jamo_to_syllable($wch);
$uend = unicode_to_utf8($end);
} else {
$rule .= unicode_to_utf8($wch);
continue;
}
}
}
// NOTE(review): when $wlen is neither 1 nor 2 here, $ustart/$uend are still
// the empty arrays assigned at the top of the loop — confirm this cannot happen.
if ($use_unicode) {
$crule = '[' . $ustart . '-' . $uend . ']';
} else {
// byte-wise rule: the first byte of the start syllable is emitted as-is,
// then bytes 1 and 2 of start/end are turned into byte ranges
$rule .= sprintf("\\x%02X", ord($ustart[0]));
$crule = '';
if ($ustart[1] == $uend[1]) {
// same second byte: a single range over the third byte is enough
$crule .= sprintf("\\x%02X", ord($ustart[1]));
$crule .= sprintf("[\\x%02X-\\x%02X]", ord($ustart[2]), ord($uend[2]));
} else {
// different second bytes: first partial block, full blocks in between, last partial block
$sch = ord($ustart[1]);
$ech = ord($uend[1]);
$subrule = array();
$subrule[] = sprintf("\\x%02X[\\x%02X-\\xBF]", $sch, ord($ustart[2]));
if ($sch + 1 == $ech - 1) {
$subrule[] = sprintf("\\x%02X[\\x80-\\xBF]", $sch + 1);
} else {
if ($sch + 1 != $ech) {
$subrule[] = sprintf("[\\x%02X-\\x%02X][\\x80-\\xBF]", $sch + 1, $ech - 1);
}
}
$subrule[] = sprintf("\\x%02X[\\x80-\\x%02X]", ord($uend[1]), ord($uend[2]));
$crule .= '(' . implode('|', $subrule) . ')';
}
}
$rule .= $crule . $ket;
}
return $rule;
}
示例15: unicode_to_utf8
/**
 * Forward $str to unicode_to_utf8() and return the result.
 *
 * NOTE(review): as shown here, the only statement calls unicode_to_utf8()
 * with the same argument, i.e. this very function, so a call would recurse
 * without a base case. Presumably this was a wrapper (a method or a
 * namespaced function) delegating to a global unicode_to_utf8() — confirm
 * against the original file before using it.
 *
 * @param mixed $str value forwarded unchanged
 * @return mixed result of the forwarded call
 */
function unicode_to_utf8($str)
{
return unicode_to_utf8($str);
}