本文整理汇总了 PHP 中 UtfNormal::loadData 方法的典型用法代码示例。如果您正苦于以下问题:PHP UtfNormal::loadData 方法的具体用法是什么?PHP UtfNormal::loadData 怎么用?有哪些 PHP UtfNormal::loadData 的使用例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 UtfNormal 的用法示例。
下文一共展示了 UtfNormal::loadData 方法的 2 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 PHP 代码示例。
示例1: die
// Benchmark driver: refuses to run outside the CLI, optionally loads the
// native php_utfnormal extension, then calls benchmarkTest() once for each
// sample text file listed in $testfiles.
if (PHP_SAPI !== 'cli') {
    die("Run me from the command line please.\n");
}

// "--icu" on the command line requests the native extension. dl() returns
// false when the library cannot be loaded; stop in that case rather than
// continuing with a benchmark of a different implementation than the one
// that was asked for.
if (isset($_SERVER['argv']) && in_array('--icu', $_SERVER['argv'], true)) {
    if (!dl('php_utfnormal.so')) {
        fwrite(STDERR, "Could not load php_utfnormal.so (requested by --icu).\n");
        exit(1);
    }
}

require_once 'UtfNormalDefines.php';
require_once 'UtfNormalUtil.php';
require_once 'UtfNormal.php';

define('BENCH_CYCLES', 1);
// 10 MiB: benchmarkTest() doubles each sample until it is at least this long.
define('BIGSIZE', 1024 * 1024 * 10);
// Raise the memory limit to BIGSIZE plus 120 MiB of headroom.
ini_set('memory_limit', (string) (BIGSIZE + 120 * 1024 * 1024));

// Sample file path => human-readable description printed by benchmarkTest().
$testfiles = array(
    'testdata/washington.txt' => 'English text',
    'testdata/berlin.txt' => 'German text',
    'testdata/bulgakov.txt' => 'Russian text',
    'testdata/tokyo.txt' => 'Japanese text',
    'testdata/young.txt' => 'Korean text',
);

$normalizer = new UtfNormal();
// NOTE(review): presumably this loads the normalization tables up front so
// that their load time is not attributed to the first sample - confirm
// against UtfNormal::loadData().
UtfNormal::loadData();

foreach ($testfiles as $file => $desc) {
    benchmarkTest($normalizer, $file, $desc);
}
# -------
function benchmarkTest(&$u, $filename, $desc)
{
print "Testing {$filename} ({$desc})...\n";
$data = file_get_contents($filename);
$all = $data;
while (strlen($all) < BIGSIZE) {
$all .= $all;
}
$data = $all;
echo "Data is " . strlen($data) . " bytes.\n";
$forms = array('quickIsNFCVerify', 'cleanUp');
示例2: fastCompose
/**
* Produces canonically composed sequences, i.e. normal form C or KC.
*
* @private
* @param $string String: a valid UTF-8 string in sorted normal form D or KD. Input is not validated.
* @return string a UTF-8 string with canonical precomposed characters used where possible
*/
static function fastCompose($string)
{
UtfNormal::loadData();
$len = strlen($string);
$out = '';
$lastClass = -1;
$lastHangul = 0;
$startChar = '';
$combining = '';
$x1 = ord(substr(UTF8_HANGUL_VBASE, 0, 1));
$x2 = ord(substr(UTF8_HANGUL_TEND, 0, 1));
for ($i = 0; $i < $len; $i++) {
$c = $string[$i];
$n = ord($c);
if ($n < 0x80) {
# No combining characters here...
$out .= $startChar;
$out .= $combining;
$startChar = $c;
$combining = '';
$lastClass = 0;
continue;
} elseif ($n >= 0xf0) {
$c = substr($string, $i, 4);
$i += 3;
} elseif ($n >= 0xe0) {
$c = substr($string, $i, 3);
$i += 2;
} elseif ($n >= 0xc0) {
$c = substr($string, $i, 2);
$i++;
}
$pair = $startChar . $c;
if ($n > 0x80) {
if (isset(self::$utfCombiningClass[$c])) {
# A combining char; see what we can do with it
$class = self::$utfCombiningClass[$c];
if (!empty($startChar) && $lastClass < $class && $class > 0 && isset(self::$utfCanonicalComp[$pair])) {
$startChar = self::$utfCanonicalComp[$pair];
$class = 0;
} else {
$combining .= $c;
}
$lastClass = $class;
$lastHangul = 0;
continue;
}
}
# New start char
if ($lastClass == 0) {
if (isset(self::$utfCanonicalComp[$pair])) {
$startChar = self::$utfCanonicalComp[$pair];
$lastHangul = 0;
continue;
}
if ($n >= $x1 && $n <= $x2) {
# WARNING: Hangul code is painfully slow.
# I apologize for this ugly, ugly code; however
# performance is even more teh suck if we call
# out to nice clean functions. Lookup tables are
# marginally faster, but require a lot of space.
#
if ($c >= UTF8_HANGUL_VBASE && $c <= UTF8_HANGUL_VEND && $startChar >= UTF8_HANGUL_LBASE && $startChar <= UTF8_HANGUL_LEND) {
#
#$lIndex = utf8ToCodepoint( $startChar ) - UNICODE_HANGUL_LBASE;
#$vIndex = utf8ToCodepoint( $c ) - UNICODE_HANGUL_VBASE;
$lIndex = ord($startChar[2]) - 0x80;
$vIndex = ord($c[2]) - 0xa1;
$hangulPoint = UNICODE_HANGUL_FIRST + UNICODE_HANGUL_TCOUNT * (UNICODE_HANGUL_VCOUNT * $lIndex + $vIndex);
# Hardcode the limited-range UTF-8 conversion:
$startChar = chr($hangulPoint >> 12 & 0xf | 0xe0) . chr($hangulPoint >> 6 & 0x3f | 0x80) . chr($hangulPoint & 0x3f | 0x80);
$lastHangul = 0;
continue;
} elseif ($c >= UTF8_HANGUL_TBASE && $c <= UTF8_HANGUL_TEND && $startChar >= UTF8_HANGUL_FIRST && $startChar <= UTF8_HANGUL_LAST && !$lastHangul) {
# $tIndex = utf8ToCodepoint( $c ) - UNICODE_HANGUL_TBASE;
$tIndex = ord($c[2]) - 0xa7;
if ($tIndex < 0) {
$tIndex = ord($c[2]) - 0x80 + (0x11c0 - 0x11a7);
}
# Increment the code point by $tIndex, without
# the function overhead of decoding and recoding UTF-8
#
$tail = ord($startChar[2]) + $tIndex;
if ($tail > 0xbf) {
$tail -= 0x40;
$mid = ord($startChar[1]) + 1;
if ($mid > 0xbf) {
$startChar[0] = chr(ord($startChar[0]) + 1);
$mid -= 0x40;
}
$startChar[1] = chr($mid);
}
$startChar[2] = chr($tail);
//.........这里部分代码省略.........