本文整理汇总了 PHP 中 codepointToUtf8 函数的典型用法代码示例。如果您正苦于以下问题:PHP codepointToUtf8 函数的具体用法?PHP codepointToUtf8 怎么用?PHP codepointToUtf8 使用的例子?那么,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 codepointToUtf8 函数的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 PHP 代码示例。
示例1: hexSequenceToUtf8
/**
 * Take a series of whitespace-separated hexadecimal numbers representing
 * Unicode code points and return a UTF-8 string composed of those
 * characters. Used by UTF-8 data generation and testing routines.
 *
 * Empty tokens (an empty input string, leading/trailing whitespace or
 * repeated separators) are ignored. Previously each empty token went
 * through hexdec('') === 0 and silently added a NUL character to the result.
 *
 * @param $sequence String hexadecimal code points, e.g. "0041 00E9 20AC"
 * @return String the concatenated UTF-8 encoding of every code point;
 *   an empty string when the sequence contains no tokens
 * @private
 */
function hexSequenceToUtf8($sequence)
{
    $utf = '';
    // PREG_SPLIT_NO_EMPTY drops the empty pieces that explode(' ', ...) would
    // produce for '', ' 41', '41 ' or '41  42'.
    $tokens = preg_split('/\s+/', (string) $sequence, -1, PREG_SPLIT_NO_EMPTY);
    foreach ($tokens as $hex) {
        $utf .= codepointToUtf8(hexdec($hex));
    }
    return $utf;
}
示例2: onCategoryMultisortSortkeys_getRadical
/**
 * Build the radical part of a sort key for one character.
 *
 * Looks $chcp up in $data->radicalStrokeCounts. Each entry there is a pair
 * whose first element indexes $data->radicals (yielding a code point) and
 * whose second element is printed as a zero-padded three-digit number
 * (presumably the residual stroke count -- confirm against the data source).
 *
 * @param $data object exposing the radicalStrokeCounts and radicals arrays
 * @param $chcp int code point of the character being sorted
 * @return string the radical character followed by the three-digit number,
 *   or '' when $chcp has no entry in radicalStrokeCounts
 */
function onCategoryMultisortSortkeys_getRadical($data, $chcp)
{
    // No radical information for this character: contribute nothing.
    if (!array_key_exists($chcp, $data->radicalStrokeCounts)) {
        return '';
    }

    list($radicalIndex, $remainder) = $data->radicalStrokeCounts[$chcp];
    $radicalChar = codepointToUtf8($data->radicals[$radicalIndex]);

    return sprintf('%s%03d', $radicalChar, $remainder);
}
示例3: XtestAllChars
/**
 * Run UtfNormal::cleanUp() over every code point from 0 up to (but not
 * including) UNICODE_MAX and check the result.
 *
 * Code points in the accepted ranges must come back either unchanged or,
 * when they appear in one of UtfNormal's canonical composition or
 * decomposition tables, equal to their NFC form. Everything else must be
 * turned into UTF8_REPLACEMENT.
 *
 * This test is *very* expensive! Progress is echoed every 0x1000 code points.
 */
function XtestAllChars()
{
    $replacement = UTF8_REPLACEMENT;
    // NOTE(review): the loop bound is exclusive, so UNICODE_MAX itself is never
    // exercised even though the range check below uses "<= UNICODE_MAX" --
    // confirm whether that is intended.
    for ($cp = 0x0; $cp < UNICODE_MAX; $cp++) {
        $char = codepointToUtf8($cp);
        $clean = UtfNormal::cleanUp($char);
        $hex = sprintf("%04X", $cp);
        if ($cp % 0x1000 == 0) {
            echo "U+{$hex}\n";
        }

        // The four ranges whose characters are expected to survive clean-up.
        $isTabOrNewline = $cp == 0x9 || $cp == 0xa || $cp == 0xd;
        $isBelowSurrogates = $cp > 0x1f && $cp < UNICODE_SURROGATE_FIRST;
        $isAboveSurrogates = $cp > UNICODE_SURROGATE_LAST && $cp < 0xfffe;
        $isBeyondBmp = $cp > 0xffff && $cp <= UNICODE_MAX;

        if (!($isTabOrNewline || $isBelowSurrogates || $isAboveSurrogates || $isBeyondBmp)) {
            $this->assertEquals(bin2hex($replacement), bin2hex($clean), $hex);
            continue;
        }

        $hasCanonicalMapping = isset(UtfNormal::$utfCanonicalComp[$char])
            || isset(UtfNormal::$utfCanonicalDecomp[$char]);
        if ($hasCanonicalMapping) {
            $expected = UtfNormal::NFC($char);
            $message = "U+{$hex} should be decomposed";
        } else {
            $expected = $char;
            $message = "U+{$hex} should be intact";
        }
        $this->assertEquals(bin2hex($expected), bin2hex($clean), $message);
    }
}
示例4: fclose
print "{$total} ";
}
}
// Release the handle currently held in $in and fold the outcome of
// reportResults() for the counters gathered so far into the overall $ok flag.
fclose($in);
$ok = reportResults($total, $success, $failure) && $ok;

// Re-use $in for UnicodeData.txt; without that file there is nothing left to
// test, so explain how to obtain it and abort.
$in = fopen("UnicodeData.txt", "rt");
if ($in === false) {
    print "Can't open UnicodeData.txt for reading.\n"
        . "If necessary, fetch this file from the internet:\n"
        . "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
    exit(-1);
}
print "Now testing invariants...\n";
while (false !== ($line = fgets($in))) {
$cols = explode(';', $line);
$char = codepointToUtf8(hexdec($cols[0]));
$desc = $cols[0] . ": " . $cols[1];
if ($char < " " || $char >= UTF8_SURROGATE_FIRST && $char <= UTF8_SURROGATE_LAST) {
# Can't check NULL with the ICU plugin, as null bytes fail in C land.
# Skip other control characters, as we strip them for XML safety.
# Surrogates are illegal on their own or in UTF-8, ignore.
continue;
}
if (empty($testedChars[$char])) {
$total++;
if (testInvariant($normalizer, $char, $desc)) {
$success++;
} else {
$failure++;
}
if ($total % 100 == 0) {
示例5: generateFirstChars
//.........这里部分代码省略.........
if (!isset($this->weights[$cp])) {
// Non-printable, ignore
continue;
}
foreach (StringUtils::explode('[', $allWeights) as $weightStr) {
preg_match_all('/[*.]([0-9A-F]+)/', $weightStr, $m);
if (!empty($m[1])) {
if ($m[1][0] !== '0000') {
$primary .= '.' . $m[1][0];
}
if ($m[1][2] !== '0000') {
$tertiary .= '.' . $m[1][2];
}
}
}
$this->weights[$cp] = $primary;
if ($tertiary === '.0008' || $tertiary === '.000E') {
$goodTertiaryChars[$cp] = true;
}
}
fclose($file);
// Identify groups of characters with the same primary weight
$this->groups = array();
asort($this->weights, SORT_STRING);
$prevWeight = reset($this->weights);
$group = array();
foreach ($this->weights as $cp => $weight) {
if ($weight !== $prevWeight) {
$this->groups[$prevWeight] = $group;
$prevWeight = $weight;
if (isset($this->groups[$weight])) {
$group = $this->groups[$weight];
} else {
$group = array();
}
}
$group[] = $cp;
}
if ($group) {
$this->groups[$prevWeight] = $group;
}
// If one character has a given primary weight sequence, and a second
// character has a longer primary weight sequence with an initial
// portion equal to the first character, then remove the second
// character. This avoids having characters like U+A732 (double A)
// polluting the basic latin sort area.
foreach ($this->groups as $weight => $group) {
if (preg_match('/(\\.[0-9A-F]*)\\./', $weight, $m)) {
if (isset($this->groups[$m[1]])) {
unset($this->groups[$weight]);
}
}
}
ksort($this->groups, SORT_STRING);
// Identify the header character in each group
$headerChars = array();
$prevChar = "";
$tertiaryCollator = new Collator('root');
$primaryCollator = new Collator('root');
$primaryCollator->setStrength(Collator::PRIMARY);
$numOutOfOrder = 0;
foreach ($this->groups as $weight => $group) {
$uncomposedChars = array();
$goodChars = array();
foreach ($group as $cp) {
if (isset($goodTertiaryChars[$cp])) {
$goodChars[] = $cp;
}
if (!isset($this->mappedChars[$cp])) {
$uncomposedChars[] = $cp;
}
}
$x = array_intersect($goodChars, $uncomposedChars);
if (!$x) {
$x = $uncomposedChars;
if (!$x) {
$x = $group;
}
}
// Use ICU to pick the lowest sorting character in the selection
$tertiaryCollator->sort($x);
$cp = $x[0];
$char = codepointToUtf8($cp);
$headerChars[] = $char;
if ($primaryCollator->compare($char, $prevChar) <= 0) {
$numOutOfOrder++;
/*
printf( "Out of order: U+%05X > U+%05X\n",
utf8ToCodepoint( $prevChar ),
utf8ToCodepoint( $char ) );
*/
}
$prevChar = $char;
if ($this->debugOutFile) {
fwrite($this->debugOutFile, sprintf("%05X %s %s (%s)\n", $cp, $weight, $char, implode(' ', array_map('codepointToUtf8', $group))));
}
}
print "Out of order: {$numOutOfOrder} / " . count($headerChars) . "\n";
fwrite($outFile, serialize($headerChars));
}
示例6: prepareWindows1252
/**
 * Build the byte => UTF-8 translation table used to convert text that was
 * mis-encoded as Windows Code Page 1252 by naughty user-agents, without
 * relying on an outside iconv library.
 *
 * Bytes 0x80-0x9f are looked up in the table below; every other byte is
 * treated as the code point with the same numeric value. The bytes listed
 * with 0xfffd in the table therefore come out as U+FFFD.
 *
 * @return array map of single-byte strings (chr(0) .. chr(255)) to the UTF-8
 *   encoding of the corresponding character
 * @access private
 */
function prepareWindows1252()
{
    # Mappings from:
    # http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
    static $overrides = array(
        0x80 => 0x20ac, 0x81 => 0xfffd, 0x82 => 0x201a, 0x83 => 0x192,
        0x84 => 0x201e, 0x85 => 0x2026, 0x86 => 0x2020, 0x87 => 0x2021,
        0x88 => 0x2c6, 0x89 => 0x2030, 0x8a => 0x160, 0x8b => 0x2039,
        0x8c => 0x152, 0x8d => 0xfffd, 0x8e => 0x17d, 0x8f => 0xfffd,
        0x90 => 0xfffd, 0x91 => 0x2018, 0x92 => 0x2019, 0x93 => 0x201c,
        0x94 => 0x201d, 0x95 => 0x2022, 0x96 => 0x2013, 0x97 => 0x2014,
        0x98 => 0x2dc, 0x99 => 0x2122, 0x9a => 0x161, 0x9b => 0x203a,
        0x9c => 0x153, 0x9d => 0xfffd, 0x9e => 0x17e, 0x9f => 0x178
    );

    $table = array();
    foreach (range(0x00, 0xff) as $byte) {
        // Default to the identity mapping, then apply the CP1252 override.
        $codepoint = $byte;
        if (isset($overrides[$byte])) {
            $codepoint = $overrides[$byte];
        }
        $table[chr($byte)] = codepointToUtf8($codepoint);
    }
    return $table;
}
示例7: array
}
// Collect the simple 1:1 case mappings from the already-open input handle
// $in, keyed by the UTF-8 encoding of the source character.
$wikiUpperChars = array();
$wikiLowerChars = array();
print "Reading character definitions...\n";
while (false !== ($line = fgets($in))) {
    $columns = explode(';', $line);
    // Columns 12 and 13 are read below; a blank or truncated row has neither
    // and contributes no mapping, so skip it instead of touching undefined
    // offsets.
    if (count($columns) < 14) {
        continue;
    }
    $simpleUpper = $columns[12];
    $simpleLower = $columns[13];
    $source = codepointToUtf8(hexdec($columns[0]));
    if ($simpleUpper) {
        $wikiUpperChars[$source] = codepointToUtf8(hexdec($simpleUpper));
    }
    if ($simpleLower) {
        $wikiLowerChars[$source] = codepointToUtf8(hexdec($simpleLower));
    }
}
fclose($in);

// Emit both tables as PHP source into Utf8Case.php.
$out = fopen("Utf8Case.php", "wt");
if ($out) {
    $outUpperChars = escapeArray($wikiUpperChars);
    $outLowerChars = escapeArray($wikiLowerChars);
    // "<" . "?php" keeps the opening tag from appearing literally in this file.
    $outdata = "<" . "?php\n/**\n * Simple 1:1 upper/lowercase switching arrays for utf-8 text.\n * Won't get context-sensitive things yet.\n *\n * Hack for bugs in ucfirst() and company\n *\n * These are pulled from memcached if possible, as this is faster than filling\n * up a big array manually.\n *\n * @file\n * @ingroup Language\n */\n\n/**\n * Translation array to get upper case character\n */\n\$wikiUpperChars = {$outUpperChars};\n\n/**\n * Translation array to get lower case character\n */\n\$wikiLowerChars = {$outLowerChars};\n";
    fputs($out, $outdata);
    fclose($out);
    print "Wrote out Utf8Case.php\n";
} else {
    print "Can't create file Utf8Case.php\n";
    exit(-1);
}
示例8: getItemParsedCallback
/**
 * Callback that replaces one matched unicode notation with its character.
 *
 * The first two characters of the full match are discarded (presumably a
 * two-character prefix such as "\u" -- confirm against the calling pattern)
 * and the remainder is read as a hexadecimal code point.
 *
 * @param array $matches preg matches; only $matches[0] is used
 * @return string replacement string (UTF-8 encoding of the code point)
 */
private function getItemParsedCallback($matches)
{
    // codepointToUtf8() is provided by this include.
    require_once "include/Unicode/UtfNormalUtil.php";

    $hexDigits = substr($matches[0], 2);
    $codepoint = hexdec($hexDigits);

    return codepointToUtf8($codepoint);
}
示例9: execute
public function execute()
{
$dir = __DIR__;
$endl = "\n";
$lines = file("{$dir}/equivset.in");
if (!$lines) {
$this->error("Unable to open equivset.in\n", 1);
}
$setsFile = fopen("{$dir}/equivset.txt", 'w');
if (!$setsFile) {
$this->error("Unable to open equivset.txt for writing\n", 1);
}
fwrite($setsFile, <<<EOT
# This file is generated by generateEquivset.php
# It shows sets of equivalent characters, one set per line, with characters
# separated by whitespace. This file is not used by MediaWiki, rather it is
# intended as a human-readable version of equivset.php, for debugging and
# review purposes.
EOT
);
$outputFile = fopen("{$dir}/equivset.php", 'w');
if (!$outputFile) {
$this->error("Unable to open equivset.php for writing\n", 1);
}
fwrite($outputFile, "<?" . "php{$endl}" . <<<EOT
# This file is generated by generateEquivset.php
# It contains a map of characters, encoded in UTF-8, such that running strtr()
# on a string with this map will cause confusable characters to be reduced to
# a canonical representation. The same array is also available in serialized
# form, in equivset.ser.
EOT
);
$serializedFile = fopen("{$dir}/equivset.ser", 'w');
if (!$serializedFile) {
$this->error("Unable to open equivset.ser for writing\n", 1);
}
# \s matches \xa0 in non-unicode mode, which is not what we want
# So we need to make our own whitespace class
$sp = '[\\ \\t]';
$lineNum = 0;
$setsByChar = array();
$sets = array();
$exitStatus = 0;
foreach ($lines as $line) {
++$lineNum;
$mapToEmpty = false;
# Whether the line ends with a null character
$mapToEmpty = strpos($line, "") === strlen($line) - 2;
$line = trim($line);
# Filter comments
if (!$line || $line[0] == '#') {
continue;
}
# Process line
if (!preg_match("/^(?P<hexleft> [A-F0-9]+) {$sp}+ (?P<charleft> .+?) {$sp}+ => {$sp}+ (?:(?P<hexright> [A-F0-9]+) {$sp}+|) (?P<charright> .+?) {$sp}* (?: \\#.*|) \$ /x", $line, $m)) {
$this->output("Error: invalid entry at line {$lineNum}: {$line}\n");
$exitStatus = 1;
continue;
}
$error = false;
if ($mapToEmpty) {
$m['charright'] = '';
} else {
if (codepointToUtf8(hexdec($m['hexleft'])) != $m['charleft']) {
$actual = utf8ToCodepoint($m['charleft']);
if ($actual === false) {
$this->output("Bytes: " . strlen($m['charleft']) . "\n");
$this->output(bin2hex($line) . "\n");
$hexForm = bin2hex($m['charleft']);
$this->output("Invalid UTF-8 character \"{$m['charleft']}\" ({$hexForm}) at line {$lineNum}: {$line}\n");
} else {
$this->output("Error: left number ({$m['hexleft']}) does not match left character ({$actual}) " . "at line {$lineNum}: {$line}\n");
}
$error = true;
}
if (!empty($m['hexright']) && codepointToUtf8(hexdec($m['hexright'])) != $m['charright']) {
$actual = utf8ToCodepoint($m['charright']);
if ($actual === false) {
$hexForm = bin2hex($m['charright']);
$this->output("Invalid UTF-8 character \"{$m['charleft']}\" ({$hexForm}) at line {$lineNum}: {$line}\n");
} else {
$this->output("Error: right number ({$m['hexright']}) does not match right character ({$actual}) " . "at line {$lineNum}: {$line}\n");
}
$error = true;
}
if ($error) {
$exitStatus = 1;
continue;
}
}
# Find the set for the right character, add a new one if necessary
if (isset($setsByChar[$m['charright']])) {
$setName = $setsByChar[$m['charright']];
} else {
# New set
$setName = $m['charright'];
$sets[$setName] = array($m['charright']);
$setsByChar[$setName] = $setName;
//.........这里部分代码省略.........
示例10: badCharErr
/**
 * Build an error result describing a single offending character.
 *
 * @param string $msgId key of the message that wraps the character description
 * @param int $point code point of the offending character
 * @return array two elements: the literal "ERROR" and the rendered message text
 */
private static function badCharErr($msgId, $point)
{
    $code = sprintf('U+%04X', $point);
    $symbol = codepointToUtf8($point);

    // Combining marks are combined with the previous character. If abusing character is a
    // combining mark, prepend it with space to show them correctly.
    if (self::getScriptCode($point) == "SCRIPT_COMBINING_MARKS") {
        $symbol = ' ' . $symbol;
    }

    // A symbol that is exactly one character of Unicode category C is
    // described by its code only; anything else is shown alongside its code.
    $isNonPrintable = preg_match('/\\A\\p{C}\\z/u', $symbol);
    $char = $isNonPrintable
        ? wfMessage('antispoof-bad-char-non-printable', $code)->text()
        : wfMessage('antispoof-bad-char', $symbol, $code)->text();

    return array("ERROR", wfMessage($msgId, $char)->text());
}
示例11: js_unescape
/**
 * Function converts an Javascript escaped string back into a string with
 * specified charset (default is UTF-8).
 * Modified function from http://pure-essence.net/stuff/code/utf8RawUrlDecode.phps
 *
 * Decoding follows the behaviour of Javascript's unescape():
 *  - "%uXXXX" (lowercase "u", exactly four hex digits) is one UTF-16 code
 *    unit. A high surrogate immediately followed by a low surrogate is
 *    combined into a single code point before UTF-8 encoding.
 *  - "%XX" (exactly two hex digits) becomes the byte with that value.
 *  - A "%" that does not start a well-formed escape, including one truncated
 *    by the end of the string, is copied through unchanged. Previously such
 *    input produced NUL bytes or characters built from partial digits.
 *
 * @param $source String escaped with Javascript's escape() function
 * @param $iconv_to String destination character set will be used as second parameter
 * in the iconv function. Default is UTF-8.
 * @return string the decoded string; whatever iconv() returns when a charset
 * other than "UTF-8" is requested
 */
function js_unescape($source, $iconv_to = 'UTF-8')
{
    $hexDigits = '0123456789abcdefABCDEF';
    $source = (string) $source;
    $decodedStr = '';
    $pos = 0;
    $len = strlen($source);
    while ($pos < $len) {
        $charAt = $source[$pos];
        if ($charAt !== '%') {
            $decodedStr .= $charAt;
            $pos++;
            continue;
        }

        if ($pos + 6 <= $len
            && $source[$pos + 1] === 'u'
            && strspn($source, $hexDigits, $pos + 2, 4) === 4
        ) {
            // we got a unicode character
            $unicode = hexdec(substr($source, $pos + 2, 4));
            $pos += 6;

            // escape() emits characters outside the BMP as two %uXXXX
            // surrogates; join them so the result is one valid UTF-8 sequence.
            if ($unicode >= 0xD800 && $unicode <= 0xDBFF
                && $pos + 6 <= $len
                && $source[$pos] === '%'
                && $source[$pos + 1] === 'u'
                && strspn($source, $hexDigits, $pos + 2, 4) === 4
            ) {
                $low = hexdec(substr($source, $pos + 2, 4));
                if ($low >= 0xDC00 && $low <= 0xDFFF) {
                    $unicode = 0x10000 + (($unicode - 0xD800) << 10) + ($low - 0xDC00);
                    $pos += 6;
                }
            }
            $decodedStr .= codepointToUtf8($unicode);
        } elseif ($pos + 3 <= $len && strspn($source, $hexDigits, $pos + 1, 2) === 2) {
            // we have an escaped ascii character
            // NOTE(review): values above 0x7F are appended as a raw byte, not as
            // the UTF-8 form of U+00XX. Kept as-is because callers may rely on
            // percent-encoded UTF-8 bytes passing through -- confirm.
            $decodedStr .= chr(hexdec(substr($source, $pos + 1, 2)));
            $pos += 3;
        } else {
            // Not a well-formed escape: keep the percent sign literally.
            $decodedStr .= '%';
            $pos++;
        }
    }
    if ($iconv_to != "UTF-8") {
        $decodedStr = iconv("utf-8", $iconv_to, $decodedStr);
    }
    return $decodedStr;
}
示例12: listToString
/**
 * Concatenate the UTF-8 encodings of a list of code points.
 *
 * @param $list array code points, in output order
 * @return string the characters joined with no separator; '' for an empty list
 */
public static function listToString($list)
{
    $chars = array();
    foreach ($list as $codepoint) {
        $chars[] = codepointToUtf8($codepoint);
    }
    return implode('', $chars);
}
示例13: hexUnicodeToUtf8
/**
 * Convert a hexadecimal code point string to its UTF-8 character.
 *
 * Diagnostics raised by either the hex parsing or the encoding step are
 * suppressed with the @ operator.
 *
 * @param $hexcp String hexadecimal code point, e.g. "20AC"
 * @return String whatever codepointToUtf8() returns for the parsed value
 */
function hexUnicodeToUtf8($hexcp)
{
    $codepoint = @hexdec($hexcp);
    return @codepointToUtf8($codepoint);
}
示例14: decodeEntity
/**
 * Resolve a named entity to its UTF-8 character.
 *
 * The name is first translated through $wgHtmlEntityAliases if it is listed
 * there. If the resulting name is defined in $wgHtmlEntities (the HTML
 * 4.0/XHTML 1.0 DTD entities), the UTF-8 encoding of that character is
 * returned. Otherwise the pseudo-entity source (eg &foo;) is returned, built
 * from the resolved name.
 *
 * @param string $name entity name without the leading "&" and trailing ";"
 * @return string
 */
static function decodeEntity($name)
{
    global $wgHtmlEntities, $wgHtmlEntityAliases;

    $resolved = isset($wgHtmlEntityAliases[$name]) ? $wgHtmlEntityAliases[$name] : $name;

    if (!isset($wgHtmlEntities[$resolved])) {
        return "&{$resolved};";
    }
    return codepointToUtf8($wgHtmlEntities[$resolved]);
}
示例15: decodeChar
/**
 * Encode a code point as UTF-8 when SGString::validateCodepoint() accepts it
 * as a valid character reference; otherwise yield U+FFFD REPLACEMENT
 * CHARACTER.
 *
 * @param $codepoint Integer
 * @return String
 */
static function decodeChar($codepoint)
{
    if (!SGString::validateCodepoint($codepoint)) {
        return UTF8_REPLACEMENT;
    }
    return codepointToUtf8($codepoint);
}