本文整理汇总了PHP中UtfNormal::quickIsNFCVerify方法的典型用法代码示例。如果您正苦于以下问题:PHP UtfNormal::quickIsNFCVerify方法的具体用法?PHP UtfNormal::quickIsNFCVerify怎么用?PHP UtfNormal::quickIsNFCVerify使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类UtfNormal的用法示例。
在下文中一共展示了UtfNormal::quickIsNFCVerify方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: cleanUp
/**
 * Clean up invalid UTF-8 sequences and convert the string to Unicode
 * normal form C (canonical composition).
 *
 * One of three strategies is used:
 *  - when NORMALIZE_ICU is truthy, a few characters are replaced with
 *    UTF8_REPLACEMENT and the result is passed to utf8_normalize();
 *  - otherwise, when UtfNormal::quickIsNFCVerify() reports the string as
 *    already normalized, the string is returned directly;
 *  - otherwise the string is passed through UtfNormal::NFC().
 *
 * @param string $string a UTF-8 string
 * @return string a clean, normalized UTF-8 string
 * @static
 */
static function cleanUp($string)
{
    if (NORMALIZE_ICU) {
        # We exclude a few chars that ICU would not.
        $string = preg_replace('/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f]/', UTF8_REPLACEMENT, $string);
        $string = str_replace(UTF8_FFFE, UTF8_REPLACEMENT, $string);
        $string = str_replace(UTF8_FFFF, UTF8_REPLACEMENT, $string);
        # UnicodeString constructor fails if the string ends with a
        # head byte. Add a junk char at the end, we'll strip it off.
        # The sentinel is written as an explicit escape so it cannot be lost
        # as an invisible raw byte. Any genuine U+0001 in the input was
        # already replaced by the preg_replace() above, so the rtrim() can
        # only remove the sentinel we appended.
        return rtrim(utf8_normalize($string . "\x01", UNORM_NFC), "\x01");
    } elseif (UtfNormal::quickIsNFCVerify($string)) {
        # Side effect -- $string has had UTF-8 errors cleaned up.
        return $string;
    } else {
        return UtfNormal::NFC($string);
    }
}
示例2: convIPTCHelper
/**
 * Convert one IPTC value to UTF-8 (helper of the IPTC charset helper).
 *
 * With an explicit charset the value is run through iconv() and trimmed.
 * Without one, the value is kept as-is if UTF-8 validation leaves it
 * unchanged; otherwise it is re-processed as Windows-1252.
 *
 * @param string|array $data The IPTC string
 * @param string $charset The charset; a falsy value means "unknown"
 *
 * @return string
 */
private static function convIPTCHelper($data, $charset)
{
    if (!$charset) {
        // Most of the time, if there is no 1:90 tag, the value is either
        // ascii, latin1, or utf-8: treat it as utf-8 when it validates,
        // otherwise pretend it is windows-1252.
        $validated = $data;
        // Validation rewrites its argument into valid utf-8, so run it on a copy.
        UtfNormal::quickIsNFCVerify($validated);
        if ($validated !== $data) {
            return self::convIPTCHelper($data, 'Windows-1252');
        }
        // Validation didn't change anything: hand the value back untouched
        // (this path does not trim).
        return $data;
    }

    wfSuppressWarnings();
    $converted = iconv($charset, "UTF-8//IGNORE", $data);
    wfRestoreWarnings();

    if ($converted === false) {
        // Conversion failed: log it and fall back to an empty string.
        wfDebugLog('iptc', __METHOD__ . " Error converting iptc data charset {$charset} to utf-8");
        $converted = "";
    }

    return trim($converted);
}
示例3: charCodeString
/**
 * Do userComment tags and similar. See pg. 34 of exif standard.
 * Basically the first 8 bytes identify the charset, the rest is the value.
 * This has not been tested on any shift-JIS strings.
 *
 * The entry in $this->mFilteredExifData[$prop] is replaced by the decoded,
 * trimmed UTF-8 value. It is removed instead if it is not longer than
 * 8 bytes or if nothing but whitespace remains after decoding.
 *
 * @param string $prop prop name.
 */
private function charCodeString($prop)
{
    if (!isset($this->mFilteredExifData[$prop])) {
        return;
    }

    if (strlen($this->mFilteredExifData[$prop]) <= 8) {
        //invalid. Must be at least 9 bytes long.
        $this->debug($this->mFilteredExifData[$prop], __FUNCTION__, false);
        unset($this->mFilteredExifData[$prop]);
        return;
    }

    $charCode = substr($this->mFilteredExifData[$prop], 0, 8);
    $val = substr($this->mFilteredExifData[$prop], 8);

    // $charCode is always exactly 8 bytes, so the identifiers must be given
    // in their full NUL-padded form. Without the padding these cases can
    // never match and every value is treated as ascii/undefined.
    switch ($charCode) {
        case "JIS\x00\x00\x00\x00\x00":
            //JIS
            $charset = "Shift-JIS";
            break;
        case "UNICODE\x00":
            $charset = "UTF-16" . $this->byteOrder;
            break;
        default:
            //ascii or undefined.
            $charset = "";
            break;
    }

    // This could possibly check to see if iconv is really installed
    // or if we're using the compatibility wrapper in globalFunctions.php
    if ($charset) {
        wfSuppressWarnings();
        $val = iconv($charset, 'UTF-8//IGNORE', $val);
        wfRestoreWarnings();
    } else {
        // if valid utf-8, assume that, otherwise assume windows-1252
        $valCopy = $val;
        // Validation rewrites its argument, so it is run on a copy and the
        // copy is compared with the untouched original.
        UtfNormal::quickIsNFCVerify($valCopy);
        if ($valCopy !== $val) {
            wfSuppressWarnings();
            $val = iconv('Windows-1252', 'UTF-8//IGNORE', $val);
            wfRestoreWarnings();
        }
    }

    //trim and check to make sure not only whitespace.
    $val = trim($val);
    if (strlen($val) === 0) {
        //only whitespace.
        $this->debug($this->mFilteredExifData[$prop], __FUNCTION__, "{$prop}: Is only whitespace");
        unset($this->mFilteredExifData[$prop]);
        return;
    }

    //all's good.
    $this->mFilteredExifData[$prop] = $val;
}
示例4: cleanUp
/**
 * Clean up invalid UTF-8 sequences and convert the string to Unicode
 * normal form C (canonical composition).
 *
 * One of three strategies is used:
 *  - when NORMALIZE_ICU is truthy, utf8_normalize() is used;
 *  - when NORMALIZE_INTL is truthy, normalizer_normalize() is used, with
 *    UtfNormal::quickIsNFCVerify() as a fallback when it rejects the input;
 *  - otherwise UtfNormal::quickIsNFCVerify() / UtfNormal::NFC() are used.
 *
 * @param $string String: a UTF-8 string
 * @return string a clean, normalized UTF-8 string
 */
static function cleanUp($string)
{
    if (NORMALIZE_ICU) {
        $string = self::replaceForNativeNormalize($string);
        # UnicodeString constructor fails if the string ends with a
        # head byte. Add a junk char at the end, we'll strip it off.
        # The sentinel is written as an explicit escape so it cannot be lost
        # as an invisible raw byte.
        # NOTE(review): this assumes replaceForNativeNormalize() has already
        # replaced any genuine U+0001 in the input, so that rtrim() only
        # removes the sentinel -- confirm against that helper.
        return rtrim(utf8_normalize($string . "\x01", UNORM_NFC), "\x01");
    } elseif (NORMALIZE_INTL) {
        $string = self::replaceForNativeNormalize($string);
        $norm = normalizer_normalize($string, Normalizer::FORM_C);
        if ($norm === null || $norm === false) {
            # normalizer_normalize will either return false or null
            # (depending on which doc you read) if invalid utf8 string.
            # quickIsNFCVerify cleans up invalid sequences.
            if (UtfNormal::quickIsNFCVerify($string)) {
                # if that's true, the string is actually already normal.
                return $string;
            } else {
                # Now we are valid but non-normal
                return normalizer_normalize($string, Normalizer::FORM_C);
            }
        } else {
            return $norm;
        }
    } elseif (UtfNormal::quickIsNFCVerify($string)) {
        # Side effect -- $string has had UTF-8 errors cleaned up.
        return $string;
    } else {
        return UtfNormal::NFC($string);
    }
}
示例5: segmentSplitter
/** Function to extract metadata segments of interest from jpeg files
* based on GIFMetadataExtractor.
*
* we can almost use getimagesize to do this
* but gis doesn't support having multiple app1 segments
* and those can't extract xmp on files containing both exif and xmp data
*
* @param string $filename Name of jpeg file
* @return array Array of interesting segments.
* @throws MWException If given invalid file.
*/
static function segmentSplitter($filename)
{
$showXMP = XMPReader::isSupported();
$segmentCount = 0;
$segments = array('XMP_ext' => array(), 'COM' => array(), 'PSIR' => array());
if (!$filename) {
throw new MWException("No filename specified for " . __METHOD__);
}
if (!file_exists($filename) || is_dir($filename)) {
throw new MWException("Invalid file {$filename} passed to " . __METHOD__);
}
$fh = fopen($filename, "rb");
if (!$fh) {
throw new MWException("Could not open file {$filename}");
}
$buffer = fread($fh, 2);
if ($buffer !== "ÿØ") {
throw new MWException("Not a jpeg, no SOI");
}
while (!feof($fh)) {
$buffer = fread($fh, 1);
$segmentCount++;
if ($segmentCount > self::MAX_JPEG_SEGMENTS) {
// this is just a sanity check
throw new MWException('Too many jpeg segments. Aborting');
}
if ($buffer !== "ÿ") {
throw new MWException("Error reading jpeg file marker. " . "Expected 0xFF but got " . bin2hex($buffer));
}
$buffer = fread($fh, 1);
while ($buffer === "ÿ" && !feof($fh)) {
// Skip through any 0xFF padding bytes.
$buffer = fread($fh, 1);
}
if ($buffer === "þ") {
// COM section -- file comment
// First see if valid utf-8,
// if not try to convert it to windows-1252.
$com = $oldCom = trim(self::jpegExtractMarker($fh));
UtfNormal::quickIsNFCVerify($com);
// turns $com to valid utf-8.
// thus if no change, its utf-8, otherwise its something else.
if ($com !== $oldCom) {
wfSuppressWarnings();
$com = $oldCom = iconv('windows-1252', 'UTF-8//IGNORE', $oldCom);
wfRestoreWarnings();
}
// Try it again, if its still not a valid string, then probably
// binary junk or some really weird encoding, so don't extract.
UtfNormal::quickIsNFCVerify($com);
if ($com === $oldCom) {
$segments["COM"][] = $oldCom;
} else {
wfDebug(__METHOD__ . " Ignoring JPEG comment as is garbage.\n");
}
} elseif ($buffer === "á") {
// APP1 section (Exif, XMP, and XMP extended)
// only extract if XMP is enabled.
$temp = self::jpegExtractMarker($fh);
// check what type of app segment this is.
if (substr($temp, 0, 29) === "http://ns.adobe.com/xap/1.0/" && $showXMP) {
$segments["XMP"] = substr($temp, 29);
} elseif (substr($temp, 0, 35) === "http://ns.adobe.com/xmp/extension/" && $showXMP) {
$segments["XMP_ext"][] = substr($temp, 35);
} elseif (substr($temp, 0, 29) === "XMP://ns.adobe.com/xap/1.0/" && $showXMP) {
// Some images (especially flickr images) seem to have this.
// I really have no idea what the deal is with them, but
// whatever...
$segments["XMP"] = substr($temp, 29);
wfDebug(__METHOD__ . ' Found XMP section with wrong app identifier ' . "Using anyways.\n");
} elseif (substr($temp, 0, 6) === "Exif") {
// Just need to find out what the byte order is.
// because php's exif plugin sucks...
// This is a II for little Endian, MM for big. Not a unicode BOM.
$byteOrderMarker = substr($temp, 6, 2);
if ($byteOrderMarker === 'MM') {
$segments['byteOrder'] = 'BE';
} elseif ($byteOrderMarker === 'II') {
$segments['byteOrder'] = 'LE';
} else {
wfDebug(__METHOD__ . " Invalid byte ordering?!\n");
}
}
} elseif ($buffer === "í") {
// APP13 - PSIR. IPTC and some photoshop stuff
$temp = self::jpegExtractMarker($fh);
if (substr($temp, 0, 14) === "Photoshop 3.0") {
$segments["PSIR"][] = $temp;
}
//.........这里部分代码省略.........
示例6: getMetadata
/**
* @throws Exception
* @param $filename string
* @return array
*/
static function getMetadata($filename)
{
self::$gif_frame_sep = pack("C", ord(","));
self::$gif_extension_sep = pack("C", ord("!"));
self::$gif_term = pack("C", ord(";"));
$frameCount = 0;
$duration = 0.0;
$isLooped = false;
$xmp = "";
$comment = array();
if (!$filename) {
throw new Exception("No file name specified");
} elseif (!file_exists($filename) || is_dir($filename)) {
throw new Exception("File {$filename} does not exist");
}
$fh = fopen($filename, 'rb');
if (!$fh) {
throw new Exception("Unable to open file {$filename}");
}
// Check for the GIF header
$buf = fread($fh, 6);
if (!($buf == 'GIF87a' || $buf == 'GIF89a')) {
throw new Exception("Not a valid GIF file; header: {$buf}");
}
// Skip over width and height.
fread($fh, 4);
// Read BPP
$buf = fread($fh, 1);
$bpp = self::decodeBPP($buf);
// Skip over background and aspect ratio
fread($fh, 2);
// Skip over the GCT
self::readGCT($fh, $bpp);
while (!feof($fh)) {
$buf = fread($fh, 1);
if ($buf == self::$gif_frame_sep) {
// Found a frame
$frameCount++;
## Skip bounding box
fread($fh, 8);
## Read BPP
$buf = fread($fh, 1);
$bpp = self::decodeBPP($buf);
## Read GCT
self::readGCT($fh, $bpp);
fread($fh, 1);
self::skipBlock($fh);
} elseif ($buf == self::$gif_extension_sep) {
$buf = fread($fh, 1);
if (strlen($buf) < 1) {
throw new Exception("Ran out of input");
}
$extension_code = unpack('C', $buf);
$extension_code = $extension_code[1];
if ($extension_code == 0xf9) {
// Graphics Control Extension.
fread($fh, 1);
// Block size
fread($fh, 1);
// Transparency, disposal method, user input
$buf = fread($fh, 2);
// Delay, in hundredths of seconds.
if (strlen($buf) < 2) {
throw new Exception("Ran out of input");
}
$delay = unpack('v', $buf);
$delay = $delay[1];
$duration += $delay * 0.01;
fread($fh, 1);
// Transparent colour index
$term = fread($fh, 1);
// Should be a terminator
if (strlen($term) < 1) {
throw new Exception("Ran out of input");
}
$term = unpack('C', $term);
$term = $term[1];
if ($term != 0) {
throw new Exception("Malformed Graphics Control Extension block");
}
} elseif ($extension_code == 0xfe) {
// Comment block(s).
$data = self::readBlock($fh);
if ($data === "") {
throw new Exception('Read error, zero-length comment block');
}
// The standard says this should be ASCII, however its unclear if
// thats true in practise. Check to see if its valid utf-8, if so
// assume its that, otherwise assume its windows-1252 (iso-8859-1)
$dataCopy = $data;
// quickIsNFCVerify has the side effect of replacing any invalid characters
UtfNormal::quickIsNFCVerify($dataCopy);
if ($dataCopy !== $data) {
wfSuppressWarnings();
$data = iconv('windows-1252', 'UTF-8', $data);
//.........这里部分代码省略.........