当前位置: 首页>>代码示例>>PHP>>正文


PHP UtfNormal::quickIsNFCVerify方法代码示例

本文整理汇总了PHP中UtfNormal::quickIsNFCVerify方法的典型用法代码示例。如果您正苦于以下问题:PHP UtfNormal::quickIsNFCVerify方法的具体用法?PHP UtfNormal::quickIsNFCVerify怎么用?PHP UtfNormal::quickIsNFCVerify使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在UtfNormal的用法示例。


在下文中一共展示了UtfNormal::quickIsNFCVerify方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。

示例1: cleanUp

 /**
  * Clean up invalid UTF-8 sequences and convert the string to normal
  * form C (canonical composition).
  *
  * Strategy, in order of preference:
  *  - NORMALIZE_ICU is truthy: replace the characters we reject with
  *    UTF8_REPLACEMENT and delegate to utf8_normalize().
  *  - UtfNormal::quickIsNFCVerify() returns true: the string is returned
  *    directly; that call has already cleaned up UTF-8 errors in $string.
  *  - otherwise: run the full UtfNormal::NFC() normalization.
  *
  * @param string $string a UTF-8 string
  * @return string a clean, normalized UTF-8 string
  */
 static function cleanUp($string)
 {
     if (NORMALIZE_ICU) {
         # We exclude a few chars that ICU would not.
         $string = preg_replace('/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f]/', UTF8_REPLACEMENT, $string);
         $string = str_replace(UTF8_FFFE, UTF8_REPLACEMENT, $string);
         $string = str_replace(UTF8_FFFF, UTF8_REPLACEMENT, $string);
         # UnicodeString constructor fails if the string ends with a
         # head byte. Append a junk 0xFF byte (never a head byte of a valid
         # sequence) and strip it off again afterwards. Appending and
         # trimming an empty string, as before, was a no-op.
         # NOTE(review): assumes utf8_normalize() leaves the trailing 0xFF
         # in place so rtrim() can remove it -- confirm against the extension.
         return rtrim(utf8_normalize($string . "\xff", UNORM_NFC), "\xff");
     } elseif (UtfNormal::quickIsNFCVerify($string)) {
         # Side effect -- $string has had UTF-8 errors cleaned up.
         return $string;
     } else {
         return UtfNormal::NFC($string);
     }
 }
开发者ID:BackupTheBerlios,项目名称:shoutwiki-svn,代码行数:28,代码来源:UtfNormal.php

示例2: convIPTCHelper

 /**
  * Convert a single IPTC value to UTF-8.
  *
  * With an explicit charset the value is run through iconv() and trimmed;
  * a failed conversion is logged and yields an empty string. Without a
  * charset the value is kept untouched when quickIsNFCVerify() leaves it
  * unchanged, and is otherwise re-processed as Windows-1252.
  *
  * @param string|array $data The IPTC string
  * @param string $charset The charset (falsy means "unknown, guess")
  *
  * @return string
  */
 private static function convIPTCHelper($data, $charset)
 {
     if (!$charset) {
         // Most of the time, when there is no 1:90 tag, the value is
         // ascii, latin1 or utf-8. Validate a scratch copy: the verifier
         // rewrites its argument, so any difference means the input was
         // not valid utf-8.
         $verified = $data;
         UtfNormal::quickIsNFCVerify($verified);
         if ($verified !== $data) {
             // Not valid utf-8: pretend it is windows-1252.
             return self::convIPTCHelper($data, 'Windows-1252');
         }
         // Validation did not change anything, so treat it as utf-8.
         return $data;
     }

     wfSuppressWarnings();
     $converted = iconv($charset, "UTF-8//IGNORE", $data);
     wfRestoreWarnings();

     if ($converted === false) {
         wfDebugLog('iptc', __METHOD__ . " Error converting iptc data charset {$charset} to utf-8");
         $converted = "";
     }

     return trim($converted);
 }
开发者ID:Tarendai,项目名称:spring-website,代码行数:32,代码来源:IPTC.php

示例3: charCodeString

 /**
  * Do userComment tags and similar. See pg. 34 of exif standard.
  *
  * The first 8 bytes of the raw value are the character code identifier
  * (NUL-padded to 8 bytes), the rest is the value itself. The value is
  * converted to UTF-8, trimmed and written back to
  * $this->mFilteredExifData[$prop]; values that are too short or contain
  * only whitespace are removed from the array.
  *
  * This has not been tested on any shift-JIS strings.
  *
  * @param string $prop prop name.
  */
 private function charCodeString($prop)
 {
     if (!isset($this->mFilteredExifData[$prop])) {
         return;
     }

     if (strlen($this->mFilteredExifData[$prop]) <= 8) {
         //invalid. Must be at least 9 bytes long.
         $this->debug($this->mFilteredExifData[$prop], __FUNCTION__, false);
         unset($this->mFilteredExifData[$prop]);
         return;
     }

     $charCode = substr($this->mFilteredExifData[$prop], 0, 8);
     $val = substr($this->mFilteredExifData[$prop], 8);

     // $charCode is always exactly 8 bytes, so the identifiers must be
     // compared including their NUL padding; bare "JIS" / "UNICODE" could
     // never match and every value would fall through to the default.
     switch ($charCode) {
         case "JIS\x00\x00\x00\x00\x00":
             //JIS
             $charset = "Shift-JIS";
             break;
         case "UNICODE\x00":
             $charset = "UTF-16" . $this->byteOrder;
             break;
         default:
             //ascii or undefined.
             $charset = "";
             break;
     }

     // This could possibly check to see if iconv is really installed
     // or if we're using the compatibility wrapper in globalFunctions.php
     if ($charset) {
         wfSuppressWarnings();
         $val = iconv($charset, 'UTF-8//IGNORE', $val);
         wfRestoreWarnings();
     } else {
         // if valid utf-8, assume that, otherwise assume windows-1252
         $valCopy = $val;
         UtfNormal::quickIsNFCVerify($valCopy);
         //validates $valCopy; a difference means $val was not valid utf-8.
         if ($valCopy !== $val) {
             wfSuppressWarnings();
             $val = iconv('Windows-1252', 'UTF-8//IGNORE', $val);
             wfRestoreWarnings();
         }
     }

     //trim and check to make sure not only whitespace.
     // (a failed iconv() returns false, which trims to an empty string
     // and is therefore dropped by the check below as well)
     $val = trim($val);
     if (strlen($val) === 0) {
         //only whitespace.
         $this->debug($this->mFilteredExifData[$prop], __FUNCTION__, "{$prop}: Is only whitespace");
         unset($this->mFilteredExifData[$prop]);
         return;
     }

     //all's good.
     $this->mFilteredExifData[$prop] = $val;
 }
开发者ID:mangowi,项目名称:mediawiki,代码行数:59,代码来源:Exif.php

示例4: cleanUp

 /**
  * Clean up invalid UTF-8 sequences and convert the string to normal
  * form C (canonical composition).
  *
  * Strategy, in order of preference:
  *  - NORMALIZE_ICU is truthy: pre-process with
  *    self::replaceForNativeNormalize() and delegate to utf8_normalize().
  *  - NORMALIZE_INTL is truthy: same pre-processing, then
  *    normalizer_normalize(); if that fails, repair the string with
  *    quickIsNFCVerify() and retry when it is not already normal.
  *  - UtfNormal::quickIsNFCVerify() returns true: the string is returned
  *    directly; that call has already cleaned up UTF-8 errors in $string.
  *  - otherwise: run the full UtfNormal::NFC() normalization.
  *
  * @param string $string a UTF-8 string
  * @return string a clean, normalized UTF-8 string
  */
 static function cleanUp($string)
 {
     if (NORMALIZE_ICU) {
         $string = self::replaceForNativeNormalize($string);
         # UnicodeString constructor fails if the string ends with a
         # head byte. Append a junk 0xFF byte (never a head byte of a valid
         # sequence) and strip it off again afterwards. Appending and
         # trimming an empty string, as before, was a no-op.
         # NOTE(review): assumes utf8_normalize() leaves the trailing 0xFF
         # in place so rtrim() can remove it -- confirm against the extension.
         return rtrim(utf8_normalize($string . "\xff", UNORM_NFC), "\xff");
     } elseif (NORMALIZE_INTL) {
         $string = self::replaceForNativeNormalize($string);
         $norm = normalizer_normalize($string, Normalizer::FORM_C);
         if ($norm === null || $norm === false) {
             # normalizer_normalize will either return false or null
             # (depending on which doc you read) if invalid utf8 string.
             # quickIsNFCVerify cleans up invalid sequences.
             if (UtfNormal::quickIsNFCVerify($string)) {
                 # if that's true, the string is actually already normal.
                 return $string;
             } else {
                 # Now we are valid but non-normal
                 return normalizer_normalize($string, Normalizer::FORM_C);
             }
         } else {
             return $norm;
         }
     } elseif (UtfNormal::quickIsNFCVerify($string)) {
         # Side effect -- $string has had UTF-8 errors cleaned up.
         return $string;
     } else {
         return UtfNormal::NFC($string);
     }
 }
开发者ID:GodelDesign,项目名称:Godel,代码行数:41,代码来源:UtfNormal.php

示例5: segmentSplitter

 /** Function to extract metadata segments of interest from jpeg files
  * based on GIFMetadataExtractor.
  *
  * we can almost use getimagesize to do this
  * but gis doesn't support having multiple app1 segments
  * and those can't extract xmp on files containing both exif and xmp data
  *
  * @param string $filename Name of jpeg file
  * @return array Array of interesting segments.
  * @throws MWException If given invalid file.
  */
 static function segmentSplitter($filename)
 {
     $showXMP = XMPReader::isSupported();
     $segmentCount = 0;
     $segments = array('XMP_ext' => array(), 'COM' => array(), 'PSIR' => array());
     if (!$filename) {
         throw new MWException("No filename specified for " . __METHOD__);
     }
     if (!file_exists($filename) || is_dir($filename)) {
         throw new MWException("Invalid file {$filename} passed to " . __METHOD__);
     }
     $fh = fopen($filename, "rb");
     if (!$fh) {
         throw new MWException("Could not open file {$filename}");
     }
     $buffer = fread($fh, 2);
     if ($buffer !== "ÿØ") {
         throw new MWException("Not a jpeg, no SOI");
     }
     while (!feof($fh)) {
         $buffer = fread($fh, 1);
         $segmentCount++;
         if ($segmentCount > self::MAX_JPEG_SEGMENTS) {
             // this is just a sanity check
             throw new MWException('Too many jpeg segments. Aborting');
         }
         if ($buffer !== "ÿ") {
             throw new MWException("Error reading jpeg file marker. " . "Expected 0xFF but got " . bin2hex($buffer));
         }
         $buffer = fread($fh, 1);
         while ($buffer === "ÿ" && !feof($fh)) {
             // Skip through any 0xFF padding bytes.
             $buffer = fread($fh, 1);
         }
         if ($buffer === "þ") {
             // COM section -- file comment
             // First see if valid utf-8,
             // if not try to convert it to windows-1252.
             $com = $oldCom = trim(self::jpegExtractMarker($fh));
             UtfNormal::quickIsNFCVerify($com);
             // turns $com to valid utf-8.
             // thus if no change, its utf-8, otherwise its something else.
             if ($com !== $oldCom) {
                 wfSuppressWarnings();
                 $com = $oldCom = iconv('windows-1252', 'UTF-8//IGNORE', $oldCom);
                 wfRestoreWarnings();
             }
             // Try it again, if its still not a valid string, then probably
             // binary junk or some really weird encoding, so don't extract.
             UtfNormal::quickIsNFCVerify($com);
             if ($com === $oldCom) {
                 $segments["COM"][] = $oldCom;
             } else {
                 wfDebug(__METHOD__ . " Ignoring JPEG comment as is garbage.\n");
             }
         } elseif ($buffer === "á") {
             // APP1 section (Exif, XMP, and XMP extended)
             // only extract if XMP is enabled.
             $temp = self::jpegExtractMarker($fh);
             // check what type of app segment this is.
             if (substr($temp, 0, 29) === "http://ns.adobe.com/xap/1.0/" && $showXMP) {
                 $segments["XMP"] = substr($temp, 29);
             } elseif (substr($temp, 0, 35) === "http://ns.adobe.com/xmp/extension/" && $showXMP) {
                 $segments["XMP_ext"][] = substr($temp, 35);
             } elseif (substr($temp, 0, 29) === "XMP://ns.adobe.com/xap/1.0/" && $showXMP) {
                 // Some images (especially flickr images) seem to have this.
                 // I really have no idea what the deal is with them, but
                 // whatever...
                 $segments["XMP"] = substr($temp, 29);
                 wfDebug(__METHOD__ . ' Found XMP section with wrong app identifier ' . "Using anyways.\n");
             } elseif (substr($temp, 0, 6) === "Exif") {
                 // Just need to find out what the byte order is.
                 // because php's exif plugin sucks...
                 // This is a II for little Endian, MM for big. Not a unicode BOM.
                 $byteOrderMarker = substr($temp, 6, 2);
                 if ($byteOrderMarker === 'MM') {
                     $segments['byteOrder'] = 'BE';
                 } elseif ($byteOrderMarker === 'II') {
                     $segments['byteOrder'] = 'LE';
                 } else {
                     wfDebug(__METHOD__ . " Invalid byte ordering?!\n");
                 }
             }
         } elseif ($buffer === "í") {
             // APP13 - PSIR. IPTC and some photoshop stuff
             $temp = self::jpegExtractMarker($fh);
             if (substr($temp, 0, 14) === "Photoshop 3.0") {
                 $segments["PSIR"][] = $temp;
             }
//.........这里部分代码省略.........
开发者ID:rploaiza,项目名称:dbpedia-latinoamerica,代码行数:101,代码来源:JpegMetadataExtractor.php

示例6: getMetadata

 /**
  * @throws Exception
  * @param $filename string
  * @return array
  */
 static function getMetadata($filename)
 {
     self::$gif_frame_sep = pack("C", ord(","));
     self::$gif_extension_sep = pack("C", ord("!"));
     self::$gif_term = pack("C", ord(";"));
     $frameCount = 0;
     $duration = 0.0;
     $isLooped = false;
     $xmp = "";
     $comment = array();
     if (!$filename) {
         throw new Exception("No file name specified");
     } elseif (!file_exists($filename) || is_dir($filename)) {
         throw new Exception("File {$filename} does not exist");
     }
     $fh = fopen($filename, 'rb');
     if (!$fh) {
         throw new Exception("Unable to open file {$filename}");
     }
     // Check for the GIF header
     $buf = fread($fh, 6);
     if (!($buf == 'GIF87a' || $buf == 'GIF89a')) {
         throw new Exception("Not a valid GIF file; header: {$buf}");
     }
     // Skip over width and height.
     fread($fh, 4);
     // Read BPP
     $buf = fread($fh, 1);
     $bpp = self::decodeBPP($buf);
     // Skip over background and aspect ratio
     fread($fh, 2);
     // Skip over the GCT
     self::readGCT($fh, $bpp);
     while (!feof($fh)) {
         $buf = fread($fh, 1);
         if ($buf == self::$gif_frame_sep) {
             // Found a frame
             $frameCount++;
             ## Skip bounding box
             fread($fh, 8);
             ## Read BPP
             $buf = fread($fh, 1);
             $bpp = self::decodeBPP($buf);
             ## Read GCT
             self::readGCT($fh, $bpp);
             fread($fh, 1);
             self::skipBlock($fh);
         } elseif ($buf == self::$gif_extension_sep) {
             $buf = fread($fh, 1);
             if (strlen($buf) < 1) {
                 throw new Exception("Ran out of input");
             }
             $extension_code = unpack('C', $buf);
             $extension_code = $extension_code[1];
             if ($extension_code == 0xf9) {
                 // Graphics Control Extension.
                 fread($fh, 1);
                 // Block size
                 fread($fh, 1);
                 // Transparency, disposal method, user input
                 $buf = fread($fh, 2);
                 // Delay, in hundredths of seconds.
                 if (strlen($buf) < 2) {
                     throw new Exception("Ran out of input");
                 }
                 $delay = unpack('v', $buf);
                 $delay = $delay[1];
                 $duration += $delay * 0.01;
                 fread($fh, 1);
                 // Transparent colour index
                 $term = fread($fh, 1);
                 // Should be a terminator
                 if (strlen($term) < 1) {
                     throw new Exception("Ran out of input");
                 }
                 $term = unpack('C', $term);
                 $term = $term[1];
                 if ($term != 0) {
                     throw new Exception("Malformed Graphics Control Extension block");
                 }
             } elseif ($extension_code == 0xfe) {
                 // Comment block(s).
                 $data = self::readBlock($fh);
                 if ($data === "") {
                     throw new Exception('Read error, zero-length comment block');
                 }
                 // The standard says this should be ASCII, however its unclear if
                 // thats true in practise. Check to see if its valid utf-8, if so
                 // assume its that, otherwise assume its windows-1252 (iso-8859-1)
                 $dataCopy = $data;
                 // quickIsNFCVerify has the side effect of replacing any invalid characters
                 UtfNormal::quickIsNFCVerify($dataCopy);
                 if ($dataCopy !== $data) {
                     wfSuppressWarnings();
                     $data = iconv('windows-1252', 'UTF-8', $data);
//.........这里部分代码省略.........
开发者ID:nischayn22,项目名称:mediawiki-core,代码行数:101,代码来源:GIFMetadataExtractor.php


注:本文中的UtfNormal::quickIsNFCVerify方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。