本文整理汇总了PHP中utf8_stripspecials函数的典型用法代码示例。如果您正苦于以下问题:PHP utf8_stripspecials函数的具体用法?PHP utf8_stripspecials怎么用?PHP utf8_stripspecials使用的例子?那么,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了utf8_stripspecials函数的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: entities_to_7bit
/**
 * Reduce a string to a 7-bit friendly representation.
 *
 * The input is converted to UTF-8, run through utf8_stripspecials() with '_'
 * as replacement and through utf8_romanize(). Whatever is still non-ASCII is
 * turned into numeric HTML entities, whose decimal code points are then
 * written out as bare hexadecimal digits. Remaining entity names, single
 * quotes and ampersands are removed.
 *
 * @param string $str text to convert
 * @return string the converted text; when utf8_check() rejects the result of
 *                charset_to_utf8(), that result is returned without further processing
 */
function entities_to_7bit($str)
{
    require_once LEPTON_PATH . '/framework/summary.utf8.php';

    // convert to UTF-8
    $str = charset_to_utf8($str);
    if (!utf8_check($str)) {
        return $str;
    }

    // replace some specials
    $str = utf8_stripspecials($str, '_');
    // translate non-ASCII characters to ASCII
    $str = utf8_romanize($str);

    // missed some? - many UTF-8 chars can't be romanized:
    // convert to HTML entities, and replace entities by hex numbers
    $str = utf8_fast_umlauts_to_entities($str, false);
    // rename the numeric apostrophe entity first, otherwise the numeric pass
    // below would turn it into the hex digits "27" instead of dropping it
    $str = str_replace('&#039;', '&apos;', $str);
    // a closure replaces both the /e modifier (removed in PHP 7.0) and
    // create_function() (removed in PHP 8.0)
    $str = preg_replace_callback(
        '/&#([0-9]+);/',
        function ($aMatches) {
            return dechex((int) $aMatches[1]);
        },
        $str
    );

    // maybe there are some &gt; &lt; &apos; &quot; &amp; left, replace them too
    $str = str_replace(array('&gt;', '&lt;', '&apos;', '\'', '&quot;', '&amp;'), '', $str);
    $str = str_replace('&', '', $str);

    return $str;
}
示例2: test1
/**
 * Table-driven check of utf8_stripspecials().
 *
 * Every row holds, in order: the input string, the replacement, the
 * additional characters argument and the expected result.
 */
function test1()
{
    $cases = array(
        array('asciistring', '', '', 'asciistring'),
        array('asciistring', '', '\\._\\-:', 'asciistring'),
        array('ascii.string', '', '\\._\\-:', 'asciistring'),
        array('ascii.string', ' ', '\\._\\-:', 'ascii string'),
        array('2.1.14', ' ', '\\._\\-:', '2 1 14'),
        array('ascii.string', '', '\\._\\-:\\*', 'asciistring'),
        array('ascii.string', ' ', '\\._\\-:\\*', 'ascii string'),
        array('2.1.14', ' ', '\\._\\-:\\*', '2 1 14'),
    );

    foreach ($cases as $case) {
        list($input, $replacement, $additional, $expected) = $case;
        $actual = utf8_stripspecials($input, $replacement, $additional);
        $this->assertEqual($actual, $expected);
    }
}
示例3: cleanID
/**
 * Clean a raw ID using '_' as the separator character.
 *
 * The ID is trimmed and lower-cased, ';' becomes ':' and '/' becomes the
 * separator, then it is passed through utf8_romanize(), utf8_deaccent(),
 * utf8_stripspecials() and utf8_strip(). Finally runs of the separator and of
 * ':' are collapsed, and separator characters directly after a ':' are dropped.
 *
 * @param mixed $raw_id the ID to clean (cast to string)
 * @return string the cleaned ID
 */
function cleanID($raw_id)
{
    $sepchar = "_";
    $sepcharpat = '#\\' . $sepchar . '+#';

    $cleaned = utf8_strtolower(trim((string) $raw_id));

    // alternative namespace separator
    $cleaned = strtr($cleaned, ';', ':');
    $cleaned = strtr($cleaned, '/', $sepchar);

    $cleaned = utf8_deaccent(utf8_romanize($cleaned), -1);

    // remove specials
    $cleaned = utf8_strip(utf8_stripspecials($cleaned, $sepchar, '\\*'));

    // the patterns are applied one after another, in this order
    $patterns = array($sepcharpat, '#:+#', '#:[:\\._\\-]+#');
    $replacements = array($sepchar, ':', ':');

    return preg_replace($patterns, $replacements, $cleaned);
}
示例4: code
/**
 * Send the wanted code block to the browser
 *
 * Code blocks are counted in $this->_codeblock. When the counter equals the
 * 'codeblock' request value, the block is sent as a plain-text attachment and
 * the script exits; otherwise the counter is advanced.
 *
 * @param string      $text     content of the code block
 * @param string|null $language used as file extension of the default filename, 'txt' when empty
 * @param string      $filename download filename, 'snippet.<language>' when empty
 */
function code($text, $language = NULL, $filename = '')
{
    global $INPUT;

    $language = $language ? $language : 'txt';
    $filename = $filename ? $filename : 'snippet.' . $language;
    $filename = utf8_stripspecials(utf8_basename($filename), '_');

    // not the requested block: count it and carry on
    if ($this->_codeblock != $INPUT->str('codeblock')) {
        $this->_codeblock++;
        return;
    }

    header("Content-Type: text/plain; charset=utf-8");
    header("Content-Disposition: attachment; filename={$filename}");
    header("X-Robots-Tag: noindex");
    echo trim($text, "\r\n");
    exit;
}
示例5: code
/**
 * Send the wanted code block to the browser
 *
 * Code blocks are counted in $this->_codeblock. When the counter equals the
 * 'codeblock' request value, the block is sent as a plain-text attachment and
 * the script exits; otherwise the counter is advanced. Clients whose user
 * agent contains 'Windows' get "\n" replaced by "\r\n".
 *
 * @param string      $text     content of the code block
 * @param string|null $language used as file extension of the default filename, 'txt' when empty
 * @param string      $filename download filename, 'snippet.<language>' when empty
 */
function code($text, $language = null, $filename = '')
{
    global $INPUT;

    if (!$language) {
        $language = 'txt';
    }
    $filename = $filename ? $filename : 'snippet.' . $language;
    $filename = utf8_stripspecials(utf8_basename($filename), '_');

    // send CRLF to Windows clients
    $userAgent = $INPUT->server->str('HTTP_USER_AGENT');
    if (false !== strpos($userAgent, 'Windows')) {
        $text = str_replace("\n", "\r\n", $text);
    }

    // a different block was requested: just count this one
    if ($this->_codeblock != $INPUT->str('codeblock')) {
        $this->_codeblock++;
        return;
    }

    header("Content-Type: text/plain; charset=utf-8");
    header("Content-Disposition: attachment; filename={$filename}");
    header("X-Robots-Tag: noindex");
    echo trim($text, "\r\n");
    exit;
}
示例6: idx_tokenizer
/**
 * Tokenizes a string into an array of search words
 *
 * Uses the same algorithm as idx_getPageWords()
 *
 * A string made up of ASCII letters and digits only is treated as one single
 * word. Anything else is cleaned with utf8_stripspecials() and split on
 * spaces. Words shorter than IDX_MINWORDLENGTH (unless numeric) and words
 * found in the stopword list are dropped.
 *
 * @param string     $string    the query as given by the user
 * @param array|null $stopwords stopwords, each entry compared as "<word>\n";
 *                              an empty or null list disables stopword filtering
 * @param boolean    $wc        are wildcards allowed?
 * @return array list of lower-cased words
 */
function idx_tokenizer($string, &$stopwords, $wc = false)
{
    $words = array();

    // when wildcards are not allowed, '*' is added to the extra characters
    // handed to utf8_stripspecials()
    $wc = $wc ? '' : '\\*';

    $plainAscii = !preg_match('/[^0-9A-Za-z]/u', $string);
    if ($plainAscii) {
        $candidates = array($string);
    } else {
        // handle asian chars as single words (may fail on older PHP version)
        $asia = @preg_replace('/(' . IDX_ASIAN . ')/u', ' \\1 ', $string);
        // recover from regexp failure: keep the input when preg_replace() returned null
        if (!is_null($asia)) {
            $string = $asia;
        }
        $candidates = explode(' ', utf8_stripspecials($string, ' ', '\\._\\-:' . $wc));
    }

    foreach ($candidates as $w) {
        if (!is_numeric($w) && strlen($w) < IDX_MINWORDLENGTH) {
            continue;
        }
        $w = $plainAscii ? strtolower($w) : utf8_strtolower($w);
        // guard against an empty/null stopword list in both code paths and
        // compare strictly so numeric-looking words are not juggled
        if ($stopwords && array_search("{$w}\n", $stopwords, true) !== false) {
            continue;
        }
        $words[] = $w;
    }

    return $words;
}
示例7: tokenizer
/**
 * Split the text into words for fulltext search
 *
 * TODO: does this also need &$stopwords ?
 *
 * @triggers INDEXER_TEXT_PREPARE
 * This event allows plugins to modify the text before it gets tokenized.
 * Plugins intercepting this event should also intercept INDEX_VERSION_GET
 *
 * @param string  $text plain text
 * @param boolean $wc   are wildcards allowed?
 * @return array list of words in the text
 * @author Tom N Harris <tnharris@whoopdedo.org>
 * @author Andreas Gohr <andi@splitbrain.org>
 */
public function tokenizer($text, $wc = false)
{
    // when wildcards are not allowed, '*' is added to the extra characters
    // handed to utf8_stripspecials()
    $wc = $wc ? '' : '\\*';
    $stopwords =& idx_get_stopwords();

    // prepare the text to be tokenized
    $evt = new Doku_Event('INDEXER_TEXT_PREPARE', $text);
    if ($evt->advise_before(true)) {
        if (preg_match('/[^0-9A-Za-z ]/u', $text)) {
            // handle asian chars as single words (may fail on older PHP version)
            $asia = @preg_replace('/(' . IDX_ASIAN . ')/u', ' \\1 ', $text);
            // recover from regexp failure: keep the text when preg_replace() returned null
            if (!is_null($asia)) {
                $text = $asia;
            }
        }
    }
    $evt->advise_after();
    unset($evt);

    // line breaks and tabs become spaces; the soft hyphen is written as an
    // explicit byte escape because the raw character is invisible in source
    // and an empty-string key would simply be ignored by strtr()
    $text = strtr($text, array(
        "\r" => ' ',
        "\n" => ' ',
        "\t" => ' ',
        "\xC2\xAD" => '', // soft hyphen (U+00AD)
    ));
    if (preg_match('/[^0-9A-Za-z ]/u', $text)) {
        $text = utf8_stripspecials($text, ' ', '\\._\\-:' . $wc);
    }

    $wordlist = explode(' ', $text);
    foreach ($wordlist as $i => $word) {
        // the multibyte-aware lowercase is only used when the word is not plain ASCII
        $wordlist[$i] = preg_match('/[^0-9A-Za-z]/u', $word) ? utf8_strtolower($word) : strtolower($word);
    }

    foreach ($wordlist as $i => $word) {
        // drop words that are too short (unless numeric) and stopwords
        if (!is_numeric($word) && strlen($word) < IDX_MINWORDLENGTH || array_search($word, $stopwords, true) !== false) {
            unset($wordlist[$i]);
        }
    }

    return array_values($wordlist);
}
示例8: cleanID
/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Accented characters are
 * converted to unaccented ones
 *
 * Results are memoised in the global $cache_cleanid, keyed by the raw ID
 * alone. For that reason only calls without $ascii read from or write to the
 * cache; otherwise a forced-ASCII result could be served to a regular call
 * (or the other way round).
 *
 * Reads $conf['syslog'], $conf['sepchar'], $conf['mixedcase'],
 * $conf['useslash'], $conf['deaccent'] and $conf['specialcharacters'].
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param string  $raw_id The pageid to clean
 * @param boolean $ascii  Force ASCII
 * @return string cleaned id
 */
function cleanID($raw_id, $ascii = false)
{
    global $conf;
    static $sepcharpat = null;
    global $cache_cleanid;
    $cache =& $cache_cleanid;

    if ($conf['syslog']) {
        syslog(LOG_WARNING, '[pageutils.php] cleanID: raw_id: ' . $raw_id);
    }

    // check if it's already in the memory cache (default mode only, see above)
    if (!$ascii && isset($cache[(string) $raw_id])) {
        return $cache[(string) $raw_id];
    }

    $sepchar = $conf['sepchar'];
    if ($sepcharpat == null) {
        // build string only once to save clock cycles
        $sepcharpat = '#\\' . $sepchar . '+#';
    }

    $id = trim((string) $raw_id);
    if ($conf['mixedcase'] == 0) {
        $id = utf8_strtolower($id);
    }

    // alternative namespace separator
    if ($conf['useslash']) {
        $id = strtr($id, ';/', '::');
    } else {
        $id = strtr($id, ';/', ':' . $sepchar);
    }

    if ($conf['deaccent'] == 2 || $ascii) {
        $id = utf8_romanize($id);
    }
    if ($conf['deaccent'] || $ascii) {
        $id = utf8_deaccent($id, -1);
    }

    // remove specials if specialcharacters is set to 0
    if ($conf['specialcharacters'] == 0) {
        $id = utf8_stripspecials($id, $sepchar, '\\*');
    }

    if ($ascii) {
        $id = utf8_strip($id);
    }

    // clean up: collapse separator and colon runs, strip separators at the
    // ends and next to namespace colons
    $id = preg_replace($sepcharpat, $sepchar, $id);
    $id = preg_replace('#:+#', ':', $id);
    $id = trim($id, ':._-');
    $id = preg_replace('#:[:\\._\\-]+#', ':', $id);
    $id = preg_replace('#[:\\._\\-]+:#', ':', $id);

    if (!$ascii) {
        $cache[(string) $raw_id] = $id;
    }

    if ($conf['syslog']) {
        syslog(LOG_WARNING, '[pageutils.php] cleanID: id to be returned: ' . $id);
    }

    return $id;
}
示例9: cleanID
/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Accented characters are
 * converted to unaccented ones
 *
 * Results are memoised in the global $cache_cleanid, keyed by the raw ID
 * alone. For that reason only calls with both flags off read from or write
 * to the cache; otherwise the result of one mode could be served to a call
 * made in another mode.
 *
 * Reads $conf['sepchar'], $conf['useslash'] and $conf['deaccent'].
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param string  $raw_id The pageid to clean
 * @param boolean $ascii  Force ASCII
 * @param boolean $media  Allow leading or trailing _ for media files
 * @return string cleaned id
 */
function cleanID($raw_id, $ascii = false, $media = false)
{
    global $conf;
    static $sepcharpat = null;
    global $cache_cleanid;
    $cache =& $cache_cleanid;

    // the cache key carries no flag information, so only the default mode may use it
    $cacheable = !$ascii && !$media;

    // check if it's already in the memory cache
    if ($cacheable && isset($cache[(string) $raw_id])) {
        return $cache[(string) $raw_id];
    }

    $sepchar = $conf['sepchar'];
    if ($sepcharpat == null) {
        // build string only once to save clock cycles
        $sepcharpat = '#\\' . $sepchar . '+#';
    }

    $id = trim((string) $raw_id);
    $id = utf8_strtolower($id);

    // alternative namespace separator
    $id = strtr($id, ';', ':');
    if ($conf['useslash']) {
        $id = strtr($id, '/', ':');
    } else {
        $id = strtr($id, '/', $sepchar);
    }

    if ($conf['deaccent'] == 2 || $ascii) {
        $id = utf8_romanize($id);
    }
    if ($conf['deaccent'] || $ascii) {
        $id = utf8_deaccent($id, -1);
    }

    // remove specials
    $id = utf8_stripspecials($id, $sepchar, '\\*');

    if ($ascii) {
        $id = utf8_strip($id);
    }

    // clean up: collapse separator and colon runs, strip separators at the
    // ends (underscores are kept for media) and directly after namespace colons
    $id = preg_replace($sepcharpat, $sepchar, $id);
    $id = preg_replace('#:+#', ':', $id);
    $id = $media ? trim($id, ':.-') : trim($id, ':._-');
    $id = preg_replace('#:[:\\._\\-]+#', ':', $id);

    if ($cacheable) {
        $cache[(string) $raw_id] = $id;
    }

    return $id;
}
示例10: log_externalsearch
/**
 * Log external search queries
 *
 * Will not write anything if the referer isn't a search engine
 *
 * The referer's host is matched against the patterns in $SEARCHENGINES
 * (loaded from searchengines.php next to this file); if none matches, a few
 * generic query parameter names are tried. When a non-empty query is found it
 * is handed to $this->log_search() together with its individual words.
 *
 * @param string $referer the referring URL
 * @param string $type    set to 'search' (by reference) as soon as the referer
 *                        is recognised as a search engine
 * @return void
 */
public function log_externalsearch($referer, &$type)
{
    $referer = utf8_strtolower($referer);
    include dirname(__FILE__) . '/searchengines.php';
    /** @var array $SEARCHENGINES */
    $query = '';
    $name = '';

    // parse the referer; parse_url() returns false for seriously malformed
    // URLs and leaves out components that are not present
    $urlparts = parse_url($referer);
    if (!is_array($urlparts)) {
        $urlparts = array();
    }
    $domain = isset($urlparts['host']) ? $urlparts['host'] : '';
    $qpart = isset($urlparts['query']) ? $urlparts['query'] : '';
    if (!$qpart) {
        // google does this
        $qpart = isset($urlparts['fragment']) ? $urlparts['fragment'] : '';
    }
    $params = array();
    parse_str($qpart, $params);

    // check domain against common search engines
    foreach ($SEARCHENGINES as $regex => $info) {
        if (preg_match('/' . $regex . '/', $domain)) {
            $type = 'search';
            $name = array_shift($info);
            // check the known parameters for content
            foreach ($info as $k) {
                // array-valued parameters (q[]=...) cannot be a query string
                if (empty($params[$k]) || !is_string($params[$k])) {
                    continue;
                }
                $query = $params[$k];
                break;
            }
            break;
        }
    }

    // try some generic search engine parameters
    if ($type != 'search') {
        foreach (array('search', 'query', 'q', 'keywords', 'keyword') as $k) {
            if (empty($params[$k]) || !is_string($params[$k])) {
                continue;
            }
            $query = $params[$k];
            // we seem to have found some generic search, generate name from domain
            $name = preg_replace('/(\\.co)?\\.([a-z]{2,5})$/', '', $domain); // strip tld
            $name = explode('.', $name);
            $name = array_pop($name);
            $type = 'search';
            break;
        }
    }

    // still no hit? return
    if ($type != 'search') {
        return;
    }

    // clean the query
    $query = preg_replace('/^(cache|related):[^\\+]+/', '', $query); // non-search queries
    $query = preg_replace('/ +/', ' ', $query); // ws compact
    $query = trim($query);
    if (!utf8_check($query)) {
        // assume latin1 if not utf8
        // NOTE(review): utf8_encode() is deprecated as of PHP 8.2 - plan a replacement
        $query = utf8_encode($query);
    }

    // no query? no log
    if (!$query) {
        return;
    }

    // log it!
    $words = explode(' ', utf8_stripspecials($query, ' ', '\\._\\-:\\*'));
    $page = isset($_REQUEST['p']) ? $_REQUEST['p'] : null;
    $this->log_search($page, $query, $words, $name);
}
示例11: utf8_stripspecials
/**
 * Forward the three arguments unchanged to utf8_stripspecials() and return its result.
 *
 * NOTE(review): the call target has the same name as this function. If this
 * is a free function in the global namespace (rather than a class method or
 * namespaced wrapper delegating to a global helper), the call recurses
 * without end - confirm against the enclosing file.
 *
 * @param string $string first argument, passed through
 * @param string $repl   second argument, passed through (defaults to '')
 * @param string $keep   third argument, passed through (defaults to '')
 * @return mixed whatever the forwarded call returns
 */
function utf8_stripspecials($string, $repl = '', $keep = '')
{
return utf8_stripspecials($string, $repl, $keep);
}
示例12: entities_to_7bit
/**
 * Reduce a string to a 7-bit friendly representation.
 *
 * The input is converted to UTF-8, run through utf8_stripspecials() with '_'
 * as replacement and through utf8_romanize(). Whatever is still non-ASCII is
 * turned into numeric HTML entities, whose decimal code points are then
 * written out as bare hexadecimal digits. Remaining entity names, single
 * quotes and ampersands are removed.
 *
 * @param string $str text to convert
 * @return string the converted text; when utf8_check() rejects the result of
 *                charset_to_utf8(), that result is returned without further processing
 */
function entities_to_7bit($str)
{
    // convert to UTF-8
    $str = charset_to_utf8($str);
    if (!utf8_check($str)) {
        return $str;
    }

    // replace some specials
    $str = utf8_stripspecials($str, '_');
    // translate non-ASCII characters to ASCII
    $str = utf8_romanize($str);

    // missed some? - many UTF-8 chars can't be romanized:
    // convert to HTML entities, and replace entities by hex numbers
    $str = utf8_fast_umlauts_to_entities($str, false);
    // rename the numeric apostrophe entity first, otherwise the numeric pass
    // below would turn it into the hex digits "27" instead of dropping it
    $str = str_replace('&#039;', '&apos;', $str);
    // preg_replace_callback() replaces the /e modifier, which PHP 7.0 removed
    // (preg_replace() with /e returns null there)
    $str = preg_replace_callback(
        '/&#([0-9]+);/',
        function ($aMatches) {
            return dechex((int) $aMatches[1]);
        },
        $str
    );

    // maybe there are some &gt; &lt; &apos; &quot; &amp; left, replace them too
    $str = str_replace(array('&gt;', '&lt;', '&apos;', '\'', '&quot;', '&amp;'), '', $str);
    $str = str_replace('&', '', $str);

    return $str;
}