本文整理汇总了PHP中Zend_Search_Lucene_Analysis_Token类的典型用法代码示例。如果您正苦于以下问题:PHP Zend_Search_Lucene_Analysis_Token类的具体用法?PHP Zend_Search_Lucene_Analysis_Token怎么用?PHP Zend_Search_Lucene_Analysis_Token使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Zend_Search_Lucene_Analysis_Token类的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: normalize
/**
 * Builds a stemmed copy of the given token.
 *
 * The term text is run through $this->stemmer; the start offset, end offset
 * and position increment are copied over from the source token.
 *
 * @see Zend_Search_Lucene_Analysis_TokenFilter
 *
 * @param Zend_Search_Lucene_Analysis_Token $srcToken Token whose text is stemmed
 * @return Zend_Search_Lucene_Analysis_Token New token carrying the stemmed text
 */
public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
{
    $stemmedToken = new Zend_Search_Lucene_Analysis_Token(
        $this->stemmer->doStem($srcToken->getTermText()),
        $srcToken->getStartOffset(),
        $srcToken->getEndOffset()
    );
    $stemmedToken->setPositionIncrement($srcToken->getPositionIncrement());

    return $stemmedToken;
}
示例2: normalize
/**
 * Drops stop words from the token stream.
 *
 * A token is removed (null is returned) when its term text is a key of
 * $this->_stopSet; every other token is passed through unchanged.
 *
 * @param Zend_Search_Lucene_Analysis_Token $srcToken Token to check
 * @return Zend_Search_Lucene_Analysis_Token|null The same token, or null for a stop word
 */
public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
{
    $isStopWord = array_key_exists($srcToken->getTermText(), $this->_stopSet);

    return $isStopWord ? null : $srcToken;
}
示例3: normalize
/**
 * Drops tokens whose text is shorter than the configured minimum.
 *
 * The length is measured with strlen(), i.e. in bytes, and compared against
 * $this->length. Tokens below that threshold are removed (null is returned).
 *
 * @param Zend_Search_Lucene_Analysis_Token $srcToken Token to check
 * @return Zend_Search_Lucene_Analysis_Token|null The same token, or null when it is too short
 */
public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
{
    $isTooShort = strlen($srcToken->getTermText()) < $this->length;

    return $isTooShort ? null : $srcToken;
}
示例4: normalize
/**
 * Leaves digit-only tokens alone and delegates everything else to the parent.
 *
 * When ctype_digit() accepts the term text the source token is returned
 * as is; otherwise the result of the parent's normalize() is returned.
 *
 * @param Zend_Search_Lucene_Analysis_Token $srcToken Token to normalize
 * @return Zend_Search_Lucene_Analysis_Token The source token, or whatever the parent filter returns
 */
public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
{
    // Numeric terms bypass the parent filter entirely.
    if (ctype_digit($srcToken->getTermText())) {
        return $srcToken;
    }

    return parent::normalize($srcToken);
}
示例5: normalize
/**
 * Runs the token text through hook_search_preprocess() and returns a new token.
 *
 * The new token keeps the start offset, end offset and position increment
 * of the source token.
 *
 * @param Zend_Search_Lucene_Analysis_Token $srcToken Token to preprocess
 * @return Zend_Search_Lucene_Analysis_Token New token built from the preprocessed text
 */
public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
{
    // The return value of search_invoke_preprocess() is not used, so the
    // text is presumably rewritten in place through a by-reference argument.
    $text = $srcToken->getTermText();
    search_invoke_preprocess($text);

    $preprocessedToken = new Zend_Search_Lucene_Analysis_Token(
        $text,
        $srcToken->getStartOffset(),
        $srcToken->getEndOffset()
    );
    $preprocessedToken->setPositionIncrement($srcToken->getPositionIncrement());

    return $preprocessedToken;
}
示例6: normalize
/**
 * Returns a lower-cased copy of the given token.
 *
 * The text is converted with strtolower(); offsets and position increment
 * are copied from the source token.
 *
 * @param Zend_Search_Lucene_Analysis_Token $srcToken Token to lower-case
 * @return Zend_Search_Lucene_Analysis_Token New token with lower-cased text
 */
public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
{
    $loweredText = strtolower($srcToken->getTermText());

    $loweredToken = new Zend_Search_Lucene_Analysis_Token($loweredText, $srcToken->getStartOffset(), $srcToken->getEndOffset());
    $loweredToken->setPositionIncrement($srcToken->getPositionIncrement());

    return $loweredToken;
}
示例7: normalize
/**
 * Returns a lower-cased copy of the given token.
 *
 * When $this->mbString is truthy the text is lower-cased with
 * mb_strtolower() using the 'utf8' encoding, otherwise with strtolower().
 * Offsets and position increment are copied from the source token.
 *
 * @param Zend_Search_Lucene_Analysis_Token $srcToken Token to lower-case
 * @return Zend_Search_Lucene_Analysis_Token New token with lower-cased text
 */
public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
{
    $loweredText = $this->mbString
        ? mb_strtolower($srcToken->getTermText(), 'utf8')
        : strtolower($srcToken->getTermText());

    $loweredToken = new Zend_Search_Lucene_Analysis_Token(
        $loweredText,
        $srcToken->getStartOffset(),
        $srcToken->getEndOffset()
    );
    $loweredToken->setPositionIncrement($srcToken->getPositionIncrement());

    return $loweredToken;
}
示例8: normalize
/**
 * Stems the token text according to the language detected for it.
 *
 * The text is handed to LanguageDetection::analyze(); when that yields a
 * truthy language code the text is stemmed for that language, otherwise it
 * is kept as is. Offsets and position increment are copied from the source
 * token.
 *
 * @param Zend_Search_Lucene_Analysis_Token $po_srctoken Token to stem
 * @return Zend_Search_Lucene_Analysis_Token New token with the (possibly stemmed) text
 */
public function normalize(Zend_Search_Lucene_Analysis_Token $po_srctoken)
{
    $detector = new LanguageDetection();
    $text = $po_srctoken->getTermText();
    $languageCode = $detector->analyze($text);

    // The stemmer is created regardless of the detection result; without a
    // detected language no stemming is done at all.
    $stemmer = new SnoballStemmer();
    $stemmedText = $languageCode ? $stemmer->stem($text, $languageCode) : $text;

    $stemmedToken = new Zend_Search_Lucene_Analysis_Token(
        $stemmedText,
        $po_srctoken->getStartOffset(),
        $po_srctoken->getEndOffset()
    );
    $stemmedToken->setPositionIncrement($po_srctoken->getPositionIncrement());

    return $stemmedToken;
}
示例9: normalize
/**
 * Lower-cases the text of the given token in place.
 *
 * Unlike a copying filter, this one modifies the source token itself via
 * setTermText() (using mb_strtolower() with 'UTF-8') and returns that same
 * instance.
 *
 * @param Zend_Search_Lucene_Analysis_Token $srcToken Token to lower-case; it is modified
 * @return Zend_Search_Lucene_Analysis_Token The same token instance
 */
public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
{
    $loweredText = mb_strtolower($srcToken->getTermText(), 'UTF-8');
    $srcToken->setTermText($loweredText);

    return $srcToken;
}
示例10: normalize
/**
 * Lower-cases a token and drops it when it is too short or a stop word.
 *
 * The text is lower-cased with strtolower(). The token is removed (null is
 * returned) when the lower-cased text is shorter than 2 bytes or is a key
 * of $this->_stopSet. Otherwise a new token with the lower-cased text and
 * the source token's offsets and position increment is returned.
 *
 * @param Zend_Search_Lucene_Analysis_Token $srcToken Token to normalize
 * @return Zend_Search_Lucene_Analysis_Token|null New lower-cased token, or null when it is filtered out
 */
public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
{
    // Transliterating to ASCII (iconv "us-ascii//TRANSLIT", or remove_accents()
    // from uri.php) was considered but is not used: it would turn "ñ" into "n".
    $term = strtolower($srcToken->getTermText());

    $isTooShort = strlen($term) < 2;
    if ($isTooShort || array_key_exists($term, $this->_stopSet)) {
        return null;
    }

    $normalizedToken = new Zend_Search_Lucene_Analysis_Token(
        $term,
        $srcToken->getStartOffset(),
        $srcToken->getEndOffset()
    );
    $normalizedToken->setPositionIncrement($srcToken->getPositionIncrement());

    return $normalizedToken;
}
示例11: dirname
<?php
/**
 * This file is part of the sfLucene package.
 * (c) Carl Vondrick <carl.vondrick@symfony-project.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

// Unit-test bootstrap first, then the classes this test exercises.
require dirname(__FILE__) . '/../../bootstrap/unit.php';
require 'util/xfLuceneZendManager.class.php';
require 'stemmer/xfLuceneStemmerTokenFilter.class.php';
require 'stemmer/xfLuceneStemmer.interface.php';
require 'stemmer/xfLuceneStemmerPorter.class.php';
require 'vendor/PorterStemmer/PorterStemmer.class.php';

// Two assertions are planned below.
$t = new lime_test(2, new lime_output_color());

// Token filter under test, backed by the Porter stemmer.
$porterStemmer = new xfLuceneStemmerPorter();
$filter = new xfLuceneStemmerTokenFilter($porterStemmer);

// Input token: the word "nationalize" at offsets 10..21, position increment 0.
$sourceToken = new Zend_Search_Lucene_Analysis_Token('nationalize', 10, 21);
$sourceToken->setPositionIncrement(0);

$normalizedToken = $filter->normalize($sourceToken);

$t->isa_ok($normalizedToken, 'Zend_Search_Lucene_Analysis_Token', '->normalize() returns a Zend_Search_Lucene_Analysis_Token');
$t->is($normalizedToken->getTermText(), 'nation', '->normalize() consults the stemmer');
示例12: nextToken
/**
 * Tokenization stream API: returns the next token of the stream.
 *
 * Tokens already waiting on $this->token_stack are returned first. Otherwise
 * the method walks $this->input_chunks two entries at a time, treating the
 * entry at index current_chunk + 1 as the word and the entry at index
 * current_chunk as the delimiter in front of it. Each non-empty word is run
 * through apply_nc_filters(); the first resulting form is returned and any
 * further forms are queued on the token stack with position increment 0 and
 * the same offsets.
 *
 * Side effects: advances $this->current_chunk and $this->char_position, and
 * pushes onto / pops from $this->token_stack.
 *
 * NOTE(review): the layout of $this->input_chunks (delimiters and words
 * alternating) and the initial value of current_chunk are inferred from the
 * indexing below — confirm against the code that fills them.
 *
 * @return Zend_Search_Lucene_Analysis_Token|null Next token, or null at the end of the stream
 */
public function nextToken()
{
// is there anything to take data from?
// (this check runs before the token stack is consulted)
if (!$this->num_chunks) {
return null;
}
// first hand out the tokens that are already queued;
// array_pop() takes from the end, so they come out in reverse push order
if (sizeof($this->token_stack)) {
return array_pop($this->token_stack);
}
while ($this->num_chunks > $this->current_chunk) {
$word = $this->input_chunks[$this->current_chunk + 1];
// special case: site and section identifiers such as sub123, site5 —
// when ignore_numbers is set and the word is 'site' or 'sub', the leading
// digits of the following chunk are appended to the word
if ($this->ignore_numbers && ($word == 'site' || $word == 'sub') && preg_match("/^(\\d+)/", $this->input_chunks[$this->current_chunk + 2], $matches)) {
$word .= $matches[1];
}
// lengths are counted in UTF-8 characters; the word length includes any digits appended above
$word_length = mb_strlen($word, 'UTF-8');
$delimiter_length = mb_strlen($this->input_chunks[$this->current_chunk], 'UTF-8');
// when current_chunk is 1 the start offset is 0; otherwise it is the previous
// end offset plus the delimiter length plus one
// NOTE(review): the reason for the extra "+ 1" is not visible here — confirm the offsets are as intended
$start_position = $this->current_chunk == 1 ? 0 : $this->char_position + $delimiter_length + 1;
$end_position = $start_position + $word_length;
// prepare for the next iteration
$this->char_position = $end_position;
$this->current_chunk += 2;
// an empty word produces no token; the stream state was already advanced above
if (!$word_length) {
continue;
}
// (original note, translated) was the input a string without significant characters?
// apply the filters
$processed = $this->apply_nc_filters($word);
$count = sizeof($processed);
// an empty filter result produces no token and the loop moves on to the next word
if ($count > 0) {
for ($i = 1; $i < $count; $i++) {
// i.e. if $count > 1
$token = new Zend_Search_Lucene_Analysis_Token($processed[$i], $start_position, $end_position);
// the book "Lucene in Action" advises setting $token->setPositionIncrement(0),
// although it seems to make no difference (the ZSL sources are marked "todo: Process
// $token->getPositionIncrement()" — perhaps it will start working in the future)
$token->setPositionIncrement(0);
$this->token_stack[] = $token;
}
// the first filtered form is returned right away; the extra forms queued above follow on later calls
return new Zend_Search_Lucene_Analysis_Token($processed[0], $start_position, $end_position);
}
}
// all chunks consumed without producing another token
return null;
}