本文整理汇总了PHP中Zend_Search_Lucene_Analysis_Analyzer::getDefault方法的典型用法代码示例。如果您正苦于以下问题:PHP Zend_Search_Lucene_Analysis_Analyzer::getDefault方法的具体用法?PHP Zend_Search_Lucene_Analysis_Analyzer::getDefault怎么用?PHP Zend_Search_Lucene_Analysis_Analyzer::getDefault使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Zend_Search_Lucene_Analysis_Analyzer的用法示例。
在下文中一共展示了Zend_Search_Lucene_Analysis_Analyzer::getDefault方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: getQuery
/**
 * Build the search query object for this phrase entry.
 *
 * The phrase is tokenized with the default analyzer. No tokens produce an
 * "insignificant" query, exactly one token produces a boosted term query,
 * and several tokens produce a boosted phrase query (with slop applied when
 * this entry is a proximity query).
 *
 * @param string $encoding Encoding handed to the analyzer when tokenizing the phrase
 * @return Zend_Search_Lucene_Search_Query_Insignificant|Zend_Search_Lucene_Search_Query_Term|Zend_Search_Lucene_Search_Query_Phrase
 * @throws Zend_Search_Lucene_Search_QueryParserException If the phrase contains '?' or '*'
 */
public function getQuery($encoding)
{
    // Wildcard characters are rejected inside a phrase
    $containsWildcard = strpos($this->_phrase, '?') !== false
                     || strpos($this->_phrase, '*') !== false;
    if ($containsWildcard) {
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('Wildcards are only allowed in a single terms.');
    }

    $analyzer   = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $tokens     = $analyzer->tokenize($this->_phrase, $encoding);
    $tokenCount = count($tokens);

    // Everything was filtered out by the analyzer
    if ($tokenCount == 0) {
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }

    // A single token degrades to a plain term query
    if ($tokenCount == 1) {
        $singleTerm = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
        $termQuery  = new Zend_Search_Lucene_Search_Query_Term($singleTerm);
        $termQuery->setBoost($this->_boost);

        return $termQuery;
    }

    // Several tokens: real phrase query, positions follow the token increments
    $phraseQuery  = new Zend_Search_Lucene_Search_Query_Phrase();
    $termPosition = -1;
    foreach ($tokens as $token) {
        $termPosition += $token->getPositionIncrement();
        $phraseTerm = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
        $phraseQuery->addTerm($phraseTerm, $termPosition);
    }

    if ($this->_proximityQuery) {
        $phraseQuery->setSlop($this->_wordsDistance);
    }
    $phraseQuery->setBoost($this->_boost);

    return $phraseQuery;
}
示例2: addDocument
/**
 * Adds a document to this segment.
 *
 * Each document field is registered through addField(). Indexed fields are
 * turned into tokens (via the default analyzer when tokenized, otherwise the
 * whole value becomes one token) and their positions are recorded under the
 * current document number. Stored fields are collected and handed to
 * addStoredFields(). Finally one norm byte is appended for every indexed
 * field listed in $this->_fields.
 *
 * @param Zend_Search_Lucene_Document $document
 * @throws Zend_Search_Lucene_Exception If a field requests term vector storage
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    $similarity    = Zend_Search_Lucene_Search_Similarity::getDefault();
    $fieldsToStore = array();
    $normsForDoc   = array();

    foreach ($document->getFieldNames() as $name) {
        $docField = $document->getField($name);
        $this->addField($docField);

        if ($docField->storeTermVector) {
            /**
             * @todo term vector storing support
             */
            throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
        }

        if ($docField->isIndexed) {
            if ($docField->isTokenized) {
                $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($docField->stringValue);
            } else {
                // Untokenized field: the complete value is a single token
                $tokens = array(
                    new Zend_Search_Lucene_Analysis_Token($docField->stringValue, 0, strlen($docField->stringValue)),
                );
            }

            $lengthNorm = $similarity->lengthNorm($docField->name, count($tokens));
            $normsForDoc[$docField->name] = chr($similarity->encodeNorm($lengthNorm));

            $termPosition = 0;
            foreach ($tokens as $token) {
                $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $docField->name);
                $key  = $term->key();

                if (!isset($this->_termDictionary[$key])) {
                    // Term not yet in the dictionary
                    $this->_termDictionary[$key] = $term;
                    $this->_termDocs[$key]       = array($this->_docCount => array());
                } elseif (!isset($this->_termDocs[$key][$this->_docCount])) {
                    // Known term, first occurrence in this document
                    $this->_termDocs[$key][$this->_docCount] = array();
                }

                $termPosition += $token->getPositionIncrement();
                $this->_termDocs[$key][$this->_docCount][] = $termPosition;
            }
        }

        if ($docField->isStored) {
            $fieldsToStore[] = $docField;
        }
    }

    foreach ($this->_fields as $segmentFieldName => $segmentField) {
        if ($segmentField->isIndexed) {
            if (!isset($this->_norms[$segmentFieldName])) {
                // First norm for this field: pad with zero-length norms for all earlier documents
                $this->_norms[$segmentFieldName] = str_repeat(
                    chr($similarity->encodeNorm($similarity->lengthNorm($segmentFieldName, 0))),
                    $this->_docCount
                );
            }

            // Use the norm computed above, or the zero-length norm when the document lacks the field
            $this->_norms[$segmentFieldName] .= isset($normsForDoc[$segmentFieldName])
                ? $normsForDoc[$segmentFieldName]
                : chr($similarity->encodeNorm($similarity->lengthNorm($segmentFieldName, 0)));
        }
    }

    $this->addStoredFields($fieldsToStore);
}
示例3: testAnalyzer
/**
 * Checks that the default analyzer can be read, replaced and read back,
 * then puts the initial analyzer back in place.
 */
public function testAnalyzer()
{
    $originalAnalyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $isAnalyzer = $originalAnalyzer instanceof Zend_Search_Lucene_Analysis_Analyzer;
    $this->assertTrue($isAnalyzer);

    $replacement = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($replacement);
    $isSameInstance = Zend_Search_Lucene_Analysis_Analyzer::getDefault() === $replacement;
    $this->assertTrue($isSameInstance);

    // Restore the initial analyzer, other tests rely on it
    Zend_Search_Lucene_Analysis_Analyzer::setDefault($originalAnalyzer);
}
示例4: analyze
/**
 * Extends the parent's analysis result with the term texts produced by the
 * UTF-8 analyzer for the same text.
 *
 * Note: installs a Utf8 analyzer as the default analyzer as a side effect.
 *
 * @param string $text Text to analyze (fed to the analyzer as UTF-8)
 * @return array Parent result followed by one term text per token
 */
public function analyze($text)
{
    $terms = parent::analyze($text);

    sfOpenPNEApplicationConfiguration::registerZend();
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8());

    $tokenStream = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $tokenStream->setInput($text, 'UTF-8');

    // Pull tokens until the stream reports null
    for ($token = $tokenStream->nextToken(); $token !== null; $token = $tokenStream->nextToken()) {
        $terms[] = $token->getTermText();
    }

    return $terms;
}
示例5: prepareZendSearchLucene
/**
 * Sets up the global Zend_Search_Lucene defaults: a case-insensitive
 * UTF-8/numeric analyzer with stop-word and short-word filters taken from
 * the sfConfig settings, and 0777 as default index file permissions.
 */
private static function prepareZendSearchLucene()
{
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive());

    // Stop words come as a comma separated string; false means "not configured"
    $configuredStopWords = sfConfig::get('app_sf_propel_luceneable_behavior_stopWords', false);
    if (false === $configuredStopWords) {
        $stopWordList = array();
    } else {
        $stopWordList = explode(',', $configuredStopWords);
    }
    $stopFilter = new Zend_Search_Lucene_Analysis_TokenFilter_StopWords($stopWordList);
    Zend_Search_Lucene_Analysis_Analyzer::getDefault()->addFilter($stopFilter);

    // Short-word filter threshold, 3 unless configured otherwise
    $shortWordLength = sfConfig::get('app_sf_propel_luceneable_behavior_shortWords', 3);
    $shortFilter = new Zend_Search_Lucene_Analysis_TokenFilter_ShortWords($shortWordLength);
    Zend_Search_Lucene_Analysis_Analyzer::getDefault()->addFilter($shortFilter);

    Zend_Search_Lucene_Storage_Directory_Filesystem::setDefaultFilePermissions(0777);
}
示例6: testFilteredTokensQueryParserProcessing
/**
 * A query term that the default analyzer filters out completely must not
 * prevent the remaining query from matching the expected document.
 */
public function testFilteredTokensQueryParserProcessing()
{
    $index = Zend_Search_Lucene::open(dirname(__FILE__) . '/_index23Sample/_files');

    // The pure number yields no tokens with the default analyzer
    $numberTokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize('123456787654321');
    $this->assertEquals(count($numberTokens), 0);

    $hits = $index->find('"PEAR developers" AND Home AND 123456787654321');
    $this->assertEquals(count($hits), 1);

    // Rows of: document id, score, path
    $expectedRows = array(
        array(1, 0.16827, 'IndexSource/contributing.wishlist.html'),
    );
    foreach ($hits as $rowNumber => $hit) {
        $this->assertEquals($hit->id, $expectedRows[$rowNumber][0]);

        $scoreDelta = abs($hit->score - $expectedRows[$rowNumber][1]);
        $this->assertTrue($scoreDelta < 1.0E-6);

        $this->assertEquals($hit->path, $expectedRows[$rowNumber][2]);
    }
}
示例7: highlightExtended
/**
 * Highlight text using specified View helper or callback function.
 *
 * The words are tokenized with the default analyzer; every text node below
 * /html/body is then processed by _highlightNodeRecursive() using the
 * resulting term texts.
 *
 * @param string|array $words  Words to highlight. Words could be organized using the array or string.
 * @param callback $callback   Callback method, used to transform (highlighting) text.
 * @param array    $params     Array of additional callback parameters passed through into it
 *                             (first non-optional parameter is an HTML fragment for highlighting)
 * @return string  HTML of the document, both when nothing had to be highlighted
 *                 and after highlighting has been applied
 * @throws Zend_Search_Lucene_Exception If there are words to highlight and $callback is not callable
 */
public function highlightExtended($words, $callback, $params = array())
{
    /** Zend_Search_Lucene_Analysis_Analyzer */
    require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';

    if (!is_array($words)) {
        $words = array($words);
    }

    // Flatten the tokens of all word strings into one list. Appending directly
    // (instead of call_user_func_array('array_merge', ...)) also copes with an
    // empty $words array, where array_merge() would be called without arguments.
    $wordsToHighlight = array();
    $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    foreach ($words as $wordString) {
        foreach ($analyzer->tokenize($wordString) as $token) {
            $wordsToHighlight[] = $token;
        }
    }

    if (count($wordsToHighlight) == 0) {
        // Nothing to highlight, hand back the document unchanged
        return $this->_doc->saveHTML();
    }

    // Map term text => position in the token list
    $wordsToHighlightFlipped = array();
    foreach ($wordsToHighlight as $id => $token) {
        $wordsToHighlightFlipped[$token->getTermText()] = $id;
    }

    if (!is_callable($callback)) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('$viewHelper parameter must be a View Helper name, View Helper object or callback.');
    }

    $xpath = new DOMXPath($this->_doc);
    $matchedNodes = $xpath->query("/html/body");
    foreach ($matchedNodes as $matchedNode) {
        $this->_highlightNodeRecursive($matchedNode, $wordsToHighlightFlipped, $callback, $params);
    }

    // Return the highlighted HTML so that the documented string return value
    // holds on this path too (previously nothing was returned here).
    return $this->_doc->saveHTML();
}
示例8: highlightMatchesDOM
/**
 * Highlight query terms
 *
 * Tokenizes the 'body' field of the document and highlights every token
 * whose text matches this query's wildcard pattern, where '?' is translated
 * to the regex '.' and '*' to '.*'.
 *
 * @param Zend_Search_Lucene_Document_Html $doc
 * @param integer &$colorIndex
 */
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
{
    /** @todo implementation */

    // Quote the pattern for PCRE, then turn the (now escaped) wildcards into regex equivalents
    $quotedPattern   = preg_quote($this->_pattern->text, '/');
    $regexBody       = str_replace(array('\\?', '\\*'), array('.', '.*'), $quotedPattern);
    $matchExpression = '/^' . $regexBody . '$/';

    if (@preg_match('/\\pL/u', 'a') == 1) {
        // PCRE is built with unicode support, so match in unicode mode
        $matchExpression .= 'u';
    }

    $analyzer   = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $bodyTokens = $analyzer->tokenize($doc->getFieldUtf8Value('body'), 'UTF-8');

    $matchedWords = array();
    foreach ($bodyTokens as $bodyToken) {
        if (preg_match($matchExpression, $bodyToken->getTermText()) !== 1) {
            continue;
        }
        $matchedWords[] = $bodyToken->getTermText();
    }

    $doc->highlight($matchedWords, $this->_getHighlightColor($colorIndex));
}
示例9: _highlightMatches
/**
 * Query specific matches highlighting
 *
 * Tokenizes the phrase with the default analyzer and passes the resulting
 * term text(s) to the highlighter: nothing for zero tokens, a single string
 * for one token, an array of strings otherwise.
 *
 * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
 */
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
    /** Field detection is skipped: all fields are expected in the HTML body and are not differentiated */
    /** Exact term matching recognition is skipped: keyword fields highlighting is not supported */
    /** Wildcard recognition is skipped: supported wildcards are removed by the text analyzer */

    require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
    $analyzer     = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $phraseTokens = $analyzer->tokenize($this->_phrase, $this->_phraseEncoding);
    $tokenCount   = count($phraseTokens);

    if ($tokenCount == 0) {
        // Nothing survived analysis, nothing to highlight
        return;
    }

    if ($tokenCount == 1) {
        // Single term: highlighted as a plain string
        $highlighter->highlight($phraseTokens[0]->getTermText());
        return;
    }

    // Non-trivial phrase: highlight all of its term texts
    $termTexts = array();
    foreach ($phraseTokens as $phraseToken) {
        $termTexts[] = $phraseToken->getTermText();
    }
    $highlighter->highlight($termTexts);
}
示例10: _highlightMatches
/**
 * Query specific matches highlighting
 *
 * Highlights only when the word contains no wildcard and the default
 * analyzer turns it into exactly one token; in that case the work is
 * delegated to a fuzzy query built from that token.
 *
 * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
 */
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
    /** Field detection is skipped: all fields are expected in the HTML body and are not differentiated */
    /** Exact term matching recognition is skipped: keyword fields highlighting is not supported */

    // -------------------------------------
    // Wildcard detection
    /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
    $pcreHasUnicode = @preg_match('/\\pL/u', 'a') == 1;
    $subPatterns = $pcreHasUnicode
        ? preg_split('/[*?]/u', iconv($this->_encoding, 'UTF-8', $this->_word))
        : preg_split('/[*?]/', $this->_word);

    if (count($subPatterns) > 1) {
        // The word contains a wildcard: nothing is highlighted
        return;
    }

    // -------------------------------------
    // Term count detection
    require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
    $wordTokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);

    if (count($wordTokens) != 1) {
        // Zero tokens ("insignificant") or several tokens: fuzzy search is only
        // supported for single-word terms, so nothing is highlighted
        return;
    }

    require_once 'Zend/Search/Lucene/Index/Term.php';
    $fuzzyTerm = new Zend_Search_Lucene_Index_Term($wordTokens[0]->getTermText(), $this->_field);

    require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php';
    $fuzzyQuery = new Zend_Search_Lucene_Search_Query_Fuzzy($fuzzyTerm, $this->_minimumSimilarity);
    $fuzzyQuery->_highlightMatches($highlighter);
}
示例11: foreach
// NOTE(review): excerpt from the middle of a test script. $t, $lucene and
// $indexer are assigned before this excerpt starts; $t looks like a lime test
// object (isa_ok/is/ok/diag/pass/fail) -- confirm against the full file.

// The previously fetched indexer must be an sfLuceneIndexerFactory.
$t->isa_ok($indexer, 'sfLuceneIndexerFactory', '->getIndexer() returns an instance of sfLuceneIndexerFactory');
$t->diag('testing ->getContext()');
// getContext() must return an sfContext, and it must equal sfContext::getInstance().
$t->isa_ok($lucene->getContext(), 'sfContext', '->getContext() returns an instance of sfContext');
$t->is($lucene->getContext(), sfContext::getInstance(), '->getContext() returns the same context');
$t->diag('testing ->configure()');
$lucene->configure();
// After configure() the query parser default encoding is expected to be UTF-8.
$t->is(Zend_Search_Lucene_Search_QueryParser::getDefaultEncoding(), 'UTF-8', '->configure() configures the query parsers encoding');
// For each analyzer type: reconfigure, build the analyzer expected for that
// type by hand (lower-case, stop-word and short-word filters), and compare it
// with the default analyzer installed by configure().
foreach (array('Text', 'TextNum', 'Utf8', 'Utf8Num') as $type) {
$lucene->setParameter('analyzer', $type);
$lucene->configure();
$class = 'Zend_Search_Lucene_Analysis_Analyzer_Common_' . $type;
$expected = new $class();
$expected->addFilter(new sfLuceneLowerCaseFilter(true));
$expected->addFilter(new Zend_Search_Lucene_Analysis_TokenFilter_StopWords(array('and', 'the')));
$expected->addFilter(new Zend_Search_Lucene_Analysis_TokenFilter_ShortWords(2));
$actual = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
// Loose (==) object comparison: same class and equal properties, not identity.
$t->ok($actual == $expected, '->configure() configures the analyzer for ' . $type);
}
// An unknown analyzer name must make configure() throw.
$lucene->setParameter('analyzer', 'foobar');
try {
$lucene->configure();
// Reaching this line means no exception was thrown, which is a failure.
$t->fail('->configure() analyzer must be of text, textnum, utf8, or utf8num');
} catch (Exception $e) {
$t->pass('->configure() analyzer must be of text, textnum, utf8, or utf8num');
}
// Switch back to a valid analyzer name before the following tests.
$lucene->setParameter('analyzer', 'utf8num');
$t->diag('testing ->find()');
class MockLucene
{
public $args;
public $scoring;
示例12: _highlightMatches
/**
 * Query specific matches highlighting
 *
 * If the word contains '*' or '?', each wildcard-free piece is analyzed
 * separately and the pieces are re-joined with their wildcard characters
 * into a wildcard query which performs the highlighting. Otherwise the word
 * is tokenized and its term text(s) are passed to the highlighter directly.
 *
 * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
 */
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
    /** Field detection is skipped: all fields are expected in the HTML body and are not differentiated */
    /** Exact term matching recognition is skipped: keyword fields highlighting is not supported */

    // -------------------------------------
    // Wildcard detection
    /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
    $pcreHasUnicode = @preg_match('/\\pL/u', 'a') == 1;
    if ($pcreHasUnicode) {
        $word = iconv($this->_encoding, 'UTF-8', $this->_word);
    } else {
        $word = $this->_word;
    }
    $wildcardsPattern    = $pcreHasUnicode ? '/[*?]/u' : '/[*?]/';
    $subPatternsEncoding = $pcreHasUnicode ? 'UTF-8' : $this->_encoding;

    // Each entry is array(piece text, offset of the piece within $word)
    $subPatterns = preg_split($wildcardsPattern, $word, -1, PREG_SPLIT_OFFSET_CAPTURE);

    if (count($subPatterns) > 1) {
        // The word is a wildcard pattern
        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
        $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();

        $pattern = '';
        foreach ($subPatterns as $pieceIndex => $piece) {
            $pieceText   = $piece[0];
            $pieceOffset = $piece[1];

            if ($pieceIndex != 0) {
                // The wildcard character sits right before every piece but the first
                $pattern .= $word[$pieceOffset - 1];
            }

            // Every piece has to be at most one word for the current analyzer
            $pieceTokens = $analyzer->tokenize($pieceText, $subPatternsEncoding);
            if (count($pieceTokens) > 1) {
                // Nothing is highlighted
                return;
            }
            foreach ($pieceTokens as $pieceToken) {
                $pattern .= $pieceToken->getTermText();
            }
        }

        require_once 'Zend/Search/Lucene/Index/Term.php';
        $wildcardTerm = new Zend_Search_Lucene_Index_Term($pattern, $this->_field);

        require_once 'Zend/Search/Lucene/Search/Query/Wildcard.php';
        $wildcardQuery = new Zend_Search_Lucene_Search_Query_Wildcard($wildcardTerm);
        $wildcardQuery->_highlightMatches($highlighter);
        return;
    }

    // -------------------------------------
    // Term count detection (no wildcards present)
    require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
    $wordTokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);
    $tokenCount = count($wordTokens);

    if ($tokenCount == 0) {
        // "Insignificant" word, nothing to highlight
        return;
    }

    if ($tokenCount == 1) {
        $highlighter->highlight($wordTokens[0]->getTermText());
        return;
    }

    // Several terms: highlight all of them
    $termTexts = array();
    foreach ($wordTokens as $wordToken) {
        $termTexts[] = $wordToken->getTermText();
    }
    $highlighter->highlight($termTexts);
}
示例13: _highlightMatches
/**
 * Query specific matches highlighting
 *
 * Tokenizes the 'body' field of the highlighter's document and highlights
 * every term text lying between the lower and upper term texts. A missing
 * bound is not checked; the bounds themselves only match when the range is
 * inclusive.
 *
 * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
 */
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
    $docBody = $highlighter->getDocument()->getFieldUtf8Value('body');
    $tokens  = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($docBody, 'UTF-8');

    $lowerTermText = null;
    if ($this->_lowerTerm !== null) {
        $lowerTermText = $this->_lowerTerm->text;
    }
    $upperTermText = null;
    if ($this->_upperTerm !== null) {
        $upperTermText = $this->_upperTerm->text;
    }

    $inclusive = $this->_inclusive;
    $words     = array();
    foreach ($tokens as $token) {
        $termText = $token->getTermText();

        // Lower bound (loose null check kept on purpose)
        if ($lowerTermText != null) {
            $aboveLower = $inclusive ? $lowerTermText <= $termText : $lowerTermText < $termText;
            if (!$aboveLower) {
                continue;
            }
        }

        // Upper bound (loose null check kept on purpose)
        if ($upperTermText != null) {
            $belowUpper = $inclusive ? $termText <= $upperTermText : $termText < $upperTermText;
            if (!$belowUpper) {
                continue;
            }
        }

        $words[] = $termText;
    }

    $highlighter->highlight($words);
}
示例14: get_similar_posts
/**
 * Return a list of posts that are similar to the current post.
 * This is not a very good implementation, so do not expect
 * amazing results - the term vector is not available for a doc
 * in ZSL, which limits how far you can go!
 *
 * The post's tags and title are lower-cased, tokenized with the default
 * analyzer and searched for as a multi-term query; the post itself is
 * removed from the hits.
 *
 * @param object $post            Post; its ->title, ->tags (iterated with foreach) and ->id are read
 * @param int    $max_recommended Maximum number of ids to return; values below 1 yield an empty list
 * @return array ids
 */
public function get_similar_posts($post, $max_recommended = 5)
{
    // Nothing requested: do not search and do not touch the global limit.
    // (Previously 0 could still return one id and a negative value could
    // lift the result limit entirely.)
    if ($max_recommended < 1) {
        return array();
    }

    $title = $post->title;
    $tags = $post->tags;
    $tagstring = '';
    foreach ($tags as $tag) {
        $tagstring .= $tag . ' ';
    }

    $analyser = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $tokens = $analyser->tokenize(strtolower($tagstring) . ' ' . strtolower($title));
    $query = new Zend_Search_Lucene_Search_Query_MultiTerm();
    foreach ($tokens as $token) {
        $query->addTerm(new Zend_Search_Lucene_Index_Term($token->getTermText()));
    }

    // The result set limit is a process-wide setting: remember it and put it
    // back after the search so later searches are not silently capped.
    // One extra hit is requested because the post itself may be among the hits.
    $previousLimit = Zend_Search_Lucene::getResultSetLimit();
    Zend_Search_Lucene::setResultSetLimit($max_recommended + 1);
    try {
        $hits = $this->_index->find($query);
    } catch (Exception $e) {
        Zend_Search_Lucene::setResultSetLimit($previousLimit);
        throw $e;
    }
    Zend_Search_Lucene::setResultSetLimit($previousLimit);

    $ids = array();
    foreach ($hits as $hit) {
        if ($hit->postid == $post->id) {
            // Skip the post we are looking up recommendations for
            continue;
        }
        $ids[] = $hit->postid;
        if (count($ids) >= $max_recommended) {
            break;
        }
    }

    return $ids;
}
示例15: createFuzzyQuery
/**
 * Wraps the parsed user query in a boolean query and, when the query string
 * yields fewer than two tokens, adds a fuzzy sub-query (similarity 0.4) for
 * the raw query string on the 'name' field.
 *
 * Note: sets the query parser's default encoding as a side effect.
 *
 * @param string $queryString
 * @return Zend_Search_Lucene_Search_Query $query
 */
public function createFuzzyQuery($queryString)
{
    Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding($this->_encoding);

    $parsedQuery   = Zend_Search_Lucene_Search_QueryParser::parse($queryString, $this->_encoding);
    $combinedQuery = new Zend_Search_Lucene_Search_Query_Boolean();
    $combinedQuery->addSubquery($parsedQuery);

    $analyzer    = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $queryTokens = $analyzer->tokenize($queryString, $this->_encoding);

    // Fuzzy matching is only added for queries of at most one token
    if (count($queryTokens) < 2) {
        $nameTerm   = new Zend_Search_Lucene_Index_Term($queryString, 'name');
        $fuzzyQuery = new Zend_Search_Lucene_Search_Query_Fuzzy($nameTerm, 0.4);
        $combinedQuery->addSubquery($fuzzyQuery);
    }

    return $combinedQuery;
}