本文整理汇总了PHP中Zend_Search_Lucene_Document_Html::loadHtmlFile方法的典型用法代码示例。如果您正苦于以下问题：PHP Zend_Search_Lucene_Document_Html::loadHtmlFile方法的具体用法？PHP Zend_Search_Lucene_Document_Html::loadHtmlFile怎么用？PHP Zend_Search_Lucene_Document_Html::loadHtmlFile使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Zend_Search_Lucene_Document_Html的用法示例。
在下文中一共展示了Zend_Search_Lucene_Document_Html::loadHtmlFile方法的1个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: index
/**
 * Load a document of the given type, normalise its text and add it to the Lucene index.
 *
 * File-based types ('html', 'docx', 'xsls'/'xlsx', 'pptx') are parsed from $pathFile and
 * get $indexValues['Key'] set to the file's modification time. The 'page' type parses the
 * HTML string passed in $indexValues['Contents']. Any other type leaves the index untouched.
 *
 * In every handled case $indexValues['Contents'] is replaced by the document's 'body' text,
 * then lower-cased/unaccented, de-duplicated, cleaned and stripped of the locale's stop words
 * (all through PiStringManager) before the document is added.
 *
 * Side effects: sets the default Lucene query encoding and analyzer, overwrites
 * self::$_index and self::$_doc, and changes the process locale through setlocale()
 * (to $locale, or to 'fr_FR' when the first indexing attempt throws). The locale is
 * not restored afterwards.
 *
 * @param \Zend_Search_Lucene_Proxy $Index       The Lucene index object.
 * @param string                    $type        One of 'html', 'docx', 'xsls' (historical spelling,
 *                                               'xlsx' is accepted as well), 'pptx', 'page'.
 * @param array|null                $indexValues Field values for the document; 'Contents' is required
 *                                               for the 'page' type and overwritten for all types.
 * @param string                    $locale      Locale used for stop-word lookup and setlocale().
 * @param object                    $obj         Not used by this method.
 * @param string                    $pathFile    Path to the source file (file-based types only).
 *
 * @return \Zend_Search_Lucene_Proxy The index object that was passed in.
 * @access public
 * @static
 * @author Etienne de Longeaux <etienne_delongeaux@hotmail.com>
 * @since 2012-06-11
 */
public static function index(\Zend_Search_Lucene_Proxy $Index, $type, $indexValues = null, $locale = '', $obj = null, $pathFile = '')
{
    // Configure Lucene globally for UTF-8, case-insensitive text.
    \Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding('utf-8');
    \Zend_Search_Lucene_Analysis_Analyzer::setDefault(new \Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive());

    self::$_index = $Index;
    self::$_doc   = null;

    // Type => (document class, static loader) for every format that is read from a file.
    // 'xsls' is the spelling existing callers use; 'xlsx' is accepted as the correct alias.
    $fileLoaders = array(
        'html' => array('Zend_Search_Lucene_Document_Html', 'loadHtmlFile'),
        'docx' => array('Zend_Search_Lucene_Document_Docx', 'loadDocxFile'),
        'xsls' => array('Zend_Search_Lucene_Document_Xlsx', 'loadXlsxFile'),
        'xlsx' => array('Zend_Search_Lucene_Document_Xlsx', 'loadXlsxFile'),
        'pptx' => array('Zend_Search_Lucene_Document_Pptx', 'loadPptxFile'),
    );

    if (is_string($type) && isset($fileLoaders[$type])) {
        $loaderClass  = $fileLoaders[$type][0];
        $loaderMethod = $fileLoaders[$type][1];

        // Second argument false: do not store the document content in the index.
        self::$_doc              = $loaderClass::$loaderMethod($pathFile, false);
        $indexValues['Key']      = filemtime($pathFile);
        $indexValues['Contents'] = self::$_doc->getFieldUtf8Value('body');
    } elseif ($type === 'page') {
        // $indexValues defaults to null, so guard the read instead of indexing into null.
        $rawHtml = isset($indexValues['Contents']) ? $indexValues['Contents'] : '';

        self::$_doc              = \Zend_Search_Lucene_Document_Html::loadHTML($rawHtml, false);
        $indexValues['Contents'] = self::$_doc->getFieldUtf8Value('body');
    }

    // Unknown type: nothing was loaded, so there is nothing to index.
    if (!(self::$_doc instanceof \Zend_Search_Lucene_Document)) {
        return self::$_index;
    }

    // Normalise the text: strip accents / lower-case, drop duplicate words, clean up.
    $contents = \Sfynx\ToolBundle\Util\PiStringManager::minusculesSansAccents($indexValues['Contents']);
    $contents = \Sfynx\ToolBundle\Util\PiStringManager::uniqueWord($contents);
    $contents = \Sfynx\ToolBundle\Util\PiStringManager::cleanContent($contents);

    // Remove the locale's stop words, when a list is available.
    $stopWord = \Sfynx\ToolBundle\Util\PiStringManager::stopWord(strtolower($locale));
    if (is_array($stopWord) && $stopWord) {
        $contents = implode(' ', array_diff(explode(' ', $contents), $stopWord));
    }
    $indexValues['Contents'] = $contents;

    try {
        setlocale(LC_ALL, $locale);
        self::defaultAddFields($indexValues);
        self::addDocument();
    } catch (\Exception $firstFailure) {
        // Retry once under the fr_FR locale. The fields are added again exactly as the
        // original code did. NOTE(review): defaultAddFields() is defined outside this view;
        // confirm that calling it twice on the same document is harmless.
        setlocale(LC_ALL, 'fr_FR');
        self::defaultAddFields($indexValues);
        try {
            self::addDocument();
        } catch (\Exception $retryFailure) {
            // Indexing stays best-effort, but a lost document is reported instead of vanishing.
            error_log(sprintf(
                'Lucene indexing failed for type "%s": %s (first attempt: %s)',
                $type,
                $retryFailure->getMessage(),
                $firstFailure->getMessage()
            ));
        }
    }

    // Return the Lucene index object.
    return self::$_index;
}