本文整理汇总了PHP中Indexer::unqueueDocument方法的典型用法代码示例。如果您正苦于以下问题：PHP Indexer::unqueueDocument方法的具体用法？PHP Indexer::unqueueDocument怎么用？PHP Indexer::unqueueDocument使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Indexer的用法示例。
在下文中一共展示了Indexer::unqueueDocument方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: do_main
/**
 * Toggle the indexing-queue membership of the current document.
 *
 * System administrators only: a document that is already scheduled is
 * removed from the queue, otherwise it is queued with the 'A' flag. An
 * info message naming the document's full path reports what happened.
 * Every caller, administrator or not, is then redirected to the document
 * view page and the script terminates.
 */
function do_main()
{
    $oDocument = $this->oDocument;
    $iDocumentId = $oDocument->getId();

    if (Permission::userIsSystemAdministrator()) {
        $sFullPath = $oDocument->getFullPath();

        if (!Indexer::isDocumentScheduled($iDocumentId)) {
            // Not queued yet: schedule it.
            Indexer::index($oDocument, 'A');
            $sMessage = _kt("Document '%s' has been added to the indexing queue.");
        } else {
            // Already queued: take it off the queue.
            Indexer::unqueueDocument($iDocumentId);
            $sMessage = _kt("Document '%s' has been removed from the indexing queue.");
        }

        $this->addInfoMessage(sprintf($sMessage, $sFullPath));
    }

    redirect("view.php?fDocumentId={$iDocumentId}");
    exit;
}
示例2: processDocument
/**
* Process a document - extract text and index it
* Refactored from indexDocuments()
*
* Only the first part of this method is visible in this excerpt; the body
* is cut off inside the extraction step.
*
* @param object $document Document object; this method calls getFileSize(),
*                         getFileName(), getMajorVersionNumber() and
*                         getMinorVersionNumber() on it.
* @param array $docinfo Queue row for the document. Keys read here:
*                       'document_id', 'filetypes', 'mimetypes',
*                       'extractor' and 'what'.
*/
public function processDocument($document, $docinfo)
{
global $default;
// Extractor instances are kept in a static local, keyed by class name, so
// they are reused across calls to this method.
static $extractorCache = array();
// increment indexed documents count
Indexer::incrementCount();
// if document is a zero byte file, let's just unqueue and return
if ($document->getFileSize() == 0) {
Indexer::unqueueDocument($docinfo['document_id'], sprintf(_kt("Zero Byte documents do not need to be indexed: %d"), $docinfo['document_id']));
return;
}
$docId = $docinfo['document_id'];
$extension = $docinfo['filetypes'];
$mimeType = $docinfo['mimetypes'];
$extractorClass = $docinfo['extractor'];
// 'what' selects the work to do: 'A' enables both flags, 'C' only the
// document flag, 'D' only the discussion flag.
$indexDocument = in_array($docinfo['what'], array('A', 'C'));
$indexDiscussion = in_array($docinfo['what'], array('A', 'D'));
$this->indexingHistory = '';
$tempPath = $this->tempPath;
$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Indexing docid: %d extension: '%s' mimetype: '%s' extractor: '%s'"), $docId, $extension, $mimeType, $extractorClass), 'debug');
if (empty($extractorClass)) {
/*
if no extractor is found and we don't need to index discussions, then we can remove the item from the queue.
*/
if ($indexDiscussion) {
$indexDocument = false;
$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Not indexing docid: %d content because extractor could not be resolve. Still indexing discussion."), $docId), 'info');
} else {
Indexer::unqueueDocument($docId, sprintf(_kt("No extractor for docid: %d"), $docId));
return;
}
} else {
/*
If an extractor is available, we must ensure it is enabled.
*/
if (!$this->isExtractorEnabled($extractorClass)) {
// Note: the document is only logged here, it is not unqueued.
$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("diagnose: Not indexing docid: %d because extractor '%s' is disabled."), $docId, $extractorClass), 'info');
return;
}
}
if ($this->debug) {
$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Processing docid: %d.\n"), $docId), 'info');
}
// When a batch restart has been flagged, the document is unqueued and
// immediately queued again with 'A' instead of being processed now.
if ($this->restartCurrentBatch) {
Indexer::unqueueDocument($docId);
Indexer::index($docId, 'A');
return;
}
$filename = $document->getFileName();
// NOTE(review): the check matches a leading OR trailing tilde, while the
// message below only mentions a leading one.
if (substr($filename, 0, 1) == '~' || substr($filename, -1) == '~') {
Indexer::unqueueDocument($docId, sprintf(_kt("indexDocuments: Filename for document id %d starts with a tilde (~). This is assumed to be a temporary file. This is ignored."), $docId), 'error');
return;
}
// How $removeFromQueue is consumed is not visible in this excerpt.
$removeFromQueue = true;
if ($indexDocument) {
// Reuse a cached extractor for this class, or create and cache one.
if (array_key_exists($extractorClass, $extractorCache)) {
$extractor = $extractorCache[$extractorClass];
} else {
$extractor = $extractorCache[$extractorClass] = $this->getExtractor($extractorClass);
}
if (!$extractor instanceof DocumentExtractor) {
$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("indexDocuments: extractor '%s' is not a document extractor class."), $extractorClass), 'error');
return;
}
// $version is not used in the visible part of this method.
$version = $document->getMajorVersionNumber() . '.' . $document->getMinorVersionNumber();
$sourceFile = $this->storageManager->temporaryFile($document);
if (empty($sourceFile) || !is_file($sourceFile)) {
Indexer::unqueueDocument($docId, sprintf(_kt("indexDocuments: source file '%s' for document %d does not exist."), $sourceFile, $docId), 'error');
// NOTE(review): no enclosing loop is visible in this method, so this
// `continue` looks like a leftover from indexDocuments(); PHP rejects
// `continue` outside a loop/switch. Presumably should be `return` - confirm.
continue;
}
if ($extractor->needsIntermediateSourceFile()) {
//$extension = pathinfo($document->getFileName(), PATHINFO_EXTENSION);
// Copy the source to <tempPath>/<docId>.<extension> and extract from the copy.
$intermediate = $tempPath . '/' . $docId . '.' . $extension;
$result = @copy($sourceFile, $intermediate);
if ($result === false) {
$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Could not create intermediate file from document %d"), $docId), 'error');
// problem. lets try again later. probably permission related. log the issue.
// NOTE(review): same `continue`-outside-loop concern as above - confirm.
continue;
}
$sourceFile = $intermediate;
}
// Configure the extractor for this document and reset its status fields.
$extractor->setSourceFile($sourceFile);
$extractor->setMimeType($mimeType);
$extractor->setExtension($extension);
$extractor->setDocument($document);
$extractor->setIndexingStatus(null);
$extractor->setExtractionStatus(null);
// The extractor's target file is a fresh temp file created in $tempPath.
$targetFile = tempnam($tempPath, 'ktindexer');
$extractor->setTargetFile($targetFile);
$this->logPendingDocumentInfoStatus($docId, sprintf(_kt("Extra Info docid: %d Source File: '%s' Target File: '%s'"), $docId, $sourceFile, $targetFile), 'debug');
// The 'pre_extract' hook is executed twice: once without and once with
// the mime type argument.
$this->executeHook($extractor, 'pre_extract');
$this->executeHook($extractor, 'pre_extract', $mimeType);
$removeFromQueue = false;
if ($extractor->extractTextContent()) {
//......... part of the code is omitted here .........
示例3: extractTextContent
/**
 * Extract text content from the current document via the parent extractor.
 *
 * Documents with no extension, or with the unsupported 'xlt' extension,
 * are removed from the indexing queue and false is returned. When the
 * parent extraction fails, the captured output is inspected: a missing
 * OpenOffice process restarts the batch (false is returned); a set of
 * known conversion errors empties the target file, restarts the batch,
 * unqueues the document and returns true; any other failure returns false.
 * On success, non-html targets are emptied (true is returned); html
 * targets are filtered and written to "<target>.txt".
 *
 * @return mixed false on failure; otherwise true or the result of
 *               touch() / file_put_contents() on the target file.
 */
public function extractTextContent()
{
    global $default;

    $docId = $this->document->getId();

    if (empty($this->extension)) {
        $default->log->info("DocumentId: {$docId} - Document does not have an extension");
        Indexer::unqueueDocument($docId, sprintf("Removing document from queue: documentId %d", $docId));
        return false;
    }

    // Open Office does not support the following files
    $unsupportedExtensions = array('xlt');
    if (in_array($this->extension, $unsupportedExtensions)) {
        $default->log->info("DocumentId: {$docId} - Open Office does not support .xlt.");
        Indexer::unqueueDocument($docId, sprintf("Removing document from queue - Open Office does not support .xlt: documentId %d", $docId));
        return false;
    }

    if (parent::extractTextContent() === false) {
        // OpenOffice is not running: restart the batch and report failure.
        if (strpos($this->output, 'OpenOffice process not found or not listening') !== false) {
            $indexer = Indexer::get();
            $indexer->restartBatch();
            return false;
        }

        // Output fragments indicating the file itself cannot be converted.
        $unindexableMarkers = array(
            'Unexpected connection closure',
            '\'NoneType\' object has no attribute \'storeToURL\'',
            'The document could not be opened for conversion. This could indicate an unsupported mimetype.',
            'URL seems to be an unsupported one.',
            '__main__.com.sun.star.task.ErrorCodeIOException',
        );
        $looksUnindexable = false;
        foreach ($unindexableMarkers as $marker) {
            if (strpos($this->output, $marker) !== false) {
                $looksUnindexable = true;
                break;
            }
        }
        if (!$looksUnindexable) {
            return false;
        }

        $default->log->info("DocumentId: {$docId} - Suspect the file cannot be indexed by Open Office.");
        file_put_contents($this->targetfile, '');
        $indexer = Indexer::get();
        $indexer->restartBatch();
        Indexer::unqueueDocument($docId, sprintf(_kt("Removing document from queue: documentId %d"), $docId));
        return true;
    }

    // Only html output is post-processed; any other target is emptied.
    if ($this->targetExtension != 'html') {
        file_put_contents($this->targetfile, '');
        return true;
    }

    $rawHtml = file_get_contents($this->targetfile);
    $this->setTargetFile($this->targetfile . '.txt');
    $filtered = $this->filter($rawHtml);

    return empty($filtered)
        ? touch($this->targetfile)
        : file_put_contents($this->targetfile, $filtered);
}
示例4: processQueue
/**
 * Process the pending document queue: index each document (when indexing
 * is enabled) and run every registered processor whose supported mime
 * types match the document.
 *
 * A lock file in the cache directory prevents concurrent runs: if it
 * exists the method returns immediately; otherwise it is created just
 * before processing and removed once the batch completes.
 *
 * Documents that cannot be resolved are removed from the queue with an
 * error message and skipped.
 *
 * @return void
 */
public function processQueue()
{
    global $default;
    $default->log->debug('documentProcessor: starting');
    // Check for lock file to ensure processor is not currently running
    $cacheDir = $default->cacheDirectory;
    $lockFile = $cacheDir . DIRECTORY_SEPARATOR . 'document_processor.lock';
    if (file_exists($lockFile)) {
        // lock file exists, exit
        $default->log->debug('documentProcessor: stopping, lock file in place ' . $lockFile);
        return;
    }
    if ($default->enableIndexing) {
        // Setup indexing - load extractors, run diagnostics
        if ($this->indexer->preIndexingSetup() === false) {
            $default->log->debug('documentProcessor: stopping - indexer setup failed.');
            return;
        }
    }
    // Get document queue
    $queue = $this->indexer->getDocumentsQueue($this->limit);
    if (empty($queue)) {
        $default->log->debug('documentProcessor: stopping - no documents in processing queue');
        return;
    }
    // indexing starting - create lock file
    touch($lockFile);
    // Process queue
    foreach ($queue as $item) {
        // Get the document object
        // $docId must be defined here: the error branch below needs it both
        // to unqueue the right document and to build the message.
        $docId = $item['document_id'];
        $document = Document::get($docId);
        if (PEAR::isError($document)) {
            Indexer::unqueueDocument($docId, sprintf(_kt("indexDocuments: Cannot resolve document id %d: %s."), $docId, $document->getMessage()), 'error');
            continue;
        }
        // index document
        if ($default->enableIndexing) {
            $this->indexer->processDocument($document, $item);
        }
        // loop through processors
        if ($this->processors !== false) {
            foreach ($this->processors as $processor) {
                $default->log->debug('documentProcessor: running processor: ' . $processor->getNamespace());
                // Check document mime type against supported types
                if (!$this->isSupportedMimeType($item['mimetypes'], $processor->getSupportedMimeTypes())) {
                    $default->log->debug('documentProcessor: not a supported mimetype: ' . $item['mimetypes']);
                    continue;
                }
                // Process document
                $processor->setDocument($document);
                $processor->processDocument();
            }
        }
    }
    // update the indexer statistics
    $this->indexer->updateIndexStats();
    // Remove lock file to indicate processing has completed
    if (file_exists($lockFile)) {
        @unlink($lockFile);
    }
    $default->log->debug('documentProcessor: stopping');
}
示例5: do_main
/**
 * Render the indexing-errors report and apply any requested bulk action.
 *
 * Request parameters read:
 *  - itemsPerPage     page size (defaults to 50; non-positive values are ignored)
 *  - rescheduleValue  'reschedule' | 'remove' | 'rescheduleall' | 'removeall'
 *  - index_error      map of document id => value, used by 'reschedule'/'remove'
 *  - pageValue        1-based page number (clamped to the valid range)
 *
 * @return object the loaded template with its data set
 */
function do_main()
{
    // Number of items on a page. The request value is cast to int and only
    // accepted when positive, so the page arithmetic below never divides by zero.
    $itemsPerPage = 50;
    $pageNum = 1;
    if (isset($_REQUEST['itemsPerPage']) && (int) $_REQUEST['itemsPerPage'] > 0) {
        $itemsPerPage = (int) $_REQUEST['itemsPerPage'];
    }

    // registerTypes registers the mime types and populates the needed tables.
    $indexer = Indexer::get();
    $indexer->registerTypes();

    $removalReason = 'Document removed from queue via admin interface. Normally this is because an indexer is not able to process the document.';
    // The action is optional; reading it unguarded raised an undefined-index notice.
    $action = isset($_REQUEST['rescheduleValue']) ? $_REQUEST['rescheduleValue'] : '';
    switch ($action) {
        case 'reschedule':
            foreach (KTUtil::arrayGet($_REQUEST, 'index_error', array()) as $sDocId => $v) {
                Indexer::reindexDocument($sDocId);
            }
            break;
        case 'remove':
            foreach (KTUtil::arrayGet($_REQUEST, 'index_error', array()) as $sDocId => $v) {
                Indexer::unqueueDocument($sDocId, $removalReason);
            }
            break;
        case 'rescheduleall':
            $aIndexerValues = Indexer::getIndexingQueue();
            foreach ($aIndexerValues as $sDocValues) {
                Indexer::reindexDocument($sDocValues['document_id']);
            }
            break;
        case 'removeall':
            $aIndexerValues = Indexer::getIndexingQueue();
            foreach ($aIndexerValues as $sDocValues) {
                Indexer::unqueueDocument($sDocValues['document_id'], $removalReason);
            }
            break;
    }

    $oTemplating =& KTTemplating::getSingleton();
    $oTemplate =& $oTemplating->loadTemplate('ktcore/search2/reporting/indexerrors');

    // Replace each row's extractor class with a display name. Rows whose
    // extractor cannot be resolved get 'n/a'; previously that label was
    // assigned to a copy and then discarded by an early `continue`.
    $aIndexerValues = Indexer::getIndexingQueue();
    foreach ($aIndexerValues as $key => $doc) {
        $extractor = $indexer->getExtractor($doc['extractor']);
        $doc['extractor'] = is_null($extractor) ? 'n/a' : $extractor->getDisplayName();
        $aIndexerValues[$key] = $doc;
    }

    // Paging variables are initialised up front so the template always
    // receives defined values, even when the queue is empty.
    $aIndexList = array();
    $aPages = array();
    $pages = 0;
    $items = 0;

    if (!empty($aIndexerValues)) {
        $items = count($aIndexerValues);
        $pages = (int) ceil($items / $itemsPerPage);
        $aPages = range(1, $pages);

        if (isset($_REQUEST['pageValue'])) {
            // Clamp to [1, $pages] so out-of-range requests still show a page.
            $pageNum = max(1, min((int) $_REQUEST['pageValue'], $pages));
        }

        // Page N covers offsets (N-1)*size .. N*size-1. The former index
        // arithmetic started one element early, so every page after the
        // first repeated the previous page's last row.
        $aIndexList = array_slice(array_values($aIndexerValues), ($pageNum - 1) * $itemsPerPage, $itemsPerPage);
    }

    $config = KTConfig::getSingleton();
    $rootUrl = $config->get('KnowledgeTree/rootUrl');
    $oTemplate->setData(array('context' => $this, 'pageList' => $aPages, 'pageCount' => $pages, 'pageNum' => $pageNum, 'itemCount' => $items, 'itemsPerPage' => $itemsPerPage, 'indexErrors' => $aIndexList, 'root_url' => $rootUrl));
    return $oTemplate;
}
示例6: processIndexQueue
/**
 * Fetch the documents in the indexing queue and run the indexer on each.
 *
 * Does nothing when indexing is disabled. A lock file in the cache
 * directory guards against concurrent runs; a lock whose modification
 * time is older than 24 hours (48 hours when the batch limit exceeds
 * 1000) is treated as stale and deleted so indexing can resume.
 */
public function processIndexQueue()
{
    global $default;

    if (!$default->enableIndexing) {
        $default->log->debug('documentProcessor: indexer disabled');
        return;
    }
    $default->log->debug('documentProcessor: starting indexer');

    // Check for lock file to ensure processor is not currently running
    $lockDir = $default->cacheDirectory;
    $lockFile = $lockDir . DIRECTORY_SEPARATOR . 'document_processor.lock';

    if (file_exists($lockFile)) {
        // An interrupted run leaves its lock file behind, which would block
        // every later run. The lock's modification time is therefore checked
        // and the file removed once it is older than the allowed gap.
        $lockStat = stat($lockFile);
        $lockedAt = $lockStat['mtime'];

        $maxAgeHours = 24;
        if ($this->limit > 1000) {
            $maxAgeHours = 48;
            $default->log->warn('documentProcessor: batch size of documents to index is set to ' . $this->limit . ', this could cause problems.');
        }

        $staleThreshold = time() - $maxAgeHours * 60 * 60;
        if (!($staleThreshold > $lockedAt)) {
            // Lock is still considered live: stop, warning only when it has
            // been in place for more than half an hour.
            $warnThreshold = time() - 30 * 60;
            if ($warnThreshold > $lockedAt) {
                $default->log->warn('documentProcessor: stopping, lock file in place since ' . date('Y-m-d H:i:s', $lockedAt) . ' - ' . $lockFile);
            }
            return;
        }

        $default->log->error('documentProcessor: lock file is older than ' . $maxAgeHours . ' hours, deleting it to restart indexing - ' . $lockFile);
        @unlink($lockFile);
    }

    // Setup indexing - load extractors, run diagnostics
    if ($this->indexer->preIndexingSetup() === false) {
        $default->log->error('documentProcessor: stopping - indexer setup failed.');
        return;
    }

    // Get document queue
    $pending = $this->indexer->getDocumentsQueue($this->limit);
    if (empty($pending)) {
        $default->log->debug('documentProcessor: stopping - no documents in indexing queue');
        return;
    }

    // indexing starting - create lock file
    touch($lockFile);

    foreach ($pending as $entry) {
        $docId = $entry['document_id'];
        $document = Document::get($docId);

        if (!PEAR::isError($document)) {
            $this->indexer->processDocument($document, $entry);
            continue;
        }

        // Unresolvable document: drop it from the queue with an error note.
        Indexer::unqueueDocument($docId, sprintf(_kt("indexDocuments: Cannot resolve document id %d: %s."), $docId, $document->getMessage()), 'error');
    }

    // update the indexer statistics
    $this->indexer->updateIndexStats();

    // Remove lock file to indicate processing has completed
    if (file_exists($lockFile)) {
        @unlink($lockFile);
    }
    $default->log->debug('documentProcessor: stopping indexer, batch completed');
}
示例7: processQueue
/**
 * Process the pending document queue: index each document (when indexing
 * is enabled) and run every registered processor whose supported mime
 * types match the document.
 *
 * Documents that cannot be resolved are removed from the queue with an
 * error message and skipped.
 *
 * @return void
 */
public function processQueue()
{
    global $default;
    $default->log->debug('documentProcessor: starting');
    if ($default->enableIndexing) {
        // Setup indexing - load extractors, run diagnostics
        if ($this->indexer->preIndexingSetup() === false) {
            $default->log->debug('documentProcessor: stopping - indexer setup failed.');
            return;
        }
    }
    // Get document queue
    $queue = $this->indexer->getDocumentsQueue($this->limit);
    if (empty($queue)) {
        $default->log->debug('documentProcessor: stopping - no documents in processing queue');
        return;
    }
    // Process queue
    foreach ($queue as $item) {
        // Get the document object
        // $docId must be defined here: the error branch below needs it both
        // to unqueue the right document and to build the message.
        $docId = $item['document_id'];
        $document = Document::get($docId);
        if (PEAR::isError($document)) {
            Indexer::unqueueDocument($docId, sprintf(_kt("indexDocuments: Cannot resolve document id %d: %s."), $docId, $document->getMessage()), 'error');
            continue;
        }
        // index document
        if ($default->enableIndexing) {
            $this->indexer->processDocument($document, $item);
        }
        // loop through processors
        if ($this->processors !== false) {
            foreach ($this->processors as $processor) {
                $default->log->debug('documentProcessor: running processor: ' . $processor->getNamespace());
                // Check document mime type against supported types
                if (!$this->isSupportedMimeType($item['mimetypes'], $processor->getSupportedMimeTypes())) {
                    $default->log->debug('documentProcessor: not a supported mimetype: ' . $item['mimetypes']);
                    continue;
                }
                // Process document
                $processor->setDocument($document);
                $processor->processDocument();
            }
        }
    }
    // update the indexer statistics
    $this->indexer->updateIndexStats();
    $default->log->debug('documentProcessor: stopping');
}