当前位置: 首页>>代码示例>>PHP>>正文


PHP pdf::get_pages方法代码示例

本文整理汇总了PHP中pdf::get_pages方法的典型用法代码示例。如果您正苦于以下问题：PHP pdf::get_pages方法的具体用法？PHP pdf::get_pages怎么用？PHP pdf::get_pages使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pdf的用法示例。


在下文中一共展示了pdf::get_pages方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。

示例1: index

 /**
  * Index posts stored in $this->unindexedPosts
  *
  * @since 1.0
  */
 function index()
 {
     global $wp_filesystem, $searchwp;
     $this->check_for_parallel_indexer();
     if (is_array($this->unindexedPosts) && count($this->unindexedPosts)) {
         do_action('searchwp_indexer_pre_chunk', $this->unindexedPosts);
         // all of the IDs to index have not been indexed, proceed with indexing them
         while (($unindexedPost = current($this->unindexedPosts)) !== false) {
             $this->setPost($unindexedPost);
             // log the attempt
             $count = get_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'attempts', true);
             if ($count == false) {
                 $count = 0;
             } else {
                 $count = intval($count);
             }
             $count++;
             // increment our counter to prevent the indexer getting stuck on a gigantic PDF
             update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'attempts', $count);
             do_action('searchwp_log', 'Attempt ' . $count . ' at indexing ' . $this->post->ID);
             // if we breached the maximum number of attempts, flag it to skip
             $this->maxAttemptsToIndex = absint(apply_filters('searchwp_max_index_attempts', $this->maxAttemptsToIndex));
             if (intval($count) > $this->maxAttemptsToIndex) {
                 do_action('searchwp_log', 'Too many indexing attempts on ' . $this->post->ID . ' (' . $this->maxAttemptsToIndex . ') - skipping');
                 // flag it to be skipped
                 update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'skip', true);
             } else {
                 // check to see if we're running a second pass on terms
                 $termCache = get_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'terms', true);
                 if (!is_array($termCache)) {
                     do_action('searchwp_index_post', $this->post);
                     // if it's an attachment, we want the permalink
                     $slug = $this->post->post_type == 'attachment' ? str_replace(get_bloginfo('wpurl'), '', get_permalink($this->post->ID)) : '';
                     // we allow users to override the extracted content from documents, if they have done so this flag is set
                     $skipDocProcessing = get_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'skip_doc_processing', true);
                     $omitDocProcessing = apply_filters('searchwp_omit_document_processing', false);
                     if (!$skipDocProcessing && !$omitDocProcessing) {
                         // if it's a PDF we need to populate our Custom Field with it's content
                         if ($this->post->post_mime_type == 'application/pdf') {
                             // grab the filename of the PDF
                             $filename = get_attached_file($this->post->ID);
                             // allow for external PDF content extraction
                             $pdfContent = apply_filters('searchwp_external_pdf_processing', '', $filename, $this->post->ID);
                             // only try to extract content if the external processing has not provided the PDF content we're looking for
                             if (empty($pdfContent)) {
                                 // PdfParser runs only on 5.3+ but SearchWP runs on 5.2+
                                 if (version_compare(PHP_VERSION, '5.3', '>=')) {
                                     include_once $searchwp->dir . '/vendor/pdfparser-bootloader.php';
                                 }
                                 // a wrapper class was conditionally included if we're running PHP 5.3+ so let's try that
                                 if (class_exists('SearchWP_PdfParser')) {
                                     // try PdfParser first
                                     $parser = new SearchWP_PdfParser();
                                     $parser = $parser->init();
                                     $pdf = $parser->parseFile($filename);
                                     $text = $pdf->getText();
                                     $pdfContent = trim(str_replace("\n", " ", $text));
                                 }
                                 // try PDF2Text
                                 if (empty($pdfContent)) {
                                     if (!class_exists('PDF2Text')) {
                                         include_once $searchwp->dir . '/includes/class.pdf2text.php';
                                     }
                                     $pdfParser = new PDF2Text();
                                     $pdfParser->setFilename($filename);
                                     $pdfParser->decodePDF();
                                     $pdfContent = $pdfParser->output();
                                     $pdfContent = trim(str_replace("\n", " ", $pdfContent));
                                 }
                                 // check to see if the first pass produced nothing or concatenated strings
                                 $fullContentLength = strlen($pdfContent);
                                 $numberOfSpaces = substr_count($pdfContent, ' ');
                                 if (empty($pdfContent) || $numberOfSpaces / $fullContentLength * 100 < 10) {
                                     WP_Filesystem();
                                     $filecontent = $wp_filesystem->exists($filename) ? $wp_filesystem->get_contents($filename) : '';
                                     if (false != strpos($filecontent, 'trailer')) {
                                         if (!class_exists('pdf_readstream')) {
                                             include_once $searchwp->dir . '/includes/class.pdfreadstream.php';
                                         }
                                         $pdfContent = '';
                                         $pdf = new pdf(get_attached_file($this->post->ID));
                                         $pages = $pdf->get_pages();
                                         if (!empty($pages)) {
                                             while (list($nr, $page) = each($pages)) {
                                                 $pdfContent .= $page->get_text();
                                             }
                                         }
                                     } else {
                                         // empty out the content so wacky concatenations are not indexed
                                         $pdfContent = '';
                                         // flag it for further review
                                         update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'review', true);
                                         update_post_meta($this->post->ID, '_' . SEARCHWP_PREFIX . 'skip', true);
                                     }
                                 }
//.........这里部分代码省略.........
开发者ID:vossavant,项目名称:phoenix,代码行数:101,代码来源:class.indexer.php

示例2: extract_pdf_text

	/**
	 * Extract plain text from PDF
	 *
	 * Up to three extractors are tried in order, each one only when the previous
	 * one produced nothing usable:
	 *  1. SearchWP_PdfParser (only loaded on PHP 5.3+)
	 *  2. PDF2Text
	 *  3. the page-by-page `pdf` reader from class.pdfreadstream.php, used when the
	 *     text found so far is empty or has fewer than 10% spaces (which indicates
	 *     words were concatenated) and the raw file contains a 'trailer' marker
	 *
	 * @since 2.5
	 * @param $post_id integer The post ID of the PDF in the Media library
	 *
	 * @return string|false The contents of the PDF. An empty string is returned when
	 *                      the post does not exist, is not a PDF, or its file is not
	 *                      available locally. False is returned when PdfParser threw
	 *                      an exception, or when the extracted text looked unusable
	 *                      and the last-resort reader could not be applied.
	 */
	function extract_pdf_text( $post_id ) {
		global $wp_filesystem, $searchwp;

		$post_id  = absint( $post_id );
		$pdf_post = get_post( $post_id );

		// make sure the post exists and that it's a PDF
		if ( ! is_object( $pdf_post ) || 'application/pdf' !== $pdf_post->post_mime_type ) {
			return '';
		}

		// grab the filename of the PDF
		$filename = get_attached_file( $post_id );

		// make sure there is an attached file and that it exists locally
		if ( empty( $filename ) || ! file_exists( $filename ) ) {
			return '';
		}

		// start from a defined value so every later check operates on a string
		$pdfContent = '';

		// PdfParser runs only on 5.3+ but SearchWP runs on 5.2+
		if ( version_compare( PHP_VERSION, '5.3', '>=' ) ) {

			/** @noinspection PhpIncludeInspection */
			include_once( $searchwp->dir . '/vendor/pdfparser-bootloader.php' );

			// a wrapper class was conditionally included if we're running PHP 5.3+ so let's try that
			if ( class_exists( 'SearchWP_PdfParser' ) ) {

				/** @noinspection PhpIncludeInspection */
				include_once( $searchwp->dir . '/vendor/pdfparser/vendor/autoload.php' );

				// try PdfParser first
				$parser = new SearchWP_PdfParser();
				$parser = $parser->init();
				try {
					$pdf = $parser->parseFile( $filename );
					// normalize line breaks to spaces (same as the PDF2Text pass below) so the
					// space-ratio check further down does not reject text separated by newlines
					$pdfContent = trim( str_replace( "\n", ' ', (string) $pdf->getText() ) );
				} catch (Exception $e) {
					do_action( 'searchwp_log', 'PDF parsing failed: ' . $e->getMessage() );
					return false;
				}
			}
		}

		// try PDF2Text
		if ( empty( $pdfContent ) ) {
			if ( ! class_exists( 'PDF2Text' ) ) {
				/** @noinspection PhpIncludeInspection */
				include_once( $searchwp->dir . '/vendor/class.pdf2text.php' );
			}
			$pdfParser = new PDF2Text();
			$pdfParser->setFilename( $filename );
			$pdfParser->decodePDF();
			$pdfContent = $pdfParser->output();
			$pdfContent = trim( str_replace( "\n", ' ', $pdfContent ) );
		}

		// check to see if the first pass produced nothing or concatenated strings
		// (the division is only evaluated when the content is non-empty, so the length is never zero)
		$fullContentLength = strlen( $pdfContent );
		$numberOfSpaces = substr_count( $pdfContent, ' ' );
		if ( empty( $pdfContent ) || ( ( $numberOfSpaces / $fullContentLength ) * 100 < 10 ) ) {
			WP_Filesystem();

			// WP_Filesystem() can fail and leave the global unset; method_exists() must only see an object
			$filecontent = '';
			if (
				is_object( $wp_filesystem )
				&& method_exists( $wp_filesystem, 'exists' )
				&& method_exists( $wp_filesystem, 'get_contents' )
				&& $wp_filesystem->exists( $filename )
			) {
				$filecontent = $wp_filesystem->get_contents( $filename );
			}

			if ( is_string( $filecontent ) && false !== strpos( $filecontent, 'trailer' ) ) {
				if ( ! class_exists( 'pdf_readstream' ) ) {
					/** @noinspection PhpIncludeInspection */
					include_once( $searchwp->dir . '/vendor/class.pdfreadstream.php' );
				}
				$pdfContent = '';
				// read the file that belongs to the requested post, not whatever $this->post currently is
				$pdf = new pdf( $filename );
				$pages = $pdf->get_pages();
				if ( ! empty( $pages ) && ( is_array( $pages ) || is_object( $pages ) ) ) {
					// foreach replaces each(), which was removed in PHP 8
					foreach ( $pages as $page ) {
						if ( is_object( $page ) && method_exists( $page, 'get_text' ) ) {
							$pdfContent .= $page->get_text();
						}
					}
				}
			} else {
				// empty out the content so wacky concatenations are not indexed
				$pdfContent = false;
			}
		}

		return $pdfContent;
	}
开发者ID:acutedeveloper,项目名称:carepoint-development,代码行数:100,代码来源:class.indexer.php


注:本文中的pdf::get_pages方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。