當前位置: 首頁>>代碼示例>>C#>>正文


C# PdfReader.GetPageContent方法代碼示例

本文整理匯總了C#中iTextSharp.text.pdf.PdfReader.GetPageContent方法的典型用法代碼示例。如果您正苦於以下問題:C# PdfReader.GetPageContent方法的具體用法?C# PdfReader.GetPageContent怎麼用?C# PdfReader.GetPageContent使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類iTextSharp.text.pdf.PdfReader的用法示例。


在下文中一共展示了PdfReader.GetPageContent方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的C#代碼示例。

示例1: RemoveLayers

 /// <summary>
 /// Strips the named optional-content (OCG) layers out of a PDF document.
 /// </summary>
 /// <remarks>
 /// Every page's content stream is first read and written back through the reader. Each page
 /// is then run through an <c>OCGParser</c>, loses its <c>PieceInfo</c> entry and has its
 /// annotations and properties filtered against the layer names. Finally the catalog's
 /// <c>OCProperties</c> arrays are pruned and unused objects are removed from the reader.
 /// </remarks>
 /// <param name="reader">a PdfReader containing a PDF document</param>
 /// <param name="layers">a sequence of names of OCG layers</param>
 /// <exception cref="IOException">NOTE(review): declared by the original documentation; the throwing call is not visible in this method.</exception>
 public virtual void RemoveLayers(PdfReader reader, params string[] layers)
 {
     int pageCount = reader.NumberOfPages;

     // Round-trip each page's content through the reader before parsing.
     for (int pageNo = 1; pageNo <= pageCount; pageNo++)
     {
         byte[] content = reader.GetPageContent(pageNo);
         reader.SetPageContent(pageNo, content);
     }

     ICollection<string> layerNames = new HashSet2<string>();
     foreach (string layer in layers)
     {
         layerNames.Add(layer);
     }

     OCGParser ocgParser = new OCGParser(layerNames);
     for (int pageNo = 1; pageNo <= pageCount; pageNo++)
     {
         PdfDictionary pageDict = reader.GetPageN(pageNo);
         Parse(ocgParser, pageDict);
         pageDict.Remove(new PdfName("PieceInfo"));
         RemoveAnnots(pageDict, layerNames);
         RemoveProperties(pageDict, layerNames);
     }

     PdfDictionary ocProperties = reader.Catalog.GetAsDict(PdfName.OCPROPERTIES);
     if (ocProperties != null)
     {
         RemoveOCGsFromArray(ocProperties, PdfName.OCGS, layerNames);

         PdfDictionary defaultConfig = ocProperties.GetAsDict(PdfName.D);
         if (defaultConfig != null)
         {
             // Same keys, same order as the individual calls they replace.
             PdfName[] arrayKeys =
             {
                 PdfName.ON, PdfName.OFF, PdfName.LOCKED,
                 PdfName.RBGROUPS, PdfName.ORDER, PdfName.AS
             };
             foreach (PdfName key in arrayKeys)
             {
                 RemoveOCGsFromArray(defaultConfig, key, layerNames);
             }
         }
     }

     reader.RemoveUnusedObjects();
 }
開發者ID:,項目名稱:,代碼行數:40,代碼來源:

示例2: IsTextInPdf

        /// <summary>
        /// Reports whether any page of a PDF file contains the given text.
        /// </summary>
        /// <param name="inFileName">Path of the PDF file to search.</param>
        /// <param name="textToFind">Text to look for in each page's extracted text.</param>
        /// <returns>
        /// <c>true</c> once a page's extracted text contains <paramref name="textToFind"/>;
        /// <c>false</c> if no page does, or if any exception is raised along the way.
        /// </returns>
        public bool IsTextInPdf(string inFileName, string textToFind)
        {
            bool found = false;
            try
            {
                using (PdfReader pdf = new PdfReader(inFileName))
                {
                    int pageNo = 1;
                    // Stop at the first hit; later pages are not read.
                    while (!found && pageNo <= pdf.NumberOfPages)
                    {
                        string pageText = ExtractTextFromPDFBytes(pdf.GetPageContent(pageNo));
                        found = pageText.IndexOf(textToFind) >= 0;
                        pageNo++;
                    }
                }
            }
            catch
            {
                // Any failure (including one raised while disposing the reader) counts as "not found".
                found = false;
            }
            return found;
        }
開發者ID:koocbor,項目名稱:CommonAppBlankChecker,代碼行數:31,代碼來源:PdfChecker.cs

示例3: Post

        /// <summary>
        /// Compress a pdf
        /// </summary>
        /// <remarks>
        /// The PDF is re-stamped as version 1.4 with full compression after every page's content
        /// stream has been written back through the reader.
        /// </remarks>
        /// <param name="base64Pdf">A small model holding a base64 encoded pdf in its <c>data</c> member</param>
        /// <returns>
        /// A JSON result carrying the compressed pdf as base64; a 400 result when the model or its
        /// <c>data</c> is missing; a 500 result wrapping any other exception.
        /// </returns>
        public IHttpActionResult Post(Base64Pdf base64Pdf)
        {
            try
            {
                // Guard the model itself too: dereferencing a null model would otherwise surface
                // as a 500 instead of the intended 400.
                if (base64Pdf == null || base64Pdf.data == null)
                    return BadRequest("Check supplied pdf model");

                byte[] data = Convert.FromBase64String(base64Pdf.data);

                //Compress
                byte[] compressedData;
                var reader = new PdfReader(data);
                try
                {
                    using (var memStream = new MemoryStream())
                    {
                        var stamper = new PdfStamper(reader, memStream, PdfWriter.VERSION_1_4);
                        var pageNum = reader.NumberOfPages;

                        for (var i = 1; i <= pageNum; i++)
                            reader.SetPageContent(i, reader.GetPageContent(i));

                        stamper.SetFullCompression();
                        // The stamper must be closed before the bytes are read out of the stream.
                        stamper.Close();

                        compressedData = memStream.ToArray();
                    }
                }
                finally
                {
                    // Release the reader even when stamping fails part-way.
                    reader.Close();
                }

                var compressedBase64 = Convert.ToBase64String(compressedData);

                return Json(new Base64Pdf { data = compressedBase64 });
            }
            catch (Exception ex)
            {
                return InternalServerError(ex);
            }
        }
開發者ID:penance316,項目名稱:CompressPdfWebApi,代碼行數:40,代碼來源:PdfController.cs

示例4: TestMultipleDocuments

        /// <summary>
        /// Converts the same .docx document twice in one call and checks that the resulting PDF
        /// has two pages whose content streams have the same length.
        /// </summary>
        public void TestMultipleDocuments()
        {
            string documentPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "test_files\\documents\\document.docx");
            byte[] testFile1 = File.ReadAllBytes(documentPath);
            byte[] testFile2 = File.ReadAllBytes(documentPath);
            Dictionary<string, byte[]> files = new Dictionary<string, byte[]>();
            files.Add("document1.docx", testFile1);
            files.Add("document2.docx", testFile2);
            PdfConverter converter = new PdfConverter();
            byte[] pdf = converter.ConvertFiles(files, new ConversionOptions());

            Assert.IsNotNull(pdf);

            PdfReader reader = new PdfReader(pdf);
            try
            {
                // Check the page count before touching page 2 so a wrong count fails clearly.
                Assert.AreEqual(2, reader.NumberOfPages);

                byte[] page1 = reader.GetPageContent(1);
                byte[] page2 = reader.GetPageContent(2);

                // Both pages come from the same source document; the original compared
                // page1.Length with itself, which could never fail.
                Assert.AreEqual(page1.Length, page2.Length);
            }
            finally
            {
                reader.Close();
            }
        }
開發者ID:bradyholt,項目名稱:pedamorf,代碼行數:21,代碼來源:TestDocumentConversion.cs

示例5: SetPageContentTest01

        /// <summary>
        /// Writes every page's content stream back through the reader after eliminating shared
        /// streams, stamps the result to <c>out1.pdf</c> and expects no content difference from
        /// the reference file <c>cmp_out1.pdf</c>.
        /// </summary>
        public void SetPageContentTest01()
        {
            String outPdf = DestFolder + "out1.pdf";

            var input = TestResourceUtils.GetResourceAsStream(TestResourcesPath, "in.pdf");
            PdfReader reader = new PdfReader(input);
            var output = new FileStream(outPdf, FileMode.Create);
            PdfStamper stamper = new PdfStamper(reader, output);

            reader.EliminateSharedStreams();

            // Page count is read once, after shared streams have been eliminated.
            int pageCount = reader.NumberOfPages;
            for (int pageNo = 1; pageNo <= pageCount; pageNo++)
            {
                reader.SetPageContent(pageNo, reader.GetPageContent(pageNo));
            }
            stamper.Close();

            var comparer = new CompareTool();
            var expectedPdf = TestResourceUtils.GetResourceAsTempFile(TestResourcesPath, "cmp_out1.pdf");
            var difference = comparer.CompareByContent(outPdf, expectedPdf, DestFolder, "diff_");
            Assert.Null(difference);
        }
開發者ID:yu0410aries,項目名稱:itextsharp,代碼行數:15,代碼來源:PdfStamperTest.cs

示例6: Read

        /// <summary>
        /// Reads the PDF at <c>_filePath</c> and returns one converted string per page.
        /// </summary>
        /// <remarks>
        /// Each page's raw content-stream bytes are decoded with <see cref="Encoding.Default"/>
        /// and passed through <c>GetDataConvertedData</c>.
        /// </remarks>
        /// <returns>The converted page strings, in page order.</returns>
        public static List<String> Read()
        {
            var pdfReader = new PdfReader(_filePath);
            try
            {
                int pageCount = pdfReader.NumberOfPages;
                var pages = new List<String>(pageCount);

                for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
                {
                    // Decoding directly with Encoding.Default gives the same string as the former
                    // Default -> UTF-8 bytes -> UTF-8 string round trip, without the extra copy.
                    string textFromPage = Encoding.Default.GetString(pdfReader.GetPageContent(pageNumber));

                    pages.Add(GetDataConvertedData(textFromPage));
                }

                return pages;
            }
            finally
            {
                // The original never released the reader.
                pdfReader.Close();
            }
        }
開發者ID:Humbunklung,項目名稱:CodeForge,代碼行數:17,代碼來源:Program.cs

示例7: Extract_Text

        /// <summary> Extracts the text of every page of a PDF file and writes it to a UTF-8 text file </summary>
        /// <remarks>
        /// Pages whose extracted text is blank are skipped; every other page is written under a
        /// "PAGE n" heading. A failure on a single page is ignored and extraction continues with
        /// the next page.
        /// </remarks>
        /// <param name="PDF_In_Name">Full path to the pdf file</param>
        /// <param name="Text_Out_Name">Output file name for the extracted text </param>
        /// <returns>TRUE if the page loop ran to completion, otherwise FALSE</returns>
        public static bool Extract_Text(string PDF_In_Name, string Text_Out_Name)
        {
            bool succeeded = false;
            StreamWriter writer = null;
            PdfReader pdf = null;
            try
            {
                pdf = new PdfReader(PDF_In_Name);
                writer = new StreamWriter(Text_Out_Name, false, Encoding.UTF8);

                for (int pageNo = 1; pageNo <= pdf.NumberOfPages; pageNo++)
                {
                    try
                    {
                        string pageText = ExtractTextFromPDFBytes(pdf.GetPageContent(pageNo));
                        if (pageText.Trim().Length == 0)
                        {
                            continue;
                        }

                        writer.WriteLine();
                        writer.WriteLine("PAGE " + pageNo);
                        writer.WriteLine();
                        writer.WriteLine(pageText);
                    }
                    catch
                    {
                        // Best effort: a page that cannot be extracted is skipped.
                    }
                }

                succeeded = true;
            }
            catch
            {
                // Opening the reader or the writer failed; reported through the return value.
            }
            finally
            {
                if (writer != null) writer.Close();
                if (pdf != null) pdf.Close();
            }

            return succeeded;
        }
開發者ID:Elkolt,項目名稱:SobekCM-Web-Application,代碼行數:48,代碼來源:PDF_Tools.cs

示例8: InspectPdf

        // ---------------------------------------------------------------------------
        /// <summary>
        /// Builds a text report of the object and content information of a PDF.
        /// </summary>
        /// <remarks>
        /// For every page the report contains the page dictionary detail, an XObject summary,
        /// the raw content stream (one character per byte) and the text found by a
        /// location-based extraction strategy. Per the original author's note, this method uses
        /// code from PdfContentReaderTool.ListContentStreamForPage() so that a byte array can be
        /// passed in instead of a file path.
        /// </remarks>
        /// <param name="pdf">the original PDF</param>
        /// <returns>the report text</returns>
        public string InspectPdf(byte[] pdf)
        {
            PdfReader reader = new PdfReader(pdf);
            try
            {
                int maxPageNum = reader.NumberOfPages;
                StringBuilder sb = new StringBuilder();
                for (int pageNum = 1; pageNum <= maxPageNum; pageNum++)
                {
                    sb.AppendLine("==============Page " + pageNum + "====================");
                    sb.AppendLine("- - - - - Dictionary - - - - - -");
                    PdfDictionary pageDictionary = reader.GetPageN(pageNum);
                    sb.AppendLine(
                        PdfContentReaderTool.GetDictionaryDetail(pageDictionary)
                    );

                    sb.AppendLine("- - - - - XObject Summary - - - - - -");
                    sb.AppendLine(PdfContentReaderTool.GetXObjectDetail(
                        pageDictionary.GetAsDict(PdfName.RESOURCES))
                    );

                    sb.AppendLine("- - - - - Content Stream - - - - - -");
                    byte[] contentBytes;
                    RandomAccessFileOrArray f = reader.SafeFile;
                    try
                    {
                        contentBytes = reader.GetPageContent(pageNum, f);
                    }
                    finally
                    {
                        // Close the per-page file view even if reading the content fails.
                        f.Close();
                    }

                    // Each byte is appended as the character with the same code value.
                    foreach (byte b in contentBytes)
                    {
                        sb.Append((char)b);
                    }

                    sb.AppendLine("- - - - - Text Extraction - - - - - -");
                    String extractedText = PdfTextExtractor.GetTextFromPage(
                        reader, pageNum, new LocationTextExtractionStrategy()
                    );
                    if (extractedText.Length != 0)
                    {
                        sb.AppendLine(extractedText);
                    }
                    else
                    {
                        sb.AppendLine("No text found on page " + pageNum);
                    }
                    sb.AppendLine();
                }
                return sb.ToString();
            }
            finally
            {
                // The original never released the reader.
                reader.Close();
            }
        }
開發者ID:kuujinbo,項目名稱:iTextInAction2Ed,代碼行數:52,代碼來源:InspectPageContent.cs

示例9: ExtractText

        /// <summary>
        /// Extracts the text of pages 1 through <paramref name="topage"/> of a PDF file.
        /// </summary>
        /// <param name="inFileName">the full path to the pdf file.</param>
        /// <param name="topage">number of the last page (1-based) to extract.</param>
        /// <returns>
        /// the extracted text, each page followed by a single space; <c>null</c> when no page was
        /// processed (<paramref name="topage"/> below 1) or when any exception occurred, in which
        /// case a line naming the file is appended to <c>log_extract.txt</c>.
        /// </returns>
        public String ExtractText(string inFileName,int topage)
        {
            PdfReader reader = null;
            try
            {
                // Create a reader for the given PDF file
                reader = new PdfReader(inFileName);

                // A builder avoids re-copying the accumulated text for every page.
                System.Text.StringBuilder outputText = new System.Text.StringBuilder();

                Console.Write("Processing: ");

                // NOTE(review): topage is not checked against reader.NumberOfPages here; what
                // happens for larger values depends on GetPageContent/ExtractTextFromPDFBytes.
                for (int page = 1; page <= topage; page++)
                {
                    outputText.Append(ExtractTextFromPDFBytes(reader.GetPageContent(page)) + " ");
                }
                return (outputText.Length == 0) ? null : outputText.ToString();
            }
            catch
            {
                File.AppendAllText("log_extract.txt", DateTime.Now.ToShortDateString() + " " + DateTime.Now.ToShortTimeString() + ": " + inFileName + Environment.NewLine);
                return null;
            }
            finally
            {
                // The original never released the reader.
                if (reader != null) reader.Close();
            }
        }
開發者ID:witwall,項目名稱:isbnextractor,代碼行數:43,代碼來源:PDFParser.cs

示例10: ReadContent

// ---------------------------------------------------------------------------
    /// <summary>
    /// Returns the content stream of the first page of a PDF, decoded as UTF-8 text.
    /// </summary>
    /// <param name="src">the PDF file as a byte array</param>
    /// <returns>the bytes of page 1's content stream interpreted as a UTF-8 string</returns>
    public string ReadContent(byte[] src) {
      byte[] firstPage = new PdfReader(src).GetPageContent(1);
      int length = firstPage.Length;
      return Encoding.UTF8.GetString(firstPage, 0, length);
    }
開發者ID:,項目名稱:,代碼行數:10,代碼來源:

示例11: ExtractText

        /// <summary>
        /// Decodes the raw content stream of every page of a PDF as KOI8-R text and concatenates
        /// the results, writing "#" progress marks to the debug output along the way.
        /// </summary>
        /// <param name="inFileName">path of the PDF file to read.</param>
        /// <param name="tot">
        /// receives the number of progress marks written during the page loop (the padding marks
        /// written afterwards are not counted), or -1 when an exception occurred.
        /// </param>
        /// <returns>
        /// the decoded pages, each followed by a single space; the string "-1" when an exception
        /// occurred.
        /// </returns>
        public string ExtractText(string inFileName, out int tot)
        {
            PdfReader reader = null;
            try
            {
                reader = new PdfReader(inFileName);

                Debug.WriteLine("Processing: ");

                // Look the encoding up once rather than once per page.
                Encoding pageEncoding = Encoding.GetEncoding("koi8-r");
                // A builder avoids re-copying the accumulated text for every page.
                StringBuilder outs = new StringBuilder();

                int pageCount = reader.NumberOfPages;
                int totalLen = 68;
                float charUnit = ((float)totalLen) / (float)pageCount;
                int totalWritten = 0;
                float curUnit = 0;

                for (int page = 1; page <= pageCount; page++)
                {
                    outs.Append(pageEncoding.GetString(reader.GetPageContent(page))).Append(' ');

                    // Write the progress.
                    if (charUnit >= 1.0f)
                    {
                        for (int i = 0; i < (int)charUnit; i++)
                        {
                            Debug.WriteLine("#");
                            totalWritten++;
                        }
                    }
                    else
                    {
                        // Fewer marks than pages: accumulate fractions until a whole mark is due.
                        curUnit += charUnit;
                        if (curUnit >= 1.0f)
                        {
                            for (int i = 0; i < (int)curUnit; i++)
                            {
                                Debug.WriteLine("#");
                                totalWritten++;
                            }
                            curUnit = 0;
                        }
                    }
                }

                // Pad the progress output up to totalLen marks; these are not counted in tot.
                if (totalWritten < totalLen)
                {
                    for (int i = 0; i < (totalLen - totalWritten); i++)
                    {
                        Debug.WriteLine("#");
                    }
                }
                tot = totalWritten;
                return outs.ToString();
            }
            catch (Exception ex)
            {
                Debug.WriteLine("2"+ex.Message);
                tot = -1;
                return "-1";
            }
            finally
            {
                // The original never released the reader.
                if (reader != null) reader.Close();
            }
        }
開發者ID:vmouse,項目名稱:Minyust,代碼行數:71,代碼來源:PDFParser.cs

示例12: CompareInnerText

        /// <summary>
        /// Compares the first-page content streams of two PDF files token by token.
        /// </summary>
        /// <remarks>
        /// Tokens must agree in type. Numeric tokens may differ by up to 0.001; all other tokens
        /// must have equal string values. The walk is driven by the first file's tokens and ends
        /// when they run out, so tokens remaining in the second file are not examined.
        /// </remarks>
        /// <param name="path1">path of the first PDF</param>
        /// <param name="path2">path of the second PDF</param>
        /// <returns>
        /// <c>false</c> at the first mismatch or if the second stream ends first; otherwise <c>true</c>.
        /// </returns>
        virtual public bool CompareInnerText(String path1, String path2) {
            PdfReader firstReader = new PdfReader(path1);
            byte[] firstBytes = firstReader.GetPageContent(1);
            PRTokeniser firstTokens =
                new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().CreateSource(firstBytes)));

            PdfReader secondReader = new PdfReader(path2);
            byte[] secondBytes = secondReader.GetPageContent(1);
            PRTokeniser secondTokens =
                new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().CreateSource(secondBytes)));

            try {
                while (firstTokens.NextToken()) {
                    // Second stream exhausted early, or token kinds disagree.
                    if (!secondTokens.NextToken())
                        return false;
                    if (firstTokens.TokenType != secondTokens.TokenType)
                        return false;

                    // Types are known to match here, so checking one side is enough.
                    if (secondTokens.TokenType == PRTokeniser.TokType.NUMBER) {
                        if (Math.Abs(float.Parse(firstTokens.StringValue, CultureInfo.InvariantCulture)
                                     - float.Parse(secondTokens.StringValue, CultureInfo.InvariantCulture)) > 0.001)
                            return false;
                        continue;
                    }

                    if (!firstTokens.StringValue.Equals(secondTokens.StringValue))
                        return false;
                }
                return true;
            }
            finally {
                firstReader.Close();
                secondReader.Close();
            }
        }
開發者ID:Niladri24dutta,項目名稱:itextsharp,代碼行數:38,代碼來源:ChunkTest.cs

示例13: OpenPdf

        /// <summary>
        /// Lets the user pick a PDF file and loads the string tokens of each page into
        /// <c>_pdfPages</c>, one entry per page.
        /// </summary>
        /// <remarks>
        /// The page list is cleared first and <c>CurrentIndex</c> is reset to 1 at the end,
        /// whether or not a file was chosen. Any exception is reported to the user in a
        /// message box.
        /// </remarks>
        private void OpenPdf()
        {
            _pdfPages.Clear();
            try
            {
                var dialog = new OpenFileDialog();
                dialog.DefaultExt = ".pdf";
                dialog.Filter = "Pdf documents (.pdf)|*.pdf";

                if (dialog.ShowDialog() == true)
                {
                    var pdfReader = new PdfReader(dialog.FileName);
                    for (int pageNumber = 1; pageNumber <= pdfReader.NumberOfPages; pageNumber++)
                    {
                        byte[] pageBytes = pdfReader.GetPageContent(pageNumber);
                        var tokeniser = new PRTokeniser(pageBytes);
                        var pageText = new StringBuilder();

                        // Keep only the string tokens of the content stream.
                        while (tokeniser.NextToken())
                        {
                            if (tokeniser.TokenType != PRTokeniser.TokType.STRING)
                            {
                                continue;
                            }
                            pageText.Append(tokeniser.StringValue);
                        }

                        _pdfPages.Add(pageText.ToString());
                    }
                }

                RaisePropertyChanged("MaxIndex");
            }
            catch (Exception)
            {
                MessageBox.Show("Fail to load file");
            }

            CurrentIndex = 1;
        }
開發者ID:tikrimi,項目名稱:Tools,代碼行數:40,代碼來源:MainViewModel.cs

示例14: ExtractText

        /// <summary>
        /// Extracts the text of every page of a PDF file.
        /// </summary>
        /// <param name="inFileName">path of the PDF file to read.</param>
        /// <returns>
        /// the extracted text, each page followed by a single space. If extraction fails part-way,
        /// the text gathered up to that point is returned.
        /// </returns>
        internal string ExtractText(string inFileName)
        {
            PdfReader reader = new PdfReader(inFileName);
            // A builder avoids re-copying the accumulated text for every page.
            System.Text.StringBuilder results = new System.Text.StringBuilder();

            try
            {
                int pageCount = reader.NumberOfPages;
                for (int page = 1; page <= pageCount; page++)
                {
                    results.Append(ExtractTextFromPDFBytes(reader.GetPageContent(page)) + " ");
                }
            }
            catch (Exception m)
            {
                // NOTE(review): the exception object is constructed but never thrown, exactly as
                // before; presumably its constructor records the message — confirm.
                MyException mobj = new MyException("ExtractText() : " + m.Message);
            }
            finally
            {
                // The original never released the reader.
                reader.Close();
            }

            return results.ToString();
        }
開發者ID:TushChandak,項目名稱:Moogle,代碼行數:21,代碼來源:PDFParser.cs

示例15: GetInvoice

    /// <summary>
    /// Searches a PDF for the page belonging to a member number and, when one is found and an
    /// output file is given, extracts that page into the output file.
    /// </summary>
    /// <remarks>
    /// A page matches when its extracted text contains the member number enclosed in "\n\r"
    /// sequences and the second '\r'-separated segment of the text does not start with "-".
    /// When several pages match, the last one is extracted.
    /// </remarks>
    /// <param name="inFileName">the full path to the pdf file.</param>
    /// <param name="outputFile">file the matching page is extracted to; no page is extracted when null or empty.</param>
    /// <param name="memberNumber">the member number to look for.</param>
    /// <param name="notUsed">ignored.</param>
    /// <returns>true if a matching page was found, otherwise false</returns>
    private static Boolean GetInvoice(string inFileName, String outputFile, String memberNumber, Boolean notUsed)
    {
        Boolean memberFound = false;
        int pageFound = -1;
        string marker = "\n\r" + memberNumber + "\n\r";

        // Create a reader for the given PDF file
        PdfReader reader = new PdfReader(inFileName);
        try
        {
            int pageCount = reader.NumberOfPages;
            for (int page = 1; page <= pageCount; page++)
            {
                string txt = ExtractTextFromPDFBytes(reader.GetPageContent(page));

                if (!txt.Contains(marker))
                    continue;

                // Split only after the marker matched: the marker itself holds two '\r', so
                // index 1 always exists here. Splitting every page up front threw
                // IndexOutOfRangeException on pages whose text has no '\r' at all.
                string number = txt.Split('\r')[1];
                if (number.StartsWith("-"))
                    continue;

                if (!String.IsNullOrEmpty(outputFile))
                    pageFound = page;

                memberFound = true;
            }
        }
        finally
        {
            // The original never released the reader.
            reader.Close();
        }

        if (memberFound && pageFound > -1)
            ExtractPages(inFileName, outputFile, pageFound, pageFound);

        return memberFound;
    }
開發者ID:NNSostack,項目名稱:KIFWeb,代碼行數:95,代碼來源:PDFParser.cs


注:本文中的iTextSharp.text.pdf.PdfReader.GetPageContent方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。