本文整理匯總了C#中iTextSharp.text.pdf.PdfReader.GetPageContent方法的典型用法代碼示例。如果您正苦於以下問題:C# PdfReader.GetPageContent方法的具體用法?C# PdfReader.GetPageContent怎麽用?C# PdfReader.GetPageContent使用的例子?那麽,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類iTextSharp.text.pdf.PdfReader的用法示例。
在下文中一共展示了PdfReader.GetPageContent方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的C#代碼示例。
示例1: RemoveLayers
/// <summary>
/// Strips the named OCG layers out of the PDF document held by <paramref name="reader"/>.
/// The document is modified in place.
/// </summary>
/// <param name="reader"> the PdfReader holding the document to modify </param>
/// <param name="layers"> the names of the OCG layers to remove </param>
/// <exception cref="IOException"> declared by the original documentation; not raised directly by the code shown here </exception>
public virtual void RemoveLayers(PdfReader reader, params string[] layers)
{
    int pageCount = reader.NumberOfPages;

    // Hand every page its own content bytes back before any parsing starts.
    for (int pageNo = 1; pageNo <= pageCount; pageNo++)
    {
        reader.SetPageContent(pageNo, reader.GetPageContent(pageNo));
    }

    ICollection<string> ocgs = new HashSet2<string>();
    foreach (string layerName in layers)
    {
        ocgs.Add(layerName);
    }

    OCGParser parser = new OCGParser(ocgs);
    for (int pageNo = 1; pageNo <= pageCount; pageNo++)
    {
        PdfDictionary page = reader.GetPageN(pageNo);
        Parse(parser, page);
        page.Remove(new PdfName("PieceInfo"));
        RemoveAnnots(page, ocgs);
        RemoveProperties(page, ocgs);
    }

    PdfDictionary ocProperties = reader.Catalog.GetAsDict(PdfName.OCPROPERTIES);
    if (ocProperties != null)
    {
        RemoveOCGsFromArray(ocProperties, PdfName.OCGS, ocgs);

        PdfDictionary dEntry = ocProperties.GetAsDict(PdfName.D);
        if (dEntry != null)
        {
            // Same keys, same order as a sequence of individual calls.
            PdfName[] keys =
            {
                PdfName.ON, PdfName.OFF, PdfName.LOCKED,
                PdfName.RBGROUPS, PdfName.ORDER, PdfName.AS
            };
            foreach (PdfName key in keys)
            {
                RemoveOCGsFromArray(dEntry, key, ocgs);
            }
        }
    }

    reader.RemoveUnusedObjects();
}
示例2: IsTextInPdf
/// <summary>
/// Reports whether the text extracted from any page of a PDF file contains the given text.
/// </summary>
/// <param name="inFileName">Path of the PDF file to open.</param>
/// <param name="textToFind">Text to search for; compared ordinally (exact, case-sensitive).</param>
/// <returns>
/// <c>true</c> as soon as one page's extracted text contains <paramref name="textToFind"/>;
/// <c>false</c> when no page matches, when <paramref name="textToFind"/> is null, or when the
/// file cannot be opened or processed.
/// </returns>
public bool IsTextInPdf(string inFileName, string textToFind)
{
    // A null search text can never match; answer without touching the file.
    if (textToFind == null)
    {
        return false;
    }

    try
    {
        // Create a reader for the given PDF file
        using (PdfReader reader = new PdfReader(inFileName))
        {
            int pageCount = reader.NumberOfPages;
            for (int page = 1; page <= pageCount; page++)
            {
                string pageText = ExtractTextFromPDFBytes(reader.GetPageContent(page));

                // Ordinal search: a plain substring test must not depend on the current culture.
                if (pageText.IndexOf(textToFind, System.StringComparison.Ordinal) != -1)
                {
                    return true;
                }
            }
            return false;
        }
    }
    catch
    {
        // Deliberate best-effort: any failure to read or parse the file is reported as "not found".
        return false;
    }
}
示例3: Post
/// <summary>
/// Compresses a PDF supplied as base64 and returns the rewritten document as base64.
/// </summary>
/// <param name="base64Pdf">Model whose <c>data</c> member holds the base64 encoded PDF.</param>
/// <returns>
/// A JSON <c>Base64Pdf</c> whose <c>data</c> member is the rewritten PDF; a BadRequest result when the
/// model or its data is missing or is not valid base64; an InternalServerError result for any other failure.
/// </returns>
public IHttpActionResult Post(Base64Pdf base64Pdf)
{
    // A missing model is a client error, not a server fault.
    if (base64Pdf == null || base64Pdf.data == null)
    {
        return BadRequest("Check supplied pdf model");
    }

    try
    {
        byte[] data;
        try
        {
            data = Convert.FromBase64String(base64Pdf.data);
        }
        catch (FormatException)
        {
            // Malformed base64 is also the caller's mistake.
            return BadRequest("Check supplied pdf model");
        }

        //Compress
        byte[] compressedData;
        using (var memStream = new MemoryStream())
        {
            var reader = new PdfReader(data);
            try
            {
                var stamper = new PdfStamper(reader, memStream, PdfWriter.VERSION_1_4);
                var pageNum = reader.NumberOfPages;

                // Re-set each page's content from its own bytes.
                // NOTE(review): presumably so the streams are re-encoded on write - confirm against iTextSharp.
                for (var i = 1; i <= pageNum; i++)
                {
                    reader.SetPageContent(i, reader.GetPageContent(i));
                }

                stamper.SetFullCompression();
                stamper.Close();
            }
            finally
            {
                // Close the reader even when stamping fails part-way.
                reader.Close();
            }

            compressedData = memStream.ToArray();
        }

        var compressedBase64 = Convert.ToBase64String(compressedData);
        return Json(new Base64Pdf { data = compressedBase64 });
    }
    catch (Exception ex)
    {
        return InternalServerError(ex);
    }
}
示例4: TestMultipleDocuments
/// <summary>
/// Converts two copies of the same .docx file into one PDF and checks that the result has
/// two pages whose content streams are the same length.
/// </summary>
public void TestMultipleDocuments()
{
    string documentPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "test_files\\documents\\document.docx");
    byte[] testFile1 = File.ReadAllBytes(documentPath);
    byte[] testFile2 = File.ReadAllBytes(documentPath);

    Dictionary<string, byte[]> files = new Dictionary<string, byte[]>();
    files.Add("document1.docx", testFile1);
    files.Add("document2.docx", testFile2);

    PdfConverter converter = new PdfConverter();
    byte[] pdf = converter.ConvertFiles(files, new ConversionOptions());
    Assert.IsNotNull(pdf);

    PdfReader reader = new PdfReader(pdf);
    try
    {
        // Check the page count first so a wrong count fails with a clear message
        // instead of an error from asking for a page that does not exist.
        Assert.AreEqual(2, reader.NumberOfPages);

        byte[] page1 = reader.GetPageContent(1);
        byte[] page2 = reader.GetPageContent(2);

        // Both inputs are the same document, so the two pages are compared with each other
        // (comparing page1 with itself can never fail).
        Assert.AreEqual(page1.Length, page2.Length);
    }
    finally
    {
        reader.Close();
    }
}
示例5: SetPageContentTest01
/// <summary>
/// Writes every page's content back to itself through a PdfStamper and compares the
/// produced file with the stored reference "cmp_out1.pdf".
/// </summary>
public void SetPageContentTest01() {
    String outPdf = DestFolder + "out1.pdf";
    PdfReader reader = new PdfReader(TestResourceUtils.GetResourceAsStream(TestResourcesPath, "in.pdf"));
    FileStream output = new FileStream(outPdf, FileMode.Create);
    PdfStamper stamper = new PdfStamper(reader, output);

    reader.EliminateSharedStreams();

    int lastPage = reader.NumberOfPages;
    for (int pageNo = 1; pageNo <= lastPage; pageNo++) {
        reader.SetPageContent(pageNo, reader.GetPageContent(pageNo));
    }
    stamper.Close();

    CompareTool comparer = new CompareTool();
    Assert.Null(comparer.CompareByContent(
        outPdf,
        TestResourceUtils.GetResourceAsTempFile(TestResourcesPath, "cmp_out1.pdf"),
        DestFolder,
        "diff_"));
}
示例6: Read
/// <summary>
/// Reads the raw content stream of every page of the PDF at <c>_filePath</c>, decodes it with
/// <c>Encoding.Default</c> and passes the text through <c>GetDataConvertedData</c>.
/// </summary>
/// <returns>One converted string per page, in page order.</returns>
public static List<String> Read()
{
    var pages = new List<String>();
    var pdfReader = new PdfReader(_filePath);
    try
    {
        int pageCount = pdfReader.NumberOfPages;
        for (int pageNo = 1; pageNo <= pageCount; pageNo++)
        {
            // Decoding directly with Encoding.Default gives the same string as converting the
            // bytes to UTF-8 first and then decoding them as UTF-8.
            string textFromPage = Encoding.Default.GetString(pdfReader.GetPageContent(pageNo));
            pages.Add(GetDataConvertedData(textFromPage));
        }
    }
    finally
    {
        // Close the reader even if a page fails to decode or convert.
        pdfReader.Close();
    }
    return pages;
}
示例7: Extract_Text
/// <summary> Writes the text of every page of a PDF file to a UTF-8 text file </summary>
/// <param name="PDF_In_Name">Full path to the pdf file</param>
/// <param name="Text_Out_Name">Output file name for the extracted text </param>
/// <returns>TRUE if the page loop ran to the end, FALSE if opening either file (or the loop itself) threw</returns>
public static bool Extract_Text(string PDF_In_Name, string Text_Out_Name)
{
    PdfReader pdf = null;
    StreamWriter writer = null;
    try
    {
        // The reader is opened first, so a bad input never creates the output file.
        pdf = new PdfReader(PDF_In_Name);
        writer = new StreamWriter(Text_Out_Name, false, Encoding.UTF8);

        int pageNo = 1;
        while (pageNo <= pdf.NumberOfPages)
        {
            try
            {
                string pageText = ExtractTextFromPDFBytes(pdf.GetPageContent(pageNo));
                if (pageText.Trim().Length != 0)
                {
                    writer.WriteLine();
                    writer.WriteLine("PAGE " + pageNo);
                    writer.WriteLine();
                    writer.WriteLine(pageText);
                }
            }
            catch
            {
                // A page that cannot be extracted or written is skipped; the remaining pages still run.
            }
            pageNo++;
        }

        return true;
    }
    catch
    {
        return false;
    }
    finally
    {
        if (writer != null)
        {
            writer.Close();
        }
        if (pdf != null)
        {
            pdf.Close();
        }
    }
}
示例8: InspectPdf
// ---------------------------------------------------------------------------
/// <summary>
/// Builds a text report of a PDF. For every page it lists the page dictionary, an XObject
/// summary, the raw content stream and the text found by a LocationTextExtractionStrategy.
/// </summary>
/// <remarks>
/// Uses code from PdfContentReaderTool.ListContentStreamForPage(), adapted so that a byte
/// array can be passed in instead of a file path.
/// </remarks>
/// <param name="pdf">the original PDF</param>
/// <returns>the report text</returns>
public string InspectPdf(byte[] pdf)
{
    PdfReader reader = new PdfReader(pdf);
    int lastPage = reader.NumberOfPages;
    StringBuilder report = new StringBuilder();

    for (int page = 1; page <= lastPage; page++)
    {
        report.AppendLine("==============Page " + page + "====================");

        report.AppendLine("- - - - - Dictionary - - - - - -");
        PdfDictionary pageDict = reader.GetPageN(page);
        string dictionaryDetail = PdfContentReaderTool.GetDictionaryDetail(pageDict);
        report.AppendLine(dictionaryDetail);

        report.AppendLine("- - - - - XObject Summary - - - - - -");
        PdfDictionary resources = pageDict.GetAsDict(PdfName.RESOURCES);
        report.AppendLine(PdfContentReaderTool.GetXObjectDetail(resources));

        report.AppendLine("- - - - - Content Stream - - - - - -");
        RandomAccessFileOrArray file = reader.SafeFile;
        byte[] content = reader.GetPageContent(page, file);
        file.Close();
        // Each byte is appended as the char with the same numeric value.
        for (int k = 0; k < content.Length; k++)
        {
            report.Append((char)content[k]);
        }

        report.AppendLine("- - - - - Text Extraction - - - - - -");
        String pageText = PdfTextExtractor.GetTextFromPage(
            reader, page, new LocationTextExtractionStrategy());
        report.AppendLine(pageText.Length == 0
            ? "No text found on page " + page
            : pageText);

        report.AppendLine();
    }

    return report.ToString();
}
示例9: ExtractText
/// <summary>
/// Extracts the text of pages 1 through <paramref name="topage"/> of a PDF file.
/// </summary>
/// <param name="inFileName">the full path to the pdf file.</param>
/// <param name="topage">number of the last page to extract (pages are numbered from 1, inclusive).</param>
/// <returns>
/// the extracted page texts, each followed by one space; null when no page was processed
/// (<paramref name="topage"/> below 1) or when any exception occurred, in which case a
/// timestamped line with the file name is appended to "log_extract.txt".
/// </returns>
public String ExtractText(string inFileName,int topage)
{
    PdfReader reader = null;
    try
    {
        // Create a reader for the given PDF file
        reader = new PdfReader(inFileName);
        Console.Write("Processing: ");

        // NOTE(review): topage is not checked against reader.NumberOfPages here.
        System.Text.StringBuilder outputText = new System.Text.StringBuilder();
        for (int page = 1; page <= topage; page++)
        {
            string pageText = ExtractTextFromPDFBytes(reader.GetPageContent(page));
            outputText.Append(pageText).Append(' ');
        }

        return (outputText.Length == 0) ? null : outputText.ToString();
    }
    catch
    {
        File.AppendAllText("log_extract.txt", DateTime.Now.ToShortDateString() + " " + DateTime.Now.ToShortTimeString() + ": " + inFileName + Environment.NewLine);
        return null;
    }
    finally
    {
        // Close the reader on success and on failure.
        if (reader != null) reader.Close();
    }
}
示例10: ReadContent
// ---------------------------------------------------------------------------
/// <summary>
/// Returns the content stream of the first page of a PDF, decoded as UTF-8 text.
/// </summary>
/// <param name="src">the PDF file as a byte array</param>
/// <returns>the bytes of page 1's content stream decoded with <c>Encoding.UTF8</c></returns>
public string ReadContent(byte[] src) {
    PdfReader reader = new PdfReader(src);
    try {
        byte[] pc = reader.GetPageContent(1);
        return Encoding.UTF8.GetString(pc, 0, pc.Length);
    }
    finally {
        // Close the reader whether or not the page could be read.
        reader.Close();
    }
}
示例11: ExtractText
/// <summary>
/// Decodes the raw content stream of every page of a PDF file as KOI8-R text and
/// concatenates the results, writing "#" progress marks to the debug output as it goes.
/// </summary>
/// <param name="inFileName">Path of the PDF file.</param>
/// <param name="tot">
/// Receives the number of progress marks written inside the page loop (the padding marks
/// written after the loop are not counted), or -1 when an exception occurred.
/// </param>
/// <returns>
/// The decoded content of all pages, each followed by one space; the string "-1" when an
/// exception occurred.
/// </returns>
public string ExtractText(string inFileName, out int tot)
{
    PdfReader reader = null;
    try
    {
        reader = new PdfReader(inFileName);
        Debug.WriteLine("Processing: ");

        int totalLen = 68;
        int pageCount = reader.NumberOfPages;
        float charUnit = ((float)totalLen) / (float)pageCount;
        int totalWritten = 0;
        float curUnit = 0;

        // Look the encoding up once instead of once per page.
        Encoding koi8 = Encoding.GetEncoding("koi8-r");
        StringBuilder outs = new StringBuilder();

        for (int page = 1; page <= pageCount; page++)
        {
            outs.Append(koi8.GetString(reader.GetPageContent(page))).Append(' ');

            // Write the progress.
            if (charUnit >= 1.0f)
            {
                for (int i = 0; i < (int)charUnit; i++)
                {
                    Debug.WriteLine("#");
                    totalWritten++;
                }
            }
            else
            {
                // Less than one mark per page: accumulate until a whole mark is due.
                curUnit += charUnit;
                if (curUnit >= 1.0f)
                {
                    for (int i = 0; i < (int)curUnit; i++)
                    {
                        Debug.WriteLine("#");
                        totalWritten++;
                    }
                    curUnit = 0;
                }
            }
        }

        // Pad the progress output up to totalLen marks; these are not added to totalWritten.
        if (totalWritten < totalLen)
        {
            for (int i = 0; i < (totalLen - totalWritten); i++)
            {
                Debug.WriteLine("#");
            }
        }

        tot = totalWritten;
        return outs.ToString();
    }
    catch (Exception ex)
    {
        Debug.WriteLine("2"+ex.Message);
        tot = -1;
        return "-1";
    }
    finally
    {
        // Close the reader on success and on failure.
        if (reader != null) reader.Close();
    }
}
示例12: CompareInnerText
/// <summary>
/// Compares the first-page content streams of two PDF files token by token.
/// </summary>
/// <remarks>
/// Tokens must have the same type in the same order. NUMBER tokens are equal when their
/// values differ by at most 0.001; all other tokens must have equal string values. Both
/// streams must contain the same number of tokens.
/// </remarks>
/// <param name="path1">Path of the first PDF file.</param>
/// <param name="path2">Path of the second PDF file.</param>
/// <returns><c>true</c> when the two token sequences match, otherwise <c>false</c>.</returns>
public virtual bool CompareInnerText(String path1, String path2) {
    PdfReader reader1 = new PdfReader(path1);
    try {
        byte[] streamBytes1 = reader1.GetPageContent(1);
        PRTokeniser tokenizer1 =
            new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().CreateSource(streamBytes1)));

        // Opened inside the outer try so reader1 is closed even if the second file cannot be read.
        PdfReader reader2 = new PdfReader(path2);
        try {
            byte[] streamBytes2 = reader2.GetPageContent(1);
            PRTokeniser tokenizer2 =
                new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().CreateSource(streamBytes2)));

            while (tokenizer1.NextToken()) {
                // The second stream ended early.
                if (!tokenizer2.NextToken())
                    return false;

                if (tokenizer1.TokenType != tokenizer2.TokenType)
                    return false;

                if (tokenizer1.TokenType == PRTokeniser.TokType.NUMBER) {
                    if (Math.Abs(float.Parse(tokenizer1.StringValue, CultureInfo.InvariantCulture)
                                 - float.Parse(tokenizer2.StringValue, CultureInfo.InvariantCulture)) > 0.001)
                        return false;
                } else if (!tokenizer1.StringValue.Equals(tokenizer2.StringValue))
                    return false;
            }

            // The first stream is exhausted; the streams only match if the second one is too.
            return !tokenizer2.NextToken();
        }
        finally {
            reader2.Close();
        }
    }
    finally {
        reader1.Close();
    }
}
示例13: OpenPdf
/// <summary>
/// Lets the user choose a PDF file and fills <c>_pdfPages</c> with one string per page, built
/// from the STRING tokens of that page's content stream.
/// </summary>
/// <remarks>
/// <c>_pdfPages</c> is cleared before the dialog is shown and <c>CurrentIndex</c> is set to 1
/// at the end, whether or not a file was loaded. Any exception is reported with a message box.
/// </remarks>
private void OpenPdf()
{
    _pdfPages.Clear();
    try
    {
        var openFileDialog = new OpenFileDialog
        {
            DefaultExt = ".pdf",
            Filter = "Pdf documents (.pdf)|*.pdf"
        };
        bool? result = openFileDialog.ShowDialog();
        if (result == true)
        {
            string filename = openFileDialog.FileName;
            var pdfReader = new PdfReader(filename);
            try
            {
                int pageCount = pdfReader.NumberOfPages;
                for (int i = 1; i <= pageCount; i++)
                {
                    byte[] pagesBytes = pdfReader.GetPageContent(i);
                    var token = new PRTokeniser(pagesBytes);
                    var pageContent = new StringBuilder();
                    while (token.NextToken())
                    {
                        // Only string tokens are kept; everything else is skipped.
                        if (token.TokenType == PRTokeniser.TokType.STRING)
                        {
                            pageContent.Append(token.StringValue);
                        }
                    }
                    _pdfPages.Add(pageContent.ToString());
                }
            }
            finally
            {
                // Close the reader even when a page fails to tokenise.
                pdfReader.Close();
            }
        }
        RaisePropertyChanged("MaxIndex");
    }
    catch (Exception)
    {
        MessageBox.Show("Fail to load file");
    }
    CurrentIndex = 1;
}
示例14: ExtractText
/// <summary>
/// Extracts the text of every page of a PDF file.
/// </summary>
/// <param name="inFileName">Path of the PDF file.</param>
/// <returns>
/// The extracted page texts, each followed by one space. If extraction fails part-way, the
/// text gathered up to that point is returned.
/// </returns>
/// <remarks>A failure to open the file is not caught and reaches the caller.</remarks>
internal string ExtractText(string inFileName)
{
    PdfReader reader = new PdfReader(inFileName);
    System.Text.StringBuilder results = new System.Text.StringBuilder();
    try
    {
        int pageCount = reader.NumberOfPages;
        for (int page = 1; page <= pageCount; page++)
        {
            string pageText = ExtractTextFromPDFBytes(reader.GetPageContent(page));
            results.Append(pageText).Append(' ');
        }
    }
    catch (Exception m)
    {
        // NOTE(review): the exception object is built but never thrown, so the failure is
        // swallowed. Kept as-is in case MyException's constructor has side effects such as
        // logging - confirm, then either throw it or log explicitly.
        MyException mobj = new MyException("ExtractText() : " + m.Message);
    }
    finally
    {
        // Close the reader on success and on failure.
        reader.Close();
    }
    return results.ToString();
}
示例15: GetInvoice
/// <summary>
/// Searches the pages of a PDF file for a member number and, when it is found and an output
/// file is given, passes the matching page to <c>ExtractPages</c>.
/// </summary>
/// <param name="inFileName">the full path to the pdf file.</param>
/// <param name="outputFile">
/// output path handed to <c>ExtractPages</c>; when null or empty no page is extracted, but
/// the search result is still returned.
/// </param>
/// <param name="memberNumber">
/// the member number to look for; a page matches when its extracted text contains the number
/// enclosed in "\n\r" on both sides.
/// </param>
/// <param name="notUsed">ignored.</param>
/// <returns>
/// true when at least one page contains the member number and the second '\r'-separated
/// segment of that page's text does not start with "-"; otherwise false.
/// </returns>
/// <remarks>When several pages qualify, the last one is the page that is extracted.</remarks>
private static Boolean GetInvoice(string inFileName, String outputFile, String memberNumber, Boolean notUsed)
{
    Boolean memberFound = false;
    int pageFound = -1;
    string marker = "\n\r" + memberNumber + "\n\r";

    // Create a reader for the given PDF file
    PdfReader reader = new PdfReader(inFileName);
    try
    {
        int pageCount = reader.NumberOfPages;
        for (int page = 1; page <= pageCount; page++)
        {
            string txt = ExtractTextFromPDFBytes(reader.GetPageContent(page));
            if (!txt.Contains(marker))
            {
                continue;
            }

            // The marker contains two '\r' characters, so a matching page always splits into
            // at least three segments. Splitting only after the match means pages with no
            // '\r' no longer cause an IndexOutOfRangeException.
            string number = txt.Split('\r')[1];
            if (!number.StartsWith("-", StringComparison.Ordinal))
            {
                if (!String.IsNullOrEmpty(outputFile))
                {
                    pageFound = page;
                }
                memberFound = true;
            }
        }
    }
    finally
    {
        // Close the reader before the file is handed to ExtractPages.
        reader.Close();
    }

    if (memberFound && pageFound > -1)
    {
        ExtractPages(inFileName, outputFile, pageFound, pageFound);
    }

    return memberFound;
}