当前位置: 首页>>代码示例>>Java>>正文


Java PDFParser类代码示例

本文整理汇总了Java中org.apache.pdfbox.pdfparser.PDFParser的典型用法代码示例。如果您正苦于以下问题:Java PDFParser类的具体用法?Java PDFParser怎么用?Java PDFParser使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


PDFParser类属于org.apache.pdfbox.pdfparser包,在下文中一共展示了PDFParser类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: readPDFDocument

import org.apache.pdfbox.pdfparser.PDFParser; //导入依赖的package包/类
/**
 * Extracts the text of the PDF opened from the field {@code f} and appends it,
 * one entry per line, to the field {@code lines}.
 *
 * <p>Any exception is reported to the user through an error dialog and its
 * stack trace is printed; nothing is rethrown.
 */
private void readPDFDocument() {
	// try-with-resources closes the stream on the failure path too,
	// which the previous trailing fs.close() did not.
	try (FileInputStream fs = new FileInputStream(f)) {
		PDFParser parser = new PDFParser(fs);
		parser.parse();
		COSDocument cosDoc = parser.getDocument();
		PDFTextStripper pdfStripper = new PDFTextStripper();
		PDDocument pdDoc = new PDDocument(cosDoc);
		try {
			String text = pdfStripper.getText(pdDoc);
			for (String line : text.split(System.lineSeparator())) {
				lines.add(line);
			}
		} finally {
			// The parsed document was never released before.
			pdDoc.close();
		}
	} catch (Exception e) {
		JOptionPane.showMessageDialog(null, "Fehler in readPDFDocument",
				"Fehler", JOptionPane.ERROR_MESSAGE);
		e.printStackTrace();
	}
}
 
开发者ID:Steffen93,项目名称:filterit,代码行数:22,代码来源:FileObject.java

示例2: pdftoText

import org.apache.pdfbox.pdfparser.PDFParser; //导入依赖的package包/类
/**
 * Extracts the text of a single page of a PDF file.
 *
 * @param fileName path of the PDF file to read
 * @param pageno   page number passed to the stripper as both start and end page
 * @return the text the stripper produced for that page
 * @throws IOException if the file cannot be opened, parsed or read
 * @throws CryptographyException kept in the signature for existing callers;
 *         this method does not attempt to decrypt the document
 */
static String pdftoText(String fileName,int pageno) throws IOException, CryptographyException 
{
	FileInputStream in = new FileInputStream(new File(fileName));
	try {
		PDFParser parser = new PDFParser(in);
		parser.parse();
		COSDocument cosDoc = parser.getDocument();
		PDFTextStripper pdfStripper = new PDFTextStripper();
		PDDocument pdDoc = new PDDocument(cosDoc);
		try {
			pdfStripper.setStartPage(pageno);
			pdfStripper.setEndPage(pageno);
			return pdfStripper.getText(pdDoc);
		} finally {
			// Release the document even when text extraction fails.
			pdDoc.close();
		}
	} finally {
		// The stream was previously never closed by this method.
		in.close();
	}
}
 
开发者ID:arks-api,项目名称:arks-api,代码行数:26,代码来源:PDFTextParser.java

示例3: rotate180

import org.apache.pdfbox.pdfparser.PDFParser; //导入依赖的package包/类
/**
 * Rotates every page of the stored PDF identified by the {@code document}
 * request parameter by 180 degrees and saves the result over the original file.
 *
 * <p>For each page, the cached version is deleted through
 * {@code ManageDocumentAction.deleteCacheVersion} using a 1-based page index.
 *
 * @return always {@code null}
 * @throws Exception if the document cannot be read, parsed or saved
 */
public ActionForward rotate180(ActionMapping mapping, ActionForm form, HttpServletRequest request, HttpServletResponse response) throws Exception {
	Document doc = documentDao.getDocument(request.getParameter("document"));

	String docdownload = oscar.OscarProperties.getInstance().getProperty("DOCUMENT_DIR");
	String docPath = docdownload + doc.getDocfilename();

	FileInputStream input = new FileInputStream(docPath);
	try {
		PDFParser parser = new PDFParser(input);
		parser.parse();
		PDDocument pdf = parser.getPDDocument();
		try {
			int x = 1;
			for (Object p : pdf.getDocumentCatalog().getAllPages()) {
				PDPage pg = (PDPage)p;
				// A page without an explicit rotation is treated as 0 degrees.
				Integer r = (pg.getRotation() != null ? pg.getRotation() : 0);
				pg.setRotation((r+180)%360);

				ManageDocumentAction.deleteCacheVersion(doc, x);
				x++;
			}

			pdf.save(docPath);
		} finally {
			// Close the document even if rotating or saving fails.
			pdf.close();
		}
	} finally {
		input.close();
	}

	return null;
}
 
开发者ID:williamgrosset,项目名称:OSCAR-ConCert,代码行数:27,代码来源:SplitDocumentAction.java

示例4: rotate90

import org.apache.pdfbox.pdfparser.PDFParser; //导入依赖的package包/类
/**
 * Rotates every page of the stored PDF identified by the {@code document}
 * request parameter by 90 degrees and saves the result over the original file.
 *
 * <p>For each page, the cached version is deleted through
 * {@code ManageDocumentAction.deleteCacheVersion} using a 1-based page index.
 *
 * @return always {@code null}
 * @throws Exception if the document cannot be read, parsed or saved
 */
public ActionForward rotate90(ActionMapping mapping, ActionForm form, HttpServletRequest request, HttpServletResponse response) throws Exception {
	Document doc = documentDao.getDocument(request.getParameter("document"));

	String docdownload = oscar.OscarProperties.getInstance().getProperty("DOCUMENT_DIR");
	String docPath = docdownload + doc.getDocfilename();

	FileInputStream input = new FileInputStream(docPath);
	try {
		PDFParser parser = new PDFParser(input);
		parser.parse();
		PDDocument pdf = parser.getPDDocument();
		try {
			int x = 1;
			for (Object p : pdf.getDocumentCatalog().getAllPages()) {
				PDPage pg = (PDPage)p;
				// A page without an explicit rotation is treated as 0 degrees.
				Integer r = (pg.getRotation() != null ? pg.getRotation() : 0);
				pg.setRotation((r+90)%360);

				ManageDocumentAction.deleteCacheVersion(doc, x);
				x++;
			}

			pdf.save(docPath);
		} finally {
			// Close the document even if rotating or saving fails.
			pdf.close();
		}
	} finally {
		input.close();
	}

	return null;
}
 
开发者ID:williamgrosset,项目名称:OSCAR-ConCert,代码行数:27,代码来源:SplitDocumentAction.java

示例5: testPdfFromStringTo

import org.apache.pdfbox.pdfparser.PDFParser; //导入依赖的package包/类
/**
 * Renders an HTML string containing a non-ASCII character to PDF and checks
 * that the character survives in the extracted text.
 */
@Test
public void testPdfFromStringTo() throws Exception {

    // GIVEN a html template containing special characters that java stores in utf-16 internally
    Pdf pdf = new Pdf();
    pdf.addPage("<html><head><meta charset=\"utf-8\"></head><h1>Müller</h1></html>", PageType.htmlAsString);

    // WHEN
    byte[] pdfBytes = pdf.getPDF();

    PDFParser parser = new PDFParser(new ByteArrayInputStream(pdfBytes));

    // that is a valid PDF (otherwise an IOException occurs)
    parser.parse();
    PDFTextStripper pdfTextStripper = new PDFTextStripper();
    PDDocument document = new PDDocument(parser.getDocument());
    String pdfText;
    try {
        pdfText = pdfTextStripper.getText(document);
    } finally {
        // Release the parsed document; it was previously left open.
        document.close();
    }

    // THEN
    Assert.assertThat("document should contain the creditorName", pdfText, containsString("Müller"));
}
 
开发者ID:eamonfoy,项目名称:trello-to-markdown,代码行数:20,代码来源:PdfTest.java

示例6: testMultiplePages

import org.apache.pdfbox.pdfparser.PDFParser; //导入依赖的package包/类
/**
 * Builds a four-page PDF (three HTML strings and one URL) and checks that the
 * text of the last page is present in the output.
 */
@Test
public void testMultiplePages() throws Exception {
    // GIVEN four pages; the third is added by URL rather than from a string
    Pdf pdf = new Pdf();
    pdf.addPage("<html><head><meta charset=\"utf-8\"></head><h1>Page 1</h1></html>", PageType.htmlAsString);
    pdf.addPage("<html><head><meta charset=\"utf-8\"></head><h1>Page 2</h1></html>", PageType.htmlAsString);
    pdf.addPage("http://www.google.com", PageType.url);
    pdf.addPage("<html><head><meta charset=\"utf-8\"></head><h1>Page 4</h1></html>", PageType.htmlAsString);

    // WHEN
    byte[] pdfBytes = pdf.getPDF();

    PDFParser parser = new PDFParser(new ByteArrayInputStream(pdfBytes));

    // that is a valid PDF (otherwise an IOException occurs)
    parser.parse();
    PDFTextStripper pdfTextStripper = new PDFTextStripper();
    PDDocument document = new PDDocument(parser.getDocument());
    String pdfText;
    try {
        pdfText = pdfTextStripper.getText(document);
    } finally {
        // Release the parsed document; it was previously left open.
        document.close();
    }

    // THEN
    Assert.assertThat("document should contain the fourth page name", pdfText, containsString("Page 4"));
}
 
开发者ID:eamonfoy,项目名称:trello-to-markdown,代码行数:21,代码来源:PdfTest.java

示例7: pdftoText

import org.apache.pdfbox.pdfparser.PDFParser; //导入依赖的package包/类
/**
 * Parses a PDF from the given stream and returns its extracted text.
 *
 * @param is    stream supplying the PDF bytes
 * @param stats when {@code true}, the extracted text is also passed to
 *              {@code vc.addAll}
 * @return the text produced by the stripper
 * @throws IOException if parsing or text extraction fails
 */
public  String pdftoText(InputStream is, boolean stats) throws IOException {
    COSDocument rawDocument = null;
    PDDocument document = null;
    try {
        final PDFParser pdfParser = new PDFParser(is);
        pdfParser.parse();
        rawDocument = pdfParser.getDocument();
        final PDFTextStripper stripper = new PDFTextStripper();
        document = new PDDocument(rawDocument);
        final String extracted = stripper.getText(document);
        if (!stats) {
            return extracted;
        }
        vc.addAll(extracted);
        return extracted;
    } finally {
        // Low-level document first, then the wrapper, each only if it was created.
        if (rawDocument != null) {
            rawDocument.close();
        }
        if (document != null) {
            document.close();
        }
    }
}
 
开发者ID:judovana,项目名称:JavadocOfflineSearch,代码行数:24,代码来源:PdfAttempter.java

示例8: testPdfFromStringTo

import org.apache.pdfbox.pdfparser.PDFParser; //导入依赖的package包/类
/**
 * Renders an HTML string containing a non-ASCII character to PDF, saves a copy
 * into a temporary folder, and checks that the character survives in the
 * extracted text.
 */
@Test
public void testPdfFromStringTo() throws Exception {

    // GIVEN an html template containing special characters that java stores in utf-16 internally
    Pdf pdf = pdfBuilder.build();
    pdf.addPage("<html><head><meta charset=\"utf-8\"></head><h1>Müller</h1></html>", PageType.htmlAsString);

    String tempFolder = temporaryFolder.newFolder().getPath();
    pdf.saveAs(tempFolder+"/output.pdf");

    // WHEN
    byte[] pdfBytes = pdf.getPDF();

    RandomAccessBufferedFileInputStream source =
            new RandomAccessBufferedFileInputStream(new ByteArrayInputStream(pdfBytes));
    String pdfText;
    try {
        PDFParser parser = new PDFParser(source);

        // that is a valid PDF (otherwise an IOException occurs)
        parser.parse();
        PDFTextStripper pdfTextStripper = new PDFTextStripper();
        PDDocument document = new PDDocument(parser.getDocument());
        try {
            pdfText = pdfTextStripper.getText(document);
        } finally {
            // Release the parsed document; it was previously left open.
            document.close();
        }
    } finally {
        // The random-access source was never closed before.
        source.close();
    }

    // THEN
    assertThat("document should contain the creditorName", pdfText, containsString("Müller"));
}
 
开发者ID:sastix,项目名称:cms,代码行数:23,代码来源:PdfTest.java

示例9: testPdfWithXvfb

import org.apache.pdfbox.pdfparser.PDFParser; //导入依赖的package包/类
/**
 * Generates a PDF from a URL using a wrapper configured with Xvfb parameters
 * and checks that the extracted text contains the expected word.
 *
 * <p>The PDF is also written to {@code output.pdf} via {@code saveAs}.
 */
@Test
public void testPdfWithXvfb() throws Exception {

    // GIVEN a wrapper configuration that carries the Xvfb parameters
    XvfbConfig xc = new XvfbConfig();
    xc.addParams(new Param("--auto-servernum"), new Param("--server-num=1"));

    WrapperConfig wc = new WrapperConfig();
    wc.setXvfbConfig(xc);

    Pdf pdf = new Pdf(wc);
    pdf.addPage("http://www.google.com", PageType.url);

    pdf.saveAs("output.pdf");

    // WHEN
    byte[] pdfBytes = pdf.getPDF();

    PDFParser parser = new PDFParser(new ByteArrayInputStream(pdfBytes));

    // that is a valid PDF (otherwise an IOException occurs)
    parser.parse();
    PDFTextStripper pdfTextStripper = new PDFTextStripper();
    PDDocument document = new PDDocument(parser.getDocument());
    String pdfText;
    try {
        pdfText = pdfTextStripper.getText(document);
    } finally {
        // Release the parsed document; it was previously left open.
        document.close();
    }

    // THEN
    Assert.assertThat("document should be generated", pdfText, containsString("Google"));
}
 
开发者ID:eamonfoy,项目名称:trello-to-markdown,代码行数:27,代码来源:XvfbTest.java

示例10: readThesaurus

import org.apache.pdfbox.pdfparser.PDFParser; //导入依赖的package包/类
/**
 * Runs a {@code ThesaurusPDFStripper} over {@code thesaurus.pdf} in the given
 * directory, starting at page 2, and returns what the stripper collected.
 *
 * @param dir directory that contains {@code thesaurus.pdf}
 * @return the stripper's {@code substances} after processing the document
 * @throws IOException if the file cannot be opened, parsed or read
 */
private List<SubstanceInteraction> readThesaurus(File dir) throws IOException {
    File file = new File(dir, "thesaurus.pdf");

    RandomAccessBufferedFileInputStream source = new RandomAccessBufferedFileInputStream(file);
    try {
        PDFParser parser = new PDFParser(source);
        parser.parse();
        COSDocument cosDoc = parser.getDocument();
        try {
            ThesaurusPDFStripper pdfStripper = new ThesaurusPDFStripper();
            PDDocument pdDoc = new PDDocument(cosDoc);
            pdfStripper.setStartPage(2);
            pdfStripper.setEndPage(pdDoc.getNumberOfPages());

            // The returned text is ignored; the stripper fills 'substances' as it runs.
            pdfStripper.getText(pdDoc);

            return pdfStripper.substances;
        } finally {
            // Previously skipped whenever stripping threw.
            cosDoc.close();
        }
    } finally {
        // The random-access source was never closed before.
        source.close();
    }
}
 
开发者ID:Ellixo,项目名称:MedicamentDB,代码行数:18,代码来源:InteractionService.java

示例11: parse

import org.apache.pdfbox.pdfparser.PDFParser; //导入依赖的package包/类
/**
 * Runs an {@code AdvancedPDFStripper} over {@code thesaurus.pdf} from the test
 * classpath root, starting at page 2, and prints the collected substances.
 * The test makes no assertions; it only fails if an exception is thrown.
 */
@Test
public void parse() throws IOException {
    File file = new File(MedicamentTest.class.getClassLoader().getResource(".").getFile(), "thesaurus.pdf");

    RandomAccessBufferedFileInputStream source = new RandomAccessBufferedFileInputStream(file);
    try {
        PDFParser parser = new PDFParser(source);
        parser.parse();
        COSDocument cosDoc = parser.getDocument();
        try {
            AdvancedPDFStripper pdfStripper = new AdvancedPDFStripper();
            PDDocument pdDoc = new PDDocument(cosDoc);
            pdfStripper.setStartPage(2);
            pdfStripper.setEndPage(pdDoc.getNumberOfPages());

            // The returned text is ignored; the stripper fills 'substances' as it runs.
            pdfStripper.getText(pdDoc);

            System.out.println(pdfStripper.substances);
        } finally {
            // The document was previously never closed.
            cosDoc.close();
        }
    } finally {
        // The random-access source was previously never closed.
        source.close();
    }
}
 
开发者ID:Ellixo,项目名称:MedicamentDB,代码行数:17,代码来源:PDFTest.java

示例12: testPdfFromStringTo

import org.apache.pdfbox.pdfparser.PDFParser; //导入依赖的package包/类
/**
 * Renders an HTML string containing a non-ASCII character to PDF and checks
 * that the character survives in the extracted text.
 */
@Test
public void testPdfFromStringTo() throws Exception {

    // GIVEN a html template containing special characters that java stores in utf-16 internally
    Pdf pdf = new Pdf();
    pdf.addPageFromString("<html><head><meta charset=\"utf-8\"></head><h1>Müller</h1></html>");

    // WHEN
    byte[] pdfBytes = pdf.getPDF();

    PDFParser parser = new PDFParser(new ByteArrayInputStream(pdfBytes));

    // that is a valid PDF (otherwise an IOException occurs)
    parser.parse();
    PDFTextStripper pdfTextStripper = new PDFTextStripper();
    PDDocument document = new PDDocument(parser.getDocument());
    String pdfText;
    try {
        pdfText = pdfTextStripper.getText(document);
    } finally {
        // Release the parsed document; it was previously left open.
        document.close();
    }

    // THEN
    Assert.assertThat("document should contain the creditorName", pdfText, containsString("Müller"));
}
 
开发者ID:jhonnymertz,项目名称:java-wkhtmltopdf-wrapper,代码行数:20,代码来源:PdfTest.java

示例13: testMultiplePages

import org.apache.pdfbox.pdfparser.PDFParser; //导入依赖的package包/类
/**
 * Builds a four-page PDF (three HTML strings and one URL) and checks that the
 * text of the last page is present in the output.
 */
@Test
public void testMultiplePages() throws Exception {
    // GIVEN four pages; the third is added by URL rather than from a string
    Pdf pdf = new Pdf();
    pdf.addPageFromString("<html><head><meta charset=\"utf-8\"></head><h1>Page 1</h1></html>");
    pdf.addPageFromString("<html><head><meta charset=\"utf-8\"></head><h1>Page 2</h1></html>");
    pdf.addPageFromUrl("http://www.google.com");
    pdf.addPageFromString("<html><head><meta charset=\"utf-8\"></head><h1>Page 4</h1></html>");

    // WHEN
    byte[] pdfBytes = pdf.getPDF();

    PDFParser parser = new PDFParser(new ByteArrayInputStream(pdfBytes));

    // that is a valid PDF (otherwise an IOException occurs)
    parser.parse();
    PDFTextStripper pdfTextStripper = new PDFTextStripper();
    PDDocument document = new PDDocument(parser.getDocument());
    String pdfText;
    try {
        pdfText = pdfTextStripper.getText(document);
    } finally {
        // Release the parsed document; it was previously left open.
        document.close();
    }

    // THEN
    Assert.assertThat("document should contain the fourth page name", pdfText, containsString("Page 4"));
}
 
开发者ID:jhonnymertz,项目名称:java-wkhtmltopdf-wrapper,代码行数:21,代码来源:PdfTest.java

示例14: getFile

import org.apache.pdfbox.pdfparser.PDFParser; //导入依赖的package包/类
/**
 * Downloads the PDF at the given URL and stores its extracted text in the
 * field {@code text}.
 *
 * @param url location of the PDF to fetch
 * @return {@code true} if the document was fetched and its text extracted;
 *         {@code false} if any exception occurred (its stack trace is printed)
 */
private boolean getFile(String url)
{
    try
    {
        URL u = new URL(url);
        URLConnection con = u.openConnection();
        InputStream in = con.getInputStream();
        try
        {
            PDFParser p = new PDFParser(in);
            p.parse();
            PDDocument pdoc = new PDDocument(p.getDocument());
            try
            {
                PDFTextStripper pts = new PDFTextStripper();
                text = pts.getText(pdoc);
            }
            finally
            {
                // Previously skipped whenever text extraction threw.
                pdoc.close();
            }
        }
        finally
        {
            // The connection stream was never explicitly closed before.
            in.close();
        }

        return true;
    }
    catch (Exception e)
    {
        e.printStackTrace();
        return false;
    }
}
 
开发者ID:spupyrev,项目名称:swcv,代码行数:23,代码来源:PDFReader.java

示例15: shrinkMe

import org.apache.pdfbox.pdfparser.PDFParser; //导入依赖的package包/类
/**
 * Shrinks the PDF read from the {@code input} field.
 *
 * <p>If the {@code compQual} field is negative it is first reset to
 * {@code compQualDefault}. Every page's resources are then passed to
 * {@code scanResources} together with the document.
 *
 * <p>The returned document is still open; closing it is left to the caller.
 * If parsing or scanning fails, the partially processed document and the
 * input stream are closed before the exception propagates.
 *
 * @return The processed {@code PDDocument}
 * @throws FileNotFoundException if {@code input} cannot be opened
 * @throws IOException if the document cannot be parsed or processed
 */
private PDDocument shrinkMe() 
        throws FileNotFoundException, IOException {
     if(compQual < 0)
         compQual = compQualDefault;
     final FileInputStream fis = new FileInputStream(input);
     PDDocument doc = null;
     boolean succeeded = false;
     try {
          final PDFParser parser = new PDFParser(fis);
          parser.parse();
          doc = parser.getPDDocument();
          for(Object p : doc.getDocumentCatalog().getAllPages()) {
               if(!(p instanceof PDPage))
                    continue;
               PDPage page = (PDPage) p;
               scanResources(page.getResources(), doc);
          }
          succeeded = true;
          return doc;
     } finally {
          // Only clean up on failure: on success the caller owns the document.
          if(!succeeded) {
               try {
                    if(doc != null)
                         doc.close();
               } finally {
                    fis.close();
               }
          }
     }
}
 
开发者ID:bnanes,项目名称:shrink-pdf,代码行数:27,代码来源:ShrinkPDF.java


注:本文中的org.apache.pdfbox.pdfparser.PDFParser类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。