当前位置: 首页>>代码示例>>Java>>正文


Java PdfTextExtractor类代码示例

本文整理汇总了 Java 中 com.itextpdf.text.pdf.parser.PdfTextExtractor 的典型用法代码示例。如果您正苦于以下问题：Java PdfTextExtractor 类的具体用法？Java PdfTextExtractor 怎么用？Java PdfTextExtractor 使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


PdfTextExtractor类属于com.itextpdf.text.pdf.parser包,在下文中一共展示了PdfTextExtractor类的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testWritePlainLetter

import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Verifies that the first page of the PDF letter stored under
 * {@code letters/<nif>.pdf} contains the voter's e-mail and password.
 *
 * <p>The second and third text lines of the page are each split on
 * {@code ":\t"}; the part after the separator is compared with the
 * voter's e-mail and password respectively.
 */
@Test
public void testWritePlainLetter() {

	PdfReader reader = null;
	try {
		reader = new PdfReader("letters/" + voter.getNif() + ".pdf");
		String page = PdfTextExtractor.getTextFromPage(reader, 1);
		String[] lines = page.split("\n");
		String email = lines[1].split(":\t")[1];
		String password = lines[2].split(":\t")[1];
		assertEquals(voter.getEmail(), email);
		assertEquals(voter.getPassword(), password);

	} catch (IOException e) {
		// A missing or unreadable letter must fail the test; merely printing
		// a message would let the test pass without checking anything.
		throw new AssertionError("Archivo no encontrado", e);
	} finally {
		// Close the reader whether the assertions passed or not.
		if (reader != null) {
			reader.close();
		}
	}

}
 
开发者ID:Arquisoft,项目名称:Voting_2b,代码行数:18,代码来源:PdfLetterTest.java

示例2: getNonWhiteSpacesFromPDF

import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Returns the text of the first page of the given PDF with all whitespace
 * removed.
 *
 * <p>After whitespace is deleted, every no-break space (U+00A0) is dropped
 * and every soft hyphen (U+00AD) is replaced by a plain {@code "-"}.
 *
 * @param pdfByteArray the complete PDF document as bytes
 * @return the normalised first-page text
 * @throws IOException if the PDF cannot be read
 */
private String getNonWhiteSpacesFromPDF(byte[] pdfByteArray)
		throws IOException {
	PdfReader reader = new PdfReader(pdfByteArray);
	try {
		String firstPageText = PdfTextExtractor.getTextFromPage(reader, 1);
		String stripped = StringUtils.deleteWhitespace(firstPageText);
		// Drop no-break spaces, then turn soft hyphens into ordinary hyphens.
		return stripped
				.replace("\u00A0", "")
				.replace("\u00AD", "-");
	} finally {
		reader.close();
	}
}
 
开发者ID:Altrusoft,项目名称:docserv,代码行数:23,代码来源:IntegrationTest.java

示例3: parsePdf

import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Reads every page of the given PDF and stores the extracted text in
 * {@code pdfData}, keyed by the 1-based page number.
 *
 * <p>{@code pdfData} is replaced by a fresh map on every call.
 *
 * @param filename path of the PDF file to read
 * @throws IOException if the file cannot be opened or a page cannot be parsed
 */
public void parsePdf(String filename) throws IOException {
    PdfReader reader = new PdfReader(filename);
    try {
        LOGGER.trace("Reading file " + filename);
        pdfData = new HashMap<Integer, String>();
        int numberOfPages = reader.getNumberOfPages();
        for (int page = 1; page <= numberOfPages; page++) {
            LOGGER.trace("Reading page " + page);
            String textFromPage = PdfTextExtractor.getTextFromPage(reader, page);
            pdfData.put(page, textFromPage);
        }
    } finally {
        // Release the underlying file even when extraction fails part-way.
        reader.close();
    }
}
 
开发者ID:tapack,项目名称:satisfy,代码行数:12,代码来源:PDFWords.java

示例4: getPDFExtractedText

import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Extracts the text of every page of {@code FullscreenActivity.file}, passes
 * each page through {@code detectAndImproveLine}, joins the pages with
 * newlines and stores the HTML-entity-encoded result in {@code foundText}.
 *
 * <p>Extraction is best-effort: if anything fails, the failure is logged and
 * whatever text was collected up to that point is still used.
 */
public void getPDFExtractedText() {
    // Accumulate in a builder rather than re-copying the string for each page.
    StringBuilder parsedText = new StringBuilder();
    PdfReader reader = null;
    try {
        reader = new PdfReader(FullscreenActivity.file.toString());
        int n = reader.getNumberOfPages();
        for (int i = 1; i <= n; i++) {
            String text = detectAndImproveLine(PdfTextExtractor.getTextFromPage(reader, i));
            parsedText.append(text).append("\n"); // Extracting the content from the different pages
        }
    } catch (Exception e) {
        // Keep the exception in the log so the cause can be diagnosed.
        Log.d("d", "Error extracting text", e);
    } finally {
        // Close the reader on the failure path too, and keep closing
        // best-effort like the rest of the method.
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception closeFailure) {
                Log.d("d", "Error closing PDF reader", closeFailure);
            }
        }
    }
    foundText = PopUpEditSongFragment.parseToHTMLEntities(parsedText.toString());
}
 
开发者ID:thebigg73,项目名称:OpenSongTablet,代码行数:16,代码来源:PopUpPDFToTextFragment.java

示例5: extractSimple

import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Extracts the text of one page with a {@link SimpleTextExtractionStrategy}
 * that additionally marks up text blocks.
 *
 * <p>Once any text has been rendered, each later text-block start appends
 * {@code "<BLOCK>"} and each text-block end appends {@code "</BLOCK>\n"}.
 * The final result is prefixed with {@code "<BLOCK>"} whenever any text was
 * rendered at all.
 *
 * @param reader the opened PDF
 * @param pageNo the page to extract
 * @return the page text including the block markers
 * @throws IOException if the page cannot be parsed
 */
String extractSimple(PdfReader reader, int pageNo) throws IOException
{
    SimpleTextExtractionStrategy blockMarkingStrategy = new SimpleTextExtractionStrategy()
    {
        /** Stays {@code true} until the first call to {@code renderText}. */
        boolean nothingRendered = true;

        @Override
        public void renderText(TextRenderInfo renderInfo)
        {
            nothingRendered = false;
            super.renderText(renderInfo);
        }

        @Override
        public void beginTextBlock()
        {
            if (!nothingRendered)
            {
                appendTextChunk("<BLOCK>");
            }
            super.beginTextBlock();
        }

        @Override
        public void endTextBlock()
        {
            if (!nothingRendered)
            {
                appendTextChunk("</BLOCK>\n");
            }
            super.endTextBlock();
        }

        @Override
        public String getResultantText()
        {
            String collected = super.getResultantText();
            return nothingRendered ? collected : "<BLOCK>" + collected;
        }
    };

    return PdfTextExtractor.getTextFromPage(reader, pageNo, blockMarkingStrategy);
}
 
开发者ID:mkl-public,项目名称:testarea-itext5,代码行数:41,代码来源:TextExtraction.java

示例6: checkIfPdfFileExist

import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Checks that today's copy of the given PDF exists in the PDF store, is
 * non-empty, and that its first page can be parsed.
 *
 * <p>The file is looked up under
 * {@code getPDFStorePath() + year\month\day\pdfFileName}. When the file is
 * usable and {@code ConfigFlag.UI.CORRESPONDENCE_ARCHIVE_RTF_FILES} is set,
 * {@code archiveRtfFile(pdfFileName, rtfFileName)} is called as well.
 * Timing information is printed to standard output.
 *
 * @param pdfFileName name of the PDF file inside the dated store folder
 * @param rtfFileName name of the RTF file handed to {@code archiveRtfFile}
 * @return {@code true} if the file exists, is non-empty and was processed
 *         without an exception; {@code false} otherwise
 */
private boolean checkIfPdfFileExist(String pdfFileName, String rtfFileName) 
{
	// Sample the clock once so year, month and day cannot straddle midnight.
	DateTime now = new DateTime();
	int year  = now.getDate().getYear();
	int month = now.getDate().getMonth();
	int day   = now.getDate().getDay();
	
	File pdfFile = new File(getPDFStorePath() + year + "\\" + month + "\\" + day + "\\" + pdfFileName);
	if (!pdfFile.exists() || pdfFile.length() <= 0)
	{
		return false;
	}
	
	long startCheck = System.currentTimeMillis();
	try 
	{
		byte[] fileContent = new byte[(int) pdfFile.length()];
		FileInputStream fin = new FileInputStream(pdfFile);
		try
		{
			// A single read() may return fewer bytes than requested, so
			// keep reading until the buffer is full.
			int offset = 0;
			while (offset < fileContent.length)
			{
				int count = fin.read(fileContent, offset, fileContent.length - offset);
				if (count < 0)
				{
					throw new java.io.EOFException("Unexpected end of file while reading " + pdfFile);
				}
				offset += count;
			}
		}
		finally
		{
			// Close the stream on the failure path too.
			fin.close();
		}

		long start = System.currentTimeMillis();
		String textFromPdfFilePageOne;
		PdfReader pdfReader = new PdfReader(fileContent);
		try
		{
			textFromPdfFilePageOne = PdfTextExtractor.getTextFromPage(pdfReader, 1);
		}
		finally
		{
			pdfReader.close();
		}
		long end = System.currentTimeMillis();
		System.out.println("Reading first pdf page time : " + (end - start) / 1000f + " seconds");
		
		if (Boolean.FALSE.equals(ConfigFlag.GEN.RELEASE_MODE.getValue()))
		{
			System.out.println(pdfFile + " first page content:");
			System.out.println(textFromPdfFilePageOne);
		}
		
		long endCheck = System.currentTimeMillis();
		System.out.println("Total time for checking if file is on the share : " + (endCheck - startCheck) / 1000f + " seconds");
		
		//If CORRESPONDENCE_ARCHIVE_RTF_FILES is set try to archive file
		if (ConfigFlag.UI.CORRESPONDENCE_ARCHIVE_RTF_FILES.getValue())
		{
			archiveRtfFile(pdfFileName, rtfFileName);
		}
	}
	catch (Exception e) 
	{
		// Any failure to read, parse or archive means the file is not usable.
		e.printStackTrace();
		return false;
	}
	
	return true;
}
 
开发者ID:oopcell,项目名称:AvoinApotti,代码行数:57,代码来源:Logic.java

示例7: extract

import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Extracts the text of a single page using the supplied strategy.
 *
 * @param reader   the opened PDF
 * @param pageNo   the page to extract
 * @param strategy the text extraction strategy to apply
 * @return the text produced by {@code strategy} for that page
 * @throws IOException if the page cannot be parsed
 */
String extract(PdfReader reader, int pageNo, TextExtractionStrategy strategy) throws IOException
{
    final String pageText = PdfTextExtractor.getTextFromPage(reader, pageNo, strategy);
    return pageText;
}
 
开发者ID:mkl-public,项目名称:testarea-itext5,代码行数:5,代码来源:TextExtraction.java

示例8: extract

import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Extracts one page with a fresh {@link HorizontalTextExtractionStrategy}.
 * Works for iText before 5.5.9-SNAPSHOT, i.e. before commit
 * 53526e4854fcb80c86cbc2e113f7a07401dc9a67
 * ("Refactor LocationTextExtractionStrategy...").
 *
 * @param reader the opened PDF
 * @param pageNo the page to extract
 * @return the text extracted from that page
 */
@SuppressWarnings("deprecation")
String extract(PdfReader reader, int pageNo) throws IOException, NoSuchFieldException, SecurityException
{
    HorizontalTextExtractionStrategy horizontalStrategy = new HorizontalTextExtractionStrategy();
    return PdfTextExtractor.getTextFromPage(reader, pageNo, horizontalStrategy);
}
 
开发者ID:mkl-public,项目名称:testarea-itext5,代码行数:10,代码来源:ExtractSuperAndSubInLine.java

示例9: extractV2

import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Extracts one page with a fresh {@link HorizontalTextExtractionStrategy2}.
 * Works for iText since 5.5.9-SNAPSHOT, i.e. from commit
 * 1ab350beae148be2a4bef5e663b3d67a004ff9f8
 * ("Make TextChunkLocation a Comparable<> class...").
 *
 * @param reader the opened PDF
 * @param pageNo the page to extract
 * @return the text extracted from that page
 */
String extractV2(PdfReader reader, int pageNo) throws IOException, NoSuchFieldException, SecurityException
{
    HorizontalTextExtractionStrategy2 horizontalStrategy = new HorizontalTextExtractionStrategy2();
    return PdfTextExtractor.getTextFromPage(reader, pageNo, horizontalStrategy);
}
 
开发者ID:mkl-public,项目名称:testarea-itext5,代码行数:9,代码来源:ExtractSuperAndSubInLine.java

示例10: extractRemapped

import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Extracts one page using a {@link LocationTextExtractionStrategy} wrapped
 * in a {@code RemappingExtractionFilter}.
 *
 * @param reader the opened PDF
 * @param pageNo the page to extract
 * @return the text produced by the wrapped strategy for that page
 */
String extractRemapped(PdfReader reader, int pageNo) throws IOException, NoSuchFieldException, SecurityException
{
    LocationTextExtractionStrategy locationStrategy = new LocationTextExtractionStrategy();
    TextExtractionStrategy remapping = new RemappingExtractionFilter(locationStrategy);
    return PdfTextExtractor.getTextFromPage(reader, pageNo, remapping);
}
 
开发者ID:mkl-public,项目名称:testarea-itext5,代码行数:6,代码来源:RemappedExtraction.java

示例11: checkIfPdfFileExist

import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Checks that today's copy of the given PDF exists in the PDF store, is
 * non-empty, and that its first page can be parsed.
 *
 * <p>The file is looked up under
 * {@code getPDFStorePath() + year\month\day\pdfFileName}. When the file is
 * usable and {@code ConfigFlag.UI.CORRESPONDENCE_ARCHIVE_RTF_FILES} is set,
 * {@code archiveRtfFile(pdfFileName, rtfFileName)} is called as well.
 * Timing information is printed to standard output.
 *
 * @param pdfFileName name of the PDF file inside the dated store folder
 * @param rtfFileName name of the RTF file handed to {@code archiveRtfFile}
 * @return {@code true} if the file exists, is non-empty and was processed
 *         without an exception; {@code false} otherwise
 */
public static boolean checkIfPdfFileExist(String pdfFileName, String rtfFileName) 
{
	// Sample the clock once so year, month and day cannot straddle midnight.
	DateTime now = new DateTime();
	int year  = now.getDate().getYear();
	int month = now.getDate().getMonth();
	int day   = now.getDate().getDay();
	
	File pdfFile = new File(getPDFStorePath() + year + "\\" + month + "\\" + day + "\\" + pdfFileName);
	if (!pdfFile.exists() || pdfFile.length() <= 0)
	{
		return false;
	}
	
	long startCheck = System.currentTimeMillis();
	try 
	{
		byte[] fileContent = new byte[(int) pdfFile.length()];
		FileInputStream fin = new FileInputStream(pdfFile);
		try
		{
			// A single read() may return fewer bytes than requested, so
			// keep reading until the buffer is full.
			int offset = 0;
			while (offset < fileContent.length)
			{
				int count = fin.read(fileContent, offset, fileContent.length - offset);
				if (count < 0)
				{
					throw new java.io.EOFException("Unexpected end of file while reading " + pdfFile);
				}
				offset += count;
			}
		}
		finally
		{
			// Close the stream on the failure path too.
			fin.close();
		}

		long start = System.currentTimeMillis();
		String textFromPdfFilePageOne;
		PdfReader pdfReader = new PdfReader(fileContent);
		try
		{
			textFromPdfFilePageOne = PdfTextExtractor.getTextFromPage(pdfReader, 1);
		}
		finally
		{
			pdfReader.close();
		}
		long end = System.currentTimeMillis();
		System.out.println("Reading first pdf page time : " + (end - start) / 1000f + " seconds");
		
		if (Boolean.FALSE.equals(ConfigFlag.GEN.RELEASE_MODE.getValue()))
		{
			System.out.println(pdfFile + " first page content:");
			System.out.println(textFromPdfFilePageOne);
		}
		
		long endCheck = System.currentTimeMillis();
		System.out.println("Total time for checking if file is on the share : " + (endCheck - startCheck) / 1000f + " seconds");
		
		//If CORRESPONDENCE_ARCHIVE_RTF_FILES is set try to archive file
		if (ConfigFlag.UI.CORRESPONDENCE_ARCHIVE_RTF_FILES.getValue())
		{
			archiveRtfFile(pdfFileName, rtfFileName);
		}
	}
	catch (Exception e) 
	{
		// Any failure to read, parse or archive means the file is not usable.
		e.printStackTrace();
		return false;
	}
	
	return true;
}
 
开发者ID:IMS-MAXIMS,项目名称:openMAXIMS,代码行数:56,代码来源:DocumentHelper.java


注:本文中的com.itextpdf.text.pdf.parser.PdfTextExtractor类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。