本文整理汇总了Java中com.itextpdf.text.pdf.parser.PdfTextExtractor类的典型用法代码示例。如果您正苦于以下问题：Java PdfTextExtractor类的具体用法？Java PdfTextExtractor怎么用？Java PdfTextExtractor使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
PdfTextExtractor类属于com.itextpdf.text.pdf.parser包，在下文中一共展示了PdfTextExtractor类的11个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testWritePlainLetter
import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Reads the first page of {@code letters/<nif>.pdf} and checks that the values
 * following the {@code ":\t"} separator on the second and third extracted lines
 * equal the voter's e-mail and password.
 *
 * <p>If the letter cannot be read the test fails instead of passing silently.
 */
@Test
public void testWritePlainLetter() {
    PdfReader reader = null;
    try {
        reader = new PdfReader("letters/" + voter.getNif() + ".pdf");
        String page = PdfTextExtractor.getTextFromPage(reader, 1);
        String[] lines = page.split("\n");
        String email = lines[1].split(":\t")[1];
        String password = lines[2].split(":\t")[1];
        assertEquals(voter.getEmail(), email);
        assertEquals(voter.getPassword(), password);
    } catch (IOException e) {
        // Merely printing here would let the test go green without having
        // asserted anything, so surface the failure and keep the cause.
        throw new AssertionError("Archivo no encontrado", e);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}
示例2: getNonWhiteSpacesFromPDF
import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Returns the text of the first PDF page with whitespace stripped out.
 *
 * <p>After {@code StringUtils.deleteWhitespace}, every U+00A0 character is
 * removed and every U+00AD character is replaced by {@code "-"}.
 *
 * @param pdfByteArray the PDF document as raw bytes
 * @return the normalised first-page text
 * @throws IOException if the reader cannot be created or the page cannot be read
 */
private String getNonWhiteSpacesFromPDF(byte[] pdfByteArray)
        throws IOException {
    final PdfReader pdfReader = new PdfReader(pdfByteArray);
    try {
        final String firstPageText = PdfTextExtractor.getTextFromPage(pdfReader, 1);
        final String withoutWhitespace = StringUtils.deleteWhitespace(firstPageText);
        // U+00A0 is dropped entirely; U+00AD becomes an ordinary hyphen.
        return withoutWhitespace
                .replace("\u00A0", "")
                .replace("\u00AD", "-");
    } finally {
        pdfReader.close();
    }
}
示例3: parsePdf
import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Extracts the text of every page of a PDF into {@code pdfData}, keyed by
 * 1-based page number. Any previous content of {@code pdfData} is replaced.
 *
 * @param filename location handed to the {@code PdfReader} constructor
 * @throws IOException if the document cannot be opened or a page cannot be read
 */
public void parsePdf(String filename) throws IOException {
    PdfReader reader = new PdfReader(filename);
    try {
        LOGGER.trace("Reading file " + filename);
        pdfData = new HashMap<Integer, String>();
        int numberOfPages = reader.getNumberOfPages();
        for (int page = 1; page <= numberOfPages; page++) {
            LOGGER.trace("Reading page " + page);
            String textFromPage = PdfTextExtractor.getTextFromPage(reader, page);
            pdfData.put(page, textFromPage);
        }
    } finally {
        // Close the reader on every path, including a failure part-way through.
        reader.close();
    }
}
示例4: getPDFExtractedText
import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Extracts the text of every page of {@code FullscreenActivity.file}, passes each
 * page through {@code detectAndImproveLine}, joins the pages with newlines and
 * stores the HTML-entity-encoded result in {@code foundText}.
 *
 * <p>Extraction is best-effort: on failure the error is logged and whatever text
 * was gathered so far (possibly none) is still stored.
 */
public void getPDFExtractedText() {
    StringBuilder parsedText = new StringBuilder();
    try {
        PdfReader reader = new PdfReader(FullscreenActivity.file.toString());
        try {
            int n = reader.getNumberOfPages();
            for (int i = 1; i <= n; i++) {
                // Extracting the content from the different pages
                String text = detectAndImproveLine(PdfTextExtractor.getTextFromPage(reader, i));
                parsedText.append(text).append("\n");
            }
        } finally {
            // Close the reader even when a page fails to parse.
            reader.close();
        }
    } catch (Exception e) {
        // Pass the exception along so the log shows why extraction failed.
        Log.d("d", "Error extracting text", e);
    }
    foundText = PopUpEditSongFragment.parseToHTMLEntities(parsedText.toString());
}
示例5: extractSimple
import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Extracts the text of one page with a {@code SimpleTextExtractionStrategy} that
 * wraps text blocks in {@code <BLOCK>} / {@code </BLOCK>} markers.
 *
 * <p>Markers are only appended once some text has been rendered; in that case
 * the final result is additionally prefixed with {@code <BLOCK>}.
 *
 * @param reader the open PDF reader
 * @param pageNo the page number passed to {@code PdfTextExtractor.getTextFromPage}
 * @return the extracted text including the block markers
 * @throws IOException if the page cannot be read
 */
String extractSimple(PdfReader reader, int pageNo) throws IOException
{
    SimpleTextExtractionStrategy blockMarkingStrategy = new SimpleTextExtractionStrategy()
    {
        /** Stays {@code true} until the first call to {@code renderText}. */
        private boolean nothingRendered = true;

        @Override
        public void renderText(TextRenderInfo renderInfo)
        {
            nothingRendered = false;
            super.renderText(renderInfo);
        }

        @Override
        public void beginTextBlock()
        {
            if (!nothingRendered)
            {
                appendTextChunk("<BLOCK>");
            }
            super.beginTextBlock();
        }

        @Override
        public void endTextBlock()
        {
            if (!nothingRendered)
            {
                appendTextChunk("</BLOCK>\n");
            }
            super.endTextBlock();
        }

        @Override
        public String getResultantText()
        {
            String collected = super.getResultantText();
            return nothingRendered ? collected : "<BLOCK>" + collected;
        }
    };
    return PdfTextExtractor.getTextFromPage(reader, pageNo, blockMarkingStrategy);
}
示例6: checkIfPdfFileExist
import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Checks that the given PDF is present in the dated PDF store folder, is
 * non-empty, and that its first page can be read; afterwards archives the RTF
 * file when {@code CORRESPONDENCE_ARCHIVE_RTF_FILES} is set.
 *
 * <p>The folder is {@code getPDFStorePath()} followed by the year, month and day
 * obtained from a single {@code new DateTime()}, separated by backslashes.
 *
 * @param pdfFileName name of the PDF file inside the dated folder
 * @param rtfFileName RTF file name handed to {@code archiveRtfFile}
 * @return {@code true} if the file exists with non-zero length and the check ran
 *         without an exception; {@code false} otherwise
 */
private boolean checkIfPdfFileExist(String pdfFileName, String rtfFileName)
{
    // Take the date once so year, month and day cannot disagree when the call
    // happens to straddle midnight.
    DateTime now = new DateTime();
    int year = now.getDate().getYear();
    int month = now.getDate().getMonth();
    int day = now.getDate().getDay();
    File pdfFile = new File(getPDFStorePath() + year + "\\" + month + "\\" + day + "\\" + pdfFileName);
    boolean nonNullFileExistAnd = pdfFile.exists() && (pdfFile.length() > 0);
    if (nonNullFileExistAnd)
    {
        long startCheck = System.currentTimeMillis();
        try
        {
            byte[] fileContent = new byte[(int) pdfFile.length()];
            FileInputStream fin = new FileInputStream(pdfFile);
            try
            {
                // One read() call may deliver fewer bytes than requested, so keep
                // reading until the buffer is full.
                int filled = 0;
                while (filled < fileContent.length)
                {
                    int count = fin.read(fileContent, filled, fileContent.length - filled);
                    if (count < 0)
                    {
                        throw new java.io.EOFException("Unexpected end of file: " + pdfFile);
                    }
                    filled += count;
                }
            }
            finally
            {
                fin.close();
            }

            long start = System.currentTimeMillis();
            PdfReader pdfReader = new PdfReader(fileContent);
            try
            {
                String textFromPdfFilePageOne = PdfTextExtractor.getTextFromPage(pdfReader, 1);
                long end = System.currentTimeMillis();
                System.out.println("Reading first pdf page time : " + ((end - start) / 1000f) + " seconds");
                if (Boolean.FALSE.equals(ConfigFlag.GEN.RELEASE_MODE.getValue()))
                {
                    System.out.println(pdfFile + " first page content:");
                    System.out.println(textFromPdfFilePageOne);
                }
            }
            finally
            {
                pdfReader.close();
            }

            long endCheck = System.currentTimeMillis();
            System.out.println("Total time for checking if file is on the share : " + ((endCheck - startCheck) / 1000f) + " seconds");
            // If CORRESPONDENCE_ARCHIVE_RTF_FILES is set try to archive file
            if (ConfigFlag.UI.CORRESPONDENCE_ARCHIVE_RTF_FILES.getValue())
            {
                archiveRtfFile(pdfFileName, rtfFileName);
            }
        }
        catch (Exception e)
        {
            // Any failure (unreadable file, unparsable PDF, archiving error) means
            // the file is treated as not available.
            e.printStackTrace();
            return false;
        }
    }
    return nonNullFileExistAnd;
}
示例7: extract
import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Extracts the text of one page using the supplied extraction strategy.
 *
 * @param reader   the open PDF reader
 * @param pageNo   the page number passed to {@code PdfTextExtractor.getTextFromPage}
 * @param strategy the strategy that collects the text
 * @return the text produced by {@code PdfTextExtractor.getTextFromPage}
 * @throws IOException if the page cannot be read
 */
String extract(PdfReader reader, int pageNo, TextExtractionStrategy strategy) throws IOException
{
    final String pageText = PdfTextExtractor.getTextFromPage(reader, pageNo, strategy);
    return pageText;
}
示例8: extract
import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Extracts one page with a fresh {@link HorizontalTextExtractionStrategy}.
 * <p>
 * This variant works for iText before 5.5.9-SNAPSHOT, i.e. before commit
 * 53526e4854fcb80c86cbc2e113f7a07401dc9a67 ("Refactor LocationTextExtractionStrategy...").
 *
 * @param reader the open PDF reader
 * @param pageNo the page number passed to {@code PdfTextExtractor.getTextFromPage}
 * @return the text extracted from the page
 */
@SuppressWarnings("deprecation")
String extract(PdfReader reader, int pageNo) throws IOException, NoSuchFieldException, SecurityException
{
    HorizontalTextExtractionStrategy horizontalStrategy = new HorizontalTextExtractionStrategy();
    return PdfTextExtractor.getTextFromPage(reader, pageNo, horizontalStrategy);
}
示例9: extractV2
import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Extracts one page with a fresh {@link HorizontalTextExtractionStrategy2}.
 * <p>
 * This variant works for iText since 5.5.9-SNAPSHOT, i.e. from commit
 * 1ab350beae148be2a4bef5e663b3d67a004ff9f8 ("Make TextChunkLocation a Comparable<> class...").
 *
 * @param reader the open PDF reader
 * @param pageNo the page number passed to {@code PdfTextExtractor.getTextFromPage}
 * @return the text extracted from the page
 */
String extractV2(PdfReader reader, int pageNo) throws IOException, NoSuchFieldException, SecurityException
{
    HorizontalTextExtractionStrategy2 horizontalStrategy = new HorizontalTextExtractionStrategy2();
    return PdfTextExtractor.getTextFromPage(reader, pageNo, horizontalStrategy);
}
示例10: extractRemapped
import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Extracts one page through a {@code RemappingExtractionFilter} that wraps a
 * fresh {@code LocationTextExtractionStrategy}.
 *
 * @param reader the open PDF reader
 * @param pageNo the page number passed to {@code PdfTextExtractor.getTextFromPage}
 * @return the text extracted from the page
 */
String extractRemapped(PdfReader reader, int pageNo) throws IOException, NoSuchFieldException, SecurityException
{
    return PdfTextExtractor.getTextFromPage(
            reader,
            pageNo,
            new RemappingExtractionFilter(new LocationTextExtractionStrategy()));
}
示例11: checkIfPdfFileExist
import com.itextpdf.text.pdf.parser.PdfTextExtractor; //导入依赖的package包/类
/**
 * Checks that the given PDF is present in the dated PDF store folder, is
 * non-empty, and that its first page can be read; afterwards archives the RTF
 * file when {@code CORRESPONDENCE_ARCHIVE_RTF_FILES} is set.
 *
 * <p>The folder is {@code getPDFStorePath()} followed by the year, month and day
 * obtained from a single {@code new DateTime()}, separated by backslashes.
 *
 * @param pdfFileName name of the PDF file inside the dated folder
 * @param rtfFileName RTF file name handed to {@code archiveRtfFile}
 * @return {@code true} if the file exists with non-zero length and the check ran
 *         without an exception; {@code false} otherwise
 */
public static boolean checkIfPdfFileExist(String pdfFileName, String rtfFileName)
{
    // Take the date once so year, month and day cannot disagree when the call
    // happens to straddle midnight.
    DateTime now = new DateTime();
    int year = now.getDate().getYear();
    int month = now.getDate().getMonth();
    int day = now.getDate().getDay();
    File pdfFile = new File(getPDFStorePath() + year + "\\" + month + "\\" + day + "\\" + pdfFileName);
    boolean nonNullFileExistAnd = pdfFile.exists() && (pdfFile.length() > 0);
    if (nonNullFileExistAnd)
    {
        long startCheck = System.currentTimeMillis();
        try
        {
            byte[] fileContent = new byte[(int) pdfFile.length()];
            FileInputStream fin = new FileInputStream(pdfFile);
            try
            {
                // One read() call may deliver fewer bytes than requested, so keep
                // reading until the buffer is full.
                int filled = 0;
                while (filled < fileContent.length)
                {
                    int count = fin.read(fileContent, filled, fileContent.length - filled);
                    if (count < 0)
                    {
                        throw new java.io.EOFException("Unexpected end of file: " + pdfFile);
                    }
                    filled += count;
                }
            }
            finally
            {
                fin.close();
            }

            long start = System.currentTimeMillis();
            PdfReader pdfReader = new PdfReader(fileContent);
            try
            {
                String textFromPdfFilePageOne = PdfTextExtractor.getTextFromPage(pdfReader, 1);
                long end = System.currentTimeMillis();
                System.out.println("Reading first pdf page time : " + ((end - start) / 1000f) + " seconds");
                if (Boolean.FALSE.equals(ConfigFlag.GEN.RELEASE_MODE.getValue()))
                {
                    System.out.println(pdfFile + " first page content:");
                    System.out.println(textFromPdfFilePageOne);
                }
            }
            finally
            {
                pdfReader.close();
            }

            long endCheck = System.currentTimeMillis();
            System.out.println("Total time for checking if file is on the share : " + ((endCheck - startCheck) / 1000f) + " seconds");
            // If CORRESPONDENCE_ARCHIVE_RTF_FILES is set try to archive file
            if (ConfigFlag.UI.CORRESPONDENCE_ARCHIVE_RTF_FILES.getValue())
            {
                archiveRtfFile(pdfFileName, rtfFileName);
            }
        }
        catch (Exception e)
        {
            // Any failure (unreadable file, unparsable PDF, archiving error) means
            // the file is treated as not available.
            e.printStackTrace();
            return false;
        }
    }
    return nonNullFileExistAnd;
}