本文整理汇总了Java中org.apache.pdfbox.cos.COSDocument类的典型用法代码示例。如果您正苦于以下问题：Java COSDocument类的具体用法？Java COSDocument怎么用？Java COSDocument使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
COSDocument类属于org.apache.pdfbox.cos包,在下文中一共展示了COSDocument类的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: readPDFDocument
import org.apache.pdfbox.cos.COSDocument; //导入依赖的package包/类
/**
 * Extracts the text of the PDF file {@code f} and appends it, one entry per
 * line, to {@code lines}.
 *
 * Any failure is reported to the user through an error dialog and the stack
 * trace is printed; no exception escapes this method.
 */
private void readPDFDocument() {
    try {
        FileInputStream fs = new FileInputStream(f);
        try {
            PDFParser parser = new PDFParser(fs);
            parser.parse();
            COSDocument cosDoc = parser.getDocument();
            PDDocument pdDoc = new PDDocument(cosDoc);
            try {
                PDFTextStripper pdfStripper = new PDFTextStripper();
                String text = pdfStripper.getText(pdDoc);
                String[] docxLines = text.split(System.lineSeparator());
                for (String line : docxLines) {
                    lines.add(line);
                }
            } finally {
                // Release the document even when text extraction fails.
                pdDoc.close();
            }
        } finally {
            // Previously the stream was only closed on the success path.
            fs.close();
        }
    } catch (Exception e) {
        JOptionPane.showMessageDialog(null, "Fehler in readPDFDocument",
                "Fehler", JOptionPane.ERROR_MESSAGE);
        e.printStackTrace();
    }
}
示例2: pdftoText
import org.apache.pdfbox.cos.COSDocument; //导入依赖的package包/类
/**
 * Extracts the text of a single page of a PDF file.
 *
 * @param fileName path of the PDF file to read
 * @param pageno   page to extract; passed unchanged to
 *                 {@code PDFTextStripper.setStartPage/setEndPage}
 * @return the text of the requested page
 * @throws IOException if the file cannot be read or parsed
 * @throws CryptographyException retained in the signature so existing callers
 *         that handle it keep compiling
 */
static String pdftoText(String fileName,int pageno) throws IOException, CryptographyException
{
    File file = new File(fileName);
    FileInputStream input = new FileInputStream(file);
    try
    {
        PDFParser parser = new PDFParser(input);
        parser.parse();
        COSDocument cosDoc = parser.getDocument();
        PDDocument pdDoc = new PDDocument(cosDoc);
        try
        {
            PDFTextStripper pdfStripper = new PDFTextStripper();
            pdfStripper.setStartPage(pageno);
            pdfStripper.setEndPage(pageno);
            return pdfStripper.getText(pdDoc);
        }
        finally
        {
            // Close on every path, not only after a successful extraction.
            pdDoc.close();
        }
    }
    finally
    {
        // The stream was never closed by the original code.
        input.close();
    }
}
示例3: pdftoText
import org.apache.pdfbox.cos.COSDocument; //导入依赖的package包/类
/**
 * Parses a PDF from the given stream and returns its text.
 *
 * @param is    stream positioned at the start of a PDF document; it is not
 *              closed by this method
 * @param stats when {@code true}, the extracted text is also passed to
 *              {@code vc.addAll}
 * @return the text extracted from the document
 * @throws IOException if parsing, extraction or closing fails
 */
public String pdftoText(InputStream is, boolean stats) throws IOException {
    COSDocument lowLevelDoc = null;
    PDDocument highLevelDoc = null;
    try {
        final PDFParser pdfParser = new PDFParser(is);
        pdfParser.parse();
        lowLevelDoc = pdfParser.getDocument();
        final PDFTextStripper stripper = new PDFTextStripper();
        highLevelDoc = new PDDocument(lowLevelDoc);
        final String extracted = stripper.getText(highLevelDoc);
        if (!stats) {
            return extracted;
        }
        vc.addAll(extracted);
        return extracted;
    } finally {
        // Same order as before: low-level document first, then the wrapper.
        if (lowLevelDoc != null) {
            lowLevelDoc.close();
        }
        if (highLevelDoc != null) {
            highLevelDoc.close();
        }
    }
}
示例4: readThesaurus
import org.apache.pdfbox.cos.COSDocument; //导入依赖的package包/类
/**
 * Reads {@code thesaurus.pdf} from the given directory and returns the
 * substances collected by {@link ThesaurusPDFStripper} while it processes
 * the document from page 2 to the last page.
 *
 * @param dir directory containing {@code thesaurus.pdf}
 * @return the {@code substances} gathered by the stripper
 * @throws IOException if the file cannot be read or parsed
 */
private List<SubstanceInteraction> readThesaurus(File dir) throws IOException {
    File file = new File(dir, "thesaurus.pdf");
    RandomAccessBufferedFileInputStream source = new RandomAccessBufferedFileInputStream(file);
    try {
        PDFParser parser = new PDFParser(source);
        parser.parse();
        COSDocument cosDoc = parser.getDocument();
        try {
            ThesaurusPDFStripper pdfStripper = new ThesaurusPDFStripper();
            PDDocument pdDoc = new PDDocument(cosDoc);
            pdfStripper.setStartPage(2);
            pdfStripper.setEndPage(pdDoc.getNumberOfPages());
            // The return value is ignored; the stripper fills `substances` as it runs.
            pdfStripper.getText(pdDoc);
            return pdfStripper.substances;
        } finally {
            // Previously skipped whenever extraction threw.
            cosDoc.close();
        }
    } finally {
        // The file-backed source was never released by the original code.
        source.close();
    }
}
示例5: parse
import org.apache.pdfbox.cos.COSDocument; //导入依赖的package包/类
/**
 * Runs {@link AdvancedPDFStripper} over {@code thesaurus.pdf}, located via the
 * test class loader's root resource, from page 2 to the last page, and prints
 * the substances it collected.
 *
 * @throws IOException if the file cannot be read or parsed
 */
@Test
public void parse() throws IOException {
    File file = new File(MedicamentTest.class.getClassLoader().getResource(".").getFile(), "thesaurus.pdf");
    RandomAccessBufferedFileInputStream source = new RandomAccessBufferedFileInputStream(file);
    try {
        PDFParser parser = new PDFParser(source);
        parser.parse();
        COSDocument cosDoc = parser.getDocument();
        try {
            AdvancedPDFStripper pdfStripper = new AdvancedPDFStripper();
            PDDocument pdDoc = new PDDocument(cosDoc);
            pdfStripper.setStartPage(2);
            pdfStripper.setEndPage(pdDoc.getNumberOfPages());
            pdfStripper.getText(pdDoc);
            System.out.println(pdfStripper.substances);
        } finally {
            // The original test never closed the document.
            cosDoc.close();
        }
    } finally {
        source.close();
    }
}
示例6: findpages
import org.apache.pdfbox.cos.COSDocument; //导入依赖的package包/类
/**
 * Finds every page of a PDF whose text contains the given keyword.
 *
 * @param path          path of the PDF file to search
 * @param searchKeyword text to look for (case-sensitive {@code contains})
 * @return one {@code Container} per matching page, holding the page text,
 *         the file path and the 1-based page number
 * @throws IOException if the file cannot be read or parsed
 */
public List<Container> findpages(String path, String searchKeyword) throws IOException
{
    List<Container> list = new ArrayList<Container>();
    FileInputStream input = new FileInputStream(new File(path));
    try
    {
        PDFParser parser = new PDFParser(input);
        parser.parse();
        COSDocument cosDoc = parser.getDocument();
        PDDocument doc = new PDDocument(cosDoc);
        try
        {
            PDFTextStripper reader = new PDFTextStripper();
            int pageCount = doc.getNumberOfPages();
            // PDFTextStripper page numbers are 1-based, so start at 1
            // (the old loop began at the non-existent page 0).
            for (int pageNo = 1; pageNo <= pageCount; pageNo++)
            {
                reader.setStartPage(pageNo);
                reader.setEndPage(pageNo);
                // Extract once and reuse; the old code extracted matching pages twice.
                String pageText = reader.getText(doc);
                if (pageText.contains(searchKeyword))
                {
                    Container container = new Container();
                    container.setContent(pageText);
                    container.setFilepath(path);
                    container.setPageno(pageNo);
                    list.add(container);
                }
            }
        }
        finally
        {
            doc.close();
        }
    }
    finally
    {
        input.close();
    }
    return list;
}
示例7: findpages
import org.apache.pdfbox.cos.COSDocument; //导入依赖的package包/类
/**
 * Searches a PDF for pages containing all of the given keywords
 * (case-insensitive), collects the matching pages into a new PDF and saves it
 * to {@code ConfigCBSI.getResultPdfPath() + fileCounter + ".pdf"}.
 *
 * @param path              path of the PDF file to search
 * @param searchKeywordList keywords that must all occur on a page
 * @param fileCounter       number used in the name of the saved result file
 * @return one result per recorded page, with its text, the file path and the
 *         1-based page number
 * @throws IOException if the file cannot be read, parsed or saved
 */
public List<SearchResult> findpages(String path,
        List<String> searchKeywordList, int fileCounter) throws IOException {
    List<SearchResult> list = new ArrayList<SearchResult>();
    List<PDPage> pageList = new ArrayList<PDPage>();
    File file = new File(path);

    // Close the file stream as soon as the buffer has been built from it.
    RandomAccessBuffer buffer;
    FileInputStream input = new FileInputStream(file);
    try {
        buffer = new RandomAccessBuffer(input);
    } finally {
        input.close();
    }

    PDFParser parser = new PDFParser(buffer);
    parser.parse();
    COSDocument cosDoc = parser.getDocument();
    PDDocument doc = new PDDocument(cosDoc);
    try {
        PDDocument finalDocument = new PDDocument();
        try {
            PDFTextStripper reader = new PDFTextStripper();
            int pageCount = doc.getNumberOfPages();
            // Stripper page numbers are 1-based; PDDocument.getPage is 0-based.
            for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
                reader.setStartPage(pageNumber);
                reader.setEndPage(pageNumber);
                // Extract once per page instead of once per keyword.
                String pageText = reader.getText(doc);
                String lowerCaseText = pageText.toLowerCase();
                boolean hasKeywords = true;
                for (String keyword : searchKeywordList) {
                    if (!lowerCaseText.contains(keyword.toLowerCase())) {
                        hasKeywords = false;
                        break;
                    }
                }
                if (hasKeywords) {
                    // NOTE(review): falseCounter is declared outside this method; a match
                    // is only recorded once it exceeds 1 - intent unclear, confirm.
                    if (falseCounter > 1) {
                        SearchResult result = new PageResult();
                        result.setFileContent(pageText);
                        result.setFilePath(path);
                        result.setPageNumber(pageNumber);
                        list.add(result);
                        // The old code used the 1-based number here: it copied the
                        // following page and threw on the last one.
                        pageList.add(doc.getPage(pageNumber - 1));
                    }
                    falseCounter++;
                }
            }
            for (PDPage page : pageList) {
                finalDocument.addPage(page);
            }
            finalDocument
                    .save(ConfigCBSI.getResultPdfPath() + fileCounter + ".pdf");
            logger.info("Result Saved");
        } finally {
            finalDocument.close();
        }
    } finally {
        // Closed last: the copied pages still belong to the source document.
        doc.close();
    }
    return list;
}
示例8: buildWordMap
import org.apache.pdfbox.cos.COSDocument; //导入依赖的package包/类
/**
 * Builds a word-frequency map for the text of a PDF file.
 *
 * Punctuation and digits are removed, the text is lower-cased and split on
 * runs of whitespace. An {@link IOException} is printed and the words counted
 * so far (possibly none) are returned.
 *
 * @param fileName path of the PDF file to read
 * @return map from lower-cased word to its number of occurrences
 */
public static Map<String, Integer> buildWordMap(String fileName)
{
    Map<String, Integer> wordMap = new HashMap<String,Integer>();
    try
    {
        File file = new File(fileName);
        FileInputStream input = new FileInputStream(file);
        try
        {
            PDFParser parser = new PDFParser(input);
            parser.parse();
            COSDocument cosDoc = parser.getDocument();
            PDDocument doc = new PDDocument(cosDoc);
            try
            {
                PDFTextStripper reader = new PDFTextStripper();
                // Extract the whole document in one call. The old loop ran 0..N-1
                // against 1-based page numbers and so skipped the last page.
                String pdffulltext = reader.getText(doc);
                String processedtext = pdffulltext.replaceAll("\\p{Punct}|\\d", "").toLowerCase();
                // Split on any whitespace so words on adjacent lines are not glued together.
                String[] words = processedtext.split("\\s+");
                for (String word : words)
                {
                    if (word.isEmpty())
                    {
                        // Leading whitespace produces an empty first token; it is not a word.
                        continue;
                    }
                    Integer count = wordMap.get(word);
                    wordMap.put(word, count == null ? 1 : count + 1);
                }
            }
            finally
            {
                doc.close();
            }
        }
        finally
        {
            input.close();
        }
    }
    catch (IOException e)
    {
        e.printStackTrace();
    }
    return wordMap;
}
示例9: getDocument
import org.apache.pdfbox.cos.COSDocument; //导入依赖的package包/类
/**
 * {@inheritDoc}
 *
 * Delegates to {@code document.getDocument()}.
 */
public final COSDocument getDocument() {
    final COSDocument cosDocument = document.getDocument();
    return cosDocument;
}
示例10: parse
import org.apache.pdfbox.cos.COSDocument; //导入依赖的package包/类
/**
 * Wraps the given COS document in a {@code PDDocument} and forwards it to
 * {@code parse(PDDocument)}.
 *
 * @param doc the low-level document to parse
 * @throws IOException if the delegated parse fails
 */
void parse(COSDocument doc) throws IOException
{
    final PDDocument wrapped = new PDDocument(doc);
    parse(wrapped);
}
示例11: findpages
import org.apache.pdfbox.cos.COSDocument; //导入依赖的package包/类
/**
 * Searches a PDF for pages containing all of the given keywords
 * (case-insensitive). When at least one page matches, the matching pages are
 * saved as a new PDF at
 * {@code ConfigCBSI.getResultPdfPath() + fileCounter + ".pdf"}.
 *
 * @param path              path of the PDF file to search
 * @param searchKeywordList keywords that must all occur on a page
 * @param fileCounter       number used in the name of the saved result file
 * @return one result per matching page, with its text, the file path and the
 *         1-based page number
 * @throws IOException if the file cannot be read, parsed or saved
 */
public List<SearchResult> findpages(String path,
        List<String> searchKeywordList, int fileCounter) throws IOException {
    List<SearchResult> list = new ArrayList<SearchResult>();
    List<PDPage> pageList = new ArrayList<PDPage>();
    File file = new File(path);

    // Close the file stream as soon as the buffer has been built from it.
    RandomAccessBuffer buffer;
    FileInputStream input = new FileInputStream(file);
    try {
        buffer = new RandomAccessBuffer(input);
    } finally {
        input.close();
    }

    PDFParser parser = new PDFParser(buffer);
    parser.parse();
    COSDocument cosDoc = parser.getDocument();
    PDDocument doc = new PDDocument(cosDoc);
    try {
        PDDocument finalDocument = new PDDocument();
        try {
            PDFTextStripper reader = new PDFTextStripper();
            int pageCount = doc.getNumberOfPages();
            // Stripper page numbers are 1-based; PDDocument.getPage is 0-based.
            // The old 0..N-1 loop never searched the last page and copied the
            // page after the one whose text matched.
            for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
                reader.setStartPage(pageNumber);
                reader.setEndPage(pageNumber);
                // Extract once per page instead of once per keyword.
                String pageText = reader.getText(doc);
                String lowerCaseText = pageText.toLowerCase();
                boolean hasKeywords = true;
                for (String keyword : searchKeywordList) {
                    if (!lowerCaseText.contains(keyword.toLowerCase())) {
                        hasKeywords = false;
                        break;
                    }
                }
                if (hasKeywords) {
                    SearchResult result = new PageResult();
                    result.setFileContent(pageText);
                    result.setFilePath(path);
                    result.setPageNumber(pageNumber);
                    list.add(result);
                    pageList.add(doc.getPage(pageNumber - 1));
                    falseCounter++;
                }
            }
            for (PDPage page : pageList) {
                finalDocument.addPage(page);
                validResult = true;
            }
            if (validResult) {
                finalDocument.save(ConfigCBSI.getResultPdfPath() + fileCounter
                        + ".pdf");
                logger.info("Result Saved");
                validResult = false;
            }
        } finally {
            // Previously only closed when a result had been saved.
            finalDocument.close();
        }
    } finally {
        // Closed last: the copied pages still belong to the source document.
        doc.close();
    }
    return list;
}
示例12: readInteractions
import org.apache.pdfbox.cos.COSDocument; //导入依赖的package包/类
/**
 * Reads {@code interactions.pdf} from the given directory and returns the
 * interactions collected by {@link InteractionPDFStripper}, keyed by family,
 * after linking each interaction to its second family.
 *
 * For every interaction, the normalized second family is looked up in the
 * map. If it is present, the interaction's {@code id2} is set to the
 * {@code id1} of that family's first entry. Otherwise a warning is logged and
 * a mirrored interaction is created under the missing family key; those new
 * entries are merged into the returned map at the end.
 *
 * @param dir directory containing {@code interactions.pdf}
 * @return the interaction map, including entries for families that were only
 *         referenced
 * @throws IOException if the file cannot be read or parsed
 */
private Map<String, List<Interaction>> readInteractions(File dir) throws IOException {
    File file = new File(dir, "interactions.pdf");
    InteractionPDFStripper pdfStripper;
    RandomAccessBufferedFileInputStream source = new RandomAccessBufferedFileInputStream(file);
    try {
        PDFParser parser = new PDFParser(source);
        parser.parse();
        COSDocument cosDoc = parser.getDocument();
        try {
            pdfStripper = new InteractionPDFStripper();
            PDDocument pdDoc = new PDDocument(cosDoc);
            pdfStripper.setStartPage(2);
            pdfStripper.setEndPage(pdDoc.getNumberOfPages());
            // The return value is ignored; the stripper fills `interactions` as it runs.
            pdfStripper.getText(pdDoc);
        } finally {
            // Previously skipped whenever extraction threw.
            cosDoc.close();
        }
    } finally {
        // The file-backed source was never released by the original code.
        source.close();
    }

    Map<String, List<Interaction>> interactions = pdfStripper.interactions;
    // New families are collected separately so the map is not modified while
    // its values are being iterated.
    Map<String, List<Interaction>> newInteractions = new HashMap<>();
    for (List<Interaction> tmp : interactions.values()) {
        for (Interaction interaction : tmp) {
            String famille2 = normalize(interaction.getFamille2(), true);
            // special cases
            switch (famille2) {
                case "medicaments hyponatremiants":
                    famille2 = "hyponatremiants";
                    break;
            }
            List<Interaction> interactions2 = interactions.get(famille2);
            if (interactions2 == null) {
                LOG.warn("interaction " + interaction.getFamille2() + " inconnu");
                Interaction newInteraction;
                if (newInteractions.get(famille2) == null) {
                    newInteractions.put(famille2, new ArrayList<>());
                    newInteraction = pdfStripper.createEmptyInteraction(-1, interaction.getFamille2());
                } else {
                    // Reuse the id of the first interaction already created for this family.
                    newInteraction = pdfStripper.createEmptyInteraction(Integer.parseInt(newInteractions.get(famille2).get(0).getId1()), interaction.getFamille2());
                }
                newInteraction.setId2(interaction.getId1());
                newInteraction.setFamille2(interaction.getFamille1());
                newInteraction.setDescription(interaction.getDescription());
                newInteraction.setConseil(interaction.getConseil());
                newInteractions.get(famille2).add(newInteraction);
                interaction.setId2(newInteraction.getId1());
            } else {
                interaction.setId2(interactions2.get(0).getId1());
            }
        }
    }
    interactions.putAll(newInteractions);
    return interactions;
}
示例13: getText
import org.apache.pdfbox.cos.COSDocument; //导入依赖的package包/类
/**
 * Extracts the text of a low-level COS document by wrapping it in a
 * {@link PDDocument} and delegating to {@link PDFTextStripper#getText( PDDocument )}.
 *
 * @deprecated
 * @see PDFTextStripper#getText( PDDocument )
 * @param doc The document to extract the text from.
 * @return The document text.
 * @throws IOException If there is an error extracting the text.
 */
public String getText( COSDocument doc ) throws IOException
{
    final PDDocument wrapped = new PDDocument( doc );
    return getText( wrapped );
}
示例14: writeText
import org.apache.pdfbox.cos.COSDocument; //导入依赖的package包/类
/**
 * Writes the text of a low-level COS document to the given writer by wrapping
 * it in a {@link PDDocument} and delegating to
 * {@link PDFTextStripper#writeText( PDDocument, Writer )}.
 *
 * @deprecated
 * @see PDFTextStripper#writeText( PDDocument, Writer )
 * @param doc The document to extract the text.
 * @param outputStream The stream to write the text to.
 * @throws IOException If there is an error extracting the text.
 */
public void writeText( COSDocument doc, Writer outputStream ) throws IOException
{
    final PDDocument wrapped = new PDDocument( doc );
    writeText( wrapped, outputStream );
}