本文整理汇总了Java中org.apache.tika.parser.pdf.PDFParserConfig.setExtractInlineImages方法的典型用法代码示例。如果您正苦于以下问题：Java PDFParserConfig.setExtractInlineImages方法的具体用法？Java PDFParserConfig.setExtractInlineImages怎么用？Java PDFParserConfig.setExtractInlineImages使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.tika.parser.pdf.PDFParserConfig的用法示例。
在下文中一共展示了PDFParserConfig.setExtractInlineImages方法的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: PDFExtract
import org.apache.tika.parser.pdf.PDFParserConfig; //导入方法依赖的package包/类
/**
 * Creates an extractor backed by an {@code AutoDetectParser} and a parse
 * context that carries a default {@code TesseractOCRConfig}, a
 * {@code PDFParserConfig} with inline-image extraction enabled, and the
 * parser itself.
 */
public PDFExtract() {
    parser = new AutoDetectParser();

    final TesseractOCRConfig ocrSettings = new TesseractOCRConfig();
    final PDFParserConfig inlineImageSettings = new PDFParserConfig();
    inlineImageSettings.setExtractInlineImages(true);

    parseContext = new ParseContext();
    // The parser must be registered in its own context, otherwise recursive
    // parsing does not happen.
    parseContext.set(Parser.class, parser);
    parseContext.set(PDFParserConfig.class, inlineImageSettings);
    parseContext.set(TesseractOCRConfig.class, ocrSettings);
}
示例2: Indexer
import org.apache.tika.parser.pdf.PDFParserConfig; //导入方法依赖的package包/类
/**
 * Opens an index writer on {@code indexDir} and prepares the parser and
 * parse context used by this indexer. The error and file counters are reset
 * to zero.
 *
 * @param indexDir path of the directory that holds the index
 * @param create   {@code true} to open the writer in {@code OpenMode.CREATE};
 *                 {@code false} to open it in {@code OpenMode.CREATE_OR_APPEND}
 * @param fork     {@code true} to wrap the auto-detect parser in a
 *                 {@code ForkParser}; {@code false} to use it directly
 * @param ocr      {@code true} to register the parser, a default
 *                 {@code TesseractOCRConfig} and a {@code PDFParserConfig}
 *                 (inline images on, unique-only off) in the parse context
 * @throws IOException presumably when the directory or the index writer
 *                     cannot be opened (propagated from the calls below)
 */
public Indexer(String indexDir, boolean create, boolean fork, boolean ocr) throws IOException {
    logger.entry();

    this.fork = fork;
    numFiles = 0;
    numErrors = 0;

    final Directory indexLocation = FSDirectory.open(Paths.get(indexDir));
    final IndexWriterConfig writerConfig = new IndexWriterConfig(new StandardAnalyzer());
    writerConfig.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
    logger.info(create
            ? "Configuration specified to create a new index or overwrites an existing one."
            : "Configuration specified to create a new index if one does not exist, otherwise the index will be opened and documents will be appended.");
    writer = new IndexWriter(indexLocation, writerConfig);

    final Parser baseParser = new AutoDetectParser();
    context = new ParseContext();
    if (ocr) {
        final TesseractOCRConfig tesseractSettings = new TesseractOCRConfig();
        final PDFParserConfig pdfSettings = new PDFParserConfig();
        pdfSettings.setExtractInlineImages(true);
        pdfSettings.setExtractUniqueInlineImagesOnly(false);

        context.set(PDFParserConfig.class, pdfSettings);
        context.set(TesseractOCRConfig.class, tesseractSettings);
        context.set(Parser.class, baseParser);
    }

    // Either isolate parsing behind a ForkParser or use the parser directly.
    parser = fork
            ? new ForkParser(ForkParser.class.getClassLoader(), baseParser)
            : baseParser;

    logger.exit();
}
示例3: fillPdfOptions
import org.apache.tika.parser.pdf.PDFParserConfig; //导入方法依赖的package包/类
/**
 * Copies the PDF-related entries of {@code options} onto {@code pdfParserConfig}.
 *
 * <p>Only keys that are present (non-null) are applied, with one exception:
 * when {@code pdfExtractInlineImages} is absent, inline-image extraction is
 * switched on.
 *
 * <p>Float options are parsed from the value's {@code toString()}. Boolean
 * options accept either a {@link Boolean} or a value whose text is
 * {@code true}/{@code false} (case-insensitive), so that string values coming
 * from an untyped map are handled the same way for both kinds of option.
 *
 * @param pdfParserConfig configuration object to update
 * @param options         option map keyed by the {@code pdf*} names read below
 * @throws NumberFormatException    if a float option cannot be parsed
 * @throws IllegalArgumentException if a boolean option is neither a
 *                                  {@link Boolean} nor the text true/false
 */
private static void fillPdfOptions(PDFParserConfig pdfParserConfig, Map<String, Object> options) {
    final Object averageCharTolerance = options.get("pdfAverageCharTolerance");
    final Object enableAutoSpace = options.get("pdfEnableAutoSpace");
    final Object extractAcroFormContent = options.get("pdfExtractAcroFormContent");
    final Object extractAnnotationText = options.get("pdfExtractAnnotationText");
    final Object extractInlineImages = options.get("pdfExtractInlineImages");
    final Object extractUniqueInlineImagesOnly = options.get("pdfExtractUniqueInlineImagesOnly");
    final Object sortByPosition = options.get("pdfSortByPosition");
    final Object spacingTolerance = options.get("pdfSpacingTolerance");
    final Object suppressDuplicateOverlappingText = options.get("pdfSuppressDuplicateOverlappingText");

    if (averageCharTolerance != null) {
        pdfParserConfig.setAverageCharTolerance(Float.parseFloat(averageCharTolerance.toString()));
    }
    if (enableAutoSpace != null) {
        pdfParserConfig.setEnableAutoSpace(
                pdfOptionAsBoolean("pdfEnableAutoSpace", enableAutoSpace));
    }
    if (extractAcroFormContent != null) {
        pdfParserConfig.setExtractAcroFormContent(
                pdfOptionAsBoolean("pdfExtractAcroFormContent", extractAcroFormContent));
    }
    if (extractAnnotationText != null) {
        pdfParserConfig.setExtractAnnotationText(
                pdfOptionAsBoolean("pdfExtractAnnotationText", extractAnnotationText));
    }
    if (extractInlineImages != null) {
        pdfParserConfig.setExtractInlineImages(
                pdfOptionAsBoolean("pdfExtractInlineImages", extractInlineImages));
    } else {
        // Inline images are extracted unless the caller explicitly opts out.
        pdfParserConfig.setExtractInlineImages(true);
    }
    if (extractUniqueInlineImagesOnly != null) {
        pdfParserConfig.setExtractUniqueInlineImagesOnly(
                pdfOptionAsBoolean("pdfExtractUniqueInlineImagesOnly", extractUniqueInlineImagesOnly));
    }
    if (sortByPosition != null) {
        pdfParserConfig.setSortByPosition(
                pdfOptionAsBoolean("pdfSortByPosition", sortByPosition));
    }
    if (spacingTolerance != null) {
        pdfParserConfig.setSpacingTolerance(Float.parseFloat(spacingTolerance.toString()));
    }
    if (suppressDuplicateOverlappingText != null) {
        pdfParserConfig.setSuppressDuplicateOverlappingText(
                pdfOptionAsBoolean("pdfSuppressDuplicateOverlappingText", suppressDuplicateOverlappingText));
    }
}

/**
 * Converts a non-null option value to a boolean: {@link Boolean} instances are
 * returned as-is, anything else must read {@code true} or {@code false}.
 */
private static boolean pdfOptionAsBoolean(String optionName, Object value) {
    if (value instanceof Boolean) {
        return ((Boolean) value).booleanValue();
    }
    final String text = value.toString().trim();
    if ("true".equalsIgnoreCase(text)) {
        return true;
    }
    if ("false".equalsIgnoreCase(text)) {
        return false;
    }
    // Fail loudly rather than silently treating unknown text as false.
    throw new IllegalArgumentException(
            "Option '" + optionName + "' must be true or false but was: " + value);
}