本文整理汇总了Java中org.apache.poi.extractor.ExtractorFactory类的典型用法代码示例。如果您正苦于以下问题:Java ExtractorFactory类的具体用法?Java ExtractorFactory怎么用?Java ExtractorFactory使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
ExtractorFactory类属于org.apache.poi.extractor包,在下文中一共展示了ExtractorFactory类的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: readContent
import org.apache.poi.extractor.ExtractorFactory; //导入依赖的package包/类
/**
 * Reads the text of the given leaf through a POI text extractor.
 * <p>
 * When the extractor's document is an {@code XWPFDocument}, headers, body content and
 * footers are appended in that order. For any other document type the result is an
 * empty string.
 *
 * @param leaf the leaf whose input stream is read; the stream is closed before returning
 * @return the collected text, possibly empty
 * @throws IOException if closing the input stream fails
 * @throws DocumentException if anything goes wrong while extracting; the message is the
 *         original exception's message and the original exception is attached as the cause
 */
@Override
protected String readContent(final VFSLeaf leaf) throws IOException, DocumentException {
    BufferedInputStream bis = null;
    final StringBuilder buffy = new StringBuilder();
    try {
        bis = new BufferedInputStream(leaf.getInputStream());
        final POIXMLTextExtractor extractor = (POIXMLTextExtractor) ExtractorFactory.createExtractor(bis);
        final POIXMLDocument document = extractor.getDocument();
        if (document instanceof XWPFDocument) {
            final XWPFDocument xDocument = (XWPFDocument) document;
            final XWPFHeaderFooterPolicy hfPolicy = xDocument.getHeaderFooterPolicy();
            extractHeaders(buffy, hfPolicy);
            extractContent(buffy, xDocument);
            extractFooters(buffy, hfPolicy);
        }
        return buffy.toString();
    } catch (final Exception e) {
        // Keep the message callers already see, but chain the original exception so its
        // type and stack trace are not lost.
        final DocumentException wrapped = new DocumentException(e.getMessage());
        wrapped.initCause(e);
        throw wrapped;
    } finally {
        if (bis != null) {
            bis.close();
        }
    }
}
示例2: readContent
import org.apache.poi.extractor.ExtractorFactory; //导入依赖的package包/类
/**
 * Reads the text of the given leaf through a POI text extractor.
 * <p>
 * When the extractor's document is an {@code XSSFWorkbook}, its content is appended via
 * {@code extractContent}. For any other document type the result is an empty string.
 *
 * @param leaf the leaf whose input stream is read; the stream is closed before returning
 * @return the collected text, possibly empty
 * @throws IOException if closing the input stream fails
 * @throws DocumentException if anything goes wrong while extracting; the message is the
 *         original exception's message and the original exception is attached as the cause
 */
@Override
protected String readContent(final VFSLeaf leaf) throws IOException, DocumentException {
    BufferedInputStream bis = null;
    final StringBuilder buffy = new StringBuilder();
    try {
        bis = new BufferedInputStream(leaf.getInputStream());
        final POIXMLTextExtractor extractor = (POIXMLTextExtractor) ExtractorFactory.createExtractor(bis);
        final POIXMLDocument document = extractor.getDocument();
        if (document instanceof XSSFWorkbook) {
            final XSSFWorkbook xDocument = (XSSFWorkbook) document;
            extractContent(buffy, xDocument);
        }
        return buffy.toString();
    } catch (final Exception e) {
        // Keep the message callers already see, but chain the original exception so its
        // type and stack trace are not lost.
        final DocumentException wrapped = new DocumentException(e.getMessage());
        wrapped.initCause(e);
        throw wrapped;
    } finally {
        if (bis != null) {
            bis.close();
        }
    }
}
示例3: readContent
import org.apache.poi.extractor.ExtractorFactory; //导入依赖的package包/类
/**
 * Reads the text of the given leaf through a POI text extractor.
 * <p>
 * When the extractor's document is an {@code XSLFSlideShow}, it is wrapped in an
 * {@code XMLSlideShow} and its content is appended via {@code extractContent}. For any
 * other document type the result is an empty string.
 *
 * @param leaf the leaf whose input stream is read; the stream is closed before returning
 * @return the collected text, possibly empty
 * @throws IOException if closing the input stream fails
 * @throws DocumentException if anything goes wrong while extracting; the message is the
 *         original exception's message and the original exception is attached as the cause
 */
@Override
public String readContent(final VFSLeaf leaf) throws IOException, DocumentException {
    BufferedInputStream bis = null;
    final StringBuilder buffy = new StringBuilder();
    try {
        bis = new BufferedInputStream(leaf.getInputStream());
        final POIXMLTextExtractor extractor = (POIXMLTextExtractor) ExtractorFactory.createExtractor(bis);
        final POIXMLDocument document = extractor.getDocument();
        if (document instanceof XSLFSlideShow) {
            final XSLFSlideShow slideShow = (XSLFSlideShow) document;
            final XMLSlideShow xmlSlideShow = new XMLSlideShow(slideShow);
            extractContent(buffy, xmlSlideShow);
        }
        return buffy.toString();
    } catch (final Exception e) {
        // Keep the message callers already see, but chain the original exception so its
        // type and stack trace are not lost.
        final DocumentException wrapped = new DocumentException(e.getMessage());
        wrapped.initCause(e);
        throw wrapped;
    } finally {
        if (bis != null) {
            bis.close();
        }
    }
}
示例4: FileDocumentFactory
import org.apache.poi.extractor.ExtractorFactory; //导入依赖的package包/类
/**
 * Creates the factory, copying its indexing settings from the given search module and
 * switching POI text extraction to the event-based extractors.
 *
 * @param searchModule source of the file black list, the PPT/Excel enable flags, the
 *        file-size suffixes and the maximum file size
 * @param mimeTypeProvider provider stored in the class-level {@code mimeTypeProvider} field
 */
FileDocumentFactory(final SearchModule searchModule, MimeTypeProvider mimeTypeProvider) {
fileBlackList = searchModule.getFileBlackList();
pptFileEnabled = searchModule.isPptFileEnabled();
if (!pptFileEnabled) {
log.info("PPT files are disabled in indexer.");
}
excelFileEnabled = searchModule.isExcelFileEnabled();
if (!excelFileEnabled) {
log.info("Excel files are disabled in indexer.");
}
checkFileSizeSuffixes = searchModule.getFileSizeSuffixes();
maxFileSize = searchModule.getMaxFileSize();
// NOTE(review): this is accessed through the class name, so it is a static field;
// every constructor call overwrites the value for the whole class.
FileDocumentFactory.mimeTypeProvider = mimeTypeProvider;
// Text extraction for MS Open XML documents can be handled in two technical ways:
// model based or event based (similar to DOM/SAX parsing of XML).
// For complex Excel files, model-based extraction leads to intolerably long processing times,
// so event-based extraction is used instead, even though header/footer extraction
// is not implemented for that method.
ExtractorFactory.setAllThreadsPreferEventExtractors(true);
}
示例5: readContent
import org.apache.poi.extractor.ExtractorFactory; //导入依赖的package包/类
/**
 * Reads the text of the given leaf by asking {@code ExtractorFactory} for an extractor
 * and returning its {@code getText()} result.
 *
 * @param leaf the leaf whose input stream is read; the stream is closed before returning
 * @return the text reported by the extractor
 * @throws IOException if closing the input stream fails
 * @throws DocumentException if anything goes wrong while extracting; the message is the
 *         original exception's message and the original exception is attached as the cause
 */
@Override
protected String readContent(final VFSLeaf leaf) throws IOException, DocumentException {
    BufferedInputStream bis = null;
    try {
        bis = new BufferedInputStream(leaf.getInputStream());
        // Event-based text extraction in POI 3.9 doesn't consider header/footer
        // (see org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor
        // and the comments in the constructor of FileDocumentFactory).
        // Previous versions of this class supported header/footer extraction based on the
        // complete memory model, which caused performance problems for large files.
        return ExtractorFactory.createExtractor(bis).getText();
    } catch (final Exception e) {
        // Keep the message callers already see, but chain the original exception so its
        // type and stack trace are not lost.
        final DocumentException wrapped = new DocumentException(e.getMessage());
        wrapped.initCause(e);
        throw wrapped;
    } finally {
        if (bis != null) {
            bis.close();
        }
    }
}
示例6: readContent
import org.apache.poi.extractor.ExtractorFactory; //导入依赖的package包/类
/**
 * Reads slide content and notes from the given leaf.
 * <p>
 * The extractor returned by {@code ExtractorFactory} must be an
 * {@code XSLFPowerPointExtractor}; its {@code getText(true, true)} result is returned.
 *
 * @param leaf the leaf whose input stream is read; the stream is closed before returning
 * @return the text reported by the PowerPoint extractor
 * @throws IOException if closing the input stream fails
 * @throws DocumentException if anything goes wrong while extracting; the message is the
 *         original exception's message and the original exception is attached as the cause
 */
@Override
public String readContent(final VFSLeaf leaf) throws IOException, DocumentException {
    BufferedInputStream bis = null;
    try {
        bis = new BufferedInputStream(leaf.getInputStream());
        final POITextExtractor extractor = ExtractorFactory.createExtractor(bis);
        if (extractor instanceof XSLFPowerPointExtractor) {
            // retrieve slide content and notes
            return ((XSLFPowerPointExtractor) extractor).getText(true, true);
        }
        // NOTE(review): this is thrown inside the try block, so if AssertException is an
        // Exception subclass the catch-all below turns it into a DocumentException.
        throw new AssertException("Expected XSLFPowerPointExtractor as text extractor.");
    } catch (final Exception e) {
        // Keep the message callers already see, but chain the original exception so its
        // type and stack trace are not lost.
        final DocumentException wrapped = new DocumentException(e.getMessage());
        wrapped.initCause(e);
        throw wrapped;
    } finally {
        if (bis != null) {
            bis.close();
        }
    }
}
示例7: detectOfficeOpenXML
import org.apache.poi.extractor.ExtractorFactory; //导入依赖的package包/类
/**
 * Detects the type of an OfficeOpenXML (OOXML) file from an opened Package.
 * <p>
 * The content type of the single core document part is taken, everything from its last
 * {@code '.'} onwards is cut off, and the remainder is parsed as the media type of the
 * overall document. Macro-enabled types are rewritten to their {@code ".12"} form first.
 *
 * @param pkg the opened package to inspect
 * @return the parsed media type, or {@code null} if the package does not have exactly one
 *         core document relationship, the core part or its content type is missing, or the
 *         content type contains no {@code '.'} to cut at
 */
public static MediaType detectOfficeOpenXML(OPCPackage pkg) {
    PackageRelationshipCollection core =
            pkg.getRelationshipsByType(ExtractorFactory.CORE_DOCUMENT_REL);
    if (core.size() != 1) {
        // Invalid OOXML Package received
        return null;
    }

    // Get the type of the core document part
    PackagePart corePart = pkg.getPart(core.getRelationship(0));
    if (corePart == null) {
        // The relationship points at a part that is not in the package
        return null;
    }
    String coreType = corePart.getContentType();
    if (coreType == null) {
        return null;
    }

    // Turn that into the type of the overall document
    int lastDot = coreType.lastIndexOf('.');
    if (lastDot < 0) {
        // No suffix to strip; substring(0, -1) would throw, so treat it as undetectable
        return null;
    }
    String docType = coreType.substring(0, lastDot);

    // Lowercase with a fixed locale so the suffix checks do not depend on the JVM default
    // locale (e.g. Turkish maps 'I' to a dotless 'ı').
    String lowerDocType = docType.toLowerCase(java.util.Locale.ROOT);

    // The Macro Enabled formats are a little special. The two suffixes cannot both match,
    // so a single if/else-if is equivalent to checking them one after the other.
    if (lowerDocType.endsWith("macroenabled")) {
        docType = lowerDocType + ".12";
    } else if (lowerDocType.endsWith("macroenabledtemplate")) {
        docType = MACRO_TEMPLATE_PATTERN.matcher(docType).replaceAll("macroenabled.12");
    }

    // Build the MediaType object and return
    return MediaType.parse(docType);
}