本文整理匯總了Java中org.apache.poi.extractor.ExtractorFactory類的典型用法代碼示例。如果您正苦於以下問題:Java ExtractorFactory類的具體用法?Java ExtractorFactory怎麼用?Java ExtractorFactory使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。
ExtractorFactory類屬於org.apache.poi.extractor包,在下文中一共展示了ExtractorFactory類的7個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: readContent
import org.apache.poi.extractor.ExtractorFactory; //導入依賴的package包/類
/**
 * Reads the textual content of the given leaf through a POI text extractor.
 *
 * <p>When the extractor's document is an {@code XWPFDocument}, text is collected in the
 * order headers, body, footers by delegating to {@code extractHeaders},
 * {@code extractContent} and {@code extractFooters}. For any other document type the
 * returned string is empty.
 *
 * @param leaf the leaf whose input stream is read
 * @return the collected text, possibly empty
 * @throws IOException if closing the input stream fails
 * @throws DocumentException if anything goes wrong while opening or extracting; the
 *         original exception is attached as the cause
 */
@Override
protected String readContent(final VFSLeaf leaf) throws IOException, DocumentException {
    BufferedInputStream bis = null;
    final StringBuilder buffy = new StringBuilder();
    try {
        bis = new BufferedInputStream(leaf.getInputStream());
        // A non-OOXML extractor makes this cast fail; the ClassCastException is
        // converted to a DocumentException by the catch block below.
        final POIXMLTextExtractor extractor = (POIXMLTextExtractor) ExtractorFactory.createExtractor(bis);
        final POIXMLDocument document = extractor.getDocument();
        if (document instanceof XWPFDocument) {
            final XWPFDocument xDocument = (XWPFDocument) document;
            final XWPFHeaderFooterPolicy hfPolicy = xDocument.getHeaderFooterPolicy();
            extractHeaders(buffy, hfPolicy);
            extractContent(buffy, xDocument);
            extractFooters(buffy, hfPolicy);
        }
        return buffy.toString();
    } catch (final Exception e) {
        // Keep the message callers already see, but attach the original exception so
        // its type and stack trace are not lost.
        final DocumentException wrapped = new DocumentException(e.getMessage());
        wrapped.initCause(e);
        throw wrapped;
    } finally {
        if (bis != null) {
            bis.close();
        }
    }
}
示例2: readContent
import org.apache.poi.extractor.ExtractorFactory; //導入依賴的package包/類
/**
 * Reads the textual content of the given leaf through a POI text extractor.
 *
 * <p>When the extractor's document is an {@code XSSFWorkbook}, its text is collected by
 * delegating to {@code extractContent}. For any other document type the returned string
 * is empty.
 *
 * @param leaf the leaf whose input stream is read
 * @return the collected text, possibly empty
 * @throws IOException if closing the input stream fails
 * @throws DocumentException if anything goes wrong while opening or extracting; the
 *         original exception is attached as the cause
 */
@Override
protected String readContent(final VFSLeaf leaf) throws IOException, DocumentException {
    BufferedInputStream bis = null;
    final StringBuilder buffy = new StringBuilder();
    try {
        bis = new BufferedInputStream(leaf.getInputStream());
        // A non-OOXML extractor makes this cast fail; the ClassCastException is
        // converted to a DocumentException by the catch block below.
        final POIXMLTextExtractor extractor = (POIXMLTextExtractor) ExtractorFactory.createExtractor(bis);
        final POIXMLDocument document = extractor.getDocument();
        if (document instanceof XSSFWorkbook) {
            final XSSFWorkbook xDocument = (XSSFWorkbook) document;
            extractContent(buffy, xDocument);
        }
        return buffy.toString();
    } catch (final Exception e) {
        // Keep the message callers already see, but attach the original exception so
        // its type and stack trace are not lost.
        final DocumentException wrapped = new DocumentException(e.getMessage());
        wrapped.initCause(e);
        throw wrapped;
    } finally {
        if (bis != null) {
            bis.close();
        }
    }
}
示例3: readContent
import org.apache.poi.extractor.ExtractorFactory; //導入依賴的package包/類
/**
 * Reads the textual content of the given leaf through a POI text extractor.
 *
 * <p>When the extractor's document is an {@code XSLFSlideShow}, it is wrapped in an
 * {@code XMLSlideShow} and its text is collected by delegating to
 * {@code extractContent}. For any other document type the returned string is empty.
 *
 * @param leaf the leaf whose input stream is read
 * @return the collected text, possibly empty
 * @throws IOException if closing the input stream fails
 * @throws DocumentException if anything goes wrong while opening or extracting; the
 *         original exception is attached as the cause
 */
@Override
public String readContent(final VFSLeaf leaf) throws IOException, DocumentException {
    BufferedInputStream bis = null;
    final StringBuilder buffy = new StringBuilder();
    try {
        bis = new BufferedInputStream(leaf.getInputStream());
        // A non-OOXML extractor makes this cast fail; the ClassCastException is
        // converted to a DocumentException by the catch block below.
        final POIXMLTextExtractor extractor = (POIXMLTextExtractor) ExtractorFactory.createExtractor(bis);
        final POIXMLDocument document = extractor.getDocument();
        if (document instanceof XSLFSlideShow) {
            final XSLFSlideShow slideShow = (XSLFSlideShow) document;
            final XMLSlideShow xmlSlideShow = new XMLSlideShow(slideShow);
            extractContent(buffy, xmlSlideShow);
        }
        return buffy.toString();
    } catch (final Exception e) {
        // Keep the message callers already see, but attach the original exception so
        // its type and stack trace are not lost.
        final DocumentException wrapped = new DocumentException(e.getMessage());
        wrapped.initCause(e);
        throw wrapped;
    } finally {
        if (bis != null) {
            bis.close();
        }
    }
}
示例4: FileDocumentFactory
import org.apache.poi.extractor.ExtractorFactory; //導入依賴的package包/類
FileDocumentFactory(final SearchModule searchModule, MimeTypeProvider mimeTypeProvider) {
fileBlackList = searchModule.getFileBlackList();
pptFileEnabled = searchModule.isPptFileEnabled();
if (!pptFileEnabled) {
log.info("PPT files are disabled in indexer.");
}
excelFileEnabled = searchModule.isExcelFileEnabled();
if (!excelFileEnabled) {
log.info("Excel files are disabled in indexer.");
}
checkFileSizeSuffixes = searchModule.getFileSizeSuffixes();
maxFileSize = searchModule.getMaxFileSize();
FileDocumentFactory.mimeTypeProvider = mimeTypeProvider;
// there are two ways of how text extraction for MS Open XML documents (Office >= 2003) is handled technically:
// model based or event based (similar to DOM/SAX parsing of XML)
// for complex Excel files model based text extraction leads to intolerably long processing times!
// therefore we switched to event based text extraction (despite extraction for headers/footers
// is not implemented for this extraction method).
ExtractorFactory.setAllThreadsPreferEventExtractors(true);
}
示例5: readContent
import org.apache.poi.extractor.ExtractorFactory; //導入依賴的package包/類
/**
 * Reads the textual content of the given leaf by asking {@code ExtractorFactory} for an
 * extractor on the leaf's input stream and returning that extractor's text.
 *
 * @param leaf the leaf whose input stream is read
 * @return the text reported by the extractor
 * @throws IOException if closing the input stream fails
 * @throws DocumentException if anything goes wrong while opening or extracting; the
 *         original exception is attached as the cause
 */
@Override
protected String readContent(final VFSLeaf leaf) throws IOException, DocumentException {
    BufferedInputStream bis = null;
    try {
        bis = new BufferedInputStream(leaf.getInputStream());
        // event based text extraction in POI 3.9 doesn't consider header/footer
        // (see org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor
        // and comments in constructor of FileDocumentFactory)
        // Previous versions of this class had implemented support for header/footer extraction
        // based on the complete memory model which caused performance problems for large files.
        return ExtractorFactory.createExtractor(bis).getText();
    } catch (final Exception e) {
        // Keep the message callers already see, but attach the original exception so
        // its type and stack trace are not lost.
        final DocumentException wrapped = new DocumentException(e.getMessage());
        wrapped.initCause(e);
        throw wrapped;
    } finally {
        if (bis != null) {
            bis.close();
        }
    }
}
示例6: readContent
import org.apache.poi.extractor.ExtractorFactory; //導入依賴的package包/類
/**
 * Reads the textual content of the given leaf, expecting {@code ExtractorFactory} to
 * return an {@code XSLFPowerPointExtractor} for its input stream.
 *
 * @param leaf the leaf whose input stream is read
 * @return the result of {@code getText(true, true)} on the PowerPoint extractor
 * @throws IOException if closing the input stream fails
 * @throws DocumentException if anything goes wrong while opening or extracting, or if
 *         the extractor has an unexpected type; the original exception is attached as
 *         the cause
 */
@Override
public String readContent(final VFSLeaf leaf) throws IOException, DocumentException {
    BufferedInputStream bis = null;
    try {
        bis = new BufferedInputStream(leaf.getInputStream());
        final POITextExtractor extractor = ExtractorFactory.createExtractor(bis);
        if (extractor instanceof XSLFPowerPointExtractor) {
            // retrieve slide content and notes
            return ((XSLFPowerPointExtractor) extractor).getText(true, true);
        }
        // NOTE(review): this is thrown inside the try, so if AssertException is an
        // Exception subtype it is re-wrapped as DocumentException below - confirm intended.
        throw new AssertException("Expected XSLFPowerPointExtractor as text extractor.");
    } catch (final Exception e) {
        // Keep the message callers already see, but attach the original exception so
        // its type and stack trace are not lost.
        final DocumentException wrapped = new DocumentException(e.getMessage());
        wrapped.initCause(e);
        throw wrapped;
    } finally {
        if (bis != null) {
            bis.close();
        }
    }
}
示例7: detectOfficeOpenXML
import org.apache.poi.extractor.ExtractorFactory; //導入依賴的package包/類
/**
 * Detects the type of an OfficeOpenXML (OOXML) file from an
 * opened Package.
 *
 * <p>The type is derived from the content type of the package's single core document
 * part by cutting off everything from the last {@code '.'} onwards. Macro-enabled
 * types are then rewritten to their {@code ".12"} form.
 *
 * @param pkg the opened OOXML package
 * @return the detected media type, or {@code null} when the package does not have
 *         exactly one core document relationship, the core part or its content type
 *         is missing, or the content type contains no {@code '.'}
 */
public static MediaType detectOfficeOpenXML(OPCPackage pkg) {
    PackageRelationshipCollection core =
            pkg.getRelationshipsByType(ExtractorFactory.CORE_DOCUMENT_REL);
    if (core.size() != 1) {
        // Invalid OOXML Package received
        return null;
    }

    // Get the type of the core document part
    PackagePart corePart = pkg.getPart(core.getRelationship(0));
    if (corePart == null) {
        // Relationship points at a part that is not in the package
        return null;
    }
    String coreType = corePart.getContentType();
    if (coreType == null) {
        return null;
    }

    // Turn that into the type of the overall document
    int lastDot = coreType.lastIndexOf('.');
    if (lastDot < 0) {
        // No suffix to strip; previously this crashed in substring(0, -1)
        return null;
    }
    String docType = coreType.substring(0, lastDot);

    // The Macro Enabled formats are a little special.
    // Locale.ROOT keeps the case folding independent of the JVM's default locale.
    String lowerDocType = docType.toLowerCase(java.util.Locale.ROOT);
    if (lowerDocType.endsWith("macroenabled")) {
        docType = lowerDocType + ".12";
    } else if (lowerDocType.endsWith("macroenabledtemplate")) {
        docType = MACRO_TEMPLATE_PATTERN.matcher(docType).replaceAll("macroenabled.12");
    }

    // Build the MediaType object and return
    return MediaType.parse(docType);
}