本文整理汇总了Java中org.apache.poi.hwpf.extractor.WordExtractor.getText方法的典型用法代码示例。如果您正苦于以下问题:Java WordExtractor.getText方法的具体用法?Java WordExtractor.getText怎么用?Java WordExtractor.getText使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.poi.hwpf.extractor.WordExtractor的用法示例。
在下文中一共展示了WordExtractor.getText方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: readDoc
import org.apache.poi.hwpf.extractor.WordExtractor; //导入方法依赖的package包/类
/**
 * Extracts the plain text of a Word document supplied as a stream.
 *
 * <p>The stream is first passed through {@code FileMagic.prepareToCheckMagic}
 * and its format is detected once: OLE2 content is read with
 * {@link WordExtractor}, OOXML content with {@code XWPFWordExtractor}. Any
 * other detected format yields an empty string.
 *
 * @param filePath path of the document; used only as the log message when an
 *                 {@code OfficeXmlFileException} is caught
 * @param is       stream holding the document; it is always closed before this
 *                 method returns
 * @return the extracted text, or {@code ""} when the format is neither OLE2
 *         nor OOXML or when an {@code OfficeXmlFileException} was logged
 * @throws Exception any other failure raised while detecting or parsing the
 *                   document, or while closing the stream
 */
private static String readDoc(String filePath, InputStream is) throws Exception {
    String text = "";
    is = FileMagic.prepareToCheckMagic(is);
    try {
        // Detect the format once instead of re-reading the header per branch.
        FileMagic magic = FileMagic.valueOf(is);
        if (magic == FileMagic.OLE2) {
            // try-with-resources: the extractor is closed even if getText() throws.
            try (WordExtractor ex = new WordExtractor(is)) {
                text = ex.getText();
            }
        } else if (magic == FileMagic.OOXML) {
            try (XWPFWordExtractor extractor = new XWPFWordExtractor(new XWPFDocument(is))) {
                text = extractor.getText();
            }
        }
    } catch (OfficeXmlFileException e) {
        // Best effort: log the offending path and fall through with empty text.
        logger.error(filePath, e);
    } finally {
        if (is != null) {
            is.close();
        }
    }
    return text;
}
示例2: microsoftWordDocumentToString
import org.apache.poi.hwpf.extractor.WordExtractor; //导入方法依赖的package包/类
/**
 * Reads a Microsoft Word document from a stream and returns its text.
 *
 * <p>The input is wrapped in a {@link BufferedInputStream}. When
 * {@code POIFSFileSystem.hasPOIFSHeader} reports a POIFS header the document
 * is read with {@link WordExtractor}; otherwise it is treated as OOXML and
 * read with {@code XWPFWordExtractor}.
 *
 * @param inputStream stream holding the document; the buffered wrapper (and
 *                    with it this stream) is closed before returning
 * @return the text returned by the selected extractor
 * @throws IOException if reading or parsing the document fails
 */
private static String microsoftWordDocumentToString(InputStream inputStream) throws IOException {
    try (InputStream wordStream = new BufferedInputStream(inputStream)) {
        if (POIFSFileSystem.hasPOIFSHeader(wordStream)) {
            // Close the extractor even when getText() throws.
            try (WordExtractor wordExtractor = new WordExtractor(wordStream)) {
                return wordExtractor.getText();
            }
        }
        try (XWPFWordExtractor wordXExtractor = new XWPFWordExtractor(new XWPFDocument(wordStream))) {
            return wordXExtractor.getText();
        }
    }
}
示例3: readContent
import org.apache.poi.hwpf.extractor.WordExtractor; //导入方法依赖的package包/类
/**
 * Extracts the text of a Word document held in {@code cc} and feeds it to
 * {@code handler} as a single region named {@code "document"}.
 *
 * @param cc      content whose bytes ({@code cc.getContent()}) are parsed as a
 *                Word document
 * @param handler receives {@code startRegion("document")}, one {@code text}
 *                call with all extracted characters, then {@code endRegion()}
 * @throws OntopiaRuntimeException wrapping any exception raised while parsing
 *                                 the document or notifying the handler
 */
@Override
public void readContent(ClassifiableContentIF cc, TextHandlerIF handler) {
    try {
        char[] c;
        // The extractor was never closed before; release it as soon as the
        // text has been copied out, even if getText() throws.
        try (WordExtractor extractor = new WordExtractor(
                new BufferedInputStream(new ByteArrayInputStream(cc.getContent())))) {
            c = extractor.getText().toCharArray();
        }
        handler.startRegion("document");
        handler.text(c, 0, c.length);
        handler.endRegion();
    } catch (Exception e) {
        throw new OntopiaRuntimeException(e);
    }
}
示例4: getIndexedDocument
import org.apache.poi.hwpf.extractor.WordExtractor; //导入方法依赖的package包/类
/**
 * Builds an {@code IndexDocument} from the text of a Word document.
 *
 * @param fileData carries the raw document bytes ({@code fileData.data}) and
 *                 the path ({@code fileData.path}) stored in the result
 * @return an {@code IndexDocument} created from the path and the extracted
 *         text, with {@code null} as its third argument
 * @throws SolrException with {@code ErrorCode.SERVER_ERROR} when the document
 *                       cannot be read; the underlying {@link IOException} is
 *                       attached as the cause
 */
public IndexDocument getIndexedDocument(File2Index fileData)
        throws SolrException {
    try {
        POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(fileData.data));
        // Close the extractor once the text is read, including on failure.
        try (WordExtractor extractor = new WordExtractor(fs)) {
            String wordText = extractor.getText();
            return new IndexDocument(fileData.path, wordText, null);
        }
    } catch (IOException e) {
        // NOTE(review): the message mentions writing to the index, but the failure
        // here comes from reading the document; text kept unchanged for callers/logs.
        String msg = "Failed to write to the index";
        log.error(msg, e);
        // Preserve the root cause so callers can see why extraction failed.
        throw new SolrException(ErrorCode.SERVER_ERROR, msg, e);
    }
}
示例5: doc2text
import org.apache.poi.hwpf.extractor.WordExtractor; //导入方法依赖的package包/类
/**
 * Returns the text of a Word document read from the given stream.
 *
 * @param is stream holding the document
 * @return the text returned by {@link WordExtractor#getText()}
 * @throws IOException if the document cannot be read or the extractor cannot
 *                     be closed
 */
public String doc2text(InputStream is) throws IOException {
    // try-with-resources: the extractor is closed even when getText() throws.
    try (WordExtractor wd = new WordExtractor(is)) {
        return wd.getText();
    }
}
示例6: getText
import org.apache.poi.hwpf.extractor.WordExtractor; //导入方法依赖的package包/类
/**
 * Extract text from a word 97-2003 document.
 *
 * <p>Returns {@code null} without opening the file when
 * {@code isFileTooLarge(f)} is true or the file length is zero. Otherwise the
 * file is parsed and its text is returned, unless that text is {@code null}
 * or blank after trimming, in which case {@code null} is returned.
 *
 * @param f the word document to read
 * @return the extracted text, or {@code null} when the file was skipped or
 *         contained no non-blank text
 * @throws Exception any failure raised while opening or parsing the document;
 *                   it is logged and rethrown unchanged. An
 *                   {@link OutOfMemoryError} is likewise logged and rethrown.
 *
 * @see edu.ur.ir.index.FileTextExtractor#getText(java.io.File)
 */
public String getText(File f) throws Exception {
    if( isFileTooLarge(f) || f.length() <= 0L)
    {
        return null;
    }
    String text = null;
    FileInputStream inputStream = null;
    try
    {
        inputStream = new FileInputStream(f);
        HWPFDocument wordDocument = new HWPFDocument(inputStream);
        // The extractor was never closed before; release it on every path.
        try (WordExtractor wordExtractor = new WordExtractor(wordDocument))
        {
            String myText = wordExtractor.getText();
            if( myText != null && !myText.trim().equals(""))
            {
                text = myText;
            }
        }
    }
    catch(OutOfMemoryError oome)
    {
        log.error("could not extract text", oome);
        throw(oome);
    }
    catch(Exception e)
    {
        log.error("could not get text for word document " + f.getAbsolutePath(), e);
        throw(e);
    }
    finally
    {
        // Helper defined outside this block; kept so stream cleanup behaves as before.
        closeInputStream(inputStream);
    }
    return text;
}