本文整理匯總了Java中org.apache.tika.Tika.parseToString方法的典型用法代碼示例。如果您正苦於以下問題:Java Tika.parseToString方法的具體用法?Java Tika.parseToString怎麼用?Java Tika.parseToString使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.apache.tika.Tika的用法示例。
在下文中一共展示了Tika.parseToString方法的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: open
import org.apache.tika.Tika; //導入方法依賴的package包/類
/**
 * Extracts the text of {@code file} with Tika, shows it in the {@code plain} component and hands
 * it to {@code saveFile} under the file's base name with a {@code .txt} extension.
 *
 * <p>If extraction throws, the exception's stack trace is shown and saved in place of the text.
 *
 * @param file the document to extract text from
 */
public void open(File file) {
    Tika tika = new Tika();
    tika.setMaxStringLength(999999);
    StringWriter sw = new StringWriter();
    PrintWriter pw = new PrintWriter(sw);

    String name = file.getName();
    fileName.setText(name);

    // lastIndexOf returns -1 when the name has no '.', and 0 for dot-files such as ".profile";
    // in both cases there is no extension to strip, so keep the whole name as the base.
    int dot = name.lastIndexOf('.');
    String baseName = dot > 0 ? name.substring(0, dot) : name;
    String saveName = baseName + ".txt";

    try {
        String text = tika.parseToString(file);
        pw.println(text);
    } catch (Exception ex) {
        // Deliberate best-effort: surface the failure to the user in the output itself.
        ex.printStackTrace(pw);
    }
    pw.flush();

    String output = sw.toString();
    plain.setText(output);
    saveFile(output, saveName);
    plain.setCaretPosition(0);
}
示例2: parse
import org.apache.tika.Tika; //導入方法依賴的package包/類
/**
 * Extracts the text of {@code targetFile} with Tika and converts it via
 * {@code ParserUtils.fromText}.
 *
 * @param targetFile path of the document to parse
 * @return the parsed attendance, or {@code null} if an {@code IOException} or
 *     {@code TikaException} is raised (the stack trace is printed in that case)
 */
@Override
public AldermanAttendance parse(Path targetFile) {
    try {
        return ParserUtils.fromText(new Tika().parseToString(targetFile.toFile()));
    } catch (IOException | TikaException failure) {
        failure.printStackTrace();
    }
    return null;
}
示例3: DocxToText
import org.apache.tika.Tika; //導入方法依賴的package包/類
/**
 * Extracts the text content of the file at {@code docxFilePath} using Tika.
 *
 * <p>The file stream is opened in a try-with-resources block so it is closed even when Tika
 * construction or parsing throws.
 *
 * @param docxFilePath path of the document to read
 * @return the text returned by {@code Tika.parseToString}
 * @throws IOException if the file cannot be opened, read or closed
 * @throws TikaException if Tika fails to parse the document
 * @throws InvalidFormatException kept in the signature for caller compatibility
 * @throws XmlException kept in the signature for caller compatibility
 */
public static String DocxToText(String docxFilePath)
        throws IOException, InvalidFormatException, XmlException, TikaException {
    try (FileInputStream fis = new FileInputStream(docxFilePath)) {
        Tika tika = new Tika();
        return tika.parseToString(fis);
    }
}
示例4: processDocument
import org.apache.tika.Tika; //導入方法依賴的package包/類
/**
 * Replaces the document's raw bytes with the UTF-8 encoded text Tika extracts from them and
 * copies every Tika metadata entry onto the document.
 *
 * <p>Documents without raw data are passed through untouched. If Tika fails with an
 * {@code IOException} or {@code TikaException} the document's status is set to
 * {@code Status.DROPPED}. A {@code SecurityException} raised during processing is logged and the
 * document is returned as it stands at that point; any other unchecked exception or error
 * propagates to the caller.
 *
 * @param document the document whose raw data should be converted to text
 * @return a single-element array containing {@code document}
 */
@Override
public Document[] processDocument(Document document) {
    byte[] rawData = document.getRawData();
    if (rawData == null) {
        log.debug("Skipping document without data in " + getName());
        return new Document[]{document};
    }
    try {
        Tika tika = new Tika();
        // Cap the extracted text at the size of the input.
        tika.setMaxStringLength(rawData.length);
        Metadata metadata = new Metadata();
        try (ByteArrayInputStream bais = new ByteArrayInputStream(rawData)) {
            String textContent = tika.parseToString(bais, metadata);
            document.setRawData(textContent.getBytes(Charset.forName("UTF-8")));
            for (String name : metadata.names()) {
                document.put(sanitize(name) + plusSuffix(), metadata.get(name));
            }
        } catch (IOException | TikaException e) {
            log.warn("Tika processing failure!", e);
            // if tika can't parse it we certainly don't want random binary crap in the index
            document.setStatus(Status.DROPPED);
        }
    } catch (SecurityException e) {
        // AccessControlException is a SecurityException, so this single catch covers both cases
        // the original filtered for. Log instead of printing a debug marker to stdout.
        log.warn("Tika processing blocked by a security restriction in " + getName(), e);
    }
    return new Document[]{document};
}