本文整理匯總了Java中org.apache.tika.io.TikaInputStream.getFile方法的典型用法代碼示例。如果您正苦於以下問題:Java TikaInputStream.getFile方法的具體用法?Java TikaInputStream.getFile怎麼用?Java TikaInputStream.getFile使用的例子?那麼,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.apache.tika.io.TikaInputStream的用法示例。
在下文中一共展示了TikaInputStream.getFile方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: getInputStream
import org.apache.tika.io.TikaInputStream; //導入方法依賴的package包/類
/**
 * Opens the content behind the given reader as an input stream, spooling
 * it to a local file first when the content is a JPEG or TIFF image.
 * <p>
 * There seems to be some sort of issue with some downstream 3rd party
 * libraries and input streams that come from a {@link ContentReader}.
 * This happens most often with JPEG and TIFF files. For these cases the
 * content is buffered out to a local file if it is not already there.
 * <p>
 * The caller is expected to close the returned stream. If spooling to a
 * file fails, the partially prepared stream is closed here before the
 * failure is propagated, since the caller never receives a reference to it.
 *
 * @param reader the reader supplying the content
 * @return a file-backed {@link TikaInputStream} for {@link FileContentReader}s
 *         and for JPEG/TIFF content, otherwise the reader's own content stream
 * @throws IOException if the content cannot be turned into a local file
 */
protected InputStream getInputStream(ContentReader reader) throws IOException
{
    // Prefer the File if available, it's generally quicker
    if (reader instanceof FileContentReader)
    {
        return TikaInputStream.get(((FileContentReader) reader).getFile());
    }

    // Grab the InputStream for the Content
    InputStream input = reader.getContentInputStream();

    // Images currently always require a file
    if (MimetypeMap.MIMETYPE_IMAGE_JPEG.equals(reader.getMimetype()) ||
        MimetypeMap.MIMETYPE_IMAGE_TIFF.equals(reader.getMimetype()))
    {
        TemporaryResources tmp = new TemporaryResources();
        TikaInputStream stream = TikaInputStream.get(input, tmp);
        boolean spooled = false;
        try
        {
            stream.getFile(); // Have it turned into File backed
            spooled = true;
            return stream;
        }
        finally
        {
            if (!spooled)
            {
                // Spooling failed and nobody else holds this stream, so
                // release it (and whatever it wraps) rather than leak it.
                try
                {
                    stream.close();
                }
                catch (IOException ignored)
                {
                    // Deliberately ignored: the failure from getFile() that
                    // is already propagating is the one the caller needs.
                }
            }
        }
    }

    // The regular Content InputStream should be fine
    return input;
}
示例2: parse
import org.apache.tika.io.TikaInputStream; //導入方法依賴的package包/類
/**
 * Runs OCR on the given stream and writes the recognised text to the handler.
 * <p>
 * The stream is spooled to a temporary file. If its length lies within the
 * minimum and maximum OCR file sizes of the active {@link TesseractOCRConfig},
 * the file is handed to {@code doOCR} and the resulting text file is emitted
 * through an {@link XHTMLContentHandler}. Regardless of whether OCR ran, the
 * stream is then passed on to {@code _TMP_IMAGE_METADATA_PARSER} so the image
 * metadata is extracted as well. Temporary files are removed before returning.
 * <p>
 * The method returns without doing anything when {@code hasTesseract(config)}
 * reports that Tesseract is unavailable.
 *
 * @param stream   the document stream to process
 * @param handler  receives the extracted content
 * @param metadata the document metadata, passed to the handlers and the metadata parser
 * @param context  the parse context; may carry a {@link TesseractOCRConfig},
 *                 otherwise {@code DEFAULT_CONFIG} is used
 * @throws IOException   if reading the stream or the OCR output fails
 * @throws SAXException  if the content handler reports a failure
 * @throws TikaException if parsing or temporary-resource cleanup fails
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TesseractOCRConfig config = context.get(TesseractOCRConfig.class, DEFAULT_CONFIG);

    // If Tesseract is not on the path with the current config, do not try to run OCR
    // getSupportedTypes shouldn't have listed us as handling it, so this should only
    // occur if someone directly calls this parser, not via DefaultParser or similar
    if (!hasTesseract(config)) {
        return;
    }

    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    TemporaryResources tmp = new TemporaryResources();
    File output = null;
    try {
        TikaInputStream tikaStream = TikaInputStream.get(stream, tmp);
        File input = tikaStream.getFile();
        long size = tikaStream.getLength();

        if (size >= config.getMinFileSizeToOcr() && size <= config.getMaxFileSizeToOcr()) {
            output = tmp.createTemporaryFile();
            doOCR(input, output, config);

            // Tesseract appends .txt to output file name
            output = new File(output.getAbsolutePath() + ".txt");

            if (output.exists()) {
                // Close the OCR result explicitly: an open handle would leak and
                // can prevent the file from being deleted in the cleanup below.
                InputStream ocrText = new FileInputStream(output);
                try {
                    extractOutput(ocrText, xhtml);
                } finally {
                    ocrText.close();
                }
            }
        }

        // Temporary workaround for TIKA-1445 - until we can specify
        // composite parsers with strategies (eg Composite, Try In Turn),
        // always send the image onwards to the regular parser to have
        // the metadata for them extracted as well
        _TMP_IMAGE_METADATA_PARSER.parse(tikaStream, handler, metadata, context);
    } finally {
        try {
            tmp.dispose();
        } finally {
            // Runs even if dispose() throws, so the ".txt" file written by
            // Tesseract (which tmp does not track) is not left behind.
            if (output != null) {
                output.delete();
            }
        }
    }
}