本文整理匯總了Java中org.apache.tika.io.TikaInputStream類的典型用法代碼示例。如果您正苦於以下問題:Java TikaInputStream類的具體用法?Java TikaInputStream怎麼用?Java TikaInputStream使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。
TikaInputStream類屬於org.apache.tika.io包,在下文中一共展示了TikaInputStream類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: parse
import org.apache.tika.io.TikaInputStream; //導入依賴的package包/類
/**
 * Collects metadata from the given stream and then emits an empty XHTML document.
 *
 * <p>The stream is wrapped in a {@link TikaInputStream} tied to a {@link TemporaryResources}
 * instance. The file obtained from the wrapper is passed to
 * {@code ImageMetadataExtractor.parseJpeg}, after which the wrapper itself is passed to
 * {@code JempboxExtractor.parse}. The temporary resources are disposed whether or not
 * extraction succeeds.
 *
 * @param stream   source data
 * @param handler  receives the start/end document events
 * @param metadata populated by the two extractors and passed to the XHTML handler
 * @param context  not used by this method
 */
public void parse(
        InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    final TemporaryResources resources = new TemporaryResources();
    try {
        final TikaInputStream tikaStream = TikaInputStream.get(stream, resources);

        final ImageMetadataExtractor imageExtractor = new ImageMetadataExtractor(metadata);
        imageExtractor.parseJpeg(tikaStream.getFile());

        final JempboxExtractor jempboxExtractor = new JempboxExtractor(metadata);
        jempboxExtractor.parse(tikaStream);
    } finally {
        // Always release whatever the wrapper registered with the temporary resources.
        resources.dispose();
    }

    // No body content is produced: only the document start/end events are sent.
    final XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
    document.startDocument();
    document.endDocument();
}
示例2: getFullText
import org.apache.tika.io.TikaInputStream; //導入依賴的package包/類
/**
 * Parses the file at {@code filepath} with an {@code AutoDetectParser} and returns the text
 * that the {@code BodyContentHandler} wrote into an in-memory writer.
 *
 * @param filepath path of the file to parse
 * @return the text collected by the content handler
 * @throws IOException   if the file cannot be opened or read, or closing the stream fails
 * @throws SAXException  propagated from the parser
 * @throws TikaException propagated from the parser
 */
private static String getFullText(final String filepath) throws IOException, SAXException, TikaException {
    final StringWriter writer = new StringWriter();
    // try-with-resources: a failure in close() is recorded as suppressed instead of
    // replacing an exception thrown by parse().
    try (TikaInputStream inputStream = TikaInputStream.get(new File(filepath))) {
        final Detector detector = new DefaultDetector();
        final Parser parser = new AutoDetectParser(detector);
        final Metadata metadata = new Metadata();
        final ParseContext parseContext = new ParseContext();
        // Register the parser in the context under Parser.class before parsing.
        parseContext.set(Parser.class, parser);
        final ContentHandler contentHandler = new BodyContentHandler(writer);
        parser.parse(inputStream, contentHandler, metadata, parseContext);
    }
    return writer.toString();
}
示例3: TikaProperties
import org.apache.tika.io.TikaInputStream; //導入依賴的package包/類
/**
 * Parses {@code file} with an {@code AutoDetectParser} and stores the resulting metadata
 * in the {@code metadata} field. Content events go to a {@code DefaultHandler}.
 *
 * @param file the file to parse
 * @throws IOException   if the file cannot be opened or read
 * @throws SAXException  propagated from the parser
 * @throws TikaException propagated from the parser
 */
public TikaProperties(File file) throws IOException, SAXException,
        TikaException {
    metadata = new Metadata();
    ContentHandler handler = new DefaultHandler();
    Parser parser = new AutoDetectParser();
    ParseContext context = new ParseContext();
    // Open the stream last, directly before the try block, so that a failure while
    // building the collaborators above cannot leave an open stream behind.
    TikaInputStream tikaStream = TikaInputStream.get(file);
    try {
        parser.parse(tikaStream, handler, metadata, context);
    } finally {
        try {
            tikaStream.close();
        } catch (Exception ignored) {
            // Best effort: a failure to close must not replace the parse outcome.
        }
    }
}
示例4: getMimeType
import org.apache.tika.io.TikaInputStream; //導入依賴的package包/類
/**
 * Detects the media type of {@code data}, passing {@code fileName} to the detector as the
 * resource-name entry of the metadata.
 *
 * @param data     the bytes to inspect
 * @param fileName resource name added to the metadata under {@code RESOURCE_NAME_KEY}
 * @return the string form of the detected media type
 * @throws Throwable anything thrown while creating the stream or detecting the type
 */
public static String getMimeType(byte[] data, String fileName) throws Throwable {
    TikaInputStream tikaStream = null;
    Metadata metadata = new Metadata();
    metadata.add(Metadata.RESOURCE_NAME_KEY, fileName);
    // The former catch (Throwable t) { throw t; } was a no-op and has been removed;
    // exceptions propagate exactly as before.
    try {
        tikaStream = TikaInputStream.get(data, metadata);
        return detector.detect(tikaStream, metadata).toString();
    } finally {
        if (tikaStream != null) {
            try {
                tikaStream.close();
            } catch (IOException ignored) {
                // Best effort: a close failure must not replace the detection result.
            }
        }
    }
}
示例5: getMimeType
import org.apache.tika.io.TikaInputStream; //導入依賴的package包/類
/**
 * Detects the media type of the {@code file} field using a {@code DefaultDetector} built
 * from the default MIME type registry.
 *
 * @return the string form of the detected media type
 * @throws IOException if the stream cannot be opened, read or closed
 */
public String getMimeType() throws IOException
{
    // try-with-resources replaces the manual null-checked finally block and keeps a
    // detection failure as the primary exception if close() also fails.
    try (TikaInputStream tikaIS = TikaInputStream.get(file))
    {
        return new DefaultDetector(MimeTypes.getDefaultMimeTypes()).detect(tikaIS, new Metadata()).toString();
    }
}
示例6: resolveContentType
import org.apache.tika.io.TikaInputStream; //導入依賴的package包/類
/**
 * Detects the content type of {@code data} using the detector of an
 * {@code AutoDetectParser} that wraps an {@code ImageParser}.
 *
 * @param data the bytes to inspect
 * @return the string form of the detected media type, or the octet-stream type when an
 *         {@link IOException} occurs during detection or while closing the stream
 */
private String resolveContentType(byte[] data) {
    AutoDetectParser parser = new AutoDetectParser(new ImageParser());
    // The stream was previously never closed; try-with-resources releases it on every path.
    try (TikaInputStream stream = TikaInputStream.get(data)) {
        return parser.getDetector().detect(stream, new Metadata()).toString();
    } catch (IOException e) {
        return MediaType.OCTET_STREAM.toString();
    }
}
示例7: create
import org.apache.tika.io.TikaInputStream; //導入依賴的package包/類
/**
 * Stores an uploaded asset in GridFS under its original filename.
 *
 * <p>The upload is rejected when a GridFS file with the same filename already exists.
 * The content type is detected from the data and filename; if detection fails with an
 * {@link IOException}, a warning is logged and the asset is stored with a {@code null}
 * content type.
 *
 * @param assetData the uploaded file
 * @return the original filename of the stored asset
 * @throws IOException if the upload's data cannot be opened, or a stream fails to close
 * @throws DataIntegrityViolationException if an asset with the same filename exists
 */
@RequestMapping(method = POST)
@PreAuthorize("isAuthenticated()")
@ResponseBody
@ResponseStatus(CREATED)
public String create(
        @RequestParam("assetData") final MultipartFile assetData
) throws IOException {
    final String filename = assetData.getOriginalFilename();

    // Check duplicates
    final GridFSDBFile file = this.gridFs.findOne(Query.query(Criteria.where("filename").is(filename)));
    if (file != null) {
        throw new DataIntegrityViolationException(String.format("Asset with name '%s' already exists", filename));
    }

    // Detection reads from its own stream, which is closed as soon as detection is done.
    MediaType mediaType = null;
    try (InputStream usedStream = TikaInputStream.get(assetData.getInputStream())) {
        try {
            mediaType = MediaType.parse(tika.detect(usedStream, filename));
        } catch (IOException e) {
            log.warn("Could not detect content type", e);
        }
    }

    // A fresh stream is opened for storage. It was previously never closed; the caller of
    // getInputStream() is responsible for closing what it returns.
    try (InputStream content = assetData.getInputStream()) {
        this.gridFs.store(content, filename, Optional.ofNullable(mediaType).map(MediaType::toString).orElse(null));
    }
    return filename;
}
示例8: detectMimeTypes
import org.apache.tika.io.TikaInputStream; //導入依賴的package包/類
/**
 * Detects the media type of {@code input} and returns the list filled in by
 * {@code unravel} for that type. If that list ends up empty, it contains only the
 * octet-stream type.
 *
 * <p>The input is wrapped in a {@link TikaInputStream} that is closed once detection
 * finishes.
 *
 * @param input    data to inspect; must not be {@code null}
 * @param fileName optional resource name given to the detector through the metadata
 * @return the detected type names, never empty
 * @throws IOException if detection or closing the wrapper fails
 */
@Nonnull
@Override
public List<String> detectMimeTypes(final InputStream input, @Nullable final String fileName) throws IOException {
    checkNotNull(input);

    final Metadata hints = new Metadata();
    if (fileName != null) {
        hints.set(Metadata.RESOURCE_NAME_KEY, fileName);
    }

    final MediaType detectedType;
    try (TikaInputStream tikaStream = TikaInputStream.get(input)) {
        detectedType = detector.detect(tikaStream, hints);
    }

    // unravel to least specific
    final List<String> result = Lists.newArrayList();
    unravel(result, detectedType);
    if (result.isEmpty()) {
        result.add(MimeTypes.OCTET_STREAM);
    }
    return result;
}
示例9: parseEmbedded
import org.apache.tika.io.TikaInputStream; //導入依賴的package包/類
/**
 * Handles an embedded resource.
 *
 * <p>Resources whose embedded-resource-type metadata equals {@code INLINE} (for example
 * images in PDFs) are not spawned. They are parsed through {@code delegateParsing} with an
 * embedded body handler around {@code handler}, so their content is concatenated to the
 * main document. Every other resource is wrapped in a {@link TikaInputStream} and passed
 * to {@code spawnEmbedded}.
 *
 * @param input      the embedded resource data
 * @param handler    handler of the containing document
 * @param metadata   metadata of the embedded resource
 * @param outputHtml when {@code true}, inline content is surrounded by
 *                   {@code writeStart}/{@code writeEnd}
 */
@Override
public void parseEmbedded(final InputStream input, final ContentHandler handler, final Metadata metadata,
        final boolean outputHtml) throws SAXException, IOException {
    final String inlineType = TikaCoreProperties.EmbeddedResourceType.INLINE.toString();
    final boolean inline = inlineType.equals(metadata.get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE));

    if (!inline) {
        try (TikaInputStream tikaStream = TikaInputStream.get(input)) {
            spawnEmbedded(tikaStream, metadata);
        }
        return;
    }

    // Inline embeds are concatenated to the main document as usual.
    final ContentHandler inlineHandler = new EmbeddedContentHandler(new BodyContentHandler(handler));
    if (outputHtml) {
        writeStart(handler, metadata);
    }
    delegateParsing(input, inlineHandler, metadata);
    if (outputHtml) {
        writeEnd(handler);
    }
}
示例10: init
import org.apache.tika.io.TikaInputStream; //導入依賴的package包/類
/**
 * Initializes the {@code name}, {@code ext} and {@code mime} fields.
 *
 * <p>When {@code _ext} is empty, the name is {@code _name} up to its last dot and the
 * extension comes from {@code getFileExt(_name)}. Otherwise {@code _name} is used as is
 * and {@code _ext} is lower-cased. The MIME type is then detected from the stream (if any)
 * and the formatted file name; any failure is logged and leaves {@code mime} as
 * {@code null}.
 *
 * @param _name file name, with or without extension
 * @param _ext  explicit extension, or empty to derive it from {@code _name}
 * @param is    optional content stream; {@code null} means detection by name only
 */
private void init(String _name, String _ext, InputStream is) {
    if (Strings.isEmpty(_ext)) {
        int idx = _name.lastIndexOf('.');
        name = idx < 0 ? _name : _name.substring(0, idx);
        ext = getFileExt(_name);
    } else {
        name = _name;
        // Locale.ROOT keeps the result independent of the JVM default locale
        // (for example, a Turkish locale would otherwise map "I" to a dotless "ı").
        ext = _ext.toLowerCase(java.util.Locale.ROOT);
    }
    Metadata md = new Metadata();
    md.add(RESOURCE_NAME_KEY, String.format(FILE_NAME_FMT, name, ext));
    try {
        // NOTE(review): the TikaInputStream wrapper is not closed here; closing it would
        // presumably also close the caller's stream - confirm who owns `is`.
        mime = tika.getDetector().detect(is == null ? null : TikaInputStream.get(is), md);
    } catch (Throwable e) {
        // Detection is best effort: record the failure and continue without a MIME type.
        mime = null;
        log.error("Unexpected exception while detecting mime type", e);
    }
}
示例11: extractImageLinks
import org.apache.tika.io.TikaInputStream; //導入依賴的package包/類
/**
 * Extracts image links from the document at a URL.
 *
 * <p>The URL is opened and parsed with an {@code AutoDetectParser} and a
 * {@code LinkContentHandler}; the URI of every link reported as an image is collected.
 *
 * @param url address of the document to fetch and parse
 * @return an {@code Object[]} with the distinct image link URIs, in no particular order
 * @throws IOException   if the URL is malformed or cannot be opened or read
 * @throws SAXException  propagated from the parser
 * @throws TikaException propagated from the parser
 */
public Object extractImageLinks(String url) throws IOException, SAXException, TikaException {
    Set<String> imageLinks = new HashSet<>();
    // try-with-resources: the previous finally block called is.close() unconditionally and
    // threw a NullPointerException, hiding the real error, when opening the URL failed.
    try (InputStream is = TikaInputStream.get(new URL(url).openStream())) {
        Metadata metadata = new Metadata();
        LinkContentHandler handler = new LinkContentHandler();
        AutoDetectParser parser = new AutoDetectParser();
        parser.parse(is, handler, metadata);
        for (Link link : handler.getLinks()) {
            if (link.isImage()) {
                imageLinks.add(link.getUri());
            }
        }
    }
    return imageLinks.toArray();
}
示例12: handleEmbedded
import org.apache.tika.io.TikaInputStream; //導入依賴的package包/類
/**
 * Passes an in-memory embedded document to the embedded extractor, if the extractor wants it.
 *
 * @param name              resource name to record in the metadata, or {@code null}
 * @param type              content type to record in the metadata, or {@code null}
 * @param contents          raw bytes of the embedded document
 * @param embeddedExtractor decides whether to parse and performs the parse
 * @param handler           handler of the containing document
 */
private void handleEmbedded(String name, String type, byte[] contents,
        EmbeddedDocumentExtractor embeddedExtractor, ContentHandler handler)
        throws IOException, SAXException, TikaException {
    Metadata metadata = new Metadata();
    if (name != null) {
        metadata.set(Metadata.RESOURCE_NAME_KEY, name);
    }
    if (type != null) {
        metadata.set(Metadata.CONTENT_TYPE, type);
    }
    if (embeddedExtractor.shouldParseEmbedded(metadata)) {
        // The stream is created here, so it is closed here; it was previously never closed.
        try (TikaInputStream stream = TikaInputStream.get(contents)) {
            embeddedExtractor.parseEmbedded(
                    stream,
                    new EmbeddedContentHandler(handler),
                    metadata, false);
        }
    }
}
示例13: handleEmbeddedResource
import org.apache.tika.io.TikaInputStream; //導入依賴的package包/類
/**
 * Builds metadata for an embedded resource and, if the extractor accepts it, has the
 * extractor parse the resource. The resource stream is always closed before returning.
 *
 * @param resource       stream of the embedded resource; closed by this method
 * @param filename       recorded as both the MIME-file hint and the resource name, if not {@code null}
 * @param relationshipID recorded as the embedded relationship id, if not {@code null}
 * @param mediaType      recorded as the content type, if not {@code null}
 * @param xhtml          handler that receives the embedded content
 * @param outputHtml     passed through to the extractor
 */
protected void handleEmbeddedResource(TikaInputStream resource, String filename,
        String relationshipID, String mediaType, XHTMLContentHandler xhtml,
        boolean outputHtml)
        throws IOException, SAXException, TikaException {
    try {
        final Metadata embeddedMeta = new Metadata();

        if (filename != null) {
            embeddedMeta.set(Metadata.TIKA_MIME_FILE, filename);
            embeddedMeta.set(Metadata.RESOURCE_NAME_KEY, filename);
        }
        if (relationshipID != null) {
            embeddedMeta.set(Metadata.EMBEDDED_RELATIONSHIP_ID, relationshipID);
        }
        if (mediaType != null) {
            embeddedMeta.set(Metadata.CONTENT_TYPE, mediaType);
        }

        final boolean accepted = extractor.shouldParseEmbedded(embeddedMeta);
        if (accepted) {
            extractor.parseEmbedded(resource, xhtml, embeddedMeta, outputHtml);
        }
    } finally {
        // This method closes the stream on every path, including failures.
        resource.close();
    }
}
示例14: handleEmbeddedFile
import org.apache.tika.io.TikaInputStream; //導入依賴的package包/類
/**
 * Handles an embedded file in the document.
 *
 * <p>Metadata is built from the relationship id, the last path segment of the part name,
 * and the part's content type. If the embedded extractor accepts that metadata, the part's
 * data is parsed.
 *
 * @param part    the package part holding the embedded file
 * @param handler handler of the containing document
 * @param rel     relationship id recorded in the metadata
 */
protected void handleEmbeddedFile(PackagePart part, ContentHandler handler, String rel)
        throws SAXException, IOException {
    Metadata metadata = new Metadata();
    metadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, rel);

    // Get the name
    String name = part.getPartName().getName();
    metadata.set(
            Metadata.RESOURCE_NAME_KEY,
            name.substring(name.lastIndexOf('/') + 1));

    // Get the content type
    metadata.set(
            Metadata.CONTENT_TYPE, part.getContentType());

    // Call the recursing handler
    if (embeddedExtractor.shouldParseEmbedded(metadata)) {
        // The stream is opened here, so it is closed here; it was previously never closed.
        try (TikaInputStream stream = TikaInputStream.get(part.getInputStream())) {
            embeddedExtractor.parseEmbedded(
                    stream,
                    new EmbeddedContentHandler(handler),
                    metadata, false);
        }
    }
}
示例15: parse
import org.apache.tika.io.TikaInputStream; //導入依賴的package包/類
/**
 * Collects metadata from the given stream and then emits an empty XHTML document.
 *
 * <p>The stream is wrapped in a {@link TikaInputStream} tied to a {@link TemporaryResources}
 * instance. The file obtained from the wrapper is passed to
 * {@code ImageMetadataExtractor.parseTiff}, after which the wrapper itself is passed to
 * {@code JempboxExtractor.parse}. The temporary resources are disposed whether or not
 * extraction succeeds.
 *
 * @param stream   source data
 * @param handler  receives the start/end document events
 * @param metadata populated by the two extractors and passed to the XHTML handler
 * @param context  not used by this method
 */
public void parse(
        InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    final TemporaryResources resources = new TemporaryResources();
    try {
        final TikaInputStream tikaStream = TikaInputStream.get(stream, resources);

        final ImageMetadataExtractor imageExtractor = new ImageMetadataExtractor(metadata);
        imageExtractor.parseTiff(tikaStream.getFile());

        final JempboxExtractor jempboxExtractor = new JempboxExtractor(metadata);
        jempboxExtractor.parse(tikaStream);
    } finally {
        // Always release whatever the wrapper registered with the temporary resources.
        resources.dispose();
    }

    // No body content is produced: only the document start/end events are sent.
    final XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
    document.startDocument();
    document.endDocument();
}