本文整理匯總了Java中org.apache.tika.io.TikaInputStream.get方法的典型用法代碼示例。如果您正苦於以下問題:Java TikaInputStream.get方法的具體用法?Java TikaInputStream.get怎麼用?Java TikaInputStream.get使用的例子?那麼, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.apache.tika.io.TikaInputStream的用法示例。
在下文中一共展示了TikaInputStream.get方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點贊,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: parse
import org.apache.tika.io.TikaInputStream; //導入方法依賴的package包/類
/**
 * Runs the JPEG image-metadata extractor and the Jempbox extractor over the
 * given stream, then emits an XHTML document with no body content.
 *
 * @param stream   input to read; wrapped in a {@code TikaInputStream} bound to
 *                 a method-local {@code TemporaryResources}
 * @param handler  receives the start/end document events
 * @param metadata handed to both extractors and to the XHTML handler
 * @param context  not referenced by this method
 */
public void parse(
        InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    final TemporaryResources resources = new TemporaryResources();
    try {
        final TikaInputStream tikaStream = TikaInputStream.get(stream, resources);

        // The image extractor works on a file, the Jempbox extractor on the stream.
        final ImageMetadataExtractor imageExtractor = new ImageMetadataExtractor(metadata);
        imageExtractor.parseJpeg(tikaStream.getFile());

        final JempboxExtractor jempboxExtractor = new JempboxExtractor(metadata);
        jempboxExtractor.parse(tikaStream);
    } finally {
        // Release whatever was registered with the temporary resources,
        // whether or not extraction succeeded.
        resources.dispose();
    }

    final XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
    document.startDocument();
    document.endDocument();
}
示例2: getFullText
import org.apache.tika.io.TikaInputStream; //導入方法依賴的package包/類
/**
 * Parses the file at {@code filepath} with an auto-detecting parser and
 * returns everything the body content handler wrote.
 *
 * @param filepath path of the file to parse
 * @return the text collected in the writer backing the body content handler
 */
private static String getFullText(final String filepath) throws IOException, SAXException, TikaException {
    final StringWriter output = new StringWriter();
    final TikaInputStream source = TikaInputStream.get(new File(filepath));
    try {
        final Parser autoParser = new AutoDetectParser(new DefaultDetector());

        // Register the parser in the context under Parser.class.
        final ParseContext ctx = new ParseContext();
        ctx.set(Parser.class, autoParser);

        autoParser.parse(source, new BodyContentHandler(output), new Metadata(), ctx);
    } finally {
        source.close();
    }
    return output.toString();
}
示例3: TikaProperties
import org.apache.tika.io.TikaInputStream; //導入方法依賴的package包/類
/**
 * Parses {@code file} with an auto-detecting parser and keeps the resulting
 * metadata in the {@code metadata} field. Parsed content is sent to a
 * {@code DefaultHandler}.
 *
 * @param file the file whose metadata is collected
 */
public TikaProperties(File file) throws IOException, SAXException,
        TikaException {
    final TikaInputStream input = TikaInputStream.get(file);
    metadata = new Metadata();
    final Parser autoParser = new AutoDetectParser();
    try {
        autoParser.parse(input, new DefaultHandler(), metadata, new ParseContext());
    } finally {
        try {
            input.close();
        } catch (Exception ignored) {
            // Best-effort close: a failure here is deliberately not reported.
        }
    }
}
示例4: getMimeType
import org.apache.tika.io.TikaInputStream; //導入方法依賴的package包/類
/**
 * Detects the media type of an in-memory document.
 *
 * @param data     raw bytes of the document
 * @param fileName stored in the metadata under {@code RESOURCE_NAME_KEY}
 *                 before detection
 * @return string form of whatever {@code detector} reports
 * @throws Throwable anything raised while wrapping the bytes or detecting
 */
public static String getMimeType(byte[] data, String fileName) throws Throwable {
    final Metadata meta = new Metadata();
    meta.add(Metadata.RESOURCE_NAME_KEY, fileName);

    final TikaInputStream in = TikaInputStream.get(data, meta);
    try {
        return detector.detect(in, meta).toString();
    } finally {
        try {
            in.close();
        } catch (IOException ignored) {
            // Best-effort close: failures while closing are deliberately dropped.
        }
    }
}
示例5: getMimeType
import org.apache.tika.io.TikaInputStream; //導入方法依賴的package包/類
/**
 * Detects the media type of the {@code file} field using a
 * {@code DefaultDetector} built from the default MIME type registry.
 *
 * @return string form of the detected media type
 * @throws IOException if opening, reading or closing the stream fails
 */
public String getMimeType() throws IOException
{
    final TikaInputStream source = TikaInputStream.get(file);
    try
    {
        final DefaultDetector mimeDetector = new DefaultDetector(MimeTypes.getDefaultMimeTypes());
        return mimeDetector.detect(source, new Metadata()).toString();
    } finally
    {
        source.close();
    }
}
示例6: create
import org.apache.tika.io.TikaInputStream; //導入方法依賴的package包/類
/**
 * Stores an uploaded file in GridFS under its original filename.
 * <p>
 * The upload is rejected if an entry with the same filename already exists.
 * The content type is detected from a first stream over the upload; a failed
 * detection is logged and the asset is stored with a {@code null} content type.
 * The content itself is stored from a second, fresh stream.
 *
 * @param assetData the uploaded multipart file
 * @return the original filename of the stored asset
 * @throws IOException if a stream over the upload cannot be opened or closed
 * @throws DataIntegrityViolationException if the filename is already present
 */
@RequestMapping(method = POST)
@PreAuthorize("isAuthenticated()")
@ResponseBody
@ResponseStatus(CREATED)
public String create(
        @RequestParam("assetData") final MultipartFile assetData
) throws IOException {
    final String filename = assetData.getOriginalFilename();

    // Check duplicates
    final GridFSDBFile file = this.gridFs.findOne(Query.query(Criteria.where("filename").is(filename)));
    if (file != null) {
        throw new DataIntegrityViolationException(String.format("Asset with name '%s' already exists", filename));
    }

    MediaType mediaType = null;
    try (InputStream usedStream = TikaInputStream.get(assetData.getInputStream())) {
        try {
            mediaType = MediaType.parse(tika.detect(usedStream, filename));
        } catch (IOException e) {
            // Detection is optional: fall through with a null media type.
            log.warn("Could not detect content type", e);
        }
    }
    final String contentType = Optional.ofNullable(mediaType).map(MediaType::toString).orElse(null);

    // Open the content stream in its own try-with-resources so it is closed
    // here regardless of whether the store implementation closes it.
    try (InputStream content = assetData.getInputStream()) {
        this.gridFs.store(content, filename, contentType);
    }
    return filename;
}
示例7: detectMimeTypes
import org.apache.tika.io.TikaInputStream; //導入方法依賴的package包/類
/**
 * Detects the media type of {@code input} and returns the list filled in by
 * {@code unravel}, falling back to {@code MimeTypes.OCTET_STREAM} when that
 * list is empty.
 *
 * @param input    stream to inspect; must not be null. It is wrapped in a
 *                 {@code TikaInputStream} that is closed before returning.
 * @param fileName optional name, set as {@code RESOURCE_NAME_KEY} when present
 * @return a list holding at least one entry
 * @throws IOException if detection or closing the wrapper fails
 */
@Nonnull
@Override
public List<String> detectMimeTypes(final InputStream input, @Nullable final String fileName) throws IOException {
    checkNotNull(input);

    final Metadata hints = new Metadata();
    if (fileName != null) {
        hints.set(Metadata.RESOURCE_NAME_KEY, fileName);
    }

    final MediaType detectedType;
    try (final TikaInputStream tikaStream = TikaInputStream.get(input)) {
        detectedType = detector.detect(tikaStream, hints);
    }

    // unravel to least specific
    final List<String> result = Lists.newArrayList();
    unravel(result, detectedType);
    if (result.isEmpty()) {
        result.add(MimeTypes.OCTET_STREAM);
    }
    return result;
}
示例8: parseEmbedded
import org.apache.tika.io.TikaInputStream; //導入方法依賴的package包/類
/**
 * Handles an embedded resource. Resources whose metadata marks them as
 * {@code INLINE} are parsed into the supplied handler; every other resource
 * is passed to {@code spawnEmbedded}.
 *
 * @param input      stream of the embedded resource
 * @param handler    handler of the containing document
 * @param metadata   metadata of the embedded resource
 * @param outputHtml whether {@code writeStart}/{@code writeEnd} are called
 *                   around inline content
 */
@Override
public void parseEmbedded(final InputStream input, final ContentHandler handler, final Metadata metadata,
        final boolean outputHtml) throws SAXException, IOException {
    final String inlineMarker = TikaCoreProperties.EmbeddedResourceType.INLINE.toString();
    final boolean inline = inlineMarker.equals(metadata.get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE));

    if (!inline) {
        try (final TikaInputStream tikaStream = TikaInputStream.get(input)) {
            spawnEmbedded(tikaStream, metadata);
        }
        return;
    }

    // There's no need to spawn inline embeds, like images in PDFs. These should be concatenated to the main
    // document as usual.
    final ContentHandler inlineHandler = new EmbeddedContentHandler(new BodyContentHandler(handler));
    if (outputHtml) {
        writeStart(handler, metadata);
    }
    delegateParsing(input, inlineHandler, metadata);
    if (outputHtml) {
        writeEnd(handler);
    }
}
示例9: extractImageLinks
import org.apache.tika.io.TikaInputStream; //導入方法依賴的package包/類
/**
 * Extracts image links from the document at a URL. Needs improvement.
 * <p>
 * The URL content is parsed with an auto-detecting parser, and the URI of
 * every link that reports itself as an image is collected into a set.
 *
 * @param url address of the document to parse
 * @return an {@code Object[]} holding the collected image URIs, without duplicates
 * @throws IOException if the URL is malformed, cannot be opened, or reading fails
 * @throws SAXException if the content handler reports an error
 * @throws TikaException if parsing fails
 */
public Object extractImageLinks(String url) throws IOException, SAXException, TikaException {
    Set<String> imageLinks = new HashSet<String>();
    // try-with-resources: if the URL is malformed or cannot be opened, no stream
    // exists and nothing is closed, so the real exception reaches the caller
    // instead of being masked by a NullPointerException from a null close().
    try (InputStream is = TikaInputStream.get(new URL(url).openStream())) {
        LinkContentHandler handler = new LinkContentHandler();
        new AutoDetectParser().parse(is, handler, new Metadata());
        for (Link link : handler.getLinks()) {
            if (link.isImage()) {
                imageLinks.add(link.getUri());
            }
        }
    }
    return imageLinks.toArray();
}
示例10: parse
import org.apache.tika.io.TikaInputStream; //導入方法依賴的package包/類
/**
 * Runs the TIFF image-metadata extractor and the Jempbox extractor over the
 * given stream, then emits an XHTML document with no body content.
 *
 * @param stream   input to read; wrapped in a {@code TikaInputStream} bound to
 *                 a method-local {@code TemporaryResources}
 * @param handler  receives the start/end document events
 * @param metadata handed to both extractors and to the XHTML handler
 * @param context  not referenced by this method
 */
public void parse(
        InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    final TemporaryResources resources = new TemporaryResources();
    try {
        final TikaInputStream tikaStream = TikaInputStream.get(stream, resources);

        // The image extractor works on a file, the Jempbox extractor on the stream.
        final ImageMetadataExtractor imageExtractor = new ImageMetadataExtractor(metadata);
        imageExtractor.parseTiff(tikaStream.getFile());

        final JempboxExtractor jempboxExtractor = new JempboxExtractor(metadata);
        jempboxExtractor.parse(tikaStream);
    } finally {
        // Release whatever was registered with the temporary resources,
        // whether or not extraction succeeded.
        resources.dispose();
    }

    final XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
    document.startDocument();
    document.endDocument();
}
示例11: testEmbeddedFilesInChildren
import org.apache.tika.io.TikaInputStream; //導入方法依賴的package包/類
/**
 * Checks that text from a nested attachment appears in the XML output for the
 * container PDF, and that container extraction reports the two expected
 * attachments with their names and media types.
 */
public void testEmbeddedFilesInChildren() throws Exception {
    final String containerXml = getXML("/testPDF_childAttachments.pdf").xml;
    //"regressiveness" exists only in Unit10.doc not in the container pdf document
    assertTrue(containerXml.contains("regressiveness"));

    final TrackingHandler tracker = new TrackingHandler();
    final ContainerExtractor extractor = new ParserContainerExtractor();
    final TikaInputStream pdfStream = TikaInputStream.get(
            getResourceAsStream("/test-documents/testPDF_childAttachments.pdf"));
    try {
        // The extractor is also passed as the second argument, as in the original call.
        extractor.extract(pdfStream, extractor, tracker);
    } finally {
        pdfStream.close();
    }

    assertEquals(2, tracker.filenames.size());
    assertEquals(2, tracker.mediaTypes.size());

    assertEquals("Press Quality(1).joboptions", tracker.filenames.get(0));
    assertEquals(TYPE_TEXT, tracker.mediaTypes.get(0));

    assertEquals("Unit10.doc", tracker.filenames.get(1));
    assertEquals(TYPE_DOC, tracker.mediaTypes.get(1));
}
示例12: getImgInfo
import org.apache.tika.io.TikaInputStream; //導入方法依賴的package包/類
/**
 * Parses the resource at {@code urlStr} with an auto-detecting parser and
 * returns the metadata gathered for it. As a side effect, the
 * {@code imageParser} field is replaced with a new {@code ImageSavingParser}
 * wrapping that parser.
 *
 * @param urlStr address of the resource
 * @return the collected metadata, or {@code null} when {@code urlStr} is blank
 * @throws Exception if the URL is malformed or opening/parsing fails
 */
public Metadata getImgInfo(String urlStr) throws Exception {
    if (StringUtils.isBlank(urlStr)) {
        return null;
    }

    final URL imageUrl = new URL(urlStr);
    final Metadata imageMetadata = new Metadata();
    final StringWriter htmlOut = new StringWriter();
    final Parser autoParser = new AutoDetectParser();
    imageParser = new ImageSavingParser(autoParser);

    final TikaInputStream in = TikaInputStream.get(imageUrl, imageMetadata);
    try {
        final ContentHandler tee = new TeeContentHandler(getHtmlHandler(htmlOut));
        // Parsing goes through the plain auto-detect parser, not imageParser.
        autoParser.parse(in, tee, imageMetadata, new ParseContext());
    } finally {
        in.close();
    }
    return imageMetadata;
}
示例13: detect
import org.apache.tika.io.TikaInputStream; //導入方法依賴的package包/類
/**
 * Reads an input stream and checks if it has a CSV format.
 *
 * The general contract of a detector is to not close the specified stream before returning. It is the
 * responsibility of the caller to close it. The detector should leverage the mark/reset feature of the specified
 * {@link TikaInputStream} in order to let the stream always return the same bytes.
 *
 * Text detection is first attempted on the stream itself. If that yields nothing, up to
 * {@code FormatUtils.META_TAG_BUFFER_SIZE} bytes are read from the head of the stream, the stream is reset,
 * and detection is retried on the string decoded from those bytes.
 *
 * @param metadata the specified TIKA {@link Metadata}
 * @param inputStream the specified input stream
 * @return either null or a CSV format
 * @throws IOException if reading from or resetting the stream fails
 */
@Override
public Format detect(Metadata metadata, TikaInputStream inputStream) throws IOException {
    final Format direct = detectText(metadata, inputStream);
    if (direct != null) {
        return direct;
    }

    // Read the head of the stream between mark and reset so the caller still
    // sees the same bytes afterwards.
    inputStream.mark(FormatUtils.META_TAG_BUFFER_SIZE);
    final byte[] head = new byte[FormatUtils.META_TAG_BUFFER_SIZE];
    int filled = 0;
    int count = inputStream.read(head);
    while (count != -1 && filled < head.length) {
        filled += count;
        count = inputStream.read(head, filled, head.length - filled);
    }
    inputStream.reset();

    final String headText = FormatUtils.readFromBuffer(head, 0, filled);
    try (InputStream stream = TikaInputStream.get(IOUtils.toInputStream(headText))) {
        return detectText(new Metadata(), stream);
    }
}
示例14: createInputStream
import org.apache.tika.io.TikaInputStream; //導入方法依賴的package包/類
/**
 * Opens {@code uri} as a {@code TikaInputStream}. A URI starting with
 * {@code http://}, {@code https://} or {@code ftp://} is opened through a
 * URL connection; anything else is opened as a local file path.
 *
 * @param uri      remote URL or local file path
 * @param metadata optional; for the URL branch only, it is passed to
 *                 {@code fillMetadata} together with the connection's content type
 * @return a stream over the resource
 */
private static TikaInputStream createInputStream(String uri, Metadata metadata) throws FileNotFoundException, MalformedURLException, IOException {
    final boolean remote = uri.startsWith("http://") || uri.startsWith("https://") || uri.startsWith("ftp://");
    if (!remote) {
        return TikaInputStream.get(new FileInputStream(uri));
    }

    final URLConnection connection = new URL(uri).openConnection();
    // If a metadata object was passed, fill it with the content-type returned from the server.
    if (metadata != null) {
        fillMetadata(metadata, connection.getContentType());
    }
    return TikaInputStream.get(connection.getInputStream());
}
示例15: parseEntry
import org.apache.tika.io.TikaInputStream; //導入方法依賴的package包/類
/**
 * Passes one archive entry to the embedded extractor.
 *
 * @param archive   archive stream; wrapped in a {@code TikaInputStream} for the extractor
 * @param entry     the entry to handle
 * @param extractor receives the wrapped stream, a fresh {@code Record}, the
 *                  entry name and {@code getChild()}
 * @param record    not referenced by this method
 * @return {@code false} if the archive reports the entry data as unreadable,
 *         otherwise whatever the extractor returns
 */
private boolean parseEntry(ArchiveInputStream archive, ArchiveEntry entry, EmbeddedExtractor extractor, Record record) {
    final String entryName = entry.getName();
    if (!archive.canReadEntryData(entry)) {
        return false;
    }

    // TODO: or pass myself? (alternative considered: record.copy())
    final Record entryRecord = new Record();

    // For detectors to work, we need a mark/reset supporting
    // InputStream, which ArchiveInputStream isn't, so wrap
    final TemporaryResources tempResources = new TemporaryResources();
    try {
        final TikaInputStream wrapped = TikaInputStream.get(archive, tempResources);
        return extractor.parseEmbedded(wrapped, entryRecord, entryName, getChild());
    } finally {
        try {
            tempResources.dispose();
        } catch (TikaException e) {
            LOG.warn("Cannot dispose of tmp Tika resources", e);
        }
    }
}