本文整理汇总了Java中org.apache.tika.detect.Detector类的典型用法代码示例。如果您正苦于以下问题：Java Detector类的具体用法？Java Detector怎么用？Java Detector使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
Detector类属于org.apache.tika.detect包，在下文中一共展示了Detector类的14个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: findMediaType
import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Determines the media type of a stream through the Apache Tika library, using both the
 * file name and the stream's magic numbers.
 *
 * <p>The stream is wrapped in a {@link BufferedInputStream} that is closed before this
 * method returns, which also closes {@code is}.
 *
 * @param is stream whose content is inspected
 * @param fileName resource name handed to the detector as a hint
 * @return the media type reported by the detector
 * @throws IOException if the detector fails while reading the stream
 */
public static MediaType findMediaType(InputStream is, String fileName) throws IOException {
    BufferedInputStream buffered = new BufferedInputStream(is);
    try {
        Metadata hints = new Metadata();
        hints.add(Metadata.RESOURCE_NAME_KEY, fileName);
        return new AutoDetectParser().getDetector().detect(buffered, hints);
    } finally {
        try {
            buffered.close();
        } catch (IOException ignored) {
            // Best effort: a failed close must not replace the detection outcome.
        }
    }
}
示例2: getFullText
import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Parses the file at {@code filepath} with an auto-detecting parser and returns the text
 * written by a {@code BodyContentHandler}.
 *
 * @param filepath path of the file to parse
 * @return the text collected while parsing
 * @throws IOException if the file cannot be read or closed
 * @throws SAXException if the content handler reports an error
 * @throws TikaException if parsing fails
 */
private static String getFullText(final String filepath) throws IOException, SAXException, TikaException {
    final StringWriter extracted = new StringWriter();
    final TikaInputStream source = TikaInputStream.get(new File(filepath));
    try {
        final Parser autoParser = new AutoDetectParser(new DefaultDetector());
        final ParseContext context = new ParseContext();
        // Register the parser in the context it is invoked with
        // (presumably so nested documents are handled by the same parser — confirm).
        context.set(Parser.class, autoParser);
        autoParser.parse(source, new BodyContentHandler(extracted), new Metadata(), context);
    } finally {
        source.close();
    }
    return extracted.toString();
}
示例3: detectContentType
import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Detects the content type of the resource identified by {@code uri}.
 *
 * <p>The resource's file name is supplied to the detector as a type hint. The input stream
 * is always closed, including when detection throws.
 *
 * @param uri location of the resource; passed to {@code createInputStream} and also used
 *            to derive the file name hint
 * @return the detected content type, or {@code application/octet-stream} when the detector
 *         yields an empty value
 * @throws FileNotFoundException declared for callers; presumably raised when the resource
 *         cannot be opened — confirm against {@code createInputStream}
 * @throws IOException if reading or closing the stream fails
 * @throws TikaException declared for callers; presumably raised by {@code createInputStream}
 */
public static String detectContentType(String uri) throws FileNotFoundException, IOException, TikaException {
    final Detector detector = config.getDetector();
    final Metadata metadata = new Metadata();
    // Set the file name. This provides some level of type-hinting.
    metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName());
    final String contentType;
    // try-with-resources guarantees the stream is released even if detection fails.
    try (TikaInputStream inputStream = createInputStream(uri)) {
        contentType = detector.detect(inputStream, metadata).toString();
    }
    // Return the default content-type if undetermined.
    if (contentType == null || contentType.isEmpty()) {
        return MediaType.OCTET_STREAM.toString();
    }
    return contentType;
}
示例4: isImage
import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Checks whether the given file content is an image.
 *
 * <p>The media type is detected from the bytes alone (no file name hint) and the content
 * counts as an image when the type's string form contains {@code IMAGE_TYPE}.
 *
 * @param fileContent raw bytes of the file to inspect
 * @return {@code true} if the detected media type contains {@code IMAGE_TYPE}
 * @throws InternalReportPortalClientException if the content cannot be read or the
 *         detection stream cannot be closed
 */
public static boolean isImage(byte[] fileContent) {
    Detector detector = new AutoDetectParser().getDetector();
    MediaType mediaType;
    // The stream is opened in try-with-resources so it is released after detection.
    try (TikaInputStream content = TikaInputStream.get(fileContent)) {
        mediaType = detector.detect(content, new Metadata());
    } catch (Exception e) {
        logger.error("Unable to read file content.", e);
        throw new InternalReportPortalClientException("Unable to read file content.", e);
    }
    return mediaType.toString().contains(IMAGE_TYPE);
}
示例5: getMimeType
import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Detects the media type of {@code stream} using the detector of a fresh
 * {@code AutoDetectParser}.
 *
 * <p>The stream is wrapped in a {@link BufferedInputStream} that is closed on return,
 * which also closes {@code stream}.
 *
 * @param stream content to inspect
 * @param md metadata passed to the detector
 * @return the detected media type
 * @throws IOException if reading or closing the stream fails
 */
private static MediaType getMimeType(InputStream stream, Metadata md) throws IOException {
    try (BufferedInputStream buffered = new BufferedInputStream(stream)) {
        return new AutoDetectParser().getDetector().detect(buffered, md);
    }
}
示例6: AbstractPOIFSExtractor
import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Initialises the extractor's collaborators from the given parse context.
 *
 * <p>An {@code EmbeddedDocumentExtractor} registered in the context is used as-is; when
 * none is registered a {@code ParsingEmbeddedDocumentExtractor} is created for the context.
 * The Tika configuration, MIME type registry and detector are taken from the context as
 * returned (presumably {@code null} when not registered — confirm).
 *
 * @param context parse context supplying the collaborators
 */
protected AbstractPOIFSExtractor(ParseContext context) {
    EmbeddedDocumentExtractor provided = context.get(EmbeddedDocumentExtractor.class);
    this.extractor = (provided != null) ? provided : new ParsingEmbeddedDocumentExtractor(context);
    this.tikaConfig = context.get(TikaConfig.class);
    this.mimeTypes = context.get(MimeTypes.class);
    this.detector = context.get(Detector.class);
}
示例7: getType
import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Returns the data type of the entry stored under {@code alias}.
 *
 * <p>The entry's stream is opened, inspected by the detector of a fresh
 * {@code AutoDetectParser} with the alias as resource name hint, and then closed.
 *
 * @param alias name of the entry; also used as the resource name hint
 * @return the media type reported by the detector
 * @throws StorageIOException if an {@link IOException} occurs while opening, reading or
 *         closing the entry's stream
 */
public final MediaType getType(final String alias)
        throws StorageIOException {
    try (InputStream content = this.getInputStream(alias)) {
        final Detector typeDetector = new AutoDetectParser().getDetector();
        final Metadata hints = new Metadata();
        hints.add(Metadata.RESOURCE_NAME_KEY, alias);
        return typeDetector.detect(content, hints);
    } catch (IOException exc) {
        throw new StorageIOException(alias, exc);
    }
}
示例8: detectContentTypeAndCharset
import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Detects the content type and character set of the resource identified by {@code uri}.
 *
 * <p>The content type is detected first (with the file name as a hint); the metadata is then
 * filled via {@code fillMetadata} and used to hint the charset detection on the same stream.
 * The stream is always closed, including when any of these steps throws.
 *
 * @param uri location of the resource; passed to {@code createInputStream} and also used
 *            to derive the file name hint
 * @return {@code application/octet-stream} when no content type is determined; otherwise
 *         the content type, followed by {@code "; charset=<name>"} when a charset is known
 * @throws FileNotFoundException declared for callers; presumably raised when the resource
 *         cannot be opened — confirm against {@code createInputStream}
 * @throws IOException if reading or closing the stream fails
 * @throws TikaException if charset detection (or a helper) fails
 */
public static String detectContentTypeAndCharset(String uri) throws FileNotFoundException, IOException, TikaException {
    final Detector detector = config.getDetector();
    final Metadata metadata = new Metadata();
    // Set the file name. This provides some level of type-hinting.
    metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName());
    final String contentType;
    final String charset;
    // try-with-resources releases the stream even if detection or charset sniffing fails.
    try (TikaInputStream inputStream = createInputStream(uri)) {
        // Detect the content type.
        contentType = detector.detect(inputStream, metadata).toString();
        // Use metadata to provide type-hinting to the AutoDetectReader.
        fillMetadata(metadata, contentType, uri);
        // Detect the character set. The reader only wraps inputStream, so closing the
        // stream is what releases the underlying resource.
        final AutoDetectReader reader = new AutoDetectReader(inputStream, metadata);
        charset = reader.getCharset().toString();
    }
    // Return the default content-type if undetermined.
    if (contentType == null || contentType.isEmpty()) {
        return MediaType.OCTET_STREAM.toString();
    }
    // Append the charset if the content-type was determined.
    if (charset != null && !charset.isEmpty()) {
        return contentType + "; charset=" + charset;
    }
    return contentType;
}
示例9: doGet
import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Sends the photo of the candidate stored in the session as a file download.
 *
 * <p>The candidate id is read from the {@code candidateID} session attribute, the photo is
 * loaded from the database, its MIME type is detected with Apache Tika, and the bytes are
 * written to the response as an attachment named {@code <candidateID><extension>}. Nothing
 * is written when the candidate has no photo.
 *
 * @param request the HTTP request whose session carries the candidate id
 * @param response the HTTP response that receives the photo
 * @throws ServletException declared by the servlet contract
 * @throws IOException if writing the response fails
 */
protected void doGet(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    HttpSession session = request.getSession();
    // Pulls the candidateID of the candidate to retrieve photo of
    int candidateID = (int) session.getAttribute("candidateID");
    // Creates EntityManager to query database; it is closed as soon as the photo is read.
    EntityManager em = EMFUtil.getEMFactory().createEntityManager();
    byte[] pictureBlob;
    try {
        // Retrieves candidate from database based on candidateID
        Candidates candidate = em.find(Candidates.class, candidateID);
        // Retrieves photo from candidate's profile.
        pictureBlob = candidate.getPhoto();
    } finally {
        em.close();
    }
    // Nothing to send when no photo exists
    if (pictureBlob == null) {
        return;
    }
    // Uses Apache Tika api to obtain the MIME type; fall back to a generic binary type and
    // no extension when detection fails, so the response headers stay well-formed.
    String mimeType = "application/octet-stream";
    String extension = "";
    MimeTypes allTypes = MimeTypes.getDefaultMimeTypes();
    Detector detector = new DefaultDetector(allTypes);
    try (TikaInputStream tikaIS = TikaInputStream.get(pictureBlob)) {
        mimeType = detector.detect(tikaIS, new Metadata()).toString();
        // getExtension() yields the file extension; the MimeType object itself would
        // stringify to the type name.
        extension = allTypes.forName(mimeType).getExtension();
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("Error getting MIME type");
    }
    // Tells web-page to prepare and download a picture file. All headers are set before
    // the body is written, because headers set after the response is committed are ignored.
    response.setContentType(mimeType);
    response.setContentLength(pictureBlob.length);
    response.setHeader("Content-Disposition", "attachment;filename=" + candidateID + extension);
    response.getOutputStream().write(pictureBlob);
}
示例10: getDetector
import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Returns the detector, obtaining it from the Tika configuration on first use and
 * remembering it for subsequent calls.
 *
 * @return the remembered detector, or the one supplied by {@code getTikaConfig()}
 */
protected Detector getDetector() {
    if (detector == null) {
        detector = getTikaConfig().getDetector();
    }
    return detector;
}
示例11: main
import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Demo entry point: detects the type of a hard-coded spreadsheet file, parses it with an
 * {@code AutoDetectParser} built from a hard-coded Tika configuration file, and prints the
 * resulting XML to standard output.
 *
 * <p>Any exception is caught and its stack trace printed.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    try {
        TikaConfig config = new TikaConfig(new File("/home/foo/a/code/big_bang/tika-1.5/"
                + "tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml"));
        final AutoDetectParser autoDetectParser = new AutoDetectParser(config);
        final Tika tika = new Tika();
        File xpsFile = new File("/home/foo/a/temp/xlsx.xlsx");
        String fileName = xpsFile.getName();
        Metadata metadata = new Metadata();
        if (!fileName.isEmpty()) {
            metadata.add(Metadata.RESOURCE_NAME_KEY, fileName);
        }
        // The detected type is not used further below; the call is retained so the run
        // still exercises type detection. The stream is closed even if detection fails.
        try (InputStream detectionStream = new FileInputStream(xpsFile)) {
            tika.detect(detectionStream, metadata);
        }
        // Serialise the SAX events produced by the parser as non-indented XML text.
        StringWriter sw = new StringWriter();
        SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
        TransformerHandler handler = factory.newTransformerHandler();
        handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
        handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "no");
        handler.setResult(new StreamResult(sw));
        BodyContentHandler bch = new BodyContentHandler(handler);
        handler.startDocument();
        // A fresh stream is opened for parsing and closed when parsing ends.
        try (InputStream parseStream = new FileInputStream(xpsFile)) {
            autoDetectParser.parse(parseStream, bch, metadata);
        }
        System.out.println(sw.toString());
    } catch (Exception e) {
        e.printStackTrace();
    }
}
示例12: testExcelXLSB
import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * The .xlsb file format (an OOXML container with binary blobs) is not
 * currently supported, but such files must not break detection or
 * parsing either (TIKA-826).
 */
@Test
public void testExcelXLSB() throws Exception {
    Detector typeDetector = new DefaultDetector();
    AutoDetectParser autoParser = new AutoDetectParser();
    InputStream stream = ExcelParserTest.class.getResourceAsStream(
            "/test-documents/testEXCEL.xlsb");
    Metadata metadata = new Metadata();
    metadata.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");

    // Detection must identify the type correctly
    MediaType detected = null;
    try {
        detected = typeDetector.detect(stream, metadata);
        assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", detected.toString());
    } finally {
        stream.close();
    }

    // Neither the OfficeParser nor the OOXMLParser claims the type
    boolean officeSupports = (new OfficeParser()).getSupportedTypes(new ParseContext()).contains(detected);
    assertEquals(false, officeSupports);
    boolean ooxmlSupports = (new OOXMLParser()).getSupportedTypes(new ParseContext()).contains(detected);
    assertEquals(false, ooxmlSupports);

    // The AutoDetectParser copes with the file and yields no text
    stream = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL.xlsb");
    try {
        ContentHandler body = new BodyContentHandler(-1);
        ParseContext parseContext = new ParseContext();
        parseContext.set(Locale.class, Locale.US);
        autoParser.parse(stream, body, metadata, parseContext);
        assertEquals("", body.toString());
    } finally {
        stream.close();
    }
}
示例13: testExcel95
import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * The old Excel 95 .xls file format is not currently supported, but
 * such files must not break detection or parsing either (TIKA-976).
 */
@Test
public void testExcel95() throws Exception {
    Detector typeDetector = new DefaultDetector();
    AutoDetectParser autoParser = new AutoDetectParser();
    InputStream stream = ExcelParserTest.class.getResourceAsStream(
            "/test-documents/testEXCEL_95.xls");
    Metadata metadata = new Metadata();
    metadata.add(Metadata.RESOURCE_NAME_KEY, "excel_95.xls");

    // Detection must identify the type correctly
    MediaType detected = null;
    try {
        detected = typeDetector.detect(stream, metadata);
        assertEquals("application/vnd.ms-excel", detected.toString());
    } finally {
        stream.close();
    }

    // The OfficeParser claims the type, the OOXMLParser does not
    boolean officeSupports = (new OfficeParser()).getSupportedTypes(new ParseContext()).contains(detected);
    assertEquals(true, officeSupports);
    boolean ooxmlSupports = (new OOXMLParser()).getSupportedTypes(new ParseContext()).contains(detected);
    assertEquals(false, ooxmlSupports);

    // The AutoDetectParser copes with the file and yields no text
    stream = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_95.xls");
    try {
        ContentHandler body = new BodyContentHandler(-1);
        ParseContext parseContext = new ParseContext();
        parseContext.set(Locale.class, Locale.US);
        autoParser.parse(stream, body, metadata, parseContext);
        assertEquals("", body.toString());
    } finally {
        stream.close();
    }
}
示例14: getDetector
import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Returns the detector held by this object.
 *
 * @return the current value of the {@code detector} field
 */
protected Detector getDetector() {
    return this.detector;
}