当前位置: 首页>>代码示例>>Java>>正文


Java Detector类代码示例

本文整理汇总了Java中org.apache.tika.detect.Detector的典型用法代码示例。如果您正苦于以下问题:Java Detector类的具体用法?Java Detector怎么用?Java Detector使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


Detector类属于org.apache.tika.detect包,在下文中一共展示了Detector类的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: findMediaType

import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Determines the media type of a stream via the Apache Tika detector, passing the
 * file name along as a resource-name hint in the metadata.
 *
 * <p>The stream is wrapped in a {@link BufferedInputStream}, which is always closed
 * before this method returns. An {@link IOException} raised by that close is ignored.
 *
 * @param is content to inspect
 * @param fileName name stored under {@code Metadata.RESOURCE_NAME_KEY}
 * @return the media type reported by the detector
 * @throws IOException propagated from the detector
 */
public static MediaType findMediaType(InputStream is, String fileName) throws IOException {
    final BufferedInputStream buffered = new BufferedInputStream(is);
    try {
        final Detector tikaDetector = new AutoDetectParser().getDetector();
        final Metadata metadata = new Metadata();
        metadata.add(Metadata.RESOURCE_NAME_KEY, fileName);
        return tikaDetector.detect(buffered, metadata);
    } finally {
        try {
            buffered.close();
        } catch (IOException ignored) {
            // Best-effort close: a failure here is deliberately not reported.
        }
    }
}
 
开发者ID:ilscipio,项目名称:scipio-erp,代码行数:22,代码来源:TikaUtil.java

示例2: getFullText

import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Parses the file at {@code filepath} with an {@code AutoDetectParser} and returns
 * whatever text the {@code BodyContentHandler} wrote to its backing writer.
 *
 * @param filepath path of the file to parse
 * @return the text collected during parsing
 * @throws IOException propagated from opening, parsing or closing the input
 * @throws SAXException propagated from the parser
 * @throws TikaException propagated from the parser
 */
private static String getFullText(final String filepath) throws IOException, SAXException, TikaException {
    final StringWriter extracted = new StringWriter();
    final TikaInputStream source = TikaInputStream.get(new File(filepath));

    try {
        final Parser autoParser = new AutoDetectParser(new DefaultDetector());

        // The parser is registered in the context under Parser.class
        // (presumably so nested documents are handled by the same parser — confirm).
        final ParseContext context = new ParseContext();
        context.set(Parser.class, autoParser);

        autoParser.parse(source, new BodyContentHandler(extracted), new Metadata(), context);
    } finally {
        source.close();
    }

    return extracted.toString();
}
 
开发者ID:CoEIA,项目名称:DEM,代码行数:23,代码来源:ItemFactory.java

示例3: detectContentType

import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Detects the content type of the resource at {@code uri}.
 *
 * <p>The last path segment of {@code uri} is stored in the metadata as a resource-name
 * hint before detection. The input stream is closed on every path, including when
 * detection throws.
 *
 * @param uri location handed to {@code createInputStream}
 * @return the detected content type, or {@code MediaType.OCTET_STREAM} as a string when
 *         the detected value is null or empty
 * @throws FileNotFoundException propagated from opening the input
 * @throws IOException propagated from opening, reading or closing the input
 * @throws TikaException propagated from opening the input
 */
public static String detectContentType(String uri) throws FileNotFoundException, IOException, TikaException {
	final Detector detector = config.getDetector();
	final Metadata metadata = new Metadata();
	final String contentType;

	// try-with-resources guarantees the stream is released even if detection fails.
	try (TikaInputStream inputStream = createInputStream(uri)) {
		// Set the file name. This provides some level of type-hinting.
		metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName());

		// Detect the content type.
		contentType = detector.detect(inputStream, metadata).toString();
	}

	// Return the default content-type if undetermined.
	if (contentType == null || contentType.isEmpty()) {
		return MediaType.OCTET_STREAM.toString();
	}

	return contentType;
}
 
开发者ID:ICIJ,项目名称:node-tika,代码行数:21,代码来源:NodeTika.java

示例4: isImage

import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Checks whether the given file content is an image.
 *
 * <p>The content is wrapped in a {@code TikaInputStream} that is closed once detection
 * has finished. Any exception raised while creating, reading or closing that stream is
 * logged and rethrown as {@code InternalReportPortalClientException}.
 *
 * @param fileContent raw bytes of the file to inspect
 * @return {@code true} if the detected media type string contains {@code IMAGE_TYPE}
 */
public static boolean isImage(byte[] fileContent) {

	Detector detector = new AutoDetectParser().getDetector();
	MediaType mediaType;
	// The stream used to be created inline and never closed; close it so that any
	// resources it holds are released.
	try (TikaInputStream content = TikaInputStream.get(fileContent)) {
		mediaType = detector.detect(content, new Metadata());
	} catch (Exception e) {
		logger.error("Unable to read file content.", e);
		throw new InternalReportPortalClientException("Unable to read file content.", e);
	}
	return mediaType.toString().contains(IMAGE_TYPE);
}
 
开发者ID:reportportal,项目名称:client-java-core,代码行数:19,代码来源:ImageConverter.java

示例5: getMimeType

import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Runs the Tika detector over {@code stream}, using {@code md} as the detection metadata.
 *
 * <p>The stream is wrapped in a {@link BufferedInputStream} that is closed before returning.
 *
 * @param stream content to inspect
 * @param md metadata passed to the detector
 * @return the media type reported by the detector
 * @throws IOException propagated from detection or from closing the stream
 */
private static MediaType getMimeType(InputStream stream, Metadata md) throws IOException {
	try (BufferedInputStream buffered = new BufferedInputStream(stream)) {
		final Detector tikaDetector = new AutoDetectParser().getDetector();
		return tikaDetector.detect(buffered, md);
	}
}
 
开发者ID:U-QASAR,项目名称:u-qasar.platform,代码行数:10,代码来源:FileUploadUtil.java

示例6: AbstractPOIFSExtractor

import org.apache.tika.detect.Detector; //导入依赖的package包/类
protected AbstractPOIFSExtractor(ParseContext context) {
  EmbeddedDocumentExtractor ex = context.get(EmbeddedDocumentExtractor.class);

  if (ex == null) {
    this.extractor = new ParsingEmbeddedDocumentExtractor(context);
  } else {
    this.extractor = ex;
  }

  tikaConfig = context.get(TikaConfig.class);
  mimeTypes = context.get(MimeTypes.class);
  detector = context.get(Detector.class);
}
 
开发者ID:kolbasa,项目名称:OCRaptor,代码行数:14,代码来源:AbstractPOIFSExtractor.java

示例7: getType

import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Detects the media type of the data stored under the given alias.
 *
 * <p>The alias is also passed to the detector as the resource-name hint.
 *
 * @param alias identifier used both to open the input stream and as the resource name
 * @return the media type reported by the Tika detector
 * @throws StorageIOException wrapping any {@link IOException} raised while opening,
 *         reading or closing the stream
 */
public final MediaType getType(final String alias)
		throws StorageIOException {
	try (InputStream content = this.getInputStream(alias)) {
		final Detector tikaDetector = new AutoDetectParser().getDetector();

		final Metadata metadata = new Metadata();
		metadata.add(Metadata.RESOURCE_NAME_KEY, alias);

		return tikaDetector.detect(content, metadata);
	} catch (IOException exc) {
		throw new StorageIOException(alias, exc);
	}
}
 
开发者ID:SHAF-WORK,项目名称:shaf,代码行数:16,代码来源:AbstractStorageDriver.java

示例8: detectContentTypeAndCharset

import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Detects the content type and character set of the resource at {@code uri}.
 *
 * <p>The input stream is closed on every path, including when content-type detection,
 * {@code fillMetadata} or charset detection throws.
 *
 * @param uri location handed to {@code createInputStream}
 * @return {@code MediaType.OCTET_STREAM} as a string when the detected content type is
 *         null or empty; otherwise the content type, followed by
 *         {@code "; charset=<name>"} when a charset was determined
 * @throws FileNotFoundException propagated from opening the input
 * @throws IOException propagated from opening, reading or closing the input
 * @throws TikaException propagated from opening the input or from charset detection
 */
public static String detectContentTypeAndCharset(String uri) throws FileNotFoundException, IOException, TikaException {
	final Detector detector = config.getDetector();
	final Metadata metadata = new Metadata();
	final String contentType;
	final String charset;

	// try-with-resources guarantees the stream is released even if a later step fails.
	try (TikaInputStream inputStream = createInputStream(uri)) {
		// Set the file name. This provides some level of type-hinting.
		metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName());

		// Detect the content type.
		contentType = detector.detect(inputStream, metadata).toString();

		// Use metadata to provide type-hinting to the AutoDetectReader.
		fillMetadata(metadata, contentType, uri);

		// Detect the character set. The reader wraps inputStream, which is closed
		// when this try block exits.
		final AutoDetectReader reader = new AutoDetectReader(inputStream, metadata);
		charset = reader.getCharset().toString();
	}

	// Return the default content-type if undetermined.
	if (contentType == null || contentType.isEmpty()) {
		return MediaType.OCTET_STREAM.toString();
	}

	// Append the charset if the content-type was determined.
	if (charset != null && !charset.isEmpty()) {
		return contentType + "; charset=" + charset;
	}

	return contentType;
}
 
开发者ID:ICIJ,项目名称:node-tika,代码行数:33,代码来源:NodeTika.java

示例9: doGet

import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Sends the stored photo of the candidate identified by the session attribute
 * {@code candidateID} to the client as a file download.
 *
 * <p>Nothing is written when the candidate cannot be found or has no photo. The MIME
 * type is detected from the photo bytes with Apache Tika; if detection fails the error
 * is printed and the response is sent with an empty content type and no file extension.
 *
 * @param request current request; its session must hold an integer {@code candidateID}
 * @param response response that receives the headers and the photo bytes
 * @throws ServletException declared by the servlet contract
 * @throws IOException if writing the response fails
 */
protected void doGet(HttpServletRequest request, HttpServletResponse response)
		throws ServletException, IOException {

	HttpSession session = request.getSession();

	// Pulls the candidateID of the candidate to retrieve photo of
	int candidateID = (int) session.getAttribute("candidateID");

	// The EntityManager is only needed for the lookup, so it is closed as soon as the
	// photo bytes have been read (it was previously never closed).
	byte[] pictureBlob = null;
	EntityManager em = EMFUtil.getEMFactory().createEntityManager();
	try {
		// Retrieves candidate from database based on candidateID
		Candidates candidate = em.find(Candidates.class, candidateID);

		// Retrieves photo from candidate's profile; an unknown candidate is treated
		// the same way as a candidate without a photo.
		if (candidate != null) {
			pictureBlob = candidate.getPhoto();
		}
	} finally {
		em.close();
	}

	// If no photo exists there is nothing to send
	if (pictureBlob == null) {
		return;
	}

	// Uses APACHE Tika api to obtain MIMETYPE and the matching file extension
	String mimeType = "";
	String extension = "";
	MimeTypes allTypes = MimeTypes.getDefaultMimeTypes();
	final Detector detector = new DefaultDetector(allTypes);

	try (TikaInputStream tikaIS = TikaInputStream.get(pictureBlob)) {
		mimeType = detector.detect(tikaIS, new Metadata()).toString();
		// getExtension() yields the file extension (e.g. ".png"); concatenating the
		// MimeType object itself would append the type name instead.
		extension = allTypes.forName(mimeType).getExtension();
	} catch (Exception e) {
		e.printStackTrace();
		System.out.println("Error getting MIME type");
	}

	// Tells web-page to prepare and download a picture file. All headers are set
	// before the body is written, because headers set after the response has been
	// committed are ignored.
	response.setContentType(mimeType);
	response.setContentLength(pictureBlob.length);
	response.setHeader("Content-Disposition", "attachment;filename=" + candidateID + extension);
	response.getOutputStream().write(pictureBlob);
}
 
开发者ID:faizan-ali,项目名称:full-javaee-app,代码行数:45,代码来源:PictureRetrieverServlet.java

示例10: getDetector

import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Returns the detector, fetching it from {@code getTikaConfig()} and storing it in
 * {@code detector} when none has been set yet.
 *
 * @return the stored detector, or the one just obtained from the Tika configuration
 */
protected Detector getDetector() {
  if (detector == null) {
    detector = getTikaConfig().getDetector();
  }
  return detector;
}
 
开发者ID:kolbasa,项目名称:OCRaptor,代码行数:7,代码来源:AbstractPOIFSExtractor.java

示例11: main

import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Ad-hoc driver: loads a Tika configuration from a fixed path, runs type detection on a
 * fixed sample file, then parses that file and prints the XML produced from the body
 * content to standard output.
 *
 * <p>Both file streams are closed via try-with-resources. Any exception is printed to
 * standard error and otherwise ignored.
 *
 * @param args unused
 */
public static void main(String[] args) {

        try {
                TikaConfig config = new TikaConfig(new File("/home/foo/a/code/big_bang/tika-1.5/"
                                + "tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml"));

                final AutoDetectParser autoDetectParser = new AutoDetectParser(config);
                final Tika tika = new Tika();

                File xpsFile = new File("/home/foo/a/temp/xlsx.xlsx");
                String fileName = xpsFile.getName();
                Metadata metadata = new Metadata();
                if (fileName != null && fileName.length() > 0) {
                        metadata.add(Metadata.RESOURCE_NAME_KEY, fileName);
                }

                // Detection pass. The result was never used by the original code; the
                // call is kept so the sequence of Tika calls is unchanged.
                // NOTE(review): confirm it can be dropped.
                try (InputStream detectStream = new FileInputStream(xpsFile)) {
                        tika.detect(detectStream, metadata);
                }

                // Serialise the SAX events of the document body as non-indented XML.
                StringWriter sw = new StringWriter();
                SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
                TransformerHandler handler = factory.newTransformerHandler();
                handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
                handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "no");
                handler.setResult(new StreamResult(sw));
                BodyContentHandler bch = new BodyContentHandler(handler);
                // Explicit startDocument() retained from the original call sequence.
                handler.startDocument();

                // Parsing pass on a fresh stream, since the detection stream is consumed.
                try (InputStream parseStream = new FileInputStream(xpsFile)) {
                        autoDetectParser.parse(parseStream, bch, metadata);
                }

                System.out.println(sw.toString());

        } catch (Exception e) {
                e.printStackTrace();
        }
}
 
开发者ID:kolbasa,项目名称:OCRaptor,代码行数:79,代码来源:OOXMLParser.java

示例12: testExcelXLSB

import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * The .xlsb file format (an OOXML container with binary blobs) is not
 *  supported at present, but such files must not cause a failure
 *  either (TIKA-826).
 */
@Test
public void testExcelXLSB() throws Exception {
   final String resource = "/test-documents/testEXCEL.xlsb";

   Detector typeDetector = new DefaultDetector();
   AutoDetectParser autoParser = new AutoDetectParser();

   InputStream stream = ExcelParserTest.class.getResourceAsStream(resource);
   Metadata metadata = new Metadata();
   metadata.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");

   // Detection has to report the binary workbook type
   MediaType detected = null;
   try {
      detected = typeDetector.detect(stream, metadata);
      assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", detected.toString());
   } finally {
      stream.close();
   }

   // Neither the OfficeParser nor the OOXMLParser lists the type as supported
   boolean officeSupports = new OfficeParser().getSupportedTypes(new ParseContext()).contains(detected);
   assertEquals(false, officeSupports);
   boolean ooxmlSupports = new OOXMLParser().getSupportedTypes(new ParseContext()).contains(detected);
   assertEquals(false, ooxmlSupports);

   // The AutoDetectParser copes with the file and yields no text
   stream = ExcelParserTest.class.getResourceAsStream(resource);
   try {
      ParseContext parseContext = new ParseContext();
      parseContext.set(Locale.class, Locale.US);

      ContentHandler bodyHandler = new BodyContentHandler(-1);
      autoParser.parse(stream, bodyHandler, metadata, parseContext);

      assertEquals("", bodyHandler.toString());
   } finally {
      stream.close();
   }
}
 
开发者ID:kanrourou,项目名称:software-testing,代码行数:46,代码来源:ExcelParserTest.java

示例13: testExcel95

import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * The old Excel 95 .xls file format is not supported at present,
 *  but such files must not cause a failure either (TIKA-976).
 */
@Test
public void testExcel95() throws Exception {
   final String resource = "/test-documents/testEXCEL_95.xls";

   Detector typeDetector = new DefaultDetector();
   AutoDetectParser autoParser = new AutoDetectParser();

   InputStream stream = ExcelParserTest.class.getResourceAsStream(resource);
   Metadata metadata = new Metadata();
   metadata.add(Metadata.RESOURCE_NAME_KEY, "excel_95.xls");

   // Detection has to report the regular Excel type
   MediaType detected = null;
   try {
      detected = typeDetector.detect(stream, metadata);
      assertEquals("application/vnd.ms-excel", detected.toString());
   } finally {
      stream.close();
   }

   // The OfficeParser lists the type as supported, the OOXMLParser does not
   boolean officeSupports = new OfficeParser().getSupportedTypes(new ParseContext()).contains(detected);
   assertEquals(true, officeSupports);
   boolean ooxmlSupports = new OOXMLParser().getSupportedTypes(new ParseContext()).contains(detected);
   assertEquals(false, ooxmlSupports);

   // The AutoDetectParser copes with the file and yields no text
   stream = ExcelParserTest.class.getResourceAsStream(resource);
   try {
      ParseContext parseContext = new ParseContext();
      parseContext.set(Locale.class, Locale.US);

      ContentHandler bodyHandler = new BodyContentHandler(-1);
      autoParser.parse(stream, bodyHandler, metadata, parseContext);

      assertEquals("", bodyHandler.toString());
   } finally {
      stream.close();
   }
}
 
开发者ID:kanrourou,项目名称:software-testing,代码行数:45,代码来源:ExcelParserTest.java

示例14: getDetector

import org.apache.tika.detect.Detector; //导入依赖的package包/类
/**
 * Returns the current value of {@code detector} unchanged; no lookup or
 * initialisation is performed here.
 *
 * @return the value of {@code detector}
 */
protected Detector getDetector() {
  return detector;
}
 
开发者ID:cloudera,项目名称:cdk,代码行数:4,代码来源:DetectMimeTypeBuilder.java


注:本文中的org.apache.tika.detect.Detector类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。