本文整理汇总了Java中org.apache.tika.detect.DefaultDetector类的典型用法代码示例。如果您正苦于以下问题：Java DefaultDetector类的具体用法？Java DefaultDetector怎么用？Java DefaultDetector使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
DefaultDetector类属于org.apache.tika.detect包,在下文中一共展示了DefaultDetector类的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getFullText
import org.apache.tika.detect.DefaultDetector; //导入依赖的package包/类
/**
 * Parses the file at the given path with an auto-detecting Tika parser and
 * returns whatever the body content handler wrote.
 *
 * @param filepath path of the file to parse
 * @return the text collected by the {@code BodyContentHandler}
 * @throws IOException   propagated from opening, reading or closing the input stream
 * @throws SAXException  propagated from the parse call
 * @throws TikaException propagated from the parse call
 */
private static String getFullText(final String filepath) throws IOException, SAXException, TikaException {
    final StringWriter writer = new StringWriter();
    // try-with-resources always closes the stream, and a failure inside close()
    // is recorded as a suppressed exception instead of replacing a parse failure.
    try (TikaInputStream inputStream = TikaInputStream.get(new File(filepath))) {
        final Parser parser = new AutoDetectParser(new DefaultDetector());
        final ParseContext parseContext = new ParseContext();
        // Make the same parser available through the context
        // (presumably so embedded documents are handled by it as well).
        parseContext.set(Parser.class, parser);
        parser.parse(inputStream, new BodyContentHandler(writer), new Metadata(), parseContext);
    }
    return writer.toString();
}
示例2: setTikaConfig
import org.apache.tika.detect.DefaultDetector; //导入依赖的package包/类
/**
 * Stores the supplied Tika configuration and rebuilds the detector and
 * parser so that they are based on it.
 *
 * @param tikaConfig the Tika configuration whose MIME repository backs the detector
 */
public void setTikaConfig(TikaConfig tikaConfig)
{
    this.config = tikaConfig;

    // The detector is built from the configuration's MIME repository,
    // and the auto-detecting parser is layered on top of that detector.
    this.detector = new DefaultDetector(this.config.getMimeRepository());
    this.parser = new AutoDetectParser(this.detector);
}
示例3: getMimeType
import org.apache.tika.detect.DefaultDetector; //导入依赖的package包/类
/**
 * Detects the media type of {@code file} with a {@code DefaultDetector}
 * backed by Tika's default MIME type registry.
 *
 * @return the string form of the detected media type
 * @throws IOException propagated from opening, reading or closing the stream
 */
public String getMimeType() throws IOException
{
    // try-with-resources closes the stream on every path and keeps a close()
    // failure from hiding an exception thrown during detection.
    try (TikaInputStream tikaIS = TikaInputStream.get(file))
    {
        final DefaultDetector detector = new DefaultDetector(MimeTypes.getDefaultMimeTypes());
        return detector.detect(tikaIS, new Metadata()).toString();
    }
}
示例4: initialise
import org.apache.tika.detect.DefaultDetector; //导入依赖的package包/类
/**
 * Post-construction hook: creates the detector and derives the default
 * directory as the "resources" folder beneath the configured home.
 */
@PostConstruct
public void initialise() {
    this.detector = new DefaultDetector();
    this.defaultDir = configurationService.getHome() + File.separator + "resources";
    log.debug("FileSystem Content Reader started (default directory: {})", this.defaultDir);
}
示例5: MimeTypeUtils
import org.apache.tika.detect.DefaultDetector; //导入依赖的package包/类
public MimeTypeUtils(InputStream mimeIs, boolean magic) {
try {
this.mimeTypes = MimeTypesFactory.create(mimeIs);
this.mimeMagic = magic;
this.tika = new Tika(new DefaultDetector(this.mimeTypes));
}catch (Exception e) {
LOG.log(Level.SEVERE, "Failed to load MimeType Registry : " + e.getMessage(), e);
}
}
示例6: doGet
import org.apache.tika.detect.DefaultDetector; //导入依赖的package包/类
/**
 * Sends the photo of the candidate named by the session attribute
 * {@code candidateID} to the client as a file download.
 *
 * <p>The MIME type of the stored bytes is detected with Tika. If detection
 * fails, the photo is still sent as {@code application/octet-stream} with no
 * file extension. If the candidate has no photo, nothing is written.
 *
 * @param request  the current request; its session supplies {@code candidateID}
 * @param response the response receiving the download headers and photo bytes
 * @throws ServletException declared for the servlet contract; not thrown directly here
 * @throws IOException      if sending an error or writing the photo fails
 */
protected void doGet(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    HttpSession session = request.getSession();

    // Unboxing a missing attribute would throw a NullPointerException,
    // so reject the request explicitly instead.
    Object candidateAttr = session.getAttribute("candidateID");
    if (!(candidateAttr instanceof Integer)) {
        response.sendError(HttpServletResponse.SC_BAD_REQUEST, "No candidate selected");
        return;
    }
    int candidateID = (Integer) candidateAttr;

    // Load the photo, always releasing the EntityManager afterwards.
    byte[] pictureBlob;
    EntityManager em = EMFUtil.getEMFactory().createEntityManager();
    try {
        Candidates candidate = em.find(Candidates.class, candidateID);
        if (candidate == null) {
            response.sendError(HttpServletResponse.SC_NOT_FOUND, "Candidate not found");
            return;
        }
        pictureBlob = candidate.getPhoto();
    } finally {
        em.close();
    }

    // No photo stored: leave the response untouched, as before.
    if (pictureBlob == null) {
        return;
    }

    // Safe defaults used when detection fails.
    String mimeType = "application/octet-stream";
    String extension = "";
    MimeTypes allTypes = MimeTypes.getDefaultMimeTypes();
    Detector detector = new DefaultDetector(allTypes);
    try (TikaInputStream tikaIS = TikaInputStream.get(pictureBlob)) {
        mimeType = detector.detect(tikaIS, new Metadata()).toString();
        // getExtension() yields the preferred file extension (e.g. ".png"),
        // not the MIME type's string form.
        extension = allTypes.forName(mimeType).getExtension();
    } catch (Exception e) {
        // NOTE(review): log(String, Throwable) comes from GenericServlet;
        // this assumes the enclosing class is an HttpServlet.
        log("Error getting MIME type", e);
    }

    // Headers must be set before the body is written; once the response is
    // committed, later header changes are ignored.
    response.setContentType(mimeType);
    response.setContentLength(pictureBlob.length);
    response.setHeader("Content-Disposition", "attachment;filename=" + candidateID + extension);
    response.getOutputStream().write(pictureBlob);
}
示例7: AbstractExtractor
import org.apache.tika.detect.DefaultDetector; //导入依赖的package包/类
/**
 * Creates an extractor for the given WARC path, with a default detector,
 * Tika's default configuration and a fresh statistics holder.
 *
 * @param warcPath path stored in the {@code warcPath} field
 */
public AbstractExtractor(Path warcPath) {
    // Collaborators are created in the same order as before;
    // only the plain field copy is moved to the end.
    this.detector = new DefaultDetector();
    this.tikaConfig = TikaConfig.getDefaultConfig();
    this.extractorStats = new ExtractorStats();
    this.warcPath = warcPath;
}
示例8: testExcelXLSB
import org.apache.tika.detect.DefaultDetector; //导入依赖的package包/类
/**
 * The .xlsb format (an OOXML container holding binary blobs) is not
 * supported, but such files must still be detected correctly and must not
 * break parsing (TIKA-826).
 */
@Test
public void testExcelXLSB() throws Exception {
    final String resource = "/test-documents/testEXCEL.xlsb";

    Metadata metadata = new Metadata();
    metadata.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");

    // Step 1: the detector identifies the binary workbook type.
    MediaType detected = null;
    InputStream stream = ExcelParserTest.class.getResourceAsStream(resource);
    try {
        detected = new DefaultDetector().detect(stream, metadata);
        assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", detected.toString());
    } finally {
        stream.close();
    }

    // Step 2: neither Office parser lists the type as supported.
    boolean officeSupports = new OfficeParser().getSupportedTypes(new ParseContext()).contains(detected);
    assertEquals(false, officeSupports);
    boolean ooxmlSupports = new OOXMLParser().getSupportedTypes(new ParseContext()).contains(detected);
    assertEquals(false, ooxmlSupports);

    // Step 3: the auto-detecting parser copes with the file and yields no text.
    stream = ExcelParserTest.class.getResourceAsStream(resource);
    try {
        ParseContext context = new ParseContext();
        context.set(Locale.class, Locale.US);
        ContentHandler handler = new BodyContentHandler(-1);
        new AutoDetectParser().parse(stream, handler, metadata, context);
        assertEquals("", handler.toString());
    } finally {
        stream.close();
    }
}
示例9: testExcel95
import org.apache.tika.detect.DefaultDetector; //导入依赖的package包/类
/**
 * The old Excel 95 .xls format is not supported, but such files must still
 * be detected correctly and must not break parsing (TIKA-976).
 */
@Test
public void testExcel95() throws Exception {
    final String resource = "/test-documents/testEXCEL_95.xls";

    Metadata metadata = new Metadata();
    metadata.add(Metadata.RESOURCE_NAME_KEY, "excel_95.xls");

    // Step 1: the detector reports the generic Excel type.
    MediaType detected = null;
    InputStream stream = ExcelParserTest.class.getResourceAsStream(resource);
    try {
        detected = new DefaultDetector().detect(stream, metadata);
        assertEquals("application/vnd.ms-excel", detected.toString());
    } finally {
        stream.close();
    }

    // Step 2: OfficeParser lists the type as supported, OOXMLParser does not.
    boolean officeSupports = new OfficeParser().getSupportedTypes(new ParseContext()).contains(detected);
    assertEquals(true, officeSupports);
    boolean ooxmlSupports = new OOXMLParser().getSupportedTypes(new ParseContext()).contains(detected);
    assertEquals(false, ooxmlSupports);

    // Step 3: the auto-detecting parser copes with the file and yields no text.
    stream = ExcelParserTest.class.getResourceAsStream(resource);
    try {
        ParseContext context = new ParseContext();
        context.set(Locale.class, Locale.US);
        ContentHandler handler = new BodyContentHandler(-1);
        new AutoDetectParser().parse(stream, handler, metadata, context);
        assertEquals("", handler.toString());
    } finally {
        stream.close();
    }
}
示例10: AssetMimeHandler
import org.apache.tika.detect.DefaultDetector; //导入依赖的package包/类
/**
 * Creates the handler with a freshly constructed default Tika detector.
 */
AssetMimeHandler() {
    this.detector = new DefaultDetector();
}
示例11: TikaIdentification
import org.apache.tika.detect.DefaultDetector; //导入依赖的package包/类
/**
 * Creates the identifier, setting up both the Tika facade and a default
 * detector.
 *
 * @throws IOException declared by the signature; nothing in this body
 *                     visibly throws it
 */
public TikaIdentification() throws IOException {
    this.tika = new Tika();
    this.detector = new DefaultDetector();
}