本文整理汇总了Java中org.apache.tika.Tika类的典型用法代码示例。如果您正苦于以下问题：Java Tika类的具体用法？Java Tika怎么用？Java Tika使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
Tika类属于org.apache.tika包，在下文中一共展示了Tika类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: BinaryImportDestinationChooser
import org.apache.tika.Tika; //导入依赖的package包/类
/**
 * Builds a {@link RepositoryLocationChooser} that additionally offers a text area in which the
 * media (MIME) type of the given data source can be specified.
 *
 * @param source
 *            the data source; its location is used to guess the initial media type
 * @param initialDestination
 *            the initial location (optional)
 */
public BinaryImportDestinationChooser(BinaryDataSource source, String initialDestination) {
    super(null, null, initialDestination, true, false, true, true, Colors.WHITE);

    // Start from the generic binary type and replace it only if detection succeeds.
    String detectedType = "application/octet-stream";
    try {
        detectedType = new Tika().detect(source.getLocation());
    } catch (IOException ignored) {
        // Best effort: keep the generic default when detection fails.
    }

    JLabel typeLabel = new ResourceLabel("repository_chooser.mime_type");
    mediaType = new JTextArea(detectedType);

    // The label takes the next-to-last cell of the row ...
    GridBagConstraints labelConstraints = new GridBagConstraints();
    labelConstraints.insets = new Insets(ButtonDialog.GAP, 0, 0, ButtonDialog.GAP);
    labelConstraints.gridwidth = GridBagConstraints.RELATIVE;
    add(typeLabel, labelConstraints);

    // ... and the text area ends the row, stretching horizontally.
    GridBagConstraints fieldConstraints = new GridBagConstraints();
    fieldConstraints.insets = new Insets(ButtonDialog.GAP, 0, 0, 0);
    fieldConstraints.gridwidth = GridBagConstraints.REMAINDER;
    fieldConstraints.fill = GridBagConstraints.HORIZONTAL;
    add(mediaType, fieldConstraints);
}
示例2: setUp
import org.apache.tika.Tika; //导入依赖的package包/类
/**
 * Prepares the test fixture: an {@link HtmlExporter} wired to a Velocity-backed
 * {@link Templater}, and a {@link TikaTransformer} wired to a provider-supplied Tika.
 */
@Before
public void setUp() throws TikaException, IOException, SAXException {
    // Velocity is configured to load its resources through the classpath loader.
    final VelocityEngine velocity = new VelocityEngine();
    velocity.setProperty(RuntimeConstants.RESOURCE_LOADER, "classpath");
    velocity.setProperty("classpath.resource.loader.class", ClasspathResourceLoader.class.getName());
    velocity.init();

    final Templater velocityTemplater = new Templater();
    velocityTemplater.setEngine(velocity);

    exporter = new HtmlExporter();
    exporter.setTemplater(velocityTemplater);

    final Tika providedTika = new TikaProvider().tika();
    transformer = new TikaTransformer();
    transformer.setTika(providedTika);
}
示例3: setUp
import org.apache.tika.Tika; //导入依赖的package包/类
/**
 * Prepares the test fixture: a {@link PdfExporter} wired to a Velocity-backed
 * {@link Templater}, and a {@link TikaTransformer} wired to a provider-supplied Tika.
 */
@Before
public void setUp() throws TikaException, IOException, SAXException {
    // Velocity is configured to load its resources through the classpath loader.
    final VelocityEngine velocity = new VelocityEngine();
    velocity.setProperty(RuntimeConstants.RESOURCE_LOADER, "classpath");
    velocity.setProperty("classpath.resource.loader.class", ClasspathResourceLoader.class.getName());
    velocity.init();

    final Templater velocityTemplater = new Templater();
    velocityTemplater.setEngine(velocity);

    exporter = new PdfExporter();
    exporter.setTemplater(velocityTemplater);

    final Tika providedTika = new TikaProvider().tika();
    transformer = new TikaTransformer();
    transformer.setTika(providedTika);
}
示例4: detect
import org.apache.tika.Tika; //导入依赖的package包/类
/**
 * Detects the MIME type of a file with Tika.
 * <p>
 * Tika has to read the file's data, so detection takes some time. Do not replace this with a
 * lookup based on the file extension: that cannot adapt to the actual content and is
 * inaccurate.
 *
 * @param file
 *            the file to inspect
 * @return the media type string reported by {@code Tika.detect(File)}
 * @throws Exception
 *             a {@link java.io.FileNotFoundException} if {@code file} does not exist;
 *             otherwise whatever {@code Tika.detect(File)} throws
 */
public static String detect(File file) throws Exception {
    // Fail early with a specific exception type instead of a raw Exception.
    if (!file.exists()) {
        throw new java.io.FileNotFoundException("File does not exist: " + file.getAbsolutePath());
    }
    return new Tika().detect(file);
}
示例5: failedMimetypeDetectionShouldWork
import org.apache.tika.Tika; //导入依赖的package包/类
/**
 * Uploading an asset must still succeed when MIME type detection throws: the response is
 * 201 with the file name as body, and the asset is stored with a {@code null} content type.
 */
@Test
@DirtiesContext
public void failedMimetypeDetectionShouldWork() throws Exception {
    // Replace the controller's "tika" field with a spy whose detection always fails.
    // The controller is modified in place, hence @DirtiesContext on this test.
    final Reflect reflectedController = Reflect.on(this.controller);
    final Tika originalTika = reflectedController.field("tika").get();
    final Tika failingTika = spy(originalTika);
    when(failingTika.detect(any(InputStream.class), any(String.class))).thenThrow(IOException.class);
    reflectedController.set("tika", failingTika);

    final InputStream assetStream = this.getClass().getResourceAsStream("/eu/euregjug/site/assets/asset.png");
    final MockMultipartFile upload = new MockMultipartFile("assetData", "asset.png", null, assetStream);

    // No asset with that name exists yet.
    when(this.gridFsTemplate.findOne(any(Query.class))).thenReturn(null);

    mvc.perform(fileUpload("/api/assets").file(upload))
            .andExpect(status().isCreated())
            .andExpect(content().string("asset.png"));

    verify(this.gridFsTemplate).findOne(any(Query.class));
    verify(this.gridFsTemplate).store(any(InputStream.class), eq("asset.png"), isNull(String.class));
    verifyNoMoreInteractions(this.gridFsTemplate);
}
示例6: getMimeType
import org.apache.tika.Tika; //导入依赖的package包/类
/**
 * Detects the MIME type of the given content by handing the bytes to Tika; no file name or
 * extension is involved.
 *
 * @param barr
 *            the content to inspect
 * @param defaultValue
 *            value to return when detection fails
 * @return the detected MIME type, or {@code defaultValue} if detection threw and the
 *         throwable was not re-thrown
 */
public static String getMimeType(byte[] barr, String defaultValue) {
    try {
        return new Tika().detect(barr);
    }
    catch(Throwable t) {
        // May re-throw t; which throwables qualify is decided by ExceptionUtil.
        ExceptionUtil.rethrowIfNecessary(t);
        return defaultValue;
    }
}
示例7: initialise
import org.apache.tika.Tika; //导入依赖的package包/类
/**
 * Builds the lookup from web path prefix to module jar location and creates the Tika
 * instance.
 * <p>
 * Every module whose configuration contains a {@code baseurl} property contributes one
 * entry: the base URL (with a leading slash enforced) mapped to the string form of the
 * module's jar.
 */
@PostConstruct
public void initialise() {
    this.resourceMap = new HashMap<>();

    for (String module : moduleService.listModules()) {
        ModuleConfiguration config = moduleService.getModuleConfiguration(module);
        if (!config.getConfiguration().containsKey("baseurl")) {
            // Module does not map its web resources to a path prefix.
            continue;
        }

        String baseUrl = config.getConfiguration().getString("baseurl");
        String prefix;
        if (baseUrl.startsWith("/")) {
            prefix = baseUrl;
        } else {
            prefix = "/" + baseUrl;
        }
        String jarLocation = moduleService.getModuleJar(module).toString();
        resourceMap.put(prefix, jarLocation);
    }

    this.tika = new Tika();
}
示例8: DownloadedFiles
import org.apache.tika.Tika; //导入依赖的package包/类
/**
 * Loads the first file found in the per-thread download cache directory
 * ({@code ./download_cache<thread>}) and then deletes every file listed there.
 * <p>
 * For the loaded file this sets the Base64-encoded content, the MIME type detected from
 * bytes and name, the lower-cased extension and the file name without extension. When the
 * name has no '.', the extension is left untouched and the full name is kept. Failures
 * while reading are logged and otherwise ignored; the listed files are deleted either way.
 *
 * @param thread
 *            number appended to the cache directory name
 */
public DownloadedFiles(int thread) {
    File cacheDir = new File("./download_cache" + thread);
    Collection<File> cached = FileUtils.listFiles(cacheDir, null, false);
    if (cached.isEmpty()) {
        return;
    }
    try {
        File download = cached.iterator().next();
        byte[] bytes = FileUtils.readFileToByteArray(download);
        content = Base64.encodeBase64String(bytes);
        filename = download.getName();
        mimeType = new Tika().detect(bytes, filename);
        int index = filename.lastIndexOf(".");
        if (index > -1) {
            // Locale.ROOT keeps the result independent of the default locale
            // (e.g. "GIF" must not become "gıf" under a Turkish locale).
            extension = filename.substring(index + 1).toLowerCase(java.util.Locale.ROOT);
            filename = filename.substring(0, index);
        }
    } catch (Throwable t) {
        Log.exception(t);
    } finally {
        for (File cur : cached) {
            FileUtils.deleteQuietly(cur);
        }
    }
}
示例9: makeMedia
import org.apache.tika.Tika; //导入依赖的package包/类
/**
 * Reads the Tika metadata of the named file and wraps file and metadata in a media object.
 *
 * @param filename
 *            path of the file to read; must be beneath {@code root}
 * @return a {@code FileTikaMediaImpl} for the file and its extracted metadata
 * @throws FileTikaException
 *             if the file is not beneath {@code root}, or wrapping any exception raised
 *             while opening, parsing or closing the file
 */
private Media makeMedia(String filename) throws FileTikaException {
    File file = new File(filename);
    if (!isBeneathParent(root, file)) {
        throw new FileTikaException(file + " is not a child of " + root);
    }
    Tika tika = new Tika();
    Metadata metadata = new Metadata();
    logger.trace("Reading metadata for {}", filename);
    // try-with-resources closes both the stream and the reader even when parsing fails.
    try (InputStream in = TikaInputStream.get(file, metadata);
            Reader reader = tika.parse(in, metadata)) {
        logger.trace("Metadata read to {}", metadata);
        return new FileTikaMediaImpl(file, metadata);
    } catch (Exception ex) {
        throw new FileTikaException(ex);
    }
}
示例10: onTrigger
import org.apache.tika.Tika; //导入依赖的package包/类
/**
 * Replaces the content of a flow file with the plain text Tika extracts from it and routes
 * the result to {@code REL_SUCCESS}.
 * <p>
 * If no flow file is available a new one is created. The {@code mime.type} attribute is set
 * to {@code application/json}. When Tika fails to parse the input, the error is logged and
 * empty content is written; the flow file is still transferred to success.
 *
 * @param context
 *            the process context (not used here)
 * @param session
 *            the session used to obtain, rewrite, transfer and commit the flow file
 * @throws ProcessException
 *             declared by the overridden method; any throwable raised during processing is
 *             logged and re-thrown unchanged
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        flowFile = session.create();
    }
    try {
        flowFile = session.putAttribute(flowFile, "mime.type", "application/json");
        flowFile = session.write(flowFile, new StreamCallback() {
            @Override
            public void process(InputStream inputStream, OutputStream outputStream) throws IOException {
                Tika tika = new Tika();
                String text = "";
                try {
                    text = tika.parseToString(inputStream);
                } catch (TikaException e) {
                    // Hand the exception to the logger instead of printing to stderr.
                    getLogger().error("Apache Tika failed to parse input " + e.getLocalizedMessage(), e);
                }
                // TODO: wrap in JSON???
                // Fixed charset so the output bytes do not depend on the platform default.
                outputStream.write(text.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            }
        });
        session.transfer(flowFile, REL_SUCCESS);
        session.commit();
    } catch (final Throwable t) {
        getLogger().error("Unable to process ExtractTextProcessor file " + t.getLocalizedMessage());
        getLogger().error("{} failed to process due to {}; rolling back session", new Object[] { this, t });
        throw t;
    }
}
示例11: open
import org.apache.tika.Tika; //导入依赖的package包/类
/**
 * Extracts the text of the given file with Tika, shows it in the {@code plain} text
 * component and passes it to {@code saveFile} under the file's name with its extension
 * replaced by {@code .txt}.
 * <p>
 * If extraction fails, the stack trace is shown and saved in place of the text.
 *
 * @param file
 *            the file to open
 */
public void open(File file) {
    Tika tika = new Tika();
    tika.setMaxStringLength(999999);
    StringWriter sw = new StringWriter();
    PrintWriter pw = new PrintWriter(sw);

    String name = file.getName();
    fileName.setText(name);

    // A name without '.' has no extension to strip; substring(0, -1) would throw.
    int dot = name.lastIndexOf('.');
    String baseName = dot < 0 ? name : name.substring(0, dot);
    String saveName = baseName + ".txt";

    try {
        pw.println(tika.parseToString(file));
    } catch (Exception ex) {
        ex.printStackTrace(pw);
    }
    pw.flush();

    String output = sw.toString();
    plain.setText(output);
    saveFile(output, saveName);
    plain.setCaretPosition(0);
}
示例12: setUp
import org.apache.tika.Tika; //导入依赖的package包/类
/**
 * Prepares the test fixture: Mockito mocks, the {@code fileTestFiles} directory, the docx,
 * xlsx and pdf exporters, a Velocity-backed templater, the object mapper and a
 * {@link TikaTransformer} wired to a provider-supplied Tika.
 */
@Before
public void setUp() throws IOException, TikaException, SAXException {
    MockitoAnnotations.initMocks(this);
    createDirectories(Paths.get("fileTestFiles"));

    docxExporter = new DocxExporter();
    xlsxExporter = new XlsxExporter();

    // Velocity is configured to load its resources through the classpath loader.
    final VelocityEngine velocity = new VelocityEngine();
    velocity.setProperty(RuntimeConstants.RESOURCE_LOADER, "classpath");
    velocity.setProperty("classpath.resource.loader.class", ClasspathResourceLoader.class.getName());
    velocity.init();

    templater = new Templater();
    templater.setEngine(velocity);

    pdfExporter = new PdfExporter();
    pdfExporter.setTemplater(templater);

    mapper = new ObjectMapperProducer().objectMapper(false, false);

    final Tika providedTika = new TikaProvider().tika();
    transformer = new TikaTransformer();
    transformer.setTika(providedTika);
}
示例13: setUp
import org.apache.tika.Tika; //导入依赖的package包/类
/**
 * Prepares the test fixture: a fresh {@link TikaTransformer} wired to the Tika instance
 * supplied by a {@link TikaProvider}.
 */
@Before
public void setUp() throws TikaException, IOException, SAXException {
    final Tika providedTika = new TikaProvider().tika();

    transformer = new TikaTransformer();
    transformer.setTika(providedTika);
}
示例14: getMimeType
import org.apache.tika.Tika; //导入依赖的package包/类
/**
 * Detects the MIME type of the file at {@code getFilePath()} with Tika.
 *
 * @return the detected media type
 * @throws RuntimeException
 *             wrapping the {@link IOException} raised when detection fails
 */
public String getMimeType() {
    try {
        return new Tika().detect(Paths.get(getFilePath()));
    } catch (IOException ioFailure) {
        throw new RuntimeException(ioFailure);
    }
}
示例15: parse
import org.apache.tika.Tika; //导入依赖的package包/类
/**
 * Extracts the text of the target file with Tika and converts it with
 * {@code ParserUtils.fromText}.
 *
 * @param targetFile
 *            the file to parse
 * @return the parsed attendance, or {@code null} when reading or text extraction failed
 *         (the stack trace is printed in that case)
 */
@Override
public AldermanAttendance parse(Path targetFile) {
    try {
        final String extractedText = new Tika().parseToString(targetFile.toFile());
        return ParserUtils.fromText(extractedText);
    } catch (IOException | TikaException failure) {
        failure.printStackTrace();
        return null;
    }
}