本文整理汇总了Java中org.apache.tika.Tika.detect方法的典型用法代码示例。如果您正苦于以下问题：Java Tika.detect方法的具体用法？Java Tika.detect怎么用？Java Tika.detect使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.tika.Tika的用法示例。
在下文中一共展示了Tika.detect方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: BinaryImportDestinationChooser
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Builds a {@link RepositoryLocationChooser} extended with a text field in which the media
 * (MIME) type of the given data source can be specified.
 * <p>
 * The field is pre-filled with the type Tika detects for the source location. If detection
 * fails with an {@link IOException}, the generic {@code application/octet-stream} is used.
 *
 * @param source
 *            the data source
 * @param initialDestination
 *            the initial location (optional)
 */
public BinaryImportDestinationChooser(BinaryDataSource source, String initialDestination) {
    super(null, null, initialDestination, true, false, true, true, Colors.WHITE);

    // Best effort: try to guess a specific type, otherwise fall back to the generic one.
    String detectedType;
    try {
        detectedType = new Tika().detect(source.getLocation());
    } catch (IOException ignored) {
        detectedType = "application/octet-stream";
    }

    JLabel typeLabel = new ResourceLabel("repository_chooser.mime_type");
    mediaType = new JTextArea(detectedType);

    GridBagConstraints constraints = new GridBagConstraints();

    // Label: second-to-last cell of the row, with a gap to its right.
    constraints.gridwidth = GridBagConstraints.RELATIVE;
    constraints.insets = new Insets(ButtonDialog.GAP, 0, 0, ButtonDialog.GAP);
    add(typeLabel, constraints);

    // Text field: last cell of the row, stretched horizontally.
    constraints.gridwidth = GridBagConstraints.REMAINDER;
    constraints.fill = GridBagConstraints.HORIZONTAL;
    constraints.insets = new Insets(ButtonDialog.GAP, 0, 0, 0);
    add(mediaType, constraints);
}
示例2: detect
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Detects the MIME type of the given file using Tika.
 * <p>
 * Detection is delegated to {@code Tika.detect(File)}. Because Tika inspects the file data
 * itself, the call can take some time; the extension alone is deliberately not relied upon,
 * since it cannot be verified against the content and is therefore inaccurate.
 *
 * @param file
 *            the file to inspect; must exist
 * @return the MIME type reported by Tika
 * @throws java.io.FileNotFoundException
 *             if {@code file} does not exist
 * @throws Exception
 *             if Tika fails while examining the file
 */
public static String detect(File file) throws Exception {
    // Fail early with a specific exception type instead of a bare Exception.
    if (!file.exists()) {
        throw new java.io.FileNotFoundException("File does not exist: " + file.getAbsolutePath());
    }
    return new Tika().detect(file);
}
示例3: getMimeType
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Returns the MIME type of the given content as detected by Tika.
 * <p>
 * Only the raw bytes are handed to Tika; no file name or extension is involved.
 *
 * @param barr
 *            the content to inspect
 * @param defaultValue
 *            the value to return when detection fails
 * @return the detected MIME type, or {@code defaultValue} if detection threw and the
 *         throwable was not re-thrown by {@code ExceptionUtil.rethrowIfNecessary}
 */
public static String getMimeType(byte[] barr, String defaultValue) {
    try {
        return new Tika().detect(barr);
    }
    catch (Throwable t) {
        // Gives ExceptionUtil the chance to propagate the throwable
        // (presumably fatal errors - confirm against ExceptionUtil) before falling back.
        ExceptionUtil.rethrowIfNecessary(t);
        return defaultValue;
    }
}
示例4: getMimeType
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Detects the MIME type of the file located at {@code getFilePath()}.
 *
 * @return the MIME type reported by Tika
 * @throws RuntimeException
 *             wrapping the {@link IOException} raised during detection
 */
public String getMimeType() {
    final String detectedType;
    try {
        final Tika detector = new Tika();
        final Path location = Paths.get(getFilePath());
        detectedType = detector.detect(location);
    } catch (IOException ioFailure) {
        throw new RuntimeException(ioFailure);
    }
    return detectedType;
}
示例5: getMimeType
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Determines the MIME type for the given file name.
 * <p>
 * {@link Files#probeContentType(Path)} is asked first. If it yields no result, or fails
 * with an {@link IOException} (whose stack trace is printed), Tika's name-based detection
 * is used instead.
 *
 * @param filename
 *            the file name or path to examine
 * @return the MIME type from the platform probe, otherwise the one reported by Tika
 */
private String getMimeType(String filename) {
    final Path location = Paths.get(filename);

    String probed;
    try {
        probed = Files.probeContentType(location);
    } catch (IOException probeFailure) {
        probeFailure.printStackTrace();
        probed = null;
    }

    if (probed != null) {
        return probed;
    }
    // The platform probe had no answer: fall back to Tika.
    return new Tika().detect(filename);
}
开发者ID:geetools,项目名称:geeCommerce-Java-Shop-Software-and-PIM,代码行数:15,代码来源:DefaultMediaAssetService.java
示例6: from
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Detects the {@link MimeType} of the content available from the given stream.
 * <p>
 * The file name and the declared content type, when present, are supplied to Tika as
 * metadata hints alongside the stream.
 *
 * @see <a href="http://tika.apache.org/1.4/detection.html">Tika content detection</a>
 * @param is
 *            the stream to inspect
 * @param fileName
 *            the resource name hint; ignored when null or empty
 * @param contentType
 *            the declared content type hint; ignored when null or empty
 * @return the {@link MimeType} for the name Tika detected
 * @throws IOException
 *             if the stream cannot be read
 */
public static MimeType from(final InputStream is,
        final String fileName, final String contentType) throws IOException {
    Metadata md = new Metadata();
    if (Strings.isNOTNullOrEmpty(fileName)) {
        md.add(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName);
    }
    if (Strings.isNOTNullOrEmpty(contentType)) {
        md.add(HttpHeaders.CONTENT_TYPE, contentType);
    }
    // Hand over the metadata rather than just the file name, so that the declared
    // content type actually reaches the detector.
    String mimeTypeStr = new Tika().detect(is, md);
    return MimeType.forName(mimeTypeStr);
}
示例7: ensurePNG
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Checks whether the given bytes are detected by Tika as {@code image/png}.
 *
 * @param array
 *            the image content to check
 * @return {@code true} if the detected type is {@code image/png}; {@code false} otherwise,
 *         including when detection fails with an {@link IOException} (which is logged)
 */
private boolean ensurePNG(byte[] array) {
    final Tika detector = new Tika();
    boolean isPng = false;
    try {
        final String detectedType = detector.detect(new ByteArrayInputStream(array));
        isPng = "image/png".equals(detectedType);
    } catch (IOException ioFailure) {
        logger.error("Image file cannot be analyzed", ioFailure);
    }
    return isPng;
}
示例8: getContentType
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Determines the content type of the given file via Tika.
 *
 * @param file
 *            the file to inspect
 * @return the detected type, or the string form of {@code ContentType.DEFAULT_BINARY} when
 *         detection fails with an {@link IOException}
 */
private String getContentType(final File file) {
    String contentType;
    try {
        contentType = new Tika().detect(file);
    } catch (final IOException detectionFailure) {
        contentType = ContentType.DEFAULT_BINARY.toString();
    }
    return contentType;
}
示例9: getImageType
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Returns the type Tika detects for the given bytes.
 * <p>
 * Despite the method name, nothing here restricts the result to image types.
 *
 * @param inputByteArray
 *            the content to inspect
 * @return the MIME type reported by Tika
 * @throws Exception
 *             declared by the signature; propagated from the detection call if raised
 */
public static String getImageType(byte[] inputByteArray) throws Exception {
    final Tika detector = new Tika();
    final String detectedType = detector.detect(inputByteArray);
    return detectedType;
}
示例10: getMimeType
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Detects the MIME type of a stream, passing the file name to Tika along with it.
 *
 * @param s
 *            the stream to inspect
 * @param fileName
 *            the name of the resource the stream belongs to
 * @return the MIME type reported by Tika
 * @throws IOException
 *             if the stream cannot be read
 */
public static String getMimeType(InputStream s, String fileName) throws IOException {
    return new Tika().detect(s, fileName);
}
示例11: getMimeTypeWithByteBuffer
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Detects the MIME type of the readable content of a {@link java.nio.ByteBuffer}.
 * <p>
 * The bytes between the buffer's current position and its limit are copied and handed to
 * Tika. The copy is made through a duplicate, so the caller's position and limit are left
 * untouched, and buffers without an accessible backing array (direct or read-only) as well
 * as sliced buffers are handled correctly.
 *
 * @param buffer
 *            the buffer whose remaining bytes are inspected
 * @return the MIME type reported by Tika
 * @throws IOException
 *             declared by the signature for callers; not raised by the code visible here
 */
public static String getMimeTypeWithByteBuffer(java.nio.ByteBuffer buffer) throws IOException {
    // buffer.array() would expose the whole backing array regardless of position, limit and
    // array offset, and is unavailable for direct/read-only buffers; copy the remaining
    // bytes instead.
    java.nio.ByteBuffer view = buffer.duplicate();
    byte[] content = new byte[view.remaining()];
    view.get(content);
    return new Tika().detect(content);
}
示例12: extractText
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Extracts the text of every file entry of a zip archive and collects the outlinks found
 * in them.
 * <p>
 * Each non-directory entry whose name contains a dot is given a content type by Tika's
 * name-based detection, wrapped in a {@code Content} and parsed through {@code ParseUtil}.
 * Entries that fail with a {@code ParseException} are logged at info level and skipped.
 *
 * @param input
 *            the zip data; it is not closed by this method
 * @param url
 *            the URL of the archive; entry URLs are formed as {@code url + "/" + entryName}
 * @param outLinksList
 *            receives a copy of every outlink found in the parsed entries
 * @return for each parsed entry its name, a space, its text and a trailing space,
 *         concatenated in archive order
 * @throws IOException
 *             if the archive cannot be read or an entry URL is malformed
 */
public String extractText(InputStream input, String url,
        List<Outlink> outLinksList) throws IOException {
    StringBuilder resultText = new StringBuilder();
    // Deliberately not closed: closing the wrapper would close the caller's stream too.
    ZipInputStream zin = new ZipInputStream(input);
    Tika tika = new Tika();
    ZipEntry entry;
    while ((entry = zin.getNextEntry()) != null) {
        if (entry.isDirectory()) {
            continue;
        }
        // Read until end-of-entry instead of trusting entry.getSize(), which is -1 when
        // the size is not recorded in the archive.
        byte[] b = readEntryBytes(zin);
        String fname = entry.getName();
        String newurl = url + "/" + fname;
        String base = new URL(newurl).toString();
        if (fname.lastIndexOf('.') == -1) {
            // No extension: there is nothing to base the content type on.
            continue;
        }
        // Trying to resolve the Mime-Type
        String contentType = tika.detect(fname);
        try {
            Metadata metadata = new Metadata();
            metadata.set(Response.CONTENT_LENGTH, Long.toString(b.length));
            metadata.set(Response.CONTENT_TYPE, contentType);
            Content content = new Content(newurl, base, b, contentType,
                    metadata, this.conf);
            Parse parse = new ParseUtil(this.conf).parse(content).get(
                    content.getUrl());
            ParseData theParseData = parse.getData();
            for (Outlink outlink : theParseData.getOutlinks()) {
                outLinksList.add(new Outlink(outlink.getToUrl(), outlink.getAnchor()));
            }
            resultText.append(entry.getName()).append(' ')
                    .append(parse.getText()).append(' ');
        } catch (ParseException e) {
            if (LOG.isInfoEnabled()) {
                LOG.info("fetch okay, but can't parse " + fname + ", reason: "
                        + e.getMessage());
            }
        }
    }
    return resultText.toString();
}

/** Reads the given stream up to its end-of-data marker and returns the bytes read. */
private static byte[] readEntryBytes(InputStream in) throws IOException {
    java.io.ByteArrayOutputStream collected = new java.io.ByteArrayOutputStream();
    byte[] chunk = new byte[8192];
    int read;
    while ((read = in.read(chunk)) != -1) {
        collected.write(chunk, 0, read);
    }
    return collected.toByteArray();
}
示例13: doProcess
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Converts a multimedia item into a {@code PhylogeneticTree}, registering a resource for
 * it when none exists yet, and then runs the tree through the delegate processor.
 * <p>
 * When no resource is stored for the item's identifier, the MIME type behind the identifier
 * URL is detected with Tika. A new resource is saved only for the phyloxml, newick, nexus
 * and new-hampshire-extended types, with the matching {@code input.file.extension}
 * parameter; any other type yields {@code null}. When a resource already exists, it must be
 * of type {@code PHYLOGENETIC_TREE}, otherwise {@code null} is returned. If the processor
 * returns a non-null tree, a harvest of the resource is requested.
 *
 * @param item
 *            the multimedia item to process
 * @return the processed tree, or {@code null} if the item is not a supported tree or the
 *         processor returned {@code null}
 * @throws Exception
 *             an {@code ImageRetrievalException} wrapping any failure during detection,
 *             conversion or resource creation; other failures are propagated as raised
 */
private PhylogeneticTree doProcess(Multimedia item) throws Exception {
    logger.debug("doProcess " + item);
    Resource resource = resourceService.findByResourceUri(item.getIdentifier());
    PhylogeneticTree tree = null;

    if (resource != null) {
        if (!resource.getResourceType().equals(ResourceType.PHYLOGENETIC_TREE)) {
            return null;
        }
        logger.debug("Resource " + resource + " exists for " + item.getIdentifier());
        tree = conversionService.convert(item, PhylogeneticTree.class);
    } else {
        logger.debug("No Resource prexisting for " + item.getIdentifier());
        Tika tika = new Tika();
        try {
            String mimeType = tika.detect(new URL(item.getIdentifier()));
            logger.debug("Mime type is " + mimeType);
            tree = conversionService.convert(item, PhylogeneticTree.class);

            resource = new Resource();
            resource.setOrganisation(getSource());
            resource.setIdentifier(UUID.randomUUID().toString());
            resource.setUri(item.getIdentifier());
            resource.setResourceType(ResourceType.PHYLOGENETIC_TREE);
            resource.setTitle("Resource " + item.getIdentifier());

            // Map the detected type to the file extension recorded on the resource.
            String extension;
            if (mimeType.equals("application/phyloxml+xml")) {
                extension = "xml";
            } else if (mimeType.equals("application/newick")) {
                extension = "nwk";
            } else if (mimeType.equals("application/nexus")) {
                extension = "nex";
            } else if (mimeType.equals("application/new-hampshire-extended")) {
                extension = "nhx";
            } else {
                extension = null;
            }

            if (extension == null) {
                logger.debug("Returning null");
                return null;
            }
            resource.getParameters().put("input.file.extension", extension);
            resourceService.saveOrUpdate(resource);
        } catch (Exception failure) {
            ImageRetrievalException wrapped = new ImageRetrievalException(item.getIdentifier());
            wrapped.initCause(failure);
            throw wrapped;
        }
    }

    logger.debug("Processing " + tree);
    PhylogeneticTree processed = processor.process(tree);
    logger.debug("Processing delegate returned " + processed);
    if (processed == null) {
        return null;
    }

    try {
        resourceService.harvestResource(resource.getId(), true);
    } catch (ResourceAlreadyBeingHarvestedException alreadyRunning) {
        logger.warn("Tried to harvest " + item.getIdentifier() + " but it is already being harvested");
    }
    return processed;
}
示例14: doProcess
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Converts a multimedia item into an {@code IdentificationKey}, registering a resource for
 * it when none exists yet, and then runs the key through the delegate processor.
 * <p>
 * When no resource is stored for the item's identifier, the MIME type behind the identifier
 * URL is detected with Tika; only {@code application/sdd+xml} leads to a conversion and a
 * newly saved resource, any other type yields {@code null}. When a resource already
 * exists, it must be of type {@code IDENTIFICATION_KEY}, otherwise {@code null} is
 * returned. If the processor returns a non-null key, a harvest of the resource is requested.
 *
 * @param item
 *            the multimedia item to process
 * @return the processed key, or {@code null} if the item is not an identification key or
 *         the processor returned {@code null}
 * @throws Exception
 *             an {@code ImageRetrievalException}, carrying the original failure as its
 *             cause, for any problem during detection, conversion or resource creation;
 *             other failures are propagated as raised
 */
private IdentificationKey doProcess(Multimedia item) throws Exception {
    logger.debug("doProcess " + item);
    Resource resource = resourceService.findByResourceUri(item.getIdentifier());
    IdentificationKey identificationKey = null;
    if(resource == null) {
        logger.debug("No Resource prexisting for " + item.getIdentifier());
        Tika tika = new Tika();
        try {
            String mimeType = tika.detect(new URL(item.getIdentifier()));
            logger.debug("Mime type is " + mimeType);
            if(mimeType.equals("application/sdd+xml")) {
                identificationKey = conversionService.convert(item, IdentificationKey.class);
                resource = new Resource();
                resource.setOrganisation(getSource());
                resource.setIdentifier(UUID.randomUUID().toString());
                resource.setUri(item.getIdentifier());
                resource.setResourceType(ResourceType.IDENTIFICATION_KEY);
                resource.setTitle("Resource " + item.getIdentifier());
                resourceService.saveOrUpdate(resource);
            } else {
                logger.debug("Returning null");
                return null;
            }
        } catch (Exception e) {
            // Keep the underlying failure attached so the root problem is not lost.
            ImageRetrievalException ire = new ImageRetrievalException(item.getIdentifier());
            ire.initCause(e);
            throw ire;
        }
    } else if(resource.getResourceType().equals(ResourceType.IDENTIFICATION_KEY)) {
        logger.debug("Resource " + resource + " exists for " + item.getIdentifier());
        identificationKey = conversionService.convert(item, IdentificationKey.class);
    } else {
        return null;
    }
    logger.debug("Processing " + identificationKey);
    identificationKey = processor.process(identificationKey);
    if(identificationKey != null) {
        try {
            resourceService.harvestResource(resource.getId(), true);
        } catch(ResourceAlreadyBeingHarvestedException rabhe) {
            // An ongoing harvest is not an error for this item; just report it.
            logger.warn("Tried to harvest " + item.getIdentifier() + " but it is already being harvested");
        }
    }
    return identificationKey;
}
示例15: main
import org.apache.tika.Tika; //导入方法依赖的package包/类
/**
 * Scratch entry point: runs Tika type detection on a hard-coded sample file, then parses
 * the same file with an {@code AutoDetectParser} and prints the body as XML.
 * <p>
 * The parser is configured from a hard-coded {@code tika-mimetypes.xml}. The parse output
 * is routed through a SAX {@code TransformerHandler} into a {@link StringWriter}, which is
 * printed to standard output. Any exception is caught and its stack trace printed.
 *
 * @param args
 *            not used
 */
public static void main(String[] args) {
    try {
        TikaConfig config = new TikaConfig(new File("/home/foo/a/code/big_bang/tika-1.5/"
                + "tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml"));
        final AutoDetectParser autoDetectParser = new AutoDetectParser(config);
        final Tika tika = new Tika();

        File xpsFile = new File("/home/foo/a/temp/xlsx.xlsx");
        String fileName = xpsFile.getName();
        Metadata metadata = new Metadata();
        if (fileName != null && fileName.length() > 0) {
            metadata.add(Metadata.RESOURCE_NAME_KEY, fileName);
        }

        // Detection pass. The detected type is currently not consumed further; the parser
        // below only receives the resource name through the metadata.
        InputStream inputStream = new FileInputStream(xpsFile);
        try {
            tika.detect(inputStream, metadata);
        } finally {
            inputStream.close();
        }

        // Serialize the SAX events of the document body as non-indented XML into a string.
        StringWriter sw = new StringWriter();
        SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
        TransformerHandler handler = factory.newTransformerHandler();
        handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
        handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "no");
        handler.setResult(new StreamResult(sw));
        BodyContentHandler bch = new BodyContentHandler(handler);
        handler.startDocument();

        // Parsing pass on a fresh stream, since the detection pass used its own.
        inputStream = new FileInputStream(xpsFile);
        try {
            autoDetectParser.parse(inputStream, bch, metadata);
        } finally {
            inputStream.close();
        }

        String x = sw.toString();
        System.out.println(x);
    } catch (Exception e) {
        e.printStackTrace();
    }
}