本文整理匯總了Java中org.apache.tika.metadata.Metadata.set方法的典型用法代碼示例。如果您正苦於以下問題:Java Metadata.set方法的具體用法?Java Metadata.set怎麼用?Java Metadata.set使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.apache.tika.metadata.Metadata的用法示例。
在下文中一共展示了Metadata.set方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: detectMimeTypes
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Detects the media type of the supplied content and returns the list of MIME type names
 * produced by {@code unravel} for it.
 *
 * @param input content to inspect; must not be {@code null}. The stream is wrapped in a
 *     {@link TikaInputStream} that is closed before this method returns.
 * @param fileName optional resource name passed to the detector as a hint
 * @return the detected types, or a single {@link MimeTypes#OCTET_STREAM} entry when nothing
 *     was collected
 * @throws IOException if reading the content fails
 */
@Nonnull
@Override
public List<String> detectMimeTypes(final InputStream input, @Nullable final String fileName) throws IOException {
    checkNotNull(input);

    // The file name is only a hint for the detector; it is skipped when absent.
    final Metadata hints = new Metadata();
    if (fileName != null) {
        hints.set(Metadata.RESOURCE_NAME_KEY, fileName);
    }

    final MediaType detectedType;
    try (final TikaInputStream tikaStream = TikaInputStream.get(input)) {
        detectedType = detector.detect(tikaStream, hints);
    }

    // Collect the detected type and (presumably) its less specific parents.
    final List<String> result = Lists.newArrayList();
    unravel(result, detectedType);
    if (result.isEmpty()) {
        result.add(MimeTypes.OCTET_STREAM);
    }
    return result;
}
示例2: testWritesISO8601Dates
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Verifies that, for each sample creation date, the spewer writes both the raw value and the
 * expected ISO 8601 representation.
 */
@Test
public void testWritesISO8601Dates() throws IOException {
    final SpewerStub spewer = new SpewerStub();
    final Document document = factory.create("test.txt");
    final Metadata metadata = document.getMetadata();
    final FieldNames fields = spewer.getFields();

    // TODO: this should go in a separate test for the MetadataTransformer.
    // Raw inputs and, at the same index, the ISO 8601 value expected for each of them.
    final String[] dates = {"2011-12-03+01:00", "2015-06-03", "Tue Jan 27 17:03:21 2004", "19106-06-07T08:00:00Z"};
    final String[] isoDates = {"2011-12-03T12:00:00Z", "2015-06-03T12:00:00Z", "2004-01-27T17:03:21Z",
            "+19106-06-07T08:00:00Z"};

    for (int index = 0; index < dates.length; index++) {
        final String rawDate = dates[index];
        metadata.set(Office.CREATION_DATE, rawDate);
        spewer.writeMetadata(document);

        Assert.assertEquals(rawDate, spewer.metadata.get(fields.forMetadata(Office.CREATION_DATE.getName())));
        Assert.assertEquals(isoDates[index],
                spewer.metadata.get(fields.forMetadataISODate(Office.CREATION_DATE.getName())));

        // Reset the store of written metadata on each iteration.
        spewer.close();
    }
}
示例3: initSize
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Parses the given image file and stores its width and height on the file item.
 *
 * @param f file item that receives the dimensions; its id is used in the process label
 * @param img image file to parse
 * @param mime content type handed to the parser through the metadata
 * @return a result carrying the {@code ZERO} exit code on success, or exit code -1 together
 *     with an error description when anything fails
 */
private static ProcessResult initSize(BaseFileItem f, File img, String mime) {
    final ProcessResult result = new ProcessResult();
    result.setProcess("get image dimensions :: " + f.getId());
    final Parser imageParser = new ImageParser();
    try (InputStream in = new FileInputStream(img)) {
        final Metadata imageMetadata = new Metadata();
        imageMetadata.set(CONTENT_TYPE, mime);
        imageParser.parse(in, new DefaultHandler(), imageMetadata, new ParseContext());

        // The parser reports the dimensions as strings in the TIFF metadata properties.
        final String width = imageMetadata.get(TIFF.IMAGE_WIDTH);
        f.setWidth(Integer.valueOf(width));
        final String height = imageMetadata.get(TIFF.IMAGE_LENGTH);
        f.setHeight(Integer.valueOf(height));

        result.setExitCode(ZERO);
    } catch (Exception e) {
        // Any failure (I/O, parsing, missing or non-numeric dimension) is reported, not thrown.
        log.error("Error while getting dimensions", e);
        result.setError("Error while getting dimensions");
        result.setException(e.getMessage());
        result.setExitCode(-1);
    }
    return result;
}
示例4: testTransformValue
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Checks that a document built by the content handler has the illegal characters removed
 * from a metadata value, leaving plain {@code "foobar"}.
 */
@Test
public void testTransformValue() {
    final String fieldName = "user_name";

    // Sanity check: the fixture really differs from the clean string.
    assertFalse("foobar".equals(getFoobarWithNonChars()));

    // Put the dirty value into a metadata field; building a new document below
    // causes the ContentHandler to be invoked on it.
    final Metadata dirtyMetadata = new Metadata();
    dirtyMetadata.set(fieldName, getFoobarWithNonChars());

    final StripNonCharSolrContentHandlerFactory handlerFactory =
            new StripNonCharSolrContentHandlerFactory(DateUtil.DEFAULT_DATE_FORMATS);
    final IndexSchema latestSchema = h.getCore().getLatestSchema();
    final SolrContentHandler handler =
            handlerFactory.createSolrContentHandler(dirtyMetadata, new MapSolrParams(new HashMap()), latestSchema);

    final SolrInputDocument document = handler.newDocument();
    final String cleaned = document.getFieldValue(fieldName).toString();
    assertTrue("foobar".equals(cleaned));
}
示例5: writeFile
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Uploads the given bytes to the bucket under the key resolved from {@code path}, with the
 * MD5, length and a Tika-detected content type set on the object metadata.
 *
 * @param path path of the file; its normalized base name is used as a detection hint
 * @param data file content
 * @throws IOException if detection or stream handling fails
 */
@Override
void writeFile(final String path, final byte[] data) throws IOException {
    final ObjectMetadata objectMetadata = new ObjectMetadata();
    objectMetadata.setContentMD5(Md5Utils.md5AsBase64(data));
    objectMetadata.setContentLength(data.length);

    // Give Tika the file name as a hint for the content detection.
    final String baseName = FilenameUtils.getName(FilenameUtils.normalize(path));
    final Metadata detectionHints = new Metadata();
    detectionHints.set(Metadata.RESOURCE_NAME_KEY, baseName);

    // The same stream is used for detection and then for the upload.
    try (InputStream content = TikaInputStream.get(data, detectionHints)) {
        final String contentType = TIKA_DETECTOR.detect(content, detectionHints).toString();
        objectMetadata.setContentType(contentType);
        s3.putObject(bucket, resolveKey(path), content, objectMetadata);
    }
}
示例6: walk
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Reads one repository object, and uploads it to the bucket when {@code isUploadFile}
 * says so for its MD5 checksum.
 *
 * @param iter iterator over bucket object summaries, forwarded to {@code isUploadFile}
 * @param file id of the repository object to read
 * @param path path of the file, used as the object key and as a detection hint
 * @return {@code true} if the file was uploaded, {@code false} if it was skipped
 * @throws IOException if reading the object or detecting its type fails
 */
private boolean walk(Iterator<S3ObjectSummary> iter, ObjectId file, String path) throws IOException {
    LOG.debug("Start processing file: {}", path);

    final byte[] content;
    final byte[] md5;
    try (DigestInputStream digestStream =
            new DigestInputStream(repository.open(file).openStream(), DigestUtils.getMd5Digest())) {
        // Reading the whole stream also feeds the digest, so the hash is taken afterwards.
        content = IOUtils.toByteArray(digestStream);
        md5 = digestStream.getMessageDigest().digest();
    }

    if (!isUploadFile(iter, path, Hex.encodeHexString(md5))) {
        LOG.info("Skipping file (same checksum): {}", path);
        return false;
    }

    LOG.info("Uploading file: {}", path);
    final ObjectMetadata objectMetadata = new ObjectMetadata();
    objectMetadata.setContentMD5(Base64.encodeAsString(md5));
    objectMetadata.setContentLength(content.length);

    // Give Tika the file name as a hint for the content detection.
    final Metadata detectionHints = new Metadata();
    detectionHints.set(Metadata.RESOURCE_NAME_KEY, FilenameUtils.getName(FilenameUtils.normalize(path)));

    // The same stream is used for detection and then for the upload.
    try (InputStream upload = TikaInputStream.get(content, detectionHints)) {
        objectMetadata.setContentType(TIKA_DETECTOR.detect(upload, detectionHints).toString());
        s3.putObject(bucket.getName(), path, upload, objectMetadata);
    }
    return true;
}
示例7: detectMimeType
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Detects the MIME type of the file at the given path, using the file name as a hint.
 *
 * @param filePath path of the file to inspect
 * @return the detected media type as a string, or {@code "UNKNOWN"} if an
 *     {@link IOException} occurs while opening or reading the file
 */
public static String detectMimeType(final String filePath) {
    final File target = new File(filePath);
    TikaInputStream stream = null;
    try {
        stream = TikaInputStream.get(target);

        /*
         * You might not want to provide the file's name. If you provide an Excel
         * document with a .xls extension, it will get it correct right away; but
         * if you provide an Excel document with .doc extension, it will guess it
         * to be a Word document
         */
        final Metadata hints = new Metadata();
        hints.set(Metadata.RESOURCE_NAME_KEY, target.getName());

        return DETECTOR.detect(stream, hints).toString();
    } catch (IOException ex) {
        return "UNKNOWN";
    } finally {
        // A failure while closing is only logged so it cannot replace the result above.
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException closeFailure) {
                Timber.d(closeFailure, closeFailure.getMessage(), LOG_TAG);
            }
        }
    }
}
示例8: withInputMetadata
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Sets the input metadata for {@link Parser#parse}.
 *
 * <p>When input metadata is already configured, each name of {@code metadata} is set on the
 * existing instance; otherwise {@code metadata} itself becomes the input metadata.
 *
 * <p>NOTE(review): the existing instance is modified in place, and only the value returned by
 * {@code metadata.get(name)} is copied for each name — confirm this is intended for names
 * with several values.
 *
 * @param metadata metadata to apply
 * @return a new {@code ParseFiles} built with the resulting input metadata
 */
public ParseFiles withInputMetadata(Metadata metadata) {
    final Metadata existing = this.getInputMetadata();
    if (existing == null) {
        return toBuilder().setInputMetadata(metadata).build();
    }
    for (String key : metadata.names()) {
        existing.set(key, metadata.get(key));
    }
    return toBuilder().setInputMetadata(existing).build();
}
示例9: processElement
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Emits a copy of the incoming parse result whose metadata is empty, except for the
 * "Author" property of the "valid/apache-beam-tika.odt" resource.
 *
 * @param c process context supplying the input element and receiving the output
 */
@ProcessElement
public void processElement(ProcessContext c) {
    final ParseResult input = c.element();
    final String location = input.getFileLocation();

    // Files contain many metadata properties. This function drops all but the "Author"
    // property manually added to "apache-beam-tika.odt" resource only to make
    // the tests simpler
    final Metadata reduced = new Metadata();
    if (location.endsWith("valid/apache-beam-tika.odt")) {
        reduced.set("Author", input.getMetadata().get("Author"));
    }

    c.output(ParseResult.success(input.getFileLocation(), input.getContent(), reduced));
}
示例10: get
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Serves a static resource for any GET request, with a content type detected by Tika.
 *
 * <p>A request for {@code "/"} is mapped to {@code "/index.html"}. The resolved file must
 * either sit directly in the data directory or have an absolute path starting with the
 * public directory's path, and it must exist; otherwise a {@link FileNotFoundException}
 * is thrown.
 *
 * @param request the incoming request; its servlet path selects the resource
 * @param response receives the content type, content length and file bytes
 * @throws IOException if the file is not allowed, is missing, or cannot be read or written
 */
@GetMapping("/**")
@ResponseBody
public void get(HttpServletRequest request, HttpServletResponse response) throws IOException {
    String path = request.getServletPath();
    log.info("GET " + path);
    if (path.equals("/")) {
        path = "/index.html";
    }
    File f = ResourceManager.getResource(path.substring(1));

    // Verify that the file requested is in a public directory
    // NOTE(review): this compares non-canonical absolute paths by string prefix, so ".."
    // segments or a sibling directory sharing the prefix may pass — consider comparing
    // canonical paths instead.
    boolean isInPublic = f.getAbsolutePath().startsWith(ResourceManager.PUBLIC_DIR.getAbsolutePath());
    if (!f.getParentFile().getAbsolutePath().equals(ResourceManager.getDataDir().getAbsolutePath())
            && !isInPublic) {
        throw new FileNotFoundException();
    }
    if (!f.exists()) {
        throw new FileNotFoundException();
    }

    Metadata metadata = new Metadata();
    metadata.set(Metadata.RESOURCE_NAME_KEY, f.toString());

    // Close the detection stream once the type is known; it was previously leaked.
    final MediaType mediaType;
    try (TikaInputStream detectionStream = TikaInputStream.get(f.toPath())) {
        mediaType = tika.getDetector().detect(detectionStream, metadata);
    }
    response.setContentType(mediaType.toString());
    response.setContentLengthLong(f.length());

    // Close the file handle after copying; it was previously leaked on every request.
    try (FileInputStream body = new FileInputStream(f)) {
        IOUtils.copy(body, response.getOutputStream());
    }
    response.flushBuffer();
}
示例11: parse
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Delegates to {@code ErrorParser} for non-empty input and to {@code EmptyParser} for empty
 * input.
 *
 * <p>The size is taken from the content-length metadata when present; otherwise it is read
 * from the stream and written back to the metadata.
 *
 * <p>NOTE(review): when the length is read from the stream, the wrapping
 * {@link TikaInputStream} is closed before {@code stream} is handed to the delegate —
 * confirm the delegates do not need to read from it.
 */
@Override
public void parse(final InputStream stream, final ContentHandler handler, final Metadata metadata,
        final ParseContext context) throws SAXException, IOException, TikaException {
    final String declaredLength = metadata.get(Metadata.CONTENT_LENGTH);

    final long size;
    if (declaredLength == null || declaredLength.isEmpty()) {
        try (final TikaInputStream tikaStream = TikaInputStream.get(stream)) {
            size = tikaStream.getLength();
        }
        metadata.set(Metadata.CONTENT_LENGTH, Long.toString(size));
    } else {
        size = Long.valueOf(declaredLength);
    }

    // If the file is not empty, throw a parse error.
    // Otherwise, output an empty document.
    final Parser delegate;
    if (size <= 0) {
        metadata.set(Metadata.CONTENT_TYPE, "application/octet-stream");
        delegate = EmptyParser.INSTANCE;
    } else {
        delegate = ErrorParser.INSTANCE;
    }

    delegate.parse(stream, handler, metadata, context);
}
示例12: getMetadata
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Builds a {@link Metadata} instance for a document, recording its resource name when one
 * is supplied.
 *
 * @param name resource name; {@code null} or empty leaves the metadata without a name
 * @return a new metadata instance
 */
private static Metadata getMetadata(final String name) {
    final Metadata result = new Metadata();
    final boolean hasName = name != null && !name.isEmpty();
    if (hasName) {
        result.set(Metadata.RESOURCE_NAME_KEY, name);
    }
    return result;
}
示例13: writeEmbed
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Saves an embedded document to the output directory, using its hash as the file name.
 *
 * <p>If the stream's open container is a {@code DirectoryEntry}, its entries are first
 * written to a temporary file and that file is copied; otherwise the stream's own path
 * is copied.
 *
 * <p>The temporary resources stay open until the copy has finished. Closing them earlier
 * deletes the temporary file before it can be measured or copied.
 *
 * @param tis stream of the embedded document
 * @param embed embedded document providing the hash and the metadata to update
 * @param name document name, used only for logging
 * @throws IOException if writing the temporary file or copying to the destination fails
 */
private void writeEmbed(final TikaInputStream tis, final EmbeddedDocument embed, final String name) throws IOException {
    final Path destination = outputPath.resolve(embed.getHash());
    final Metadata metadata = embed.getMetadata();
    final Object container = tis.getOpenContainer();

    try (final TemporaryResources tmp = new TemporaryResources()) {
        final Path source;

        // If the input is a container, write it to a temporary file so that it can then be copied atomically.
        // This happens with, for example, an Outlook Message that is an attachment of another Outlook Message.
        if (container instanceof DirectoryEntry) {
            source = tmp.createTempFile();
            try (final POIFSFileSystem fs = new POIFSFileSystem()) {
                saveEntries((DirectoryEntry) container, fs.getRoot());
                try (final OutputStream output = Files.newOutputStream(source)) {
                    fs.writeFilesystem(output);
                }
            }
        } else {
            source = tis.getPath();
        }

        // Set the content-length as it isn't (always?) set by Tika for embeds.
        if (null == metadata.get(Metadata.CONTENT_LENGTH)) {
            metadata.set(Metadata.CONTENT_LENGTH, Long.toString(Files.size(source)));
        }

        // To prevent massive duplication and because the disk is only a storage for underlying data, save using the
        // straight hash as a filename.
        try {
            Files.copy(source, destination);
        } catch (final FileAlreadyExistsException e) {
            // An existing file of a different size is replaced by the new copy.
            if (Files.size(source) != Files.size(destination)) {
                Files.copy(source, destination, StandardCopyOption.REPLACE_EXISTING);
            } else {
                logger.info("Temporary file for document \"{}\" in \"{}\" already exists.", name, root);
            }
        }
    }
}
示例14: generateHTML
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Asks Tika to translate the contents into HTML and stores the result through the
 * rendering context's content writer with the {@code text/html} mimetype.
 *
 * <p>When the body-only parameter is set, the XML declaration and the XHTML namespace
 * attributes on {@code p}, {@code h1}..{@code h9}, {@code div} and {@code table} tags are
 * stripped from the output.
 *
 * @param p parser used for the conversion
 * @param context rendering context supplying the source content, the parameters and the
 *     target writer
 * @throws RenditionServiceException if the parser fails for any reason
 */
private void generateHTML(Parser p, RenderingContext context)
{
    ContentReader contentReader = context.makeContentReader();

    // Setup things to parse with
    StringWriter sw = new StringWriter();
    ContentHandler handler = buildContentHandler(sw, context);

    // Tell Tika what we're dealing with
    Metadata metadata = new Metadata();
    metadata.set(
        Metadata.CONTENT_TYPE,
        contentReader.getMimetype()
    );
    metadata.set(
        Metadata.RESOURCE_NAME_KEY,
        nodeService.getProperty(
            context.getSourceNode(),
            ContentModel.PROP_NAME
        ).toString()
    );
    if (metadataExtracterConfig != null)
    {
        metadataExtracterConfig.prepareMetadataWithConfigParams(metadata);
    }

    // Our parse context needs to extract images
    ParseContext parseContext = new ParseContext();
    parseContext.set(Parser.class, new TikaImageExtractingParser(context));

    // Parse
    try {
        p.parse(
            contentReader.getContentInputStream(),
            handler, metadata, parseContext
        );
    } catch(Exception e) {
        throw new RenditionServiceException("Tika HTML Conversion Failed", e);
    }

    // As a string
    String html = sw.toString();

    // If we're doing body-only, remove all the html namespaces
    // that will otherwise clutter up the document
    boolean bodyOnly = context.getParamWithDefault(PARAM_BODY_CONTENTS_ONLY, false);
    if(bodyOnly) {
        html = html.replaceAll("<\\?xml.*?\\?>", "");
        html = html.replaceAll("<p xmlns=\"http://www.w3.org/1999/xhtml\"","<p");
        // Java replacement strings reference groups with $n; "\\1" is an escaped literal '1'
        // and would rewrite every heading level to <h1.
        html = html.replaceAll("<h(\\d) xmlns=\"http://www.w3.org/1999/xhtml\"","<h$1");
        html = html.replaceAll("<div xmlns=\"http://www.w3.org/1999/xhtml\"","<div");
        html = html.replaceAll("<table xmlns=\"http://www.w3.org/1999/xhtml\"","<table");
        // NOTE(review): this pattern previously read as a single whitespace character, which
        // strips every space from the document. It looks like an HTML-decoded carriage-return
        // entity, so the entity form is restored here — confirm against the upstream source.
        html = html.replaceAll("&#13;","");
    }

    // Save it
    ContentWriter contentWriter = context.makeContentWriter();
    contentWriter.setMimetype("text/html");
    contentWriter.putContent( html );
}
示例15: testSupportedMimetypes
import org.apache.tika.metadata.Metadata; //導入方法依賴的package包/類
/**
 * Test several different files
 * Note - doesn't use extractFromMimetype
 *
 * For each "quick" sample file, the mimetype is detected with Tika, the file is run through
 * extractFromFile, and the resulting properties are checked to be non-empty and to pass the
 * common and file-type specific metadata checks.
 */
public void testSupportedMimetypes() throws Exception
{
    String[] testFiles = new String[] {
        ".doc", ".docx", ".xls", ".xlsx",
        ".ppt", ".pptx",
        //".vsd", // Our sample file lacks suitable metadata
        "2010.dwg",
        "2003.mpp", "2007.mpp",
        ".pdf",
        ".odt",
    };

    AutoDetectParser ap = new AutoDetectParser();
    for (String fileBase : testFiles)
    {
        String filename = "quick" + fileBase;
        URL url = AbstractContentTransformerTest.class.getClassLoader().getResource("quick/" + filename);
        File file = new File(url.getFile());

        // Cheat and ask Tika for the mime type!
        Metadata metadata = new Metadata();
        metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
        MediaType mt;
        // Close the detection stream on every iteration; it was previously left open.
        try (TikaInputStream detectionStream = TikaInputStream.get(file))
        {
            mt = ap.getDetector().detect(detectionStream, metadata);
        }
        String mimetype = mt.toString();

        if (logger.isDebugEnabled())
        {
            logger.debug("Detected mimetype " + mimetype + " for quick test file " + filename);
        }

        // Have it processed
        Map<QName, Serializable> properties = extractFromFile(file, mimetype);

        // check we got something
        assertFalse("extractFromMimetype should return at least some properties, " +
                "none found for " + mimetype + " - " + filename,
                properties.isEmpty());

        // check common metadata
        testCommonMetadata(mimetype, properties);
        // check file-type specific metadata
        testFileSpecificMetadata(mimetype, properties);
    }
}