This article collects typical usage examples of the Java org.apache.tika.metadata.Metadata class. If you are wondering what exactly the Java Metadata class does, how to use it, or what real code that uses it looks like, the hand-picked examples below may help.
The Metadata class belongs to the org.apache.tika.metadata package. Fifteen code examples of the Metadata class are shown below, sorted by popularity by default.
Example 1: findMediaType

import org.apache.tika.metadata.Metadata; // import the required package/class
/**
 * Finds the media type (through the Apache Tika library), based on the file name and magic numbers.
 * @throws IOException
 */
public static MediaType findMediaType(InputStream is, String fileName) throws IOException {
    BufferedInputStream bis = new BufferedInputStream(is);
    try {
        AutoDetectParser parser = new AutoDetectParser();
        Detector detector = parser.getDetector();
        Metadata md = new Metadata();
        md.add(Metadata.RESOURCE_NAME_KEY, fileName);
        MediaType mediaType = detector.detect(bis, md);
        return mediaType;
    } finally {
        try {
            bis.close();
        } catch (IOException e) {
            // ignore failures when closing the stream
        }
    }
}
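
A minimal usage sketch for the helper above; the enclosing utility class name (TikaTypeUtils) and the sample file are assumptions made purely for illustration:

import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.tika.mime.MediaType;

public class FindMediaTypeDemo {
    public static void main(String[] args) throws Exception {
        try (InputStream is = new FileInputStream("sample.pdf")) {
            // Detection combines the stream's magic bytes with the supplied file name hint.
            // TikaTypeUtils is a stand-in name for the class hosting findMediaType above.
            MediaType type = TikaTypeUtils.findMediaType(is, "sample.pdf");
            System.out.println(type); // e.g. application/pdf
        }
    }
}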
Example 2: extractText

import org.apache.tika.metadata.Metadata; // import the required package/class
@Override
public void extractText(String mimeType, InputStream input, StringBuilder outputText, int maxSize)
    throws IOException
{
    try
    {
        Metadata meta = new Metadata();
        ContentHandler handler = new BodyContentHandler();
        Parser parser = new AutoDetectParser(new TikaConfig(getClass().getClassLoader()));
        parser.parse(input, handler, meta, new ParseContext());
        String content = handler.toString();
        if( content.length() > maxSize )
        {
            content = content.substring(0, maxSize);
        }
        outputText.append(content);
        if( LOGGER.isDebugEnabled() )
        {
            LOGGER.debug("Word Summary:" + content); //$NON-NLS-1$
        }
    }
    catch( Exception e )
    {
        throw new RuntimeException(e);
    }
}
Example 3: testDisableOcrOnEmbed

import org.apache.tika.metadata.Metadata; // import the required package/class
@Test
public void testDisableOcrOnEmbed() throws Throwable {
    final Extractor extractor = new Extractor();
    extractor.disableOcr();
    final Document document = factory.create(getClass().getResource("/documents/ocr/embedded.pdf"));
    String text;
    try (final Reader reader = extractor.extract(document)) {
        text = Spewer.toString(reader);
    }
    Assert.assertEquals("application/pdf", document.getMetadata().get(Metadata.CONTENT_TYPE));
    Assert.assertEquals("\n\n\n\n", text);
}
Example 4: convertWordDocumentIntoHtml

import org.apache.tika.metadata.Metadata; // import the required package/class
/**
 * Converts a .docx document into HTML markup. This code
 * is based on <a href="http://stackoverflow.com/a/9053258/313554">this StackOverflow</a> answer.
 *
 * @param wordDocument The .docx document to convert.
 * @return the converted document as HTML markup
 */
public ConvertedDocumentDTO convertWordDocumentIntoHtml(MultipartFile wordDocument) {
    LOGGER.info("Converting word document: {} into HTML", wordDocument.getOriginalFilename());
    try {
        InputStream input = wordDocument.getInputStream();
        Parser parser = new OOXMLParser();
        StringWriter sw = new StringWriter();
        SAXTransformerFactory factory = (SAXTransformerFactory)
                SAXTransformerFactory.newInstance();
        TransformerHandler handler = factory.newTransformerHandler();
        handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, "utf-8");
        handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
        handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
        handler.setResult(new StreamResult(sw));
        Metadata metadata = new Metadata();
        metadata.add(Metadata.CONTENT_TYPE, "text/html;charset=utf-8");
        parser.parse(input, handler, metadata, new ParseContext());
        return new ConvertedDocumentDTO(wordDocument.getOriginalFilename(), sw.toString());
    }
    catch (IOException | SAXException | TransformerException | TikaException ex) {
        LOGGER.error("Conversion failed because an exception was thrown", ex);
        throw new DocumentConversionException(ex.getMessage(), ex);
    }
}
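
A hedged caller sketch for the conversion method; the no-arg service constructor and Spring's MockMultipartFile (from spring-test) are assumptions used only to exercise the method, not part of the example itself:

import java.io.FileInputStream;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.web.multipart.MultipartFile;

public class ConversionDemo {
    public static void main(String[] args) throws Exception {
        // Wrap a local file as a MultipartFile, much as a controller would receive it.
        MultipartFile upload = new MockMultipartFile(
                "file", "report.docx",
                "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                new FileInputStream("report.docx"));

        DocumentConversionService service = new DocumentConversionService(); // assumed constructor
        ConvertedDocumentDTO result = service.convertWordDocumentIntoHtml(upload);
        System.out.println(result); // DTO carrying the original file name and the HTML markup
    }
}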
Example 5: parse

import org.apache.tika.metadata.Metadata; // import the required package/class
/**
 * Parses with Tika, throwing any exception hit while parsing the document
 */
// only package private for testing!
static String parse(final byte content[], final Metadata metadata, final int limit) throws TikaException, IOException {
    // check that it's not unprivileged code like a script
    SpecialPermission.check();
    try {
        return AccessController.doPrivileged((PrivilegedExceptionAction<String>)
            () -> TIKA_INSTANCE.parseToString(new ByteArrayInputStream(content), metadata, limit), RESTRICTED_CONTEXT);
    } catch (PrivilegedActionException e) {
        // checked exception from tika: unbox it
        Throwable cause = e.getCause();
        if (cause instanceof TikaException) {
            throw (TikaException) cause;
        } else if (cause instanceof IOException) {
            throw (IOException) cause;
        } else {
            throw new AssertionError(cause);
        }
    }
}
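
Stripped of the security-manager plumbing, the underlying Tika call that this method wraps looks roughly like the sketch below; the file path and the 100,000-character limit are placeholders:

import java.io.ByteArrayInputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.apache.tika.Tika;
import org.apache.tika.metadata.Metadata;

public class PlainTikaParseDemo {
    public static void main(String[] args) throws Exception {
        byte[] content = Files.readAllBytes(Paths.get("sample.docx"));
        Metadata metadata = new Metadata();
        Tika tika = new Tika();
        // parseToString auto-detects the type, extracts the text and truncates it at the limit.
        String text = tika.parseToString(new ByteArrayInputStream(content), metadata, 100_000);
        System.out.println(metadata.get(Metadata.CONTENT_TYPE)); // filled in during parsing
        System.out.println(text);
    }
}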
Example 6: buildParseContext

import org.apache.tika.metadata.Metadata; // import the required package/class
@Override
protected ParseContext buildParseContext(Metadata metadata,
        String targetMimeType, TransformationOptions options) {
    ParseContext context = super.buildParseContext(metadata, targetMimeType, options);
    boolean recurse = includeContents;
    if(options.getIncludeEmbedded() != null)
    {
        recurse = options.getIncludeEmbedded();
    }
    if(recurse)
    {
        // Use an auto detect parser to handle the contents
        if(tikaConfig == null)
        {
            tikaConfig = TikaConfig.getDefaultConfig();
        }
        context.set(Parser.class, new AutoDetectParser(tikaConfig));
    }
    return context;
}
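
The recursion switch works because Tika's default embedded-document handling only descends into attachments and archive entries when a Parser is registered in the ParseContext; a minimal standalone sketch of that pattern, with a placeholder archive name:

import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;

public class EmbeddedParseDemo {
    public static void main(String[] args) throws Exception {
        AutoDetectParser parser = new AutoDetectParser();
        ParseContext context = new ParseContext();
        // Registering a Parser here is what makes Tika recurse into embedded resources.
        context.set(Parser.class, parser);

        Metadata metadata = new Metadata();
        try (InputStream stream = Files.newInputStream(Paths.get("container.zip"))) {
            parser.parse(stream, new BodyContentHandler(-1), metadata, context);
        }
        System.out.println(metadata.get(Metadata.CONTENT_TYPE)); // e.g. application/zip
    }
}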
Example 7: extractSpecific

import org.apache.tika.metadata.Metadata; // import the required package/class
@Override
protected Map<String, Serializable> extractSpecific(Metadata metadata,
        Map<String, Serializable> properties, Map<String,String> headers)
{
    // Most things can go with the default Tika -> Alfresco Mapping
    // Handle the few special cases here
    // The description is special
    putRawValue(KEY_DESCRIPTION, generateDescription(metadata), properties);
    // The release date can be fiddly
    Date releaseDate = generateReleaseDate(metadata);
    putRawValue(KEY_CREATED, releaseDate, properties);
    putRawValue(XMPDM.RELEASE_DATE.getName(), releaseDate, properties);
    // TODO Get the Lyrics from the content
    //putRawValue(KEY_LYRICS, getLyrics(), properties);
    // All done
    return properties;
}
Example 8: generateReleaseDate

import org.apache.tika.metadata.Metadata; // import the required package/class
/**
 * Generates the release date
 */
private Date generateReleaseDate(Metadata metadata)
{
    String date = metadata.get(XMPDM.RELEASE_DATE);
    if(date == null || date.length() == 0)
    {
        return null;
    }
    // Is it just a year?
    if(date.matches("\\d\\d\\d\\d"))
    {
        // Just a year, we need a full date
        // Go for the 1st of the 1st
        Calendar c = Calendar.getInstance();
        c.set(
            Integer.parseInt(date), Calendar.JANUARY, 1,
            0, 0, 0
        );
        c.set(Calendar.MILLISECOND, 0);
        return c.getTime();
    }
    // Treat as a normal date
    return makeDate(date);
}
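
A quick illustration of the year-only branch, reproducing the same Calendar construction outside the extractor (this is not part of the original class):

import java.util.Calendar;
import java.util.Date;

public class YearOnlyReleaseDateDemo {
    public static void main(String[] args) {
        String date = "1977"; // matches \d\d\d\d, so it is expanded to January 1st of that year
        Calendar c = Calendar.getInstance();
        c.set(Integer.parseInt(date), Calendar.JANUARY, 1, 0, 0, 0);
        c.set(Calendar.MILLISECOND, 0);
        Date releaseDate = c.getTime();
        System.out.println(releaseDate); // e.g. Sat Jan 01 00:00:00 ... 1977 (time-zone dependent)
    }
}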
Example 9: generateDescription

import org.apache.tika.metadata.Metadata; // import the required package/class
/**
 * Generate the description
 *
 * @param metadata the metadata extracted from the file
 * @return the description
 */
@SuppressWarnings("deprecation")
private String generateDescription(Metadata metadata)
{
    StringBuilder result = new StringBuilder();
    if (metadata.get(Metadata.TITLE) != null)
    {
        result.append(metadata.get(Metadata.TITLE));
        if (metadata.get(XMPDM.ALBUM) != null)
        {
            result
                .append(" - ")
                .append(metadata.get(XMPDM.ALBUM));
        }
        if (metadata.get(XMPDM.ARTIST) != null)
        {
            result
                .append(" (")
                .append(metadata.get(XMPDM.ARTIST))
                .append(")");
        }
    }
    return result.toString();
}
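
To make the resulting format concrete, a small sketch of metadata that could feed the method above, using the same deprecated Tika 1.x Metadata.TITLE key as the example; the title, album and artist values are invented:

import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.XMPDM;

public class DescriptionInputDemo {
    public static void main(String[] args) {
        Metadata metadata = new Metadata();
        metadata.add(Metadata.TITLE, "So What");   // deprecated key, matching the code above
        metadata.add(XMPDM.ALBUM, "Kind of Blue");
        metadata.add(XMPDM.ARTIST, "Miles Davis");
        // generateDescription(metadata) would build: "So What - Kind of Blue (Miles Davis)"
    }
}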
Example 10: extractSpecific

import org.apache.tika.metadata.Metadata; // import the required package/class
@SuppressWarnings("deprecation")
@Override
protected Map<String, Serializable> extractSpecific(Metadata metadata,
        Map<String, Serializable> properties, Map<String,String> headers)
{
    putRawValue(KEY_CREATE_DATETIME, metadata.get(Metadata.CREATION_DATE), properties);
    putRawValue(KEY_LAST_SAVE_DATETIME, metadata.get(Metadata.LAST_SAVED), properties);
    putRawValue(KEY_EDIT_TIME, metadata.get(Metadata.EDIT_TIME), properties);
    putRawValue(KEY_FORMAT, metadata.get(Metadata.FORMAT), properties);
    putRawValue(KEY_KEYWORDS, metadata.get(Metadata.KEYWORDS), properties);
    putRawValue(KEY_LAST_AUTHOR, metadata.get(Metadata.LAST_AUTHOR), properties);
    putRawValue(KEY_LAST_PRINTED, metadata.get(Metadata.LAST_PRINTED), properties);
    // putRawValue(KEY_OS_VERSION, metadata.get(Metadata.OS_VERSION), properties);
    // putRawValue(KEY_THUMBNAIL, metadata.get(Metadata.THUMBNAIL), properties);
    putRawValue(KEY_PAGE_COUNT, metadata.get(Metadata.PAGE_COUNT), properties);
    putRawValue(KEY_PARAGRAPH_COUNT, metadata.get(Metadata.PARAGRAPH_COUNT), properties);
    putRawValue(KEY_WORD_COUNT, metadata.get(Metadata.WORD_COUNT), properties);
    return properties;
}
Example 11: extractSpecific

import org.apache.tika.metadata.Metadata; // import the required package/class
@SuppressWarnings("deprecation")
@Override
protected Map<String, Serializable> extractSpecific(Metadata metadata,
        Map<String, Serializable> properties, Map<String,String> headers)
{
    putRawValue(KEY_ORIGINATOR, metadata.get(Metadata.AUTHOR), properties);
    putRawValue(KEY_SUBJECT, metadata.get(Metadata.TITLE), properties);
    putRawValue(KEY_DESCRIPTION, metadata.get(Metadata.SUBJECT), properties);
    putRawValue(KEY_SENT_DATE, metadata.get(Metadata.LAST_SAVED), properties);
    // Store the TO, but not cc/bcc in the addressee field
    putRawValue(KEY_ADDRESSEE, metadata.get(Metadata.MESSAGE_TO), properties);
    // Store each of To, CC and BCC in their own fields
    putRawValue(KEY_TO_NAMES, metadata.getValues(Metadata.MESSAGE_TO), properties);
    putRawValue(KEY_CC_NAMES, metadata.getValues(Metadata.MESSAGE_CC), properties);
    putRawValue(KEY_BCC_NAMES, metadata.getValues(Metadata.MESSAGE_BCC), properties);
    // But store all email addresses (to/cc/bcc) in the addresses field
    putRawValue(KEY_ADDRESSEES, metadata.getValues(Metadata.MESSAGE_RECIPIENT_ADDRESS), properties);
    return properties;
}
Example 12: extractSpecific

import org.apache.tika.metadata.Metadata; // import the required package/class
@SuppressWarnings("deprecation")
@Override
protected Map<String, Serializable> extractSpecific(Metadata metadata,
        Map<String, Serializable> properties, Map<String,String> headers)
{
    // Do the normal Audio mappings
    super.extractSpecific(metadata, properties, headers);
    // Now do the compatibility ones
    // We only need these for people who had pre-existing mapping
    // properties from before the proper audio model was added
    putRawValue(KEY_ALBUM_TITLE, metadata.get(XMPDM.ALBUM), properties);
    putRawValue(KEY_SONG_TITLE, metadata.get(Metadata.TITLE), properties);
    putRawValue(KEY_ARTIST, metadata.get(XMPDM.ARTIST), properties);
    putRawValue(KEY_COMMENT, metadata.get(XMPDM.LOG_COMMENT), properties);
    putRawValue(KEY_TRACK_NUMBER, metadata.get(XMPDM.TRACK_NUMBER), properties);
    putRawValue(KEY_GENRE, metadata.get(XMPDM.GENRE), properties);
    putRawValue(KEY_YEAR_RELEASED, metadata.get(XMPDM.RELEASE_DATE), properties);
    putRawValue(KEY_COMPOSER, metadata.get(XMPDM.COMPOSER), properties);
    // All done
    return properties;
}
Example 13: getMetadataValues

import org.apache.tika.metadata.Metadata; // import the required package/class
private Serializable getMetadataValues(Metadata metadata, String key)
{
    // Use Set to prevent duplicates.
    Set<String> valuesSet = new LinkedHashSet<String>();
    String[] values = metadata.getValues(key);
    for (int i = 0; i < values.length; i++)
    {
        String[] parts = values[i].split(metadataSeparator);
        for (String subPart : parts)
        {
            valuesSet.add(subPart.trim());
        }
    }
    Object[] objArrayValues = valuesSet.toArray();
    values = Arrays.copyOf(objArrayValues, objArrayValues.length, String[].class);
    return values.length == 0 ? null : (values.length == 1 ? values[0] : values);
}
Example 14: getMetadataValue

import org.apache.tika.metadata.Metadata; // import the required package/class
private String getMetadataValue(Metadata metadata, String key)
{
    if (metadata.isMultiValued(key))
    {
        String[] parts = metadata.getValues(key);
        // use Set to prevent duplicates
        Set<String> value = new LinkedHashSet<String>(parts.length);
        for (int i = 0; i < parts.length; i++)
        {
            value.add(parts[i]);
        }
        String valueStr = value.toString();
        // remove leading/trailing braces []
        return valueStr.substring(1, valueStr.length() - 1);
    }
    else
    {
        return metadata.get(key);
    }
}
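
The branching above relies on Tika's multi-valued Metadata support; a small sketch of that behaviour with an arbitrary key and values:

import org.apache.tika.metadata.Metadata;

public class MultiValuedMetadataDemo {
    public static void main(String[] args) {
        Metadata metadata = new Metadata();
        metadata.add("dc:subject", "tika");
        metadata.add("dc:subject", "metadata");
        metadata.add("dc:subject", "tika"); // duplicate on purpose

        System.out.println(metadata.isMultiValued("dc:subject"));    // true
        System.out.println(metadata.getValues("dc:subject").length); // 3, Tika keeps duplicates
        // getMetadataValue(metadata, "dc:subject") would de-duplicate via the LinkedHashSet
        // and return "tika, metadata".
    }
}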
Example 15: extractSpecific

import org.apache.tika.metadata.Metadata; // import the required package/class
/**
 * Because some editors use JPEG_IMAGE_HEIGHT_TAG when saving JPEG images,
 * the values provided by Tika are a more reliable source for the image size
 * than the EXIF/TIFF metadata read from the file.
 * This will override the tiff:Image size that gets embedded into the Alfresco
 * node properties for JPEG files that contain such EXIF information.
 */
@Override
protected Map<String, Serializable> extractSpecific(Metadata metadata,
        Map<String, Serializable> properties, Map<String,String> headers)
{
    if(MimetypeMap.MIMETYPE_IMAGE_JPEG.equals(metadata.get(Metadata.CONTENT_TYPE)))
    {
        // check if the image has EXIF information
        if(metadata.get(EXIF_IMAGE_WIDTH_TAG) != null && metadata.get(EXIF_IMAGE_HEIGHT_TAG) != null)
        {
            // replace the EXIF size properties that will be embedded in the node with
            // the guessed dimensions from Tika
            putRawValue(TIFF.IMAGE_LENGTH.getName(), extractSize(metadata.get(JPEG_IMAGE_HEIGHT_TAG)), properties);
            putRawValue(TIFF.IMAGE_WIDTH.getName(), extractSize(metadata.get(JPEG_IMAGE_WIDTH_TAG)), properties);
        }
    }
    return properties;
}