本文整理汇总了Java中org.apache.tika.metadata.TikaMetadataKeys类的典型用法代码示例。如果您正苦于以下问题：Java TikaMetadataKeys类的具体用法？Java TikaMetadataKeys怎么用？Java TikaMetadataKeys使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
TikaMetadataKeys类属于org.apache.tika.metadata包，在下文中一共展示了TikaMetadataKeys类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: processSheet
import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Parses one worksheet XML stream with a SAX parser and records whether the
 * sheet is protected.
 *
 * <p>The SAX events are routed through an
 * {@code XSSFSheetInterestingPartsCapturer} wrapping an
 * {@code XSSFSheetXMLHandler}. When the capturer reports protection, the
 * {@link TikaMetadataKeys#PROTECTED} entry of this object's metadata is set
 * to {@code "true"}. The supplied stream is always closed before this method
 * returns, including when parsing fails.
 *
 * @param sheetContentsExtractor handler passed to the sheet XML handler
 * @param styles styles table passed to the sheet XML handler
 * @param strings shared strings table passed to the sheet XML handler
 * @param sheetInputStream the sheet XML to parse; closed by this method
 * @throws IOException if reading or closing the stream fails
 * @throws SAXException if the sheet XML cannot be parsed
 * @throws RuntimeException if a SAX parser cannot be configured; the
 *         underlying {@link ParserConfigurationException} is kept as the cause
 */
public void processSheet(SheetContentsHandler sheetContentsExtractor,
        StylesTable styles, ReadOnlySharedStringsTable strings,
        InputStream sheetInputStream) throws IOException, SAXException {
    try {
        InputSource sheetSource = new InputSource(sheetInputStream);
        SAXParserFactory saxFactory = SAXParserFactory.newInstance();
        SAXParser saxParser = saxFactory.newSAXParser();
        XMLReader sheetParser = saxParser.getXMLReader();
        XSSFSheetInterestingPartsCapturer handler = new XSSFSheetInterestingPartsCapturer(
                new XSSFSheetXMLHandler(styles, strings, sheetContentsExtractor,
                        formatter, false));
        sheetParser.setContentHandler(handler);
        sheetParser.parse(sheetSource);
        if (handler.hasProtection) {
            metadata.set(TikaMetadataKeys.PROTECTED, "true");
        }
    } catch (ParserConfigurationException e) {
        // Chain the original exception so its stack trace is not lost.
        throw new RuntimeException("SAX parser appears to be broken - "
                + e.getMessage(), e);
    } finally {
        // Close on every path; previously the stream leaked when parsing threw.
        sheetInputStream.close();
    }
}
示例2: testProtectedExcelSheets
import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Checks a workbook in which some, but not all, sheets are protected:
 * it must be detected as an OOXML spreadsheet and flagged as protected.
 * See TIKA-364.
 */
@Test
public void testProtectedExcelSheets() throws Exception {
    final String expectedType =
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
    InputStream stream = OOXMLParserTest.class
            .getResourceAsStream("/test-documents/protectedSheets.xlsx");
    Parser autoParser = new AutoDetectParser();
    Metadata meta = new Metadata();
    ContentHandler bodyHandler = new BodyContentHandler();
    ParseContext parseContext = new ParseContext();
    try {
        autoParser.parse(stream, bodyHandler, meta, parseContext);

        // The detected media type must be the OOXML spreadsheet type.
        assertEquals(expectedType, meta.get(Metadata.CONTENT_TYPE));

        // The protection flag must be reported.
        assertEquals("true", meta.get(TikaMetadataKeys.PROTECTED));
    } finally {
        stream.close();
    }
}
示例3: testProtectedExcelFile
import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Checks a password-protected Excel document: it must be detected as an
 * OOXML spreadsheet, flagged as protected, and yield text containing "Office".
 * See TIKA-437.
 */
@Test
public void testProtectedExcelFile() throws Exception {
    final String expectedType =
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
    Parser autoParser = new AutoDetectParser();
    Metadata meta = new Metadata();
    ContentHandler bodyHandler = new BodyContentHandler();
    ParseContext parseContext = new ParseContext();
    InputStream stream = getTestDocument("protectedFile.xlsx");
    try {
        autoParser.parse(stream, bodyHandler, meta, parseContext);

        // Media type and protection flag.
        assertEquals(expectedType, meta.get(Metadata.CONTENT_TYPE));
        assertEquals("true", meta.get(TikaMetadataKeys.PROTECTED));

        // Extracted text.
        String text = bodyHandler.toString();
        assertTrue(text.contains("Office"));
    } finally {
        stream.close();
    }
}
示例4: detectContentType
import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Detects the content type of the resource at {@code uri}.
 *
 * <p>The file-name part of {@code uri} is put into the metadata as a
 * type hint before the configured detector is run. The input stream opened
 * for detection is always closed, even when detection fails.
 *
 * @param uri location of the resource to inspect
 * @return the detected content type, or {@code MediaType.OCTET_STREAM} as a
 *         string when the detected value is empty
 * @throws FileNotFoundException declared by the original signature
 *         (presumably raised when the resource cannot be opened)
 * @throws IOException if reading or closing the stream fails
 * @throws TikaException declared by the original signature
 */
public static String detectContentType(String uri) throws FileNotFoundException, IOException, TikaException {
    final Detector detector = config.getDetector();
    final TikaInputStream inputStream = createInputStream(uri);
    String contentType;
    try {
        final Metadata metadata = new Metadata();
        // Set the file name. This provides some level of type-hinting.
        metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName());
        // Detect the content type.
        contentType = detector.detect(inputStream, metadata).toString();
    } finally {
        // Close on every path; previously the stream leaked when detection threw.
        inputStream.close();
    }
    // Return the default content-type if undetermined.
    if (contentType == null || contentType.isEmpty()) {
        return MediaType.OCTET_STREAM.toString();
    }
    return contentType;
}
示例5: prepareMetadataWithConfigParams
import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Adds the configured "parse shapes" option to the given metadata so that it
 * is handed on with the metadata. Does nothing when {@code metadata} is
 * {@code null}.
 *
 * @param metadata the metadata to enrich; may be {@code null}
 */
@Override
public void prepareMetadataWithConfigParams(Metadata metadata)
{
    if (metadata != null)
    {
        // Resolve the option, falling back to the default value.
        String parseShapesValue = Boolean.toString(
                getBooleanProperty(PARSE_SHAPE_PROP_STRING, TIKA_PARSER_PARSE_SHAPES_DEFAULT_VALUE));
        metadata.add(TikaMetadataKeys.TIKA_PARSER_PARSE_SHAPES_KEY, parseShapesValue);

        if (logger.isDebugEnabled())
        {
            logger.debug("Tika metadata options passed to tika parser: " + metadata);
        }
    }
}
示例6: getPassword
import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Resolves the password to use for the document described by {@code meta}.
 *
 * <p>An explicit password, when present, always wins. Otherwise, if the
 * password map has entries, the password is looked up by the resource name
 * stored in the metadata. In every other case {@code null} is returned.
 *
 * @param meta metadata of the document; its resource name is the lookup key
 * @return the resolved password, or {@code null} when none is available
 */
@Override
public String getPassword(Metadata meta) {
    if (getExplicitPassword() != null) {
        return getExplicitPassword();
    }
    if (passwordMap.size() <= 0) {
        // Nothing to look up.
        return null;
    }
    String resourceName = meta.get(TikaMetadataKeys.RESOURCE_NAME_KEY);
    return lookupPasswordFromMap(resourceName);
}
示例7: from
import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Detects the {@link MimeType} of a byte stream, using the file name and the
 * declared content type as detection hints when they are provided.
 *
 * @see <a href="http://tika.apache.org/1.4/detection.html">Tika content detection</a>
 * @param is the stream whose content is inspected
 * @param fileName optional file name hint; ignored when null or empty
 * @param contentType optional declared content type hint; ignored when null or empty
 * @return the {@link MimeType} obtained from the detected type name
 * @throws IOException if the stream cannot be read
 */
public static MimeType from(final InputStream is,
        final String fileName, final String contentType) throws IOException {
    Metadata md = new Metadata();
    if (Strings.isNOTNullOrEmpty(fileName)) {
        md.add(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName);
    }
    if (Strings.isNOTNullOrEmpty(contentType)) {
        md.add(HttpHeaders.CONTENT_TYPE, contentType);
    }
    Tika tika = new Tika();
    // Pass the populated metadata to the detector. Previously only the file
    // name was forwarded, so the content-type hint collected above was never used.
    String mimeTypeStr = tika.detect(is, md);
    return MimeType.forName(mimeTypeStr);
}
示例8: getXHTML
import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Stores the metadata on this instance, marks the document as not protected,
 * and then delegates to the superclass implementation.
 *
 * @param handler content handler passed on to the superclass
 * @param metadata metadata to remember and to receive the protection flag
 * @param context parse context passed on to the superclass
 */
@Override
public void getXHTML(ContentHandler handler, Metadata metadata, ParseContext context)
        throws SAXException, XmlException, IOException, TikaException {
    // Keep a reference so other methods of this object can update the same metadata.
    this.metadata = metadata;

    // Start from "false" before the superclass does its work.
    this.metadata.set(TikaMetadataKeys.PROTECTED, "false");

    super.getXHTML(handler, metadata, context);
}
示例9: startElement
import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Tracks whether the element being opened is the binary element. When it is,
 * the binary buffer is reset and a fresh metadata object is created from the
 * element's id and content-type attributes.
 */
@Override
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
    binaryMode = ELEMENT_BINARY.equals(localName);
    if (!binaryMode) {
        return;
    }

    // Discard whatever was buffered before.
    binaryData.setLength(0);

    metadata = new Metadata();
    String resourceId = attributes.getValue(ATTRIBUTE_ID);
    metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, resourceId);
    String declaredType = attributes.getValue(ATTRIBUTE_CONTENT_TYPE);
    metadata.set(Metadata.CONTENT_TYPE, declaredType);
}
示例10: testExcel
import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Parses testEXCEL.xlsx with the US locale and checks the detected media
 * type, title/creator/author metadata, extracted text, number formatting
 * (no trailing ".0"), and that the document is reported as not protected.
 */
@Test
public void testExcel() throws Exception {
    final String expectedType =
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
    Metadata meta = new Metadata();
    ContentHandler bodyHandler = new BodyContentHandler();
    ParseContext parseContext = new ParseContext();
    parseContext.set(Locale.class, Locale.US);
    InputStream stream = getTestDocument("testEXCEL.xlsx");
    try {
        parser.parse(stream, bodyHandler, meta, parseContext);

        // Metadata checks.
        assertEquals(expectedType, meta.get(Metadata.CONTENT_TYPE));
        assertEquals("Simple Excel document", meta.get(TikaCoreProperties.TITLE));
        assertEquals("Keith Bennett", meta.get(TikaCoreProperties.CREATOR));
        assertEquals("Keith Bennett", meta.get(Metadata.AUTHOR));

        // Text checks: whole numbers must not be rendered with a decimal part.
        String text = bodyHandler.toString();
        assertTrue(text.contains("Sample Excel Worksheet"));
        assertTrue(text.contains("Numbers and their Squares"));
        assertTrue(text.contains("9"));
        assertFalse(text.contains("9.0"));
        assertTrue(text.contains("196"));
        assertFalse(text.contains("196.0"));

        // Protection flag.
        assertEquals("false", meta.get(TikaMetadataKeys.PROTECTED));
    } finally {
        stream.close();
    }
}
示例11: fillMetadata
import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Populates {@code metadata} with the resource name derived from {@code uri}
 * and with the normalised content type.
 *
 * @param metadata the metadata to update
 * @param contentType raw content type; normalised before it is added
 * @param uri resource location; skipped when {@code null}
 */
private static void fillMetadata(Metadata metadata, String contentType, String uri) {
    // Resource name: only the last path segment of the URI is used.
    if (uri != null) {
        String fileName = new File(uri).getName();
        metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName);
    }

    // Content type: added only when normalisation yields a value.
    String normalized = normalizeContentType(contentType);
    if (normalized == null) {
        return;
    }
    metadata.add(HttpHeaders.CONTENT_TYPE, normalized);
}
示例12: detectContentTypeAndCharset
import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Detects the content type and character set of the resource at {@code uri}.
 *
 * <p>The content type is detected first, with the file name as a hint; the
 * metadata is then filled with that type so the {@code AutoDetectReader} can
 * use it when determining the charset. The input stream is always closed,
 * even when detection fails.
 *
 * @param uri location of the resource to inspect
 * @return {@code MediaType.OCTET_STREAM} as a string when the detected type is
 *         empty; otherwise the content type, with {@code "; charset=..."}
 *         appended when a charset was determined
 * @throws FileNotFoundException declared by the original signature
 *         (presumably raised when the resource cannot be opened)
 * @throws IOException if reading or closing the stream fails
 * @throws TikaException declared by the original signature
 */
public static String detectContentTypeAndCharset(String uri) throws FileNotFoundException, IOException, TikaException {
    final Detector detector = config.getDetector();
    final TikaInputStream inputStream = createInputStream(uri);
    String contentType;
    String charset;
    try {
        final Metadata metadata = new Metadata();
        // Set the file name. This provides some level of type-hinting.
        metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName());
        // Detect the content type.
        contentType = detector.detect(inputStream, metadata).toString();
        // Use metadata to provide type-hinting to the AutoDetectReader.
        fillMetadata(metadata, contentType, uri);
        // Detect the character set.
        final AutoDetectReader reader = new AutoDetectReader(inputStream, metadata);
        charset = reader.getCharset().toString();
    } finally {
        // Close on every path; previously the stream leaked when detection threw.
        inputStream.close();
    }
    // Return the default content-type if undetermined.
    if (contentType == null || contentType.isEmpty()) {
        return MediaType.OCTET_STREAM.toString();
    }
    // Append the charset if the content-type was determined.
    if (charset != null && !charset.isEmpty()) {
        return contentType + "; charset=" + charset;
    }
    return contentType;
}
示例13: getTestFile
import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Opens a test resource from {@code /test-files/} and, when metadata is
 * supplied, records the resource name in it.
 *
 * @param name file name of the resource under {@code /test-files/}
 * @param metadata metadata to receive the resource name; may be {@code null}
 * @return the opened stream (asserted to be non-null)
 */
protected static InputStream getTestFile(String name, Metadata metadata) throws Exception {
    final String resourcePath = "/test-files/" + name;
    InputStream stream = TestProjectParsers.class.getResourceAsStream(resourcePath);
    assertNotNull("Test file not found: " + name, stream);
    if (metadata == null) {
        return stream;
    }
    metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, name);
    return stream;
}
示例14: transformDOC2HTML
import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Converts the given document file to markup by parsing it with an
 * auto-detecting parser into the content handler supplied by {@code HTML},
 * then pairs the produced text with a detected encoding name.
 *
 * @param resourceFile the document to convert
 * @param entityManager not used by this method
 * @return the converted text with its detected encoding, or {@code null}
 *         when {@code resourceFile} is not a regular file
 */
private StringWithEncoding transformDOC2HTML(File resourceFile,
        EntityManager entityManager) throws IOException, SAXException,
        TikaException, TransformerConfigurationException {
    ParseContext parseContext = new ParseContext();
    Parser autoParser = new AutoDetectParser();
    parseContext.set(Parser.class, autoParser);
    Metadata meta = new Metadata();

    if (!resourceFile.isFile()) {
        return null;
    }

    meta.set(TikaMetadataKeys.RESOURCE_NAME_KEY, resourceFile.getName());
    Writer htmlOut = null;
    InputStream docStream = new FileInputStream(resourceFile);
    try {
        htmlOut = new StringWriter();
        ContentHandler htmlHandler = this.HTML.getContentHandler(null, htmlOut);
        autoParser.parse(docStream, htmlHandler, meta, parseContext);
    } finally {
        docStream.close();
        if (htmlOut != null) {
            htmlOut.close();
        }
    }

    String html = htmlOut.toString();
    CharsetDetector detector = new CharsetDetector();
    // NOTE(review): getBytes() uses the platform default charset, so the
    // detected encoding reflects that charset — confirm this is intended.
    detector.setText(html.getBytes());
    String encoding = detector.detect().getName();
    return new StringWithEncoding(html, encoding);
}
示例15: extractInfo
import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Parses {@code inputFile} and returns its metadata as a name-to-value map.
 *
 * <p>The file name is supplied to the parser as the resource name. For each
 * metadata name, the value returned by {@code metadata.get(name)} is stored.
 * The input stream is always closed, including when parsing fails or throws.
 *
 * @param inputFile the file to inspect
 * @return the extracted metadata, or {@code null} when {@code parse} reports failure
 * @throws FileNotFoundException if {@code inputFile} cannot be opened
 * @throws IOException if closing the stream fails
 */
public HashMap<String, String> extractInfo(File inputFile)
        throws FileNotFoundException, IOException {
    HashMap<String, String> rc = new HashMap<String, String>();
    BufferedInputStream bis = new BufferedInputStream(new FileInputStream(
            inputFile));
    try {
        Parser p = getAutoDetectParser();
        StringWriter writer = new StringWriter();
        Metadata metadata = new Metadata();
        metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, inputFile.getName());
        if (!parse(p, bis, writer, metadata)) {
            return null;
        }
        for (String name : metadata.names()) {
            rc.put(name, metadata.get(name));
        }
    } finally {
        // Close on every path; previously the stream leaked on the early
        // return above and whenever an exception was thrown.
        bis.close();
    }
    return rc;
}