当前位置: 首页>>代码示例>>Java>>正文


Java TikaMetadataKeys类代码示例

本文整理汇总了Java中org.apache.tika.metadata.TikaMetadataKeys的典型用法代码示例。如果您正苦于以下问题:Java TikaMetadataKeys类的具体用法?Java TikaMetadataKeys怎么用?Java TikaMetadataKeys使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


TikaMetadataKeys类属于org.apache.tika.metadata包,在下文中一共展示了TikaMetadataKeys类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: processSheet

import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Parses one worksheet's XML stream with a SAX parser, forwarding the content to
 * {@code sheetContentsExtractor} and flagging the metadata as protected when the
 * capturing handler reports sheet protection.
 *
 * <p>The stream is closed before this method returns, whether or not parsing succeeds.
 *
 * @param sheetContentsExtractor handler that receives the sheet contents
 * @param styles styles table handed to the {@code XSSFSheetXMLHandler}
 * @param strings shared strings table handed to the {@code XSSFSheetXMLHandler}
 * @param sheetInputStream XML stream of the sheet; closed by this method
 * @throws IOException if reading or closing the stream fails
 * @throws SAXException if the sheet XML cannot be parsed
 * @throws RuntimeException if a SAX parser cannot be configured; the original
 *         {@link ParserConfigurationException} is attached as the cause
 */
public void processSheet(SheetContentsHandler sheetContentsExtractor,
    StylesTable styles, ReadOnlySharedStringsTable strings,
    InputStream sheetInputStream) throws IOException, SAXException {
  InputSource sheetSource = new InputSource(sheetInputStream);
  SAXParserFactory saxFactory = SAXParserFactory.newInstance();
  try {
    SAXParser saxParser = saxFactory.newSAXParser();
    XMLReader sheetParser = saxParser.getXMLReader();
    XSSFSheetInterestingPartsCapturer handler = new XSSFSheetInterestingPartsCapturer(
        new XSSFSheetXMLHandler(styles, strings, sheetContentsExtractor,
            formatter, false));
    sheetParser.setContentHandler(handler);
    sheetParser.parse(sheetSource);

    if (handler.hasProtection) {
      metadata.set(TikaMetadataKeys.PROTECTED, "true");
    }
  } catch (ParserConfigurationException e) {
    // Keep the original exception as the cause so the stack trace is not lost.
    throw new RuntimeException("SAX parser appears to be broken - "
        + e.getMessage(), e);
  } finally {
    // Release the stream on failure paths too, not only after a successful parse.
    sheetInputStream.close();
  }
}
 
开发者ID:kolbasa,项目名称:OCRaptor,代码行数:24,代码来源:XSSFExcelExtractorDecorator.java

示例2: testProtectedExcelSheets

import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Parses a workbook in which some, but not all, sheets are protected and
 * checks the reported content type and the PROTECTED metadata flag.
 * See TIKA-364.
 */
@Test
public void testProtectedExcelSheets() throws Exception {
    final String expectedType =
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";

    InputStream stream = OOXMLParserTest.class
            .getResourceAsStream("/test-documents/protectedSheets.xlsx");

    Parser autoParser = new AutoDetectParser();
    Metadata meta = new Metadata();
    ContentHandler body = new BodyContentHandler();
    ParseContext parseContext = new ParseContext();

    try {
        autoParser.parse(stream, body, meta, parseContext);

        String actualType = meta.get(Metadata.CONTENT_TYPE);
        assertEquals(expectedType, actualType);

        String protectedFlag = meta.get(TikaMetadataKeys.PROTECTED);
        assertEquals("true", protectedFlag);
    } finally {
        stream.close();
    }
}
 
开发者ID:kanrourou,项目名称:software-testing,代码行数:27,代码来源:OOXMLParserTest.java

示例3: testProtectedExcelFile

import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Parses a password-protected Excel document and checks the content type,
 * the PROTECTED metadata flag and that the extracted text contains "Office".
 * See TIKA-437.
 */
@Test
public void testProtectedExcelFile() throws Exception {
    final String expectedType =
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";

    Parser autoParser = new AutoDetectParser();
    Metadata meta = new Metadata();
    ContentHandler body = new BodyContentHandler();
    ParseContext parseContext = new ParseContext();

    InputStream stream = getTestDocument("protectedFile.xlsx");
    try {
        autoParser.parse(stream, body, meta, parseContext);

        String actualType = meta.get(Metadata.CONTENT_TYPE);
        assertEquals(expectedType, actualType);

        String protectedFlag = meta.get(TikaMetadataKeys.PROTECTED);
        assertEquals("true", protectedFlag);

        String extractedText = body.toString();
        assertTrue(extractedText.contains("Office"));
    } finally {
        stream.close();
    }
}
 
开发者ID:kanrourou,项目名称:software-testing,代码行数:29,代码来源:OOXMLParserTest.java

示例4: detectContentType

import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Detects the content type of the resource identified by {@code uri}.
 *
 * <p>The file name part of {@code uri} is placed in the metadata as a hint for the
 * detector. The input stream is closed before returning, including when detection fails.
 *
 * @param uri location of the resource, as accepted by {@code createInputStream}
 * @return the detected content type, or {@code MediaType.OCTET_STREAM} as a string
 *         when the detected value is null or empty
 * @throws FileNotFoundException declared by the original signature
 * @throws IOException if the stream cannot be read or closed
 * @throws TikaException declared by the original signature
 */
public static String detectContentType(String uri) throws FileNotFoundException, IOException, TikaException {
	final Detector detector = config.getDetector();
	final TikaInputStream inputStream = createInputStream(uri);
	final String contentType;

	try {
		final Metadata metadata = new Metadata();

		// Set the file name. This provides some level of type-hinting.
		metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName());

		// Detect the content type.
		contentType = detector.detect(inputStream, metadata).toString();
	} finally {
		// Always release the stream, even when detection throws.
		inputStream.close();
	}

	// Return the default content-type if undetermined.
	if (contentType == null || contentType.isEmpty()) {
		return MediaType.OCTET_STREAM.toString();
	}

	return contentType;
}
 
开发者ID:ICIJ,项目名称:node-tika,代码行数:21,代码来源:NodeTika.java

示例5: prepareMetadataWithConfigParams

import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Adds the configured parse-shapes option to the given Tika metadata.
 * Does nothing when {@code metadata} is {@code null}.
 *
 * @param metadata metadata object to which the option is added; may be {@code null}
 */
@Override
public void prepareMetadataWithConfigParams(Metadata metadata)
{
    if (metadata != null)
    {
        final String parseShapesValue = Boolean.toString(
                getBooleanProperty(PARSE_SHAPE_PROP_STRING, TIKA_PARSER_PARSE_SHAPES_DEFAULT_VALUE));
        metadata.add(TikaMetadataKeys.TIKA_PARSER_PARSE_SHAPES_KEY, parseShapesValue);

        if (logger.isDebugEnabled())
        {
            logger.debug("Tika metadata options passed to tika parser: " + metadata);
        }
    }
}
 
开发者ID:Alfresco,项目名称:alfresco-repository,代码行数:16,代码来源:MetadataExtracterConfigImpl.java

示例6: getPassword

import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Resolves the password for a document.
 *
 * <p>An explicit password, when set, takes precedence. Otherwise, if the password
 * map has entries, the password is looked up using the resource name taken from
 * the metadata.
 *
 * @param meta metadata of the document being processed
 * @return the resolved password, or {@code null} when there is no explicit
 *         password and the password map is empty
 */
@Override
public String getPassword(Metadata meta) {
  if (getExplicitPassword() == null) {
    if (passwordMap.size() > 0) {
      final String resourceName = meta.get(TikaMetadataKeys.RESOURCE_NAME_KEY);
      return lookupPasswordFromMap(resourceName);
    }
    return null;
  }

  return getExplicitPassword();
}
 
开发者ID:europeana,项目名称:search,代码行数:12,代码来源:RegexRulesPasswordProvider.java

示例7: from

import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Detects the {@link MimeType} from the bytes stream, passing the file name and the
 * declared content type to Tika as detection hints.
 *
 * @see <a href="http://tika.apache.org/1.4/detection.html">Tika content detection</a>
 * @param is stream whose content is inspected
 * @param fileName resource name hint; only used when {@code Strings.isNOTNullOrEmpty} accepts it
 * @param contentType declared content-type hint; only used when {@code Strings.isNOTNullOrEmpty} accepts it
 * @return the {@link MimeType} returned by {@code MimeType.forName} for the detected type name
 * @throws IOException if the stream cannot be read
 */
public static MimeType from(final InputStream is,
							final String fileName,final String contentType) throws IOException {
	Metadata md = new Metadata();
	if (Strings.isNOTNullOrEmpty(fileName)) {
		md.add(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName);
	}
	if (Strings.isNOTNullOrEmpty(contentType)) {
		md.add(HttpHeaders.CONTENT_TYPE, contentType);
	}

	// Detect with the metadata assembled above. The previous call passed only the
	// file name, so the content-type hint was built but never reached the detector.
	Tika tika = new Tika();
	String mimeTypeStr = tika.detect(is, md);

	return MimeType.forName(mimeTypeStr);
}
 
开发者ID:opendata-euskadi,项目名称:r01fb,代码行数:21,代码来源:MimeType.java

示例8: getXHTML

import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Stores the metadata object on this instance, initialises the PROTECTED flag
 * to "false", and then delegates to the superclass implementation.
 *
 * @param handler content handler passed through to the superclass
 * @param metadata metadata object kept on this instance and passed through
 * @param context parse context passed through to the superclass
 */
@Override
public void getXHTML(ContentHandler handler, Metadata metadata,
    ParseContext context) throws SAXException, XmlException, IOException,
    TikaException {

  this.metadata = metadata;

  // Set the default value before the superclass call runs.
  this.metadata.set(TikaMetadataKeys.PROTECTED, "false");

  super.getXHTML(handler, metadata, context);
}
 
开发者ID:kolbasa,项目名称:OCRaptor,代码行数:11,代码来源:XSSFExcelExtractorDecorator.java

示例9: startElement

import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Switches binary mode on or off depending on whether the element's local name
 * equals {@code ELEMENT_BINARY}. When it does, the binary buffer is reset and a
 * fresh metadata object is filled from the element's id and content-type attributes.
 */
@Override
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
    binaryMode = ELEMENT_BINARY.equals(localName);
    if (!binaryMode) {
        return;
    }

    // Start collecting a new binary payload.
    binaryData.setLength(0);

    metadata = new Metadata();
    final String resourceId = attributes.getValue(ATTRIBUTE_ID);
    metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, resourceId);
    final String declaredType = attributes.getValue(ATTRIBUTE_CONTENT_TYPE);
    metadata.set(Metadata.CONTENT_TYPE, declaredType);
}
 
开发者ID:kolbasa,项目名称:OCRaptor,代码行数:12,代码来源:FictionBookParser.java

示例10: testExcel

import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Parses testEXCEL.xlsx with a US locale in the parse context and checks the
 * content type, title/author metadata, selected text content, number
 * formatting and that the document is reported as not protected.
 */
@Test
public void testExcel() throws Exception {
    final String expectedType =
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";

    Metadata meta = new Metadata();
    ContentHandler body = new BodyContentHandler();
    ParseContext parseContext = new ParseContext();
    parseContext.set(Locale.class, Locale.US);

    InputStream stream = getTestDocument("testEXCEL.xlsx");
    try {
        parser.parse(stream, body, meta, parseContext);

        // Metadata assertions.
        assertEquals(expectedType, meta.get(Metadata.CONTENT_TYPE));
        assertEquals("Simple Excel document", meta.get(TikaCoreProperties.TITLE));
        assertEquals("Keith Bennett", meta.get(TikaCoreProperties.CREATOR));
        assertEquals("Keith Bennett", meta.get(Metadata.AUTHOR));

        // Text content assertions.
        String extractedText = body.toString();
        assertTrue(extractedText.contains("Sample Excel Worksheet"));
        assertTrue(extractedText.contains("Numbers and their Squares"));
        assertTrue(extractedText.contains("9"));
        assertFalse(extractedText.contains("9.0"));
        assertTrue(extractedText.contains("196"));
        assertFalse(extractedText.contains("196.0"));

        assertEquals("false", meta.get(TikaMetadataKeys.PROTECTED));
    } finally {
        stream.close();
    }
}
 
开发者ID:kanrourou,项目名称:software-testing,代码行数:30,代码来源:OOXMLParserTest.java

示例11: fillMetadata

import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Populates {@code metadata} with the resource name and the content type.
 *
 * <p>The resource name is the file-name part of {@code uri} and is only set when
 * {@code uri} is non-null. The content type is passed through
 * {@code normalizeContentType} and only added when the result is non-null.
 *
 * @param metadata metadata object to populate
 * @param contentType raw content type; may be {@code null}
 * @param uri location of the resource; may be {@code null}
 */
private static void fillMetadata(Metadata metadata, String contentType, String uri) {
	if (uri != null) {
		final String fileName = new File(uri).getName();
		metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName);
	}

	final String normalizedType = normalizeContentType(contentType);
	if (normalizedType != null) {
		metadata.add(HttpHeaders.CONTENT_TYPE, normalizedType);
	}
}
 
开发者ID:ICIJ,项目名称:node-tika,代码行数:16,代码来源:NodeTika.java

示例12: detectContentTypeAndCharset

import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Detects the content type and character set of the resource identified by {@code uri}.
 *
 * <p>The input stream is closed before returning, including when detection of the
 * type or of the character set fails.
 *
 * @param uri location of the resource, as accepted by {@code createInputStream}
 * @return {@code MediaType.OCTET_STREAM} as a string when the detected content type
 *         is null or empty; otherwise the content type, followed by
 *         {@code "; charset=<name>"} when a non-empty charset was detected
 * @throws FileNotFoundException declared by the original signature
 * @throws IOException if the stream cannot be read or closed
 * @throws TikaException declared by the original signature
 */
public static String detectContentTypeAndCharset(String uri) throws FileNotFoundException, IOException, TikaException {
	final Detector detector = config.getDetector();
	final TikaInputStream inputStream = createInputStream(uri);
	final String contentType;
	final String charset;

	try {
		final Metadata metadata = new Metadata();

		// Set the file name. This provides some level of type-hinting.
		metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName());

		// Detect the content type.
		contentType = detector.detect(inputStream, metadata).toString();

		// Use metadata to provide type-hinting to the AutoDetectReader.
		fillMetadata(metadata, contentType, uri);

		// Detect the character set.
		final AutoDetectReader reader = new AutoDetectReader(inputStream, metadata);
		charset = reader.getCharset().toString();
	} finally {
		// Always release the stream, even when one of the detection steps throws.
		inputStream.close();
	}

	// Return the default content-type if undetermined.
	if (contentType == null || contentType.isEmpty()) {
		return MediaType.OCTET_STREAM.toString();
	}

	// Append the charset if the content-type was determined.
	if (charset != null && !charset.isEmpty()) {
		return contentType + "; charset=" + charset;
	}

	return contentType;
}
 
开发者ID:ICIJ,项目名称:node-tika,代码行数:33,代码来源:NodeTika.java

示例13: getTestFile

import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Opens a test resource from the {@code /test-files/} classpath folder and, when a
 * metadata object is supplied, records the file name in it as the resource name.
 *
 * @param name file name inside {@code /test-files/}
 * @param metadata metadata to receive the resource name; may be {@code null}
 * @return the opened stream; the method asserts that it is not {@code null}
 */
protected static InputStream getTestFile(String name, Metadata metadata) throws Exception {
    final String resourcePath = "/test-files/" + name;
    InputStream stream = TestProjectParsers.class.getResourceAsStream(resourcePath);
    assertNotNull("Test file not found: " + name, stream);

    if (metadata == null) {
        return stream;
    }

    metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, name);
    return stream;
}
 
开发者ID:Gagravarr,项目名称:MPXJ-Tika,代码行数:10,代码来源:TestProjectParsers.java

示例14: transformDOC2HTML

import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Converts a document file to HTML text with Tika and pairs the result with an
 * encoding name obtained from a {@code CharsetDetector}.
 *
 * @param resourceFile document to convert
 * @param entityManager not used by this method
 * @return the HTML text with its detected encoding name, or {@code null} when
 *         {@code resourceFile} is not a regular file
 */
private StringWithEncoding transformDOC2HTML(File resourceFile,
		EntityManager entityManager) throws IOException, SAXException,
		TikaException, TransformerConfigurationException {
	ParseContext parseContext = new ParseContext();
	Parser autoParser = new AutoDetectParser();
	parseContext.set(Parser.class, autoParser);
	Metadata meta = new Metadata();

	if (!resourceFile.isFile()) {
		return null;
	}

	// Give the parser the file name as a hint.
	meta.set(TikaMetadataKeys.RESOURCE_NAME_KEY, resourceFile.getName());

	Writer htmlOut = null;
	InputStream source = new FileInputStream(resourceFile);
	try {
		htmlOut = new StringWriter();
		autoParser.parse(source, this.HTML.getContentHandler(null, htmlOut),
				meta, parseContext);
	} finally {
		source.close();
		if (htmlOut != null) {
			htmlOut.close();
		}
	}

	// The detector is fed the HTML text encoded with the platform default charset.
	CharsetDetector detector = new CharsetDetector();
	String html = htmlOut.toString();
	detector.setText(html.getBytes());
	String encodingName = detector.detect().getName();

	return new StringWithEncoding(html, encodingName);
}
 
开发者ID:lablita,项目名称:ridire-cpi,代码行数:32,代码来源:Mapper.java

示例15: extractInfo

import org.apache.tika.metadata.TikaMetadataKeys; //导入依赖的package包/类
/**
 * Parses {@code inputFile} and returns the metadata collected during parsing as a map.
 *
 * <p>The file stream is closed before returning on every path, including a failed
 * parse and a thrown exception.
 *
 * @param inputFile file to parse; its name is set in the metadata as the resource name
 * @return metadata names mapped to the value {@code metadata.get(name)} returns,
 *         or {@code null} when {@code parse} reports failure
 * @throws FileNotFoundException if {@code inputFile} cannot be opened
 * @throws IOException if closing the stream fails
 */
public HashMap<String, String> extractInfo(File inputFile)
		throws FileNotFoundException, IOException {

	HashMap<String, String> rc = new HashMap<String, String>();

	BufferedInputStream bis = new BufferedInputStream(new FileInputStream(
			inputFile));
	try {
		Parser p = getAutoDetectParser();
		StringWriter writer = new StringWriter();
		Metadata metadata = new Metadata();
		metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, inputFile.getName());

		if (!parse(p, bis, writer, metadata)) {
			return null;
		}

		for (String name : metadata.names()) {
			rc.put(name, metadata.get(name));
		}
	} finally {
		// Previously skipped when parse() returned false or an exception was thrown.
		bis.close();
	}

	return rc;
}
 
开发者ID:OpenNTF,项目名称:Tika4XPages,代码行数:28,代码来源:TikaBean.java


注:本文中的org.apache.tika.metadata.TikaMetadataKeys类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。