当前位置: 首页>>代码示例>>Java>>正文


Java TeeContentHandler类代码示例

本文整理汇总了Java中org.apache.tika.sax.TeeContentHandler的典型用法代码示例。如果您正苦于以下问题：Java TeeContentHandler类的具体用法？Java TeeContentHandler怎么用？Java TeeContentHandler使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


TeeContentHandler类属于org.apache.tika.sax包,在下文中一共展示了TeeContentHandler类的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getContentHandler

import org.apache.tika.sax.TeeContentHandler; //导入依赖的package包/类
/**
 * Builds the content handler used by this parser: the handler produced by the
 * superclass, teed together with one Dublin Core handler per metadata property.
 *
 * @param handler  handler forwarded to the superclass implementation
 * @param metadata metadata object passed to the superclass and to every Dublin Core handler
 * @param context  parse context forwarded to the superclass implementation
 * @return a {@link TeeContentHandler} over the superclass handler and the
 *         twelve Dublin Core handlers created here
 */
protected ContentHandler getContentHandler(
        ContentHandler handler, Metadata metadata, ParseContext context) {
    // The superclass handler comes first, followed by the Dublin Core branches
    // in the same order as their element names are listed.
    ContentHandler[] branches = {
        super.getContentHandler(handler, metadata, context),
        getDublinCoreHandler(metadata, TikaCoreProperties.TITLE, "title"),
        getDublinCoreHandler(metadata, TikaCoreProperties.KEYWORDS, "subject"),
        getDublinCoreHandler(metadata, TikaCoreProperties.CREATOR, "creator"),
        getDublinCoreHandler(metadata, TikaCoreProperties.DESCRIPTION, "description"),
        getDublinCoreHandler(metadata, TikaCoreProperties.PUBLISHER, "publisher"),
        getDublinCoreHandler(metadata, TikaCoreProperties.CONTRIBUTOR, "contributor"),
        getDublinCoreHandler(metadata, TikaCoreProperties.CREATED, "date"),
        getDublinCoreHandler(metadata, TikaCoreProperties.TYPE, "type"),
        getDublinCoreHandler(metadata, TikaCoreProperties.FORMAT, "format"),
        getDublinCoreHandler(metadata, TikaCoreProperties.IDENTIFIER, "identifier"),
        getDublinCoreHandler(metadata, TikaCoreProperties.LANGUAGE, "language"),
        getDublinCoreHandler(metadata, TikaCoreProperties.RIGHTS, "rights")
    };
    return new TeeContentHandler(branches);
}
 
开发者ID:kolbasa,项目名称:OCRaptor,代码行数:18,代码来源:DcXMLParser.java

示例2: getImgInfo

import org.apache.tika.sax.TeeContentHandler; //导入依赖的package包/类
/**
 * Parses the resource behind the given URL with an {@link AutoDetectParser}
 * and returns the metadata object that was handed to the parse.
 *
 * @param urlStr textual URL of the resource to inspect
 * @return the metadata object passed to the parser, or {@code null} when
 *         {@code urlStr} is blank according to {@code StringUtils.isBlank}
 * @throws Exception if the URL cannot be built, the stream cannot be opened
 *                   or closed, or parsing fails
 */
public Metadata getImgInfo(String urlStr) throws Exception {
    if (StringUtils.isBlank(urlStr)) {
        return null;
    }

    URL source = new URL(urlStr);
    Metadata collected = new Metadata();
    StringWriter html = new StringWriter();
    Parser autoDetect = new AutoDetectParser();
    // Assigned to the imageParser variable declared outside this method; the
    // parse below still runs on the plain auto-detect parser.
    imageParser = new ImageSavingParser(autoDetect);

    TikaInputStream input = TikaInputStream.get(source, collected);
    try {
        autoDetect.parse(
                input,
                new TeeContentHandler(getHtmlHandler(html)),
                collected,
                new ParseContext());
    } finally {
        // Close the stream whether or not parsing succeeded.
        input.close();
    }
    return collected;
}
 
开发者ID:zhishan332,项目名称:hunt4j,代码行数:17,代码来源:TikaHelper.java

示例3: htmlParserShouldReturnMeta

import org.apache.tika.sax.TeeContentHandler; //导入依赖的package包/类
/**
 * Parses the {@code stream} fixture with {@link HtmlParser} and verifies the
 * author, refresh, latitude and longitude values recorded in {@code metadata}.
 * The {@code stream}, {@code body}, {@code link} and {@code metadata} fixtures
 * are declared outside this method.
 */
@Test
public void htmlParserShouldReturnMeta() throws Exception {
    try {
        HtmlParser htmlParser = new HtmlParser();
        TeeContentHandler tee = new TeeContentHandler(body, link);
        htmlParser.parse(stream, tee, metadata, new ParseContext());
    } finally {
        // Always release the fixture stream, even if parsing throws.
        stream.close();
    }

    assertEquals("Tika Developers", metadata.get("Author"));
    assertEquals("5", metadata.get("refresh"));
    assertEquals("51.2312", metadata.get(Geographic.LATITUDE));
    assertEquals("-5.1987", metadata.get(Geographic.LONGITUDE));
}
 
开发者ID:kanrourou,项目名称:software-testing,代码行数:17,代码来源:HtmlParserBDDTest1.java

示例4: getMeta

import org.apache.tika.sax.TeeContentHandler; //导入依赖的package包/类
/**
 * Returns a handler that tees {@code ch} with a branch feeding a
 * {@link MetadataHandler} for {@code property}. The branch is restricted by
 * XPath matchers for the {@code meta:<element>} element and the text below it.
 *
 * @param ch       handler that forms the first branch of the tee
 * @param md       metadata object given to the {@link MetadataHandler}
 * @param property property given to the {@link MetadataHandler}
 * @param element  local element name appended to the {@code //meta:} XPath prefix
 * @return a {@link TeeContentHandler} over {@code ch} and the matching branch
 */
private static ContentHandler getMeta(
        ContentHandler ch, Metadata md, Property property, String element) {
    String elementPath = "//meta:" + element;
    // Match either the element itself or any text nested inside it.
    Matcher elementOrText = new CompositeMatcher(
            META_XPATH.parse(elementPath),
            META_XPATH.parse(elementPath + "//text()"));
    MetadataHandler sink = new MetadataHandler(md, property);
    return new TeeContentHandler(ch, new MatchingContentHandler(sink, elementOrText));
}
 
开发者ID:kolbasa,项目名称:OCRaptor,代码行数:10,代码来源:OpenDocumentMetaParser.java

示例5: getUserDefined

import org.apache.tika.sax.TeeContentHandler; //导入依赖的package包/类
/**
 * Returns a handler that tees {@code ch} with a branch feeding an
 * {@link AttributeDependantMetadataHandler}. The branch is restricted by XPath
 * matchers for the {@code meta:name} attribute of {@code meta:user-defined}
 * elements and for the text below those elements.
 *
 * @param ch handler that forms the first branch of the tee
 * @param md metadata object given to the {@link AttributeDependantMetadataHandler}
 * @return a {@link TeeContentHandler} over {@code ch} and the matching branch
 */
private static ContentHandler getUserDefined(
        ContentHandler ch, Metadata md) {
    // e.g. <meta:user-defined meta:name="Info1">Text1</meta:user-defined> becomes custom:Info1=Text1
    Matcher nameOrText = new CompositeMatcher(
            META_XPATH.parse("//meta:user-defined/@meta:name"),
            META_XPATH.parse("//meta:user-defined//text()"));
    AttributeDependantMetadataHandler sink = new AttributeDependantMetadataHandler(
            md, "meta:name", Metadata.USER_DEFINED_METADATA_NAME_PREFIX);
    return new TeeContentHandler(ch, new MatchingContentHandler(sink, nameOrText));
}
 
开发者ID:kolbasa,项目名称:OCRaptor,代码行数:12,代码来源:OpenDocumentMetaParser.java

示例6: getStatistic

import org.apache.tika.sax.TeeContentHandler; //导入依赖的package包/类
/**
 * Returns a handler that tees {@code ch} with a branch feeding an
 * {@link AttributeMetadataHandler}. The branch is restricted by an XPath
 * matcher for the given attribute of {@code meta:document-statistic}.
 *
 * @param ch        handler that forms the first branch of the tee
 * @param md        metadata object given to the {@link AttributeMetadataHandler}
 * @param name      name given to the {@link AttributeMetadataHandler}
 * @param attribute attribute name, used both in the XPath expression and as
 *                  the local name given to the {@link AttributeMetadataHandler}
 * @return a {@link TeeContentHandler} over {@code ch} and the matching branch
 */
@Deprecated
private static ContentHandler getStatistic(
        ContentHandler ch, Metadata md, String name, String attribute) {
    String attributePath = "//meta:document-statistic/@meta:" + attribute;
    Matcher statisticAttribute = META_XPATH.parse(attributePath);
    AttributeMetadataHandler sink =
            new AttributeMetadataHandler(META_NS, attribute, md, name);
    return new TeeContentHandler(ch, new MatchingContentHandler(sink, statisticAttribute));
}
 
开发者ID:kolbasa,项目名称:OCRaptor,代码行数:9,代码来源:OpenDocumentMetaParser.java

示例7: getMetaInfo

import org.apache.tika.sax.TeeContentHandler; //导入依赖的package包/类
/**
 * Parses the resource behind the given URL with an {@link AutoDetectParser}
 * and returns the metadata object that was handed to the parse. The content
 * handler is a {@link TeeContentHandler} constructed without any branches.
 *
 * @param urlStr textual URL of the resource to inspect
 * @return the metadata object passed to the parser, or {@code null} when
 *         {@code urlStr} is blank according to {@code StringUtils.isBlank}
 * @throws Exception if the URL cannot be built, the stream cannot be opened
 *                   or closed, or parsing fails
 */
public Metadata getMetaInfo(String urlStr) throws Exception {
    if (StringUtils.isBlank(urlStr)) {
        return null;
    }

    URL source = new URL(urlStr);
    Metadata collected = new Metadata();
    TikaInputStream input = TikaInputStream.get(source, collected);
    try {
        ContentHandler noBranches = new TeeContentHandler();
        Parser autoDetect = new AutoDetectParser();
        autoDetect.parse(input, noBranches, collected, new ParseContext());
    } finally {
        // Close the stream whether or not parsing succeeded.
        input.close();
    }
    return collected;
}
 
开发者ID:zhishan332,项目名称:hunt4j,代码行数:15,代码来源:TikaHelper.java

示例8: htmlParserShouldReturnTitle

import org.apache.tika.sax.TeeContentHandler; //导入依赖的package包/类
/**
 * Parses the {@code stream} fixture with {@link HtmlParser} and verifies the
 * title recorded in {@code metadata}. The {@code stream}, {@code body},
 * {@code link} and {@code metadata} fixtures are declared outside this method.
 */
@Test
public void htmlParserShouldReturnTitle() throws Exception {
    try {
        HtmlParser htmlParser = new HtmlParser();
        TeeContentHandler tee = new TeeContentHandler(body, link);
        htmlParser.parse(stream, tee, metadata, new ParseContext());
    } finally {
        // Always release the fixture stream, even if parsing throws.
        stream.close();
    }

    assertEquals("Title : Test Indexation Html", metadata.get(TikaCoreProperties.TITLE));
}
 
开发者ID:kanrourou,项目名称:software-testing,代码行数:14,代码来源:HtmlParserBDDTest1.java

示例9: htmlParserShouldReturnAnchor

import org.apache.tika.sax.TeeContentHandler; //导入依赖的package包/类
/**
 * Parses the {@code stream} fixture with {@link HtmlParser} and verifies the
 * string forms of the {@code href} and {@code name} fixtures. All fixtures
 * used here are declared outside this method.
 */
@Test
public void htmlParserShouldReturnAnchor() throws Exception {
    try {
        HtmlParser htmlParser = new HtmlParser();
        TeeContentHandler tee = new TeeContentHandler(body, link);
        htmlParser.parse(stream, tee, metadata, new ParseContext());
    } finally {
        // Always release the fixture stream, even if parsing throws.
        stream.close();
    }

    assertEquals("http://www.apache.org/", href.toString());
    assertEquals("test-anchor", name.toString());
}
 
开发者ID:kanrourou,项目名称:software-testing,代码行数:16,代码来源:HtmlParserBDDTest1.java

示例10: htmlParserShouldReturnBody

import org.apache.tika.sax.TeeContentHandler; //导入依赖的package包/类
/**
 * Parses the {@code stream} fixture with {@link HtmlParser} and verifies that
 * the string form of the {@code body} fixture contains the expected phrases.
 * All fixtures used here are declared outside this method.
 */
@Test
public void htmlParserShouldReturnBody() throws Exception {
    try {
        HtmlParser htmlParser = new HtmlParser();
        TeeContentHandler tee = new TeeContentHandler(body, link);
        htmlParser.parse(stream, tee, metadata, new ParseContext());
    } finally {
        // Always release the fixture stream, even if parsing throws.
        stream.close();
    }

    String extracted = body.toString();
    assertTrue(extracted.contains("Test Indexation Html"));
    assertTrue(extracted.contains("Indexation du fichier"));
}
 
开发者ID:kanrourou,项目名称:software-testing,代码行数:16,代码来源:HtmlParserBDDTest1.java

示例11: parseToCache

import org.apache.tika.sax.TeeContentHandler; //导入依赖的package包/类
/**
 * Runs the superclass parse with a {@link TeeContentHandler} built from the
 * supplied handler and a {@link WriteOutContentHandler} around {@code writer}.
 *
 * @param tis      input stream handed to the superclass parse method
 * @param handler  first branch of the tee
 * @param metadata metadata handed to the superclass parse, or to the
 *                 {@link XHTMLContentHandler} wrapper on the inline path
 * @param context  parse context forwarded to the superclass
 * @param config   OCR configuration, used only on the inline path
 * @param inline   {@code true} to call {@code super.parseInline},
 *                 {@code false} to call {@code super.parse}
 * @param writer   writer wrapped by the second branch of the tee
 * @throws SAXException  declared by this method; see the superclass parse methods
 * @throws IOException   declared by this method; see the superclass parse methods
 * @throws TikaException declared by this method; see the superclass parse methods
 */
private void parseToCache(final TikaInputStream tis, final ContentHandler handler, final Metadata metadata,
                          final ParseContext context, final TesseractOCRConfig config, final boolean inline,
                          final Writer writer) throws SAXException, IOException, TikaException {
    final WriteOutContentHandler writerBranch = new WriteOutContentHandler(writer);
    final ContentHandler tee = new TeeContentHandler(handler, writerBranch);

    if (!inline) {
        super.parse(tis, tee, metadata, context);
        return;
    }

    // Inline parsing receives the tee wrapped in an XHTML handler.
    super.parseInline(tis, new XHTMLContentHandler(tee, metadata), context, config);
}
 
开发者ID:ICIJ,项目名称:extract,代码行数:12,代码来源:CachingTesseractOCRParser.java


注:本文中的org.apache.tika.sax.TeeContentHandler类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。