当前位置: 首页>>代码示例>>Java>>正文


Java CloseShieldInputStream类代码示例

本文整理汇总了Java中org.apache.tika.io.CloseShieldInputStream的典型用法代码示例。如果您正苦于以下问题：Java CloseShieldInputStream类的具体用法？Java CloseShieldInputStream怎么用？Java CloseShieldInputStream使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


CloseShieldInputStream类属于org.apache.tika.io包,在下文中一共展示了CloseShieldInputStream类的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: parse

import org.apache.tika.io.CloseShieldInputStream; //导入依赖的package包/类
/**
 * Runs {@code turtleParser} over {@code stream} with an {@code RDFXMLWriter}
 * handler that writes into an in-memory buffer, then runs the SAX parser
 * obtained from {@code context} over the buffered bytes so the resulting
 * events reach {@code handler}.
 *
 * @param stream   input handed to {@code turtleParser}
 * @param handler  receiver of the SAX events, wrapped in an {@code OfflineContentHandler}
 * @param metadata the value stored under {@code Metadata.RESOURCE_NAME_KEY} is
 *                 passed as the second argument to {@code turtleParser.parse}
 * @param context  supplies the SAX parser
 * @throws IOException   declared by the signature; may surface from the SAX parse
 * @throws SAXException  if {@code turtleParser} fails (the original exception is
 *                       attached as the cause) or if the SAX parse fails
 * @throws TikaException declared by the signature
 */
@Override
public void parse(InputStream stream, ContentHandler handler,
		Metadata metadata, ParseContext context) throws IOException,
		SAXException, TikaException {

	ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
	turtleParser.setRDFHandler(new RDFXMLWriter(outputStream));

	try {
		turtleParser.parse(stream, metadata.get(Metadata.RESOURCE_NAME_KEY));
	} catch (Exception e) {
		// Keep the original exception as the cause so its type and stack
		// trace are not lost when the failure is reported as a SAXException.
		throw new SAXException(e.getMessage(), e);
	}

	// Re-read the buffered writer output as the input of the SAX parse.
	ByteArrayInputStream rdfXmlInput = new ByteArrayInputStream(outputStream.toByteArray());
	try {
		context.getSAXParser().parse(
				new CloseShieldInputStream(rdfXmlInput),
				new OfflineContentHandler(handler));
	} finally {
		rdfXmlInput.close();
	}
}
 
开发者ID:erfgoed-en-locatie,项目名称:artsholland-platform,代码行数:24,代码来源:TurtleParser.java

示例2: parse

import org.apache.tika.io.CloseShieldInputStream; //导入依赖的package包/类
/**
 * SAX-parses {@code stream} into the handler returned by
 * {@code getContentHandler(handler, metadata, context)}.
 *
 * @throws TikaException if no content type is set in {@code metadata}, if
 *                       {@code getContentHandler} returns {@code null}, or
 *                       if anything thrown during the SAX parse is caught
 *                       (it is wrapped with the message "XML parse error")
 */
@Override
	public final void parse(InputStream stream, ContentHandler handler,
			Metadata metadata, ParseContext context) throws IOException,
			SAXException, TikaException {
		// A content type must already be present before any handler is built.
		final boolean contentTypeMissing = metadata.get(Metadata.CONTENT_TYPE) == null;
		if (contentTypeMissing) {
			throw new TikaException("No content type set");
		}

		final ContentHandler wrappedHandler = getContentHandler(handler, metadata, context);
		if (null == wrappedHandler) {
			throw new TikaException("Parsing aborted, unable to init Tika handler");
		}

		try {
			// The shield wrapper is handed to the SAX parser instead of the
			// caller's stream itself.
			context.getSAXParser().parse(
					new CloseShieldInputStream(stream),
					new OfflineContentHandler(wrappedHandler));
		} catch (Exception failure) {
			throw new TikaException("XML parse error", failure);
		}
	}
 
开发者ID:erfgoed-en-locatie,项目名称:artsholland-platform,代码行数:24,代码来源:AbstractParser.java

示例3: parse

import org.apache.tika.io.CloseShieldInputStream; //导入依赖的package包/类
/**
 * Emits an XHTML document containing a single {@code <p>} element and, while
 * that element is open, SAX-parses {@code stream} into the handler returned by
 * {@code getContentHandler}.
 *
 * <p>If {@code metadata} has no content type, it is set to
 * {@code application/xml}. A {@link SAXException} from the parse is first
 * offered to the tagged handler via {@code throwIfCauseOf}; if that call
 * returns, the failure is logged at info level and the document is still
 * closed normally.
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
    ParseContext context) throws IOException, SAXException, TikaException {
  final boolean contentTypeMissing = metadata.get(Metadata.CONTENT_TYPE) == null;
  if (contentTypeMissing) {
    metadata.set(Metadata.CONTENT_TYPE, "application/xml");
  }

  final XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
  document.startDocument();
  document.startElement("p");

  final TaggedContentHandler taggedHandler = new TaggedContentHandler(handler);
  try {
    context.getSAXParser().parse(
        new CloseShieldInputStream(stream),
        new OfflineContentHandler(
            new EmbeddedContentHandler(
                getContentHandler(taggedHandler, metadata, context))));
  } catch (SAXException parseFailure) {
    taggedHandler.throwIfCauseOf(parseFailure);
    // Best effort: the failure is only logged.
    // TODO: decide whether to throw new TikaException("XML parse error", e) instead.
    LOG.info("XML parse error", parseFailure);
  }

  document.endElement("p");
  document.endDocument();
}
 
开发者ID:kolbasa,项目名称:OCRaptor,代码行数:27,代码来源:XMLParser.java

示例4: parse

import org.apache.tika.io.CloseShieldInputStream; //导入依赖的package包/类
/**
 * Reads {@code stream} as text through an {@code AutoDetectReader} and emits
 * the characters inside a single XHTML {@code <p>} element.
 *
 * <p>The charset reported by the reader is recorded in {@code metadata} both
 * as part of the {@code text/plain} content type and as the content encoding.
 * The reader is always closed; the caller's stream is wrapped in a
 * {@code CloseShieldInputStream} before being handed to the reader.
 */
public void parse(InputStream stream, ContentHandler handler,
    Metadata metadata, ParseContext context) throws IOException,
    SAXException, TikaException {
  // The reader determines the character encoding on construction.
  final AutoDetectReader reader = new AutoDetectReader(
      new CloseShieldInputStream(stream),
      metadata,
      context.get(ServiceLoader.class, LOADER));
  try {
    final Charset detected = reader.getCharset();
    metadata.set(
        Metadata.CONTENT_TYPE,
        new MediaType(MediaType.TEXT_PLAIN, detected).toString());
    // deprecated, see TIKA-431
    metadata.set(Metadata.CONTENT_ENCODING, detected.name());

    final XHTMLContentHandler out = new XHTMLContentHandler(handler, metadata);
    out.startDocument();
    out.startElement("p");

    // Copy the text through in 4096-char chunks until end of input.
    final char[] chunk = new char[4096];
    for (int count = reader.read(chunk); count != -1; count = reader.read(chunk)) {
      out.characters(chunk, 0, count);
    }

    out.endElement("p");
    out.endDocument();
  } finally {
    reader.close();
  }
}
 
开发者ID:kolbasa,项目名称:OCRaptor,代码行数:31,代码来源:TXTParser.java

示例5: parse

import org.apache.tika.io.CloseShieldInputStream; //导入依赖的package包/类
/**
 * Parses {@code stream} as HTML with a TagSoup parser and forwards the result
 * to {@code handler} through an {@code HtmlHandler} wrapped in an
 * {@code XHTMLDowngradeHandler}.
 *
 * <p>The charset reported by the {@code AutoDetectReader} is stored as the
 * content encoding. The content type is set to {@code text/html} with that
 * charset only when no content type was present or the existing one starts
 * with {@code text/html}. The {@code HtmlMapper} and {@code Schema} are taken
 * from {@code context}, falling back to a new {@code HtmlParserMapper} and
 * {@code HTML_SCHEMA}. The reader is always closed.
 */
public void parse(InputStream stream, ContentHandler handler,
    Metadata metadata, ParseContext context) throws IOException,
    SAXException, TikaException {
  // The reader determines the character encoding on construction.
  final AutoDetectReader reader = new AutoDetectReader(
      new CloseShieldInputStream(stream),
      metadata,
      context.get(ServiceLoader.class, LOADER));
  try {
    final Charset detected = reader.getCharset();
    final String existingType = metadata.get(Metadata.CONTENT_TYPE);
    final boolean mayOverwriteType =
        existingType == null || existingType.startsWith("text/html");
    if (mayOverwriteType) {
      metadata.set(
          Metadata.CONTENT_TYPE,
          new MediaType(MediaType.TEXT_HTML, detected).toString());
    }
    // deprecated, see TIKA-431
    metadata.set(Metadata.CONTENT_ENCODING, detected.name());

    // Mapper comes from the parse context, with a default fallback.
    final HtmlMapper htmlMapper = context.get(HtmlMapper.class, new HtmlParserMapper());

    final org.ccil.cowan.tagsoup.Parser tagSoup = new org.ccil.cowan.tagsoup.Parser();

    // Schema comes from the parse context, with the shared default as fallback.
    final Schema htmlSchema = context.get(Schema.class, HTML_SCHEMA);

    // TIKA-528: Reuse shared schema to avoid heavy instantiation
    tagSoup.setProperty(org.ccil.cowan.tagsoup.Parser.schemaProperty, htmlSchema);
    // TIKA-599: Shared schema is thread-safe only if bogons are ignored
    tagSoup.setFeature(org.ccil.cowan.tagsoup.Parser.ignoreBogonsFeature, true);

    final HtmlHandler htmlHandler = new HtmlHandler(htmlMapper, handler, metadata);
    tagSoup.setContentHandler(new XHTMLDowngradeHandler(htmlHandler));

    tagSoup.parse(reader.asInputSource());
  } finally {
    reader.close();
  }
}
 
开发者ID:kolbasa,项目名称:OCRaptor,代码行数:41,代码来源:HtmlParser.java

示例6: parse

import org.apache.tika.io.CloseShieldInputStream; //导入依赖的package包/类
/**
 * Sets the content type in {@code metadata} to {@code ENVI_MIME_TYPE} and
 * emits the text of {@code stream} inside a single XHTML {@code <p>} element.
 *
 * <p>The text is decoded through an {@code AutoDetectReader}; the charset it
 * reports is stored in {@code metadata} as the content encoding. The reader is
 * always closed; the caller's stream is wrapped in a
 * {@code CloseShieldInputStream} before being handed to the reader.
 *
 * @param stream   the input to read as text
 * @param handler  receiver of the XHTML SAX events
 * @param metadata receives the content type and content encoding
 * @param context  not used by this method
 * @throws IOException   if reading from the stream fails
 * @throws SAXException  if the handler rejects an event
 * @throws TikaException declared by the signature; may surface from the reader
 */
public void parse(InputStream stream, ContentHandler handler, 
        Metadata metadata, ParseContext context)
                throws IOException, SAXException, TikaException {

    // Only outputting the MIME type as metadata
    metadata.set(Metadata.CONTENT_TYPE, ENVI_MIME_TYPE);

    // The following code was taken from the TXTParser
    // Automatically detect the character encoding
    AutoDetectReader reader = 
            new AutoDetectReader(new CloseShieldInputStream(stream), metadata);

    try {
        Charset charset = reader.getCharset();
        // No text/plain MediaType is built here: the content type stays
        // ENVI_MIME_TYPE, so only the encoding is recorded.
        // deprecated, see TIKA-431
        metadata.set(Metadata.CONTENT_ENCODING, charset.name());

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);

        xhtml.startDocument();

        // Text contents of the xhtml, copied through in 4096-char chunks
        xhtml.startElement("p");
        char[] buffer = new char[4096];
        int n = reader.read(buffer);
        while (n != -1) {
            xhtml.characters(buffer, 0, n);
            n = reader.read(buffer);
        }
        xhtml.endElement("p");

        xhtml.endDocument();
    } finally {
        reader.close();
    }

}
 
开发者ID:abburgess,项目名称:ENVIJava,代码行数:40,代码来源:EnviHeaderParser.java

示例7: parseEmbedded

import org.apache.tika.io.CloseShieldInputStream; //导入依赖的package包/类
/**
 * Hands an embedded entry to {@code child} as a copy of {@code record} whose
 * attachment body is a {@code TikaInputStream} wrapping {@code stream}.
 *
 * <p>On the copy, the attachment MIME type, charset and name fields are
 * cleared; the name is set again only when {@code name} is non-empty. If
 * {@code stream} is itself a {@code TikaInputStream} with a non-null open
 * container, that container is set on the new stream as well. The temporary
 * resources are closed quietly in all cases.
 *
 * @return whatever {@code child.process} returns for the prepared record
 */
public boolean parseEmbedded(InputStream stream, Record record, String name, Command child) {
    final TemporaryResources tmp = new TemporaryResources();
    try {
      final TikaInputStream wrapped =
          TikaInputStream.get(new CloseShieldInputStream(stream), tmp);
      if (stream instanceof TikaInputStream) {
        final Object openContainer = ((TikaInputStream) stream).getOpenContainer();
        if (openContainer != null) {
          wrapped.setOpenContainer(openContainer);
        }
      }

      // Work on a copy; the caller's record object is never modified here.
      final Record attachment = record.copy();
      attachment.replaceValues(Fields.ATTACHMENT_BODY, wrapped);
      attachment.removeAll(Fields.ATTACHMENT_MIME_TYPE);
      attachment.removeAll(Fields.ATTACHMENT_CHARSET);
      attachment.removeAll(Fields.ATTACHMENT_NAME);

      final boolean hasName = name != null && name.length() > 0;
      if (hasName) {
        attachment.put(Fields.ATTACHMENT_NAME, name);
      }

      // There is no catch clause: anything thrown by the child command
      // propagates to the caller after the finally block has run.
      return child.process(attachment);
    } finally {
      Closeables.closeQuietly(tmp);
    }

  }
 
开发者ID:cloudera,项目名称:cdk,代码行数:37,代码来源:EmbeddedExtractor.java


注:本文中的org.apache.tika.io.CloseShieldInputStream类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。