本文整理汇总了Java中org.apache.tika.io.CloseShieldInputStream类的典型用法代码示例。如果您正苦于以下问题：Java CloseShieldInputStream类的具体用法？Java CloseShieldInputStream怎么用？Java CloseShieldInputStream使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
CloseShieldInputStream类属于org.apache.tika.io包,在下文中一共展示了CloseShieldInputStream类的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: parse
import org.apache.tika.io.CloseShieldInputStream; //导入依赖的package包/类
/**
 * Runs {@code turtleParser} over the incoming stream with an
 * {@link RDFXMLWriter} as its RDF handler, buffering the writer's output in
 * memory, and then feeds the buffered bytes through the context's SAX parser
 * into the supplied content handler.
 *
 * @param stream   the document to parse; passed straight to {@code turtleParser}
 * @param handler  receives the SAX events (wrapped in an {@link OfflineContentHandler})
 * @param metadata supplies {@code Metadata.RESOURCE_NAME_KEY}, which is passed
 *                 as the second argument of {@code turtleParser.parse}
 * @param context  provides the SAX parser used for the second pass
 * @throws IOException   if reading the buffered output fails
 * @throws SAXException  if {@code turtleParser} fails for any reason (the
 *                       original exception is attached as the cause) or the
 *                       SAX pass fails
 * @throws TikaException declared by the signature
 */
@Override
public void parse(InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context) throws IOException,
        SAXException, TikaException {
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    turtleParser.setRDFHandler(new RDFXMLWriter(outputStream));
    try {
        turtleParser.parse(stream, metadata.get(Metadata.RESOURCE_NAME_KEY));
    } catch (Exception e) {
        // Attach the original exception as the cause so its type and stack
        // trace survive; getMessage() on the wrapper still yields the same text.
        throw new SAXException(e.getMessage(), e);
    }

    // Second pass: replay the buffered writer output through the SAX parser.
    ByteArrayInputStream rdfXmlInput = new ByteArrayInputStream(outputStream.toByteArray());
    try {
        context.getSAXParser().parse(
                new CloseShieldInputStream(rdfXmlInput),
                new OfflineContentHandler(handler));
    } finally {
        rdfXmlInput.close();
    }
}
示例2: parse
import org.apache.tika.io.CloseShieldInputStream; //导入依赖的package包/类
/**
 * SAX-parses the stream into a handler obtained from
 * {@code getContentHandler}, refusing to run when no content type has been
 * set on the metadata.
 *
 * @param stream   the XML input; shielded so the SAX parser cannot close it
 * @param handler  the caller's handler, passed to {@code getContentHandler}
 * @param metadata must already carry {@code Metadata.CONTENT_TYPE}
 * @param context  provides the SAX parser
 * @throws TikaException if the content type is missing, if
 *                       {@code getContentHandler} returns {@code null}, or if
 *                       anything at all goes wrong during the SAX parse
 *                       (every exception from that step is wrapped)
 * @throws IOException   declared by the signature
 * @throws SAXException  declared by the signature
 */
@Override
public final void parse(InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context) throws IOException,
        SAXException, TikaException {
    final String declaredType = metadata.get(Metadata.CONTENT_TYPE);
    if (declaredType == null) {
        throw new TikaException("No content type set");
    }

    final ContentHandler target = getContentHandler(handler, metadata, context);
    if (null == target) {
        throw new TikaException("Parsing aborted, unable to init Tika handler");
    }

    try {
        context.getSAXParser().parse(
                new CloseShieldInputStream(stream),
                new OfflineContentHandler(target));
    } catch (Exception failure) {
        // Any failure here, checked or unchecked, surfaces as a TikaException.
        throw new TikaException("XML parse error", failure);
    }
}
示例3: parse
import org.apache.tika.io.CloseShieldInputStream; //导入依赖的package包/类
/**
 * Emits an XHTML document containing a single {@code <p>} element and, in
 * between the opening and closing tags, SAX-parses the stream into the
 * handler returned by {@code getContentHandler}.
 *
 * <p>If the metadata has no content type it is set to
 * {@code application/xml}. A {@link SAXException} from the parse is handed to
 * {@code tagged.throwIfCauseOf} and, if that call returns, only logged at
 * INFO level; the method then still closes the paragraph and the document.
 *
 * <p>NOTE(review): the tagged handler wraps {@code handler} directly rather
 * than the XHTML handler - confirm this is intended. Also, when an exception
 * propagates out of the parse step the closing {@code </p>} and
 * {@code endDocument} are not emitted.
 *
 * @param stream   the XML input; shielded so the SAX parser cannot close it
 * @param handler  receives both the XHTML wrapper events and the parsed content
 * @param metadata read for, and possibly updated with, the content type
 * @param context  provides the SAX parser
 * @throws IOException   declared by the signature
 * @throws SAXException  declared by the signature
 * @throws TikaException declared by the signature
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
        ParseContext context) throws IOException, SAXException, TikaException {
    if (null == metadata.get(Metadata.CONTENT_TYPE)) {
        metadata.set(Metadata.CONTENT_TYPE, "application/xml");
    }

    final XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
    document.startDocument();
    document.startElement("p");

    final TaggedContentHandler taggedHandler = new TaggedContentHandler(handler);
    try {
        context.getSAXParser().parse(
                new CloseShieldInputStream(stream),
                new OfflineContentHandler(
                        new EmbeddedContentHandler(
                                getContentHandler(taggedHandler, metadata, context))));
    } catch (SAXException parseFailure) {
        taggedHandler.throwIfCauseOf(parseFailure);
        // Not rethrown as a TikaException (left as a TODO by the author):
        // the failure is logged and the document is still closed below.
        LOG.info("XML parse error", parseFailure);
    }

    document.endElement("p");
    document.endDocument();
}
示例4: parse
import org.apache.tika.io.CloseShieldInputStream; //导入依赖的package包/类
/**
 * Reads the stream as text through an {@link AutoDetectReader} and writes the
 * characters into a single XHTML {@code <p>} element.
 *
 * <p>The charset reported by the reader is recorded in the metadata twice:
 * as part of a {@code text/plain} content type and as the (deprecated,
 * see TIKA-431) content encoding.
 *
 * @param stream   the text input; shielded so closing the reader does not close it
 * @param handler  receives the XHTML events
 * @param metadata updated with content type and content encoding
 * @param context  consulted for a {@code ServiceLoader}, falling back to {@code LOADER}
 * @throws IOException   if reading from the stream fails
 * @throws SAXException  if the handler rejects an event
 * @throws TikaException declared by the signature
 */
public void parse(InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context) throws IOException,
        SAXException, TikaException {
    final AutoDetectReader reader = new AutoDetectReader(
            new CloseShieldInputStream(stream),
            metadata,
            context.get(ServiceLoader.class, LOADER));
    try {
        final Charset detected = reader.getCharset();
        metadata.set(Metadata.CONTENT_TYPE,
                new MediaType(MediaType.TEXT_PLAIN, detected).toString());
        // deprecated, see TIKA-431
        metadata.set(Metadata.CONTENT_ENCODING, detected.name());

        final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.startElement("p");

        // Copy the decoded text to the handler in 4096-char chunks until EOF.
        final char[] chunk = new char[4096];
        for (int count = reader.read(chunk); count != -1; count = reader.read(chunk)) {
            xhtml.characters(chunk, 0, count);
        }

        xhtml.endElement("p");
        xhtml.endDocument();
    } finally {
        reader.close();
    }
}
示例5: parse
import org.apache.tika.io.CloseShieldInputStream; //导入依赖的package包/类
/**
 * Parses HTML from the stream with a TagSoup parser, after detecting the
 * character encoding through an {@link AutoDetectReader}.
 *
 * <p>The content type in the metadata is set to {@code text/html} plus the
 * detected charset only when it is currently unset or already starts with
 * {@code text/html}. The content encoding (deprecated, see TIKA-431) is
 * always set.
 *
 * @param stream   the HTML input; shielded so closing the reader does not close it
 * @param handler  receives the SAX events via {@code HtmlHandler} and
 *                 {@code XHTMLDowngradeHandler}
 * @param metadata updated with content type (conditionally) and content encoding
 * @param context  consulted for a {@code ServiceLoader}, an {@code HtmlMapper}
 *                 and a TagSoup {@code Schema}, each with a default
 * @throws IOException   if reading from the stream fails
 * @throws SAXException  if parsing fails
 * @throws TikaException declared by the signature
 */
public void parse(InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context) throws IOException,
        SAXException, TikaException {
    final AutoDetectReader reader = new AutoDetectReader(
            new CloseShieldInputStream(stream),
            metadata,
            context.get(ServiceLoader.class, LOADER));
    try {
        final Charset detected = reader.getCharset();

        final String existingType = metadata.get(Metadata.CONTENT_TYPE);
        final boolean unsetOrHtml =
                existingType == null || existingType.startsWith("text/html");
        if (unsetOrHtml) {
            metadata.set(Metadata.CONTENT_TYPE,
                    new MediaType(MediaType.TEXT_HTML, detected).toString());
        }
        // deprecated, see TIKA-431
        metadata.set(Metadata.CONTENT_ENCODING, detected.name());

        // HTML mapper comes from the parse context, with a default fallback.
        final HtmlMapper htmlMapper = context.get(HtmlMapper.class, new HtmlParserMapper());

        final org.ccil.cowan.tagsoup.Parser tagSoup = new org.ccil.cowan.tagsoup.Parser();
        // TIKA-528: reuse a shared schema (from the context or the default)
        // to avoid heavy instantiation.
        tagSoup.setProperty(
                org.ccil.cowan.tagsoup.Parser.schemaProperty,
                context.get(Schema.class, HTML_SCHEMA));
        // TIKA-599: the shared schema is thread-safe only if bogons are ignored.
        tagSoup.setFeature(org.ccil.cowan.tagsoup.Parser.ignoreBogonsFeature, true);
        tagSoup.setContentHandler(
                new XHTMLDowngradeHandler(
                        new HtmlHandler(htmlMapper, handler, metadata)));

        tagSoup.parse(reader.asInputSource());
    } finally {
        reader.close();
    }
}
示例6: parse
import org.apache.tika.io.CloseShieldInputStream; //导入依赖的package包/类
/**
 * Sets the content type to {@code ENVI_MIME_TYPE} and copies the stream's
 * text, decoded through an {@link AutoDetectReader}, into a single XHTML
 * {@code <p>} element.
 *
 * <p>Besides the content type, the detected charset's name is stored as the
 * content encoding (deprecated, see TIKA-431).
 *
 * @param stream   the input; shielded so closing the reader does not close it
 * @param handler  receives the XHTML events
 * @param metadata updated with content type and content encoding
 * @param context  not used by this method
 * @throws IOException   if reading from the stream fails
 * @throws SAXException  if the handler rejects an event
 * @throws TikaException declared by the signature
 */
public void parse(InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    metadata.set(Metadata.CONTENT_TYPE, ENVI_MIME_TYPE);

    // The following code was taken from the TXTParser.
    // Automatically detect the character encoding.
    AutoDetectReader reader =
            new AutoDetectReader(new CloseShieldInputStream(stream), metadata);
    try {
        Charset charset = reader.getCharset();
        // deprecated, see TIKA-431
        metadata.set(Metadata.CONTENT_ENCODING, charset.name());

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        // Text contents go into one paragraph, copied in 4096-char chunks.
        xhtml.startElement("p");
        char[] buffer = new char[4096];
        int n = reader.read(buffer);
        while (n != -1) {
            xhtml.characters(buffer, 0, n);
            n = reader.read(buffer);
        }
        xhtml.endElement("p");
        xhtml.endDocument();
    } finally {
        reader.close();
    }
}
示例7: parseEmbedded
import org.apache.tika.io.CloseShieldInputStream; //导入依赖的package包/类
/**
 * Hands an embedded entry to the {@code child} command as the attachment
 * body of a copy of {@code record}.
 *
 * <p>The stream is wrapped in a close-shielded {@link TikaInputStream} bound
 * to a fresh {@link TemporaryResources}; if the incoming stream is itself a
 * {@code TikaInputStream} with an open container, that container is carried
 * over. On the copied record the attachment body is replaced, the attachment
 * MIME type, charset and name fields are cleared, and the name is set again
 * when {@code name} is non-empty.
 *
 * <p>There is no catch clause: whatever {@code child.process} throws
 * propagates to the caller unwrapped. The temporary resources are closed
 * quietly in every case.
 *
 * @param stream the embedded entry's content
 * @param record the record to copy; the caller's instance is not modified here
 * @param name   attachment name, may be {@code null} or empty
 * @param child  the command that processes the prepared record
 * @return whatever {@code child.process} returns
 */
public boolean parseEmbedded(InputStream stream, Record record, String name, Command child) {
    final TemporaryResources tempResources = new TemporaryResources();
    try {
        final TikaInputStream shielded =
                TikaInputStream.get(new CloseShieldInputStream(stream), tempResources);

        if (stream instanceof TikaInputStream) {
            final TikaInputStream outer = (TikaInputStream) stream;
            final Object openContainer = outer.getOpenContainer();
            if (openContainer != null) {
                shielded.setOpenContainer(openContainer);
            }
        }

        final Record attachment = record.copy();
        attachment.replaceValues(Fields.ATTACHMENT_BODY, shielded);
        attachment.removeAll(Fields.ATTACHMENT_MIME_TYPE);
        attachment.removeAll(Fields.ATTACHMENT_CHARSET);
        attachment.removeAll(Fields.ATTACHMENT_NAME);

        final boolean hasName = name != null && !name.isEmpty();
        if (hasName) {
            attachment.put(Fields.ATTACHMENT_NAME, name);
        }

        return child.process(attachment);
    } finally {
        Closeables.closeQuietly(tempResources);
    }
}