本文整理汇总了Java中org.apache.tika.sax.XHTMLContentHandler.characters方法的典型用法代码示例。如果您正苦于以下问题：Java XHTMLContentHandler.characters方法的具体用法？Java XHTMLContentHandler.characters怎么用？Java XHTMLContentHandler.characters使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.tika.sax.XHTMLContentHandler的用法示例。
在下文中一共展示了XHTMLContentHandler.characters方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: extractOutput
import org.apache.tika.sax.XHTMLContentHandler; //导入方法依赖的package包/类
/**
 * Copies the characters of the given stream, decoded with the
 * {@code UTF_8} charset, into a single {@code div} element of a new XHTML
 * document. The reader wrapping the stream is closed when the copy loop
 * ends, including when the loop ends with an exception.
 *
 * @param stream stream holding the OCR result
 * @param xhtml XHTML content handler that receives the text
 * @throws SAXException if the XHTML SAX events could not be handled
 * @throws IOException if reading from the stream fails
 */
private void extractOutput(InputStream stream, XHTMLContentHandler xhtml) throws SAXException, IOException {
    xhtml.startDocument();
    xhtml.startElement("div");
    try (Reader in = new InputStreamReader(stream, UTF_8)) {
        final char[] chunk = new char[1024];
        int count;
        while ((count = in.read(chunk)) != -1) {
            // Nothing to forward when a read yields no characters.
            if (count > 0) {
                xhtml.characters(chunk, 0, count);
            }
        }
    }
    xhtml.endElement("div");
    xhtml.endDocument();
}
示例2: textRunsToText
import org.apache.tika.sax.XHTMLContentHandler; //导入方法依赖的package包/类
/**
 * Writes the text of every usable run to the handler, following each
 * emitted text with an empty {@code br} element. Runs that are
 * {@code null}, or whose text is {@code null}, are skipped.
 *
 * @param xhtml XHTML content handler that receives the output
 * @param runs text runs to emit; {@code null} means there is nothing to do
 * @throws SAXException if the XHTML SAX events could not be handled
 */
private void textRunsToText(XHTMLContentHandler xhtml, TextRun[] runs)
        throws SAXException {
    if (runs == null) {
        return;
    }
    for (int i = 0; i < runs.length; i++) {
        final TextRun textRun = runs[i];
        if (textRun == null) {
            continue;
        }
        // Leaving in wisdom from TIKA-712 for easy revert.
        // Avoid boiler-plate text on the master slide (0
        // = TextHeaderAtom.TITLE_TYPE, 1 = TextHeaderAtom.BODY_TYPE):
        // if (!isMaster || (textRun.getRunType() != 0 && textRun.getRunType()
        // != 1)) {
        final String content = textRun.getText();
        if (content == null) {
            continue;
        }
        xhtml.characters(content);
        xhtml.startElement("br");
        xhtml.endElement("br");
    }
}
示例3: parse
import org.apache.tika.sax.XHTMLContentHandler; //导入方法依赖的package包/类
/**
 * Sets the content type to {@code application/vnd.ms-htmlhelp} and writes
 * the extracted content of every directory entry whose name ends in
 * {@code .html} or {@code .htm} into one XHTML document.
 *
 * @param stream input used to construct the {@code ChmExtractor}
 * @param handler SAX handler wrapped by the XHTML content handler
 * @param metadata metadata that receives the content type
 * @param context parse context (not read by this method)
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
        ParseContext context) throws IOException, SAXException, TikaException {
    final ChmExtractor extractor = new ChmExtractor(stream);

    // metadata
    metadata.set(Metadata.CONTENT_TYPE, "application/vnd.ms-htmlhelp");

    // content
    final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    for (DirectoryListingEntry entry : extractor.getChmDirList().getDirectoryListingEntryList()) {
        final String entryName = entry.getName();
        final boolean htmlPage = entryName.endsWith(".html") || entryName.endsWith(".htm");
        if (!htmlPage) {
            continue;
        }
        xhtml.characters(extract(extractor.extractChmEntry(entry)));
    }
    xhtml.endDocument();
}
示例4: extractOutput
import org.apache.tika.sax.XHTMLContentHandler; //导入方法依赖的package包/类
/**
 * Copies the text read from the given stream into a single {@code p}
 * element of a new XHTML document. The reader wrapping the stream is
 * closed before this method returns, whether it completes normally or
 * with an exception.
 *
 * The bytes are decoded as UTF-8 rather than with the platform default
 * charset, so the extracted text does not depend on the JVM's settings.
 *
 * @param stream stream to read the text from
 * @param xhtml XHTML content handler that receives the text
 * @throws SAXException if the XHTML SAX events could not be handled
 * @throws IOException if an input error occurred
 */
private void extractOutput(InputStream stream, XHTMLContentHandler xhtml)
        throws SAXException, IOException {
    try (Reader reader = new InputStreamReader(
            stream, java.nio.charset.StandardCharsets.UTF_8)) {
        xhtml.startDocument();
        xhtml.startElement("p");
        char[] buffer = new char[1024];
        for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) {
            // Skip empty reads instead of emitting zero-length character events.
            if (n > 0) {
                xhtml.characters(buffer, 0, n);
            }
        }
        xhtml.endElement("p");
        xhtml.endDocument();
    }
}
示例5: handleDates
import org.apache.tika.sax.XHTMLContentHandler; //导入方法依赖的package包/类
/**
 * Renders a date range as a {@code div} whose text reads
 * "{@code what} from {@code start} to {@code finish}", followed by
 * " taking {@code duration}" when a duration is given. The element's
 * class attribute is {@code "fromTo "} plus the lower-cased {@code what}
 * with the suffix {@code "Dates"}.
 *
 * Nothing is written when both {@code start} and {@code finish} are
 * {@code null}.
 *
 * @param what label for the range; also used to build the CSS class
 * @param start start of the range, rendered through {@code buildDate}
 * @param finish end of the range, rendered through {@code buildDate}
 * @param duration optional duration; omitted from the output when {@code null}
 * @param xhtml XHTML content handler that receives the output
 * @throws SAXException if the XHTML SAX events could not be handled
 */
protected static void handleDates(String what, Date start, Date finish,
        Duration duration, XHTMLContentHandler xhtml) throws SAXException {
    if (start == null && finish == null) {
        // Assume there's nothing there, and skip
        return;
    }
    // Locale.ROOT keeps the class name identical on every JVM; the default
    // locale can change the result of lower-casing (e.g. "I" under Turkish).
    String cls = what.toLowerCase(java.util.Locale.ROOT) + "Dates";
    xhtml.startElement("div", "class", "fromTo " + cls);
    xhtml.characters(what);
    xhtml.characters(" from ");
    xhtml.characters(buildDate(start));
    xhtml.characters(" to ");
    xhtml.characters(buildDate(finish));
    if (duration != null) {
        xhtml.characters(" taking ");
        xhtml.characters(buildDuration(duration));
    }
    xhtml.endElement("div");
}
示例6: readFully
import org.apache.tika.sax.XHTMLContentHandler; //导入方法依赖的package包/类
/**
 * Forwards everything the reader yields to the handler as character
 * events, reading up to 1024 characters at a time until end of input.
 * The reader is not closed here.
 *
 * @param reader source of the characters
 * @param xhtml XHTML content handler that receives the characters
 * @throws IOException if reading fails
 * @throws SAXException if the handler rejects a character event
 */
private void readFully(final Reader reader, final XHTMLContentHandler xhtml) throws IOException, SAXException {
    final char[] chunk = new char[1024];
    int count = reader.read(chunk);
    while (count != -1) {
        // Only forward reads that actually produced characters.
        if (count > 0) {
            xhtml.characters(chunk, 0, count);
        }
        count = reader.read(chunk);
    }
}
示例7: extractSDT
import org.apache.tika.sax.XHTMLContentHandler; //导入方法依赖的package包/类
/**
 * Writes the text of the element's content as one {@code p} element.
 *
 * @param element structured document tag whose content text is emitted
 * @param xhtml XHTML content handler that receives the paragraph
 */
private void extractSDT(XWPFSDT element, XHTMLContentHandler xhtml)
        throws SAXException, XmlException, IOException {
    final XWPFSDTContent sdtContent = element.getContent();
    xhtml.startElement("p");
    // The text is fetched after the element is opened, as before.
    xhtml.characters(sdtContent.getText());
    xhtml.endElement("p");
}
示例8: parse
import org.apache.tika.sax.XHTMLContentHandler; //导入方法依赖的package包/类
/**
 * Reads the stream through an {@code AutoDetectReader}, records the
 * reader's charset in the metadata (as a {@code text/plain} content type
 * and as the content encoding), and writes all text into a single
 * {@code p} element of a new XHTML document.
 *
 * The caller's stream is wrapped in a {@code CloseShieldInputStream}
 * before being handed to the reader; the reader itself is always closed.
 */
public void parse(InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context) throws IOException,
        SAXException, TikaException {
    // Automatically detect the character encoding
    final AutoDetectReader detectingReader = new AutoDetectReader(
            new CloseShieldInputStream(stream),
            metadata,
            context.get(ServiceLoader.class, LOADER));
    try {
        final Charset detected = detectingReader.getCharset();
        final MediaType textType = new MediaType(MediaType.TEXT_PLAIN, detected);
        metadata.set(Metadata.CONTENT_TYPE, textType.toString());
        // deprecated, see TIKA-431
        metadata.set(Metadata.CONTENT_ENCODING, detected.name());

        final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.startElement("p");
        final char[] chunk = new char[4096];
        for (int count = detectingReader.read(chunk);
                count != -1;
                count = detectingReader.read(chunk)) {
            xhtml.characters(chunk, 0, count);
        }
        xhtml.endElement("p");
        xhtml.endDocument();
    } finally {
        detectingReader.close();
    }
}
示例9: addFieldString
import org.apache.tika.sax.XHTMLContentHandler; //导入方法依赖的package包/类
/**
 * Writes one form field as an {@code li} element whose text is
 * "partialName: value" and whose optional {@code altName} attribute holds
 * the field's alternate name. Signature fields are handed to
 * {@code handleSignature} instead. Nothing is written when there is
 * neither an attribute nor any text.
 *
 * @param field form field to render
 * @param handler XHTML content handler that receives the list item
 * @throws SAXException if the XHTML SAX events could not be handled
 */
private void addFieldString(PDField field, XHTMLContentHandler handler)
        throws SAXException {
    // Pick partial name to present in content and altName for attribute
    // Ignoring FullyQualifiedName for now
    final String partialName = field.getPartialName();
    final String alternateName = field.getAlternateFieldName();

    final StringBuilder text = new StringBuilder();
    final AttributesImpl attributes = new AttributesImpl();
    if (partialName != null) {
        text.append(partialName);
        text.append(": ");
    }
    if (alternateName != null) {
        attributes.addAttribute("", "altName", "altName", "CDATA", alternateName);
    }

    // return early if PDSignature field
    if (field instanceof PDSignatureField) {
        handleSignature(attributes, (PDSignatureField) field, handler);
        return;
    }

    try {
        // getValue can throw an IOException if there is no value
        final String fieldValue = field.getValue();
        if (fieldValue != null && !"null".equals(fieldValue)) {
            text.append(fieldValue);
        }
    } catch (Exception ignored) {
        // swallow: a field whose value cannot be read is rendered without one
    }

    if (attributes.getLength() == 0 && text.length() == 0) {
        return;
    }
    handler.startElement("li", attributes);
    handler.characters(text.toString());
    handler.endElement("li");
}
示例10: parse
import org.apache.tika.sax.XHTMLContentHandler; //导入方法依赖的package包/类
/**
 * Sets the content type to {@code ENVI_MIME_TYPE}, records the charset
 * reported by an {@code AutoDetectReader} as the content encoding, and
 * writes the text of the stream into a single {@code p} element of a new
 * XHTML document.
 *
 * The caller's stream is wrapped in a {@code CloseShieldInputStream}
 * before being handed to the reader; the reader itself is always closed.
 *
 * @param stream input to read the text from
 * @param handler SAX handler wrapped by the XHTML content handler
 * @param metadata metadata that receives content type and encoding
 * @param context parse context (not read by this method)
 * @throws IOException if reading fails
 * @throws SAXException if the XHTML SAX events could not be handled
 * @throws TikaException declared by the signature; see {@code AutoDetectReader}
 */
public void parse(InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    //Only outputting the MIME type as metadata
    metadata.set(Metadata.CONTENT_TYPE, ENVI_MIME_TYPE);
    // The following code was taken from the TXTParser
    // Automatically detect the character encoding
    try (AutoDetectReader reader =
            new AutoDetectReader(new CloseShieldInputStream(stream), metadata)) {
        Charset charset = reader.getCharset();
        // deprecated, see TIKA-431
        metadata.set(Metadata.CONTENT_ENCODING, charset.name());
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        //text contents of the xhtml
        xhtml.startElement("p");
        char[] buffer = new char[4096];
        int n = reader.read(buffer);
        while (n != -1) {
            xhtml.characters(buffer, 0, n);
            n = reader.read(buffer);
        }
        xhtml.endElement("p");
        xhtml.endDocument();
    }
}
示例11: addTextToHandler
import org.apache.tika.sax.XHTMLContentHandler; //导入方法依赖的package包/类
/**
 * Writes the collected image texts to the handler and then clears the
 * collection.
 *
 * Each text is first passed through {@code St.removeRareCharacters}; texts
 * of five characters or fewer are skipped. All emitted texts are wrapped in
 * one {@code div} of class {@code IMAGE_CONTAINER_CLASS}. The container is
 * opened just before the first text that is actually emitted, so emitted
 * text is always inside it even when earlier entries were skipped. No
 * container is written when every text is skipped.
 *
 * @param handler XHTML content handler that receives the output
 * @param page value written before the colon in the {@code page} attribute
 *        of the marker span (presumably the current page number); the
 *        marker is omitted when this or {@code allPagesCount} is {@code null}
 * @param allPagesCount value written after the colon in the {@code page}
 *        attribute (presumably the total page count)
 * @throws SAXException if the XHTML SAX events could not be handled
 */
public void addTextToHandler(XHTMLContentHandler handler, Integer page, Integer allPagesCount)
        throws SAXException {
    if (!this.imageText.isEmpty()) {
        // TODO: optionally emit a "page_indicator" paragraph before the container.
        boolean containerOpen = false;
        final int lastIndex = this.imageText.size() - 1;
        for (int i = 0; i <= lastIndex; i++) {
            String text = St.removeRareCharacters(this.imageText.get(i));
            if (text.length() <= 5) {
                continue;
            }
            // Open lazily: the first entry may have been skipped above.
            if (!containerOpen) {
                handler.startElement("div", "class", IMAGE_CONTAINER_CLASS);
                containerOpen = true;
            }
            // TODO: pagination
            if (page != null && allPagesCount != null) {
                handler.startElement("span", "page", page + ":" + allPagesCount);
                handler.characters(" ");
                handler.endElement("span");
            }
            handler.characters(text);
            // Divider after every emitted text except the last entry.
            if (i != lastIndex) {
                handler.startElement("span", "class", "imageDivider");
                handler.characters(" ");
                handler.endElement("span");
            }
        }
        if (containerOpen) {
            handler.characters(" ");
            handler.endElement("div");
        }
    }
    this.imageText.clear();
}
示例12: render
import org.apache.tika.sax.XHTMLContentHandler; //导入方法依赖的package包/类
/**
 * Writes the value of {@code text} to the handler as character content.
 *
 * @param handler XHTML content handler that receives the text
 * @throws SAXException if the handler rejects the character event
 */
public void render(XHTMLContentHandler handler) throws SAXException {
    final String content = text;
    handler.characters(content);
}
示例13: processRun
import org.apache.tika.sax.XHTMLContentHandler; //导入方法依赖的package包/类
/**
 * Emits one run of a paragraph. The open {@code b}/{@code i} tags tracked
 * in {@code tfmtg} are first brought in line with the run's own bold and
 * italic flags; then the run's text and an {@code img} element per embedded
 * picture are written, wrapped in an {@code a} element when the run is a
 * hyperlink run with a resolvable URL or a non-empty anchor.
 *
 * @param run run to emit
 * @param paragraph paragraph owning the run; pictures are only written
 *        when its document is not {@code null}
 * @param xhtml XHTML content handler that receives the output
 * @param tfmtg formatting state carried between runs; updated in place
 * @return the same {@code tfmtg} instance that was passed in
 */
private TmpFormatting processRun(XWPFRun run, XWPFParagraph paragraph,
        XHTMLContentHandler xhtml, TmpFormatting tfmtg) throws SAXException,
        XmlException, IOException {
    final boolean runBold = run.isBold();
    final boolean runItalic = run.isItalic();

    // tfmtg records which of the style tags are currently open.
    if (runBold != tfmtg.isBold()) {
        // An open <i> is closed before <b> is touched; the italic check
        // below reopens it if the run is italic.
        if (tfmtg.isItalic()) {
            xhtml.endElement("i");
            tfmtg.setItalic(false);
        }
        if (runBold) {
            xhtml.startElement("b");
        } else {
            xhtml.endElement("b");
        }
        tfmtg.setBold(runBold);
    }
    if (runItalic != tfmtg.isItalic()) {
        if (runItalic) {
            xhtml.startElement("i");
        } else {
            xhtml.endElement("i");
        }
        tfmtg.setItalic(runItalic);
    }

    // Work out the link target, if any: external URL first, then anchor.
    String href = null;
    if (run instanceof XWPFHyperlinkRun) {
        final XWPFHyperlinkRun linkRun = (XWPFHyperlinkRun) run;
        final XWPFHyperlink link = linkRun.getHyperlink(document);
        if (link != null && link.getURL() != null) {
            href = link.getURL();
        } else if (linkRun.getAnchor() != null
                && linkRun.getAnchor().length() > 0) {
            href = "#" + linkRun.getAnchor();
        }
    }
    final boolean insideLink = href != null;
    if (insideLink) {
        xhtml.startElement("a", "href", href);
    }

    xhtml.characters(run.toString());

    // If we have any pictures, output them
    for (XWPFPicture picture : run.getEmbeddedPictures()) {
        if (paragraph.getDocument() == null) {
            continue;
        }
        final XWPFPictureData pictureData = picture.getPictureData();
        if (pictureData == null) {
            continue;
        }
        final AttributesImpl imgAttrs = new AttributesImpl();
        imgAttrs.addAttribute("", "src", "src", "CDATA",
                "embedded:" + pictureData.getFileName());
        imgAttrs.addAttribute("", "alt", "alt", "CDATA", picture.getDescription());
        xhtml.startElement("img", imgAttrs);
        xhtml.endElement("img");
    }

    if (insideLink) {
        xhtml.endElement("a");
    }
    return tfmtg;
}
示例14: processSDTRun
import org.apache.tika.sax.XHTMLContentHandler; //导入方法依赖的package包/类
/**
 * Writes the text of the run's content to the handler as character
 * content, without any surrounding element.
 *
 * @param run structured document tag run whose content text is emitted
 * @param xhtml XHTML content handler that receives the text
 */
private void processSDTRun(XWPFSDT run, XHTMLContentHandler xhtml)
        throws SAXException, XmlException, IOException {
    final String sdtText = run.getContent().getText();
    xhtml.characters(sdtText);
}
示例15: render
import org.apache.tika.sax.XHTMLContentHandler; //导入方法依赖的package包/类
/**
 * Formats {@code number} with {@code format} and writes the result to the
 * handler as character content.
 *
 * @param handler XHTML content handler that receives the formatted value
 * @throws SAXException if the handler rejects the character event
 */
public void render(XHTMLContentHandler handler) throws SAXException {
    final String formatted = format.format(number);
    handler.characters(formatted);
}