本文整理汇总了Java中org.cyberneko.html.HTMLConfiguration类的典型用法代码示例。如果您正苦于以下问题:Java HTMLConfiguration类的具体用法?Java HTMLConfiguration怎么用?Java HTMLConfiguration使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
HTMLConfiguration类属于org.cyberneko.html包,在下文中一共展示了HTMLConfiguration类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: parse
import org.cyberneko.html.HTMLConfiguration; //导入依赖的package包/类
/**
 * Parses the document source into a DOM tree using NekoHTML.
 *
 * <p>While {@code neko_fixed} is still false, the NekoHTML element table is patched
 * once so that {@code MENU} is appended to the parents of {@code LI}. Element
 * names are converted to lower case and, when {@code charset} is non-null, it is
 * set as the parser's default encoding.
 *
 * @return the DOM document produced by the parser
 * @throws SAXException propagated from configuring or running the parser
 * @throws IOException propagated from obtaining the input stream or from parsing
 */
@Override
public Document parse() throws SAXException, IOException {
    // Temporary NekoHTML fix until NekoHTML itself gets fixed.
    if (!neko_fixed) {
        HTMLElements.Element li = HTMLElements.getElement(HTMLElements.LI);
        HTMLElements.Element[] oldParents = li.parent;
        // Build the extended array completely before storing it in the element
        // table, so the table never holds an array with unfilled (null) slots.
        HTMLElements.Element[] newParents = new HTMLElements.Element[oldParents.length + 1];
        System.arraycopy(oldParents, 0, newParents, 0, oldParents.length);
        newParents[oldParents.length] = HTMLElements.getElement(HTMLElements.MENU);
        li.parent = newParents;
        neko_fixed = true;
    }

    DOMParser parser = new DOMParser(new HTMLConfiguration());
    parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
    if (charset != null) {
        parser.setProperty("http://cyberneko.org/html/properties/default-encoding", charset);
    }
    parser.parse(new org.xml.sax.InputSource(getDocumentSource().getInputStream()));
    return parser.getDocument();
}
示例2: parse
import org.cyberneko.html.HTMLConfiguration; //导入依赖的package包/类
/**
 * Parses the stream {@code is} with NekoHTML and remembers the result in {@code doc}.
 *
 * <p>Both element and attribute names are converted to lower case. When
 * {@code charset} is non-null it is set as the parser's default encoding.
 *
 * @return the parsed DOM document (the same object stored in {@code doc})
 * @throws SAXException propagated from configuring or running the parser
 * @throws IOException propagated from the parser
 */
public Document parse() throws SAXException, IOException {
    final DOMParser nekoParser = new DOMParser(new HTMLConfiguration());

    nekoParser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
    nekoParser.setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
    if (charset != null) {
        nekoParser.setProperty("http://cyberneko.org/html/properties/default-encoding", charset);
    }

    // Preparation for filters, not used now:
    //   XMLDocumentFilter attributeFilter = new DOMAttributeFilter();
    //   XMLDocumentFilter[] filters = { attributeFilter };
    //   parser.setProperty("http://cyberneko.org/html/properties/filters", filters);

    final org.xml.sax.InputSource input = new org.xml.sax.InputSource(is);
    nekoParser.parse(input);

    doc = nekoParser.getDocument();
    return doc;
}
示例3: initParser
import org.cyberneko.html.HTMLConfiguration; //导入依赖的package包/类
/**
 * Creates the NekoHTML-backed DOM parser and applies its properties and features.
 *
 * <p>The parser is built on an {@code HTMLConfiguration} that reports to
 * {@code errorHandler}. A single document filter, {@code ElementValidityCheckFilter},
 * is installed.
 *
 * @param runtime the Ruby runtime; not read by this method
 */
@Override
protected void initParser(Ruby runtime) {
    XMLParserConfiguration config = new HTMLConfiguration();
    // Only the element validity check is part of the filter chain. A
    // RemoveNSAttrsFilter used to be instantiated here as well but was never
    // added to the chain, so it is no longer created.
    XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
    XMLDocumentFilter[] filters = { elementValidityCheckFilter };

    config.setErrorHandler(this.errorHandler);
    parser = new NokogiriDomParser(config);

    // see http://nekohtml.sourceforge.net/settings.html for details
    setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
    setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
    setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
    setProperty("http://cyberneko.org/html/properties/filters", filters);
    setFeature("http://cyberneko.org/html/features/report-errors", true);
    setFeature("http://xml.org/sax/features/namespaces", false);
}
示例4: parseDomImpl
import org.cyberneko.html.HTMLConfiguration; //导入依赖的package包/类
/**
 * Parses {@code source} into a DOM document.
 *
 * <p>The source is parsed with a configuration from {@code newConfiguration()} and a
 * {@code NormalizingTagBalancer}; the resulting fragment is then normalized against
 * the document before the document is returned.
 *
 * @param source the HTML text to parse
 * @return the parsed document, or {@code null} when parsing raised an {@link IOException}
 * @throws GadgetException declared by the signature; not thrown directly in this body
 */
@Override
protected Document parseDomImpl(String source) throws GadgetException {
    final HTMLConfiguration configuration = newConfiguration();

    final DocumentHandler parsed;
    try {
        parsed = parseHtmlImpl(source, configuration, new NormalizingTagBalancer());
    } catch (IOException ignored) {
        // An I/O failure while parsing is reported to the caller as "no document".
        return null;
    }

    final Document result = parsed.getDocument();
    final DocumentFragment body = parsed.getFragment();
    normalizeFragment(result, body);
    return result;
}
示例5: parseFragmentImpl
import org.cyberneko.html.HTMLConfiguration; //导入依赖的package包/类
/**
 * Parses {@code source} as an HTML document fragment.
 *
 * <p>The configuration from {@code newConfiguration()} is switched to fragment
 * balancing and given a fragment context stack of {@code HTML} followed by
 * {@code BODY}; parsing then runs with a {@code NekoPatchTagBalancer}.
 *
 * @param source the HTML text to parse
 * @return the parsed fragment, or {@code null} when parsing raised an {@link IOException}
 * @throws GadgetException declared by the signature; not thrown directly in this body
 */
@Override
protected DocumentFragment parseFragmentImpl(String source) throws GadgetException {
    final HTMLConfiguration configuration = newConfiguration();

    // Current feature name:    http://cyberneko.org/html/features/balance-tags/document-fragment
    // Deprecated feature name: http://cyberneko.org/html/features/document-fragment
    configuration.setFeature(
        "http://cyberneko.org/html/features/balance-tags/document-fragment", true);

    final QName[] contextStack = {
        new QName(null, "HTML", "HTML", null),
        new QName(null, "BODY", "BODY", null)
    };
    configuration.setProperty(
        "http://cyberneko.org/html/properties/balance-tags/fragment-context-stack",
        contextStack);

    final DocumentHandler parsed;
    try {
        parsed = parseHtmlImpl(source, configuration, new NekoPatchTagBalancer());
    } catch (IOException ignored) {
        // An I/O failure while parsing is reported to the caller as "no fragment".
        return null;
    }
    return parsed.getFragment();
}
示例6: parse
import org.cyberneko.html.HTMLConfiguration; //导入依赖的package包/类
/**
 * Parses the stream {@code is} with NekoHTML and keeps the result in {@code doc}.
 *
 * <p>Element names are converted to lower case. When {@code charset} is non-null
 * it is set as the parser's default encoding.
 *
 * @return the parsed DOM document (the same object stored in {@code doc})
 * @throws SAXException propagated from configuring or running the parser
 * @throws IOException propagated from the parser
 */
public Document parse() throws SAXException, IOException {
    final DOMParser nekoParser = new DOMParser(new HTMLConfiguration());

    nekoParser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
    if (charset != null) {
        nekoParser.setProperty("http://cyberneko.org/html/properties/default-encoding", charset);
    }

    final org.xml.sax.InputSource input = new org.xml.sax.InputSource(is);
    nekoParser.parse(input);

    doc = nekoParser.getDocument();
    return doc;
}
示例7: parse
import org.cyberneko.html.HTMLConfiguration; //导入依赖的package包/类
/**
 * Parses the stream supplied by {@code getDocumentSource()} into a DOM tree.
 *
 * <p>Element names are converted to lower case. When {@code charset} is non-null
 * it is set as the parser's default encoding.
 *
 * @return the DOM document produced by the parser
 * @throws SAXException propagated from configuring or running the parser
 * @throws IOException propagated from obtaining the input stream or from parsing
 */
@Override
public Document parse() throws SAXException, IOException {
    final DOMParser nekoParser = new DOMParser(new HTMLConfiguration());

    nekoParser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
    if (charset != null) {
        nekoParser.setProperty("http://cyberneko.org/html/properties/default-encoding", charset);
    }

    final org.xml.sax.InputSource input =
        new org.xml.sax.InputSource(getDocumentSource().getInputStream());
    nekoParser.parse(input);

    return nekoParser.getDocument();
}
示例8: DOMFragmentParser
import org.cyberneko.html.HTMLConfiguration; //导入依赖的package包/类
/**
 * Default constructor.
 *
 * <p>Creates the underlying {@code HTMLConfiguration}, registers this parser's
 * recognized features and properties on it, turns on the {@code DOCUMENT_FRAGMENT}
 * feature, and installs this parser as the configuration's document handler.
 */
public DOMFragmentParser() {
    this.fParserConfiguration = new HTMLConfiguration();

    // Declare what this parser understands before setting any feature.
    this.fParserConfiguration.addRecognizedFeatures(RECOGNIZED_FEATURES);
    this.fParserConfiguration.addRecognizedProperties(RECOGNIZED_PROPERTIES);

    this.fParserConfiguration.setFeature(DOCUMENT_FRAGMENT, true);
    this.fParserConfiguration.setDocumentHandler(this);
}
示例9: getDomHtmlNode
import org.cyberneko.html.HTMLConfiguration; //导入依赖的package包/类
/**
 * Parses {@code inputSource} with NekoHTML and wraps the resulting document.
 *
 * <p>The parser runs with namespaces and tag balancing enabled, with
 * ignore-outside-content and error reporting disabled, and with element and
 * attribute names converted to lower case.
 *
 * @param inputSource the HTML input to parse
 * @return a new {@code DomHtmlNode} built from the parsed document
 * @throws SAXException propagated from configuring or running the parser
 * @throws IOException propagated from the parser
 */
private DomHtmlNode getDomHtmlNode(InputSource inputSource) throws SAXException, IOException {
    final DOMParser nekoParser = new DOMParser(new HTMLConfiguration());

    // Features
    nekoParser.setFeature("http://xml.org/sax/features/namespaces", true);
    nekoParser.setFeature(
        "http://cyberneko.org/html/features/balance-tags/ignore-outside-content", false);
    nekoParser.setFeature("http://cyberneko.org/html/features/balance-tags", true);
    nekoParser.setFeature("http://cyberneko.org/html/features/report-errors", false);

    // Properties
    nekoParser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
    nekoParser.setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");

    nekoParser.parse(inputSource);
    return new DomHtmlNode(nekoParser.getDocument());
}
示例10: parse
import org.cyberneko.html.HTMLConfiguration; //导入依赖的package包/类
/**
 * Parses the stream supplied by {@code getDocumentSource()} into a DOM tree.
 *
 * <p>Element names are converted to lower case. When {@code charset} is non-null
 * it is set as the parser's default encoding.
 *
 * @return the DOM document produced by the parser
 * @throws SAXException propagated from configuring or running the parser
 * @throws IOException propagated from obtaining the input stream or from parsing
 */
public Document parse() throws SAXException, IOException {
    final DOMParser nekoParser = new DOMParser(new HTMLConfiguration());

    nekoParser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
    if (charset != null) {
        nekoParser.setProperty("http://cyberneko.org/html/properties/default-encoding", charset);
    }

    final org.xml.sax.InputSource input =
        new org.xml.sax.InputSource(getDocumentSource().getInputStream());
    nekoParser.parse(input);

    return nekoParser.getDocument();
}
示例11: parseHtmlImpl
import org.cyberneko.html.HTMLConfiguration; //导入依赖的package包/类
/**
 * Parse HTML source by wiring a scanner pipeline by hand.
 *
 * <p>The pipeline assembled here is: {@code HTMLScanner} -> {@code tagBalancer} ->
 * {@code NamespaceBinder} -> {@code DocumentHandler}. The namespace binder, tag
 * balancer and scanner are each reset with {@code config} before scanning starts.
 *
 * @param source the HTML text to parse; also passed to {@code newDocumentHandler}
 * @param config configuration handed to {@code reset} on each pipeline stage
 * @param tagBalancer the tag balancer placed between the scanner and the namespace binder
 * @return a document handler containing the parsed source
 * @throws IOException declared by the signature; presumably raised while scanning (confirm against HTMLScanner)
 */
private DocumentHandler parseHtmlImpl(String source, HTMLConfiguration config,
NormalizingTagBalancer tagBalancer)
throws IOException {
HTMLScanner htmlScanner = new HTMLScanner();
// The balancer is given a direct reference to the scanner in addition to
// being wired as its downstream handler below.
tagBalancer.setScanner(htmlScanner);
DocumentHandler handler = newDocumentHandler(source);
// Namespace binder sits between the tag balancer (upstream) and the final handler (downstream).
NamespaceBinder namespaceBinder = new NamespaceBinder();
namespaceBinder.setDocumentHandler(handler);
namespaceBinder.setDocumentSource(tagBalancer);
namespaceBinder.reset(config);
tagBalancer.setDocumentHandler(namespaceBinder);
// Order of the chain is Scanner -> Tag Balancer -> Namespace Binder -> handler.
// NOTE(review): the earlier comment here also listed an OSMLFilter between the
// scanner and the tag balancer, but no such filter is wired in this method.
tagBalancer.setDocumentSource(htmlScanner);
htmlScanner.setDocumentHandler(tagBalancer);
tagBalancer.reset(config);
htmlScanner.reset(config);
// The input is supplied as a character stream over the source string, with
// the encoding on the input source set to UTF-8.
XMLInputSource inputSource = new XMLInputSource(null, null, null);
inputSource.setEncoding("UTF-8");
inputSource.setCharacterStream(new StringReader(source));
htmlScanner.setInputSource(inputSource);
// Runs the scan; events flow down the chain into the handler returned below.
htmlScanner.scanDocument(true);
return handler;
}
示例12: newConfiguration
import org.cyberneko.html.HTMLConfiguration; //导入依赖的package包/类
/**
 * Creates the {@code HTMLConfiguration} used as the starting point for parsing.
 *
 * <p>The configuration keeps the original case of element and attribute names,
 * enables notification of character and built-in entity references, and turns
 * namespace processing on.
 *
 * @return a newly created, pre-configured {@code HTMLConfiguration}
 */
protected HTMLConfiguration newConfiguration() {
    final HTMLConfiguration configuration = new HTMLConfiguration();

    // Maintain original case for elements and attributes.
    configuration.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
    configuration.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change");

    // Get notified of entity and character references.
    configuration.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
    configuration.setFeature(
        "http://cyberneko.org/html/features/scanner/notify-builtin-refs", true);

    configuration.setFeature("http://xml.org/sax/features/namespaces", true);

    return configuration;
}
示例13: Implementation
import org.cyberneko.html.HTMLConfiguration; //导入依赖的package包/类
/**
 * Creates the parser on top of a new {@code HTMLConfiguration} and registers
 * this instance as its own content handler.
 */
Implementation() {
    super(new HTMLConfiguration());
    this.setContentHandler(this);
}
示例14: BoilerpipeHTMLParser
import org.cyberneko.html.HTMLConfiguration; //导入依赖的package包/类
/**
 * Creates the parser on top of a new {@code HTMLConfiguration}.
 *
 * @param ignore not read by this constructor; presumably present only to give
 *     it a distinct signature (confirm against the other constructors)
 */
protected BoilerpipeHTMLParser(final boolean ignore) {
    super(new HTMLConfiguration());
}
示例15: setTarget
import org.cyberneko.html.HTMLConfiguration; //导入依赖的package包/类
/**
 * Builds an HTML description page for the given controller and shows it in {@code display}.
 *
 * <p>The controller must carry a {@code gate.app.MetadataURL} feature. The metadata
 * document at that URL is parsed to read the text of its first {@code pipeline-name}
 * element. That text, the {@code icon.png} URL and the content of
 * {@code long-desc.html} (both resolved relative to the metadata URL) are assembled
 * into an HTML page, which is parsed with NekoHTML using lower-case element names
 * and handed to {@code display}.
 *
 * @param target the controller to describe
 * @throws NullPointerException if {@code target} is null
 * @throws IllegalArgumentException if {@code target} is not a {@code Controller}, has no
 *     {@code gate.app.MetadataURL} feature, or if any exception occurs while building
 *     or parsing the page (the original exception is kept as the cause)
 */
@Override
public void setTarget(Object target) {
    if (target == null) {
        throw new NullPointerException("received a null target");
    }
    if (!(target instanceof Controller)) {
        throw new IllegalArgumentException("not a controller");
    }

    Controller controller = (Controller) target;
    if (!controller.getFeatures().containsKey("gate.app.MetadataURL")) {
        throw new IllegalArgumentException("no gate.app.MetadataURL feature");
    }

    try {
        URL metadata = (URL) controller.getFeatures().get("gate.app.MetadataURL");
        URL longDesc = new URL(metadata, "long-desc.html");
        URL iconDesc = new URL(metadata, "icon.png");

        // Close the metadata stream explicitly so the underlying connection or
        // file handle is released even when parsing fails.
        Document document;
        java.io.InputStream metadataStream = metadata.openStream();
        try {
            document = builder.parse(metadataStream);
        } finally {
            metadataStream.close();
        }

        Node text =
            document.getDocumentElement().getElementsByTagName("pipeline-name")
                .item(0).getFirstChild();

        Font font =
            Gate.getUserConfig().getFont(GateConstants.TEXT_COMPONENTS_FONT);

        StringBuilder page = new StringBuilder();
        page.append("<!DOCTYPE html>");
        page.append("<html>");
        page.append("<head>");
        page.append("<style type='text/css'>body { font-family: ")
            .append(font.getFamily()).append("; font-size: ")
            .append(font.getSize()).append("pt }</style>");
        page.append("</head>");
        page.append("<body>");
        page.append("<h1><img style='vertical-align: middle;' src='")
            .append(StringEscapeUtils.escapeHtml(iconDesc.toString())).append("'/> ")
            .append(StringEscapeUtils.escapeHtml(text.getTextContent())).append("</h1>");
        // The long description is inserted as-is (not escaped).
        page.append(IOUtils.toString(longDesc, "UTF-8"));
        page.append("</body></html>");

        // parse using NekoHTML
        HTMLConfiguration config = new HTMLConfiguration();
        // Force element names to lower case to match XHTML requirements
        // as that is what Flying Saucer expects
        config.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
        DOMParser htmlParser = new DOMParser(config);
        htmlParser.parse(new InputSource(new StringReader(page.toString())));

        display.setDocument(htmlParser.getDocument(),
            longDesc.toString());
    } catch (Exception e) {
        // Any failure is reported to the caller as an invalid target, with the cause preserved.
        throw new IllegalArgumentException(e);
    }
}