本文整理汇总了Java中org.ccil.cowan.tagsoup.Parser类的典型用法代码示例。如果您正苦于以下问题：Java Parser类的具体用法？Java Parser怎么用？Java Parser使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
Parser类属于org.ccil.cowan.tagsoup包，在下文中一共展示了Parser类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: download
import org.ccil.cowan.tagsoup.Parser; //导入依赖的package包/类
/**
 * Parses the contents of {@code stream} with TagSoup, registering this object as the SAX
 * content handler so that it receives the parse events.
 *
 * @throws RuntimeException wrapping any exception raised while registering the handler or
 *         while parsing
 */
public void download()
{
    // The stream is wrapped in a UnicodeReader constructed with "UTF-8".
    final UnicodeReader unicodeReader = new UnicodeReader(stream, "UTF-8"); //$NON-NLS-1$
    final XMLReader tagSoup = new Parser();

    final InputSource input = new InputSource();
    input.setCharacterStream(unicodeReader);

    try
    {
        tagSoup.setContentHandler(this);
        tagSoup.parse(input);
    }
    catch( Exception e )
    {
        throw new RuntimeException(e);
    }
}
示例2: modifyXml
import org.ccil.cowan.tagsoup.Parser; //导入依赖的package包/类
/**
 * Parses the supplied character stream with TagSoup, sending the SAX events to
 * {@code writer}, and returns the value of {@code writer.getOutput()} once parsing is done.
 *
 * @param reader character stream to parse
 * @param writer content handler that receives the SAX events
 * @return the result of {@code writer.getOutput()} after the parse
 * @throws RuntimeException wrapping any exception raised while creating the parser, parsing,
 *         or fetching the output
 */
@Override
public String modifyXml(Reader reader, HtmlContentHandler writer)
{
    // InputSource(Reader) stores the reader as the character stream.
    final InputSource input = new InputSource(reader);
    input.setEncoding(Constants.UTF8);

    try
    {
        final XMLReader tagSoup = new Parser();
        tagSoup.setContentHandler(writer);
        tagSoup.parse(input);
        return writer.getOutput();
    }
    catch( Exception e )
    {
        throw new RuntimeException(e);
    }
}
示例3: changeHardcodedUrls
import org.ccil.cowan.tagsoup.Parser; //导入依赖的package包/类
/**
 * Parses {@code html} with TagSoup through a {@code FindHrefHandler} that writes into an
 * in-memory buffer and consults an {@code ItemConverterHrefCallback} built from the two URLs.
 *
 * @param html   markup to parse
 * @param oldUrl first argument handed to the href callback
 * @param newUrl second argument handed to the href callback
 * @return the buffered output if the callback reports a change, otherwise {@code null}
 * @throws RuntimeException wrapping any exception raised while parsing or reading the result
 */
private String changeHardcodedUrls(String html, final URL oldUrl, final URL newUrl)
{
    final XMLReader tagSoup = new Parser();
    final StringWriter rewritten = new StringWriter();
    final ItemConverterHrefCallback hrefCallback = new ItemConverterHrefCallback(oldUrl, newUrl);
    tagSoup.setContentHandler(new FindHrefHandler(rewritten, hrefCallback, true, true));

    final InputSource input = new InputSource(new StringReader(html));
    try
    {
        tagSoup.parse(input);
        // A null result tells the caller that nothing was changed.
        return hrefCallback.wasChanged() ? rewritten.toString() : null;
    }
    catch( Exception e )
    {
        throw new RuntimeException(e);
    }
}
示例4: fromHtml
import org.ccil.cowan.tagsoup.Parser; //导入依赖的package包/类
/**
 * Converts the provided HTML string into displayable styled text.
 *
 * <p>Any {@code <img>} tags in the HTML use the given {@code ImageGetter} to request a
 * representation of the image, and unknown tags are passed to the given {@code TagHandler};
 * pass {@code null} for either one if it is not wanted.
 *
 * <p>TagSoup is used so that real-world, broken HTML can still be handled.
 *
 * @return the converted text, or {@code null} when {@code source} is {@code null}
 */
public static Spanned fromHtml(@NonNull Context context, @NonNull String source, int flags,
        @Nullable ImageGetter imageGetter, @Nullable TagHandler tagHandler,
        @Nullable SpanCallback spanCallback) {
    if (source == null) {
        return null;
    }

    final Parser tagSoup = new Parser();
    try {
        tagSoup.setProperty(Parser.schemaProperty, HtmlParser.schema);
    } catch (org.xml.sax.SAXNotRecognizedException | org.xml.sax.SAXNotSupportedException e) {
        // Not expected for the schema property; rethrow unchecked if it does happen.
        throw new RuntimeException(e);
    }

    return new HtmlToSpannedConverter(
            context, source, imageGetter, tagHandler, spanCallback, tagSoup, flags).convert();
}
示例5: parse
import org.ccil.cowan.tagsoup.Parser; //导入依赖的package包/类
/**
 * Parses {@code html} with TagSoup, using this object as content handler, error handler and
 * lexical handler, and returns the current contents of {@code rebuiltHtml}.
 *
 * <p>Earlier versions swallowed every exception in an empty, IDE-generated catch block, so a
 * failed parse silently returned whatever had been accumulated so far. Checked parser failures
 * are now rethrown with their cause, and unchecked exceptions from the handler callbacks
 * propagate unchanged.
 *
 * @param html markup to parse
 * @return {@code rebuiltHtml.toString()} after the parse has completed
 * @throws RuntimeException if a parser feature or property cannot be set, or if parsing fails
 *         with an {@code IOException} or {@code SAXException}
 */
public String parse(String html) {
    Parser p = new Parser();
    p.setContentHandler(this);
    p.setErrorHandler(this);
    try {
        p.setFeature(Parser.defaultAttributesFeature, false); // or else some default attributes get added to <br>
        p.setFeature(Parser.ignorableWhitespaceFeature, false);
        p.setProperty(Parser.lexicalHandlerProperty, this);
        InputSource inputSource = new InputSource(new StringReader(html));
        p.parse(inputSource);
    } catch (java.io.IOException | org.xml.sax.SAXException e) {
        // Fully-qualified names keep this block independent of the file's import list.
        throw new RuntimeException("Unable to parse HTML", e);
    }
    return rebuiltHtml.toString();
}
示例6: fromHtml
import org.ccil.cowan.tagsoup.Parser; //导入依赖的package包/类
/**
 * Converts the provided HTML string into displayable styled text.
 *
 * <p>Any {@code <img>} tags in the HTML use the given {@code ImageGetter} to request a
 * representation of the image, and unknown tags are passed to the given {@code TagHandler};
 * pass {@code null} for either one if it is not wanted.
 *
 * <p>TagSoup is used so that real-world, broken HTML can still be handled.
 */
public static SpannableStringBuilder fromHtml(String source, ImageGetter imageGetter,
        TagHandler tagHandler) {
    final Parser tagSoup = new Parser();
    try {
        tagSoup.setProperty(Parser.schemaProperty, HtmlParser.schema);
    } catch (org.xml.sax.SAXNotRecognizedException | org.xml.sax.SAXNotSupportedException e) {
        // Not expected for the schema property; rethrow unchecked if it does happen.
        throw new RuntimeException(e);
    }

    return new HtmlToSpannedConverter(source, imageGetter, tagHandler, tagSoup).convert();
}
示例7: init
import org.ccil.cowan.tagsoup.Parser; //导入依赖的package包/类
/**
 * Records the content type and headers, then wraps {@code in} as a {@code Source} and adds it
 * to {@code result}.
 *
 * <p>When {@code html} is true the source is a {@code SAXSource} backed by a TagSoup parser
 * with the namespaces and namespace-prefixes features enabled; otherwise it is a plain
 * {@code StreamSource}. Both use the same placeholder system id.
 *
 * @throws HttpClientException if a {@code SAXException} is raised while building or adding
 *         the source
 */
private void init(Result result, Reader in, ContentType type, HeaderSet headers, boolean html) throws HttpClientException {
    myContentType = type;
    myHeaders = headers;
    final String systemId = "TODO-find-a-useful-systemId";
    try {
        final Source source;
        if (!html) {
            source = new StreamSource(in, systemId);
        } else {
            final Parser tagSoup = new Parser();
            tagSoup.setFeature(Parser.namespacesFeature, true);
            tagSoup.setFeature(Parser.namespacePrefixesFeature, true);
            final SAXSource saxSource = new SAXSource(tagSoup, new InputSource(in));
            saxSource.setSystemId(systemId);
            source = saxSource;
        }
        result.add(source);
    } catch (SAXException ex) {
        throw new HttpClientException("error parsing result HTML", ex);
    }
}
示例8: HtmlToSpannedConverter
import org.ccil.cowan.tagsoup.Parser; //导入依赖的package包/类
/**
 * Stores the conversion inputs and, when {@code subject} is non-empty, seeds the output with
 * the subject as a heading.
 *
 * <p>The heading is the subject text with a 1.25x relative size span and a bold style span,
 * plus a foreground colour span taken from {@code colors.subjectForeground} when
 * {@code colors} is not {@code null}. A newline follows it, and {@code mStartLength} is set
 * to the builder length after that newline. {@code mStartLength} is not assigned here when
 * the subject is empty.
 */
public HtmlToSpannedConverter(String subject, String source, ThemeColors colors, HtmlParser.ImageGetter imageGetter, boolean openSpoilers,
        Parser parser) {
    mSource = source;
    mColors = colors;
    mOpenSpoilers = openSpoilers;
    mImageGetter = imageGetter;
    mReader = parser;

    mSpannableStringBuilder = new SpannableStringBuilder();
    if (TextUtils.isEmpty(subject)) {
        return;
    }

    mSpannableStringBuilder.append(subject);
    final int subjectEnd = mSpannableStringBuilder.length();
    final int spanFlags = Spannable.SPAN_EXCLUSIVE_EXCLUSIVE;
    mSpannableStringBuilder.setSpan(new RelativeSizeSpan(1.25f), 0, subjectEnd, spanFlags);
    mSpannableStringBuilder.setSpan(new StyleSpan(Typeface.BOLD), 0, subjectEnd, spanFlags);
    if (colors != null) {
        mSpannableStringBuilder.setSpan(new ForegroundColorSpan(colors.subjectForeground), 0, subjectEnd, spanFlags);
    }
    mSpannableStringBuilder.append('\n');
    mStartLength = mSpannableStringBuilder.length();
}
示例9: loadDom
import org.ccil.cowan.tagsoup.Parser; //导入依赖的package包/类
/**
 * Loads the document at {@code url} through TagSoup and returns it as a DOM {@code Document}.
 *
 * <p>The parser runs with the namespaces and namespace-prefixes features disabled, and an
 * identity {@code Transformer} copies its SAX events into a {@code DOMResult}.
 *
 * <p>The reader obtained from {@code openReader(url)} is managed by try-with-resources, so it
 * is closed even when the transformation fails. Previously it was closed only on the success
 * path and leaked on any exception.
 *
 * @param url location passed to {@code openReader}
 * @return the node held by the {@code DOMResult}, cast to {@code Document}
 * @throws RuntimeException wrapping any exception raised while configuring the parser,
 *         opening or closing the reader, or transforming
 */
public static Document loadDom(String url) {
    Parser parser = new Parser();
    try {
        parser.setFeature(Parser.namespacesFeature, false);
        parser.setFeature(Parser.namespacePrefixesFeature, false);
        DOMResult result = new DOMResult();
        // Create the transformer before opening the reader so a factory failure cannot leak it.
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        try (Reader reader = openReader(url)) {
            transformer.transform(new SAXSource(parser, new InputSource(reader)), result);
        }
        return (Document) result.getNode();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
示例10: ConvertHTML
import org.ccil.cowan.tagsoup.Parser; //导入依赖的package包/类
public ConvertHTML(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) throws SAXNotRecognizedException, SAXNotSupportedException {
super(builder, config, parent, child, context);
this.charset = getConfigs().getCharset(config, "charset", null);
this.omitXMLDeclaration = getConfigs().getBoolean(config, "omitXMLDeclaration", false);
this.xmlReader = new Parser(); // no reuse?
xmlReader.setProperty(Parser.schemaProperty, htmlSchema);
xmlReader.setFeature(Parser.CDATAElementsFeature, getConfigs().getBoolean(config, "noCDATA", false));
xmlReader.setFeature(Parser.namespacesFeature, !getConfigs().getBoolean(config, "noNamespaces", true));
xmlReader.setFeature(Parser.ignoreBogonsFeature, getConfigs().getBoolean(config, "noBogons", false)); // also see TIKA-599
xmlReader.setFeature(Parser.bogonsEmptyFeature, getConfigs().getBoolean(config, "emptyBogons", false));
xmlReader.setFeature(Parser.rootBogonsFeature, getConfigs().getBoolean(config, "noRootBogons", false));
xmlReader.setFeature(Parser.defaultAttributesFeature, getConfigs().getBoolean(config, "noDefaultAttributes", false));
xmlReader.setFeature(Parser.translateColonsFeature, getConfigs().getBoolean(config, "noColons", false));
xmlReader.setFeature(Parser.restartElementsFeature, getConfigs().getBoolean(config, "noRestart", false));
xmlReader.setFeature(Parser.ignorableWhitespaceFeature, !getConfigs().getBoolean(config, "suppressIgnorableWhitespace", true));
validateArguments();
}
示例11: evaluateXpath
import org.ccil.cowan.tagsoup.Parser; //导入依赖的package包/类
/**
 * Evaluates {@code xpath} against the current HTML source and returns the result as a string.
 *
 * <p>Whitespace between adjacent tags is removed from the HTML, which is then encoded as
 * UTF-8, parsed by TagSoup with the namespaces feature disabled, and copied into a DOM tree
 * that serves as the XPath context node.
 *
 * @param xpath the XPath expression to evaluate
 * @return the string value of the expression
 * @throws Exception if parsing, transformation or XPath evaluation fails
 */
@Override
public String evaluateXpath(String xpath) throws Exception {
    final XPath evaluator = XPathFactory.newInstance().newXPath();

    // Collapse ">   <" into "><" so whitespace between tags is dropped.
    final String compactHtml = getHtmlSource().replaceAll(">\\s+<", "><");
    final byte[] utf8Bytes = compactHtml.getBytes(Charset.forName("UTF-8"));

    final XMLReader tagSoup = new Parser();
    tagSoup.setFeature(Parser.namespacesFeature, false);

    final Transformer identity = TransformerFactory.newInstance().newTransformer();
    final DOMResult dom = new DOMResult();
    final InputSource input = new InputSource(new ByteArrayInputStream(utf8Bytes));
    identity.transform(new SAXSource(tagSoup, input), dom);

    final Node contextNode = dom.getNode();
    return (String) evaluator.evaluate(xpath, contextNode, XPathConstants.STRING);
}
示例12: getNodeListUsingJavaXPath
import org.ccil.cowan.tagsoup.Parser; //导入依赖的package包/类
/**
 * Returns the nodes of the current page that satisfy the given XPath expression.
 *
 * <p>The HTML source is taken from the GUI driver, stripped of whitespace between adjacent
 * tags, encoded as UTF-8, parsed by TagSoup with the namespaces feature disabled, and copied
 * into a DOM tree against which the expression is evaluated.
 *
 * @param xpath
 *            the input xpath expression
 * @return the nodeset of matching elements
 * @throws Exception
 *             if parsing, transformation or XPath evaluation fails
 */
private NodeList getNodeListUsingJavaXPath(String xpath) throws Exception {
    final XPath evaluator = XPathFactory.newInstance().newXPath();

    final String compactHtml = getGUIDriver().getHtmlSource().replaceAll(">\\s+<", "><");
    final InputSource input = new InputSource(
            new ByteArrayInputStream(compactHtml.getBytes(Charset.forName("UTF-8"))));

    final XMLReader tagSoup = new Parser();
    tagSoup.setFeature(Parser.namespacesFeature, false);

    final Transformer identity = TransformerFactory.newInstance().newTransformer();
    final DOMResult dom = new DOMResult();
    identity.transform(new SAXSource(tagSoup, input), dom);

    // The DOM built from the SAX events is the XPath context node.
    return (NodeList) evaluator.evaluate(xpath, dom.getNode(), XPathConstants.NODESET);
}
示例13: getNodeListUsingJavaXPath
import org.ccil.cowan.tagsoup.Parser; //导入依赖的package包/类
/**
 * Finds the nodes located at the given XPath in the HTML source reported by the GUI driver.
 *
 * <p>Whitespace between adjacent tags is removed, the markup is encoded as UTF-8 and parsed
 * by TagSoup with the namespaces feature disabled, and the resulting DOM tree is queried.
 *
 * @param xpath
 *            of the NodeList
 * @return a list of nodes found at the xpath
 * @throws Exception
 *             if parsing, transformation or XPath evaluation fails
 */
private NodeList getNodeListUsingJavaXPath(String xpath) throws Exception {
    final XPath xpathEvaluator = XPathFactory.newInstance().newXPath();

    String markup = getGUIDriver().getHtmlSource();
    markup = markup.replaceAll(">\\s+<", "><");
    final byte[] encoded = markup.getBytes(Charset.forName("UTF-8"));

    final XMLReader htmlReader = new Parser();
    htmlReader.setFeature(Parser.namespacesFeature, false);
    final SAXSource saxSource = new SAXSource(htmlReader, new InputSource(new ByteArrayInputStream(encoded)));

    final Transformer copier = TransformerFactory.newInstance().newTransformer();
    final DOMResult domResult = new DOMResult();
    copier.transform(saxSource, domResult);

    final Node root = domResult.getNode();
    return (NodeList) xpathEvaluator.evaluate(xpath, root, XPathConstants.NODESET);
}
示例14: parse
import org.ccil.cowan.tagsoup.Parser; //导入依赖的package包/类
/**
 * Parses the given stream with TagSoup and returns the document produced by a
 * {@code DOMBuilder} that is registered as both lexical handler and content handler.
 *
 * @param systemId system id set on the input source
 * @param stream   byte stream to parse
 * @param encoding encoding set on the input source
 * @return the document reported by the builder after parsing
 * @throws IOException the cause of a {@code SAXException} when that cause is itself an
 *         {@code IOException}; otherwise a new {@code IOException} wrapping the
 *         {@code SAXException}
 * @see org.apache.sling.commons.html.HtmlParser#parse(java.lang.String, java.io.InputStream, java.lang.String)
 */
public Document parse(String systemId, InputStream stream, String encoding) throws IOException {
    final Parser tagSoup = new Parser();
    final DOMBuilder domBuilder = new DOMBuilder();

    final InputSource input = new InputSource(stream);
    input.setEncoding(encoding);
    input.setSystemId(systemId);

    try {
        tagSoup.setProperty("http://xml.org/sax/properties/lexical-handler", domBuilder);
        tagSoup.setContentHandler(domBuilder);
        tagSoup.parse(input);
    } catch (SAXException se) {
        final Throwable cause = se.getCause();
        if (cause instanceof IOException) {
            // Surface the underlying I/O failure directly instead of double-wrapping it.
            throw (IOException) cause;
        }
        throw new IOException("Unable to parse xml.", se);
    }
    return domBuilder.getDocument();
}
示例15: getHtmlAsXml
import org.ccil.cowan.tagsoup.Parser; //导入依赖的package包/类
/**
 * Parses the text returned by {@code getAsText()} with TagSoup and wraps the resulting DOM
 * in a {@code PropBagWrapper}.
 *
 * <p>The parser runs with the namespaces and namespace-prefixes features disabled. If the
 * transformation result is a document node, its first child is wrapped instead of the
 * document itself.
 *
 * @return the wrapped DOM
 * @throws RuntimeException wrapping any exception raised during parsing, transformation or
 *         wrapping
 */
@Override
public XmlScriptType getHtmlAsXml()
{
    try
    {
        final XMLReader tagSoup = new Parser();
        tagSoup.setFeature(Parser.namespacesFeature, false);
        tagSoup.setFeature(Parser.namespacePrefixesFeature, false);

        final Transformer identity = TransformerFactory.newInstance().newTransformer();
        final DOMResult dom = new DOMResult();
        final InputSource input = new InputSource(new StringReader(getAsText()));
        identity.transform(new SAXSource(tagSoup, input), dom);

        final Node produced = dom.getNode();
        final Node root = produced.getNodeType() == Node.DOCUMENT_NODE
            ? produced.getFirstChild()
            : produced;
        return new PropBagWrapper(new PropBagEx(root));
    }
    catch( Exception ex )
    {
        throw new RuntimeException("Response received from external URL could not be tidied into XML", ex);
    }
}