本文整理汇总了 Java 中 org.htmlcleaner.TagNode.getText 方法的典型用法代码示例。如果您正苦于以下问题：Java TagNode.getText 方法的具体用法？Java TagNode.getText 怎么用？Java TagNode.getText 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.htmlcleaner.TagNode 的用法示例。
在下文中一共展示了TagNode.getText方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: handleTagNode
import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Appends the text of {@code node} to {@code builder} and decorates it with spans.
 *
 * <p>When {@code isPre} is set, the plain text is wrapped in newlines and covered by a
 * {@code CodeBackgroundRoundedSpan}; otherwise the text is appended inline, padded with one
 * space on each side, and given background, (optionally) foreground and monospace spans.
 *
 * @param node    the tag whose text is appended
 * @param builder the output receiving text and spans
 * @param start   offset in {@code builder} from which the spans are applied
 * @param end     not read by this handler
 */
@Override public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end) {
    if (!isPre) {
        final StringBuffer inlineText = node.getText();
        builder.append(" ");
        builder.append(replace(inlineText.toString()));
        builder.append(" ");
        // Skip the leading and trailing padding space so that only the text itself is styled.
        final int spanFrom = start + 1;
        final int spanTo = builder.length() - 1;
        builder.setSpan(new BackgroundColorSpan(color), spanFrom, spanTo, SPAN_EXCLUSIVE_EXCLUSIVE);
        if (theme == PrefGetter.LIGHT) {
            builder.setSpan(new ForegroundColorSpan(Color.RED), spanFrom, spanTo, SPAN_EXCLUSIVE_EXCLUSIVE);
        }
        builder.setSpan(new TypefaceSpan("monospace"), spanFrom, spanTo, SPAN_EXCLUSIVE_EXCLUSIVE);
        return;
    }

    // Block form: the leading and trailing newlines act as fake top/bottom padding and keep
    // the block on lines of its own.
    final StringBuffer block = new StringBuffer("\n");
    getPlainText(block, node);
    block.append("\n");
    builder.append(replace(block.toString()));
    builder.append("\n");
    builder.setSpan(new CodeBackgroundRoundedSpan(color), start, builder.length(), SPAN_EXCLUSIVE_EXCLUSIVE);
    builder.append("\n");
    appendNewLine(builder);
    appendNewLine(builder);
}
示例2: checkStyleCSS
import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Rewrites the text content of a {@code <style>} element.
 *
 * <p>The node is left untouched unless it is named {@code style}, its {@code type} attribute is
 * empty or {@code text/css}, and its {@code media} attribute is empty, {@code screen} or
 * {@code all}. The text is XML-unescaped and passed through {@code checkCSSContent}; if the
 * result differs from the original text, the node's children are replaced by a single
 * {@link ContentNode} holding the new text.
 *
 * @param node the element to inspect and possibly rewrite
 */
private final void checkStyleCSS(TagNode node)
        throws ClientProtocolException, IllegalStateException, IOException, SearchLibException, URISyntaxException {
    if (!"style".equalsIgnoreCase(node.getName())) {
        return;
    }
    final String type = node.getAttributeByName("type");
    if (!StringUtils.isEmpty(type) && !"text/css".equalsIgnoreCase(type)) {
        return;
    }
    final String media = node.getAttributeByName("media");
    if (!StringUtils.isEmpty(media) && !"screen".equalsIgnoreCase(media) && !"all".equalsIgnoreCase(media)) {
        return;
    }
    // Read the text as a CharSequence rather than down-casting to StringBuilder: only
    // toString() is needed, and the cast would fail with a ClassCastException for any other
    // CharSequence implementation.
    final CharSequence text = node.getText();
    if (text == null) {
        return;
    }
    final String content = text.toString();
    String newContent = StringEscapeUtils.unescapeXml(content);
    final StringBuffer checked = checkCSSContent(baseUrl, newContent);
    if (checked != null) {
        newContent = checked.toString();
    }
    if (newContent.equals(content)) {
        return;
    }
    node.removeAllChildren();
    node.addChild(new ContentNode(newContent));
}
示例3: checkScriptContent
import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Removes or XML-unescapes a {@code <script>} element.
 *
 * <p>Nodes not named {@code script} are ignored. If {@code disableScriptNodeSet} is non-null and
 * {@code hasAncestorXPath} reports a match for the node, the node is removed from the tree.
 * Otherwise its text is XML-unescaped and, when that changes the text, the node's children are
 * replaced by a single {@link ContentNode} holding the unescaped text.
 *
 * @param node                 the element to inspect
 * @param disableScriptNodeSet nodes passed to {@code hasAncestorXPath}; may be {@code null}
 */
private final void checkScriptContent(TagNode node, Set<TagNode> disableScriptNodeSet) {
    if (!"script".equalsIgnoreCase(node.getName())) {
        return;
    }
    if (disableScriptNodeSet != null && hasAncestorXPath(disableScriptNodeSet, node)) {
        node.removeFromTree();
        return;
    }
    // Read the text as a CharSequence rather than down-casting to StringBuilder: only
    // toString() is needed, and the cast would fail with a ClassCastException for any other
    // CharSequence implementation.
    final CharSequence text = node.getText();
    if (text == null) {
        return;
    }
    // toString() on a non-null CharSequence is not expected to return null, so no second
    // null check is made here.
    final String content = text.toString();
    final String newContent = StringEscapeUtils.unescapeXml(content);
    if (newContent.equals(content)) {
        return;
    }
    node.removeAllChildren();
    node.addChild(new ContentNode(newContent));
}
示例4: handleTagNode
import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Applies a {@code LinkSpan} over {@code [start, end)} of the output.
 *
 * <p>The span targets the node's {@code href} attribute when present; otherwise, if the node
 * has text, it targets {@code "https://github.com/"} followed by that text. When neither is
 * available no span is set.
 *
 * @param node                   the tag being handled
 * @param spannableStringBuilder the output the span is applied to
 * @param start                  span start offset
 * @param end                    span end offset
 */
@Override public void handleTagNode(TagNode node, SpannableStringBuilder spannableStringBuilder, int start, int end) {
    final String target = node.getAttributeByName("href");
    if (target != null) {
        // NOTE(review): 33 looks like Spanned.SPAN_EXCLUSIVE_EXCLUSIVE (0x21) - confirm.
        spannableStringBuilder.setSpan(new LinkSpan(target, linkColor), start, end, 33);
        return;
    }
    if (node.getText() == null) {
        return;
    }
    final String fallbackUrl = "https://github.com/" + node.getText().toString();
    spannableStringBuilder.setSpan(new LinkSpan(fallbackUrl, linkColor), start, end, 33);
}
示例5: getTextFromHtmlString
import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Extracts the concatenated text of every {@code td} cell found (recursively) in the last
 * {@code table} element of an HTML string.
 *
 * <p>The markup is first cleaned with an {@link HtmlCleaner} configured below. If the document
 * contains no table, or the XPath evaluation fails (the failure is logged), an empty string is
 * returned.
 *
 * @param htmlString the HTML markup to parse
 * @return the collected cell text, never {@code null}
 */
public static String getTextFromHtmlString(String htmlString) {
    CleanerProperties cleanerProps = new CleanerProperties();
    // set some properties to non-default values
    cleanerProps.setTransResCharsToNCR(true);
    cleanerProps.setTranslateSpecialEntities(true);
    cleanerProps.setOmitComments(true);
    cleanerProps.setOmitDoctypeDeclaration(true);
    cleanerProps.setOmitXmlDeclaration(true);
    cleanerProps.setUseEmptyElementTags(true);
    HtmlCleaner cleaner = new HtmlCleaner(cleanerProps);
    TagNode tagNode = cleaner.clean(htmlString);

    // A single builder is used for the whole traversal instead of re-copying the accumulated
    // text into a fresh builder for every cell.
    StringBuilder errorText = new StringBuilder();
    try {
        Object[] tables = tagNode.evaluateXPath("//table");
        if (tables != null && tables.length > 0) {
            TagNode lastTable = (TagNode) tables[tables.length - 1];
            for (TagNode cell : lastTable.getElementsByName("td", true)) {
                if (cell == null) {
                    continue;
                }
                // Fetch the text once per cell; it is used for both the check and the append.
                CharSequence cellText = cell.getText();
                if (cellText == null) {
                    continue;
                }
                // NOTE(review): if the literal below is a plain ASCII space this branch is
                // unreachable, because trim() strips it; it was presumably a non-breaking
                // space or an entity in the upstream source - confirm.
                if (cellText.toString().trim().equals(" ")) {
                    errorText.append(" ");
                } else {
                    errorText.append(cellText);
                }
            }
        }
    } catch (XPatherException e) {
        // Pass the exception as well so the stack trace is not lost.
        LOGGER.error("Error extracting table node from html." + e.getMessage(), e);
    }
    return errorText.toString();
}
示例6: satisfy
import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Decides whether {@code tagNode} matches, i.e. is an eligible element with no text whose
 * children are all themselves matching tags or blank content nodes.
 *
 * @param tagNode  the node to test
 * @param override when {@code true}, membership in {@code unsafeBlockElements} is not checked;
 *                 recursive calls on child tags always pass {@code true}
 * @return {@code true} if the node matches, {@code false} otherwise
 */
private boolean satisfy(TagNode tagNode, boolean override) {
    String name = tagNode.getName();
    TagInfo tagInfo = tagInfoProvider.getTagInfo(name);
    //Only _block_ elements can match.
    boolean eligible = tagInfo != null
            && !hasIdAttributeSet(tagNode)
            && none != tagInfo.getDisplay()
            && !tagInfo.isEmptyTag()
            && (override || !unsafeBlockElements.contains(name));
    if (!eligible) {
        return false;
    }
    CharSequence contentString = tagNode.getText();
    if (!isEmptyString(contentString)) {
        return false;
    }
    // even though there may be no text need to make sure all children are empty or can be pruned
    if (tagNode.isEmpty()) {
        return true;
    }
    for (Object child : tagNode.getAllChildren()) {
        // TODO : similar check as in tagNode.isEmpty() argues for a visitor pattern
        // but allow empty td, ths to be pruned.
        if (child instanceof TagNode) {
            if (!satisfy((TagNode) child, true)) {
                return false;
            }
        } else if (child instanceof ContentNode) {
            if (!((ContentNode) child).isBlank()) {
                return false;
            }
        } else {
            // Any other kind of child prevents a match.
            return false;
        }
    }
    return true;
}
示例7: satisfy
import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Tests whether {@code tagNode} matches: it must pass the tag-level checks, contain no text,
 * and have only children that are matching tags or blank content nodes.
 *
 * @param tagNode  the node to examine
 * @param override if {@code true}, the {@code unsafeBlockElements} lookup is skipped; child tags
 *                 are always examined with {@code true}
 * @return whether the node matches
 */
private boolean satisfy(TagNode tagNode, boolean override) {
    final String tagName = tagNode.getName();
    final TagInfo info = tagInfoProvider.getTagInfo(tagName);
    //Only _block_ elements can match.
    if (info == null
            || hasIdAttributeSet(tagNode)
            || none == info.getDisplay()
            || info.isEmptyTag()
            || (!override && unsafeBlockElements.contains(tagName))) {
        return false;
    }
    final CharSequence text = tagNode.getText();
    if (!isEmptyString(text)) {
        return false;
    }
    // even though there may be no text need to make sure all children are empty or can be pruned
    if (tagNode.isEmpty()) {
        return true;
    }
    // TODO : similar check as in tagNode.isEmpty() argues for a visitor pattern
    // but allow empty td, ths to be pruned.
    for (Object child : tagNode.getAllChildren()) {
        final boolean prunable;
        if (child instanceof TagNode) {
            prunable = satisfy((TagNode) child, true);
        } else if (child instanceof ContentNode) {
            prunable = ((ContentNode) child).isBlank();
        } else {
            prunable = false;
        }
        if (!prunable) {
            return false;
        }
    }
    return true;
}
示例8: parsePageInfoByPath
import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Evaluates {@code xpath} against {@code node} and concatenates the text of every result.
 *
 * <p>Each result is cast to {@link TagNode}; an expression that yields anything else will fail
 * with a {@link ClassCastException}.
 *
 * @param node  the node the expression is evaluated on
 * @param xpath the XPath expression
 * @return the concatenated text of all results, or an empty string when there are none
 * @throws IOException      declared by the signature; not thrown by the code visible here
 * @throws XPatherException if the expression cannot be evaluated
 */
public String parsePageInfoByPath(TagNode node, String xpath) throws IOException, XPatherException
{
    Object[] matches = node.evaluateXPath(xpath);
    // Accumulate in a builder rather than re-allocating a String for every match.
    StringBuilder result = new StringBuilder();
    for (Object match : matches)
    {
        TagNode matched = (TagNode) match;
        result.append(matched.getText());
    }
    return result.toString();
}
示例9: parsePageInfoByPathandIndex
import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Evaluates {@code xpath} against {@code node} and returns the text of the result at
 * {@code index}.
 *
 * <p>The result is cast to {@link TagNode}; an expression that yields anything else will fail
 * with a {@link ClassCastException}.
 *
 * @param node  the node the expression is evaluated on
 * @param xpath the XPath expression
 * @param index zero-based position of the wanted result
 * @return the text of the selected result, or an empty string when {@code index} does not
 *         address an existing result (including when there are no results at all)
 * @throws IOException      declared by the signature; not thrown by the code visible here
 * @throws XPatherException if the expression cannot be evaluated
 */
public String parsePageInfoByPathandIndex(TagNode node, String xpath,int index) throws IOException, XPatherException
{
    Object[] matches = node.evaluateXPath(xpath);
    // Check the requested index itself, not just that the array is non-empty; otherwise an
    // out-of-range index would raise ArrayIndexOutOfBoundsException.
    if (index < 0 || index >= matches.length)
    {
        return "";
    }
    TagNode matched = (TagNode) matches[index];
    return String.valueOf(matched.getText());
}
示例10: main
import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Ad-hoc driver: downloads one page, collects the text of the nodes selected by a fixed XPath
 * expression, strips some characters, passes the result through
 * {@code CommonUtil.getDateString} and prints it to standard output.
 *
 * @param args not used
 * @throws IOException      if the page cannot be fetched or read
 * @throws XPatherException if the XPath expression cannot be evaluated
 */
public static void main(String[] args) throws IOException, XPatherException {
    CleanerProperties props = cleaner.getProperties();
    props.setUseCdataForScriptAndStyle(true);
    props.setRecognizeUnicodeChars(true);
    props.setUseEmptyElementTags(true);
    props.setAdvancedXmlEscape(true);
    props.setTranslateSpecialEntities(true);
    props.setBooleanAttributeValues("empty");

    URL url = new URL("http://www.haodf.com/wenda/anzhentaohong_g_638200415.htm");
    TagNode node = cleaner.clean(url,"gb2312");
    Object[] ns = node.evaluateXPath("//*[@class=\"bb_d3 bl_d3 pb20\"]/div[3]/div[2]/p[2]");

    // Collect one line per matched node.
    StringBuilder collected = new StringBuilder();
    for (Object object : ns)
    {
        TagNode dd = (TagNode) object;
        collected.append(dd.getText()).append("\n");
    }
    // NOTE(review): the first literal below may originally have been an HTML entity such as
    // "&nbsp" (the later removal of ";" hints at that) - confirm against the upstream source.
    String result = collected.toString().replace(" ", "").replace("\r", "").replace(";", "");
    result = CommonUtil.getDateString(result,".*?([0-9]+.[0-9]+.[0-9]+).*");
    System.out.print(result);
}