当前位置: 首页>>代码示例>>Java>>正文


Java TagNode.getText方法代码示例

本文整理汇总了Java中org.htmlcleaner.TagNode.getText方法的典型用法代码示例。如果您正苦于以下问题:Java TagNode.getText方法的具体用法?Java TagNode.getText怎么用?Java TagNode.getText使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.htmlcleaner.TagNode的用法示例。


在下文中一共展示了TagNode.getText方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: handleTagNode

import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Renders a code element into {@code builder}.
 *
 * <p>When {@code isPre} is set, the node's plain text is wrapped in newlines, appended,
 * and covered by a {@code CodeBackgroundRoundedSpan}. Otherwise the node's text is
 * appended inline, surrounded by single spaces, and styled with a background colour,
 * a monospace typeface and (for the light theme only) a red foreground.
 *
 * @param node    the tag whose text is rendered
 * @param builder the output the text and spans are written to
 * @param start   offset in {@code builder} at which this node's content begins
 * @param end     not used by this handler
 */
@Override
public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end) {
    if (!isPre) {
        // Inline code: " text " with the styling applied to everything between the two spaces.
        final String rawText = node.getText().toString();
        builder.append(" ").append(replace(rawText)).append(" ");

        final int spanFrom = start + 1;
        final int spanTo = builder.length() - 1;
        builder.setSpan(new BackgroundColorSpan(color), spanFrom, spanTo, SPAN_EXCLUSIVE_EXCLUSIVE);
        if (theme == PrefGetter.LIGHT) {
            builder.setSpan(new ForegroundColorSpan(Color.RED), spanFrom, spanTo, SPAN_EXCLUSIVE_EXCLUSIVE);
        }
        builder.setSpan(new TypefaceSpan("monospace"), spanFrom, spanTo, SPAN_EXCLUSIVE_EXCLUSIVE);
        return;
    }

    // Block code: the leading/trailing newlines fake vertical padding and keep the
    // block on lines of its own.
    StringBuffer plain = new StringBuffer("\n");
    getPlainText(plain, node);
    plain.append("\n");

    builder.append(replace(plain.toString())).append("\n");
    builder.setSpan(new CodeBackgroundRoundedSpan(color), start, builder.length(), SPAN_EXCLUSIVE_EXCLUSIVE);
    builder.append("\n");
    for (int i = 0; i < 2; i++) {
        this.appendNewLine(builder);
    }
}
 
开发者ID:duyp,项目名称:mvvm-template,代码行数:27,代码来源:PreTagHandler.java

示例2: checkStyleCSS

import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Rewrites the content of an inline {@code <style>} element.
 *
 * <p>Nodes that are not {@code style} tags, whose {@code type} attribute is set to
 * something other than {@code text/css}, or whose {@code media} attribute is set to
 * something other than {@code screen}/{@code all} are left untouched. For the remaining
 * nodes the text is XML-unescaped, passed through {@code checkCSSContent}, and, if the
 * result differs from the original text, the node's children are replaced by a single
 * {@link ContentNode} holding the new text.
 *
 * @param node the tag to inspect
 */
private final void checkStyleCSS(TagNode node)
		throws ClientProtocolException, IllegalStateException, IOException, SearchLibException, URISyntaxException {
	if (!"style".equalsIgnoreCase(node.getName()))
		return;
	String attr = node.getAttributeByName("type");
	if (!StringUtils.isEmpty(attr) && !"text/css".equalsIgnoreCase(attr))
		return;
	attr = node.getAttributeByName("media");
	if (!StringUtils.isEmpty(attr) && !"screen".equalsIgnoreCase(attr) && !"all".equalsIgnoreCase(attr))
		return;
	// Hold the text as a CharSequence: down-casting to StringBuilder depends on the
	// concrete class HtmlCleaner happens to return and fails on versions that hand
	// back a StringBuffer.
	CharSequence text = node.getText();
	if (text == null)
		return;
	String content = text.toString();
	String newContent = StringEscapeUtils.unescapeXml(content);
	StringBuffer sb = checkCSSContent(baseUrl, newContent);
	if (sb != null)
		newContent = sb.toString();
	if (newContent.equals(content))
		return;
	node.removeAllChildren();
	node.addChild(new ContentNode(newContent));
}
 
开发者ID:jaeksoft,项目名称:opensearchserver,代码行数:24,代码来源:HtmlArchiver.java

示例3: checkScriptContent

import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Normalises the content of a {@code <script>} element.
 *
 * <p>Non-{@code script} nodes are ignored. A script node for which
 * {@code hasAncestorXPath(disableScriptNodeSet, node)} holds is removed from the tree.
 * Otherwise the node's text is XML-unescaped and, if that changes it, the node's
 * children are replaced by a single {@link ContentNode} holding the unescaped text.
 *
 * @param node                 the tag to inspect
 * @param disableScriptNodeSet nodes marking scripts to disable; may be {@code null}
 */
private final void checkScriptContent(TagNode node, Set<TagNode> disableScriptNodeSet) {
	if (!"script".equalsIgnoreCase(node.getName()))
		return;
	if (disableScriptNodeSet != null && hasAncestorXPath(disableScriptNodeSet, node)) {
		node.removeFromTree();
		return;
	}
	// Hold the text as a CharSequence: down-casting to StringBuilder depends on the
	// concrete class HtmlCleaner happens to return and fails on versions that hand
	// back a StringBuffer.
	CharSequence text = node.getText();
	if (text == null)
		return;
	String content = text.toString();
	String newContent = StringEscapeUtils.unescapeXml(content);
	if (newContent.equals(content))
		return;
	node.removeAllChildren();
	node.addChild(new ContentNode(newContent));
}
 
开发者ID:jaeksoft,项目名称:opensearchserver,代码行数:20,代码来源:HtmlArchiver.java

示例4: handleTagNode

import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Attaches a {@code LinkSpan} to the range {@code [start, end)} of the builder.
 *
 * <p>The link target is the node's {@code href} attribute when present; otherwise it is
 * {@code "https://github.com/"} followed by the node's text. If neither is available no
 * span is set.
 *
 * @param node                   the anchor tag being handled
 * @param spannableStringBuilder the output the span is applied to
 * @param start                  start offset of the span
 * @param end                    end offset of the span
 */
@Override
public void handleTagNode(TagNode node, SpannableStringBuilder spannableStringBuilder, int start, int end) {
    final String target;
    final String href = node.getAttributeByName("href");
    if (href != null) {
        target = href;
    } else {
        final CharSequence label = node.getText();
        if (label == null) {
            return;
        }
        target = "https://github.com/" + label.toString();
    }
    // NOTE(review): 33 appears to be the value of Spanned.SPAN_EXCLUSIVE_EXCLUSIVE — confirm.
    spannableStringBuilder.setSpan(new LinkSpan(target, linkColor), start, end, 33);
}
 
开发者ID:duyp,项目名称:mvvm-template,代码行数:9,代码来源:LinkHandler.java

示例5: getTextFromHtmlString

import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Extracts text from an HTML string.
 *
 * <p>The HTML is cleaned with {@link HtmlCleaner}, the last {@code <table>} in the
 * document is located, and the text of every {@code <td>} beneath it is concatenated in
 * document order. A cell whose trimmed text is exactly {@code "&nbsp;"} contributes a
 * single space instead.
 *
 * @param htmlString {@link String} the HTML to extract text from
 * @return {@link String} the concatenated cell text, or an empty string if there is no
 *         table or the XPath evaluation fails
 */
public static String getTextFromHtmlString(String htmlString) {
	CleanerProperties cleanerProps = new CleanerProperties();
	// set some properties to non-default values
	cleanerProps.setTransResCharsToNCR(true);
	cleanerProps.setTranslateSpecialEntities(true);
	cleanerProps.setOmitComments(true);
	cleanerProps.setOmitDoctypeDeclaration(true);
	cleanerProps.setOmitXmlDeclaration(true);
	cleanerProps.setUseEmptyElementTags(true);

	HtmlCleaner cleaner = new HtmlCleaner(cleanerProps);
	TagNode tagNode = cleaner.clean(htmlString);

	// One builder for the whole result instead of re-copying the accumulated text per cell.
	StringBuilder errorText = new StringBuilder();
	try {
		Object[] tables = tagNode.evaluateXPath("//table");
		if (null != tables && tables.length > 0) {
			TagNode lastTable = (TagNode) tables[tables.length - 1];
			for (TagNode cell : lastTable.getElementsByName("td", true)) {
				if (cell == null) {
					continue;
				}
				// Fetch the text once; the original asked for it up to three times per cell.
				CharSequence cellText = cell.getText();
				if (cellText == null) {
					continue;
				}
				if ("&nbsp;".equals(cellText.toString().trim())) {
					errorText.append(" ");
				} else {
					errorText.append(cellText);
				}
			}
		}
	} catch (XPatherException e) {
		// Pass the exception along so the stack trace is not lost.
		LOGGER.error("Error extracting table node from html." + e.getMessage(), e);
	}
	return errorText.toString();
}
 
开发者ID:kuzavas,项目名称:ephesoft,代码行数:43,代码来源:AbstractUploadFile.java

示例6: satisfy

import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Decides whether {@code tagNode} matches this condition.
 *
 * <p>A node matches when its tag is known to {@code tagInfoProvider}, it has no id
 * attribute set, its display is not {@code none}, it is not an empty tag, it is either
 * overridden or not listed in {@code unsafeBlockElements}, its text is empty, and every
 * child is either a matching tag (checked recursively with {@code override = true}) or
 * a blank content node.
 *
 * @param tagNode  the node to test
 * @param override when {@code true}, the {@code unsafeBlockElements} check is skipped
 * @return {@code true} if the node matches
 */
private boolean satisfy(TagNode tagNode, boolean override) {
    String name = tagNode.getName();
    TagInfo tagInfo = tagInfoProvider.getTagInfo(name);

    //Only _block_ elements can match.
    if (tagInfo == null
            || hasIdAttributeSet(tagNode)
            || none == tagInfo.getDisplay()
            || tagInfo.isEmptyTag()
            || (!override && unsafeBlockElements.contains(name))) {
        return false;
    }

    CharSequence text = tagNode.getText();
    if (!isEmptyString(text)) {
        return false;
    }

    // even though there may be no text need to make sure all children are empty or can be pruned
    if (tagNode.isEmpty()) {
        return true;
    }

    // TODO : similar check as in tagNode.isEmpty() argues for a visitor pattern
    // but allow empty td, ths to be pruned.
    for (Object child : tagNode.getAllChildren()) {
        if (child instanceof TagNode) {
            if (!satisfy((TagNode) child, true)) {
                return false;
            }
            continue;
        }
        if (child instanceof ContentNode) {
            if (!((ContentNode) child).isBlank()) {
                return false;
            }
            continue;
        }
        // Any other kind of child blocks the match.
        return false;
    }
    return true;
}
 
开发者ID:unktomi,项目名称:form-follows-function,代码行数:33,代码来源:TagNodeEmptyContentCondition.java

示例7: satisfy

import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Tests whether {@code tagNode} matches this condition.
 *
 * <p>The node must be known to {@code tagInfoProvider}, carry no id attribute, have a
 * display other than {@code none}, not be an empty tag, and either be overridden or
 * absent from {@code unsafeBlockElements}. Its text must be empty, and each child must
 * be a tag that itself matches (with {@code override = true}) or a blank content node.
 *
 * @param tagNode  the node under test
 * @param override {@code true} to bypass the {@code unsafeBlockElements} exclusion
 * @return {@code true} when all of the above hold, {@code false} otherwise
 */
private boolean satisfy(TagNode tagNode, boolean override) {
    String name = tagNode.getName();
    TagInfo tagInfo = tagInfoProvider.getTagInfo(name);

    //Only _block_ elements can match.
    boolean eligible = tagInfo != null
            && !hasIdAttributeSet(tagNode)
            && none != tagInfo.getDisplay()
            && !tagInfo.isEmptyTag()
            && (override || !unsafeBlockElements.contains(name));
    if (!eligible || !isEmptyString(tagNode.getText())) {
        return false;
    }

    // even though there may be no text need to make sure all children are empty or can be pruned
    if (tagNode.isEmpty()) {
        return true;
    }

    // TODO : similar check as in tagNode.isEmpty() argues for a visitor pattern
    // but allow empty td, ths to be pruned.
    for (Object child : tagNode.getAllChildren()) {
        boolean childOk;
        if (child instanceof TagNode) {
            childOk = satisfy((TagNode) child, true);
        } else if (child instanceof ContentNode) {
            childOk = ((ContentNode) child).isBlank();
        } else {
            childOk = false;
        }
        if (!childOk) {
            return false;
        }
    }
    return true;
}
 
开发者ID:fivesmallq,项目名称:web-data-extractor,代码行数:33,代码来源:TagNodeEmptyContentCondition.java

示例8: parsePageInfoByPath

import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Evaluates {@code xpath} against {@code node} and concatenates the text of all results.
 *
 * <p>Element results contribute their {@code getText()} value. Any other result is
 * appended via its string value instead of failing with a {@link ClassCastException}.
 * NOTE(review): HtmlCleaner is understood to return non-element results (e.g. strings)
 * for attribute or text selections — confirm against the XPather documentation.
 *
 * @param node  root of the subtree to search
 * @param xpath the XPath expression to evaluate
 * @return the concatenated text, or an empty string when nothing matches
 * @throws IOException      declared for callers; not thrown by the visible code
 * @throws XPatherException if the expression cannot be evaluated
 */
public  String parsePageInfoByPath(TagNode node, String xpath) throws IOException, XPatherException
{
	 // Accumulate in a builder rather than re-copying the string on every match.
	 StringBuilder result = new StringBuilder();
	 for (Object match : node.evaluateXPath(xpath))
	 {
	    if (match instanceof TagNode)
	    {
	       result.append(((TagNode) match).getText());
	    }
	    else
	    {
	       result.append(match);
	    }
	 }
	 return result.toString();
}
 
开发者ID:anphoenix,项目名称:data_crawler_generic,代码行数:12,代码来源:ExtractInfoWithHtmlCleaner.java

示例9: parsePageInfoByPathandIndex

import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Evaluates {@code xpath} against {@code node} and returns the text of the result at
 * position {@code index}.
 *
 * @param node  root of the subtree to search
 * @param xpath the XPath expression to evaluate
 * @param index zero-based position of the wanted result
 * @return the text of the selected result, or an empty string when {@code index} is
 *         outside the range of results (including when there are no results)
 * @throws IOException      declared for callers; not thrown by the visible code
 * @throws XPatherException if the expression cannot be evaluated
 */
public  String parsePageInfoByPathandIndex(TagNode node, String xpath,int index) throws IOException, XPatherException
{
	 Object[] ns = node.evaluateXPath(xpath);
	 // Guard the actual index, not just "non-empty": the original threw
	 // ArrayIndexOutOfBoundsException for a negative or too-large index.
	 if (index < 0 || index >= ns.length)
	 {
	    return "";
	 }
	 TagNode dd = (TagNode) ns[index];
	 return "" + dd.getText();
}
 
开发者ID:anphoenix,项目名称:data_crawler_generic,代码行数:12,代码来源:ExtractInfoWithHtmlCleaner.java

示例10: main

import org.htmlcleaner.TagNode; //导入方法依赖的package包/类
/**
 * Ad-hoc driver: downloads a fixed page, selects nodes with a fixed XPath, joins their
 * text (one per line), strips {@code "&nbsp"}, carriage returns and semicolons, runs the
 * result through {@code CommonUtil.getDateString} and prints it to standard output.
 *
 * @param args ignored
 * @throws IOException      if the page cannot be fetched
 * @throws XPatherException if the XPath expression cannot be evaluated
 */
public static void main(String[] args) throws IOException, XPatherException {
	CleanerProperties props = cleaner.getProperties();
	props.setUseCdataForScriptAndStyle(true);
	props.setRecognizeUnicodeChars(true);
	props.setUseEmptyElementTags(true);
	props.setAdvancedXmlEscape(true);
	props.setTranslateSpecialEntities(true);
	props.setBooleanAttributeValues("empty");

	URL url = new URL("http://www.haodf.com/wenda/anzhentaohong_g_638200415.htm");
	TagNode node = cleaner.clean(url, "gb2312");
	Object[] ns = node.evaluateXPath("//*[@class=\"bb_d3 bl_d3 pb20\"]/div[3]/div[2]/p[2]");

	// Accumulate in a builder rather than re-copying the string on every match.
	StringBuilder collected = new StringBuilder();
	for (Object object : ns) {
		TagNode dd = (TagNode) object;
		collected.append(dd.getText()).append("\n");
	}

	String result = collected.toString().replace("&nbsp", "").replace("\r", "").replace(";", "");
	result = CommonUtil.getDateString(result, ".*?([0-9]+.[0-9]+.[0-9]+).*");

	System.out.print(result);
}
 
开发者ID:anphoenix,项目名称:data_crawler_generic,代码行数:40,代码来源:ExtractInfoWithHtmlCleaner.java


注:本文中的org.htmlcleaner.TagNode.getText方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。