本文整理汇总了Java中net.htmlparser.jericho.OutputDocument.toString方法的典型用法代码示例。如果您正苦于以下问题：Java OutputDocument.toString方法的具体用法？Java OutputDocument.toString怎么用？Java OutputDocument.toString使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类net.htmlparser.jericho.OutputDocument的用法示例。
在下文中一共展示了OutputDocument.toString方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: processPage
import net.htmlparser.jericho.OutputDocument; //导入方法依赖的package包/类
/**
 * Loads a page, parses it as HTML, lets {@code modifyDocument} register its changes
 * and returns the resulting markup.
 *
 * <p>The input stream is always closed quietly, and the elapsed time is logged at
 * debug level, whether or not processing succeeds.
 *
 * @param baseDir  origin used to obtain the reader that opens {@code pagePath}
 * @param pagePath path of the page to process
 * @return the text of the output document after {@code modifyDocument} has run
 * @throws IOException if the page cannot be opened or read
 */
protected String processPage( PathOrigin baseDir, String pagePath ) throws IOException {
    final long startedAt = System.currentTimeMillis();
    InputStream pageStream = null;
    try {
        pageStream = baseDir.getReader( getRepo() ).getFileInputStream( pagePath );
        final Source parsedPage = new Source( pageStream );
        final OutputDocument transformed = new OutputDocument( parsedPage );
        // modifyDocument registers its changes on the output document
        modifyDocument( parsedPage, baseDir, transformed );
        final String rendered = transformed.toString();
        return rendered;
    } finally {
        IOUtils.closeQuietly( pageStream );
        if ( log.isDebugEnabled() ) {
            final long elapsedMs = System.currentTimeMillis() - startedAt;
            log.debug( String.format( "processPage for %s took %dms", pagePath, elapsedMs ) );
        }
    }
}
示例2: strip
import net.htmlparser.jericho.OutputDocument; //导入方法依赖的package包/类
/**
 * Removes unwanted tags from an HTML string.
 *
 * <p>Every tag in the parsed source is passed to {@code processTag}. Tags it accepts
 * are marked with {@code VALID_MARKER} as user data; tags it rejects are removed from
 * the output. Text between tags is passed through unchanged (this variant does not
 * re-encode it).
 *
 * @param html the markup to clean; may be {@code null}
 * @return the cleaned markup, or an empty string when {@code html} is {@code null}
 */
public String strip(String html) {
    if (html == null) {
        return "";
    }
    final Source parsed = new Source(html);
    parsed.fullSequentialParse();
    final OutputDocument result = new OutputDocument(parsed);
    for (final Tag current : parsed.getAllTags()) {
        final boolean accepted = processTag(current, result);
        if (!accepted) {
            result.remove(current);
            continue;
        }
        current.setUserData(VALID_MARKER);
    }
    return result.toString();
}
示例3: strip
import net.htmlparser.jericho.OutputDocument; //导入方法依赖的package包/类
/**
 * Removes unwanted tags from an HTML string and hands the text between tags to
 * {@code reencodeTextSegment}.
 *
 * <p>Every tag in the parsed source is passed to {@code processTag}. Tags it accepts
 * are marked with {@code VALID_MARKER} as user data; tags it rejects are removed from
 * the output. The span between the end of the previous tag and the start of the
 * current one (and finally the trailing span up to the end of the source) is passed
 * to {@code reencodeTextSegment}.
 *
 * @param html the markup to clean; may be {@code null}
 * @return the cleaned markup, or an empty string when {@code html} is {@code null}
 */
public String strip(String html) {
    if (html == null) {
        return "";
    }
    final Source parsed = new Source(html);
    parsed.fullSequentialParse();
    final OutputDocument result = new OutputDocument(parsed);
    // Start of the text span that has not yet been handed to reencodeTextSegment.
    int textStart = 0;
    for (final Tag current : parsed.getAllTags()) {
        final boolean rejected = !processTag(current, result);
        if (rejected) {
            result.remove(current);
        } else {
            current.setUserData(VALID_MARKER);
        }
        final int tagStart = current.getBegin();
        reencodeTextSegment(parsed, result, textStart, tagStart);
        textStart = current.getEnd();
    }
    // Trailing text after the last tag.
    reencodeTextSegment(parsed, result, textStart, parsed.getEnd());
    return result.toString();
}
示例4: sanitise
import net.htmlparser.jericho.OutputDocument; //导入方法依赖的package包/类
/**
 * Sanitises a fragment of (pseudo) HTML by filtering its tags through
 * {@code processTag}.
 *
 * <p>Tags accepted by {@code processTag} are marked with {@code VALID_MARKER}.
 * Rejected tags are left untouched unless {@code stripInvalidElements} is set, in
 * which case they are removed from the output. A rejected tag named {@code style}
 * is removed together with everything up to the start of the next tag (or the end
 * of the source when there is no next tag), so the style sheet text goes with it.
 *
 * <p>NOTE(review): the {@code style} rule applies to end tags as well, so text that
 * follows {@code </style>} up to the next tag is also dropped — confirm this is
 * intended.
 *
 * @param pseudoHTML           the markup to sanitise
 * @param formatWhiteSpace     currently unused; kept so the signature stays unchanged
 * @param stripInvalidElements whether rejected tags are removed from the output
 * @return the sanitised markup
 */
private static String sanitise(String pseudoHTML, boolean formatWhiteSpace, boolean stripInvalidElements) {
    Source source = new Source(pseudoHTML);
    source.fullSequentialParse();
    OutputDocument outputDocument = new OutputDocument(source);
    for (Tag tag : source.getAllTags()) {
        if (processTag(tag, outputDocument)) {
            tag.setUserData(VALID_MARKER);
            continue;
        }
        if (!stripInvalidElements) {
            continue; // rejected tag stays in the output as-is
        }
        if (tag.getName().equalsIgnoreCase("style")) {
            Tag nextTag = tag.getNextTag();
            // The end position passed to remove(begin, end) is exclusive, so the range
            // must stop exactly at the next tag's begin; subtracting one would leave
            // the last character before that tag in the output.
            int endPos = (nextTag != null) ? nextTag.getBegin() : source.getEnd();
            outputDocument.remove(tag.getBegin(), endPos);
        } else {
            outputDocument.remove(tag);
        }
    }
    return outputDocument.toString();
}
示例5: removeNotAllowedTags
import net.htmlparser.jericho.OutputDocument; //导入方法依赖的package包/类
/**
 * Cleans an HTML fragment: strips attributes, resolves relative links against the
 * document URI and removes tags listed in {@code NOT_ALLOWED_HTML_TAGS}.
 *
 * <p>For every element that has attributes:
 * <ul>
 *   <li>if the element name does not contain the letter {@code a}, all of its
 *       attributes are dropped;</li>
 *   <li>otherwise, an {@code href} value that does not contain {@code "http"} is
 *       resolved against {@code docUri}; if it is not a valid URI the whole element
 *       is removed.</li>
 * </ul>
 * Elements named in {@code NOT_ALLOWED_HTML_TAGS} lose their start and end tags; for
 * {@code script}, {@code style} and {@code form} the content is removed as well.
 * Finally all newline and tab characters are deleted from the result.
 *
 * <p>NOTE(review): {@code contains("a")} also matches names such as {@code span} or
 * {@code table}; it may have been meant as {@code equals("a")} — confirm before
 * changing, since that would strip attributes from more elements.
 *
 * @param htmlFragment the markup to clean
 * @param docUri       base URI used to resolve relative {@code href} values
 * @return the cleaned markup with newlines and tabs removed
 */
private String removeNotAllowedTags(String htmlFragment, URI docUri) {
    Source source = new Source(htmlFragment);
    OutputDocument outputDocument = new OutputDocument(source);
    for (Element element : source.getAllElements()) {
        String name = element.getName();
        Attributes attrs = element.getAttributes();
        // Elements whose start tag cannot carry attributes (e.g. comments) have none.
        if (attrs != null) {
            Map<String, String> attrsUpdate = outputDocument.replace(attrs, true);
            if (!name.contains("a")) {
                attrsUpdate.clear();
            } else {
                String link = attrsUpdate.get("href");
                if (link != null && !link.contains("http")) {
                    try {
                        URI resolved = docUri.resolve(new URI(link));
                        attrsUpdate.put("href", resolved.toString());
                    } catch (URISyntaxException e) {
                        // An unparsable link cannot be resolved; drop the element.
                        outputDocument.remove(element);
                    }
                }
            }
        }
        if (NOT_ALLOWED_HTML_TAGS.contains(name)) {
            // Compare by value, not by reference.
            if ("script".equals(name) || "style".equals(name) || "form".equals(name)) {
                outputDocument.remove(element.getContent());
            }
            outputDocument.remove(element.getStartTag());
            // Elements such as <img> or an unclosed <p> have no end tag at all.
            Segment endTag = element.getEndTag();
            if (!element.getStartTag().isSyntacticalEmptyElementTag() && endTag != null) {
                outputDocument.remove(endTag);
            }
        }
    }
    String out = outputDocument.toString();
    // Literal replacement; no regular expression is needed for single characters.
    out = out.replace("\n", "");
    out = out.replace("\t", "");
    return out;
}