本文整理汇总了Java中net.htmlparser.jericho.Source.fullSequentialParse方法的典型用法代码示例。如果您正苦于以下问题：Java Source.fullSequentialParse方法的具体用法？Java Source.fullSequentialParse怎么用？Java Source.fullSequentialParse使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类net.htmlparser.jericho.Source的用法示例。
在下文中一共展示了Source.fullSequentialParse方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: realWrite
import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Flushes this writer, redirects {@code this.out} to {@code outputStream} and
 * emits the content that had been buffered so far.
 *
 * <p>The current {@code this.out} is cast to a {@link ByteArrayOutputStream};
 * the method therefore fails with a {@link ClassCastException} if it is
 * anything else.
 *
 * @param printHeadBodyTags when {@code false} the buffered bytes are copied
 *        unchanged to {@code outputStream}; when {@code true} the output is
 *        wrapped in {@code <html>}/{@code <head>}/{@code <style>} markup built
 *        from {@code styles}, and a {@code <body>} wrapper is added only if
 *        the buffered HTML does not already contain a body start tag
 * @throws IOException if copying or decoding the buffered bytes fails
 */
public void realWrite(boolean printHeadBodyTags) throws IOException {
    flush();
    ByteArrayOutputStream buffered = (ByteArrayOutputStream) this.out;
    this.out = outputStream;

    if (printHeadBodyTags) {
        println("<html>");
        println("<head>");
        println("<style>");
        for (Style style : styles) {
            println(style);
        }
        println("</style>");
        println("</head>");

        String htmlText = new String(buffered.toByteArray(), "UTF-8");
        Source parsed = new Source(htmlText);
        parsed.fullSequentialParse();
        List<StartTag> bodyTags = parsed.getAllStartTags("body");
        if (bodyTags.isEmpty()) {
            // No body in the buffered markup: wrap the whole text in one.
            println("<body>");
            println(htmlText);
            println("</body>");
        } else {
            // Re-emit only the first body element (start tag through end).
            println(new StringBuffer(bodyTags.get(0).getElement()));
        }
        println("</html>");
    } else {
        buffered.writeTo(outputStream);
    }
}
示例2: setHTMLText
import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Stores the given HTML in the {@code text} field and loads it into
 * {@code editorPane}.
 *
 * <p>When the {@code formatt} flag is set, the text is first parsed and run
 * through a {@code SourceFormatter}; the formatted result is what the editor
 * receives, while the {@code text} field keeps the unformatted input.
 *
 * @param htmlText the HTML to display
 */
public void setHTMLText(String htmlText) {
    this.text = htmlText;
    try {
        String toDisplay = htmlText;
        if (formatt) {
            Source parsed = new Source(text);
            parsed.fullSequentialParse();
            toDisplay = new SourceFormatter(parsed).toString();
        }
        editorPane.read(new StringReader(toDisplay), null);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
示例3: strip
import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Removes unwanted tags.
 *
 * <p>Every tag of the parsed document is handed to {@code processTag}; tags it
 * accepts are flagged with {@code VALID_MARKER}, all others are removed from
 * the output. This method itself does not re-encode the text between tags.
 *
 * @param html the markup to clean, may be {@code null}
 * @return the cleaned markup, or an empty string when {@code html} is
 *         {@code null}
 */
public String strip(String html) {
    if (html == null) {
        return "";
    }
    Source parsed = new Source(html);
    parsed.fullSequentialParse();
    OutputDocument cleaned = new OutputDocument(parsed);
    for (Tag current : parsed.getAllTags()) {
        if (!processTag(current, cleaned)) {
            cleaned.remove(current);
            continue;
        }
        current.setUserData(VALID_MARKER);
    }
    return cleaned.toString();
}
示例4: strip
import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Removes unwanted tags and re-encodes the text between them.
 *
 * <p>Every tag of the parsed document is handed to {@code processTag}; tags it
 * accepts are flagged with {@code VALID_MARKER}, all others are removed from
 * the output. The text preceding each tag, and the trailing text after the
 * last tag, is passed to {@code reencodeTextSegment}.
 *
 * @param html the markup to clean, may be {@code null}
 * @return the cleaned markup, or an empty string when {@code html} is
 *         {@code null}
 */
public String strip(String html) {
    if (html == null) {
        return "";
    }
    Source parsed = new Source(html);
    parsed.fullSequentialParse();
    OutputDocument cleaned = new OutputDocument(parsed);

    // Start of the text run that has not been re-encoded yet.
    int textStart = 0;
    for (Tag current : parsed.getAllTags()) {
        boolean accepted = processTag(current, cleaned);
        if (accepted) {
            current.setUserData(VALID_MARKER);
        } else {
            cleaned.remove(current);
        }
        reencodeTextSegment(parsed, cleaned, textStart, current.getBegin());
        textStart = current.getEnd();
    }
    reencodeTextSegment(parsed, cleaned, textStart, parsed.getEnd());
    return cleaned.toString();
}
示例5: ProcessTextDocument
import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Processes the text document: stores an HTML-stripped, XML-escaped copy of
 * the raw content on {@code doc}, then parses the raw content to extract the
 * title.
 *
 * <p>The title is XML-escaped and stored on {@code doc} only when
 * {@code getTitle} returns a non-null value.
 *
 * @return always {@code true}
 */
public boolean ProcessTextDocument() {
    // The content should be plain HTML, preferably not already stripped.
    String stripped = StringEscapeUtils.escapeXml(this.StripHTML(doc.getRawTextContent()));
    doc.setStrippedTextContent(stripped);

    MicrosoftTagTypes.register();
    PHPTagTypes.register();
    // Remove PHP short tags, otherwise they override processing instructions.
    PHPTagTypes.PHP_SHORT.deregister();
    MasonTagTypes.register();

    Source parsed = new Source(doc.getRawTextContent());
    parsed.fullSequentialParse();

    String title = getTitle(parsed);
    if (title != null) {
        String escapedTitle = StringEscapeUtils.escapeXml(title);
        doc.setTitle(escapedTitle);
    }
    return true;
}
示例6: convert
import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Converts the template held in {@code code} into script text.
 *
 * <p>The source between start tags whose name begins with {@code "%"} is
 * passed to {@code addLines}. For each such tag longer than four characters,
 * the two leading and two trailing characters (presumably the {@code <%} and
 * {@code %>} delimiters) are dropped and the remainder is appended verbatim;
 * if the third character is {@code '='}, the remainder after it is wrapped in
 * {@code doc.print(...);} instead.
 *
 * @return the generated script
 */
public String convert() {
    Source parsed = new Source(code);
    parsed.fullSequentialParse();

    StringBuffer script = new StringBuffer();
    int cursor = 0;
    for (StartTag tag : parsed.getAllStartTags()) {
        if (!tag.getName().startsWith("%")) {
            continue;
        }
        addLines(script, parsed, cursor, tag.getBegin());
        cursor = tag.getEnd();

        String raw = tag.toString();
        if (raw.length() <= 4) {
            // Nothing between the delimiters.
            continue;
        }
        int bodyEnd = raw.length() - 2;
        if (raw.charAt(2) == '=') {
            script.append("doc.print(").append(raw.substring(3, bodyEnd)).append(");");
        } else {
            script.append(raw.substring(2, bodyEnd));
        }
    }
    addLines(script, parsed, cursor, parsed.getEnd());
    return script.toString();
}
示例7: getReport
import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Renders the named report to HTML and returns it as a fully parsed
 * {@code Source}.
 *
 * @param name  name of the report element to look up through {@code engine}
 * @param query optional query; when non-null it is passed to the report under
 *              the key {@code "query"}
 * @return the parsed HTML report
 * @throws DataException if no report element with that name exists
 */
public Source getReport(String name, Query query) {
    Element reportElement = engine.getElement(name,
            ReportPlugin.getReportsQualifier(engine).getId());
    if (reportElement == null) {
        throw new DataException("Error.reportNotFound",
                "Report " + name + " not found", name);
    }

    HashMap<String, Object> parameters = new HashMap<String, Object>();
    if (query != null) {
        parameters.put("query", query);
    }

    Source report = new Source(reportQuery.getHTMLReport(reportElement, parameters));
    report.fullSequentialParse();
    return report;
}
示例8: printHTMLPage
import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Parses the raw bytes of the given page and delegates to the
 * {@code printHTMLPage} overload that takes a parsed {@code Source}.
 * Does nothing when the page has no data.
 *
 * @param page the page whose data should be printed
 * @throws IOException if reading the page data or printing fails
 */
public void printHTMLPage(HTMLPage page) throws IOException {
    byte[] raw = page.getData();
    if (raw != null) {
        Source parsed = new Source(new ByteArrayInputStream(raw));
        parsed.fullSequentialParse();
        printHTMLPage(parsed);
    }
}
示例9: actionPerformed
import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Replaces the whole content of {@code editorPane} with a formatted version
 * of the current text, produced by a {@code SourceFormatter}.
 *
 * @param e the triggering event (not inspected)
 */
@Override
public void actionPerformed(ActionEvent e) {
    Source parsed = new Source(getText());
    parsed.fullSequentialParse();
    String formatted = new SourceFormatter(parsed).toString();

    editorPane.selectAll();
    editorPane.replaceSelection(formatted);
}
示例10: sanitise
import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Sanitises the given markup by filtering its tags through {@code processTag}.
 *
 * <p>Tags accepted by {@code processTag} are flagged with {@code VALID_MARKER}.
 * Rejected tags are left in place unless {@code stripInvalidElements} is set,
 * in which case they are removed. A rejected tag named {@code style} is removed
 * together with everything that follows it up to the start of the next tag
 * (or the end of the document), so the style sheet text does not leak into the
 * output.
 *
 * <p>This method does not re-encode the text between tags.
 *
 * @param pseudoHTML           the markup to sanitise
 * @param formatWhiteSpace     currently unused by this method; kept for
 *                             interface compatibility
 * @param stripInvalidElements whether rejected tags are removed from the output
 * @return the sanitised markup
 */
private static String sanitise(String pseudoHTML, boolean formatWhiteSpace, boolean stripInvalidElements) {
    Source source = new Source(pseudoHTML);
    source.fullSequentialParse();
    OutputDocument outputDocument = new OutputDocument(source);
    for (Tag tag : source.getAllTags()) {
        if (processTag(tag, outputDocument)) {
            tag.setUserData(VALID_MARKER);
            continue;
        }
        if (!stripInvalidElements) {
            continue; // rejected tag stays in the output untouched
        }
        if (tag.getName().equalsIgnoreCase("style")) {
            // NOTE(review): if end tags also report the name "style", text
            // following </style> up to the next tag is dropped as well -
            // confirm this is intended.
            Tag nextTag = tag.getNextTag();
            // The end position passed to remove() is exclusive, so the next
            // tag's begin is used as-is; subtracting one would leave the last
            // character before that tag in the output.
            int endPos = (nextTag != null) ? nextTag.getBegin() : source.getEnd();
            outputDocument.remove(tag.getBegin(), endPos);
        } else {
            outputDocument.remove(tag);
        }
    }
    return outputDocument.toString();
}
示例11: realWriteWithHTMLUpdate
import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Flushes this writer and writes the buffered HTML to {@code outputStream},
 * injecting the collected {@code styles} into the document.
 *
 * <p>If the buffered content has no {@code <html>} start tag, the work is
 * delegated to {@code realWrite()}. Otherwise the styles are inserted:
 * <ul>
 *   <li>at the end of the first {@code <style>} element if one exists;</li>
 *   <li>else, wrapped in a new {@code <style>} element, at the end of the
 *       first {@code <head>} element;</li>
 *   <li>else, wrapped in new {@code <head>} and {@code <style>} elements,
 *       directly after the {@code <html>} start tag.</li>
 * </ul>
 *
 * <p>The current {@code this.out} is cast to a {@link ByteArrayOutputStream}.
 *
 * @throws IOException if decoding the buffer or writing the result fails
 */
public void realWriteWithHTMLUpdate() throws IOException {
    flush();
    ByteArrayOutputStream out = (ByteArrayOutputStream) this.out;
    Source source = new Source(new String(out.toByteArray(), "UTF-8"));
    source.fullSequentialParse();
    List<StartTag> htmlTags = source.getAllStartTags("html");
    if (htmlTags.isEmpty()) {
        realWrite();
        return;
    }
    this.out = outputStream;
    OutputStreamWriter writer = new OutputStreamWriter(this.out, "UTF-8");
    OutputDocument document = new OutputDocument(source);

    StringBuffer style = new StringBuffer();
    for (Style collected : this.styles) {
        style.append(collected.toString());
    }

    List<StartTag> styleTags = source.getAllStartTags("style");
    if (!styleTags.isEmpty()) {
        document.insert(insertionPointAtElementEnd(styleTags.get(0)), style);
    } else {
        style.insert(0, "\n<style>\n");
        style.append("</style>\n");
        List<StartTag> headTags = source.getAllStartTags("head");
        if (!headTags.isEmpty()) {
            document.insert(insertionPointAtElementEnd(headTags.get(0)), style);
        } else {
            style.insert(0, "\n<head>\n");
            style.append("</head>\n");
            // No <head> to anchor on: place the new head right after the
            // <html> start tag, which is known to exist at this point.
            document.insert(htmlTags.get(0).getEnd(), style);
        }
    }
    document.writeTo(writer);
    writer.flush();
}

/**
 * Returns the position just before the end tag of the element opened by the
 * given start tag, or the element's end position when it has no end tag.
 */
private int insertionPointAtElementEnd(StartTag startTag) {
    if (startTag.getElement().getEndTag() != null) {
        return startTag.getElement().getEndTag().getBegin();
    }
    return startTag.getElement().getEnd();
}
示例12: main
import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Extracts text from every file in the input directory.
 *
 * <p>For each regular file two outputs are written to the output directory,
 * named after the input file with a trailing {@code .html} extension removed:
 * <ul>
 *   <li>{@code <name>.txt} - the title, the {@code description} meta value and
 *       the extracted text of every {@code div} whose id is
 *       {@code article-body-blocks};</li>
 *   <li>{@code <name>.key} - the {@code keywords} meta value, one keyword per
 *       line (empty when the document declares no keywords).</li>
 * </ul>
 *
 * @param args {@code args[0]} optionally overrides the input directory;
 *             {@code args[1]} optionally sets the output directory, which
 *             otherwise defaults to the input directory
 * @throws IllegalArgumentException if the input directory cannot be listed
 * @throws Exception if reading a document or writing an output file fails
 */
public static void main(String[] args) throws Exception {
    if (args.length > 0)
        inDir = args[0];
    if (args.length > 1)
        outDir = args[1];
    else
        outDir = inDir;

    // listFiles() returns null when the path is not a directory or cannot be read.
    File[] docs = new File(inDir).listFiles();
    if (docs == null)
        throw new IllegalArgumentException("Not a readable directory: " + inDir);

    // Tag type registration is global, so once is enough for all documents.
    MicrosoftConditionalCommentTagTypes.register();
    PHPTagTypes.register();
    PHPTagTypes.PHP_SHORT.deregister(); // remove PHP short tags, otherwise they override processing instructions
    MasonTagTypes.register();

    for (File doc : docs) {
        if (!doc.isFile())
            continue;
        // Escaped and anchored so only a real ".html" extension is stripped.
        String baseName = doc.getName().replaceFirst("\\.html$", "");
        Source source = new Source(doc.toURI().toURL());
        // Call fullSequentialParse manually as most of the source will be parsed.
        source.fullSequentialParse();

        PrintStream ps = new PrintStream(new File(outDir, baseName + ".txt"));
        try {
            String title = getTitle(source);
            ps.print(title + "\n\n");
            String description = getMetaValue(source, "description");
            ps.println(description + "\n\n");

            PrintStream psKey = new PrintStream(new File(outDir, baseName + ".key"));
            try {
                String keywords = getMetaValue(source, "keywords");
                if (keywords != null)
                    psKey.print(keywords.replaceAll("\\s*,\\s*", "\n"));
            } finally {
                psKey.close();
            }

            for (Element div : source.getAllElements(HTMLElementName.DIV)) {
                if ("article-body-blocks".equals(div.getAttributeValue("id"))) {
                    String article = div.getContent().getTextExtractor().toString();
                    ps.println(article);
                }
            }
        } finally {
            ps.close();
        }
    }
}