当前位置: 首页>>代码示例>>Java>>正文


Java Source.fullSequentialParse方法代码示例

本文整理汇总了Java中net.htmlparser.jericho.Source.fullSequentialParse方法的典型用法代码示例。如果您正苦于以下问题:Java Source.fullSequentialParse方法的具体用法?Java Source.fullSequentialParse怎么用?Java Source.fullSequentialParse使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在net.htmlparser.jericho.Source的用法示例。


在下文中一共展示了Source.fullSequentialParse方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: realWrite

import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Calls {@code flush()}, switches {@code this.out} over to {@code outputStream}
 * and writes the content that was collected in the previous
 * {@code ByteArrayOutputStream}.
 *
 * <p>When {@code printHeadBodyTags} is {@code false} the collected bytes are
 * copied through unchanged. Otherwise an html document is printed whose head
 * contains one line per entry of {@code styles}; if the collected markup has a
 * {@code body} start tag, the first such element is printed as is, otherwise
 * the whole markup is wrapped in a new body element.
 *
 * @param printHeadBodyTags whether to print the surrounding html/head/body markup
 * @throws IOException if writing the collected bytes fails
 */
public void realWrite(boolean printHeadBodyTags) throws IOException {
    flush();
    ByteArrayOutputStream collected = (ByteArrayOutputStream) this.out;
    this.out = outputStream;

    if (!printHeadBodyTags) {
        collected.writeTo(outputStream);
        return;
    }

    println("<html>");
    println("<head>");
    println("<style>");
    for (Style style : styles) {
        println(style);
    }
    println("</style>");
    println("</head>");

    String markup = new String(collected.toByteArray(), "UTF-8");
    Source parsed = new Source(markup);
    parsed.fullSequentialParse();

    List<StartTag> bodyTags = parsed.getAllStartTags("body");
    if (!bodyTags.isEmpty()) {
        // The markup already has a body element: print the first one unchanged.
        println(new StringBuffer(bodyTags.get(0).getElement()));
    } else {
        println("<body>");
        println(markup);
        println("</body>");
    }

    println("</html>");
}
 
开发者ID:Vitaliy-Yakovchuk,项目名称:ramus,代码行数:32,代码来源:Out.java

示例2: setHTMLText

import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Stores {@code htmlText} in {@code text} and loads it into {@code editorPane}.
 * When the {@code formatt} flag is set, the text shown in the editor is the
 * output of a Jericho {@code SourceFormatter} run over the stored text.
 *
 * <p>An {@code IOException} raised while the editor reads the text is printed
 * to standard error and not propagated.
 *
 * @param htmlText the HTML to store and display
 */
public void setHTMLText(String htmlText) {
    this.text = htmlText;

    String displayed = htmlText;
    if (formatt) {
        Source parsed = new Source(text);
        parsed.fullSequentialParse();
        displayed = new SourceFormatter(parsed).toString();
    }

    try {
        editorPane.read(new StringReader(displayed), null);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
 
开发者ID:Vitaliy-Yakovchuk,项目名称:ramus,代码行数:17,代码来源:HTMLView.java

示例3: strip

import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
	 * Removes unwanted tags from the given HTML.
	 *
	 * <p>Every tag found in the parsed source is handed to {@code processTag};
	 * tags it accepts are marked with {@code VALID_MARKER}, all others are
	 * removed from the output. Text between tags is not re-encoded here.
	 *
	 * @param html the HTML to clean; may be {@code null}
	 * @return the resulting markup, or an empty string when {@code html} is {@code null}
	 */
	public String strip(String html) {
		if (html == null) {
			return "";
		}

		Source parsed = new Source(html);
		parsed.fullSequentialParse();
		OutputDocument result = new OutputDocument(parsed);

		for (Tag current : parsed.getAllTags()) {
			boolean accepted = processTag(current, result);
			if (!accepted) {
				result.remove(current);
				continue;
			}
			current.setUserData(VALID_MARKER);
		}
		return result.toString();
	}
 
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:28,代码来源:HtmlStripperDiscussion.java

示例4: strip

import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Removes unwanted tags from the given HTML and re-encodes the text between tags.
 *
 * <p>Every tag found in the parsed source is handed to {@code processTag};
 * tags it accepts are marked with {@code VALID_MARKER}, all others are removed
 * from the output. The text preceding each tag, and the text after the last
 * tag, is passed to {@code reencodeTextSegment}.
 *
 * @param html the HTML to clean; may be {@code null}
 * @return the resulting markup, or an empty string when {@code html} is {@code null}
 */
public String strip(String html) {
	if (html == null) {
		return "";
	}

	Source parsed = new Source(html);
	parsed.fullSequentialParse();
	OutputDocument result = new OutputDocument(parsed);

	// Start of the text run that has not been re-encoded yet.
	int textStart = 0;
	for (Tag current : parsed.getAllTags()) {
		if (!processTag(current, result)) {
			result.remove(current);
		} else {
			current.setUserData(VALID_MARKER);
		}
		reencodeTextSegment(parsed, result, textStart, current.getBegin());
		textStart = current.getEnd();
	}
	reencodeTextSegment(parsed, result, textStart, parsed.getEnd());

	return result.toString();
}
 
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:27,代码来源:HtmlStripper.java

示例5: ProcessTextDocument

import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Stores an XML-escaped, HTML-stripped copy of the document's raw text on
 * {@code doc} and, when {@code getTitle} yields a non-null title, stores the
 * XML-escaped title as well.
 *
 * @return always {@code true}
 */
public boolean ProcessTextDocument() {
    // The raw content is expected to be plain HTML, preferably not yet stripped.
    String stripped = StringEscapeUtils.escapeXml(this.StripHTML(doc.getRawTextContent()));
    doc.setStrippedTextContent(stripped);

    MicrosoftTagTypes.register();
    PHPTagTypes.register();
    // Remove PHP short tags, otherwise they override processing instructions.
    PHPTagTypes.PHP_SHORT.deregister();
    MasonTagTypes.register();

    Source parsed = new Source(doc.getRawTextContent());
    parsed.fullSequentialParse();

    String title = getTitle(parsed);
    if (title == null) {
        return true;
    }

    doc.setTitle(StringEscapeUtils.escapeXml(title));
    return true;
}
 
开发者ID:shiftdirector,项目名称:youseer,代码行数:30,代码来源:Worker.java

示例6: convert

import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Converts the markup held in {@code code} into script text.
 *
 * <p>Source text outside of start tags whose name begins with {@code "%"} is
 * passed to {@code addLines}. For each such tag longer than four characters,
 * the tag text minus its first and last two characters is appended to the
 * result; when the third character is {@code '='} the remainder after it is
 * wrapped in {@code doc.print(...);} instead.
 * NOTE(review): this presumably targets {@code <% ... %>} and
 * {@code <%= ... %>} server tags - confirm against the tag types registered
 * by the caller.
 *
 * @return the converted script
 */
public String convert() {
    Source parsed = new Source(code);
    parsed.fullSequentialParse();

    StringBuffer script = new StringBuffer();
    // End of the last "%" tag handled so far; plain source before it is already emitted.
    int emittedUpTo = 0;

    for (StartTag tag : parsed.getAllStartTags()) {
        if (!tag.getName().startsWith("%")) {
            continue;
        }

        addLines(script, parsed, emittedUpTo, tag.getBegin());
        emittedUpTo = tag.getEnd();

        String tagText = tag.toString();
        if (tagText.length() <= 4) {
            continue;
        }

        boolean printsValue = tagText.charAt(2) == '=';
        String inner = tagText.substring(printsValue ? 3 : 2, tagText.length() - 2);
        if (printsValue) {
            script.append("doc.print(").append(inner).append(");");
        } else {
            script.append(inner);
        }
    }

    addLines(script, parsed, emittedUpTo, parsed.getEnd());
    return script.toString();
}
 
开发者ID:Vitaliy-Yakovchuk,项目名称:ramus,代码行数:33,代码来源:JSSPToJsConverter.java

示例7: getReport

import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Looks up the report element called {@code name}, renders it to HTML through
 * {@code reportQuery} and returns the fully parsed result.
 *
 * @param name  name of the report element to look up
 * @param query optional query; when non-null it is passed to the report under
 *              the key {@code "query"}
 * @return the parsed HTML report
 * @throws DataException if no element with the given name is found
 */
public Source getReport(String name, Query query) {
    Element element = engine.getElement(name,
            ReportPlugin.getReportsQualifier(engine).getId());
    if (element == null) {
        throw new DataException("Error.reportNotFound",
                "Report " + name + " not found", name);
    }

    HashMap<String, Object> parameters = new HashMap<String, Object>();
    if (query != null) {
        parameters.put("query", query);
    }

    Source report = new Source(reportQuery.getHTMLReport(element, parameters));
    report.fullSequentialParse();
    return report;
}
 
开发者ID:Vitaliy-Yakovchuk,项目名称:ramus,代码行数:15,代码来源:Data.java

示例8: printHTMLPage

import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Parses the bytes of {@code page} as HTML and hands the parsed source to
 * {@code printHTMLPage(Source)}. Does nothing when the page has no data.
 *
 * @param page the page whose data is printed
 * @throws IOException if reading or printing the page data fails
 */
public void printHTMLPage(HTMLPage page) throws IOException {
    byte[] content = page.getData();
    if (content != null) {
        Source parsed = new Source(new ByteArrayInputStream(content));
        parsed.fullSequentialParse();
        printHTMLPage(parsed);
    }
}
 
开发者ID:Vitaliy-Yakovchuk,项目名称:ramus,代码行数:9,代码来源:Out.java

示例9: actionPerformed

import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Replaces the whole content of {@code editorPane} with the output of a
 * Jericho {@code SourceFormatter} run over the text returned by {@code getText()}.
 *
 * @param e the triggering event; not inspected
 */
@Override
public void actionPerformed(ActionEvent e) {
    Source parsed = new Source(getText());
    parsed.fullSequentialParse();

    String formatted = new SourceFormatter(parsed).toString();

    editorPane.selectAll();
    editorPane.replaceSelection(formatted);
}
 
开发者ID:Vitaliy-Yakovchuk,项目名称:ramus,代码行数:11,代码来源:ScriptEditorView.java

示例10: sanitise

import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Filters the tags of {@code pseudoHTML} through {@code processTag} and returns
 * the resulting markup.
 *
 * <p>Tags accepted by {@code processTag} are marked with {@code VALID_MARKER}.
 * Rejected tags are left in place when {@code stripInvalidElements} is
 * {@code false}; otherwise they are removed. A rejected tag named "style" is
 * removed together with everything up to the start of the next tag (or the end
 * of the source when no tag follows), so that the style sheet text does not
 * remain in the output.
 *
 * @param pseudoHTML           the markup to sanitise
 * @param formatWhiteSpace     currently unused: the text re-encoding step that
 *                             consumed it is disabled in this method
 * @param stripInvalidElements whether rejected tags are removed from the output
 * @return the sanitised markup
 */
private static String sanitise(String pseudoHTML, boolean formatWhiteSpace, boolean stripInvalidElements) {
	Source source=new Source(pseudoHTML);
	source.fullSequentialParse();
	OutputDocument outputDocument=new OutputDocument(source);
	for (Tag tag : source.getAllTags()) {
		if (processTag(tag,outputDocument)) {
			tag.setUserData(VALID_MARKER);
			continue;
		}
		if (!stripInvalidElements) continue; // rejected tag stays in the output unchanged
		if (tag.getName().equalsIgnoreCase("style")) {
			// The end position of a removal is exclusive, so the range must run right up
			// to the next tag's begin; stopping one short would leave the last character
			// of the style content behind.
			// NOTE(review): this branch also matches a rejected </style> end tag, in which
			// case the text following it is removed as well - confirm whether intended.
			Tag nextTag=tag.getNextTag();
			int endPos=(nextTag!=null) ? nextTag.getBegin() : source.getEnd();
			outputDocument.remove(tag.getBegin(),endPos);
		} else {
			outputDocument.remove(tag);
		}
	}
	return outputDocument.toString();
}
 
开发者ID:trackplus,项目名称:Genji,代码行数:32,代码来源:HTMLSanitiser.java

示例11: realWriteWithHTMLUpdate

import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Writes the collected output to {@code outputStream}, injecting the entries
 * of {@code styles} into the document when the collected markup already is a
 * complete HTML document.
 *
 * <p>If the markup has no {@code html} start tag, the work is delegated to
 * {@code realWrite()}. Otherwise the styles are inserted at the first place
 * that applies:
 * <ol>
 *   <li>at the end of the content of the first {@code style} element;</li>
 *   <li>wrapped in a new {@code style} element at the end of the content of
 *       the first {@code head} element;</li>
 *   <li>wrapped in new {@code head} and {@code style} elements directly after
 *       the first {@code html} start tag.</li>
 * </ol>
 *
 * @throws IOException if writing to the underlying stream fails
 */
public void realWriteWithHTMLUpdate() throws IOException {
    flush();
    ByteArrayOutputStream out = (ByteArrayOutputStream) this.out;
    Source source = new Source(new String(out.toByteArray(), "UTF-8"));
    source.fullSequentialParse();
    List<StartTag> htmlTags = source.getAllStartTags("html");
    if (htmlTags.isEmpty()) {
        realWrite();
        return;
    }

    this.out = outputStream;

    OutputStreamWriter writer = new OutputStreamWriter(this.out, "UTF-8");
    OutputDocument document = new OutputDocument(source);

    StringBuffer style = new StringBuffer();
    for (Style registered : this.styles) {
        style.append(registered.toString());
    }

    List<StartTag> styleTags = source.getAllStartTags("style");
    if (!styleTags.isEmpty()) {
        // The content end equals the end tag's begin when an end tag exists, and stays
        // valid (no null dereference) when the end tag is missing.
        document.insert(styleTags.get(0).getElement().getContent().getEnd(), style);
    } else {
        style.insert(0, "\n<style>\n");
        style.append("</style>\n");

        List<StartTag> headTags = source.getAllStartTags("head");
        if (!headTags.isEmpty()) {
            document.insert(headTags.get(0).getElement().getContent().getEnd(), style);
        } else {
            // No head element to insert into: create one right after the <html>
            // start tag, which is known to exist at this point.
            style.insert(0, "\n<head>\n");
            style.append("</head>\n");
            document.insert(htmlTags.get(0).getEnd(), style);
        }
    }

    // Only flush: the writer wraps this.out, which stays open for the caller.
    document.writeTo(writer);
    writer.flush();
}
 
开发者ID:Vitaliy-Yakovchuk,项目名称:ramus,代码行数:46,代码来源:Out.java

示例12: main

import net.htmlparser.jericho.Source; //导入方法依赖的package包/类
/**
 * Extracts text from every regular file in the input directory.
 *
 * <p>For each file two outputs are written next to each other in the output
 * directory, named after the input file with a trailing {@code .html} removed:
 * a {@code .txt} file holding the title, the "description" meta value and the
 * text of every {@code div} whose id is {@code article-body-blocks}, and a
 * {@code .key} file holding the "keywords" meta value with one keyword per line.
 *
 * <p>As before, directory arguments are concatenated directly with file names,
 * so they are expected to end with a path separator.
 *
 * @param args optional: {@code args[0]} input directory, {@code args[1]} output
 *             directory (defaults to the input directory)
 * @throws Exception if the input directory cannot be listed, or if reading,
 *                   parsing or writing a file fails
 */
public static void main(String[] args) throws Exception {
	if (args.length > 0) {
		inDir = args[0];
	}
	if (args.length > 1) {
		outDir = args[1];
	} else {
		outDir = inDir;
	}

	// listFiles() returns null when the path is not a directory or cannot be read.
	File[] docs = new File(inDir).listFiles();
	if (docs == null) {
		throw new IllegalArgumentException("Not a readable directory: " + inDir);
	}

	// Tag type registration is global, so it is done once instead of once per file.
	MicrosoftConditionalCommentTagTypes.register();
	PHPTagTypes.register();
	PHPTagTypes.PHP_SHORT.deregister(); // remove PHP short tags, otherwise they override processing instructions
	MasonTagTypes.register();

	for (File doc : docs) {
		if (!doc.isFile()) {
			continue;
		}

		// Strip only a literal, trailing ".html"; the unanchored pattern ".html" would
		// also match e.g. the "xhtml" at the start of "xhtml-guide.html".
		String baseName = doc.getName().replaceFirst("\\.html$", "");
		String sourceUrlString = "file:" + inDir + doc.getName();
		Source source = new Source(new URL(sourceUrlString));

		PrintStream ps = new PrintStream(outDir + baseName + ".txt");
		try {
			PrintStream psKey = new PrintStream(outDir + baseName + ".key");
			try {
				// Call fullSequentialParse manually as most of the source will be parsed.
				source.fullSequentialParse();

				String title = getTitle(source);
				ps.print(title + "\n\n");

				String description = getMetaValue(source, "description");
				ps.println(description + "\n\n");

				String keywords = getMetaValue(source, "keywords");
				if (keywords != null) {
					psKey.print(keywords.replaceAll("\\s*,\\s*", "\n"));
				}
			} finally {
				psKey.close();
			}

			List<Element> divElements = source.getAllElements(HTMLElementName.DIV);
			for (Element div : divElements) {
				String id = div.getAttributeValue("id");
				if (id != null && id.equals("article-body-blocks")) {
					ps.println(div.getContent().getTextExtractor().toString());
				}
			}
		} finally {
			// Close the output even when parsing or extraction fails.
			ps.close();
		}
	}
}
 
开发者ID:EUMSSI,项目名称:EUMSSI-tools,代码行数:55,代码来源:ExtractGuardian.java


注:本文中的net.htmlparser.jericho.Source.fullSequentialParse方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。