当前位置: 首页>>代码示例>>Java>>正文


Java HTMLElementName类代码示例

本文整理汇总了Java中net.htmlparser.jericho.HTMLElementName的典型用法代码示例。如果您正苦于以下问题:Java HTMLElementName类的具体用法?Java HTMLElementName怎么用?Java HTMLElementName使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


HTMLElementName类属于net.htmlparser.jericho包,在下文中一共展示了HTMLElementName类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: isValidTRTag

import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Decides whether a TR tag is nested acceptably inside a table.
 * <p>
 * The tag is accepted when its parent element is a TABLE, or when its parent is a
 * TBODY/THEAD whose own parent is a TABLE. Every ancestor that is consulted must
 * carry {@code VALID_MARKER} as the user data of its start tag.
 *
 * @param tag the TR tag to check
 * @return {@code true} if the nesting is acceptable, {@code false} otherwise
 */
private static boolean isValidTRTag(Tag tag) {
	final Element parent=tag.getElement().getParentElement();
	// A TR without a parent, or below a parent that was not marked valid, is ignored.
	if (parent==null || parent.getStartTag().getUserData()!=VALID_MARKER) {
		return false;
	}

	// NOTE(review): names are compared by identity; this presumably relies on Jericho
	// returning the HTMLElementName constants from getName() - confirm before changing.
	final String parentName=parent.getName();
	if (parentName==HTMLElementName.TABLE) {
		return true;
	}
	final boolean insideSection=parentName==HTMLElementName.TBODY || parentName==HTMLElementName.THEAD;
	if (!insideSection) {
		return false;
	}

	// For a TBODY/THEAD parent, the grandparent must be a valid TABLE.
	final Element grandparent=parent.getParentElement();
	return grandparent!=null
			&& grandparent.getStartTag().getUserData()==VALID_MARKER
			&& grandparent.getName()==HTMLElementName.TABLE;
}
 
开发者ID:trackplus,项目名称:Genji,代码行数:20,代码来源:HTMLSanitiser.java

示例2: printHTMLPage

import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Writes the content of the document's BODY element to {@code this.out} as UTF-8.
 * <p>
 * The text of every STYLE element is collected and handed to
 * {@code Style.getStyles}; the resulting rules are then passed, together with each
 * start tag that follows the BODY start tag, to {@code replaceAttrs}. Nothing is
 * written when the document has no BODY start tag.
 *
 * @param source the parsed HTML document
 * @throws UnsupportedEncodingException if the UTF-8 encoding is not supported
 * @throws IOException if writing to the output stream fails
 */
public void printHTMLPage(Source source)
        throws UnsupportedEncodingException, IOException {
    // Collect the CSS in a builder rather than by repeated String concatenation.
    final StringBuilder css = new StringBuilder();
    for (final StartTag styleTag : source.getAllStartTags(HTMLElementName.STYLE)) {
        // getContent() covers the span between the start tag and the end tag and,
        // unlike getEndTag().getBegin(), does not fail when the end tag is missing.
        css.append(styleTag.getElement().getContent().toString());
    }
    final Vector<ReplaceRight> rights = Style.getStyles(css.toString(), styles, counter);

    // Generate the page with the updated styles.
    final Iterator<StartTag> iterator = source.getAllStartTags().iterator();
    StartTag body = null;
    while (iterator.hasNext()) {
        final StartTag candidate = iterator.next();
        if (HTMLElementName.BODY.equals(candidate.getName())) {
            body = candidate;
            break;
        }
    }
    if (body == null) {
        return;
    }

    // The iterator is now positioned just after BODY, so only later tags are processed.
    final OutputDocument document = new OutputDocument(source);
    while (iterator.hasNext()) {
        replaceAttrs(iterator.next(), document, rights);
    }

    // BODY's end tag is optional in HTML; getContent() also handles its absence.
    final Segment bodyContent = body.getElement().getContent();
    // The writer is flushed but not closed.
    // NOTE(review): presumably this.out is owned by the caller - confirm.
    final OutputStreamWriter writer = new OutputStreamWriter(this.out, "UTF-8");
    document.writeTo(writer, bodyContent.getBegin(), bodyContent.getEnd());
    writer.flush();
}
 
开发者ID:Vitaliy-Yakovchuk,项目名称:ramus,代码行数:41,代码来源:Out.java

示例3: main

import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Ad-hoc driver: reads {@code d:/test.html}, gathers the text of all STYLE
 * elements, passes it to {@code getStyles} and prints every resulting style to
 * {@code d:/res1.txt}. I/O failures are reported with {@code printStackTrace()}.
 * <p>
 * Both files are closed in {@code finally} blocks so that no handle leaks when
 * parsing or writing fails. The output file is opened only after the input has
 * been processed.
 *
 * @param args ignored
 */
public static void main(final String[] args) {
    try {
        // Collect the CSS in a builder rather than by repeated String concatenation.
        final StringBuilder css = new StringBuilder();
        final FileInputStream is = new FileInputStream("d:/test.html");
        try {
            final Source source = new Source(is);
            for (final StartTag tag : source.getAllStartTags(HTMLElementName.STYLE)) {
                // getContent() covers the span between the start tag and the end tag
                // and does not fail when the end tag is missing.
                css.append(tag.getElement().getContent().toString());
            }
        } finally {
            is.close();
        }

        final Vector<Style> styles = new Vector<Style>();
        getStyles(css.toString(), styles, createCounter());

        final PrintStream out = new PrintStream(new FileOutputStream("d:/res1.txt"));
        try {
            for (final Style style : styles) {
                out.println(style);
            }
        } finally {
            // Closing the PrintStream also closes the underlying file stream.
            out.close();
        }
    } catch (final IOException e) {
        e.printStackTrace();
    }
}
 
开发者ID:Vitaliy-Yakovchuk,项目名称:ramus,代码行数:30,代码来源:Style.java

示例4: modifyDocument

import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Rewrites relative resource references into externally accessible absolute paths.
 * <p>
 * Delegates to {@code replaceUrlAttribute} for, in this order, the {@code href}
 * attribute of LINK tags and the {@code src} attribute of SCRIPT and IMG tags.
 *
 * @param html the document
 * @param baseDir location of the html document
 * @param out receives the processed document
 */
protected void modifyDocument( Source html, PathOrigin baseDir, OutputDocument out ) {
  // Element name paired with the attribute that holds its URL; order is significant.
  final String[][] urlAttributes = {
    { HTMLElementName.LINK, "href" },
    { HTMLElementName.SCRIPT, "src" },
    { HTMLElementName.IMG, "src" }
  };
  for ( String[] pair : urlAttributes ) {
    replaceUrlAttribute( html.getAllStartTags( pair[0] ), pair[1], baseDir, out );
  }
  // Disabled step: inserting getCodeSnippet( getBackendAssignments( getUrlProvider() ) )
  // at the beginning of the HEAD element's end tag.
}
 
开发者ID:webdetails,项目名称:cte,代码行数:14,代码来源:ProcessedHtmlPage.java

示例5: processTag

import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Validates a single tag and, when it is accepted, replaces it in the output
 * document with regenerated markup.
 * <p>
 * A tag is rejected when its name is not in {@code VALID_ELEMENT_NAMES}, when it is
 * neither a normal start tag nor a normal end tag, when a table-related or LI tag
 * is not validly nested, when a start tag lacks a required end tag, or when an end
 * tag has no associated element. A missing optional end tag is inserted.
 *
 * @param tag the tag to examine
 * @param outputDocument document receiving the replacement markup
 * @return {@code true} if the tag was accepted, {@code false} if it was rejected
 */
private static boolean processTag(Tag tag, OutputDocument outputDocument) {
	final String name=tag.getName();
	if (!VALID_ELEMENT_NAMES.contains(name)) {
		return false;
	}

	if (tag.getTagType()==StartTagType.NORMAL) {
		final Element element=tag.getElement();
		if (isMisplacedTableTag(name,tag)) {
			return false;
		}
		if (HTMLElements.getEndTagRequiredElementNames().contains(name)) {
			// Reject a start tag whose required end tag is missing.
			if (element.getEndTag()==null) {
				return false;
			}
		} else if (HTMLElements.getEndTagOptionalElementNames().contains(name)) {
			// Reject invalid LI tags.
			if (name==HTMLElementName.LI && !isValidLITag(tag)) {
				return false;
			}
			// Insert the optional end tag if it is missing.
			if (element.getEndTag()==null) {
				outputDocument.insert(element.getEnd(),getEndTagHTML(name));
			}
		}
		outputDocument.replace(tag,getStartTagHTML(element.getStartTag()));
		return true;
	}

	if (tag.getTagType()==EndTagType.NORMAL) {
		// Reject end tags that are not associated with a start tag.
		if (tag.getElement()==null) {
			return false;
		}
		if (isMisplacedTableTag(name,tag)) {
			return false;
		}
		// Reject invalid LI tags.
		if (name==HTMLElementName.LI && !isValidLITag(tag)) {
			return false;
		}
		outputDocument.replace(tag,getEndTagHTML(name));
		return true;
	}

	// Reject abnormal tags.
	return false;
}

/** Reports whether a THEAD, TBODY, TR, TD or TH tag fails its nesting check; other names never do. */
private static boolean isMisplacedTableTag(String name, Tag tag) {
	if (name==HTMLElementName.THEAD || name==HTMLElementName.TBODY) {
		return !isValidTbodyTHeadTag(tag);
	}
	if (name==HTMLElementName.TR) {
		return !isValidTRTag(tag);
	}
	if (name==HTMLElementName.TD || name==HTMLElementName.TH) {
		return !isValidTDTHTag(tag);
	}
	return false;
}
 
开发者ID:trackplus,项目名称:Genji,代码行数:35,代码来源:HTMLSanitiser.java

示例6: getMetaValue

import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Returns the {@code content} attribute of the first META start tag found by
 * searching for start tags whose {@code name} attribute matches the given key.
 *
 * @param source the document to search
 * @param key the value looked for in the {@code name} attribute
 * @return the {@code content} attribute value, or {@code null} if no such META tag exists
 */
private static String getMetaValue(Source source, String key) {
	int pos=0;
	while (pos<source.length()) {
		// NOTE(review): the trailing false is presumably Jericho's value-case-sensitivity flag - confirm.
		final StartTag candidate=source.getNextStartTag(pos,"name",key,false);
		if (candidate==null) {
			break;
		}
		if (candidate.getName()==HTMLElementName.META) {
			// Attribute values are automatically decoded.
			return candidate.getAttributeValue("content");
		}
		// Not a META tag: resume the search after this tag.
		pos=candidate.getEnd();
	}
	return null;
}
 
开发者ID:EUMSSI,项目名称:EUMSSI-tools,代码行数:11,代码来源:ExtractGuardian.java

示例7: processTag

import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Checks one tag against the whitelist and, when it is accepted, replaces it in the
 * output document with regenerated markup.
 * <p>
 * Start tags are rejected when the element requires an end tag that is missing or
 * when an LI is not validly nested; a missing optional end tag is inserted. End
 * tags are rejected when they have no associated element or belong to an invalidly
 * nested LI. Tags of any other type are rejected.
 *
 * @param tag the tag to examine
 * @param output document receiving the replacement markup
 * @return {@code true} if the tag was accepted, {@code false} if it was rejected
 */
private boolean processTag(Tag tag, OutputDocument output) {
	// Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
	String elementName = tag.getName().toLowerCase(java.util.Locale.ROOT);
	if (!allowedTags.contains(elementName))
		return false;
	// equals() instead of ==: a lower-cased copy is not guaranteed to be the same
	// String instance as the HTMLElementName constant.
	final boolean isListItem = HTMLElementName.LI.equals(elementName);
	if (tag.getTagType() == StartTagType.NORMAL) {
		Element element = tag.getElement();
		if (HTMLElements.getEndTagRequiredElementNames().contains(elementName)) {
			// reject start tag if its required end tag is missing
			if (element.getEndTag() == null)
				return false;
		} else if (HTMLElements.getEndTagOptionalElementNames().contains(elementName)) {
			// reject invalid LI tags
			if (isListItem && !isValidLITag(tag))
				return false;
			// insert optional end tag if it is missing
			if (element.getEndTag() == null)
				output.insert(element.getEnd(), getEndTagHTML(elementName));
		}
		output.replace(tag, getStartTagHTML(element.getStartTag()));
	} else if (tag.getTagType() == EndTagType.NORMAL) {
		// reject end tags that aren't associated with a start tag
		if (tag.getElement() == null)
			return false;
		// reject invalid LI tags
		if (isListItem && !isValidLITag(tag))
			return false;
		output.replace(tag, getEndTagHTML(elementName));
	} else {
		return false; // reject abnormal tags
	}
	return true;
}
 
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:39,代码来源:HtmlStripperDiscussion.java

示例8: isValidLITag

import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Decides whether an LI tag is nested acceptably.
 *
 * @param tag the LI tag to check
 * @return {@code true} only if the immediate parent is a UL or OL whose start tag
 *         carries {@code VALID_MARKER} as user data
 */
private boolean isValidLITag(Tag tag) {
	final Element parent = tag.getElement().getParentElement();
	// LI elements without a parent, or whose parent is not valid, are ignored.
	if (parent == null || parent.getStartTag().getUserData() != VALID_MARKER) {
		return false;
	}
	// Only accept LI tags whose immediate parent is UL or OL.
	final String parentName = parent.getName();
	return parentName == HTMLElementName.UL || parentName == HTMLElementName.OL;
}
 
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:15,代码来源:HtmlStripperDiscussion.java

示例9: processTag

import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Checks one tag against the whitelist and, when it is accepted, replaces it in the
 * output document with regenerated markup.
 * <p>
 * Start tags are rejected when the element requires an end tag that is missing or
 * when an LI is not validly nested; a missing optional end tag is inserted. End
 * tags are rejected when they have no associated element or belong to an invalidly
 * nested LI. Tags of any other type are rejected.
 *
 * @param tag the tag to examine
 * @param output document receiving the replacement markup
 * @return {@code true} if the tag was accepted, {@code false} if it was rejected
 */
private boolean processTag(Tag tag, OutputDocument output) {
	// Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
	String elementName = tag.getName().toLowerCase(java.util.Locale.ROOT);
	if (!allowedTags.contains(elementName))
		return false;
	if (tag.getTagType() == StartTagType.NORMAL) {
		Element element = tag.getElement();
		if (HTMLElements.getEndTagRequiredElementNames().contains(elementName)) {
			// reject start tag if its required end tag is missing
			if (element.getEndTag() == null)
				return false;
		} else if (HTMLElements.getEndTagOptionalElementNames().contains(elementName)) {
			// equals() instead of ==: a lower-cased copy is not guaranteed to be the
			// same String instance as the HTMLElementName constant.
			if (HTMLElementName.LI.equals(elementName) && !isValidLITag(tag))
				return false; // reject invalid LI tags
			// insert optional end tag if it is missing
			if (element.getEndTag() == null)
				output.insert(element.getEnd(), getEndTagHTML(elementName));
		}
		output.replace(tag, getStartTagHTML(element.getStartTag()));
	} else if (tag.getTagType() == EndTagType.NORMAL) {
		// reject end tags that aren't associated with a start tag
		if (tag.getElement() == null)
			return false;
		// Content comparison for the same reason as in the start-tag branch.
		if (HTMLElementName.LI.equals(elementName) && !isValidLITag(tag))
			return false; // reject invalid LI tags
		output.replace(tag, getEndTagHTML(elementName));
	} else {
		return false; // reject abnormal tags
	}
	return true;
}
 
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:31,代码来源:HtmlStripper.java

示例10: isValidLITag

import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Tells whether an LI tag sits directly inside a valid UL or OL element.
 *
 * @param tag the LI tag to check
 * @return {@code false} when there is no parent element, when the parent's start
 *         tag does not carry {@code VALID_MARKER}, or when the parent is neither
 *         UL nor OL; {@code true} otherwise
 */
private boolean isValidLITag(Tag tag) {
	Element container = tag.getElement().getParentElement();
	// The parent must exist and must itself have been marked as valid.
	boolean trustedParent = container != null
			&& container.getStartTag().getUserData() == VALID_MARKER;
	if (!trustedParent) {
		return false;
	}
	if (container.getName() == HTMLElementName.UL) {
		return true;
	}
	return container.getName() == HTMLElementName.OL;
}
 
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:10,代码来源:HtmlStripper.java

示例11: parseHtml

import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Extracts image entries from a result page.
 * <p>
 * Starting at the first TABLE element, {@code getFirstElement()} is applied four
 * times to reach the container whose P elements are scanned. Scanning stops at the
 * first paragraph containing the "more image results" marker text. Each earlier
 * paragraph with an anchor that contains an IMG yields one {@code Image} built from
 * the IMG {@code src} and the anchor {@code href}.
 * <p>
 * Unlike indexing with {@code get(0)}, a page without a TABLE returns an empty list
 * and an anchor without an IMG is skipped instead of raising
 * {@code IndexOutOfBoundsException}.
 *
 * @param html the markup to parse
 * @return the images found, possibly empty, never {@code null}
 */
public static ArrayList<Image> parseHtml(String html) {
	ArrayList<Image> images = new ArrayList<Image>();
	Source source = new Source(html);

	// getFirstElement(name) returns null rather than throwing when nothing matches.
	Element container = source.getFirstElement(HTMLElementName.TABLE);
	for (int step = 0; step < 4 && container != null; step++) {
		container = container.getFirstElement();
	}
	if (container == null) {
		return images;
	}

	for (Element p : container.getAllElements(HTMLElementName.P)) {
		// Nothing from the marker paragraph onwards is collected, so stop here.
		if (p.getContent().toString().contains("이미지 결과 더 보기")) {
			break;
		}

		Element a = p.getFirstElement(HTMLElementName.A);
		if (a == null) {
			continue;
		}
		Element img = a.getFirstElement(HTMLElementName.IMG);
		if (img == null) {
			continue; // anchor without an image: nothing to record
		}

		String from = a.getAttributeValue("href");
		String url = img.getAttributeValue("src");
		Image image = new Image(url, from);
		image.debug = p.toString();
		images.add(image);
	}

	return images;
}
 
开发者ID:rishubil,项目名称:GalbijjimSearcher,代码行数:29,代码来源:HTMLParser.java

示例12: isValidLITag

import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Checks the nesting of an LI tag.
 *
 * @param tag the LI tag to check
 * @return {@code true} only when the tag's element has a parent whose start tag
 *         carries {@code VALID_MARKER} and whose name is UL or OL
 */
private static boolean isValidLITag(Tag tag) {
	final Element parent=tag.getElement().getParentElement();
	// Short-circuit order: missing parent, then invalid parent, then wrong parent type.
	return parent!=null
			&& parent.getStartTag().getUserData()==VALID_MARKER
			&& (parent.getName()==HTMLElementName.UL || parent.getName()==HTMLElementName.OL);
}
 
开发者ID:trackplus,项目名称:Genji,代码行数:7,代码来源:HTMLSanitiser.java

示例13: isValidTDTHTag

import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Checks the nesting of a TD or TH tag.
 *
 * @param tag the TD or TH tag to check
 * @return {@code true} only when the immediate parent element is a TR whose start
 *         tag carries {@code VALID_MARKER}
 */
private static boolean isValidTDTHTag(Tag tag) {
	final Element row=tag.getElement().getParentElement();
	if (row==null) {
		// Cells without a parent are ignored.
		return false;
	}
	final boolean parentMarkedValid=row.getStartTag().getUserData()==VALID_MARKER;
	if (!parentMarkedValid) {
		// Cells whose parent is not valid are ignored.
		return false;
	}
	return row.getName()==HTMLElementName.TR;
}
 
开发者ID:trackplus,项目名称:Genji,代码行数:7,代码来源:HTMLSanitiser.java

示例14: isValidTbodyTHeadTag

import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Checks the nesting of a TBODY or THEAD tag.
 *
 * @param tag the TBODY or THEAD tag to check
 * @return {@code true} only when the immediate parent element is a TABLE whose
 *         start tag carries {@code VALID_MARKER}
 */
private static boolean isValidTbodyTHeadTag(Tag tag) {
	final Element table=tag.getElement().getParentElement();
	// Evaluated left to right: parent present, parent marked valid, parent is a TABLE.
	return table!=null
			&& table.getStartTag().getUserData()==VALID_MARKER
			&& table.getName()==HTMLElementName.TABLE;
}
 
开发者ID:trackplus,项目名称:Genji,代码行数:7,代码来源:HTMLSanitiser.java

示例15: main

import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Converts every file in the input directory into a text file and a keyword file.
 * <p>
 * For each file, {@code <name>.txt} receives the title, the description meta value
 * and the text of every DIV whose id is {@code article-body-blocks};
 * {@code <name>.key} receives the keywords meta value with one keyword per line
 * (the file is created empty when there are no keywords). {@code <name>} is the
 * file name with a trailing {@code .html} removed.
 * <p>
 * The directory names are concatenated directly with the file name, so they are
 * expected to end with a path separator.
 *
 * @param args optional: {@code args[0]} input directory, {@code args[1]} output
 *             directory (defaults to the input directory)
 * @throws IllegalArgumentException if the input directory cannot be listed
 * @throws Exception if reading, parsing or writing a document fails
 */
public static void main(String[] args) throws Exception {
	if (args.length > 0)
		inDir = args[0];
	if (args.length > 1)
		outDir = args[1];
	else
		outDir = inDir;

	// listFiles() returns null when the path is not a directory or cannot be read.
	File[] docs = new File(inDir).listFiles();
	if (docs == null) {
		throw new IllegalArgumentException("Cannot list input directory: " + inDir);
	}

	// Hoisted out of the loop: these calls do not depend on the file being processed.
	MicrosoftConditionalCommentTagTypes.register();
	PHPTagTypes.register();
	PHPTagTypes.PHP_SHORT.deregister(); // remove PHP short tags for this example otherwise they override processing instructions
	MasonTagTypes.register();

	for (File doc : docs) {
		String sourceUrlString = "file:" + inDir + doc.getName();
		Source source = new Source(new URL(sourceUrlString));

		// The dot is escaped and the pattern anchored so only a real ".html" suffix
		// is stripped; an unescaped "." would match any character.
		String baseName = doc.getName().replaceFirst("\\.html$", "");

		PrintStream ps = new PrintStream(outDir + baseName + ".txt");
		try {
			PrintStream psKey = new PrintStream(outDir + baseName + ".key");
			try {
				// Call fullSequentialParse manually as most of the source will be parsed.
				source.fullSequentialParse();

				String title = getTitle(source);
				ps.print(title + "\n\n");
				String description = getMetaValue(source, "description");
				ps.println(description + "\n\n");
				String keywords = getMetaValue(source, "keywords");
				if (keywords != null)
					psKey.print(keywords.replaceAll("\\s*,\\s*", "\n"));
			} finally {
				psKey.close();
			}

			List<Element> divElements = source.getAllElements(HTMLElementName.DIV);
			for (Element div : divElements) {
				String id = div.getAttributeValue("id");
				if (id != null && id.equals("article-body-blocks")) {
					String article = div.getContent().getTextExtractor().toString();
					ps.println(article);
				}
			}
		} finally {
			// Closed even when parsing or extraction throws, so no file handle leaks.
			ps.close();
		}
	}
}
 
开发者ID:EUMSSI,项目名称:EUMSSI-tools,代码行数:55,代码来源:ExtractGuardian.java


注:本文中的net.htmlparser.jericho.HTMLElementName类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。