本文整理汇总了Java中net.htmlparser.jericho.StartTag类的典型用法代码示例。如果您正苦于以下问题：Java StartTag类的具体用法？Java StartTag怎么用？Java StartTag使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
StartTag类属于net.htmlparser.jericho包,在下文中一共展示了StartTag类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: realWrite
import net.htmlparser.jericho.StartTag; //导入依赖的package包/类
/**
 * Flushes the buffered output and forwards it to {@code outputStream}.
 *
 * <p>Without head/body tags the buffered bytes are copied unchanged. Otherwise the output is
 * wrapped in an {@code <html>} skeleton whose {@code <head>} holds every collected style,
 * followed either by the buffered text inside a new {@code <body>} (when the buffer contains
 * no body tag) or by the first body element found in the buffer.
 *
 * @param printHeadBodyTags whether to emit the surrounding html, head and body markup
 * @throws IOException propagated from flushing or from copying the buffered bytes
 */
public void realWrite(boolean printHeadBodyTags) throws IOException {
    flush();
    // Everything written so far was collected in memory; from here on write to the real target.
    ByteArrayOutputStream buffered = (ByteArrayOutputStream) this.out;
    this.out = outputStream;

    if (printHeadBodyTags) {
        println("<html>");
        println("<head>");
        println("<style>");
        for (Style style : styles) {
            println(style);
        }
        println("</style>");
        println("</head>");

        String htmlText = new String(buffered.toByteArray(), "UTF-8");
        Source source = new Source(htmlText);
        source.fullSequentialParse();
        List<StartTag> bodyTags = source.getAllStartTags("body");
        if (bodyTags.isEmpty()) {
            // No body in the buffered markup: wrap the whole text in one.
            println("<body>");
            println(htmlText);
            println("</body>");
        } else {
            // Reuse the first body element (start tag, content and end tag) as-is.
            println(new StringBuffer(bodyTags.get(0).getElement()));
        }
        println("</html>");
    } else {
        buffered.writeTo(outputStream);
    }
}
示例2: replaceUrlAttribute
import net.htmlparser.jericho.StartTag; //导入依赖的package包/类
/**
 * Rewrites the value of {@code pathAttribute} on every given start tag whose current value
 * passes {@code shouldProcessPath}.
 *
 * <p>The new value is obtained from {@code processPath} and registered on {@code doc} as an
 * attribute replacement.
 *
 * @param tags          start tags to inspect
 * @param pathAttribute name of the attribute holding the path/URL
 * @param baseDir       origin passed through to {@code processPath}
 * @param doc           output document that records the attribute replacements
 * @return number of attributes that were replaced
 */
protected int replaceUrlAttribute ( Iterable<StartTag> tags, final String pathAttribute, PathOrigin baseDir, OutputDocument doc ) {
    int count = 0;
    for ( StartTag tag : tags ) {
        Attributes attr = tag.parseAttributes();
        String path = attr.getValue( pathAttribute );
        if ( shouldProcessPath( path ) ) {
            String newPath = processPath( baseDir, path, getUrlProvider() );
            if ( log.isTraceEnabled() ) {
                // Format is "<tag>@<attribute> "old" --> "new""; the previous literal contained a
                // corrupted "%[..." sequence that made String.format throw at runtime.
                log.trace( String.format( "replaced: in %s@%s \"%s\" --> \"%s\"", tag.getName(), pathAttribute, path, newPath ) );
            }
            // The map returned by replace(...) represents the replacement attributes; putting
            // the new value there is what changes the output.
            doc.replace( attr, true ).put( pathAttribute, newPath );
            count++;
        }
    }
    return count;
}
示例3: parseTag
import net.htmlparser.jericho.StartTag; //导入依赖的package包/类
/**
 * Processes one tag encountered while parsing.
 *
 * <p>Only start tags are handled. XML processing instructions are delegated to
 * {@code parseProcessingInstruction}. For a normal start tag the element is parsed into a
 * node (added to {@code result} when non-null) if it passes {@code filter}; otherwise
 * {@code position} is moved to the end of that element.
 *
 * @param tag    tag to process
 * @param result list receiving the parsed nodes
 * @param filter decides whether the tag's element is parsed or skipped
 */
private void parseTag(Tag tag, List<TemplateNode> result, Predicate<Element> filter) {
    if (!(tag instanceof StartTag)) {
        return;
    }
    StartTag startTag = (StartTag) tag;
    StartTagType type = startTag.getStartTagType();

    if (type == StartTagType.XML_PROCESSING_INSTRUCTION) {
        parseProcessingInstruction(startTag);
        return;
    }
    if (type != StartTagType.NORMAL) {
        return;
    }

    if (!filter.test(tag.getElement())) {
        // Rejected element: jump past it entirely.
        position = tag.getElement().getEnd();
        return;
    }
    TemplateNode node = parseElement(tag.getElement());
    if (node != null) {
        result.add(node);
    }
}
示例4: getStartTagHTML
import net.htmlparser.jericho.StartTag; //导入依赖的package包/类
/**
 * Rebuilds the markup of a start tag, keeping only attributes whose key is contained in
 * {@code VALID_ATTRIBUTE_NAMES}.
 *
 * <p>Attribute values are re-encoded with {@code CharacterReference.encode}; attributes
 * without a value are written as a bare name. {@code " /"} is appended before the closing
 * {@code '>'} when the element has no end tag and its name is not one of the
 * end-tag-optional element names.
 *
 * @param startTag start tag to rebuild
 * @return the rebuilt start tag markup
 */
private static CharSequence getStartTagHTML(StartTag startTag) {
    final String tagName = startTag.getName();
    final StringBuilder html = new StringBuilder();
    html.append('<').append(tagName);

    for (Attribute attribute : startTag.getAttributes()) {
        if (!VALID_ATTRIBUTE_NAMES.contains(attribute.getKey())) {
            continue; // not an approved attribute
        }
        html.append(' ').append(attribute.getName());
        final String value = attribute.getValue();
        if (value != null) {
            html.append("=\"").append(CharacterReference.encode(value)).append('"');
        }
    }

    final boolean hasNoEndTag = startTag.getElement().getEndTag() == null;
    if (hasNoEndTag && !HTMLElements.getEndTagOptionalElementNames().contains(tagName)) {
        html.append(" /");
    }
    return html.append('>');
}
示例5: getStartTagHTML
import net.htmlparser.jericho.StartTag; //导入依赖的package包/类
/**
 * Produces a tidied copy of a start tag that contains only the attributes listed in
 * {@code allowedAttributes}.
 *
 * <p>Values are encoded via {@code CharacterReference.encode}. When the element has no end
 * tag and its name is not among the end-tag-optional element names, the tag is written in
 * the {@code " />"} form.
 *
 * @param startTag the start tag to copy
 * @return markup of the filtered start tag
 */
private CharSequence getStartTagHTML(StartTag startTag) {
    StringBuilder out = new StringBuilder();
    out.append('<');
    out.append(startTag.getName());

    for (Attribute attr : startTag.getAttributes()) {
        boolean approved = allowedAttributes.contains(attr.getKey());
        if (approved) {
            out.append(' ');
            out.append(attr.getName());
            if (attr.getValue() != null) {
                out.append("=\"");
                out.append(CharacterReference.encode(attr.getValue()));
                out.append('"');
            }
        }
    }

    if (startTag.getElement().getEndTag() == null) {
        // Only an element whose end tag is not optional is marked as self-closing.
        boolean endTagOptional =
                HTMLElements.getEndTagOptionalElementNames().contains(startTag.getName());
        if (!endTagOptional) {
            out.append(" /");
        }
    }
    out.append('>');
    return out;
}
示例6: getStartTagHTML
import net.htmlparser.jericho.StartTag; //导入依赖的package包/类
/**
 * Serialises a start tag again, dropping every attribute whose key is not in
 * {@code allowedAttributes}.
 *
 * <p>Kept attributes are written as {@code name="encoded value"}, or as just the name when
 * the attribute has no value. The tag ends with {@code " />"} instead of {@code ">"} if the
 * element has no end tag and is not an element whose end tag is optional.
 *
 * @param startTag source start tag
 * @return the re-serialised start tag
 */
private CharSequence getStartTagHTML(StartTag startTag) {
    final StringBuilder tagHtml = new StringBuilder("<");
    tagHtml.append(startTag.getName());

    for (Attribute current : startTag.getAttributes()) {
        if (!allowedAttributes.contains(current.getKey())) {
            continue;
        }
        tagHtml.append(' ').append(current.getName());
        if (current.getValue() == null) {
            continue; // valueless attribute, e.g. a boolean flag
        }
        tagHtml.append("=\"")
                .append(CharacterReference.encode(current.getValue()))
                .append('"');
    }

    final boolean selfClose = startTag.getElement().getEndTag() == null
            && !HTMLElements.getEndTagOptionalElementNames().contains(startTag.getName());
    tagHtml.append(selfClose ? " />" : ">");
    return tagHtml;
}
示例7: parseHTMLMetadata
import net.htmlparser.jericho.StartTag; //导入依赖的package包/类
/**
 * Backfills metadata from the {@code <meta>} tags of an HTML file.
 *
 * <p>For each meta tag the key is its {@code property} attribute, falling back to
 * {@code name}. Tags with neither attribute are logged at debug level and skipped. A key is
 * stored only if {@code isUsefulMeta} accepts it and the tag has a {@code content} attribute.
 *
 * <p>TODO: InputStream is difficult to reset after the Tika parser reads it, so the file
 * object is used and Jericho reads the raw file again.
 *
 * @param doc file object for the document
 * @param md  metadata map to backfill
 * @throws IOException declared for callers; presumably raised when the file cannot be read
 */
private void parseHTMLMetadata(File doc, Map<String, String> md) throws IOException {
    net.htmlparser.jericho.Source htmlDoc = new net.htmlparser.jericho.Source(doc);
    List<StartTag> metaTags = htmlDoc.getAllStartTags("meta");

    for (StartTag metaTag : metaTags) {
        String name = metaTag.getAttributeValue("name");
        String property = metaTag.getAttributeValue("property");
        if (property == null && name == null) {
            log.debug("Unmatched metadata in HTML {}", metaTag.toString());
            continue;
        }

        // "property" wins over "name" when both are present.
        String key = (property == null) ? name : property;
        if (isUsefulMeta(key)) {
            /* hopefully the value is in the content field */
            String content = metaTag.getAttributeValue("content");
            if (content != null) {
                md.put(key, content);
            }
        }
    }
}
示例8: convert
import net.htmlparser.jericho.StartTag; //导入依赖的package包/类
/**
 * Converts the template held in {@code code} into script text.
 *
 * <p>The source is scanned for start tags whose name begins with {@code "%"}. Text between
 * such tags is handed to {@code addLines}. The tag itself is emitted without its first two
 * and last two characters; when its third character is {@code '='} the remainder (after the
 * {@code '='}) is wrapped in {@code doc.print(...);}. Tags of four characters or fewer
 * contribute nothing.
 *
 * @return the generated script text
 */
public String convert() {
    Source source = new Source(code);
    source.fullSequentialParse();

    StringBuffer output = new StringBuffer();
    int cursor = 0; // end of the last script tag consumed so far
    for (StartTag tag : source.getAllStartTags()) {
        if (!tag.getName().startsWith("%")) {
            continue;
        }
        addLines(output, source, cursor, tag.getBegin());
        cursor = tag.getEnd();

        String script = tag.toString();
        if (script.length() <= 4) {
            continue; // nothing between the delimiters
        }
        int bodyEnd = script.length() - 2;
        if (script.charAt(2) == '=') {
            output.append("doc.print(").append(script.substring(3, bodyEnd)).append(");");
        } else {
            output.append(script.substring(2, bodyEnd));
        }
    }
    addLines(output, source, cursor, source.getEnd());
    return output.toString();
}
示例9: printHTMLPage
import net.htmlparser.jericho.StartTag; //导入依赖的package包/类
/**
 * Writes the content of the {@code <body>} element of {@code source} to {@code this.out}
 * (UTF-8), after passing the text of all {@code <style>} elements to
 * {@code Style.getStyles} and applying {@code replaceAttrs} to every start tag that follows
 * the body start tag.
 *
 * <p>Nothing is written when the source has no body start tag. Element content is obtained
 * through {@code Element.getContent()}, so a style or body element without an explicit end
 * tag no longer causes a {@code NullPointerException}.
 *
 * @param source parsed HTML source
 * @throws UnsupportedEncodingException declared for callers; UTF-8 is always supported
 * @throws IOException if writing to the output fails
 */
public void printHTMLPage(Source source)
        throws UnsupportedEncodingException, IOException {
    // Concatenate the text of all style elements.
    StringBuilder css = new StringBuilder();
    for (StartTag styleTag : source.getAllStartTags(HTMLElementName.STYLE)) {
        css.append(styleTag.getElement().getContent().toString());
    }
    Vector<ReplaceRight> rights = Style.getStyles(css.toString(), styles, counter);

    // Generate the page with the updated styles.
    Iterator<StartTag> iterator = source.getAllStartTags().iterator();
    StartTag body = null;
    while (iterator.hasNext()) {
        final StartTag candidate = iterator.next();
        if (HTMLElementName.BODY.equals(candidate.getName())) {
            body = candidate;
            break;
        }
    }
    if (body == null) {
        return;
    }

    // The iterator now points just past <body>, so only the following tags are rewritten.
    final OutputDocument document = new OutputDocument(source);
    while (iterator.hasNext()) {
        replaceAttrs(iterator.next(), document, rights);
    }

    OutputStreamWriter writer = new OutputStreamWriter(this.out, "UTF-8");
    final Segment bodyContent = body.getElement().getContent();
    document.writeTo(writer, bodyContent.getBegin(), bodyContent.getEnd());
    writer.flush();
}
示例10: main
import net.htmlparser.jericho.StartTag; //导入依赖的package包/类
/**
 * Manual test driver: reads {@code d:/test.html}, extracts the text of all {@code <style>}
 * elements, passes it to {@code getStyles} and prints each resulting style to
 * {@code d:/res1.txt}.
 *
 * <p>Both streams are managed by try-with-resources so they are closed even when parsing or
 * style extraction fails. I/O errors are reported on standard error.
 *
 * @param args ignored
 */
public static void main(final String[] args) {
    try (FileInputStream is = new FileInputStream("d:/test.html");
            PrintStream out = new PrintStream(new FileOutputStream("d:/res1.txt"))) {
        final Source source = new Source(is);

        // getContent() also covers a style element that lacks an end tag, where
        // getEndTag() would return null.
        final StringBuilder css = new StringBuilder();
        for (final StartTag tag : source.getAllStartTags(HTMLElementName.STYLE)) {
            css.append(tag.getElement().getContent().toString());
        }

        final Vector<Style> styles = new Vector<Style>();
        getStyles(css.toString(), styles, createCounter());
        for (final Style style : styles) {
            out.println(style);
        }
    } catch (final IOException e) {
        e.printStackTrace();
    }
}
示例11: parseProcessingInstruction
import net.htmlparser.jericho.StartTag; //导入依赖的package包/类
/**
 * Dispatches a processing-instruction tag by name: {@code ?import} goes to
 * {@code parseImport}, {@code ?use} goes to {@code parseUse}; any other name is ignored.
 *
 * @param tag the processing-instruction start tag
 */
private void parseProcessingInstruction(StartTag tag) {
    switch (tag.getName()) {
        case "?import":
            parseImport(tag);
            break;
        case "?use":
            parseUse(tag);
            break;
        default:
            // Unknown instruction: nothing to do.
            break;
    }
}
示例12: parseImport
import net.htmlparser.jericho.StartTag; //导入依赖的package包/类
/**
 * Handles an import instruction.
 *
 * <p>The tag content is normalised to a qualified name. A name ending in {@code ".*"} is
 * imported as a package; otherwise it is imported as a class if {@code classResolver} can
 * find it, and reported as an error if not.
 *
 * @param tag the import instruction tag
 */
private void parseImport(StartTag tag) {
    String importedName = normalizeQualifiedName(tag.getTagContent().toString());

    if (importedName.endsWith(".*")) {
        classResolver.importPackage(importedName);
        return;
    }
    if (classResolver.findClass(importedName) != null) {
        classResolver.importClass(importedName);
        return;
    }
    error(tag.getTagContent(), "Class was not found: " + importedName);
}
示例13: parseUse
import net.htmlparser.jericho.StartTag; //导入依赖的package包/类
/**
 * Handles a use instruction whose content has the form {@code prefix:package}.
 *
 * <p>The content is split on {@code ":"} into at most two parts. Anything other than exactly
 * two parts is reported as a syntax error; otherwise the trimmed prefix and the normalised
 * package name are passed to {@code use}.
 *
 * @param tag the use instruction tag
 */
private void parseUse(StartTag tag) {
    String[] pieces = StringUtils.split(tag.getTagContent().toString(), ":", 2);
    if (pieces.length == 2) {
        String prefix = pieces[0].trim();
        String packageName = normalizeQualifiedName(pieces[1]);
        use(tag.getTagContent(), prefix, packageName);
    } else {
        error(tag.getTagContent(), "Illegal syntax for 'use' instruction");
    }
}
示例14: getMetaValue
import net.htmlparser.jericho.StartTag; //导入依赖的package包/类
/**
 * Returns the {@code content} attribute of the first {@code <meta>} tag whose {@code name}
 * attribute matches {@code key}.
 *
 * <p>Start tags of other element types that carry a matching {@code name} attribute are
 * skipped.
 *
 * @param source parsed HTML source
 * @param key    value of the {@code name} attribute to look for
 * @return the content attribute value of the matching meta tag (may itself be {@code null}
 *         if the tag has no content attribute), or {@code null} if no such tag exists
 */
private static String getMetaValue(Source source, String key) {
    for (int pos = 0; pos < source.length();) {
        StartTag startTag = source.getNextStartTag(pos, "name", key, false);
        if (startTag == null) {
            return null;
        }
        // Compare by value rather than by reference so the check does not depend on the
        // tag name being the very same String instance as the constant.
        if (HTMLElementName.META.equals(startTag.getName())) {
            return startTag.getAttributeValue("content"); // Attribute values are automatically decoded
        }
        pos = startTag.getEnd();
    }
    return null;
}
示例15: realWriteWithHTMLUpdate
import net.htmlparser.jericho.StartTag; //导入依赖的package包/类
/**
 * Flushes the buffered output and writes it to {@code outputStream} with the collected
 * styles merged into the existing HTML document.
 *
 * <p>If the buffer contains no {@code <html>} tag the work is delegated to
 * {@code realWrite()}. Otherwise the styles are inserted, in order of preference:
 * <ol>
 *   <li>at the end of the first existing {@code <style>} element;</li>
 *   <li>as a new {@code <style>} element at the end of the first {@code <head>};</li>
 *   <li>as a new {@code <head>} containing the {@code <style>} element, directly after the
 *       {@code <html>} start tag.</li>
 * </ol>
 * The last case previously indexed into an empty list and always threw
 * {@code IndexOutOfBoundsException}.
 *
 * @throws IOException if flushing or writing fails
 */
public void realWriteWithHTMLUpdate() throws IOException {
    flush();
    ByteArrayOutputStream out = (ByteArrayOutputStream) this.out;
    Source source = new Source(new String(out.toByteArray(), "UTF-8"));
    source.fullSequentialParse();
    List<StartTag> htmlTags = source.getAllStartTags("html");
    if (htmlTags.isEmpty()) {
        realWrite();
        return;
    }
    this.out = outputStream;
    OutputStreamWriter writer = new OutputStreamWriter(this.out, "UTF-8");
    OutputDocument document = new OutputDocument(source);

    StringBuilder style = new StringBuilder();
    for (Style collected : this.styles) {
        style.append(collected.toString());
    }

    List<StartTag> styleTags = source.getAllStartTags("style");
    if (!styleTags.isEmpty()) {
        // getContent().getEnd() equals the end tag's begin when an end tag exists and
        // avoids a NullPointerException when it does not.
        document.insert(styleTags.get(0).getElement().getContent().getEnd(), style);
    } else {
        style.insert(0, "\n<style>\n");
        style.append("</style>\n");
        List<StartTag> headTags = source.getAllStartTags("head");
        if (!headTags.isEmpty()) {
            document.insert(headTags.get(0).getElement().getContent().getEnd(), style);
        } else {
            // No head at all: create one right after the <html> start tag.
            style.insert(0, "\n<head>\n");
            style.append("</head>\n");
            document.insert(htmlTags.get(0).getEnd(), style);
        }
    }
    document.writeTo(writer);
    writer.flush();
}