本文整理汇总了Java中net.htmlparser.jericho.Segment类的典型用法代码示例。如果您正苦于以下问题:Java Segment类的具体用法?Java Segment怎么用?Java Segment使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
Segment类属于net.htmlparser.jericho包,在下文中一共展示了Segment类的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: compileExpr
import net.htmlparser.jericho.Segment; //导入依赖的package包/类
/**
 * Compiles {@code expr} as a lambda of the requested type and re-bases positions onto the
 * enclosing segment.
 *
 * <p>The compiled plan is walked by a {@code PlanOffsetVisitor} created from the segment's begin
 * offset, and every diagnostic reported by the compiler is re-created with that same offset
 * added to its start and end before being appended to {@code diagnostics}.
 *
 * @param segment segment whose begin offset is used as the base position
 * @param expr    expression to compile
 * @param type    target type handed to the compiler
 * @return the compiled plan, or {@code null} when the compiler reported at least one diagnostic
 */
private TypedPlan compileExpr(Segment segment, Expr expr, GenericClass type) {
    Compiler exprCompiler = new Compiler(classRepository, classResolver, new TemplateScope());
    TypedPlan compiled = exprCompiler.compileLambda(expr, type);

    int base = segment.getBegin();
    compiled.getPlan().acceptVisitor(new PlanOffsetVisitor(base));

    boolean failed = false;
    for (Diagnostic reported : exprCompiler.getDiagnostics()) {
        Diagnostic shifted = new Diagnostic(base + reported.getStart(), base + reported.getEnd(),
                reported.getMessage());
        diagnostics.add(shifted);
        failed = true;
    }
    return failed ? null : compiled;
}
示例2: printHTMLPage
import net.htmlparser.jericho.Segment; //导入依赖的package包/类
/**
 * Writes the content of the document's {@code <body>} element to {@code this.out}, encoded as
 * UTF-8, after passing every start tag that follows {@code <body>} through
 * {@code replaceAttrs}.
 *
 * <p>The text of all {@code <style>} elements is concatenated and handed to
 * {@code Style.getStyles} to obtain the replacement rules. If the document has no
 * {@code <body>} start tag nothing is written.
 *
 * <p>Element content is obtained through {@code Element.getContent()} rather than by
 * dereferencing {@code getEndTag()}, so elements whose end tag is missing no longer cause a
 * {@code NullPointerException}.
 *
 * @param source parsed document to print
 * @throws UnsupportedEncodingException declared for compatibility with existing callers
 * @throws IOException if writing to the underlying stream fails
 */
public void printHTMLPage(Source source)
        throws UnsupportedEncodingException, IOException {
    // Collect the raw text of every <style> element.
    StringBuilder css = new StringBuilder();
    for (StartTag styleTag : source.getAllStartTags(HTMLElementName.STYLE)) {
        css.append(styleTag.getElement().getContent().toString());
    }
    Vector<ReplaceRight> rights = Style.getStyles(css.toString(), styles, counter);

    // Generate the page with the updated styles.
    Iterator<StartTag> tags = source.getAllStartTags().iterator();
    StartTag body = null;
    while (tags.hasNext()) {
        StartTag candidate = tags.next();
        if (HTMLElementName.BODY.equals(candidate.getName())) {
            body = candidate;
            break;
        }
    }
    if (body == null) {
        return;
    }

    OutputDocument document = new OutputDocument(source);
    // The iterator is deliberately reused: only tags located after <body> are rewritten.
    while (tags.hasNext()) {
        replaceAttrs(tags.next(), document, rights);
    }

    Segment bodyContent = body.getElement().getContent();
    OutputStreamWriter writer = new OutputStreamWriter(this.out, "UTF-8");
    document.writeTo(writer, bodyContent.getBegin(), bodyContent.getEnd());
    writer.flush();
}
示例3: main
import net.htmlparser.jericho.Segment; //导入依赖的package包/类
/**
 * Reads {@code d:/test.html}, concatenates the text of all {@code <style>} elements, parses it
 * with {@code getStyles} and prints each resulting style on its own line to
 * {@code d:/res1.txt}.
 *
 * <p>Both streams are managed by try-with-resources so they are closed even when parsing
 * fails, and style content is read through {@code Element.getContent()} so a {@code <style>}
 * element without an end tag does not cause a {@code NullPointerException}.
 *
 * @param args ignored
 */
public static void main(final String[] args) {
    try (FileInputStream is = new FileInputStream("d:/test.html");
            PrintStream out = new PrintStream(new FileOutputStream("d:/res1.txt"))) {
        final Source source = new Source(is);

        final StringBuilder text = new StringBuilder();
        for (final StartTag tag : source.getAllStartTags(HTMLElementName.STYLE)) {
            text.append(tag.getElement().getContent().toString());
        }

        final Vector<Style> styles = new Vector<Style>();
        getStyles(text.toString(), styles, createCounter());
        for (final Style style : styles) {
            out.println(style);
        }
    } catch (final IOException e) {
        e.printStackTrace();
    }
}
示例4: findSetter
import net.htmlparser.jericho.Segment; //导入依赖的package包/类
/**
 * Looks up the single-argument setter for the property {@code name} on {@code cls}.
 *
 * <p>The setter name is {@code "set"} followed by {@code name} with its first character
 * upper-cased. When no candidate or more than one candidate is found, a diagnostic spanning
 * {@code segment} is recorded.
 *
 * @param segment segment used as the position of any reported diagnostic
 * @param cls     class searched for the setter
 * @param name    property key; must be non-empty because its first character is read
 * @return the unique matching method, or {@code null} if none or several match
 */
private GenericMethod findSetter(Segment segment, GenericClass cls, String name) {
    String setterName = "set" + Character.toUpperCase(name.charAt(0)) + name.substring(1);
    GenericMethod[] matches = typeNavigator.findMethods(cls, setterName, 1);
    switch (matches.length) {
        case 1:
            return matches[0];
        case 0:
            diagnostics.add(new Diagnostic(segment.getBegin(), segment.getEnd(),
                    "Setter not found for key: " + name));
            return null;
        default:
            diagnostics.add(new Diagnostic(segment.getBegin(), segment.getEnd(),
                    "Ambiguous key: " + name));
            return null;
    }
}
示例5: reencodeTextSegment
import net.htmlparser.jericho.Segment; //导入依赖的package包/类
/**
 * Decodes the character references in {@code [begin, end)} of {@code source} and registers the
 * re-encoded text as a replacement for that span in {@code outputDocument}.
 *
 * @param source           document containing the text
 * @param outputDocument   output document receiving the replacement
 * @param begin            start position of the span
 * @param end              end position of the span; nothing happens when {@code begin >= end}
 * @param formatWhiteSpace when {@code true}, encoding uses
 *                         {@code CharacterReference.encodeWithWhiteSpaceFormatting}; otherwise
 *                         {@code CharacterReference.encode}
 */
private static void reencodeTextSegment(Source source, OutputDocument outputDocument, int begin, int end, boolean formatWhiteSpace) {
    if (end > begin) {
        Segment span = new Segment(source, begin, end);
        String plain = CharacterReference.decode(span);
        String reencoded;
        if (formatWhiteSpace) {
            reencoded = CharacterReference.encodeWithWhiteSpaceFormatting(plain);
        } else {
            reencoded = CharacterReference.encode(plain);
        }
        outputDocument.replace(span, reencoded);
    }
}
示例6: reencodeTextSegment
import net.htmlparser.jericho.Segment; //导入依赖的package包/类
/**
 * Replaces the span {@code [begin, end)} of {@code source} in {@code output} with the same text
 * after its character references have been decoded and encoded again.
 *
 * @param source document containing the text
 * @param output output document receiving the replacement
 * @param begin  start position of the span
 * @param end    end position of the span; an empty or inverted span is ignored
 */
private void reencodeTextSegment(Source source, OutputDocument output, int begin, int end) {
    if (begin < end) {
        Segment span = new Segment(source, begin, end);
        String plain = CharacterReference.decode(span);
        output.replace(span, CharacterReference.encode(plain));
    }
}
示例7: reencodeTextSegment
import net.htmlparser.jericho.Segment; //导入依赖的package包/类
/**
 * Normalises the character references of a text span: the span is decoded with
 * {@code CharacterReference.decode}, encoded again with {@code CharacterReference.encode}, and
 * the result is registered in {@code output} as the replacement for the span.
 *
 * @param source document the span belongs to
 * @param output output document in which the span is replaced
 * @param begin  inclusive start position
 * @param end    exclusive end position; the call is a no-op unless {@code begin < end}
 */
private void reencodeTextSegment(Source source, OutputDocument output,
        int begin, int end) {
    boolean emptySpan = begin >= end;
    if (emptySpan) {
        return;
    }
    Segment target = new Segment(source, begin, end);
    String decoded = CharacterReference.decode(target);
    String encoded = CharacterReference.encode(decoded);
    output.replace(target, encoded);
}
示例8: htmlToText
import net.htmlparser.jericho.Segment; //导入依赖的package包/类
/**
 * Extracts plain text from an HTML string.
 *
 * <p>Before extraction, a {@code ", "} separator is inserted after every literal
 * {@code <li>} / {@code <dd>} and before every literal {@code </li>} / {@code </dd>}, so list
 * items stay visibly separated in the resulting text. Only these exact lower-case tags without
 * attributes are matched.
 *
 * @param html HTML markup; must not be {@code null}
 * @return the text produced by a {@code TextExtractor} with non-breaking-space conversion
 *         enabled
 */
public static synchronized String htmlToText(String html) {
    LOG.info("extractFromHTML ... ");
    // Insert comma separators around list items to keep some of the structure.
    final String separated = html
            .replace("<li>", "<li>, ")
            .replace("</li>", ", </li>")
            .replace("<dd>", "<dd>, ")
            .replace("</dd>", ", </dd>");
    final Source parsed = new Source(separated);
    final Segment whole = new Segment(parsed, parsed.getBegin(), parsed.getEnd());
    final TextExtractor extractor = new TextExtractor(whole);
    return extractor.setConvertNonBreakingSpaces(true).toString();
}
示例9: ComponentParser
import net.htmlparser.jericho.Segment; //导入依赖的package包/类
/**
 * Creates a parser bound to the given repository, diagnostic sink and source segment.
 *
 * @param classRepository repository stored on the parser and used to build its
 *                        {@code GenericTypeNavigator}
 * @param diagnostics     list stored on the parser for reporting diagnostics
 * @param segment         segment stored on the parser
 */
ComponentParser(ClassDescriberRepository classRepository, List<Diagnostic> diagnostics, Segment segment) {
    this.segment = segment;
    this.diagnostics = diagnostics;
    this.classRepository = classRepository;
    typeNavigator = new GenericTypeNavigator(classRepository);
}
示例10: compileSettingsObject
import net.htmlparser.jericho.Segment; //导入依赖的package包/类
/**
 * Compiles an object literal into a lambda plan that produces a settings object.
 *
 * <p>{@code type} must have a single abstract method that takes no parameters and returns a
 * {@code GenericClass}; otherwise a "Wrong target lambda type" diagnostic is recorded and
 * {@code null} is returned. Each entry of {@code expr} is matched to a setter on the returned
 * class via {@code findSetter}; entries without a usable setter are skipped. One diagnostic is
 * recorded if any required field remains unset (only the first such field is named).
 *
 * <p>Diagnostics reported by the expression compiler are re-created with the segment's begin
 * offset added to their positions. Only those compiler diagnostics make the method return
 * {@code null}; the setter and required-field diagnostics recorded here do not.
 *
 * @param segment segment used for diagnostic positions and as the plan offset
 * @param expr    object expression whose entries are compiled
 * @param type    target functional type
 * @return the typed lambda plan, or {@code null} on a wrong target type or compiler diagnostics
 */
private TypedPlan compileSettingsObject(Segment segment, ObjectExpr expr, GenericClass type) {
    Compiler valueCompiler = new Compiler(classRepository, classResolver, new TemplateScope());
    GenericMethod sam = typeNavigator.findSingleAbstractMethod(type);
    boolean takesNoArgs = sam.getActualParameterTypes().length == 0;
    if (!takesNoArgs || !(sam.getActualReturnType() instanceof GenericClass)) {
        diagnostics.add(new Diagnostic(segment.getBegin(), segment.getEnd(), "Wrong target lambda type"));
        return null;
    }

    GenericClass settingsType = (GenericClass) sam.getActualReturnType();
    ObjectPlan settingsPlan = new ObjectPlan(settingsType.getName());
    Set<String> missingFields = collectRequiredFields(settingsType.getName());

    for (ObjectEntry entry : expr.getEntries()) {
        GenericMethod setter = findSetter(segment, settingsType, entry.getKey());
        if (setter == null) {
            // findSetter has already recorded the diagnostic for this key.
            continue;
        }
        missingFields.remove(entry.getKey());
        TypedPlan compiledValue = valueCompiler.compile(entry.getValue(), setter.getActualParameterTypes()[0]);
        settingsPlan.getEntries().add(new ObjectPlanEntry(setter.getDescriber().getName(),
                CompilerCommons.methodToDesc(setter.getDescriber()), compiledValue.getPlan()));
    }

    LambdaPlan lambda = new LambdaPlan(settingsPlan, type.getName(), sam.getDescriber().getName(),
            CompilerCommons.methodToDesc(sam.getDescriber()), Collections.emptyList());
    TypedPlan typedLambda = new TypedPlan(lambda, type);

    if (!missingFields.isEmpty()) {
        String firstMissing = missingFields.iterator().next();
        diagnostics.add(new Diagnostic(segment.getBegin(), segment.getEnd(), "Required field not set: "
                + firstMissing));
    }

    int base = segment.getBegin();
    lambda.acceptVisitor(new PlanOffsetVisitor(base));

    boolean compilerFailed = false;
    for (Diagnostic reported : valueCompiler.getDiagnostics()) {
        diagnostics.add(new Diagnostic(base + reported.getStart(), base + reported.getEnd(),
                reported.getMessage()));
        compilerFailed = true;
    }
    return compilerFailed ? null : typedLambda;
}
示例11: use
import net.htmlparser.jericho.Segment; //导入依赖的package包/类
/**
 * Loads the component package {@code packageName} and registers its components under
 * {@code prefix}.
 *
 * <p>The package descriptor is the resource
 * {@code META-INF/flavour/component-packages/<packageName>}. Each non-empty line that does not
 * start with {@code #} is a class name relative to the package. Every listed class is
 * described, parsed by a {@code ComponentParser} and sorted into the element or attribute
 * component list according to the metadata type returned.
 *
 * <p>The descriptor is decoded as UTF-8 so the result does not depend on the platform default
 * charset.
 *
 * @param segment     segment used as the position of reported errors
 * @param prefix      key under which the discovered components are stored
 * @param packageName Java package whose descriptor is read
 * @throws RuntimeException wrapping the {@code IOException} if the descriptor cannot be read
 */
private void use(Segment segment, String prefix, String packageName) {
    String resourceName = "META-INF/flavour/component-packages/" + packageName;
    try (InputStream input = resourceProvider.openResource(resourceName)) {
        if (input == null) {
            error(segment, "Component package was not found: " + packageName);
            return;
        }
        BufferedReader reader = new BufferedReader(new InputStreamReader(input, "UTF-8"));
        List<ElementComponentMetadata> componentList = new ArrayList<>();
        List<AttributeComponentMetadata> attributeComponentList = new ArrayList<>();

        String line;
        while ((line = reader.readLine()) != null) {
            line = line.trim();
            if (line.isEmpty() || line.startsWith("#")) {
                // Blank lines and comments carry no class name.
                continue;
            }
            String className = packageName + "." + line;
            ClassDescriber cls = classRepository.describe(className);
            if (cls == null) {
                error(segment, "Class " + className + " declared by component package was not found");
                continue;
            }
            ComponentParser componentParser = new ComponentParser(classRepository, diagnostics, segment);
            Object componentMetadata = componentParser.parse(cls);
            if (componentMetadata instanceof ElementComponentMetadata) {
                componentList.add((ElementComponentMetadata) componentMetadata);
            } else if (componentMetadata instanceof AttributeComponentMetadata) {
                attributeComponentList.add((AttributeComponentMetadata) componentMetadata);
            }
        }
        avaliableComponents.put(prefix, componentList);
        avaliableAttrComponents.put(prefix, attributeComponentList);
    } catch (IOException e) {
        throw new RuntimeException("IO exception occurred reading component package: " + packageName, e);
    }
}
示例12: error
import net.htmlparser.jericho.Segment; //导入依赖的package包/类
/**
 * Records a diagnostic with the given message that spans the whole of {@code segment}.
 *
 * @param segment segment whose begin and end positions locate the diagnostic
 * @param message text of the diagnostic
 */
private void error(Segment segment, String message) {
    int start = segment.getBegin();
    int end = segment.getEnd();
    Diagnostic problem = new Diagnostic(start, end, message);
    diagnostics.add(problem);
}
示例13: removeNotAllowedTags
import net.htmlparser.jericho.Segment; //导入依赖的package包/类
/**
 * Sanitises an HTML fragment by stripping attributes and disallowed tags.
 *
 * <p>For every element in the fragment:
 * <ul>
 *   <li>all attributes are removed unless the element is an anchor ({@code <a>});</li>
 *   <li>an anchor's {@code href} that does not contain {@code "http"} is resolved against
 *       {@code docUri}; if the link is not a valid URI the whole anchor element is removed;</li>
 *   <li>if the element name is in {@code NOT_ALLOWED_HTML_TAGS}, its start and end tags are
 *       removed, and for {@code script}, {@code style} and {@code form} the content is removed
 *       as well.</li>
 * </ul>
 * Finally all newline and tab characters are deleted from the output.
 *
 * <p>Element names are compared with {@code equals} (not {@code ==} or {@code contains}), and
 * elements without attributes or without an end tag are handled without throwing.
 *
 * @param htmlFragment HTML markup to sanitise
 * @param docUri       base URI used to resolve relative anchor links
 * @return the sanitised markup without newline or tab characters
 */
private String removeNotAllowedTags(String htmlFragment, URI docUri) {
    Source source = new Source(htmlFragment);
    OutputDocument outputDocument = new OutputDocument(source);
    for (Element element : source.getAllElements()) {
        String tagName = element.getName();

        Attributes attrs = element.getAttributes();
        // Some tag types (e.g. comments) may expose no attribute list at all.
        if (attrs != null) {
            Map<String, String> attrsUpdate = outputDocument.replace(attrs, true);
            if (!"a".equals(tagName)) {
                attrsUpdate.clear();
            } else {
                String link = attrsUpdate.get("href");
                if (link != null && !link.contains("http")) {
                    try {
                        URI resolved = docUri.resolve(new URI(link));
                        attrsUpdate.put("href", resolved.toString());
                    } catch (URISyntaxException e) {
                        // An anchor whose link cannot be parsed is dropped entirely.
                        outputDocument.remove(element);
                    }
                }
            }
        }

        if (NOT_ALLOWED_HTML_TAGS.contains(tagName)) {
            if ("script".equals(tagName) || "style".equals(tagName) || "form".equals(tagName)) {
                outputDocument.remove(element.getContent());
            }
            outputDocument.remove(element.getStartTag());
            Segment endTag = element.getEndTag();
            if (!element.getStartTag().isSyntacticalEmptyElementTag() && endTag != null) {
                outputDocument.remove(endTag);
            }
        }
    }
    return outputDocument.toString().replace("\n", "").replace("\t", "");
}
示例14: removeHtml
import net.htmlparser.jericho.Segment; //导入依赖的package包/类
/**
 * Strips HTML markup from a string.
 *
 * <p>A {@code null} or blank input is returned as-is. Any other input is parsed and the result
 * of a {@code TextExtractor} over the whole string is returned.
 *
 * @param sourceString
 *            The string to strip HTML from.
 * @return The text extracted from {@code sourceString}, or {@code sourceString} itself when it
 *         is {@code null} or blank.
 * @since 1.0.0
 */
public static String removeHtml(final String sourceString) {
    if (!StringUtils.isNotBlank(sourceString)) {
        return sourceString;
    }
    final Source parsed = new Source(sourceString);
    final Segment whole = new Segment(parsed, 0, sourceString.length());
    return new TextExtractor(whole).toString();
}