当前位置: 首页>>代码示例>>Java>>正文


Java CharacterReference类代码示例

本文整理汇总了Java中net.htmlparser.jericho.CharacterReference的典型用法代码示例。如果您正苦于以下问题:Java CharacterReference类的具体用法?Java CharacterReference怎么用?Java CharacterReference使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


CharacterReference类属于net.htmlparser.jericho包,在下文中一共展示了CharacterReference类的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getStartTagHTML

import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Rebuilds the markup of a start tag, keeping only the attributes whose key
 * is listed in {@code VALID_ATTRIBUTE_NAMES}.
 *
 * Attribute values are re-encoded with {@link CharacterReference#encode} and
 * always wrapped in double quotes; attributes without a value are emitted as
 * a bare name.
 *
 * @param startTag the start tag to regenerate
 * @return the regenerated start tag markup
 */
private static CharSequence getStartTagHTML(StartTag startTag) {
	final StringBuilder html = new StringBuilder();
	html.append('<');
	html.append(startTag.getName());
	for (Attribute attr : startTag.getAttributes()) {
		// Skip anything that is not on the approved list.
		if (!VALID_ATTRIBUTE_NAMES.contains(attr.getKey())) {
			continue;
		}
		html.append(' ');
		html.append(attr.getName());
		if (attr.getValue() == null) {
			continue;
		}
		html.append("=\"").append(CharacterReference.encode(attr.getValue())).append('"');
	}
	// An element with no end tag, whose end tag is not merely optional, is
	// written in self-closing form.
	if (startTag.getElement().getEndTag() == null) {
		if (!HTMLElements.getEndTagOptionalElementNames().contains(startTag.getName())) {
			html.append(" /");
		}
	}
	html.append('>');
	return html;
}
 
开发者ID:trackplus,项目名称:Genji,代码行数:19,代码来源:HTMLSanitiser.java

示例2: getStartTagHTML

import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Serialises a start tag back to HTML, dropping every attribute whose key is
 * not contained in {@code allowedAttributes}.
 *
 * Retained attribute values are passed through
 * {@link CharacterReference#encode} and double-quoted.
 *
 * @param startTag the start tag to serialise
 * @return the tidied start tag markup
 */
private CharSequence getStartTagHTML(StartTag startTag) {
	StringBuilder markup = new StringBuilder();
	markup.append('<');
	markup.append(startTag.getName());

	for (Attribute candidate : startTag.getAttributes()) {
		boolean approved = allowedAttributes.contains(candidate.getKey());
		if (approved) {
			markup.append(' ');
			markup.append(candidate.getName());
			// Value-less attributes are written as the bare name only.
			if (candidate.getValue() != null) {
				markup.append("=\"");
				markup.append(CharacterReference.encode(candidate.getValue()));
				markup.append('"');
			}
		}
	}

	// No end tag, and the end tag is not optional for this element name:
	// emit the self-closing form.
	boolean selfClosing = startTag.getElement().getEndTag() == null
			&& !HTMLElements.getEndTagOptionalElementNames().contains(startTag.getName());
	if (selfClosing) {
		markup.append(" /");
	}

	markup.append('>');
	return markup;
}
 
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:22,代码来源:HtmlStripperDiscussion.java

示例3: getStartTagHTML

import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Produces a cleaned-up copy of a start tag containing only attributes whose
 * key appears in {@code allowedAttributes}.
 *
 * @param startTag the start tag to clean
 * @return the start tag markup with encoded, double-quoted attribute values
 */
private CharSequence getStartTagHTML(StartTag startTag) {
	final StringBuilder tag = new StringBuilder();
	tag.append('<').append(startTag.getName());
	for (Attribute a : startTag.getAttributes()) {
		if (!allowedAttributes.contains(a.getKey())) {
			// Not approved: leave it out of the output entirely.
			continue;
		}
		tag.append(' ').append(a.getName());
		if (a.getValue() != null) {
			tag.append("=\"").append(CharacterReference.encode(a.getValue())).append('"');
		}
	}
	if (startTag.getElement().getEndTag() == null) {
		// Only elements whose end tag is not optional get the " /" marker.
		if (!HTMLElements.getEndTagOptionalElementNames().contains(startTag.getName())) {
			tag.append(" /");
		}
	}
	return tag.append('>');
}
 
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:20,代码来源:HtmlStripper.java

示例4: parseText

import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Returns the text of {@code source} in the range {@code [start, end)} with
 * every character reference that begins inside the range replaced by the
 * character it represents.
 *
 * @param start index of the first character of the range (inclusive)
 * @param end index just past the last character of the range (exclusive)
 * @return the decoded text of the range
 */
private String parseText(int start, int end) {
    StringBuilder sb = new StringBuilder();
    while (start < end) {
        CharacterReference ref = source.getNextCharacterReference(start);
        // Stop once there are no more references, or the next one lies
        // beyond the requested range.
        if (ref == null || ref.getBegin() >= end) {
            break;
        }
        // Copy the literal text that precedes the reference.
        sb.append(source.subSequence(start, ref.getBegin()));
        // Append the full code point: getChar() narrows to a single UTF-16
        // unit and would corrupt supplementary characters (above U+FFFF).
        sb.appendCodePoint(ref.getCodePoint());
        start = ref.getEnd();
    }
    // Copy whatever literal text remains after the last reference.
    sb.append(source.subSequence(start, end));
    return sb.toString();
}
 
开发者ID:konsoletyper,项目名称:teavm-flavour,代码行数:15,代码来源:Parser.java

示例5: reencodeTextSegment

import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Decodes the character references in the {@code [begin, end)} range of
 * {@code source} and registers a freshly encoded replacement for that range
 * in {@code outputDocument}. Does nothing when the range is empty.
 *
 * @param source the document being processed
 * @param outputDocument receives the replacement for the text range
 * @param begin start of the text range (inclusive)
 * @param end end of the text range (exclusive)
 * @param formatWhiteSpace when {@code true} the text is encoded with
 *        {@code CharacterReference.encodeWithWhiteSpaceFormatting},
 *        otherwise with {@code CharacterReference.encode}
 */
private static void reencodeTextSegment(Source source, OutputDocument outputDocument, int begin, int end, boolean formatWhiteSpace) {
	if (begin < end) {
		final Segment range = new Segment(source, begin, end);
		final String plain = CharacterReference.decode(range);
		final String reencoded;
		if (formatWhiteSpace) {
			reencoded = CharacterReference.encodeWithWhiteSpaceFormatting(plain);
		} else {
			reencoded = CharacterReference.encode(plain);
		}
		outputDocument.replace(range, reencoded);
	}
}
 
开发者ID:trackplus,项目名称:Genji,代码行数:8,代码来源:HTMLSanitiser.java

示例6: reencodeTextSegment

import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Replaces the {@code [begin, end)} range of {@code source} in
 * {@code output} with the same text decoded and then re-encoded through
 * {@link CharacterReference}. An empty or inverted range is ignored.
 *
 * @param source the document being processed
 * @param output receives the replacement for the text range
 * @param begin start of the text range (inclusive)
 * @param end end of the text range (exclusive)
 */
private void reencodeTextSegment(Source source, OutputDocument output, int begin, int end) {
	if (begin < end) {
		final Segment range = new Segment(source, begin, end);
		final String plain = CharacterReference.decode(range);
		output.replace(range, CharacterReference.encode(plain));
	}
}
 
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:9,代码来源:HtmlStripper.java

示例7: reencodeTextSegment

import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Normalises the encoding of a run of text: the {@code [begin, end)} range
 * is decoded, encoded again, and the result is registered in {@code output}
 * as the replacement for that range.
 *
 * @param source the document being processed
 * @param output receives the replacement for the text range
 * @param begin start of the text range (inclusive)
 * @param end end of the text range (exclusive)
 */
private void reencodeTextSegment(Source source, OutputDocument output,
		int begin, int end) {
	// Nothing to do for an empty range.
	final boolean hasText = begin < end;
	if (!hasText) {
		return;
	}
	final Segment text = new Segment(source, begin, end);
	final String roundTripped = CharacterReference.encode(CharacterReference.decode(text));
	output.replace(text, roundTripped);
}
 
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:10,代码来源:HtmlStripper.java

示例8: mosesPostprocess

import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Post-processes a translated line: decodes character references, runs the
 * de-normaliser, collapses whitespace runs to single spaces and trims the
 * ends. When the target language is not space-delimited, whitespace between
 * two characters in the range U+3001..U+9FA0 is removed as well.
 *
 * @param text the raw text to post-process
 * @param targetLanguage the language the text was translated into
 * @return the result of {@code cleanSpacesAroundTags} applied to the
 *         processed text and the original {@code text}
 */
private String mosesPostprocess(String text, Language targetLanguage) {
    final String decoded = CharacterReference.decode(text);

    final String singleSpaced = DeNormalize.processSingleLine(decoded).replaceAll("\\s+", " ").trim();

    final String processed;
    if (targetLanguage.isSpaceDelimited()) {
        processed = singleSpaced;
    } else {
        // Drop whitespace that sits between two characters of the listed range.
        processed = singleSpaced.replaceAll("(?<=[\u3001-\u9fa0])\\s+(?=[\u3001-\u9fa0])", "");
    }

    return cleanSpacesAroundTags(processed, text);
}
 
开发者ID:amake,项目名称:omegat-moses-mt,代码行数:12,代码来源:MosesTranslate.java

示例9: getTitle

import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Returns the content of the first TITLE element of a document, with
 * character references decoded and whitespace collapsed.
 *
 * @param source the parsed document to search
 * @return the title text, or {@code null} when there is no TITLE element
 */
private static String getTitle(Source source) {
    final net.htmlparser.jericho.Element title =
            source.getFirstElement(net.htmlparser.jericho.HTMLElementName.TITLE);
    // TITLE never contains other tags, so decoding its content is enough.
    return title == null
            ? null
            : CharacterReference.decodeCollapseWhiteSpace(title.getContent());
}
 
开发者ID:shiftdirector,项目名称:youseer,代码行数:16,代码来源:Worker.java

示例10: getTitle

import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Looks up the first TITLE element of {@code source} and returns its
 * content with character references decoded and whitespace collapsed.
 *
 * @param source the parsed document to search
 * @return the title text, or {@code null} when there is no TITLE element
 */
private static String getTitle(Source source) {
	final Element title = source.getFirstElement(HTMLElementName.TITLE);
	if (title != null) {
		// TITLE holds no nested tags, so its raw content can be decoded directly.
		return CharacterReference.decodeCollapseWhiteSpace(title.getContent());
	}
	return null;
}
 
开发者ID:EUMSSI,项目名称:EUMSSI-tools,代码行数:7,代码来源:ExtractGuardian.java


注:本文中的net.htmlparser.jericho.CharacterReference类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。