本文整理汇总了Java中org.apache.lucene.analysis.charfilter.HTMLStripCharFilter类的典型用法代码示例。如果您正苦于以下问题:Java HTMLStripCharFilter类的具体用法?Java HTMLStripCharFilter怎么用?Java HTMLStripCharFilter使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
HTMLStripCharFilter类属于org.apache.lucene.analysis.charfilter包,在下文中一共展示了HTMLStripCharFilter类的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: filter
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; //导入依赖的package包/类
/**
 * Strips HTML markup from {@code value} and returns the remaining text.
 *
 * <p>The input is streamed through an {@link HTMLStripCharFilter} in 10 KiB chunks and the
 * filtered characters are accumulated into a single string.
 *
 * @param value the text to strip; must not be {@code null} because it is handed directly to
 *     {@link StringReader}
 * @return everything the filter emitted for {@code value}
 * @throws RuntimeException wrapping the {@link IOException} if reading from or closing the
 *     filter fails
 */
private String filter(String value) {
    StringBuilder out = new StringBuilder();
    // try-with-resources closes the filter on every exit path, including when read() throws;
    // the previous explicit close() call was skipped whenever an exception occurred.
    try (HTMLStripCharFilter html =
            new HTMLStripCharFilter(new BufferedReader(new StringReader(value)))) {
        char[] cbuf = new char[1024 * 10];
        int count;
        while ((count = html.read(cbuf)) != -1) { // -1 marks end of stream
            if (count > 0) {
                out.append(cbuf, 0, count);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Failed stripping HTML for value: "
                + value, e);
    }
    return out.toString();
}
示例2: stripHTML
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; //导入依赖的package包/类
/**
 * Strips HTML markup from {@code value} and returns the remaining text.
 *
 * <p>The input is streamed through an {@link HTMLStripCharFilter} in 10 KiB chunks and the
 * filtered characters are accumulated into a single string.
 *
 * @param value the text to strip; must not be {@code null} because it is handed directly to
 *     {@link StringReader}
 * @param column name of the column being processed; used only in the error message
 * @return the stripped text as a {@code String} (declared as {@code Object})
 * @throws DataImportHandlerException with severity {@code SEVERE}, wrapping the
 *     {@link IOException}, if reading from or closing the filter fails
 */
private Object stripHTML(String value, String column) {
    StringBuilder out = new StringBuilder();
    // StringReader.markSupported() always returns true, so the former BufferedReader fallback
    // could never be taken; the StringReader is passed to the filter directly.
    // try-with-resources closes the filter on every exit path, including when read() throws;
    // the previous explicit close() call was skipped whenever an exception occurred.
    try (HTMLStripCharFilter html = new HTMLStripCharFilter(new StringReader(value))) {
        char[] cbuf = new char[1024 * 10];
        int count;
        while ((count = html.read(cbuf)) != -1) { // -1 marks end of stream
            if (count > 0) {
                out.append(cbuf, 0, count);
            }
        }
    } catch (IOException e) {
        throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
                "Failed stripping HTML for column: " + column, e);
    }
    return out.toString();
}
示例3: analyzeReturnTokens
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; //导入依赖的package包/类
/**
 * Runs {@code docText} through an {@link HTMLStripCharFilter} and a
 * {@link WhitespaceTokenizer}, returning the text of every token produced.
 *
 * <p>The filter is constructed with the single tag name {@code "unescaped"} as its second
 * (escaped tags) argument.
 *
 * @param docText the document text to strip and tokenize
 * @return the token strings in the order the tokenizer emitted them
 * @throws RuntimeException wrapping any {@link IOException} raised while tokenizing
 */
private String[] analyzeReturnTokens(String docText) {
    final List<String> tokens = new ArrayList<>();
    final Reader stripped =
            new HTMLStripCharFilter(new StringReader(docText), Collections.singleton("unescaped"));
    final WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    final CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    try {
        tokenizer.setReader(stripped);
        tokenizer.reset();
        for (boolean more = tokenizer.incrementToken(); more; more = tokenizer.incrementToken()) {
            tokens.add(term.toString());
        }
        tokenizer.end();
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    } finally {
        // The tokenizer is closed whether or not tokenizing succeeded.
        IOUtils.closeQuietly(tokenizer);
    }
    final String[] asArray = new String[tokens.size()];
    return tokens.toArray(asArray);
}
示例4: filterHTML
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; //导入依赖的package包/类
/**
 * Reads {@code source} to exhaustion through an {@link HTMLStripCharFilter} and returns the
 * filtered characters as a string.
 *
 * <p>Neither the filter nor {@code source} is closed here; the caller remains responsible for
 * closing {@code source}.
 *
 * @param source the reader supplying the HTML text, or {@code null}
 * @return the filtered text, or {@code null} when {@code source} is {@code null}
 * @throws IOException if reading from the filter fails
 */
public static String filterHTML(Reader source) throws IOException {
    if (source != null) {
        final StringBuilder text = new StringBuilder();
        final HTMLStripCharFilter stripper = new HTMLStripCharFilter(source);
        // Pull one character at a time until read() signals end of stream with -1.
        for (int c = stripper.read(); c != -1; c = stripper.read()) {
            text.append((char) c);
        }
        return text.toString();
    }
    return null;
}
示例5: analyzeTagOne
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; //导入依赖的package包/类
/**
 * Tokenizes the HTML-stripped {@code docText} on whitespace and locates the span between a
 * {@code start} token and the first {@code end} token.
 *
 * <p>Scanning stops at the first token equal to {@code end}. Element 0 of the result holds the
 * start offset of the most recent token equal to {@code start} seen up to that point; element 1
 * holds the end offset of the {@code end} token. Offsets are the values reported by the
 * tokenizer's {@link OffsetAttribute}.
 *
 * @param docText the document text to strip and tokenize
 * @param start token text marking the beginning of the span
 * @param end token text marking the end of the span
 * @return a two-element array {@code {startOffset, endOffset}}; an element stays {@code -1} when
 *     its token was not found
 * @throws RuntimeException wrapping any {@link IOException} raised while tokenizing
 */
private int[] analyzeTagOne(String docText, String start, String end) {
    final int[] offsets = new int[] {-1, -1};
    final Reader stripped = new HTMLStripCharFilter(new StringReader(docText));
    final WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    final CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);
    try {
        tokenizer.setReader(stripped);
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
            final String token = term.toString();
            if (token.equals(start)) {
                offsets[0] = offset.startOffset();
            }
            if (!token.equals(end)) {
                continue;
            }
            // First end token found: record it and stop scanning immediately.
            offsets[1] = offset.endOffset();
            return offsets;
        }
        tokenizer.end();
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    } finally {
        // Runs on the early return above as well, so the tokenizer is always closed.
        IOUtils.closeQuietly(tokenizer);
    }
    return offsets;
}
示例6: create
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; //导入依赖的package包/类
/**
 * Wraps the given reader in an {@link HTMLStripCharFilter}.
 *
 * @param tokenStream the reader whose content should be HTML-stripped
 * @return a new filter over {@code tokenStream}, built with this instance's
 *     {@code escapedTags}
 */
@Override
public Reader create(Reader tokenStream) {
    final Reader stripped = new HTMLStripCharFilter(tokenStream, escapedTags);
    return stripped;
}