本文整理汇总了Java中org.apache.lucene.search.highlight.Highlighter.getBestFragments方法的典型用法代码示例。如果您正苦于以下问题：Java Highlighter.getBestFragments方法的具体用法？Java Highlighter.getBestFragments怎么用？Java Highlighter.getBestFragments使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.search.highlight.Highlighter的用法示例。
在下文中一共展示了Highlighter.getBestFragments方法的8个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: highlight
import org.apache.lucene.search.highlight.Highlighter; //导入方法依赖的package包/类
/**
 * Produces a highlighted summary of {@code content} for the given query string.
 *
 * <p>The query is parsed against the default field using the analyzer looked up
 * for {@code locale}/{@code useStopWords}. Up to four fragments (fragment size
 * 100) are selected and joined with {@code " ... "}; matching terms are wrapped
 * in {@code pre} and {@code post}.
 *
 * @param locale       locale used to look up the analyzer
 * @param useStopWords passed to the analyzer lookup
 * @param query        query string to parse
 * @param content      text to highlight
 * @param pre          markup emitted before each highlighted term
 * @param post         markup emitted after each highlighted term
 * @param preview      value handed to {@code setMaxDocCharsToAnalyze}
 * @return the joined fragments, or {@code null} if the result is blank or
 *         highlighting failed (the failure is logged, not rethrown)
 */
@Override
public String highlight(String locale, boolean useStopWords, String query, String content, String pre, String post, int preview) {
    final Analyzer analyzer = termAnalyzers.findAnalyzer(locale, useStopWords);
    final QueryParser queryParser = new QueryParser(defaultField, analyzer);

    String fragments;
    try {
        final Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter(pre, post),
                new QueryTermScorer(queryParser.parse(query)));
        highlighter.setMaxDocCharsToAnalyze(preview);
        highlighter.setTextFragmenter(new SimpleFragmenter(100));

        final TokenStream stream = TokenSources.getTokenStream(defaultField, content, analyzer);
        fragments = highlighter.getBestFragments(stream, content, 4, " ... ");
    } catch (InvalidTokenOffsetsException | IOException | ParseException ex) {
        // Best effort: a failed highlight yields no summary rather than an error.
        LOG.error("Failed to highlight", ex);
        fragments = null;
    }

    if (StringUtils.isBlank(fragments)) {
        return null;
    }
    return fragments;
}
示例2: highlightField
import org.apache.lucene.search.highlight.Highlighter; //导入方法依赖的package包/类
/**
 * Highlights {@code text} for {@code query}; intended for use with
 * {@code testHighlightingWithDefaultField()}.
 *
 * @param query     query whose matches are highlighted
 * @param fieldName field name used for tokenizing and passed to the scorer
 * @param text      field text to highlight
 * @return the single best fragment, or {@code text} unchanged when the
 *         highlighter returns an empty string
 * @throws IOException                  propagated from the highlighter
 * @throws InvalidTokenOffsetsException propagated from the highlighter
 */
private String highlightField(Query query, String fieldName,
        String text) throws IOException, InvalidTokenOffsetsException {
    final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE,
            true, MockTokenFilter.ENGLISH_STOPSET, true);
    final TokenStream stream = analyzer.tokenStream(fieldName, new StringReader(text));

    // The no-arg formatter is assumed to emit "<B>" / "</B>" around matches.
    final Highlighter fieldHighlighter = new Highlighter(
            new SimpleHTMLFormatter(),
            new MyQueryScorer(query, fieldName, FIELD_NAME));
    // One fragment as large as possible, so the whole field text stays together.
    fieldHighlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));

    final String highlighted = fieldHighlighter.getBestFragments(stream, text, 1,
            "(FIELD TEXT TRUNCATED)");
    if (highlighted.length() == 0) {
        return text;
    }
    return highlighted;
}
示例3: highlightField
import org.apache.lucene.search.highlight.Highlighter; //导入方法依赖的package包/类
/**
 * Highlights {@code text} for {@code query}; intended for use with
 * {@code testHighlightingWithDefaultField()}.
 *
 * @param query     query whose matches are highlighted
 * @param fieldName field name used for tokenizing and passed to the scorer
 * @param text      field text to highlight
 * @return the single best fragment, or {@code text} unchanged when the
 *         highlighter returns an empty string
 * @throws IOException                  propagated from the highlighter
 * @throws InvalidTokenOffsetsException propagated from the highlighter
 */
private String highlightField(Query query, String fieldName,
        String text) throws IOException, InvalidTokenOffsetsException {
    final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE,
            true, MockTokenFilter.ENGLISH_STOPSET);
    final TokenStream stream = analyzer.tokenStream(fieldName, text);

    // The no-arg formatter is assumed to emit "<B>" / "</B>" around matches.
    final Highlighter fieldHighlighter = new Highlighter(
            new SimpleHTMLFormatter(),
            new MyQueryScorer(query, fieldName, FIELD_NAME));
    // One fragment as large as possible, so the whole field text stays together.
    fieldHighlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));

    final String highlighted = fieldHighlighter.getBestFragments(stream, text, 1,
            "(FIELD TEXT TRUNCATED)");
    if (highlighted.length() == 0) {
        return text;
    }
    return highlighted;
}
示例4: doHighlight
import org.apache.lucene.search.highlight.Highlighter; //导入方法依赖的package包/类
/**
 * Highlights query terms in a result document and stores the outcome on
 * {@code resultDocument}.
 *
 * <p>The content field is highlighted first. If that yields no fragment, the
 * description field is used instead and the result is flagged via
 * {@code setHighlightingDescription(true)}. The title is highlighted separately.
 * A field that is missing from {@code doc} is treated as producing no highlight
 * instead of failing with a {@code NullPointerException}.
 *
 * @param query          query whose terms are highlighted
 * @param analyzer       analyzer used to tokenize the field texts
 * @param doc            document whose content, description and title fields are read
 * @param resultDocument receives the highlight result and the highlighted title
 * @throws IOException propagated from tokenizing or highlighting
 */
private void doHighlight(final Query query, final Analyzer analyzer, final Document doc, final ResultDocument resultDocument) throws IOException {
    final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG), new QueryScorer(query));
    try {
        // Up to 3 best fragments of the content, joined with HIGHLIGHT_SEPARATOR.
        String highlightResult = bestFragmentsOrEmpty(highlighter, analyzer,
                AbstractOlatDocument.CONTENT_FIELD_NAME,
                doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME), 3, HIGHLIGHT_SEPARATOR);

        // Nothing highlighted in the content => look in the description.
        if (highlightResult.length() == 0) {
            highlightResult = bestFragmentsOrEmpty(highlighter, analyzer,
                    AbstractOlatDocument.DESCRIPTION_FIELD_NAME,
                    doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME), 3, HIGHLIGHT_SEPARATOR);
            resultDocument.setHighlightingDescription(true);
        }
        resultDocument.setHighlightResult(highlightResult);

        // Highlight the title; its fragments are joined with a single space.
        final String highlightTitle = bestFragmentsOrEmpty(highlighter, analyzer,
                AbstractOlatDocument.TITLE_FIELD_NAME,
                doc.get(AbstractOlatDocument.TITLE_FIELD_NAME), 3, " ");
        resultDocument.setHighlightTitle(highlightTitle);
    } catch (final InvalidTokenOffsetsException e) {
        log.warn("", e);
    }
}

/**
 * Tokenizes {@code text} for {@code fieldName} and returns the best highlighted
 * fragments, or an empty string when {@code text} is {@code null}.
 */
private String bestFragmentsOrEmpty(final Highlighter highlighter, final Analyzer analyzer, final String fieldName,
        final String text, final int maxFragments, final String separator) throws IOException, InvalidTokenOffsetsException {
    if (text == null) {
        // The field is absent from the document: there is nothing to highlight.
        return "";
    }
    final TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
    return highlighter.getBestFragments(tokenStream, text, maxFragments, separator);
}
示例5: getResult
import org.apache.lucene.search.highlight.Highlighter; //导入方法依赖的package包/类
/**
 * Highlights {@code fieldValue} for the query parsed from this object's
 * {@code keywords}.
 *
 * @param fieldName  field the query is parsed against and the scorer is bound to
 * @param fieldValue text to tokenize and highlight
 * @return up to {@code maxFragments} best fragments joined with {@code "..."}
 * @throws Exception propagated from query parsing, tokenizing or highlighting
 */
public String getResult(String fieldName, String fieldValue) throws Exception{
    final BuguIndex buguIndex = BuguIndex.getInstance();

    final QueryParser queryParser = new QueryParser(buguIndex.getVersion(), fieldName, buguIndex.getAnalyzer());
    final Query parsedQuery = queryParser.parse(keywords);

    final StringReader valueReader = new StringReader(fieldValue);
    final TokenStream stream = buguIndex.getAnalyzer().tokenStream(fieldName, valueReader);

    // The same scorer drives both the scoring and the span-aware fragmenting.
    final QueryScorer queryScorer = new QueryScorer(parsedQuery, fieldName);
    final Highlighter fragmentHighlighter = new Highlighter(formatter, queryScorer);
    fragmentHighlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));

    final String highlighted = fragmentHighlighter.getBestFragments(stream, fieldValue, maxFragments, "...");
    return highlighted;
}
示例6: main
import org.apache.lucene.search.highlight.Highlighter; //导入方法依赖的package包/类
/**
 * Indexes the sample text in an in-memory directory, searches it with
 * {@code QUERY} (or {@code args[0]} when given) and prints the highlighted
 * fragments of every hit.
 *
 * @param args optional; {@code args[0]} replaces the default query string
 * @throws Exception propagated from indexing, query parsing, searching or highlighting
 */
public static void main(String[] args) throws Exception {
    if (args.length != 0) {
        QUERY = args[0];
    }
    // Wrap Paoding as an Analyzer that conforms to Lucene's requirements.
    Analyzer analyzer = new PaodingAnalyzer();
    // Read the text.txt file located next to this class.
    String content = ContentReader.readText(English.class);

    // What follows is standard Lucene indexing and search code.
    Directory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, analyzer);
    Document doc = new Document();
    Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
            Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();

    IndexReader reader = IndexReader.open(ramDir);
    try {
        String queryString = QUERY;
        QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
        Query query = parser.parse(queryString);
        Searcher searcher = new IndexSearcher(ramDir);
        try {
            query = query.rewrite(reader);
            System.out.println("Searching for: " + query.toString(FIELD_NAME));
            Hits hits = searcher.search(query);

            BoldFormatter formatter = new BoldFormatter();
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(
                    query));
            highlighter.setTextFragmenter(new SimpleFragmenter(50));

            // Loop-invariant highlighting settings.
            final int maxNumFragmentsRequired = 5;
            final String fragmentSeparator = "...";
            for (int i = 0; i < hits.length(); i++) {
                String text = hits.doc(i).get(FIELD_NAME);
                // Rebuild the token stream from the stored term vector of this hit.
                TermPositionVector tpv = (TermPositionVector) reader
                        .getTermFreqVector(hits.id(i), FIELD_NAME);
                TokenStream tokenStream = TokenSources.getTokenStream(tpv);
                String result = highlighter.getBestFragments(tokenStream, text,
                        maxNumFragmentsRequired, fragmentSeparator);
                System.out.println("\n" + result);
            }
        } finally {
            // The searcher was previously never closed.
            searcher.close();
        }
    } finally {
        // Release the reader even when parsing, searching or highlighting fails.
        reader.close();
    }
}
示例7: main
import org.apache.lucene.search.highlight.Highlighter; //导入方法依赖的package包/类
/**
 * Indexes the sample text in an in-memory directory, searches it with
 * {@code QUERY} (or {@code args[0]} when given) and prints the highlighted
 * fragments of every hit.
 *
 * @param args optional; {@code args[0]} replaces the default query string
 * @throws Exception propagated from indexing, query parsing, searching or highlighting
 */
public static void main(String[] args) throws Exception {
    if (args.length != 0) {
        QUERY = args[0];
    }
    // Wrap Paoding as an Analyzer that conforms to Lucene's requirements.
    Analyzer analyzer = new PaodingAnalyzer();
    // Read the text.txt file located next to this class.
    String content = ContentReader.readText(Chinese.class);

    // What follows is standard Lucene indexing and search code.
    Directory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, analyzer);
    Document doc = new Document();
    Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
            Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();

    IndexReader reader = IndexReader.open(ramDir);
    try {
        String queryString = QUERY;
        QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
        Query query = parser.parse(queryString);
        Searcher searcher = new IndexSearcher(ramDir);
        try {
            query = query.rewrite(reader);
            System.out.println("Searching for: " + query.toString(FIELD_NAME));
            Hits hits = searcher.search(query);

            BoldFormatter formatter = new BoldFormatter();
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(
                    query));
            highlighter.setTextFragmenter(new SimpleFragmenter(50));

            // Loop-invariant highlighting settings.
            final int maxNumFragmentsRequired = 5;
            final String fragmentSeparator = "...";
            for (int i = 0; i < hits.length(); i++) {
                String text = hits.doc(i).get(FIELD_NAME);
                // Rebuild the token stream from the stored term vector of this hit.
                TermPositionVector tpv = (TermPositionVector) reader
                        .getTermFreqVector(hits.id(i), FIELD_NAME);
                TokenStream tokenStream = TokenSources.getTokenStream(tpv);
                String result = highlighter.getBestFragments(tokenStream, text,
                        maxNumFragmentsRequired, fragmentSeparator);
                System.out.println("\n" + result);
            }
        } finally {
            // The searcher was previously never closed.
            searcher.close();
        }
    } finally {
        // Release the reader even when parsing, searching or highlighting fails.
        reader.close();
    }
}
示例8: main
import org.apache.lucene.search.highlight.Highlighter; //导入方法依赖的package包/类
/**
 * Highlights occurrences of the search term in {@code text} (a field defined
 * outside this method) and writes the result as a small HTML page to the file
 * named by the single command-line argument.
 *
 * @param args exactly one element: the output file name; otherwise a usage
 *             message is printed and the JVM exits with status -1
 * @throws Exception propagated from query parsing, highlighting or file I/O
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("Usage: HighlightIt <filename-out>");
        System.exit(-1);
    }

    String filename = args[0];

    // Parse the search text against field "f".
    String searchText = "term";
    QueryParser parser = new QueryParser(Version.LUCENE_41,
            "f",
            new StandardAnalyzer(Version.LUCENE_41));
    Query query = parser.parse(searchText);

    // Matches are wrapped in a span that the stylesheet below colors.
    SimpleHTMLFormatter formatter =
            new SimpleHTMLFormatter("<span class=\"highlight\">",
                    "</span>");

    TokenStream tokens = new StandardAnalyzer(Version.LUCENE_41)
            .tokenStream("f", new StringReader(text));

    // The same scorer drives both the scoring and the span-aware fragmenting.
    QueryScorer scorer = new QueryScorer(query, "f");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(
            new SimpleSpanFragmenter(scorer));

    // Up to 3 best fragments, joined with "...".
    String result =
            highlighter.getBestFragments(tokens, text, 3, "...");

    FileWriter writer = new FileWriter(filename);
    try {
        writer.write("<html>");
        writer.write("<style>\n" +
                ".highlight {\n" +
                " background: yellow;\n" +
                "}\n" +
                "</style>");
        writer.write("<body>");
        writer.write(result);
        writer.write("</body></html>");
    } finally {
        // Close the file even if one of the writes fails.
        writer.close();
    }
}