This page collects typical usage examples of the Java method org.apache.lucene.search.highlight.TokenSources.getTokenStream. If you are wondering what exactly TokenSources.getTokenStream does, how to use it, or where to find sample code, the curated examples below should help. You can also explore the enclosing class, org.apache.lucene.search.highlight.TokenSources, for further usage examples.
Four code examples of TokenSources.getTokenStream are shown below, ordered by popularity.
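Taken together, the examples exercise three overloads of the method: getTokenStream(String field, String contents, Analyzer analyzer), which re-analyzes raw text (Example 1); getTokenStream(String field, Fields tvFields, String text, Analyzer analyzer, int maxStartOffset), which prefers stored term vectors (Example 2); and the legacy getTokenStream(TermPositionVector tpv), which rebuilds the stream entirely from a term vector (Examples 3 and 4). As a quick orientation, here is a minimal sketch of the analyzer-based form; it is not taken from the examples, and the field name, text, and choice of StandardAnalyzer are illustrative:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.highlight.TokenSources;

// Re-analyze a stored string to obtain a TokenStream for highlighting.
Analyzer analyzer = new StandardAnalyzer();
TokenStream ts = TokenSources.getTokenStream("body", "stored field text to highlight", analyzer);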
Example 1: highlight
import org.apache.lucene.search.highlight.TokenSources; // import the package/class the method depends on
@Override
public String highlight(String locale, boolean useStopWords, String query, String content, String pre, String post, int preview) {
    Analyzer analyzer = termAnalyzers.findAnalyzer(locale, useStopWords);
    QueryParser parser = new QueryParser(defaultField, analyzer);
    String summary = null;
    try {
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(pre, post);
        Highlighter hg = new Highlighter(formatter, new QueryTermScorer(parser.parse(query)));
        hg.setMaxDocCharsToAnalyze(preview);
        hg.setTextFragmenter(new SimpleFragmenter(100));
        TokenStream tokens = TokenSources.getTokenStream(defaultField, content, analyzer);
        summary = hg.getBestFragments(tokens, content, 4, " ... ");
    } catch (InvalidTokenOffsetsException | IOException | ParseException ex) {
        LOG.error("Failed to highlight", ex);
    }
    return StringUtils.isBlank(summary) ? null : summary;
}
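A few notes grounded in the code above: preview caps how many characters the Highlighter analyzes, SimpleFragmenter(100) targets roughly 100-character fragments, and at most four fragments are joined with " ... ". A hypothetical invocation might look like the following; the receiver and every argument value are made up for illustration:

// Hypothetical call; "service" and all argument values are illustrative.
String snippet = service.highlight("en", true, "lucene highlighter", articleBody, "<em>", "</em>", 5000);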
Example 2: performHighlighting
import org.apache.lucene.search.highlight.TokenSources; // import the package/class the method depends on
/**
 * Performs highlighting for a given query and a given document.
 *
 * @param indexSearcher the IndexSearcher performing the query
 * @param query the Tripod LuceneQuery
 * @param scoreDoc the Lucene ScoreDoc
 * @param doc the Lucene Document
 * @param highlighter the Highlighter to use
 * @param result the QueryResult to add the highlights to
 * @throws IOException if an error occurs performing the highlighting
 * @throws InvalidTokenOffsetsException if an error occurs performing the highlighting
 */
protected void performHighlighting(final IndexSearcher indexSearcher, final Query query, final ScoreDoc scoreDoc,
        final Document doc, final Highlighter highlighter, final QR result)
        throws IOException, InvalidTokenOffsetsException {
    if (query.getHighlightFields() == null || query.getHighlightFields().isEmpty()) {
        return;
    }

    final List<Highlight> highlights = new ArrayList<>();
    final List<String> hlFieldNames = getHighlightFieldNames(query, doc);

    // process each field to highlight on
    for (String hlField : hlFieldNames) {
        final String text = doc.get(hlField);
        if (StringUtils.isEmpty(text)) {
            continue;
        }

        final List<String> snippets = new ArrayList<>();
        final Fields tvFields = indexSearcher.getIndexReader().getTermVectors(scoreDoc.doc);
        final int maxStartOffset = highlighter.getMaxDocCharsToAnalyze() - 1;

        // get the snippets for the given field
        final TokenStream tokenStream = TokenSources.getTokenStream(hlField, tvFields, text, analyzer, maxStartOffset);
        final TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, false, 10);
        for (TextFragment textFragment : textFragments) {
            if (textFragment != null && textFragment.getScore() > 0) {
                snippets.add(textFragment.toString());
            }
        }

        // if we have snippets then add a highlight result to the QueryResult
        if (snippets.size() > 0) {
            highlights.add(new Highlight(hlField, snippets));
        }
    }

    result.setHighlights(highlights);
}
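The Fields-based overload used here can take its fast path only if the field was indexed with term vectors that include positions and offsets; otherwise it falls back to re-analyzing the stored text with the supplied analyzer. Below is a sketch of the indexing-side configuration this implies, assuming a Lucene 5.x/6.x-era API; the field name and text are illustrative:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;

// Store the text and index term vectors with positions and offsets so that
// TokenSources.getTokenStream(field, tvFields, text, analyzer, maxStartOffset)
// can read them back instead of re-analyzing the text.
FieldType hlType = new FieldType(TextField.TYPE_STORED);
hlType.setStoreTermVectors(true);
hlType.setStoreTermVectorPositions(true);
hlType.setStoreTermVectorOffsets(true);
hlType.freeze();

Document doc = new Document();
doc.add(new Field("body", "text to be highlighted later", hlType));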
Example 3: main
import org.apache.lucene.search.highlight.TokenSources; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
    if (args.length != 0) {
        QUERY = args[0];
    }
    // Wrap Paoding (庖丁) as an Analyzer conforming to Lucene's contract
    Analyzer analyzer = new PaodingAnalyzer();
    // Read the text.txt file from this class's directory
    String content = ContentReader.readText(English.class);
    // What follows is standard Lucene indexing and search code
    Directory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, analyzer);
    Document doc = new Document();
    Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
            Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();
    IndexReader reader = IndexReader.open(ramDir);
    String queryString = QUERY;
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.parse(queryString);
    Searcher searcher = new IndexSearcher(ramDir);
    query = query.rewrite(reader);
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    Hits hits = searcher.search(query);
    BoldFormatter formatter = new BoldFormatter();
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    for (int i = 0; i < hits.length(); i++) {
        String text = hits.doc(i).get(FIELD_NAME);
        int maxNumFragmentsRequired = 5;
        String fragmentSeparator = "...";
        TermPositionVector tpv = (TermPositionVector) reader
                .getTermFreqVector(hits.id(i), FIELD_NAME);
        TokenStream tokenStream = TokenSources.getTokenStream(tpv);
        String result = highlighter.getBestFragments(tokenStream, text,
                maxNumFragmentsRequired, fragmentSeparator);
        System.out.println("\n" + result);
    }
    reader.close();
}
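Note that getTokenStream(TermPositionVector) rebuilds the token stream purely from the stored term vector, with no re-analysis; this is why the field is indexed with Field.TermVector.WITH_POSITIONS_OFFSETS and stored with Field.Store.YES (the raw text is still needed for fragment extraction). Example 4 below repeats this flow verbatim, changing only the input document.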
Example 4: main
import org.apache.lucene.search.highlight.TokenSources; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
    if (args.length != 0) {
        QUERY = args[0];
    }
    // Wrap Paoding (庖丁) as an Analyzer conforming to Lucene's contract
    Analyzer analyzer = new PaodingAnalyzer();
    // Read the text.txt file from this class's directory
    String content = ContentReader.readText(Chinese.class);
    // What follows is standard Lucene indexing and search code
    Directory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, analyzer);
    Document doc = new Document();
    Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
            Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();
    IndexReader reader = IndexReader.open(ramDir);
    String queryString = QUERY;
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.parse(queryString);
    Searcher searcher = new IndexSearcher(ramDir);
    query = query.rewrite(reader);
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    Hits hits = searcher.search(query);
    BoldFormatter formatter = new BoldFormatter();
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    for (int i = 0; i < hits.length(); i++) {
        String text = hits.doc(i).get(FIELD_NAME);
        int maxNumFragmentsRequired = 5;
        String fragmentSeparator = "...";
        TermPositionVector tpv = (TermPositionVector) reader
                .getTermFreqVector(hits.id(i), FIELD_NAME);
        TokenStream tokenStream = TokenSources.getTokenStream(tpv);
        String result = highlighter.getBestFragments(tokenStream, text,
                maxNumFragmentsRequired, fragmentSeparator);
        System.out.println("\n" + result);
    }
    reader.close();
}
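Examples 3 and 4 differ only in the source text they read (English.class versus Chinese.class). Both are written against an early Lucene API (Hits, Searcher, TermPositionVector, IndexWriter.optimize()), all of which were removed in later releases, so they compile only against a Lucene 2.x-era classpath.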