This article collects typical usage examples of the Java class org.apache.lucene.search.highlight.TokenSources. If you are wondering what TokenSources is for, how to use it, or what real-world code that uses it looks like, the curated examples below should help.
The TokenSources class belongs to the org.apache.lucene.search.highlight package. Nine code examples are shown below, sorted by popularity by default.
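All nine examples revolve around the same idea: TokenSources supplies the Highlighter with a TokenStream for a document field, reading stored term vectors when they are available and re-analyzing the stored text when they are not. The following minimal sketch shows that shared pattern; it assumes a Lucene 4.x/5.x-era API, and the helper class name is ours, not from any of the projects below.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.TokenSources;

final class TokenSourcesPattern {
    /** Returns the best highlighted fragment of {@code field}, or null if nothing matched. */
    static String bestFragment(IndexReader reader, int docId, String field,
            Query query, Analyzer analyzer) throws IOException, InvalidTokenOffsetsException {
        Document doc = reader.document(docId);
        // Uses term vectors if the field stored them, otherwise re-analyzes the stored value.
        TokenStream stream = TokenSources.getAnyTokenStream(reader, docId, field, doc, analyzer);
        Highlighter highlighter = new Highlighter(new QueryScorer(query, field));
        return highlighter.getBestFragment(stream, doc.get(field));
    }
}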
Example 1: highlight
import org.apache.lucene.search.highlight.TokenSources; // import the required package/class

@Override
public String highlight(String locale, boolean useStopWords, String query, String content, String pre, String post, int preview) {
    Analyzer analyzer = termAnalyzers.findAnalyzer(locale, useStopWords);
    QueryParser parser = new QueryParser(defaultField, analyzer);
    String summary = null;
    try {
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(pre, post);
        Highlighter hg = new Highlighter(formatter, new QueryTermScorer(parser.parse(query)));
        hg.setMaxDocCharsToAnalyze(preview);
        hg.setTextFragmenter(new SimpleFragmenter(100));
        TokenStream tokens = TokenSources.getTokenStream(defaultField, content, analyzer);
        summary = hg.getBestFragments(tokens, content, 4, " ... ");
    } catch (InvalidTokenOffsetsException | IOException | ParseException ex) {
        LOG.error("Failed to highlight", ex);
    }
    return StringUtils.isBlank(summary) ? null : summary;
}
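The example above never touches an index: TokenSources.getTokenStream(field, content, analyzer) simply re-tokenizes the raw string. The same pattern works standalone, as in this sketch (assuming Lucene 5.x, where the Version-less StandardAnalyzer and QueryParser constructors exist; the field name and sample text are illustrative):

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryTermScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TokenSources;

public class ReanalysisHighlightDemo {
    public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException {
        Analyzer analyzer = new StandardAnalyzer();
        String content = "Lucene is a full-text search library; the highlighter marks query terms.";
        QueryParser parser = new QueryParser("body", analyzer);
        Highlighter hg = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"),
                new QueryTermScorer(parser.parse("highlighter")));
        hg.setTextFragmenter(new SimpleFragmenter(40));
        // No index involved: TokenSources re-analyzes the raw text on the fly.
        TokenStream tokens = TokenSources.getTokenStream("body", content, analyzer);
        System.out.println(hg.getBestFragments(tokens, content, 2, " ... "));
    }
}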
Example 2: testHits
import org.apache.lucene.search.highlight.TokenSources; // import the required package/class

public void testHits() throws Exception {
    IndexSearcher searcher = new IndexSearcher(TestUtil.getBookIndexDirectory());
    TermQuery query = new TermQuery(new Term("title", "action"));
    TopDocs hits = searcher.search(query, 10);
    QueryScorer scorer = new QueryScorer(query, "title");
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
    Analyzer analyzer = new SimpleAnalyzer();
    for (ScoreDoc sd : hits.scoreDocs) {
        StoredDocument doc = searcher.doc(sd.doc);
        String title = doc.get("title");
        TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc,
                analyzer);
        String fragment = highlighter.getBestFragment(stream, title);
        LOGGER.info(fragment);
    }
}
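Unlike Example 1, getAnyTokenStream first looks for stored term vectors on the "title" field and only falls back to re-analysis when none exist. That explicit fallback is the one-liner below, a sketch using the same variables as the loop above:

// Equivalent to the fallback path of getAnyTokenStream when "title"
// was indexed without term vectors: re-tokenize the stored text.
TokenStream stream = TokenSources.getTokenStream("title", title, analyzer);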
Example 3: performHighlighting
import org.apache.lucene.search.highlight.TokenSources; // import the required package/class

/**
 * Performs highlighting for a given query and a given document.
 *
 * @param indexSearcher the IndexSearcher performing the query
 * @param query the Tripod LuceneQuery
 * @param scoreDoc the Lucene ScoreDoc
 * @param doc the Lucene Document
 * @param highlighter the Highlighter to use
 * @param result the QueryResult to add the highlights to
 * @throws IOException if an error occurs performing the highlighting
 * @throws InvalidTokenOffsetsException if an error occurs performing the highlighting
 */
protected void performHighlighting(final IndexSearcher indexSearcher, final Query query, final ScoreDoc scoreDoc,
        final Document doc, final Highlighter highlighter, final QR result)
        throws IOException, InvalidTokenOffsetsException {
    if (query.getHighlightFields() == null || query.getHighlightFields().isEmpty()) {
        return;
    }

    final List<Highlight> highlights = new ArrayList<>();
    final List<String> hlFieldNames = getHighlightFieldNames(query, doc);

    // process each field to highlight on
    for (String hlField : hlFieldNames) {
        final String text = doc.get(hlField);
        if (StringUtils.isEmpty(text)) {
            continue;
        }

        final List<String> snippets = new ArrayList<>();
        final Fields tvFields = indexSearcher.getIndexReader().getTermVectors(scoreDoc.doc);
        final int maxStartOffset = highlighter.getMaxDocCharsToAnalyze() - 1;

        // get the snippets for the given field
        final TokenStream tokenStream = TokenSources.getTokenStream(hlField, tvFields, text, analyzer, maxStartOffset);
        final TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, false, 10);
        for (TextFragment textFragment : textFragments) {
            if (textFragment != null && textFragment.getScore() > 0) {
                snippets.add(textFragment.toString());
            }
        }

        // if we have snippets then add a highlight result to the QueryResult
        if (snippets.size() > 0) {
            highlights.add(new Highlight(hlField, snippets));
        }
    }

    result.setHighlights(highlights);
}
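The five-argument getTokenStream overload above only takes the fast term-vector path if the field was indexed with positions and offsets; otherwise it re-analyzes the stored text. A sketch of indexing such a field (assuming Lucene 4.x+; the class and field names are illustrative):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;

final class HighlightableFields {
    static Document docWithTermVectors(String text) {
        // Store the text plus term vectors with positions and offsets, so
        // TokenSources can rebuild a TokenStream without re-analysis.
        FieldType ft = new FieldType(TextField.TYPE_STORED);
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorPositions(true);
        ft.setStoreTermVectorOffsets(true);
        Document doc = new Document();
        doc.add(new Field("body", text, ft)); // "body" is an illustrative name
        return doc;
    }
}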
Example 4: getBenchmarkHighlighter
import org.apache.lucene.search.highlight.TokenSources; // import the required package/class

@Override
protected BenchmarkHighlighter getBenchmarkHighlighter(Query q) {
    highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
    highlighter.setMaxDocCharsToAnalyze(maxDocCharsToAnalyze);
    return new BenchmarkHighlighter() {
        @Override
        public int doHighlight(IndexReader reader, int doc, String field,
                Document document, Analyzer analyzer, String text) throws Exception {
            TokenStream ts = TokenSources.getAnyTokenStream(reader, doc, field, document, analyzer);
            TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFrags);
            return frag != null ? frag.length : 0;
        }
    };
}
Example 5: getBenchmarkHighlighter
import org.apache.lucene.search.highlight.TokenSources; // import the required package/class

@Override
public BenchmarkHighlighter getBenchmarkHighlighter(Query q) {
    highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
    return new BenchmarkHighlighter() {
        @Override
        public int doHighlight(IndexReader reader, int doc, String field, Document document, Analyzer analyzer, String text) throws Exception {
            TokenStream ts = TokenSources.getAnyTokenStream(reader, doc, field, document, analyzer);
            TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFrags);
            numHighlightedResults += frag != null ? frag.length : 0;
            return frag != null ? frag.length : 0;
        }
    };
}
Example 6: searchData
import org.apache.lucene.search.highlight.TokenSources; // import the required package/class

private String searchData(String key) throws IOException, ParseException, InvalidTokenOffsetsException {
    Directory directory = FSDirectory.open(new File(filePath));
    IndexSearcher indexSearcher = new IndexSearcher(directory);
    QueryParser queryParser = new QueryParser(Version.LUCENE_31, "foods",
            new SmartChineseAnalyzer(Version.LUCENE_31, true));
    //queryParser.setDefaultOperator(Operator.AND);
    Query query = queryParser.parse(key);
    TopDocs docs = indexSearcher.search(query, 10);
    QueryScorer queryScorer = new QueryScorer(query, "foods");
    Highlighter highlighter = new Highlighter(queryScorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));
    List<SearchResult> searchResults = new ArrayList<SearchResult>();
    if (docs != null) {
        for (ScoreDoc scoreDoc : docs.scoreDocs) {
            Document doc = indexSearcher.doc(scoreDoc.doc);
            TokenStream tokenStream = TokenSources.getAnyTokenStream(
                    indexSearcher.getIndexReader(), scoreDoc.doc, "foods", doc,
                    new SmartChineseAnalyzer(Version.LUCENE_31, true));
            SearchResult searchResult = new SearchResult();
            searchResult.setRestaurantId(Long.valueOf(doc.get("id")));
            searchResult.setRestaurantName(doc.get("restaurant_name"));
            searchResult.setKey(key);
            // getBestFragment returns null when no query term occurs in this field
            String fragment = highlighter.getBestFragment(tokenStream, doc.get("foods"));
            if (fragment != null) {
                searchResult.setFoods(Arrays.asList(fragment.split(" ")));
            }
            searchResults.add(searchResult);
        }
    } else {
        searchResults = null;
    }
    indexSearcher.close();
    directory.close();
    return new Gson().toJson(searchResults);
}
Example 7: highlight
import org.apache.lucene.search.highlight.TokenSources; // import the required package/class

/**
 * Highlights query matches in every field of the given document.
 * NOTE: This method will not preserve the correct field types.
 *
 * @param preTag the opening tag placed before each highlighted term
 * @param postTag the closing tag placed after each highlighted term
 */
public static Document highlight(int docId, Document document, Query query, FieldManager fieldManager,
        IndexReader reader, String preTag, String postTag) throws IOException, InvalidTokenOffsetsException {
    String fieldLessFieldName = fieldManager.getFieldLessFieldName();
    Query fixedQuery = fixSuperQuery(query, null, fieldLessFieldName);
    Analyzer analyzer = fieldManager.getAnalyzerForQuery();
    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(preTag, postTag);
    Document result = new Document();
    for (IndexableField f : document) {
        String name = f.name();
        if (fieldLessFieldName.equals(name) || FIELDS_NOT_TO_HIGHLIGHT.contains(name)) {
            result.add(f);
            continue;
        }
        String text = f.stringValue();
        Number numericValue = f.numericValue();

        Query fieldFixedQuery;
        if (fieldManager.isFieldLessIndexed(name)) {
            fieldFixedQuery = fixSuperQuery(query, name, fieldLessFieldName);
        } else {
            fieldFixedQuery = fixedQuery;
        }

        if (numericValue != null) {
            if (shouldNumberBeHighlighted(name, numericValue, fieldFixedQuery)) {
                String numberHighlight = preTag + text + postTag;
                result.add(new StringField(name, numberHighlight, Store.YES));
            }
        } else {
            Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(fieldFixedQuery, name));
            TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, docId, name, analyzer);
            TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
            for (int j = 0; j < frag.length; j++) {
                if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                    result.add(new StringField(name, frag[j].toString(), Store.YES));
                }
            }
        }
    }
    return result;
}
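As the Javadoc warns, every highlighted value comes back as a stored-only StringField, so the returned Document is meant for display rather than re-indexing. A hypothetical call site (the class name "HighlightHelper" and the surrounding variables are ours, not from the source):

// Hypothetical usage; HighlightHelper stands in for the class that
// declares the static highlight(...) method above.
Document display = HighlightHelper.highlight(scoreDoc.doc, searcher.doc(scoreDoc.doc),
        query, fieldManager, searcher.getIndexReader(), "<em>", "</em>");
for (IndexableField f : display) {
    System.out.println(f.name() + ": " + f.stringValue());
}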
Example 8: main
import org.apache.lucene.search.highlight.TokenSources; // import the required package/class

public static void main(String[] args) throws Exception {
    if (args.length != 0) {
        QUERY = args[0];
    }
    // wrap the Paoding segmenter in an Analyzer that satisfies Lucene's contract
    Analyzer analyzer = new PaodingAnalyzer();
    // read the text.txt file that sits next to this class
    String content = ContentReader.readText(English.class);
    // standard Lucene indexing and search code follows
    Directory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, analyzer);
    Document doc = new Document();
    Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
            Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();
    IndexReader reader = IndexReader.open(ramDir);
    String queryString = QUERY;
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.parse(queryString);
    Searcher searcher = new IndexSearcher(ramDir);
    query = query.rewrite(reader);
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    Hits hits = searcher.search(query);
    BoldFormatter formatter = new BoldFormatter();
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    for (int i = 0; i < hits.length(); i++) {
        String text = hits.doc(i).get(FIELD_NAME);
        int maxNumFragmentsRequired = 5;
        String fragmentSeparator = "...";
        TermPositionVector tpv = (TermPositionVector) reader
                .getTermFreqVector(hits.id(i), FIELD_NAME);
        TokenStream tokenStream = TokenSources.getTokenStream(tpv);
        String result = highlighter.getBestFragments(tokenStream, text,
                maxNumFragmentsRequired, fragmentSeparator);
        System.out.println("\n" + result);
    }
    reader.close();
}
Example 9: main
import org.apache.lucene.search.highlight.TokenSources; // import the required package/class

public static void main(String[] args) throws Exception {
    if (args.length != 0) {
        QUERY = args[0];
    }
    // wrap the Paoding segmenter in an Analyzer that satisfies Lucene's contract
    Analyzer analyzer = new PaodingAnalyzer();
    // read the text.txt file that sits next to this class
    String content = ContentReader.readText(Chinese.class);
    // standard Lucene indexing and search code follows
    Directory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, analyzer);
    Document doc = new Document();
    Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
            Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();
    IndexReader reader = IndexReader.open(ramDir);
    String queryString = QUERY;
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.parse(queryString);
    Searcher searcher = new IndexSearcher(ramDir);
    query = query.rewrite(reader);
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    Hits hits = searcher.search(query);
    BoldFormatter formatter = new BoldFormatter();
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    for (int i = 0; i < hits.length(); i++) {
        String text = hits.doc(i).get(FIELD_NAME);
        int maxNumFragmentsRequired = 5;
        String fragmentSeparator = "...";
        TermPositionVector tpv = (TermPositionVector) reader
                .getTermFreqVector(hits.id(i), FIELD_NAME);
        TokenStream tokenStream = TokenSources.getTokenStream(tpv);
        String result = highlighter.getBestFragments(tokenStream, text,
                maxNumFragmentsRequired, fragmentSeparator);
        System.out.println("\n" + result);
    }
    reader.close();
}