This page collects typical usage examples of the Java method org.apache.lucene.search.highlight.Highlighter.setTextFragmenter. If you are wondering what Highlighter.setTextFragmenter does, how to call it, or where to find working examples, the curated code samples below should help. You can also explore further how its containing class, org.apache.lucene.search.highlight.Highlighter, is used.
The following presents 15 code examples of Highlighter.setTextFragmenter, sorted by popularity by default.
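Before the individual examples, the sketch below shows the core setTextFragmenter pattern in one self-contained program: build a Highlighter from a formatter and a QueryScorer, choose a Fragmenter, then ask for the best fragment. This is a minimal illustration rather than code taken from any of the examples; it assumes a reasonably recent Lucene (5.x or later), and the field name "content", the sample text, and the query term "fragments" are hypothetical.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;

public class SetTextFragmenterSketch {

    public static void main(String[] args) throws IOException, InvalidTokenOffsetsException {
        // Hypothetical field name, sample text, and query term, used only for illustration.
        String field = "content";
        String text = "Lucene highlighting splits text into fragments and marks query terms.";
        TermQuery query = new TermQuery(new Term(field, "fragments"));

        QueryScorer scorer = new QueryScorer(query, field);
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), scorer);

        // Option 1: fixed-size fragments of roughly 40 characters.
        highlighter.setTextFragmenter(new SimpleFragmenter(40));

        // Option 2: span-aware fragments sized around the matching terms (swap in as needed).
        // highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 40));

        Analyzer analyzer = new StandardAnalyzer();
        // getBestFragment analyzes the text and returns the highest-scoring fragment, or null if nothing matched.
        System.out.println(highlighter.getBestFragment(analyzer, field, text));
    }
}

The choice of Fragmenter is the main decision here: SimpleFragmenter cuts the text into fixed-size chunks, while SimpleSpanFragmenter uses the QueryScorer to size fragments around the matching terms, which usually produces more readable snippets.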
Example 1: getHighlightString
import org.apache.lucene.search.highlight.Highlighter; // import the class the method depends on
public static String getHighlightString (String text, String keyword) throws IOException, InvalidTokenOffsetsException {
TermQuery query = new TermQuery(new Term("f", keyword));
QueryScorer scorer = new QueryScorer(query);
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">","</span>");
Highlighter highlighter = new Highlighter(formatter, scorer);
Fragmenter fragmenter = new SimpleFragmenter(50);
highlighter.setTextFragmenter(fragmenter);
TokenStream tokenStream = new StandardAnalyzer(Version.LUCENE_20).tokenStream("f", new StringReader(text));
String result = highlighter.getBestFragments(tokenStream, text, 30, "...");
StringBuilder writer = new StringBuilder("");
writer.append("<html>");
writer.append("<style>\n" +
".highlight {\n" +
" background: yellow;\n" +
"}\n" +
"</style>");
writer.append("<body>");
writer.append("");
writer.append("</body></html>");
return ( writer.toString() );
}
Example 2: searToHighlighterCss
import org.apache.lucene.search.highlight.Highlighter; // import the class the method depends on
/**
* Searches the index for a term and prints each hit's content with matches wrapped in a CSS-styled tag.
* @param analyzer
* @param searcher
* @throws IOException
* @throws InvalidTokenOffsetsException
*/
public void searToHighlighterCss(Analyzer analyzer, IndexSearcher searcher) throws IOException, InvalidTokenOffsetsException {
Term term = new Term("Content", new String("免费".getBytes(), "GBK")); // query term; the getBytes()/GBK round-trip works around a source-file encoding mismatch
TermQuery query = new TermQuery(term);
TopDocs docs = searcher.search(query, 10); // run the search
/** Custom HTML tag used to wrap the highlighted text */
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"hightlighterCss\">", "</span>");
/** Create the QueryScorer */
QueryScorer scorer = new QueryScorer(query);
/** Create the Fragmenter */
Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
Highlighter highlight = new Highlighter(formatter, scorer);
highlight.setTextFragmenter(fragmenter);
for (ScoreDoc doc : docs.scoreDocs) { // read the content of each matching document
Document document = searcher.doc(doc.doc);
String value = document.get("Content"); // use get() for the stored value; getField().toString() would return field metadata, not the text
TokenStream tokenStream = analyzer.tokenStream("Content", new StringReader(value));
String str1 = highlight.getBestFragment(tokenStream, value);
System.out.println(str1);
}
}
Example 3: highlight
import org.apache.lucene.search.highlight.Highlighter; // import the class the method depends on
@Override
public String highlight(String locale, boolean useStopWords, String query, String content, String pre, String post, int preview) {
Analyzer analyzer = termAnalyzers.findAnalyzer(locale, useStopWords);
QueryParser parser = new QueryParser(defaultField, analyzer);
String summary = null;
try {
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(pre, post);
Highlighter hg = new Highlighter(formatter, new QueryTermScorer(parser.parse(query)));
hg.setMaxDocCharsToAnalyze(preview);
hg.setTextFragmenter(new SimpleFragmenter(100));
TokenStream tokens = TokenSources.getTokenStream(defaultField, content, analyzer);
summary = hg.getBestFragments(tokens, content, 4, " ... ");
} catch (InvalidTokenOffsetsException | IOException | ParseException ex) {
LOG.error("Failed to highlight", ex);
}
return StringUtils.isBlank(summary) ? null : summary;
}
Example 4: highlightField
import org.apache.lucene.search.highlight.Highlighter; // import the class the method depends on
/**
* This method is intended for use with
* <tt>testHighlightingWithDefaultField()</tt>
*/
private String highlightField(Query query, String fieldName,
String text) throws IOException, InvalidTokenOffsetsException {
TokenStream tokenStream = new MockAnalyzer(random(), MockTokenizer.SIMPLE,
true, MockTokenFilter.ENGLISH_STOPSET, true).tokenStream(fieldName,
new StringReader(text));
// Assuming "<B>", "</B>" used to highlight
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
MyQueryScorer scorer = new MyQueryScorer(query, fieldName, FIELD_NAME);
Highlighter highlighter = new Highlighter(formatter, scorer);
highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
String rv = highlighter.getBestFragments(tokenStream, text, 1,
"(FIELD TEXT TRUNCATED)");
return rv.length() == 0 ? text : rv;
}
Example 5: testHits
import org.apache.lucene.search.highlight.Highlighter; // import the class the method depends on
public void testHits() throws Exception {
IndexSearcher searcher = new IndexSearcher(TestUtil.getBookIndexDirectory());
TermQuery query = new TermQuery(new Term("title", "action"));
TopDocs hits = searcher.search(query, 10);
QueryScorer scorer = new QueryScorer(query, "title");
Highlighter highlighter = new Highlighter(scorer);
highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
Analyzer analyzer = new SimpleAnalyzer();
for (ScoreDoc sd : hits.scoreDocs) {
StoredDocument doc = searcher.doc(sd.doc);
String title = doc.get("title");
TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc,
analyzer);
String fragment = highlighter.getBestFragment(stream, title);
LOGGER.info(fragment);
}
}
Example 6: displayHtmlHighlight
import org.apache.lucene.search.highlight.Highlighter; // import the class the method depends on
static String displayHtmlHighlight(Query query, Analyzer analyzer, String fieldName, String fieldContent,
int fragmentSize) throws IOException, InvalidTokenOffsetsException {
Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<font color='red'>", "</font>"),
new QueryScorer(query));
Fragmenter fragmenter = new SimpleFragmenter(fragmentSize);
highlighter.setTextFragmenter(fragmenter);
return highlighter.getBestFragment(analyzer, fieldName, fieldContent);
}
Example 7: search
import org.apache.lucene.search.highlight.Highlighter; // import the class the method depends on
public static void search(String indexDir, String q) throws Exception {
Directory dir = FSDirectory.open(Paths.get(indexDir));
IndexReader reader = DirectoryReader.open(dir);
IndexSearcher is = new IndexSearcher(reader);
// Analyzer analyzer = new StandardAnalyzer(); // standard analyzer
SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
QueryParser parser = new QueryParser("desc", analyzer);
Query query = parser.parse(q);
long start = System.currentTimeMillis();
TopDocs hits = is.search(query, 10);
long end = System.currentTimeMillis();
System.out.println("匹配 " + q + " ,总共花费" + (end - start) + "毫秒" + "查询到" + hits.totalHits + "个记录");
QueryScorer scorer = new QueryScorer(query);
Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
highlighter.setTextFragmenter(fragmenter);
for (ScoreDoc scoreDoc : hits.scoreDocs) {
Document doc = is.doc(scoreDoc.doc);
System.out.println(doc.get("city"));
System.out.println(doc.get("desc"));
String desc = doc.get("desc");
if (desc != null) {
TokenStream tokenStream = analyzer.tokenStream("desc", new StringReader(desc));
System.out.println(highlighter.getBestFragment(tokenStream, desc));
}
}
reader.close();
}
Example 8: HighlightingHelper
import org.apache.lucene.search.highlight.Highlighter; // import the class the method depends on
HighlightingHelper(Query query, Analyzer analyzer) {
this.analyzer = analyzer;
Formatter formatter = new SimpleHTMLFormatter();
Encoder encoder = new MinimalHTMLEncoder();
scorer = new QueryScorer(query);
highlighter = new Highlighter(formatter, encoder, scorer);
fragmentLength = DEFAULT_FRAGMENT_LENGTH;
Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);
highlighter.setTextFragmenter(fragmenter);
}
Example 9: highlightField
import org.apache.lucene.search.highlight.Highlighter; // import the class the method depends on
/**
* This method is intended for use with
* <tt>testHighlightingWithDefaultField()</tt>
*/
private String highlightField(Query query, String fieldName,
String text) throws IOException, InvalidTokenOffsetsException {
TokenStream tokenStream = new MockAnalyzer(random(), MockTokenizer.SIMPLE,
true, MockTokenFilter.ENGLISH_STOPSET).tokenStream(fieldName, text);
// Assuming "<B>", "</B>" used to highlight
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
MyQueryScorer scorer = new MyQueryScorer(query, fieldName, FIELD_NAME);
Highlighter highlighter = new Highlighter(formatter, scorer);
highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
String rv = highlighter.getBestFragments(tokenStream, text, 1,
"(FIELD TEXT TRUNCATED)");
return rv.length() == 0 ? text : rv;
}
Example 10: createHighlighter
import org.apache.lucene.search.highlight.Highlighter; // import the class the method depends on
protected Highlighter createHighlighter(org.apache.lucene.search.Query luceneQuery) {
SimpleHTMLFormatter format = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
Highlighter highlighter = new Highlighter(format, new QueryScorer(luceneQuery));// highlighter
// highlighter.setTextFragmenter(new
// SimpleFragmenter(Integer.MAX_VALUE));
highlighter.setTextFragmenter(new SimpleFragmenter(200));
return highlighter;
}
Example 11: getResult
import org.apache.lucene.search.highlight.Highlighter; // import the class the method depends on
public String getResult(String fieldName, String fieldValue) throws Exception{
BuguIndex index = BuguIndex.getInstance();
QueryParser parser = new QueryParser(index.getVersion(), fieldName, index.getAnalyzer());
Query query = parser.parse(keywords);
TokenStream tokens = index.getAnalyzer().tokenStream(fieldName, new StringReader(fieldValue));
QueryScorer scorer = new QueryScorer(query, fieldName);
Highlighter highlighter = new Highlighter(formatter, scorer);
highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
return highlighter.getBestFragments(tokens, fieldValue, maxFragments, "...");
}
Example 12: testHighlighting
import org.apache.lucene.search.highlight.Highlighter; // import the class the method depends on
public void testHighlighting() throws Exception {
String text = "The quick brown fox jumps over the lazy dog";
TermQuery query = new TermQuery(new Term("field", "fox"));
TokenStream tokenStream = new SimpleAnalyzer().tokenStream("field", new StringReader(text));
QueryScorer scorer = new QueryScorer(query, "field");
Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
Highlighter highlighter = new Highlighter(scorer);
highlighter.setTextFragmenter(fragmenter);
assertEquals("The quick brown <B>fox</B> jumps over the lazy dog",
highlighter.getBestFragment(tokenStream, text));
}
Example 13: searchData
import org.apache.lucene.search.highlight.Highlighter; // import the class the method depends on
private String searchData(String key) throws IOException, ParseException, InvalidTokenOffsetsException {
Directory directory = FSDirectory.open(new File(filePath));
IndexSearcher indexSearcher = new IndexSearcher(directory);
QueryParser queryParser = new QueryParser(Version.LUCENE_31, "foods",
new SmartChineseAnalyzer(Version.LUCENE_31, true));
//queryParser.setDefaultOperator(Operator.AND);
Query query = queryParser.parse(key);
TopDocs docs = indexSearcher.search(query, 10);
QueryScorer queryScorer = new QueryScorer(query, "foods");
Highlighter highlighter = new Highlighter(queryScorer);
highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));
List<SearchResult> searchResults = new ArrayList<SearchResult>();
if (docs != null) {
for (ScoreDoc scoreDoc : docs.scoreDocs) {
Document doc = indexSearcher.doc(scoreDoc.doc);
TokenStream tokenStream = TokenSources.getAnyTokenStream(
indexSearcher.getIndexReader(), scoreDoc.doc, "foods", doc,
new SmartChineseAnalyzer(Version.LUCENE_31, true));
SearchResult searchResult = new SearchResult();
searchResult.setRestaurantId(Long.valueOf(doc.get("id")));
searchResult.setRestaurantName(doc.get("restaurant_name"));
searchResult.setKey(key);
searchResult.setFoods(Arrays.asList(highlighter.
getBestFragment(tokenStream, doc.get("foods")).split(" ")));
searchResults.add(searchResult);
}
} else {
searchResults = null;
}
indexSearcher.close();
directory.close();
return new Gson().toJson(searchResults);
}
Example 14: main
import org.apache.lucene.search.highlight.Highlighter; // import the class the method depends on
public static void main(String[] args) throws Exception {
if (args.length != 0) {
QUERY = args[0];
}
// Wrap Paoding (庖丁) as an Analyzer that conforms to Lucene's Analyzer contract
Analyzer analyzer = new PaodingAnalyzer();
// Read the text.txt file located in this class's directory
String content = ContentReader.readText(English.class);
// What follows is standard Lucene code for building the index and searching it
Directory ramDir = new RAMDirectory();
IndexWriter writer = new IndexWriter(ramDir, analyzer);
Document doc = new Document();
Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(fd);
writer.addDocument(doc);
writer.optimize();
writer.close();
IndexReader reader = IndexReader.open(ramDir);
String queryString = QUERY;
QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
Query query = parser.parse(queryString);
Searcher searcher = new IndexSearcher(ramDir);
query = query.rewrite(reader);
System.out.println("Searching for: " + query.toString(FIELD_NAME));
Hits hits = searcher.search(query);
BoldFormatter formatter = new BoldFormatter();
Highlighter highlighter = new Highlighter(formatter, new QueryScorer(
query));
highlighter.setTextFragmenter(new SimpleFragmenter(50));
for (int i = 0; i < hits.length(); i++) {
String text = hits.doc(i).get(FIELD_NAME);
int maxNumFragmentsRequired = 5;
String fragmentSeparator = "...";
TermPositionVector tpv = (TermPositionVector) reader
.getTermFreqVector(hits.id(i), FIELD_NAME);
TokenStream tokenStream = TokenSources.getTokenStream(tpv);
String result = highlighter.getBestFragments(tokenStream, text,
maxNumFragmentsRequired, fragmentSeparator);
System.out.println("\n" + result);
}
reader.close();
}
Example 15: main
import org.apache.lucene.search.highlight.Highlighter; // import the class the method depends on
public static void main(String[] args) throws Exception {
if (args.length != 0) {
QUERY = args[0];
}
// Wrap Paoding (庖丁) as an Analyzer that conforms to Lucene's Analyzer contract
Analyzer analyzer = new PaodingAnalyzer();
// Read the text.txt file located in this class's directory
String content = ContentReader.readText(Chinese.class);
// What follows is standard Lucene code for building the index and searching it
Directory ramDir = new RAMDirectory();
IndexWriter writer = new IndexWriter(ramDir, analyzer);
Document doc = new Document();
Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(fd);
writer.addDocument(doc);
writer.optimize();
writer.close();
IndexReader reader = IndexReader.open(ramDir);
String queryString = QUERY;
QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
Query query = parser.parse(queryString);
Searcher searcher = new IndexSearcher(ramDir);
query = query.rewrite(reader);
System.out.println("Searching for: " + query.toString(FIELD_NAME));
Hits hits = searcher.search(query);
BoldFormatter formatter = new BoldFormatter();
Highlighter highlighter = new Highlighter(formatter, new QueryScorer(
query));
highlighter.setTextFragmenter(new SimpleFragmenter(50));
for (int i = 0; i < hits.length(); i++) {
String text = hits.doc(i).get(FIELD_NAME);
int maxNumFragmentsRequired = 5;
String fragmentSeparator = "...";
TermPositionVector tpv = (TermPositionVector) reader
.getTermFreqVector(hits.id(i), FIELD_NAME);
TokenStream tokenStream = TokenSources.getTokenStream(tpv);
String result = highlighter.getBestFragments(tokenStream, text,
maxNumFragmentsRequired, fragmentSeparator);
System.out.println("\n" + result);
}
reader.close();
}