当前位置: 首页>>代码示例>>Java>>正文


Java TokenSources.getTokenStream方法代码示例

本文整理汇总了Java中org.apache.lucene.search.highlight.TokenSources.getTokenStream方法的典型用法代码示例。如果您正苦于以下问题:Java TokenSources.getTokenStream方法的具体用法?Java TokenSources.getTokenStream怎么用?Java TokenSources.getTokenStream使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.lucene.search.highlight.TokenSources的用法示例。


在下文中一共展示了TokenSources.getTokenStream方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: highlight

import org.apache.lucene.search.highlight.TokenSources; //导入方法依赖的package包/类
@Override
public String highlight(String locale, boolean useStopWords, String query, String content, String pre, String post, int preview) {
    Analyzer analyzer = termAnalyzers.findAnalyzer(locale, useStopWords);
    QueryParser parser = new QueryParser(defaultField, analyzer);

    String summary = null;
    try {
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(pre, post);
        Highlighter hg = new Highlighter(formatter, new QueryTermScorer(parser.parse(query)));
        hg.setMaxDocCharsToAnalyze(preview);
        hg.setTextFragmenter(new SimpleFragmenter(100));

        TokenStream tokens = TokenSources.getTokenStream(defaultField, content, analyzer);
        summary = hg.getBestFragments(tokens, content, 4, " ... ");
    } catch (InvalidTokenOffsetsException | IOException | ParseException ex) {
        LOG.error("Failed to highlight", ex);
    }

    return StringUtils.isBlank(summary) ? null : summary;
}
 
开发者ID:jivesoftware,项目名称:miru,代码行数:21,代码来源:LuceneBackedQueryParser.java

示例2: performHighlighting

import org.apache.lucene.search.highlight.TokenSources; //导入方法依赖的package包/类
/**
 * Performs highlighting for a given query and a given document.
 *
 * @param indexSearcher the IndexSearcher performing the query
 * @param query the Tripod LuceneQuery
 * @param scoreDoc the Lucene ScoreDoc
 * @param doc the Lucene Document
 * @param highlighter the Highlighter to use
 * @param result the QueryResult to add the highlights to
 * @throws IOException if an error occurs performing the highlighting
 * @throws InvalidTokenOffsetsException if an error occurs performing the highlighting
 */
protected void performHighlighting(final IndexSearcher indexSearcher, final Query query, final ScoreDoc scoreDoc,
                                   final Document doc, final Highlighter highlighter, final QR result)
        throws IOException, InvalidTokenOffsetsException {

    if (query.getHighlightFields() == null || query.getHighlightFields().isEmpty()) {
        return;
    }

    final List<Highlight> highlights = new ArrayList<>();
    final List<String> hlFieldNames = getHighlightFieldNames(query, doc);

    // process each field to highlight on
    for (String hlField : hlFieldNames) {
        final String text = doc.get(hlField);
        if (StringUtils.isEmpty(text)) {
            continue;
        }

        final List<String> snippets = new ArrayList<>();
        final Fields tvFields = indexSearcher.getIndexReader().getTermVectors(scoreDoc.doc);
        final int maxStartOffset = highlighter.getMaxDocCharsToAnalyze() -1;

        // get the snippets for the given field
        final TokenStream tokenStream = TokenSources.getTokenStream(hlField, tvFields, text, analyzer, maxStartOffset);
        final TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, false, 10);
        for (TextFragment textFragment : textFragments) {
            if (textFragment != null && textFragment.getScore() > 0) {
                snippets.add(textFragment.toString());
            }
        }

        // if we have snippets then add a highlight result to the QueryResult
        if (snippets.size() > 0) {
            highlights.add(new Highlight(hlField, snippets));
        }
    }

    result.setHighlights(highlights);
}
 
开发者ID:bbende,项目名称:tripod,代码行数:52,代码来源:LuceneService.java

示例3: main

import org.apache.lucene.search.highlight.TokenSources; //导入方法依赖的package包/类
public static void main(String[] args) throws Exception {
	if (args.length != 0) {
		QUERY = args[0];
	}
	// 将庖丁封装成符合Lucene要求的Analyzer规范
	Analyzer analyzer = new PaodingAnalyzer();
	
	//读取本类目录下的text.txt文件
	String content = ContentReader.readText(English.class);

	//接下来是标准的Lucene建立索引和检索的代码
	Directory ramDir = new RAMDirectory();
	IndexWriter writer = new IndexWriter(ramDir, analyzer);
	Document doc = new Document();
	Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
			Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
	doc.add(fd);
	writer.addDocument(doc);
	writer.optimize();
	writer.close();

	IndexReader reader = IndexReader.open(ramDir);
	String queryString = QUERY;
	QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
	Query query = parser.parse(queryString);
	Searcher searcher = new IndexSearcher(ramDir);
	query = query.rewrite(reader);
	System.out.println("Searching for: " + query.toString(FIELD_NAME));
	Hits hits = searcher.search(query);

	BoldFormatter formatter = new BoldFormatter();
	Highlighter highlighter = new Highlighter(formatter, new QueryScorer(
			query));
	highlighter.setTextFragmenter(new SimpleFragmenter(50));
	for (int i = 0; i < hits.length(); i++) {
		String text = hits.doc(i).get(FIELD_NAME);
		int maxNumFragmentsRequired = 5;
		String fragmentSeparator = "...";
		TermPositionVector tpv = (TermPositionVector) reader
				.getTermFreqVector(hits.id(i), FIELD_NAME);
		TokenStream tokenStream = TokenSources.getTokenStream(tpv);
		String result = highlighter.getBestFragments(tokenStream, text,
				maxNumFragmentsRequired, fragmentSeparator);
		System.out.println("\n" + result);
	}
	reader.close();
}
 
开发者ID:no8899,项目名称:paoding-for-lucene-2.4,代码行数:48,代码来源:English.java

示例4: main

import org.apache.lucene.search.highlight.TokenSources; //导入方法依赖的package包/类
public static void main(String[] args) throws Exception {
	if (args.length != 0) {
		QUERY = args[0];
	}
	// 将庖丁封装成符合Lucene要求的Analyzer规范
	Analyzer analyzer = new PaodingAnalyzer();
	
	//读取本类目录下的text.txt文件
	String content = ContentReader.readText(Chinese.class);

	//接下来是标准的Lucene建立索引和检索的代码
	Directory ramDir = new RAMDirectory();
	IndexWriter writer = new IndexWriter(ramDir, analyzer);
	Document doc = new Document();
	Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
			Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
	doc.add(fd);
	writer.addDocument(doc);
	writer.optimize();
	writer.close();

	IndexReader reader = IndexReader.open(ramDir);
	String queryString = QUERY;
	QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
	Query query = parser.parse(queryString);
	Searcher searcher = new IndexSearcher(ramDir);
	query = query.rewrite(reader);
	System.out.println("Searching for: " + query.toString(FIELD_NAME));
	Hits hits = searcher.search(query);

	BoldFormatter formatter = new BoldFormatter();
	Highlighter highlighter = new Highlighter(formatter, new QueryScorer(
			query));
	highlighter.setTextFragmenter(new SimpleFragmenter(50));
	for (int i = 0; i < hits.length(); i++) {
		String text = hits.doc(i).get(FIELD_NAME);
		int maxNumFragmentsRequired = 5;
		String fragmentSeparator = "...";
		TermPositionVector tpv = (TermPositionVector) reader
				.getTermFreqVector(hits.id(i), FIELD_NAME);
		TokenStream tokenStream = TokenSources.getTokenStream(tpv);
		String result = highlighter.getBestFragments(tokenStream, text,
				maxNumFragmentsRequired, fragmentSeparator);
		System.out.println("\n" + result);
	}
	reader.close();
}
 
开发者ID:no8899,项目名称:paoding-for-lucene-2.4,代码行数:48,代码来源:Chinese.java


注:本文中的org.apache.lucene.search.highlight.TokenSources.getTokenStream方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。