当前位置: 首页>>代码示例>>Java>>正文


Java StandardFilter类代码示例

本文整理汇总了Java中org.apache.lucene.analysis.standard.StandardFilter的典型用法代码示例。如果您正苦于以下问题:Java StandardFilter类的具体用法?Java StandardFilter怎么用?Java StandardFilter使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


StandardFilter类属于org.apache.lucene.analysis.standard包,在下文中一共展示了StandardFilter类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: reusableTokenStream

import org.apache.lucene.analysis.standard.StandardFilter; //导入依赖的package包/类
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    // Per-thread holder for the tokenizer/filter pair so the chain can be reused.
    class SavedStreams {
        StandardTokenizer tokenStream;
        TokenStream filteredTokenStream;
    }

    SavedStreams saved = (SavedStreams) getPreviousTokenStream();
    if (saved != null) {
        // Cached chain exists for this thread: just repoint the tokenizer at the new input.
        saved.tokenStream.reset(reader);
    } else {
        // First use on this thread: build the full filter chain and cache it.
        saved = new SavedStreams();
        setPreviousTokenStream(saved);
        saved.tokenStream = new StandardTokenizer(LUCENE_VERSION, reader);
        TokenStream chain = new StandardFilter(saved.tokenStream);
        chain = new LowerCaseFilter(chain);
        chain = new StopFilter(true, chain, STOP_WORDS_SET);
        saved.filteredTokenStream = new ASCIIFoldingFilter(chain);
    }
    saved.tokenStream.setMaxTokenLength(DEFAULT_MAX_TOKEN_LENGTH);

    return saved.filteredTokenStream;
}
 
开发者ID:airsonic,项目名称:airsonic,代码行数:24,代码来源:SearchService.java

示例2: testStandardTokenizer

import org.apache.lucene.analysis.standard.StandardFilter; //导入依赖的package包/类
public void testStandardTokenizer() throws Exception {

        // Mixed Korean / English / Chinese sample to exercise the tokenizer.
        // (The original had a dead first assignment that was immediately
        // overwritten; it has been removed.)
        String source = "너는 너는 다시 내게 돌아 올거야. school is a good place 呵呵大笑 呵呵大笑";

        long start = System.currentTimeMillis();

        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        TokenStream stream = analyzer.tokenStream("s", new StringReader(source));
        TokenStream tok = new StandardFilter(Version.LUCENE_36, stream);

        // TokenStream contract: reset() before the first incrementToken().
        tok.reset();
        while (tok.incrementToken()) {
            // StandardFilter shares its AttributeSource with the wrapped stream,
            // so the term attribute may be read from either. Unused attribute
            // lookups (offset/position/type) from the original were dropped.
            CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
            System.out.println(new String(termAttr.buffer(), 0, termAttr.length()));
        }
        tok.end();
        tok.close();

        System.out.println((System.currentTimeMillis() - start) + "ms");
    }
 
开发者ID:debop,项目名称:lucene-korean,代码行数:23,代码来源:KoreanAnalyzerTest.java

示例3: tokenize

import org.apache.lucene.analysis.standard.StandardFilter; //导入依赖的package包/类
/**
 * Tokenizes {@code data} with the given analyzer, post-processed by
 * {@link StandardFilter}, returning the non-blank terms in order.
 * I/O failures are logged and yield whatever terms were collected so far.
 *
 * @param analyzer analyzer providing the base token stream
 * @param data     raw text to tokenize
 * @return list of non-null, trimmed terms (possibly empty, never null)
 */
public List<String> tokenize(Analyzer analyzer, String data) {
    List<String> terms = Lists.newArrayList();
    // try-with-resources guarantees the stream is closed even when
    // incrementToken() throws; the original leaked the stream on failure
    // because close() was only reached on the success path.
    try (TokenStream tokens = new StandardFilter(analyzer.tokenStream(null, new StringReader(data)))) {
        tokens.reset();

        while (tokens.incrementToken()) {
            CharTermAttribute termAttribute = tokens.getAttribute(CharTermAttribute.class);
            String term = trimToNull(termAttribute.toString());
            if (term != null) {
                terms.add(term);
            }
        }

        tokens.end();
    } catch (IOException ioe) {
        LOG.warn("Unable to tokenize data. cause: {}", new Object[] { ioe.getMessage() }, ioe);
    }
    return terms;
}
 
开发者ID:jivesoftware,项目名称:miru,代码行数:22,代码来源:TermTokenizer.java

示例4: createComponents

import org.apache.lucene.analysis.standard.StandardFilter; //导入依赖的package包/类
@Override
	protected TokenStreamComponents createComponents(String fieldName) {
		// Lucene supplies the actual input later via TokenStreamComponents#setReader,
		// so the tokenizer is constructed over an empty Reader.
		final Tokenizer src = new IKTokenizer(new StringReader(""), this.useSmart());
		TokenStream tok = new StandardFilter(src);
		// The original returned an anonymous TokenStreamComponents subclass whose
		// setReader() only delegated to super — a plain instance is equivalent.
		return new TokenStreamComponents(src, tok);
	}
 
开发者ID:TFdream,项目名称:lucene-analyzer-ik,代码行数:14,代码来源:IKAnalyzer.java

示例5: createComponents

import org.apache.lucene.analysis.standard.StandardFilter; //导入依赖的package包/类
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    // Autocomplete-specific tokenization followed by Lucene's StandardFilter pass.
    final AutocompleteTokenizer source = new AutocompleteTokenizer(reader);
    return new TokenStreamComponents(source, new StandardFilter(source));
}
 
开发者ID:gncloud,项目名称:fastcatsearch3,代码行数:9,代码来源:AutocompleteAnalyzer.java

示例6: tokenStream

import org.apache.lucene.analysis.standard.StandardFilter; //导入依赖的package包/类
/**
 * Builds the n-gram analysis chain for a field.
 *
 * @param fieldName ignored param
 * @param reader contains data to parse
 * @return TokenStream of ngrams
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    // tokenize -> StandardFilter -> lowercase -> n-grams of [min_ngram, max_ngram]
    TokenStream chain = new StandardTokenizer(reader);
    chain = new StandardFilter(chain);
    chain = new LowerCaseFilter(chain);
    return new NGramTokenFilter(chain, min_ngram, max_ngram);
}
 
开发者ID:spacewalkproject,项目名称:spacewalk,代码行数:12,代码来源:NGramAnalyzer.java

示例7: createComponents

import org.apache.lucene.analysis.standard.StandardFilter; //导入依赖的package包/类
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  // Whitespace tokenization, standard cleanup, then the domain-specific
  // diarienummer (registry-number) token filter.
  final Version matchVersion = Version.LUCENE_45;
  final Tokenizer source = new WhitespaceTokenizer(matchVersion, reader);
  TokenStream chain = new StandardFilter(matchVersion, source);
  chain = new DiarienummerTokenFilter(chain);
  return new TokenStreamComponents(source, chain);
}
 
开发者ID:Helsingborg,项目名称:solarie,代码行数:12,代码来源:DiarienummerAnalyzer.java

示例8: createComponents

import org.apache.lucene.analysis.standard.StandardFilter; //导入依赖的package包/类
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    // StandardTokenizer + StandardFilter, both pinned to the configured Lucene version.
    final Tokenizer src = new StandardTokenizer(LuceneConfig.USED_VERSION, reader);
    return new TokenStreamComponents(src, new StandardFilter(LuceneConfig.USED_VERSION, src));
}
 
开发者ID:Tietoarkisto,项目名称:metka,代码行数:8,代码来源:DefaultAnalyzer.java

示例9: tokenStream

import org.apache.lucene.analysis.standard.StandardFilter; //导入依赖的package包/类
public TokenStream tokenStream(String fieldName, Reader reader) {
  // tokenize -> standard cleanup -> lowercase -> stop words -> synonyms (via engine)
  TokenStream chain = new StandardTokenizer(Version.LUCENE_41, reader);
  chain = new StandardFilter(chain);
  chain = new LowerCaseFilter(chain);
  chain = new StopFilter(true, chain, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  return new SynonymFilter(chain, engine);
}
 
开发者ID:xuzhikethinker,项目名称:t4f-data,代码行数:13,代码来源:SynonymAnalyzer.java

示例10: create

import org.apache.lucene.analysis.standard.StandardFilter; //导入依赖的package包/类
@Override
public TokenStream create(TokenStream tokenStream) {
    // Factory hook: wraps the incoming stream in Lucene's StandardFilter.
    return new StandardFilter(tokenStream);
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:5,代码来源:StandardTokenFilterFactory.java

示例11: create

import org.apache.lucene.analysis.standard.StandardFilter; //导入依赖的package包/类
@Override
public StandardFilter create(TokenStream input) {
  // Factory hook: wraps the incoming stream in a StandardFilter
  // (version-less constructor — post-4.x Lucene API).
  return new StandardFilter(input);
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:5,代码来源:StandardFilterFactory.java

示例12: createComponents

import org.apache.lucene.analysis.standard.StandardFilter; //导入依赖的package包/类
@Override
protected TokenStreamComponents createComponents(String paramString) {
	// Fixed-size n-gram tokenization (n..n) followed by the standard cleanup filter.
	final Tokenizer tokenizer = new NGramTokenizer(n, n);
	return new TokenStreamComponents(tokenizer, new StandardFilter(tokenizer));
}
 
开发者ID:ksgwr,项目名称:LuceneDB,代码行数:7,代码来源:NgramAnalyzer.java

示例13: create

import org.apache.lucene.analysis.standard.StandardFilter; //导入依赖的package包/类
@Override
public StandardFilter create(TokenStream input) {
  // Factory hook: builds a StandardFilter bound to the configured Lucene
  // match version (versioned constructor — pre-5.x Lucene API).
  return new StandardFilter(luceneMatchVersion, input);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:5,代码来源:StandardFilterFactory.java

示例14: StdTermFilter

import org.apache.lucene.analysis.standard.StandardFilter; //导入依赖的package包/类
/**
 * Constructs the rewriter chain: a DribbleStream source wrapped by a
 * LowerCaseFilter, then a StandardFilter.
 * NOTE(review): DribbleStream appears to feed terms into the chain one at a
 * time — confirm against its definition elsewhere in this project.
 */
public StdTermFilter() {
  dribble = new DribbleStream();
  filter = new StandardFilter(new LowerCaseFilter(dribble));
}
 
开发者ID:CDLUC3,项目名称:dash-xtf,代码行数:6,代码来源:StdTermFilter.java

示例15: createComponents

import org.apache.lucene.analysis.standard.StandardFilter; //导入依赖的package包/类
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
	// Word n-grams of size 2..3, post-processed by Lucene's StandardFilter.
	final NGramWordTokenizer source = new NGramWordTokenizer(reader, 2, 3);
	return new TokenStreamComponents(source, new StandardFilter(source));
}
 
开发者ID:gncloud,项目名称:fastcatsearch3,代码行数:10,代码来源:NGramWordAnalyzer.java


注:本文中的org.apache.lucene.analysis.standard.StandardFilter类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。