This article collects typical usage examples of the Java class org.apache.lucene.analysis.standard.StandardFilter. If you are wondering what StandardFilter is for or how to use it, the curated code examples below should help.
StandardFilter belongs to the org.apache.lucene.analysis.standard package. Fifteen code examples are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java examples.
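For orientation: since Lucene 3.1, StandardFilter is essentially a pass-through kept for backward compatibility (the older behavior of stripping possessive 's and dots in acronyms lives on in ClassicFilter), and the class was deprecated and then removed in Lucene 8.0. A minimal, self-contained sketch of a typical chain, assuming the Lucene 5.x versionless constructors:

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class StandardFilterDemo {
    public static void main(String[] args) throws IOException {
        Tokenizer tokenizer = new StandardTokenizer();
        tokenizer.setReader(new StringReader("The Quick Brown Fox"));
        TokenStream stream = new LowerCaseFilter(new StandardFilter(tokenizer));
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();                          // required before the first incrementToken()
        while (stream.incrementToken()) {
            System.out.println(term.toString()); // prints: the, quick, brown, fox
        }
        stream.end();
        stream.close();
    }
}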
Example 1: reusableTokenStream

import org.apache.lucene.analysis.standard.StandardFilter; // import the dependent package/class

@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    class SavedStreams {
        StandardTokenizer tokenStream;
        TokenStream filteredTokenStream;
    }
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        // First use on this thread: build the chain once and cache it.
        streams = new SavedStreams();
        setPreviousTokenStream(streams);
        streams.tokenStream = new StandardTokenizer(LUCENE_VERSION, reader);
        streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
        streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
        streams.filteredTokenStream = new StopFilter(true, streams.filteredTokenStream, STOP_WORDS_SET);
        streams.filteredTokenStream = new ASCIIFoldingFilter(streams.filteredTokenStream);
    } else {
        // Subsequent uses: just point the cached tokenizer at the new reader.
        streams.tokenStream.reset(reader);
    }
    streams.tokenStream.setMaxTokenLength(DEFAULT_MAX_TOKEN_LENGTH);
    return streams.filteredTokenStream;
}
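A minimal consumption sketch for the analyzer above (Lucene 3.x era; reusableTokenStream itself was removed in Lucene 4.0). The field name "body" and the input text are arbitrary:

TokenStream ts = analyzer.reusableTokenStream("body", new StringReader("Résumé writing 101"));
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
ts.reset();
while (ts.incrementToken()) {
    // lowercased, stop-filtered, ASCII-folded: "resume", "writing", "101"
    System.out.println(term.toString());
}
ts.end();
ts.close();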
Example 2: testStandardTokenizer

import org.apache.lucene.analysis.standard.StandardFilter; // import the dependent package/class

public void testStandardTokenizer() throws Exception {
    String source = "우리나라라면에서부터 일본라면이 파생되었잖니?";
    source = "너는 너는 다시 내게 돌아 올거야. school is a good place 呵呵大笑 呵呵大笑"; // overwrites the line above
    long start = System.currentTimeMillis();
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    TokenStream stream = analyzer.tokenStream("s", new StringReader(source));
    TokenStream tok = new StandardFilter(Version.LUCENE_36, stream);
    // Filters share their attribute source with the stream they wrap, so the
    // attributes can be fetched once, up front, from the outermost stream.
    CharTermAttribute termAttr = tok.getAttribute(CharTermAttribute.class);
    OffsetAttribute offAttr = tok.getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute posAttr = tok.getAttribute(PositionIncrementAttribute.class);
    TypeAttribute typeAttr = tok.getAttribute(TypeAttribute.class);
    tok.reset(); // required before the first incrementToken() call
    while (tok.incrementToken()) {
        System.out.println(new String(termAttr.buffer(), 0, termAttr.length()));
    }
    tok.end();
    tok.close();
    System.out.println((System.currentTimeMillis() - start) + "ms");
}
Example 3: tokenize

import org.apache.lucene.analysis.standard.StandardFilter; // import the dependent package/class

public List<String> tokenize(Analyzer analyzer, String data) {
    List<String> terms = Lists.newArrayList();
    try {
        TokenStream tokens = new StandardFilter(analyzer.tokenStream(null, new StringReader(data)));
        tokens.reset();
        CharTermAttribute termAttribute = tokens.getAttribute(CharTermAttribute.class);
        while (tokens.incrementToken()) {
            String term = trimToNull(termAttribute.toString());
            if (term != null) {
                terms.add(term);
            }
        }
        tokens.end();
        tokens.close();
    } catch (IOException ioe) {
        LOG.warn("Unable to tokenize data. cause: {}", new Object[] { ioe.getMessage() }, ioe);
    }
    return terms;
}
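A hypothetical call, assuming Lucene 5.x where StandardAnalyzer has a no-argument constructor (StandardAnalyzer already lowercases and removes English stop words, and the extra StandardFilter wrapper changes nothing):

List<String> terms = tokenize(new StandardAnalyzer(), "The Quick, Brown Fox!");
// terms -> [quick, brown, fox]  ("The" is dropped as a stop word)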
Example 4: createComponents

import org.apache.lucene.analysis.standard.StandardFilter; // import the dependent package/class

@Override
protected TokenStreamComponents createComponents(String fieldName) {
    // Tokenizer _IKTokenizer = new IKTokenizer(in, this.useSmart());
    final Tokenizer src = new IKTokenizer(new StringReader(""), this.useSmart());
    TokenStream tok = new StandardFilter(src);
    return new TokenStreamComponents(src, tok) {
        @Override
        protected void setReader(final Reader reader) {
            // No extra work needed; the override is kept only as an extension point.
            super.setReader(reader);
        }
    };
}
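For comparison, the same single-argument createComponents shape with the stock StandardTokenizer instead of the third-party IKTokenizer (a sketch, assuming Lucene 5.x, where tokenizers no longer take a Reader at construction and the reader is attached later via setReader):

@Override
protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new StandardTokenizer();     // reader is supplied later by the framework
    TokenStream sink = new StandardFilter(source);  // compatibility no-op in modern Lucene
    return new TokenStreamComponents(source, sink);
}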
Example 5: createComponents

import org.apache.lucene.analysis.standard.StandardFilter; // import the dependent package/class

@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final AutocompleteTokenizer tokenizer = new AutocompleteTokenizer(reader);
    TokenFilter filter = new StandardFilter(tokenizer);
    return new TokenStreamComponents(tokenizer, filter);
}
Example 6: tokenStream

import org.apache.lucene.analysis.standard.StandardFilter; // import the dependent package/class

/**
 * @param fieldName ignored param
 * @param reader contains data to parse
 * @return TokenStream of ngrams
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    return new NGramTokenFilter(
            new LowerCaseFilter(
                    new StandardFilter(
                            new StandardTokenizer(reader))), min_ngram, max_ngram);
}
Example 7: createComponents

import org.apache.lucene.analysis.standard.StandardFilter; // import the dependent package/class

@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Version matchVersion = Version.LUCENE_45;
    final Tokenizer source = new WhitespaceTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    result = new DiarienummerTokenFilter(result);
    return new TokenStreamComponents(source, result);
}
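DiarienummerTokenFilter is project-specific (it appears to normalize Swedish "diarienummer", i.e. registry numbers), but any such filter follows the same shape: subclass TokenFilter and rewrite the shared term attribute in incrementToken. A minimal hypothetical skeleton:

import java.io.IOException;
import java.util.Locale;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public final class UpperCasingFilter extends TokenFilter {
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

    public UpperCasingFilter(TokenStream input) {
        super(input);
    }

    @Override
    public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) {
            return false; // upstream is exhausted
        }
        // Rewrite the current token in place.
        String upper = termAtt.toString().toUpperCase(Locale.ROOT);
        termAtt.setEmpty().append(upper);
        return true;
    }
}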
Example 8: createComponents

import org.apache.lucene.analysis.standard.StandardFilter; // import the dependent package/class

@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer source = new StandardTokenizer(LuceneConfig.USED_VERSION, reader);
    TokenStream result = new StandardFilter(LuceneConfig.USED_VERSION, source);
    return new TokenStreamComponents(source, result);
}
Example 9: tokenStream

import org.apache.lucene.analysis.standard.StandardFilter; // import the dependent package/class

public TokenStream tokenStream(String fieldName, Reader reader) {
    return new SynonymFilter(
            new StopFilter(true,
                    new LowerCaseFilter(
                            new StandardFilter(
                                    new StandardTokenizer(Version.LUCENE_41, reader))),
                    StopAnalyzer.ENGLISH_STOP_WORDS_SET),
            engine);
}
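Order matters in this chain: LowerCaseFilter runs before StopFilter, so the all-lowercase ENGLISH_STOP_WORDS_SET matches "The" as well as "the", and the SynonymFilter (the example's own engine-backed class, not a Lucene built-in) only ever sees lowercased, stop-word-free terms. A sketch of the same chain written against a consistent Lucene 4.1 API:

Tokenizer source = new StandardTokenizer(Version.LUCENE_41, reader);
TokenStream chain = new StandardFilter(Version.LUCENE_41, source);
chain = new LowerCaseFilter(Version.LUCENE_41, chain);
chain = new StopFilter(Version.LUCENE_41, chain, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
chain = new SynonymFilter(chain, engine); // the custom filter from this example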
Example 10: create

import org.apache.lucene.analysis.standard.StandardFilter; // import the dependent package/class

@Override
public TokenStream create(TokenStream tokenStream) {
    return new StandardFilter(tokenStream);
}
Example 11: create

import org.apache.lucene.analysis.standard.StandardFilter; // import the dependent package/class

@Override
public StandardFilter create(TokenStream input) {
    return new StandardFilter(input);
}
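Factories like this one are registered under an SPI name ("standard" maps to StandardFilterFactory). Since Lucene 5.0 the equivalent chain can be assembled by name with CustomAnalyzer; a sketch (the builder methods throw IOException):

Analyzer analyzer = CustomAnalyzer.builder()
        .withTokenizer("standard")    // StandardTokenizerFactory
        .addTokenFilter("standard")   // StandardFilterFactory -> StandardFilter
        .addTokenFilter("lowercase")
        .build();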
Example 12: createComponents

import org.apache.lucene.analysis.standard.StandardFilter; // import the dependent package/class

@Override
protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new NGramTokenizer(n, n);
    TokenStream result = new StandardFilter(source);
    return new TokenStreamComponents(source, result);
}
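Here NGramTokenizer(n, n) emits fixed-length character n-grams straight from the input, and StandardFilter again passes them through untouched; with n = 2, "abc" becomes ab, bc. A hypothetical check (NGramAnalyzer is an assumed name for the enclosing class, which is not shown):

Analyzer a = new NGramAnalyzer(2); // hypothetical constructor that sets n = 2
try (TokenStream ts = a.tokenStream("f", new StringReader("abc"))) {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        System.out.println(term); // ab, bc
    }
    ts.end();
}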
Example 13: create

import org.apache.lucene.analysis.standard.StandardFilter; // import the dependent package/class

@Override
public StandardFilter create(TokenStream input) {
    return new StandardFilter(luceneMatchVersion, input);
}
Example 14: StdTermFilter

import org.apache.lucene.analysis.standard.StandardFilter; // import the dependent package/class

/** Construct the rewriter. */
public StdTermFilter() {
    dribble = new DribbleStream();
    filter = new StandardFilter(new LowerCaseFilter(dribble));
}
Example 15: createComponents

import org.apache.lucene.analysis.standard.StandardFilter; // import the dependent package/class

@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final NGramWordTokenizer tokenizer = new NGramWordTokenizer(reader, 2, 3);
    TokenFilter filter = new StandardFilter(tokenizer);
    return new TokenStreamComponents(tokenizer, filter);
}