本文整理汇总了Java中org.apache.lucene.analysis.snowball.SnowballFilter类的典型用法代码示例。如果您正苦于以下问题:Java SnowballFilter类的具体用法?Java SnowballFilter怎么用?Java SnowballFilter使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
SnowballFilter类属于org.apache.lucene.analysis.snowball包,在下文中一共展示了SnowballFilter类的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testPorter2FilterFactory
import org.apache.lucene.analysis.snowball.SnowballFilter; //导入依赖的package包/类
public void testPorter2FilterFactory() throws IOException {
    // Repeat across random index-created versions to cover version-dependent analysis wiring.
    final int iterations = scaledRandomIntBetween(20, 100);
    for (int iter = 0; iter < iterations; iter++) {
        final Version indexVersion = VersionUtils.randomVersion(random());
        final Settings indexSettings = Settings.builder()
            .put("index.analysis.filter.my_porter2.type", "stemmer")
            .put("index.analysis.filter.my_porter2.language", "porter2")
            .put("index.analysis.analyzer.my_porter2.tokenizer", "whitespace")
            .put("index.analysis.analyzer.my_porter2.filter", "my_porter2")
            .put(SETTING_VERSION_CREATED, indexVersion)
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();
        final ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(indexSettings);

        // The "porter2" stemmer must resolve to the stemmer factory and produce a SnowballFilter.
        final TokenFilterFactory stemmerFactory = analysis.tokenFilter.get("my_porter2");
        assertThat(stemmerFactory, instanceOf(StemmerTokenFilterFactory.class));

        final Tokenizer whitespaceTokenizer = new WhitespaceTokenizer();
        whitespaceTokenizer.setReader(new StringReader("foo bar"));
        final TokenStream stemmed = stemmerFactory.create(whitespaceTokenizer);
        assertThat(stemmed, instanceOf(SnowballFilter.class));

        // End-to-end: the full analyzer applies porter2 stemming ("possibly" -> "possibl").
        final IndexAnalyzers indexAnalyzers = analysis.indexAnalyzers;
        final NamedAnalyzer porter2Analyzer = indexAnalyzers.get("my_porter2");
        assertAnalyzesTo(porter2Analyzer, "possibly", new String[]{"possibl"});
    }
}
示例2: getDefaultStopSet
import org.apache.lucene.analysis.snowball.SnowballFilter; //导入依赖的package包/类
/**
 * Returns the default stop-word set for the given language code.
 *
 * <p>Falls back to {@link StandardAnalyzer#STOP_WORDS_SET} (English) when the
 * language is not recognized. Codes are matched case-insensitively; note that
 * "cz" (non-standard) is accepted alongside "sk" for Czech/Slovak, and "br"
 * alongside "pt" for Portuguese.</p>
 *
 * @param language ISO-style language code (e.g. "en", "fr", "de")
 * @return the stop-word set for that language
 * @throws RuntimeException if a bundled stop-word resource cannot be loaded
 */
public static CharArraySet getDefaultStopSet(String language) {
    try {
        if ("en".equalsIgnoreCase(language)) {
            return StandardAnalyzer.STOP_WORDS_SET;
        } else if ("es".equalsIgnoreCase(language)) {
            return snowballStopSet("spanish_stop.txt");
        } else if ("fr".equalsIgnoreCase(language)) {
            return snowballStopSet("french_stop.txt");
        } else if ("de".equalsIgnoreCase(language)) {
            return snowballStopSet("german_stop.txt");
        } else if ("pl".equalsIgnoreCase(language)) {
            return WordlistLoader.getWordSet(IOUtils.getDecodingReader(PolishAnalyzer.class, "stopwords.txt", StandardCharsets.UTF_8), "#");
        } else if ("pt".equalsIgnoreCase(language) || "br".equalsIgnoreCase(language)) {
            return snowballStopSet("portuguese_stop.txt");
        } else if ("it".equalsIgnoreCase(language)) {
            return snowballStopSet("italian_stop.txt");
        } else if ("cz".equalsIgnoreCase(language) || "sk".equalsIgnoreCase(language)) {
            return WordlistLoader.getWordSet(IOUtils.getDecodingReader(CzechAnalyzer.class, "stopwords.txt", StandardCharsets.UTF_8), "#");
        } else if ("tr".equalsIgnoreCase(language)) {
            return TurkishAnalyzer.loadStopwordSet(false, TurkishAnalyzer.class, "stopwords.txt", "#");
        } else if ("ru".equalsIgnoreCase(language)) {
            return snowballStopSet("russian_stop.txt");
        } else if ("ro".equalsIgnoreCase(language)) {
            return RomanianAnalyzer.loadStopwordSet(false, RomanianAnalyzer.class, "stopwords.txt", "#");
        } else if ("bg".equalsIgnoreCase(language)) {
            return BulgarianAnalyzer.loadStopwordSet(false, BulgarianAnalyzer.class, "stopwords.txt", "#");
        } else if ("nl".equalsIgnoreCase(language)) {
            return snowballStopSet("dutch_stop.txt");
        }
    } catch (Exception e) {
        // Preserve the underlying cause; the original code dropped it, which made
        // resource-loading failures impossible to diagnose.
        throw new RuntimeException("Unable to load default stopword set for language: " + language, e);
    }
    // Unknown language: default to the English stop-word set.
    return StandardAnalyzer.STOP_WORDS_SET;
}

/**
 * Loads a Snowball-format stop-word list bundled next to {@link SnowballFilter}.
 *
 * @param resource resource file name (e.g. "french_stop.txt")
 * @return the parsed stop-word set
 * @throws IOException if the resource cannot be read
 */
private static CharArraySet snowballStopSet(String resource) throws IOException {
    return WordlistLoader.getSnowballWordSet(
        IOUtils.getDecodingReader(SnowballFilter.class, resource, StandardCharsets.UTF_8));
}
示例3: create
import org.apache.lucene.analysis.snowball.SnowballFilter; //导入依赖的package包/类
@Override
public TokenStream create(TokenStream tokenStream) {
    // Wrap the incoming stream with the Snowball "Russian" stemmer.
    final SnowballFilter russianStemmer = new SnowballFilter(tokenStream, "Russian");
    return russianStemmer;
}
示例4: create
import org.apache.lucene.analysis.snowball.SnowballFilter; //导入依赖的package包/类
@Override
public TokenStream create(TokenStream tokenStream) {
    // Apply the Snowball stemmer for the configured language to the stream.
    final SnowballFilter stemmer = new SnowballFilter(tokenStream, language);
    return stemmer;
}
示例5: testBasic
import org.apache.lucene.analysis.snowball.SnowballFilter; //导入依赖的package包/类
public void testBasic() throws IOException {
    // KeywordRepeatFilter emits each token twice (keyword + stemmable copy); the
    // Snowball stemmer then stems the copy, and RemoveDuplicatesTokenFilter drops
    // stems identical to the original ("the", "are").
    final MockTokenizer source =
        new MockTokenizer(new StringReader("the birds are flying"), MockTokenizer.WHITESPACE, false);
    final TokenStream stemmed = new SnowballFilter(new KeywordRepeatFilter(source), "English");
    final TokenStream ts = new RemoveDuplicatesTokenFilter(stemmed);
    assertTokenStreamContents(ts,
        new String[] { "the", "birds", "bird", "are", "flying", "fli" },
        new int[] { 1, 1, 0, 1, 1, 0 });
}
示例6: testComposition
import org.apache.lucene.analysis.snowball.SnowballFilter; //导入依赖的package包/类
public void testComposition() throws IOException {
    // Two stacked KeywordRepeatFilters would emit four copies per token; after
    // stemming, RemoveDuplicatesTokenFilter collapses them back to at most one
    // original plus one distinct stem per position.
    final MockTokenizer source =
        new MockTokenizer(new StringReader("the birds are flying"), MockTokenizer.WHITESPACE, false);
    final TokenStream repeatedTwice = new KeywordRepeatFilter(new KeywordRepeatFilter(source));
    final TokenStream ts = new RemoveDuplicatesTokenFilter(new SnowballFilter(repeatedTwice, "English"));
    assertTokenStreamContents(ts,
        new String[] { "the", "birds", "bird", "are", "flying", "fli" },
        new int[] { 1, 1, 0, 1, 1, 0 });
}
示例7: create
import org.apache.lucene.analysis.snowball.SnowballFilter; //导入依赖的package包/类
@Override
public TokenStream create(TokenStream tokenStream) {
return new SnowballFilter(tokenStream, "French");
}