This article collects typical usage examples of the Java class org.apache.lucene.analysis.StopFilter. If you are wondering what StopFilter is for, how to use it, or what real-world usages look like, the curated examples below may help.
The StopFilter class belongs to the org.apache.lucene.analysis package. The examples that follow are drawn from open-source projects and show the class in context.
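Before the project examples, here is a minimal, self-contained sketch of StopFilter in isolation. It is not taken from any of the projects below and assumes the Lucene 5+ API, where StopFilter takes a TokenStream and a CharArraySet:

import java.io.StringReader;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class StopFilterDemo {
    public static void main(String[] args) throws Exception {
        // tokenize on whitespace, then drop the stop words
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("the quick brown fox"));
        CharArraySet stopSet = StopFilter.makeStopSet("the", "a", "an");
        try (TokenStream stream = new StopFilter(tokenizer, stopSet)) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                System.out.println(term); // prints: quick, brown, fox
            }
            stream.end();
        }
    }
}

StopFilter.makeStopSet(...) builds the CharArraySet; in recent Lucene versions the filter removes matching tokens but still records the position gap they leave behind.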
Example 1: testCorrectPositionIncrementSetting
import org.apache.lucene.analysis.StopFilter; // import the required package/class
public void testCorrectPositionIncrementSetting() throws IOException {
    Builder builder = Settings.builder().put("index.analysis.filter.my_stop.type", "stop");
    if (random().nextBoolean()) {
        builder.put("index.analysis.filter.my_stop.version", Version.LATEST);
    } else {
        // don't specify a version; the factory should fall back to the default
    }
    builder.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString());
    ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(builder.build());
    TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_stop");
    assertThat(tokenFilter, instanceOf(StopTokenFilterFactory.class));
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("foo bar"));
    TokenStream create = tokenFilter.create(tokenizer);
    assertThat(create, instanceOf(StopFilter.class));
}
Example 2: TibetanAnalyzer
import org.apache.lucene.analysis.StopFilter; // import the required package/class
/**
 * Creates a new {@link TibetanAnalyzer}
 *
 * @param segmentInWords whether to segment on words instead of syllables
 * @param lemmatize whether the analyzer should remove affixed particles and normalize words in word mode
 * @param filterChars whether the text should be converted to NFD (necessary for texts containing NFC strings)
 * @param inputMethod whether the text should be converted from EWTS to Unicode
 * @param stopFilename name of a file containing a stop word list
 * @throws IOException if the stopword file can't be opened
 */
public TibetanAnalyzer(boolean segmentInWords, boolean lemmatize, boolean filterChars, String inputMethod, String stopFilename) throws IOException {
    this.segmentInWords = segmentInWords;
    this.lemmatize = lemmatize;
    this.filterChars = filterChars;
    this.inputMethod = inputMethod;
    if (stopFilename != null) {
        if (stopFilename.isEmpty()) {
            // fall back to the stopword list bundled in the jar
            InputStream stream = TibetanAnalyzer.class.getResourceAsStream("/bo-stopwords.txt");
            if (stream == null) {
                // not running from the jar, so the bundled resource is unavailable
                this.tibStopSet = null;
            } else {
                this.tibStopSet = StopFilter.makeStopSet(getWordList(stream, "#"));
            }
        } else {
            this.tibStopSet = StopFilter.makeStopSet(getWordList(new FileInputStream(stopFilename), "#"));
        }
    } else {
        this.tibStopSet = null;
    }
}
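As a quick illustration of the constructor above, a hypothetical call could look like the following (the "unicode" input-method string and the file path are assumptions for illustration, not values taken from the project):

// word segmentation, lemmatization and NFD conversion enabled, Unicode input,
// stop words loaded from an external file
TibetanAnalyzer analyzer = new TibetanAnalyzer(true, true, true, "unicode", "/path/to/bo-stopwords.txt");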
Example 3: reusableTokenStream
import org.apache.lucene.analysis.StopFilter; // import the required package/class
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    // holder for the tokenizer/filter chain so it can be reused across calls
    class SavedStreams {
        StandardTokenizer tokenStream;
        TokenStream filteredTokenStream;
    }
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        streams = new SavedStreams();
        setPreviousTokenStream(streams);
        streams.tokenStream = new StandardTokenizer(LUCENE_VERSION, reader);
        streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
        streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
        streams.filteredTokenStream = new StopFilter(true, streams.filteredTokenStream, STOP_WORDS_SET);
        streams.filteredTokenStream = new ASCIIFoldingFilter(streams.filteredTokenStream);
    } else {
        streams.tokenStream.reset(reader);
    }
    streams.tokenStream.setMaxTokenLength(DEFAULT_MAX_TOKEN_LENGTH);
    return streams.filteredTokenStream;
}
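Note that this example is written against the pre-4.0 Analyzer API: reusableTokenStream(), getPreviousTokenStream() and setPreviousTokenStream() were removed in Lucene 4, where stream reuse is handled by overriding createComponents(), as in Example 8 below.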
Example 4: createDict
import org.apache.lucene.analysis.StopFilter; // import the required package/class
/**
 * Read a Lucene index and make a spelling dictionary from it. A minimal token
 * analyzer will be used, which is usually just what is needed for the
 * dictionary. The default set of English stop words will be used (see
 * {@link StopAnalyzer#ENGLISH_STOP_WORDS}).
 *
 * @param indexDir directory containing the Lucene index
 * @param dictDir directory to receive the spelling dictionary
 * @param prog tracker called periodically to display progress
 */
public static void createDict(Directory indexDir,
                              File dictDir,
                              ProgressTracker prog)
    throws IOException
{
    // Open and clear the dictionary (since we're going to totally rebuild it)
    SpellWriter spellWriter = SpellWriter.open(dictDir);
    spellWriter.clearDictionary();
    spellWriter.setStopwords(StopFilter.makeStopSet(StopAnalyzer.ENGLISH_STOP_WORDS));

    // Now re-tokenize all the fields and queue the words for the dictionary.
    IndexReader indexReader = IndexReader.open(indexDir);
    createDict(indexReader, new MinimalAnalyzer(), spellWriter, prog);

    // All done.
    spellWriter.close();
}
Example 5: reusableTokenStream
import org.apache.lucene.analysis.StopFilter; // import the required package/class
/**
 * @param fieldName the field to analyze
 * @param reader the source text
 * @return a reused token stream: whitespace tokenizer, stop filter, then Porter stemmer
 * @throws IOException if the reader can't be reset
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        streams = new SavedStreams();
        streams.source = new WhitespaceTokenizer(reader);
        streams.result = new StopFilter(true, streams.source, stopwords, true);
        // wrap the stop-filtered stream; wrapping streams.source here would discard the StopFilter
        streams.result = new PorterStemFilter(streams.result);
        setPreviousTokenStream(streams);
    } else {
        streams.source.reset(reader);
    }
    return streams.result;
}
Example 6: reusableTokenStream
import org.apache.lucene.analysis.StopFilter; // import the required package/class
/**
 * @param fieldName the field to analyze
 * @param reader the source text
 * @return a reused token stream: lower-case tokenizer followed by a stop filter
 * @throws IOException if the reader can't be reset
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        streams = new SavedStreams();
        streams.source = new LowerCaseTokenizer(reader);
        streams.result = new StopFilter(true, streams.source, stopWords, true);
        // streams.result = new PorterStemFilter(streams.result);
        setPreviousTokenStream(streams);
    } else {
        streams.source.reset(reader);
    }
    return streams.result;
}
Example 7: reusableTokenStream
import org.apache.lucene.analysis.StopFilter; // import the required package/class
/**
 * @param fieldName the field to analyze
 * @param reader the source text
 * @return a reused token stream: whitespace tokenizer followed by a stop filter
 * @throws IOException if the reader can't be reset
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        streams = new SavedStreams();
        streams.source = new WhitespaceTokenizer(reader);
        streams.result = new StopFilter(true, streams.source, stopWords, true);
        // streams.result = new PorterStemFilter(streams.result);
        setPreviousTokenStream(streams);
    } else {
        streams.source.reset(reader);
    }
    return streams.result;
}
Example 8: createComponents
import org.apache.lucene.analysis.StopFilter; // import the required package/class
@Override
protected TokenStreamComponents createComponents(String fieldName,
                                                 Reader reader) {
    Analysis in;
    try {
        in = analysis.getConstructor(Reader.class).newInstance(reader);
    } catch (Exception e) {
        throw new RuntimeException("Ansj analysis class can't be instantiated!", e);
    }
    final Tokenizer source = new AnsjTokenizer(reader, in);
    TokenStreamComponents result;
    if (stopwords.isEmpty()) {
        result = new TokenStreamComponents(source);
    } else {
        result = new TokenStreamComponents(source, new StopFilter(matchVersion, source, stopwords));
    }
    return result;
}
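The three-argument StopFilter(matchVersion, source, stopwords) constructor is the Lucene 3.x/4.x-era signature; in Lucene 5 and later the Version parameter was dropped, which is why the subsequent examples pass only the stream and the stopword set.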
Example 9: create
import org.apache.lucene.analysis.StopFilter; // import the required package/class
@Override
public TokenStream create(TokenStream tokenStream) {
    if (removeTrailing) {
        return new StopFilter(tokenStream, stopWords);
    } else {
        return new SuggestStopFilter(tokenStream, stopWords);
    }
}
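The branch matters for suggesters: a plain StopFilter removes every stop word, while SuggestStopFilter keeps a stop word that ends the stream without a trailing separator, so a query the user is still typing (e.g. one ending in "the") still yields a token to suggest on.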
Example 10: testFillerToken
import org.apache.lucene.analysis.StopFilter; // import the required package/class
public void testFillerToken() throws IOException {
    ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromClassPath(createTempDir(), RESOURCE);
    TokenFilterFactory tokenFilter = analysis.tokenFilter.get("shingle_filler");
    String source = "simon the sorcerer";
    String[] expected = new String[]{"simon FILLER", "simon FILLER sorcerer", "FILLER sorcerer"};
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader(source));
    TokenStream stream = new StopFilter(tokenizer, StopFilter.makeStopSet("the"));
    assertTokenStreamContents(tokenFilter.create(stream), expected);
}
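Here the StopFilter removes "the" but leaves a position gap behind; the shingle filter under test fills that gap with its configured filler token (here "FILLER") when assembling two- and three-word shingles, which is why the expected output reads "simon FILLER" rather than "simon sorcerer".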
Example 11: stopwordFilterTest
import org.apache.lucene.analysis.StopFilter; // import the required package/class
@Test
public void stopwordFilterTest() throws IOException
{
    System.out.println("Testing TibetanAnalyzer.tibStopWords");
    String input = "ཧ་ཏུ་གི་ཀྱི་གིས་ཀྱིས་ཡིས་ཀྱང་སྟེ་ཏེ་མམ་རམ་སམ་ཏམ་ནོ་བོ་ཏོ་གིན་ཀྱིན་གྱིན་ཅིང་ཅིག་ཅེས་ཞེས་ཧ།";
    Reader reader = new StringReader(input);
    // only the two non-stopword syllables should survive the filter
    List<String> expected = Arrays.asList("ཧ", "ཧ");
    System.out.print(input + " => ");
    TokenStream syllables = tokenize(reader, new TibSyllableTokenizer());
    CharArraySet stopSet = StopFilter.makeStopSet(TibetanAnalyzer.getWordList(new FileInputStream("src/main/resources/bo-stopwords.txt"), "#"));
    StopFilter res = new StopFilter(syllables, stopSet);
    assertTokenStream(res, expected);
}
Example 12: getTokenFilters
import org.apache.lucene.analysis.StopFilter; // import the required package/class
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
    return Collections.singletonMap("benz_cjk",
        (indexSettings, environment, name, settings) -> new AbstractTokenFilterFactory(indexSettings, name, settings) {
            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new StopFilter(tokenStream, config.getStopWords());
            }
        });
}
Example 13: create
import org.apache.lucene.analysis.StopFilter; // import the required package/class
@Override
public TokenStream create(final TokenStream tokenStream) {
    if (removeTrailing) {
        return new StopFilter(tokenStream, stopWords);
    } else {
        return new SuggestStopFilter(tokenStream, stopWords);
    }
}
Example 14: tokenStream
import org.apache.lucene.analysis.StopFilter; // import the required package/class
@Override
public final TokenStream tokenStream(String fieldName, Reader reader) {
    // Apply stop words and Porter stemmer using a lower-case tokenizer.
    TokenStream stream = new StopFilter(new LowerCaseTokenizer(reader),
                                        StandardAnalyzer.STOP_WORDS);
    return new PorterStemFilter(stream);
}