This article collects typical usage examples of the Java method org.apache.lucene.analysis.StopFilter.makeStopSet. If you are wondering what StopFilter.makeStopSet does and how to use it, the curated method examples below should help; you can also explore further usage examples of the class this method belongs to, org.apache.lucene.analysis.StopFilter.
The following presents 11 code examples of StopFilter.makeStopSet, sorted by popularity by default.
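Before the examples, a minimal self-contained sketch of how makeStopSet is typically used may help: it builds a CharArraySet of stop words, which a StopFilter then consults to drop matching tokens. The sketch assumes a Lucene 5.x-or-later API (no Version parameter); the class name and sample text are illustrative, and the package location of CharArraySet varies between Lucene versions:

import java.io.StringReader;

import org.apache.lucene.analysis.CharArraySet; // in Lucene 5.x/6.x: org.apache.lucene.analysis.util.CharArraySet
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class MakeStopSetDemo {
    public static void main(String[] args) throws Exception {
        // makeStopSet builds a stop-word set from plain strings (varargs overload).
        CharArraySet stopSet = StopFilter.makeStopSet("the", "a", "of");

        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("the quick fox of the hill"));

        // StopFilter removes every token contained in the stop set.
        try (TokenStream stream = new StopFilter(tokenizer, stopSet)) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                System.out.println(term); // prints: quick, fox, hill (one per line)
            }
            stream.end();
        }
    }
}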
Example 1: TibetanAnalyzer
import org.apache.lucene.analysis.StopFilter; // import the package/class the method depends on
/**
* Creates a new {@link TibetanAnalyzer}
*
* @param segmentInWords if the segmentation is on words instead of syllables
* @param lemmatize if the analyzer should remove affixed particles, and normalize words in words mode
* @param filterChars if the text should be converted to NFD (necessary for texts containing NFC strings)
* @param inputMethod if the text should be converted from EWTS to Unicode
* @param stopFilename a file name with a stop word list
* @throws IOException if the file containing stopwords can't be opened
*/
public TibetanAnalyzer(boolean segmentInWords, boolean lemmatize, boolean filterChars, String inputMethod, String stopFilename) throws IOException {
    this.segmentInWords = segmentInWords;
    this.lemmatize = lemmatize;
    this.filterChars = filterChars;
    this.inputMethod = inputMethod;
    if (stopFilename != null) {
        if (stopFilename.isEmpty()) {
            InputStream stream = TibetanAnalyzer.class.getResourceAsStream("/bo-stopwords.txt");
            if (stream == null) { // not running from the jar, so the bundled resource is unavailable; assume we're running from source
                this.tibStopSet = null;
            } else {
                this.tibStopSet = StopFilter.makeStopSet(getWordList(stream, "#"));
            }
        } else {
            this.tibStopSet = StopFilter.makeStopSet(getWordList(new FileInputStream(stopFilename), "#"));
        }
    } else {
        this.tibStopSet = null;
    }
}
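For orientation, here is a hypothetical construction call following the Javadoc above. The "ewts" input-method string and the stop-file path are illustrative placeholders, not values confirmed by the project:

// Hypothetical usage; the parameter values are placeholders chosen to match the Javadoc.
TibetanAnalyzer analyzer = new TibetanAnalyzer(
        true,               // segmentInWords: segment on words rather than syllables
        true,               // lemmatize: remove affixed particles, normalize words
        true,               // filterChars: convert input to NFD
        "ewts",             // inputMethod: assumed key for EWTS-to-Unicode conversion
        "bo-stopwords.txt"  // stopFilename: "#"-commented stop list ("" selects the bundled resource)
);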
Example 2: testFillerToken
import org.apache.lucene.analysis.StopFilter; // import the package/class the method depends on
public void testFillerToken() throws IOException {
    ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromClassPath(createTempDir(), RESOURCE);
    TokenFilterFactory tokenFilter = analysis.tokenFilter.get("shingle_filler");
    String source = "simon the sorcerer";
    String[] expected = new String[]{"simon FILLER", "simon FILLER sorcerer", "FILLER sorcerer"};
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader(source));
    TokenStream stream = new StopFilter(tokenizer, StopFilter.makeStopSet("the"));
    assertTokenStreamContents(tokenFilter.create(stream), expected);
}
Example 3: stopwordFilterTest
import org.apache.lucene.analysis.StopFilter; // import the package/class the method depends on
@Test
public void stopwordFilterTest() throws IOException
{
    System.out.println("Testing TibetanAnalyzer.tibStopWords");
    String input = "ཧ་ཏུ་གི་ཀྱི་གིས་ཀྱིས་ཡིས་ཀྱང་སྟེ་ཏེ་མམ་རམ་སམ་ཏམ་ནོ་བོ་ཏོ་གིན་ཀྱིན་གྱིན་ཅིང་ཅིག་ཅེས་ཞེས་ཧ།";
    Reader reader = new StringReader(input);
    List<String> expected = Arrays.asList("ཧ", "ཧ");
    System.out.print(input + " => ");
    TokenStream syllables = tokenize(reader, new TibSyllableTokenizer());
    CharArraySet stopSet = StopFilter.makeStopSet(TibetanAnalyzer.getWordList(new FileInputStream("src/main/resources/bo-stopwords.txt"), "#"));
    StopFilter res = new StopFilter(syllables, stopSet);
    assertTokenStream(res, expected);
}
Example 4: GermanAnalyzer
import org.apache.lucene.analysis.StopFilter; // import the package/class the method depends on
/**
 * Builds an analyzer with the given stop words.
 */
public GermanAnalyzer(String[] stopwords) {
    stopSet = StopFilter.makeStopSet(stopwords);
}
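Examples 1 through 3 pass makeStopSet a word list read from a stream, while this and the remaining examples pass a String[]. For reference, a sketch of the common call shapes, assuming a Lucene version where the older Version parameter has been dropped (Arrays is java.util.Arrays):

CharArraySet fromVarargs = StopFilter.makeStopSet("und", "oder");               // String... overload
CharArraySet fromArray   = StopFilter.makeStopSet(new String[]{"und", "oder"}); // binds to the same varargs overload
CharArraySet fromList    = StopFilter.makeStopSet(Arrays.asList("und", "oder"));
// Variants taking an extra boolean for case-insensitive matching also exist:
CharArraySet ignoringCase = StopFilter.makeStopSet(Arrays.asList("Und", "Oder"), true);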
Example 5: setStemExclusionTable
import org.apache.lucene.analysis.StopFilter; // import the package/class the method depends on
/**
 * Builds an exclusion list from an array of Strings.
 */
public void setStemExclusionTable(String[] exclusionlist) {
    exclusionSet = StopFilter.makeStopSet(exclusionlist);
}
Example 6: AlfrescoStandardAnalyser
import org.apache.lucene.analysis.StopFilter; // import the package/class the method depends on
/** Builds an analyzer with the given stop words. */
public AlfrescoStandardAnalyser(String[] stopWords)
{
    stopSet = StopFilter.makeStopSet(stopWords);
}
Example 7: SpellWritingAnalyzer
import org.apache.lucene.analysis.StopFilter; // import the package/class the method depends on
/**
 * Builds an analyzer which writes to the given spelling dictionary, using the
 * given stop words.
 */
public SpellWritingAnalyzer(String[] stopWords, SpellWriter spellWriter)
{
    this(StopFilter.makeStopSet(stopWords), spellWriter);
}
Example 8: CJKAnalyzer
import org.apache.lucene.analysis.StopFilter; // import the package/class the method depends on
/**
 * Builds an analyzer which removes words in {@link #STOP_WORDS}.
 */
public CJKAnalyzer() {
    stopTable = StopFilter.makeStopSet(STOP_WORDS);
}
Example 9: StopAnalyzer2
import org.apache.lucene.analysis.StopFilter; // import the package/class the method depends on
public StopAnalyzer2(String[] stopWords) {
    this.stopWords = StopFilter.makeStopSet(stopWords);
}
Example 10: StopAnalyzer1
import org.apache.lucene.analysis.StopFilter; // import the package/class the method depends on
public StopAnalyzer1(String[] stopWords) {
    this.stopWords = StopFilter.makeStopSet(stopWords);
}
Example 11: StopAnalyzerFlawed
import org.apache.lucene.analysis.StopFilter; // import the package/class the method depends on
public StopAnalyzerFlawed(String[] stopWords) {
    this.stopWords = StopFilter.makeStopSet(stopWords);
}