This article collects typical usage examples of the Java class org.apache.lucene.analysis.core.StopFilter. If you are unsure what StopFilter is for or how to use it, the curated class examples below should help.
The StopFilter class belongs to the org.apache.lucene.analysis.core package. 15 code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
Example 1: getWordSet
import org.apache.lucene.analysis.core.StopFilter; // import the required package/class
/**
 * Returns a {@link CharArraySet} built from wordFiles, which
 * can be a comma-separated list of filenames
 */
protected final CharArraySet getWordSet(ResourceLoader loader,
    String wordFiles, boolean ignoreCase) throws IOException {
  assureMatchVersion();
  List<String> files = splitFileNames(wordFiles);
  CharArraySet words = null;
  if (files.size() > 0) {
    // default stopwords list has 35 or so words, but maybe don't make it that
    // big to start
    words = new CharArraySet(luceneMatchVersion,
        files.size() * 10, ignoreCase);
    for (String file : files) {
      List<String> wlist = getLines(loader, file.trim());
      words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist,
          ignoreCase));
    }
  }
  return words;
}
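In Lucene's analysis factories a helper like this is typically called from inform(), once a ResourceLoader is available. The sketch below is a minimal, hypothetical factory method showing that call pattern; the stopWordFiles, ignoreCase, and stopWords fields are illustrative assumptions, not part of the example above.

// Hypothetical sketch of a factory calling getWordSet from inform().
// Field names (stopWordFiles, ignoreCase, stopWords) are assumed for illustration.
public void inform(ResourceLoader loader) throws IOException {
  if (stopWordFiles != null) {
    // build the stop set from one or more comma-separated word files
    stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
  } else {
    // no files configured: fall back to a default English stop set
    stopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
  }
}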
Example 2: createComponents
import org.apache.lucene.analysis.core.StopFilter; // import the required package/class
/**
 * Creates a token stream that tokenizes the given string into token terms
 * (aka words).
 *
 * @param fieldName
 *          the name of the field to tokenize (currently ignored)
 * @param reader
 *          the reader (e.g. a CharFilter) over the original text; can be null
 * @param text
 *          the string to tokenize
 * @return a new token stream
 */
public TokenStreamComponents createComponents(String fieldName, Reader reader, String text) {
  // Ideally the Analyzer superclass should have a method with the same signature,
  // with a default impl that simply delegates to the StringReader flavour.
  if (reader == null)
    reader = new FastStringReader(text);
  if (pattern == NON_WORD_PATTERN) { // fast path
    return new TokenStreamComponents(new FastStringTokenizer(reader, true, toLowerCase, stopWords));
  } else if (pattern == WHITESPACE_PATTERN) { // fast path
    return new TokenStreamComponents(new FastStringTokenizer(reader, false, toLowerCase, stopWords));
  }
  Tokenizer tokenizer = new PatternTokenizer(reader, pattern, toLowerCase);
  TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer;
  return new TokenStreamComponents(tokenizer, result);
}
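For reference, a stream built from components like these is consumed through the standard attribute API. A minimal sketch, assuming an instance of the enclosing analyzer (called analyzer here) that also exposes this method through the regular Analyzer entry points:

// Minimal consumption sketch; `analyzer` is an assumed instance of the
// enclosing class. Prints one stop-filtered term per line.
TokenStream ts = analyzer.tokenStream("body", new StringReader("The quick brown fox"));
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
ts.reset();
while (ts.incrementToken()) {
  System.out.println(term.toString());
}
ts.end();
ts.close();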
Example 3: testEndingHole
import org.apache.lucene.analysis.core.StopFilter; // import the required package/class
public void testEndingHole() throws Exception {
  // Just deletes "of"
  Analyzer a = new Analyzer() {
    @Override
    public TokenStreamComponents createComponents(String field, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader);
      CharArraySet stopSet = StopFilter.makeStopSet("of");
      return new TokenStreamComponents(tokenizer, new StopFilter(tokenizer, stopSet));
    }
  };
  Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(
      new Input("wizard of oz", 50));
  FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
  sug.build(new InputArrayIterator(keys));
  assertEquals("wizard _ oz/1.00",
      toString(sug.lookup("wizard of", 10)));
  // Falls back to unigram model, with backoff 0.4 times
  // prob 0.5:
  assertEquals("oz/0.20",
      toString(sug.lookup("wizard o", 10)));
}
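The "_" in the expected suggestion marks the position hole StopFilter leaves when it deletes "of". A standalone sketch (not part of the test) that makes the hole visible via PositionIncrementAttribute:

// Standalone sketch: observe the position gap StopFilter leaves behind.
Tokenizer tok = new MockTokenizer(new StringReader("wizard of oz"));
TokenStream ts = new StopFilter(tok, StopFilter.makeStopSet("of"));
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncr = ts.addAttribute(PositionIncrementAttribute.class);
ts.reset();
while (ts.incrementToken()) {
  // prints "wizard +1" then "oz +2": the increment of 2 is the hole
  System.out.println(term + " +" + posIncr.getPositionIncrement());
}
ts.end();
ts.close();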
Example 4: testTwoEndingHoles
import org.apache.lucene.analysis.core.StopFilter; // import the required package/class
public void testTwoEndingHoles() throws Exception {
  // Just deletes "of"
  Analyzer a = new Analyzer() {
    @Override
    public TokenStreamComponents createComponents(String field, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader);
      CharArraySet stopSet = StopFilter.makeStopSet("of");
      return new TokenStreamComponents(tokenizer, new StopFilter(tokenizer, stopSet));
    }
  };
  Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(
      new Input("wizard of of oz", 50));
  FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
  sug.build(new InputArrayIterator(keys));
  assertEquals("",
      toString(sug.lookup("wizard of of", 10)));
}
Example 5: testEndIsStopWord
import org.apache.lucene.analysis.core.StopFilter; // import the required package/class
public void testEndIsStopWord() throws Exception {
  CharArraySet stopWords = StopFilter.makeStopSet("to");
  TokenStream stream = new MockTokenizer(new StringReader("go to "));
  TokenStream filter = new SuggestStopFilter(stream, stopWords);
  assertTokenStreamContents(filter,
      new String[] {"go"},
      new int[] {0},
      new int[] {2},
      null,
      new int[] {1},
      null,
      6,
      new boolean[] {false},
      true);
}
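Note that the input ends with a space: SuggestStopFilter treats the trailing "to" as fully typed and drops it. A sketch of the contrasting case without the trailing space (expected behavior inferred from Example 8 below, where the final stop word is kept with its keyword flag set):

// Sketch: without the trailing space, the final "to" is kept as a tentative
// token, since the user may still be typing e.g. "tomorrow".
CharArraySet stopWords = StopFilter.makeStopSet("to");
TokenStream stream = new MockTokenizer(new StringReader("go to")); // no trailing space
TokenStream filter = new SuggestStopFilter(stream, stopWords);
// expected terms: "go", "to", with the keyword flag set on "to"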
Example 6: testMidStopWord
import org.apache.lucene.analysis.core.StopFilter; // import the required package/class
public void testMidStopWord() throws Exception {
  CharArraySet stopWords = StopFilter.makeStopSet("to");
  TokenStream stream = new MockTokenizer(new StringReader("go to school"));
  TokenStream filter = new SuggestStopFilter(stream, stopWords);
  assertTokenStreamContents(filter,
      new String[] {"go", "school"},
      new int[] {0, 6},
      new int[] {2, 12},
      null,
      new int[] {1, 2},
      null,
      12,
      new boolean[] {false, false},
      true);
}
Example 7: testMultipleStopWords
import org.apache.lucene.analysis.core.StopFilter; // import the required package/class
public void testMultipleStopWords() throws Exception {
  CharArraySet stopWords = StopFilter.makeStopSet("to", "the", "a");
  TokenStream stream = new MockTokenizer(new StringReader("go to a the school"));
  TokenStream filter = new SuggestStopFilter(stream, stopWords);
  assertTokenStreamContents(filter,
      new String[] {"go", "school"},
      new int[] {0, 12},
      new int[] {2, 18},
      null,
      new int[] {1, 4},
      null,
      18,
      new boolean[] {false, false},
      true);
}
Example 8: testMultipleStopWordsEnd
import org.apache.lucene.analysis.core.StopFilter; // import the required package/class
public void testMultipleStopWordsEnd() throws Exception {
  CharArraySet stopWords = StopFilter.makeStopSet("to", "the", "a");
  TokenStream stream = new MockTokenizer(new StringReader("go to a the"));
  TokenStream filter = new SuggestStopFilter(stream, stopWords);
  assertTokenStreamContents(filter,
      new String[] {"go", "the"},
      new int[] {0, 8},
      new int[] {2, 11},
      null,
      new int[] {1, 3},
      null,
      11,
      new boolean[] {false, true},
      true);
}
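The boolean[] {false, true} above are the keyword flags: the trailing "the" survives as a tentative token marked via KeywordAttribute. A standalone sketch showing how to read that flag directly:

// Standalone sketch: inspect the KeywordAttribute that SuggestStopFilter
// sets on a trailing stop word it keeps as a tentative token.
CharArraySet stopWords = StopFilter.makeStopSet("to", "the", "a");
TokenStream stream = new MockTokenizer(new StringReader("go to a the"));
TokenStream filter = new SuggestStopFilter(stream, stopWords);
CharTermAttribute term = filter.addAttribute(CharTermAttribute.class);
KeywordAttribute keyword = filter.addAttribute(KeywordAttribute.class);
filter.reset();
while (filter.incrementToken()) {
  // prints "go=false" then "the=true"
  System.out.println(term + "=" + keyword.isKeyword());
}
filter.end();
filter.close();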
Example 9: testMultipleStopWordsEnd2
import org.apache.lucene.analysis.core.StopFilter; // import the required package/class
public void testMultipleStopWordsEnd2() throws Exception {
  CharArraySet stopWords = StopFilter.makeStopSet("to", "the", "a");
  TokenStream stream = new MockTokenizer(new StringReader("go to a the "));
  TokenStream filter = new SuggestStopFilter(stream, stopWords);
  assertTokenStreamContents(filter,
      new String[] {"go"},
      new int[] {0},
      new int[] {2},
      null,
      new int[] {1},
      null,
      12,
      new boolean[] {false},
      true);
}
Example 10: getWordSet
import org.apache.lucene.analysis.core.StopFilter; // import the required package/class
private CharArraySet getWordSet(ResourceLoader loader,
    String wordFiles, boolean ignoreCase) throws IOException {
  List<String> files = splitFileNames(wordFiles);
  CharArraySet words = null;
  if (files.size() > 0) {
    // default stopwords list has 35 or so words, but maybe don't make it that
    // big to start
    words = new CharArraySet(files.size() * 10, ignoreCase);
    for (String file : files) {
      List<String> wlist = getLines(loader, file.trim());
      words.addAll(StopFilter.makeStopSet(wlist, ignoreCase));
    }
  }
  return words;
}
Example 11: getWordSet
import org.apache.lucene.analysis.core.StopFilter; // import the required package/class
/**
 * Returns a {@link CharArraySet} built from wordFiles, which
 * can be a comma-separated list of filenames
 */
protected CharArraySet getWordSet(ResourceLoader loader,
    String wordFiles, boolean ignoreCase) throws IOException {
  assureMatchVersion();
  List<String> files = splitFileNames(wordFiles);
  CharArraySet words = null;
  if (files.size() > 0) {
    // default stopwords list has 35 or so words, but maybe don't make it that
    // big to start
    words = new CharArraySet(luceneMatchVersion,
        files.size() * 10, ignoreCase);
    for (String file : files) {
      List<String> wlist = getLines(loader, file.trim());
      words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist,
          ignoreCase));
    }
  }
  return words;
}
Example 12: main
import org.apache.lucene.analysis.core.StopFilter; // import the required package/class
public static void main(String[] args) throws IOException {
  String theSentence =
      "this is the scientific article about chemicals like H20 C2H50H with concentration "
          + "of 3.99 kilograms and 0,123 micrograms also i have some CO2 gas n=3 x=45";
  StringReader reader = new StringReader(theSentence);
  Tokenizer whitespaceTokenizer = new WhitespaceTokenizer(reader);
  TokenStream tokenStream =
      new StopFilter(whitespaceTokenizer, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  tokenStream = new ScientificFiltering(tokenStream);
  final CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
  tokenStream.reset();
  while (tokenStream.incrementToken()) {
    System.out.println(charTermAttribute.toString());
  }
  tokenStream.end();
  tokenStream.close();
}
Developer: MysterionRise, Project: information-retrieval-adventure, Lines: 22, Source: SkippingNumbersPreservingChemicals.java
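ScientificFiltering is a custom TokenFilter from that project and its source is not shown on this page. A minimal, hypothetical skeleton of such a filter, assuming the rule is "keep tokens containing letters, drop bare numbers":

// Hypothetical skeleton; the real ScientificFiltering is not shown here and
// its keep/skip rule is an assumption made for illustration.
final class ScientificFiltering extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  ScientificFiltering(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    while (input.incrementToken()) {
      // keep chemical-looking tokens such as "C2H50H"; skip pure numbers like "3.99"
      if (termAtt.toString().chars().anyMatch(Character::isLetter)) {
        return true;
      }
    }
    return false;
  }
}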
Example 13: testEndingHole
import org.apache.lucene.analysis.core.StopFilter; // import the required package/class
public void testEndingHole() throws Exception {
  // Just deletes "of"
  Analyzer a = new Analyzer() {
    @Override
    public TokenStreamComponents createComponents(String field, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader);
      CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of");
      return new TokenStreamComponents(tokenizer, new StopFilter(TEST_VERSION_CURRENT, tokenizer, stopSet));
    }
  };
  Iterable<Input> keys = shuffle(
      new Input("wizard of oz", 50));
  FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
  sug.build(new InputArrayIterator(keys));
  assertEquals("wizard _ oz/1.00",
      toString(sug.lookup("wizard of", 10)));
  // Falls back to unigram model, with backoff 0.4 times
  // prob 0.5:
  assertEquals("oz/0.20",
      toString(sug.lookup("wizard o", 10)));
}
Example 14: testTwoEndingHoles
import org.apache.lucene.analysis.core.StopFilter; // import the required package/class
public void testTwoEndingHoles() throws Exception {
  // Just deletes "of"
  Analyzer a = new Analyzer() {
    @Override
    public TokenStreamComponents createComponents(String field, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader);
      CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of");
      return new TokenStreamComponents(tokenizer, new StopFilter(TEST_VERSION_CURRENT, tokenizer, stopSet));
    }
  };
  Iterable<Input> keys = shuffle(
      new Input("wizard of of oz", 50));
  FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
  sug.build(new InputArrayIterator(keys));
  assertEquals("",
      toString(sug.lookup("wizard of of", 10)));
}
Example 15: testEndIsStopWord
import org.apache.lucene.analysis.core.StopFilter; // import the required package/class
public void testEndIsStopWord() throws Exception {
  CharArraySet stopWords = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "to");
  TokenStream stream = new MockTokenizer(new StringReader("go to "));
  TokenStream filter = new SuggestStopFilter(stream, stopWords);
  assertTokenStreamContents(filter,
      new String[] {"go"},
      new int[] {0},
      new int[] {2},
      null,
      new int[] {1},
      null,
      6,
      new boolean[] {false},
      true);
}