This article collects typical usage examples of the Java class org.apache.lucene.analysis.TokenFilter. If you are wondering what the TokenFilter class is for, how to use it, or what it looks like in real code, the curated examples below should help.
The TokenFilter class belongs to the org.apache.lucene.analysis package. Fifteen code examples of the class are shown below, ordered by popularity by default.
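Before the examples, here is a minimal illustrative sketch (not taken from any of the examples below) of what a TokenFilter subclass typically looks like: it wraps an upstream TokenStream and overrides incrementToken() to consume and, if desired, rewrite each token. The filter name is invented for this sketch.

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Illustrative sketch: a TokenFilter that upper-cases every token passing through it.
public final class UpperCaseExampleFilter extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  public UpperCaseExampleFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false; // the upstream stream is exhausted
    }
    // Rewrite the term text in place; all other attributes pass through unchanged.
    final char[] buffer = termAtt.buffer();
    for (int i = 0; i < termAtt.length(); i++) {
      buffer[i] = Character.toUpperCase(buffer[i]);
    }
    return true;
  }
}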
Example 1: testInvalidOffset
import org.apache.lucene.analysis.TokenFilter; // import the required package/class
public void testInvalidOffset() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
      filters = new WordTokenFilter(filters);
      return new TokenStreamComponents(tokenizer, filters);
    }
  };
  assertAnalyzesTo(analyzer, "mosfellsbær",
      new String[] { "mosfellsbaer" },
      new int[] { 0 },
      new int[] { 11 });
}
Example 2: testInvalidOffsets
import org.apache.lucene.analysis.TokenFilter; // import the required package/class
public void testInvalidOffsets() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
      filters = new NGramTokenFilter(filters, 2, 2);
      return new TokenStreamComponents(tokenizer, filters);
    }
  };
  assertAnalyzesTo(analyzer, "mosfellsbær",
      new String[] { "mo", "os", "sf", "fe", "el", "ll", "ls", "sb", "ba", "ae", "er" },
      new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
      new int[] { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 },
      new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 });
}
Example 3: testFirstPosInc
import org.apache.lucene.analysis.TokenFilter; // import the required package/class
public void testFirstPosInc() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenFilter filter = new MockSynonymFilter(tokenizer);
      StopFilter stopfilter = new StopFilter(Version.LUCENE_4_3, filter, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
      stopfilter.setEnablePositionIncrements(false);
      return new TokenStreamComponents(tokenizer, stopfilter);
    }
  };
  assertAnalyzesTo(analyzer, "the quick brown fox",
      new String[] { "hte", "quick", "brown", "fox" },
      new int[] { 1, 1, 1, 1 });
}
Example 4: create
import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@Override
public TokenStream create(TokenStream input) {
  return new TokenFilter(input) {
    @Override
    public boolean incrementToken() throws IOException {
      if (input.incrementToken()) {
        try {
          throw exceptionClass.newInstance();
        } catch (IllegalAccessException iae) {
          throw new RuntimeException(iae);
        } catch (InstantiationException ie) {
          throw new RuntimeException(ie);
        }
      }
      return false;
    }
  };
}
Example 5: annotate
import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@Override
public List<Annotation> annotate(String text) throws Exception {
  text = SimpleTokenizer.format(text);
  Analyzer analyser = new EnglishAnalyzer(Version.LUCENE_47, CharArraySet.EMPTY_SET);
  TokenFilter filter = new EnglishMinimalStemFilter(analyser.tokenStream("text", new StringReader(text)));
  List<Annotation> out = Lists.newArrayList();
  filter.reset(); // the TokenStream contract requires reset() before the first incrementToken()
  while (filter.incrementToken()) {
    CharTermAttribute az = filter.getAttribute(CharTermAttribute.class);
    OffsetAttribute o = filter.getAttribute(OffsetAttribute.class);
    String token = text.substring(o.startOffset(), o.endOffset());
    String lemma = az.toString();
    Annotation t = new Annotation();
    t.setForm(token);
    t.setLemma(lemma);
    out.add(t);
  }
  if (out.size() == 0) {
    log.debug("Input string is empty");
  }
  filter.end(); // finish the stream before closing it
  filter.close();
  analyser.close();
  return out;
}
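A minimal sketch of how the annotate() method above might be invoked. The enclosing class name LemmaAnnotator and the getForm()/getLemma() accessors are assumptions that mirror the setForm()/setLemma() calls; they are not taken from the snippet itself.

// Hypothetical usage of the annotate() method shown above.
LemmaAnnotator annotator = new LemmaAnnotator();   // assumed class name
List<Annotation> annotations = annotator.annotate("The dogs were running quickly");
for (Annotation a : annotations) {
  System.out.println(a.getForm() + " -> " + a.getLemma());   // assumed accessors
}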
Example 6: testFirstPosInc
import org.apache.lucene.analysis.TokenFilter; // import the required package/class
public void testFirstPosInc() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenFilter filter = new MockSynonymFilter(tokenizer);
      StopFilter stopfilter = new StopFilter(TEST_VERSION_CURRENT, filter, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
      stopfilter.setEnablePositionIncrements(false);
      return new TokenStreamComponents(tokenizer, stopfilter);
    }
  };
  assertAnalyzesTo(analyzer, "the quick brown fox",
      new String[] { "hte", "quick", "brown", "fox" },
      new int[] { 1, 1, 1, 1 });
}
Example 7: testInvalidOffsets
import org.apache.lucene.analysis.TokenFilter; // import the required package/class
public void testInvalidOffsets() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
      filters = new NGramTokenFilter(TEST_VERSION_CURRENT, filters, 2, 2);
      return new TokenStreamComponents(tokenizer, filters);
    }
  };
  assertAnalyzesTo(analyzer, "mosfellsbær",
      new String[] { "mo", "os", "sf", "fe", "el", "ll", "ls", "sb", "ba", "ae", "er" },
      new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
      new int[] { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 },
      new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 });
}
Example 8: testFirstPosInc
import org.apache.lucene.analysis.TokenFilter; // import the required package/class
public void testFirstPosInc() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenFilter filter = new MockSynonymFilter(tokenizer);
      StopFilter stopfilter = new StopFilter(Version.LUCENE_43, filter, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
      stopfilter.setEnablePositionIncrements(false);
      return new TokenStreamComponents(tokenizer, stopfilter);
    }
  };
  assertAnalyzesTo(analyzer, "the quick brown fox",
      new String[] { "hte", "quick", "brown", "fox" },
      new int[] { 1, 1, 1, 1 });
}
Example 9: create
import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@Override
public TokenFilter create(TokenStream input) {
  if (luceneMatchVersion == null) {
    return new NGramTokenFilter(input, minGramSize, maxGramSize);
  }
  return new NGramTokenFilter(luceneMatchVersion, input, minGramSize, maxGramSize);
}
Example 10: create
import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@Override
public TokenFilter create(TokenStream input) {
  if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
    return new HyphenationCompoundWordTokenFilter(input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
  }
  return new Lucene43HyphenationCompoundWordTokenFilter(input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
}
Example 11: create
import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@Override
public TokenFilter create(TokenStream input) {
  if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_8_0)) {
    return new WordDelimiterFilter(luceneMatchVersion, input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable,
        flags, protectedWords);
  } else {
    return new Lucene47WordDelimiterFilter(input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable,
        flags, protectedWords);
  }
}
Example 12: affixedFilterTest
import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@Test
public void affixedFilterTest() throws IOException
{
  System.out.println("Testing TibAffixedFilter()");
  String input = "དག། གའམ། གའིའོ། དགའ། དགའི། དགའོ། དགའིས། དགའང་། དགའམ། དགའིའོ།";
  Reader reader = new StringReader(input);
  List<String> expected = Arrays.asList("དག", "ག", "ག", "དགའ", "དགའ", "དགའ", "དགའ", "དགའ", "དགའ", "དགའ");
  System.out.print(input + " => ");
  TokenStream syllables = tokenize(reader, new TibSyllableTokenizer());
  TokenFilter res = new TibAffixedFilter(syllables);
  assertTokenStream(res, expected);
}
Example 13: createComponents
import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  JiebaTokenizer tokenizer = new JiebaTokenizer();
  if (userDictIn != null) {
    try {
      tokenizer.loadUserDict(userDictIn);
    } catch (IOException e) {
      throw new RuntimeException("load user dict error");
    }
  }
  TokenFilter stopFilter = new JiebaStopTokenFilter(tokenizer);
  return new TokenStreamComponents(tokenizer, stopFilter);
}
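For context, a token stream produced by an analyzer like the one above is consumed through the standard Lucene contract: reset(), repeated incrementToken(), end(), close(). A minimal sketch, assuming the enclosing analyzer class is called JiebaAnalyzer (an assumed name, not shown in the snippet above):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class JiebaAnalyzerDemo {
  public static void main(String[] args) throws Exception {
    Analyzer analyzer = new JiebaAnalyzer();   // assumed analyzer class wrapping the createComponents() above
    try (TokenStream ts = analyzer.tokenStream("content", "这是一个分词测试")) {
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();                              // mandatory before the first incrementToken()
      while (ts.incrementToken()) {
        System.out.println(term.toString());
      }
      ts.end();                                // records the final offset state
    }
  }
}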
Example 14: createComponents
import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  final AutocompleteTokenizer tokenizer = new AutocompleteTokenizer(reader);
  TokenFilter filter = new StandardFilter(tokenizer);
  return new TokenStreamComponents(tokenizer, filter);
}
Example 15: spellcheckAnalyzer
import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@NotNull
private static Analyzer spellcheckAnalyzer(@NotNull final SpellChecker spellChecker) {
  return new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(@NotNull final String field) {
      final Tokenizer source = new WhitespaceTokenizer();
      source.setReader(new StringReader(field));
      final SpellCheckerTokenFilter spellCheckFilter = new SpellCheckerTokenFilter(defaultTokenFilter(source), spellChecker);
      final TokenFilter concatenatingFilter = new ConcatenatingFilter(spellCheckFilter, ' ');
      return new TokenStreamComponents(source, concatenatingFilter);
    }
  };
}