

Java TokenFilter Class Code Examples

This article collects typical usage examples of the Java class org.apache.lucene.analysis.TokenFilter. If you are wondering what the TokenFilter class does, how to use it, or what working examples look like, the curated code samples below should help.


The TokenFilter class belongs to the org.apache.lucene.analysis package. Fifteen code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
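Before the examples, here is a minimal sketch of the subclassing pattern they all build on: extend TokenFilter, obtain the attributes you need via addAttribute, and override incrementToken() to pull tokens from the wrapped stream and rewrite them in place. The class name UpperCaseExampleFilter is hypothetical and exists only to illustrate the pattern.

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Hypothetical example filter: upper-cases every token from the wrapped stream.
public final class UpperCaseExampleFilter extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  public UpperCaseExampleFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false; // the upstream tokenizer is exhausted
    }
    // Rewrite the term attribute's character buffer in place.
    final char[] buffer = termAtt.buffer();
    for (int i = 0; i < termAtt.length(); i++) {
      buffer[i] = Character.toUpperCase(buffer[i]);
    }
    return true;
  }
}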

Example 1: testInvalidOffset

import org.apache.lucene.analysis.TokenFilter; // import the required package/class
public void testInvalidOffset() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
      filters = new WordTokenFilter(filters);
      return new TokenStreamComponents(tokenizer, filters);
    }
  };
  
  assertAnalyzesTo(analyzer, "mosfellsbær", 
      new String[] { "mosfellsbaer" },
      new int[]    { 0 },
      new int[]    { 11 });
}
 
Developer ID: europeana, Project: search, Lines: 17, Source: TestSmartChineseAnalyzer.java

Example 2: testInvalidOffsets

import org.apache.lucene.analysis.TokenFilter; // import the required package/class
public void testInvalidOffsets() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
      filters = new NGramTokenFilter(filters, 2, 2);
      return new TokenStreamComponents(tokenizer, filters);
    }
  };
  assertAnalyzesTo(analyzer, "mosfellsbær",
      new String[] { "mo", "os", "sf", "fe", "el", "ll", "ls", "sb", "ba", "ae", "er" },
      new int[]    {    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0 },
      new int[]    {   11,   11,   11,   11,   11,   11,   11,   11,   11,   11,   11 },
      new int[]    {    1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0 });
}
 
Developer ID: europeana, Project: search, Lines: 17, Source: NGramTokenFilterTest.java

Example 3: testFirstPosInc

import org.apache.lucene.analysis.TokenFilter; // import the required package/class
public void testFirstPosInc() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenFilter filter = new MockSynonymFilter(tokenizer);
      StopFilter stopfilter = new StopFilter(Version.LUCENE_4_3, filter, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
      stopfilter.setEnablePositionIncrements(false);
      return new TokenStreamComponents(tokenizer, stopfilter);
    }
  };
  
  assertAnalyzesTo(analyzer, "the quick brown fox",
      new String[] { "hte", "quick", "brown", "fox" },
      new int[] { 1, 1, 1, 1} );
}
 
Developer ID: europeana, Project: search, Lines: 17, Source: TestStopFilter.java

Example 4: create

import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@Override
public TokenStream create(TokenStream input) {
  return new TokenFilter(input) {
    @Override
    public boolean incrementToken() throws IOException {
      if (input.incrementToken()) {
        try {
          throw exceptionClass.newInstance();
        } catch (IllegalAccessException iae) {
          throw new RuntimeException(iae);
        } catch (InstantiationException ie) {
          throw new RuntimeException(ie);
        }
      }
      return false;
    }
  };
}
 
Developer ID: europeana, Project: search, Lines: 19, Source: ThrowingMockTokenFilterFactory.java

Example 5: annotate

import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@Override
public List<Annotation> annotate(String text) throws Exception {
  text = SimpleTokenizer.format(text);
  Analyzer analyser = new EnglishAnalyzer(Version.LUCENE_47, CharArraySet.EMPTY_SET);
  TokenFilter filter = new EnglishMinimalStemFilter(analyser.tokenStream("text", new StringReader(text)));
  List<Annotation> out = Lists.newArrayList();
  filter.reset(); // the TokenStream contract requires reset() before incrementToken()
  while (filter.incrementToken()) {
    CharTermAttribute az = filter.getAttribute(CharTermAttribute.class);
    OffsetAttribute o = filter.getAttribute(OffsetAttribute.class);
    String token = text.substring(o.startOffset(), o.endOffset());
    String lemma = az.toString();
    Annotation t = new Annotation();
    t.setForm(token);
    t.setLemma(lemma);
    out.add(t);
  }
  if (out.isEmpty()) {
    log.debug("Input string is empty");
  }
  filter.end(); // finalize stream state before closing
  filter.close();
  analyser.close();
  return out;
}
 
Developer ID: kouylekov, Project: edits, Lines: 24, Source: LuceneTokenizer.java

Example 6: testFirstPosInc

import org.apache.lucene.analysis.TokenFilter; // import the required package/class
public void testFirstPosInc() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenFilter filter = new MockSynonymFilter(tokenizer);
      StopFilter stopfilter = new StopFilter(TEST_VERSION_CURRENT, filter, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
      stopfilter.setEnablePositionIncrements(false);
      return new TokenStreamComponents(tokenizer, stopfilter);
    }
  };
  
  assertAnalyzesTo(analyzer, "the quick brown fox",
      new String[] { "hte", "quick", "brown", "fox" },
      new int[] { 1, 1, 1, 1} );
}
 
Developer ID: pkarmstr, Project: NYBC, Lines: 17, Source: TestStopFilter.java

Example 7: testInvalidOffsets

import org.apache.lucene.analysis.TokenFilter; // import the required package/class
public void testInvalidOffsets() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
      filters = new NGramTokenFilter(TEST_VERSION_CURRENT, filters, 2, 2);
      return new TokenStreamComponents(tokenizer, filters);
    }
  };
  assertAnalyzesTo(analyzer, "mosfellsbær",
      new String[] { "mo", "os", "sf", "fe", "el", "ll", "ls", "sb", "ba", "ae", "er" },
      new int[]    {    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0 },
      new int[]    {   11,   11,   11,   11,   11,   11,   11,   11,   11,   11,   11 },
      new int[]    {    1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0 });
}
 
Developer ID: jimaguere, Project: Maskana-Gestor-de-Conocimiento, Lines: 17, Source: NGramTokenFilterTest.java

Example 8: testFirstPosInc

import org.apache.lucene.analysis.TokenFilter; // import the required package/class
public void testFirstPosInc() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenFilter filter = new MockSynonymFilter(tokenizer);
      StopFilter stopfilter = new StopFilter(Version.LUCENE_43, filter, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
      stopfilter.setEnablePositionIncrements(false);
      return new TokenStreamComponents(tokenizer, stopfilter);
    }
  };
  
  assertAnalyzesTo(analyzer, "the quick brown fox",
      new String[] { "hte", "quick", "brown", "fox" },
      new int[] { 1, 1, 1, 1} );
}
 
Developer ID: jimaguere, Project: Maskana-Gestor-de-Conocimiento, Lines: 17, Source: TestStopFilter.java

Example 9: create

import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@Override
public TokenFilter create(TokenStream input) {
  if (luceneMatchVersion == null) {
    return new NGramTokenFilter(input, minGramSize, maxGramSize);
  }
  return new NGramTokenFilter(luceneMatchVersion, input, minGramSize, maxGramSize);
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 8, Source: NGramFilterFactory.java

Example 10: create

import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@Override
public TokenFilter create(TokenStream input) {
  if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
    return new HyphenationCompoundWordTokenFilter(input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
  }
  return new Lucene43HyphenationCompoundWordTokenFilter(input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 9, Source: HyphenationCompoundWordTokenFilterFactory.java

Example 11: create

import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@Override
public TokenFilter create(TokenStream input) {
  if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_8_0)) {
    return new WordDelimiterFilter(luceneMatchVersion, input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable,
                                 flags, protectedWords);
  } else {
    return new Lucene47WordDelimiterFilter(input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable,
                                flags, protectedWords);
  }
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 11, Source: WordDelimiterFilterFactory.java

Example 12: affixedFilterTest

import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@Test
public void affixedFilterTest() throws IOException
{
	System.out.println("Testing TibAffixedFilter()");
	String input = "དག། གའམ། གའིའོ། དགའ། དགའི། དགའོ། དགའིས། དགའང་། དགའམ། དགའིའོ།";
	Reader reader = new StringReader(input);
	List<String> expected = Arrays.asList("དག", "ག", "ག", "དགའ", "དགའ", "དགའ", "དགའ", "དགའ", "དགའ", "དགའ");

	System.out.print(input + " => ");
	TokenStream syllables = tokenize(reader, new TibSyllableTokenizer());
	TokenFilter res = new TibAffixedFilter(syllables);
	assertTokenStream(res, expected);
}
 
Developer ID: BuddhistDigitalResourceCenter, Project: lucene-bo, Lines: 14, Source: TibetanAnalyzerTest.java

Example 13: createComponents

import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    JiebaTokenizer tokenizer = new JiebaTokenizer();
    if (userDictIn != null) {
        try {
            tokenizer.loadUserDict(userDictIn);
        } catch (IOException e) {
            throw new RuntimeException("load user dict error");
        }
    }
    TokenFilter stopFilter = new JiebaStopTokenFilter(tokenizer);
    return new TokenStreamComponents(tokenizer, stopFilter);
}
 
Developer ID: hongfuli, Project: elasticsearch-analysis-jieba, Lines: 14, Source: JiebaAnalyzer.java

Example 14: createComponents

import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final AutocompleteTokenizer tokenizer = new AutocompleteTokenizer(reader);

    TokenFilter filter = new StandardFilter(tokenizer);

    return new TokenStreamComponents(tokenizer, filter);
}
 
Developer ID: gncloud, Project: fastcatsearch3, Lines: 9, Source: AutocompleteAnalyzer.java

Example 15: spellcheckAnalyzer

import org.apache.lucene.analysis.TokenFilter; // import the required package/class
@NotNull
private static Analyzer spellcheckAnalyzer(@NotNull final SpellChecker spellChecker) {
    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(@NotNull final String field) {
            final Tokenizer source = new WhitespaceTokenizer();
            source.setReader(new StringReader(field));
            final SpellCheckerTokenFilter spellCheckFilter = new SpellCheckerTokenFilter(defaultTokenFilter(source), spellChecker);
            final TokenFilter concatenatingFilter = new ConcatenatingFilter(spellCheckFilter, ' ');
            return new TokenStreamComponents(source, concatenatingFilter);
        }
    };
}
 
Developer ID: hartwigmedical, Project: hmftools, Lines: 14, Source: TreatmentCurator.java


Note: The org.apache.lucene.analysis.TokenFilter class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by their developers, and copyright remains with the original authors; please consult each project's License before distributing or using the code. Do not reproduce this article without permission.