當前位置: 首頁>>代碼示例>>Java>>正文


Java TokenFilter類代碼示例

本文整理匯總了Java中org.apache.lucene.analysis.TokenFilter的典型用法代碼示例。如果您正苦於以下問題:Java TokenFilter類的具體用法?Java TokenFilter怎麼用?Java TokenFilter使用的例子?那麼,這裏精選的類代碼示例或許可以為您提供幫助。


TokenFilter類屬於org.apache.lucene.analysis包,在下文中一共展示了TokenFilter類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。

示例1: testInvalidOffset

import org.apache.lucene.analysis.TokenFilter; //導入依賴的package包/類
/** Checks that ASCII folding before CJK word filtering keeps offsets on the original text. */
public void testInvalidOffset() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      // Fold diacritics first, then apply the word filter on top of the folded stream.
      TokenFilter chain = new WordTokenFilter(new ASCIIFoldingFilter(source));
      return new TokenStreamComponents(source, chain);
    }
  };

  // "æ" folds to "ae": one token whose offsets span the whole 11-char input.
  assertAnalyzesTo(analyzer, "mosfellsbær",
      new String[] { "mosfellsbaer" },
      new int[]    { 0 },
      new int[]    { 11 });
}
 
開發者ID:europeana,項目名稱:search,代碼行數:17,代碼來源:TestSmartChineseAnalyzer.java

示例2: testInvalidOffsets

import org.apache.lucene.analysis.TokenFilter; //導入依賴的package包/類
/** Folding lengthens the term ("æ" -> "ae"); every n-gram must still report the original token's offsets. */
public void testInvalidOffsets() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      // ASCII-fold first, then split the folded term into bigrams.
      TokenFilter chain = new NGramTokenFilter(new ASCIIFoldingFilter(source), 2, 2);
      return new TokenStreamComponents(source, chain);
    }
  };
  // Each bigram keeps the offsets [0, 11) of the whole original token;
  // only the first carries a position increment.
  assertAnalyzesTo(analyzer, "mosfellsbær",
      new String[] { "mo", "os", "sf", "fe", "el", "ll", "ls", "sb", "ba", "ae", "er" },
      new int[]    {    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0 },
      new int[]    {   11,   11,   11,   11,   11,   11,   11,   11,   11,   11,   11 },
      new int[]    {    1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0 });
}
 
開發者ID:europeana,項目名稱:search,代碼行數:17,代碼來源:NGramTokenFilterTest.java

示例3: testFirstPosInc

import org.apache.lucene.analysis.TokenFilter; //導入依賴的package包/類
/** With position increments disabled, stopping the first word must not inflate the surviving synonym's increment. */
public void testFirstPosInc() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      // Inject mock synonyms, then remove English stop words without
      // preserving the positions of the removed tokens.
      StopFilter stops =
          new StopFilter(Version.LUCENE_4_3, new MockSynonymFilter(source), StopAnalyzer.ENGLISH_STOP_WORDS_SET);
      stops.setEnablePositionIncrements(false);
      return new TokenStreamComponents(source, stops);
    }
  };

  // "the" is stopped; its mock synonym "hte" remains with posInc 1 (not 2).
  assertAnalyzesTo(analyzer, "the quick brown fox",
      new String[] { "hte", "quick", "brown", "fox" },
      new int[] { 1, 1, 1, 1 });
}
 
開發者ID:europeana,項目名稱:search,代碼行數:17,代碼來源:TestStopFilter.java

示例4: create

import org.apache.lucene.analysis.TokenFilter; //導入依賴的package包/類
/**
 * Wraps the input in a filter that throws a fresh instance of {@code exceptionClass}
 * as soon as the upstream yields a token; an empty stream passes through untouched.
 */
@Override
public TokenStream create(TokenStream input) {
  return new TokenFilter(input) {
    @Override
    public boolean incrementToken() throws IOException {
      if (!input.incrementToken()) {
        // Upstream exhausted: nothing to throw on.
        return false;
      }
      try {
        throw exceptionClass.newInstance();
      } catch (InstantiationException ie) {
        // Reflection failed to build the configured exception; surface it unchecked.
        throw new RuntimeException(ie);
      } catch (IllegalAccessException iae) {
        throw new RuntimeException(iae);
      }
    }
  };
}
 
開發者ID:europeana,項目名稱:search,代碼行數:19,代碼來源:ThrowingMockTokenFilterFactory.java

示例5: annotate

import org.apache.lucene.analysis.TokenFilter; //導入依賴的package包/類
/**
 * Tokenises and lemmatises {@code text} using Lucene's English minimal stemmer.
 * Each emitted token becomes an Annotation whose form is the original surface
 * text (recovered via offsets) and whose lemma is the stemmed term.
 *
 * Fixes over the previous version: the token stream now follows Lucene's required
 * workflow (reset -> incrementToken* -> end -> close), and both the filter and the
 * analyzer are closed even when tokenisation throws.
 */
@Override
public List<Annotation> annotate(String text) throws Exception {
	text = SimpleTokenizer.format(text);
	Analyzer analyser = new EnglishAnalyzer(Version.LUCENE_47, CharArraySet.EMPTY_SET);
	TokenFilter filter = new EnglishMinimalStemFilter(analyser.tokenStream("text", new StringReader(text)));
	List<Annotation> out = Lists.newArrayList();
	try {
		// Lucene's TokenStream contract requires reset() before the first
		// incrementToken() and end() after the last one.
		filter.reset();
		while (filter.incrementToken()) {
			CharTermAttribute az = filter.getAttribute(CharTermAttribute.class);
			OffsetAttribute o = filter.getAttribute(OffsetAttribute.class);
			// Surface form comes from the original text via offsets; lemma from the term attribute.
			String token = text.substring(o.startOffset(), o.endOffset());
			String lemma = az.toString();
			Annotation t = new Annotation();
			t.setForm(token);
			t.setLemma(lemma);
			out.add(t);
		}
		filter.end();
	} finally {
		// Release resources even if tokenisation fails part-way through.
		filter.close();
		analyser.close();
	}
	if (out.size() == 0) {
		log.debug("Input string is empty");
	}
	return out;
}
 
開發者ID:kouylekov,項目名稱:edits,代碼行數:24,代碼來源:LuceneTokenizer.java

示例6: testFirstPosInc

import org.apache.lucene.analysis.TokenFilter; //導入依賴的package包/類
/** The stop filter (position increments off) must leave the first surviving synonym at posInc 1. */
public void testFirstPosInc() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      TokenFilter synonyms = new MockSynonymFilter(source);
      StopFilter stops = new StopFilter(TEST_VERSION_CURRENT, synonyms, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
      // Do not bump increments for removed stop words.
      stops.setEnablePositionIncrements(false);
      return new TokenStreamComponents(source, stops);
    }
  };

  // "the" is removed as a stop word; its mock synonym "hte" survives with increment 1.
  assertAnalyzesTo(analyzer, "the quick brown fox",
      new String[] { "hte", "quick", "brown", "fox" },
      new int[] { 1, 1, 1, 1 });
}
 
開發者ID:pkarmstr,項目名稱:NYBC,代碼行數:17,代碼來源:TestStopFilter.java

示例7: testInvalidOffsets

import org.apache.lucene.analysis.TokenFilter; //導入依賴的package包/類
/** ASCII folding changes token length; the version-aware n-gram filter must keep original offsets. */
public void testInvalidOffsets() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      // Fold "æ" to "ae", then emit bigrams over the folded term.
      TokenFilter folded = new ASCIIFoldingFilter(source);
      TokenFilter bigrams = new NGramTokenFilter(TEST_VERSION_CURRENT, folded, 2, 2);
      return new TokenStreamComponents(source, bigrams);
    }
  };
  // All bigrams report the full [0, 11) span of the source token;
  // only the first one advances the position.
  assertAnalyzesTo(analyzer, "mosfellsbær",
      new String[] { "mo", "os", "sf", "fe", "el", "ll", "ls", "sb", "ba", "ae", "er" },
      new int[]    {    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0 },
      new int[]    {   11,   11,   11,   11,   11,   11,   11,   11,   11,   11,   11 },
      new int[]    {    1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0 });
}
 
開發者ID:jimaguere,項目名稱:Maskana-Gestor-de-Conocimiento,代碼行數:17,代碼來源:NGramTokenFilterTest.java

示例8: testFirstPosInc

import org.apache.lucene.analysis.TokenFilter; //導入依賴的package包/類
/** Same first-position-increment check against the LUCENE_43 stop-filter behaviour. */
public void testFirstPosInc() throws Exception {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      // Synonym injection feeds the stop filter, which drops English stop
      // words without preserving their positions.
      StopFilter stops =
          new StopFilter(Version.LUCENE_43, new MockSynonymFilter(source), StopAnalyzer.ENGLISH_STOP_WORDS_SET);
      stops.setEnablePositionIncrements(false);
      return new TokenStreamComponents(source, stops);
    }
  };

  // The surviving synonym "hte" must start at position increment 1.
  assertAnalyzesTo(analyzer, "the quick brown fox",
      new String[] { "hte", "quick", "brown", "fox" },
      new int[] { 1, 1, 1, 1 });
}
 
開發者ID:jimaguere,項目名稱:Maskana-Gestor-de-Conocimiento,代碼行數:17,代碼來源:TestStopFilter.java

示例9: create

import org.apache.lucene.analysis.TokenFilter; //導入依賴的package包/類
/** Builds the n-gram filter, preferring the version-aware constructor when a match version is configured. */
@Override
public TokenFilter create(TokenStream input) {
  return luceneMatchVersion == null
      ? new NGramTokenFilter(input, minGramSize, maxGramSize)
      : new NGramTokenFilter(luceneMatchVersion, input, minGramSize, maxGramSize);
}
 
開發者ID:lamsfoundation,項目名稱:lams,代碼行數:8,代碼來源:NGramFilterFactory.java

示例10: create

import org.apache.lucene.analysis.TokenFilter; //導入依賴的package包/類
/** Picks the hyphenation compound-word filter implementation matching the configured Lucene version. */
@Override
public TokenFilter create(TokenStream input) {
  // Lucene 4.4 changed the compound-word filter; older match versions keep the legacy behaviour.
  if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
    return new HyphenationCompoundWordTokenFilter(
        input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
  }
  return new Lucene43HyphenationCompoundWordTokenFilter(
      input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
}
 
開發者ID:lamsfoundation,項目名稱:lams,代碼行數:9,代碼來源:HyphenationCompoundWordTokenFilterFactory.java

示例11: create

import org.apache.lucene.analysis.TokenFilter; //導入依賴的package包/類
/** Creates the word-delimiter filter appropriate for the configured Lucene match version. */
@Override
public TokenFilter create(TokenStream input) {
  // Fall back to the default delimiter table when no custom type table was configured.
  final byte[] table = typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable;
  if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_8_0)) {
    return new WordDelimiterFilter(luceneMatchVersion, input, table, flags, protectedWords);
  }
  // Pre-4.8 semantics are kept in a dedicated legacy filter.
  return new Lucene47WordDelimiterFilter(input, table, flags, protectedWords);
}
 
開發者ID:lamsfoundation,項目名稱:lams,代碼行數:11,代碼來源:WordDelimiterFilterFactory.java

示例12: affixedFilterTest

import org.apache.lucene.analysis.TokenFilter; //導入依賴的package包/類
/** Verifies that TibAffixedFilter strips affixed particles, leaving the bare syllables. */
@Test
public void affixedFilterTest() throws IOException
{
	System.out.println("Testing TibAffixedFilter()");
	final String input = "དག། གའམ། གའིའོ། དགའ། དགའི། དགའོ། དགའིས། དགའང་། དགའམ། དགའིའོ།";
	final List<String> expected = Arrays.asList("དག", "ག", "ག", "དགའ", "དགའ", "དགའ", "དགའ", "དགའ", "དགའ", "དགའ");

	System.out.print(input + " => ");
	// Tokenize into syllables first, then strip affixed particles from each one.
	TokenStream syllables = tokenize(new StringReader(input), new TibSyllableTokenizer());
	TokenFilter stripped = new TibAffixedFilter(syllables);
	assertTokenStream(stripped, expected);
}
 
開發者ID:BuddhistDigitalResourceCenter,項目名稱:lucene-bo,代碼行數:14,代碼來源:TibetanAnalyzerTest.java

示例13: createComponents

import org.apache.lucene.analysis.TokenFilter; //導入依賴的package包/類
/**
 * Builds the Jieba analysis chain: a Jieba tokenizer — optionally primed with a
 * user dictionary when {@code userDictIn} is set — followed by a stop-token filter.
 *
 * Fix: the rethrown RuntimeException now chains the original IOException so the
 * real cause of a dictionary-load failure is not lost.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    JiebaTokenizer tokenizer = new JiebaTokenizer();
    if (userDictIn != null) {
        try {
            tokenizer.loadUserDict(userDictIn);
        } catch (IOException e) {
            // Preserve the underlying cause instead of swallowing it.
            throw new RuntimeException("load user dict error", e);
        }
    }
    TokenFilter stopFilter = new JiebaStopTokenFilter(tokenizer);
    return new TokenStreamComponents(tokenizer, stopFilter);
}
 
開發者ID:hongfuli,項目名稱:elasticsearch-analysis-jieba,代碼行數:14,代碼來源:JiebaAnalyzer.java

示例14: createComponents

import org.apache.lucene.analysis.TokenFilter; //導入依賴的package包/類
/** Assembles the autocomplete chain: the custom autocomplete tokenizer followed by standard filtering. */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final AutocompleteTokenizer source = new AutocompleteTokenizer(reader);
    // StandardFilter performs the default token normalisation on top of the tokenizer.
    return new TokenStreamComponents(source, new StandardFilter(source));
}
 
開發者ID:gncloud,項目名稱:fastcatsearch3,代碼行數:9,代碼來源:AutocompleteAnalyzer.java

示例15: spellcheckAnalyzer

import org.apache.lucene.analysis.TokenFilter; //導入依賴的package包/類
/**
 * Builds an analyzer that splits the field text on whitespace, spell-corrects each
 * term via the given SpellChecker, and concatenates the result back into a single
 * space-separated token.
 */
@NotNull
private static Analyzer spellcheckAnalyzer(@NotNull final SpellChecker spellChecker) {
    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(@NotNull final String field) {
            // NOTE(review): the field name itself is fed to the tokenizer as text here —
            // presumably intentional for this analyzer's usage; confirm against callers.
            final Tokenizer source = new WhitespaceTokenizer();
            source.setReader(new StringReader(field));
            final SpellCheckerTokenFilter corrected =
                    new SpellCheckerTokenFilter(defaultTokenFilter(source), spellChecker);
            // Re-join the corrected terms with single spaces into one token.
            final TokenFilter joined = new ConcatenatingFilter(corrected, ' ');
            return new TokenStreamComponents(source, joined);
        }
    };
}
 
開發者ID:hartwigmedical,項目名稱:hmftools,代碼行數:14,代碼來源:TreatmentCurator.java


注:本文中的org.apache.lucene.analysis.TokenFilter類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。