當前位置: 首頁>>代碼示例>>Java>>正文


Java LowerCaseFilter類代碼示例

本文整理匯總了Java中org.apache.lucene.analysis.core.LowerCaseFilter的典型用法代碼示例。如果您正苦於以下問題:Java LowerCaseFilter類的具體用法?Java LowerCaseFilter怎麽用?Java LowerCaseFilter使用的例子?那麽, 這裏精選的類代碼示例或許可以為您提供幫助。


LowerCaseFilter類屬於org.apache.lucene.analysis.core包,在下文中一共展示了LowerCaseFilter類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。

示例1: inform

import org.apache.lucene.analysis.core.LowerCaseFilter; //導入依賴的package包/類
/**
 * Builds the analyzer used to tokenize synonym rules and loads the synonym map.
 *
 * <p>Rules are tokenized with the configured tokenizer factory when one is set, otherwise
 * with a {@link WhitespaceTokenizer}; tokens are lower-cased when {@code ignoreCase} is on.
 * A {@code format} of {@code null} or {@code "solr"} selects {@link SolrSynonymParser},
 * {@code "wordnet"} selects {@link WordnetSynonymParser}, and any other value is handed to
 * {@code loadSynonyms} unchanged as the parser class name.
 *
 * @param loader resource loader passed on to the tokenizer-factory and synonym loading helpers
 * @throws IOException if parsing the synonyms fails (wraps the {@link ParseException})
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final TokenizerFactory customFactory;
  if (tokenizerFactory != null) {
    customFactory = loadTokenizerFactory(loader, tokenizerFactory);
  } else {
    customFactory = null;
  }

  final Analyzer ruleAnalyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      final Tokenizer source;
      if (customFactory != null) {
        source = customFactory.create(reader);
      } else {
        source = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
      }

      TokenStream result = source;
      if (ignoreCase) {
        result = new LowerCaseFilter(Version.LUCENE_CURRENT, source);
      }
      return new TokenStreamComponents(source, result);
    }
  };

  // Map the short format names onto parser classes; anything else is used verbatim.
  final String parserClassName;
  if (format == null || "solr".equals(format)) {
    parserClassName = SolrSynonymParser.class.getName();
  } else if ("wordnet".equals(format)) {
    parserClassName = WordnetSynonymParser.class.getName();
  } else {
    parserClassName = format;
  }

  try {
    // TODO: expose dedup as a parameter?
    map = loadSynonyms(loader, parserClassName, true, ruleAnalyzer);
  } catch (ParseException parseFailure) {
    throw new IOException("Error parsing synonyms file:", parseFailure);
  }
}
 
開發者ID:europeana,項目名稱:search,代碼行數:27,代碼來源:FSTSynonymFilterFactory.java

示例2: inform

import org.apache.lucene.analysis.core.LowerCaseFilter; //導入依賴的package包/類
/**
 * Prepares the synonym-rule analyzer and loads the synonym map through {@code loadSynonyms}.
 *
 * <p>The analyzer tokenizes with the factory named by {@code tokenizerFactory} if present,
 * falling back to a {@link WhitespaceTokenizer}, and adds a {@link LowerCaseFilter} when
 * {@code ignoreCase} is set. The parser class is {@link SolrSynonymParser} for a
 * {@code null} or {@code "solr"} format, {@link WordnetSynonymParser} for {@code "wordnet"},
 * and otherwise the {@code format} string itself.
 *
 * @param loader resource loader forwarded to the loading helpers
 * @throws IOException if a {@link ParseException} occurs while loading the synonyms
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  TokenizerFactory resolved = null;
  if (tokenizerFactory != null) {
    resolved = loadTokenizerFactory(loader, tokenizerFactory);
  }
  final TokenizerFactory tokFactory = resolved;

  final Analyzer synonymAnalyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      final Tokenizer source = (tokFactory != null)
          ? tokFactory.create(reader)
          : new WhitespaceTokenizer(Version.LUCENE_43, reader);
      final TokenStream result = !ignoreCase
          ? source
          : new LowerCaseFilter(Version.LUCENE_43, source);
      return new TokenStreamComponents(source, result);
    }
  };

  final boolean solrFormat = format == null || format.equals("solr");
  final String parserClass = solrFormat
      ? SolrSynonymParser.class.getName()
      : (format.equals("wordnet") ? WordnetSynonymParser.class.getName() : format);

  try {
    // TODO: expose dedup as a parameter?
    map = loadSynonyms(loader, parserClass, true, synonymAnalyzer);
  } catch (ParseException cause) {
    throw new IOException("Error parsing synonyms file:", cause);
  }
}
 
開發者ID:jimaguere,項目名稱:Maskana-Gestor-de-Conocimiento,代碼行數:27,代碼來源:FSTSynonymFilterFactory.java

示例3: ThaiWordFilter

import org.apache.lucene.analysis.core.LowerCaseFilter; //導入依賴的package包/類
/**
 * Creates a ThaiWordFilter whose behavior follows the given match version: for versions
 * before {@code LUCENE_3_1} the input is first wrapped in a {@link LowerCaseFilter}, and
 * {@code handlePosIncr} is enabled only from {@code LUCENE_3_1} onward.
 *
 * @param matchVersion version whose behavior should be matched
 * @param input token stream to filter
 * @throws UnsupportedOperationException if {@code DBBI_AVAILABLE} is false
 * @deprecated Use {@link #ThaiWordFilter(TokenStream)}
 */
@Deprecated
public ThaiWordFilter(Version matchVersion, TokenStream input) {
  super(!matchVersion.onOrAfter(Version.LUCENE_3_1)
      ? new LowerCaseFilter(matchVersion, input)
      : input);
  if (!DBBI_AVAILABLE) {
    throw new UnsupportedOperationException("This JRE does not have support for Thai segmentation");
  }
  handlePosIncr = matchVersion.onOrAfter(Version.LUCENE_3_1);
}
 
開發者ID:lamsfoundation,項目名稱:lams,代碼行數:12,代碼來源:ThaiWordFilter.java

示例4: create

import org.apache.lucene.analysis.core.LowerCaseFilter; //導入依賴的package包/類
/**
 * Wraps the given stream in a {@link LowerCaseFilter}.
 *
 * <p>The version-less constructor is used when {@code luceneMatchVersion} is {@code null};
 * otherwise the match version is passed along.
 *
 * @param input token stream to lower-case
 * @return a new {@link LowerCaseFilter} over {@code input}
 */
@Override
public LowerCaseFilter create(TokenStream input) {
  return (luceneMatchVersion != null)
      ? new LowerCaseFilter(luceneMatchVersion, input)
      : new LowerCaseFilter(input);
}
 
開發者ID:lamsfoundation,項目名稱:lams,代碼行數:8,代碼來源:LowerCaseFilterFactory.java

示例5: create

import org.apache.lucene.analysis.core.LowerCaseFilter; //導入依賴的package包/類
/**
 * Creates the lower-casing filter that matches the configured {@code lang}.
 *
 * <p>A {@code null} language selects the generic {@link LowerCaseFilter}; {@code "greek"},
 * {@code "irish"} and {@code "turkish"} (compared case-insensitively) select their
 * language-specific filters.
 *
 * @param tokenStream stream to wrap
 * @return the lower-casing filter over {@code tokenStream}
 * @throws IllegalArgumentException if {@code lang} is set to any other value
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    if (lang == null) {
        return new LowerCaseFilter(tokenStream);
    }
    if (lang.equalsIgnoreCase("greek")) {
        return new GreekLowerCaseFilter(tokenStream);
    }
    if (lang.equalsIgnoreCase("irish")) {
        return new IrishLowerCaseFilter(tokenStream);
    }
    if (lang.equalsIgnoreCase("turkish")) {
        return new TurkishLowerCaseFilter(tokenStream);
    }
    throw new IllegalArgumentException("language [" + lang + "] not support for lower case");
}
 
開發者ID:baidu,項目名稱:Elasticsearch,代碼行數:15,代碼來源:LowerCaseTokenFilterFactory.java

示例6: transform

import org.apache.lucene.analysis.core.LowerCaseFilter; //導入依賴的package包/類
/**
 * Converts a row into its label and the multiset of word n-grams extracted from its text.
 *
 * <p>Column 1 is read as the label and column 0 as the text. Every occurrence of
 * {@code "br2n"} is removed from the text, which is then analyzed with a
 * {@link BulgarianAnalyzer} followed by lower-casing, {@code NumberFilter}, a length filter
 * (3 to 1000), Bulgarian stemming and shingling (sizes 2 to 3). In each emitted term commas
 * are replaced with {@code "(comma)"} and line breaks are removed; terms that contain
 * {@code "_"} are skipped.
 *
 * @param row input row; column 0 is read with {@code getString}, column 1 with {@code getDouble}
 * @return the label paired with the collected terms
 * @throws IOException if the token stream fails while being consumed
 */
public Tuple2<Double, Multiset<String>> transform(Row row) throws IOException {
	Double label = row.getDouble(1);
	// "br2n" has no regex metacharacters, so a literal replace is equivalent to replaceAll.
	StringReader document = new StringReader(row.getString(0).replace("br2n", ""));
	List<String> wordsList = new ArrayList<>();

	try (BulgarianAnalyzer analyzer = new BulgarianAnalyzer(BULGARIAN_STOP_WORDS_SET)) {
		TokenStream stream = analyzer.tokenStream("words", document);

		TokenFilter lowerFilter = new LowerCaseFilter(stream);
		TokenFilter numbers = new NumberFilter(lowerFilter);
		TokenFilter length = new LengthFilter(numbers, 3, 1000);
		TokenFilter stemmer = new BulgarianStemFilter(length);
		TokenFilter ngrams = new ShingleFilter(stemmer, 2, 3);

		try (TokenFilter filter = ngrams) {
			CharTermAttribute termAtt = filter.addAttribute(CharTermAttribute.class);
			filter.reset();
			while (filter.incrementToken()) {
				String word = termAtt.toString()
						.replace(",", "(comma)")
						.replace("\n", "")
						.replace("\r", "");
				if (word.contains("_")) {
					continue;
				}
				wordsList.add(word);
			}
			// The TokenStream consumer workflow requires end() after the last
			// incrementToken() and before close().
			filter.end();
		}
	}

	Multiset<String> words = ConcurrentHashMultiset.create(wordsList);

	return new Tuple2<>(label, words);
}
 
開發者ID:mhardalov,項目名稱:news-credibility,代碼行數:32,代碼來源:TokenTransform.java

示例7: createComponents

import org.apache.lucene.analysis.core.LowerCaseFilter; //導入依賴的package包/類
/**
 * Builds the analysis chain: a {@link MockTokenizer} configured with
 * {@code MockTokenizer.KEYWORD}, then {@link LowerCaseFilter}, then {@link GermanStemFilter}.
 *
 * @param fieldName field being analyzed (not used here)
 * @param reader source of the text to tokenize
 * @return the tokenizer together with the end of the filter chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName,
    Reader reader) {
  final Tokenizer source = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
  final TokenStream lowerCased = new LowerCaseFilter(source);
  final TokenStream stemmed = new GermanStemFilter(lowerCased);
  return new TokenStreamComponents(source, stemmed);
}
 
開發者ID:europeana,項目名稱:search,代碼行數:8,代碼來源:TestGermanStemFilter.java

示例8: testMultipleSources

import org.apache.lucene.analysis.core.LowerCaseFilter; //導入依賴的package包/類
/**
 * Feeds the same two sink streams from two different TeeSinkTokenFilters and checks the
 * tokens seen by each source and each sink, then re-reads the cached first source through
 * a LowerCaseFilter.
 *
 * NOTE(review): buffer1, buffer2, tokens1, tokens2, dogFilter and theFilter are defined
 * outside this method; the expected sink contents below depend on their values.
 */
public void testMultipleSources() throws Exception {
  // First tee over buffer1; both sinks are created from it.
  final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer1.toString()), MockTokenizer.WHITESPACE, false));
  final TeeSinkTokenFilter.SinkTokenStream dogDetector = tee1.newSinkTokenStream(dogFilter);
  final TeeSinkTokenFilter.SinkTokenStream theDetector = tee1.newSinkTokenStream(theFilter);
  tee1.reset();
  // Wrap tee1 in a CachingTokenFilter; source1 is reset and consumed again further down.
  final TokenStream source1 = new CachingTokenFilter(tee1);
  
  tee1.addAttribute(CheckClearAttributesAttribute.class);
  dogDetector.addAttribute(CheckClearAttributesAttribute.class);
  theDetector.addAttribute(CheckClearAttributesAttribute.class);

  // Second tee over buffer2; its tokenizer is built with tee1's attribute factory, and the
  // sinks created from tee1 are registered on it as well.
  MockTokenizer tokenizer = new MockTokenizer(tee1.getAttributeFactory(), new StringReader(buffer2.toString()), MockTokenizer.WHITESPACE, false);
  final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(tokenizer);
  tee2.addSinkTokenStream(dogDetector);
  tee2.addSinkTokenStream(theDetector);
  final TokenStream source2 = tee2;

  // Consume both sources before inspecting the sinks.
  assertTokenStreamContents(source1, tokens1);
  assertTokenStreamContents(source2, tokens2);

  // Each sink is expected to hold the matching tokens from both sources.
  assertTokenStreamContents(theDetector, new String[]{"The", "the", "The", "the"});
  assertTokenStreamContents(dogDetector, new String[]{"Dogs", "Dogs"});
  
  // Reset the cached source and check that lower-casing it yields tokens1 lower-cased
  // with Locale.ROOT.
  source1.reset();
  TokenStream lowerCasing = new LowerCaseFilter(source1);
  String[] lowerCaseTokens = new String[tokens1.length];
  for (int i = 0; i < tokens1.length; i++)
    lowerCaseTokens[i] = tokens1[i].toLowerCase(Locale.ROOT);
  assertTokenStreamContents(lowerCasing, lowerCaseTokens);
}
 
開發者ID:europeana,項目名稱:search,代碼行數:31,代碼來源:TestTeeSinkTokenFilter.java

示例9: createComponents

import org.apache.lucene.analysis.core.LowerCaseFilter; //導入依賴的package包/類
/**
 * Records that the analyzer was invoked (sets {@code called}) and returns a
 * {@link WhitespaceTokenizer} followed by a {@link LowerCaseFilter}.
 *
 * @param fieldName field being analyzed (not used here)
 * @return the tokenizer together with its lower-casing filter
 */
@Override
protected TokenStreamComponents createComponents( String fieldName )
{
    called = true;
    final Tokenizer whitespace = new WhitespaceTokenizer();
    final TokenStream lowerCased = new LowerCaseFilter( whitespace );
    return new TokenStreamComponents( whitespace, lowerCased );
}
 
開發者ID:neo4j-contrib,項目名稱:neo4j-lucene5-index,代碼行數:8,代碼來源:CustomAnalyzer.java

示例10: createComponents

import org.apache.lucene.analysis.core.LowerCaseFilter; //導入依賴的package包/類
/**
 * Builds the analysis chain: a {@link LowerCaseTokenizer} whose output is passed through a
 * {@link LowerCaseFilter}.
 *
 * @param fieldName field being analyzed (not used here)
 * @return the tokenizer together with the filter wrapping it
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final LowerCaseTokenizer tokenizer = new LowerCaseTokenizer();
    final TokenStream filtered = new LowerCaseFilter(tokenizer);
    return new TokenStreamComponents(tokenizer, filtered);
}
 
開發者ID:kiegroup,項目名稱:appformer,代碼行數:9,代碼來源:FilenameAnalyzer.java

示例11: getAnalyzer

import org.apache.lucene.analysis.core.LowerCaseFilter; //導入依賴的package包/類
/**
 * Creates an analyzer based on a {@link KeywordTokenizer}.
 *
 * @param ignoreCase when {@code true}, the tokenizer output is wrapped in a
 *        {@link LowerCaseFilter}; when {@code false}, the tokenizer is used directly
 * @return a new anonymous {@link Analyzer}
 */
protected static Analyzer getAnalyzer(final boolean ignoreCase) {
    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(final String fieldName) {
            final Tokenizer source = new KeywordTokenizer();
            final TokenStream result;
            if (ignoreCase) {
                result = new LowerCaseFilter(source);
            } else {
                result = source;
            }
            return new TokenStreamComponents(source, result);
        }
    };
}
 
開發者ID:codelibs,項目名稱:elasticsearch-analysis-synonym,代碼行數:11,代碼來源:SynonymLoader.java

示例12: createComponents

import org.apache.lucene.analysis.core.LowerCaseFilter; //導入依賴的package包/類
/**
 * Picks the tokenizer by field name and lower-cases its output.
 *
 * <p>The field equal to {@code DocumentIndex.FIELD__KEYWORD} is tokenized with a
 * {@code NullTokenizer}; every other field uses a {@code LetterOrDigitTokenizer}.
 *
 * @param fieldName field being analyzed
 * @param reader source of the text to tokenize
 * @return the chosen tokenizer wrapped in a {@link LowerCaseFilter}
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final boolean keywordField = DocumentIndex.FIELD__KEYWORD.equals(fieldName);
    final Tokenizer source = keywordField
            ? new NullTokenizer(reader)
            : new LetterOrDigitTokenizer(reader);
    final TokenStream lowerCased = new LowerCaseFilter(Version.LUCENE_40, source);
    return new TokenStreamComponents(source, lowerCased);
}
 
開發者ID:imCodePartnerAB,項目名稱:imcms,代碼行數:12,代碼來源:AnalyzerImpl.java

示例13: createComponents

import org.apache.lucene.analysis.core.LowerCaseFilter; //導入依賴的package包/類
/**
 * Builds the analysis chain: {@code LemmatizingTokenizer}, then {@link LowerCaseFilter},
 * then {@code LemmaTokenFilter} (constructed with {@code true} as its second argument).
 *
 * @param fieldName field being analyzed (not used here)
 * @return the tokenizer together with the end of the filter chain
 */
@SuppressWarnings("resource")
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new LemmatizingTokenizer();
    final TokenStream lowerCased = new LowerCaseFilter(source);
    // stream = new KeywordRepeatFilter(stream);
    final TokenStream lemmas = new LemmaTokenFilter(lowerCased, true);
    return new TokenStreamComponents(source, lemmas);
}
 
開發者ID:shaie,項目名稱:lucenelab,代碼行數:10,代碼來源:LemmatizingTokenizerDemo.java

示例14: inform

import org.apache.lucene.analysis.core.LowerCaseFilter; //導入依賴的package包/類
/**
 * Reads the factory arguments, builds the synonym-rule analyzer and loads the synonym map.
 *
 * <p>{@code ignoreCase} (default {@code false}) is stored on the factory and controls
 * whether rule tokens are lower-cased. Rules are tokenized with the factory named by the
 * {@code tokenizerFactory} argument, or with a {@link WhitespaceTokenizer} when that
 * argument is absent. The {@code format} argument selects the loader: {@code null} or
 * {@code "solr"} uses {@code loadSolrSynonyms}, {@code "wordnet"} uses
 * {@code loadWordnetSynonyms}.
 *
 * @param loader resource loader forwarded to the loading helpers
 * @throws IOException if a {@link ParseException} occurs while loading the synonyms
 * @throws IllegalArgumentException if {@code format} is any other value
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final boolean lowerCaseRules = getBoolean("ignoreCase", false);
  this.ignoreCase = lowerCaseRules;

  final String factoryName = args.get("tokenizerFactory");
  final TokenizerFactory tokFactory;
  if (factoryName != null) {
    tokFactory = loadTokenizerFactory(loader, factoryName);
  } else {
    tokFactory = null;
  }

  final Analyzer ruleAnalyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      final Tokenizer source;
      if (tokFactory != null) {
        source = tokFactory.create(reader);
      } else {
        source = new WhitespaceTokenizer(Version.LUCENE_31, reader);
      }

      TokenStream result = source;
      if (lowerCaseRules) {
        result = new LowerCaseFilter(Version.LUCENE_31, source);
      }
      return new TokenStreamComponents(source, result);
    }
  };

  final String format = args.get("format");
  final boolean solrFormat = format == null || format.equals("solr");
  if (!solrFormat && !format.equals("wordnet")) {
    // TODO: somehow make this more pluggable
    throw new IllegalArgumentException("Unrecognized synonyms format: " + format);
  }

  try {
    if (solrFormat) {
      // TODO: expose dedup as a parameter?
      map = loadSolrSynonyms(loader, true, ruleAnalyzer);
    } else {
      map = loadWordnetSynonyms(loader, true, ruleAnalyzer);
    }
  } catch (ParseException cause) {
    throw new IOException("Exception thrown while loading synonyms", cause);
  }
}
 
開發者ID:pkarmstr,項目名稱:NYBC,代碼行數:34,代碼來源:FSTSynonymFilterFactory.java

示例15: ThaiWordFilter

import org.apache.lucene.analysis.core.LowerCaseFilter; //導入依賴的package包/類
/**
 * Creates a new ThaiWordFilter with the specified match version.
 *
 * <p>For versions before {@code LUCENE_31} the input is first wrapped in a
 * {@link LowerCaseFilter}; {@code handlePosIncr} is enabled only from {@code LUCENE_31}
 * onward.
 *
 * @param matchVersion version whose behavior should be matched
 * @param input token stream to filter
 * @throws UnsupportedOperationException if {@code DBBI_AVAILABLE} is false
 */
public ThaiWordFilter(Version matchVersion, TokenStream input) {
  super(!matchVersion.onOrAfter(Version.LUCENE_31)
      ? new LowerCaseFilter(matchVersion, input)
      : input);
  if (!DBBI_AVAILABLE) {
    throw new UnsupportedOperationException("This JRE does not have support for Thai segmentation");
  }
  handlePosIncr = matchVersion.onOrAfter(Version.LUCENE_31);
}
 
開發者ID:pkarmstr,項目名稱:NYBC,代碼行數:9,代碼來源:ThaiWordFilter.java


注:本文中的org.apache.lucene.analysis.core.LowerCaseFilter類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。