当前位置: 首页>>代码示例>>Java>>正文


Java LowerCaseFilter类代码示例

本文整理汇总了Java中org.apache.lucene.analysis.core.LowerCaseFilter的典型用法代码示例。如果您正苦于以下问题:Java LowerCaseFilter类的具体用法?Java LowerCaseFilter怎么用?Java LowerCaseFilter使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


LowerCaseFilter类属于org.apache.lucene.analysis.core包,在下文中一共展示了LowerCaseFilter类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: inform

import org.apache.lucene.analysis.core.LowerCaseFilter; //导入依赖的package包/类
/**
 * Loads the synonym map using resources obtained through {@code loader}.
 *
 * <p>Synonym entries are analyzed with the tokenizer produced by the configured
 * {@code tokenizerFactory} when one is set, otherwise with a {@code WhitespaceTokenizer}. When
 * {@code ignoreCase} is true the tokens are additionally passed through a
 * {@code LowerCaseFilter}.
 *
 * <p>A {@code format} of {@code null} or {@code "solr"} selects {@code SolrSynonymParser},
 * {@code "wordnet"} selects {@code WordnetSynonymParser}; any other value is handed to
 * {@code loadSynonyms} unchanged.
 *
 * @param loader resource loader used to load the tokenizer factory and the synonyms
 * @throws IOException wrapping any {@code ParseException} raised while loading the synonyms
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final TokenizerFactory factory;
  if (tokenizerFactory != null) {
    factory = loadTokenizerFactory(loader, tokenizerFactory);
  } else {
    factory = null;
  }

  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      final Tokenizer source;
      if (factory != null) {
        source = factory.create(reader);
      } else {
        source = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
      }

      TokenStream result = source;
      if (ignoreCase) {
        result = new LowerCaseFilter(Version.LUCENE_CURRENT, source);
      }
      return new TokenStreamComponents(source, result);
    }
  };

  // Resolve the parser class name up front; only the load itself can raise ParseException.
  final String formatClass;
  if (format == null || "solr".equals(format)) {
    formatClass = SolrSynonymParser.class.getName();
  } else if ("wordnet".equals(format)) {
    formatClass = WordnetSynonymParser.class.getName();
  } else {
    formatClass = format;
  }

  try {
    // TODO: expose dedup as a parameter?
    map = loadSynonyms(loader, formatClass, true, analyzer);
  } catch (ParseException e) {
    throw new IOException("Error parsing synonyms file:", e);
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:27,代码来源:FSTSynonymFilterFactory.java

示例2: inform

import org.apache.lucene.analysis.core.LowerCaseFilter; //导入依赖的package包/类
/**
 * Builds the synonym map from resources reachable through {@code loader}.
 *
 * <p>The analyzer applied to synonym entries tokenizes with the configured
 * {@code tokenizerFactory} if present, else with a {@code WhitespaceTokenizer}, and lower-cases
 * tokens only when {@code ignoreCase} is set.
 *
 * <p>The parser is chosen from {@code format}: {@code null} or {@code "solr"} maps to
 * {@code SolrSynonymParser}, {@code "wordnet"} maps to {@code WordnetSynonymParser}, and any
 * other value is passed to {@code loadSynonyms} as given.
 *
 * @param loader resource loader used for the tokenizer factory and the synonyms
 * @throws IOException wrapping any {@code ParseException} raised while loading the synonyms
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final TokenizerFactory factory =
      (tokenizerFactory != null) ? loadTokenizerFactory(loader, tokenizerFactory) : null;

  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source;
      if (factory == null) {
        source = new WhitespaceTokenizer(Version.LUCENE_43, reader);
      } else {
        source = factory.create(reader);
      }

      TokenStream result;
      if (ignoreCase) {
        result = new LowerCaseFilter(Version.LUCENE_43, source);
      } else {
        result = source;
      }
      return new TokenStreamComponents(source, result);
    }
  };

  // The three cases are mutually exclusive, so their order does not matter.
  String formatClass;
  if ("wordnet".equals(format)) {
    formatClass = WordnetSynonymParser.class.getName();
  } else if (format == null || "solr".equals(format)) {
    formatClass = SolrSynonymParser.class.getName();
  } else {
    formatClass = format;
  }

  try {
    // TODO: expose dedup as a parameter?
    map = loadSynonyms(loader, formatClass, true, analyzer);
  } catch (ParseException e) {
    throw new IOException("Error parsing synonyms file:", e);
  }
}
 
开发者ID:jimaguere,项目名称:Maskana-Gestor-de-Conocimiento,代码行数:27,代码来源:FSTSynonymFilterFactory.java

示例3: ThaiWordFilter

import org.apache.lucene.analysis.core.LowerCaseFilter; //导入依赖的package包/类
/**
 * Creates a ThaiWordFilter whose setup depends on {@code matchVersion}.
 *
 * <p>When {@code matchVersion} is older than {@code Version.LUCENE_3_1} the input is first
 * wrapped in a {@code LowerCaseFilter}; otherwise the input is used directly. The
 * {@code handlePosIncr} flag is set to true only for {@code LUCENE_3_1} or later.
 *
 * @param matchVersion version used to select the behavior described above
 * @param input the token stream to filter
 * @throws UnsupportedOperationException if {@code DBBI_AVAILABLE} is false
 * @deprecated Use {@link #ThaiWordFilter(TokenStream)}
 */
@Deprecated
public ThaiWordFilter(Version matchVersion, TokenStream input) {
  super(!matchVersion.onOrAfter(Version.LUCENE_3_1)
      ? new LowerCaseFilter(matchVersion, input)
      : input);
  if (!DBBI_AVAILABLE) {
    throw new UnsupportedOperationException("This JRE does not have support for Thai segmentation");
  }
  handlePosIncr = matchVersion.onOrAfter(Version.LUCENE_3_1);
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:12,代码来源:ThaiWordFilter.java

示例4: create

import org.apache.lucene.analysis.core.LowerCaseFilter; //导入依赖的package包/类
/**
 * Wraps {@code input} in a {@code LowerCaseFilter}.
 *
 * <p>The version-less constructor is used when {@code luceneMatchVersion} is {@code null};
 * otherwise the match version is passed through to the filter.
 *
 * @param input the token stream to lower-case
 * @return a new filter over {@code input}
 */
@Override
public LowerCaseFilter create(TokenStream input) {
  return luceneMatchVersion == null
      ? new LowerCaseFilter(input)
      : new LowerCaseFilter(luceneMatchVersion, input);
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:8,代码来源:LowerCaseFilterFactory.java

示例5: create

import org.apache.lucene.analysis.core.LowerCaseFilter; //导入依赖的package包/类
/**
 * Creates the lower-casing filter that matches the configured {@code lang}.
 *
 * <p>A {@code null} language yields the generic {@code LowerCaseFilter}; {@code "greek"},
 * {@code "irish"} and {@code "turkish"} (compared case-insensitively) yield their
 * language-specific filters.
 *
 * @param tokenStream the stream to wrap
 * @return the lower-casing filter wrapped around {@code tokenStream}
 * @throws IllegalArgumentException if {@code lang} is any other value
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    if (lang == null) {
        return new LowerCaseFilter(tokenStream);
    }
    if ("greek".equalsIgnoreCase(lang)) {
        return new GreekLowerCaseFilter(tokenStream);
    }
    if ("irish".equalsIgnoreCase(lang)) {
        return new IrishLowerCaseFilter(tokenStream);
    }
    if ("turkish".equalsIgnoreCase(lang)) {
        return new TurkishLowerCaseFilter(tokenStream);
    }
    throw new IllegalArgumentException("language [" + lang + "] not support for lower case");
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:15,代码来源:LowerCaseTokenFilterFactory.java

示例6: transform

import org.apache.lucene.analysis.core.LowerCaseFilter; //导入依赖的package包/类
/**
 * Turns one row into a pair of its label and the bag of terms extracted from its text.
 *
 * <p>The text is read from column 0 (with every occurrence of {@code "br2n"} removed) and the
 * label from column 1. The text is analyzed with a {@code BulgarianAnalyzer} and then passed
 * through, in order: lower-casing, {@code NumberFilter}, a length filter (3 to 1000),
 * Bulgarian stemming and a {@code ShingleFilter} configured with sizes 2 to 3.
 *
 * @param row the input row; column 0 is read as a string, column 1 as a double
 * @return the label paired with a multiset of the emitted terms
 * @throws IOException if the token stream fails while being read
 */
public Tuple2<Double, Multiset<String>> transform(Row row) throws IOException {
	Double label = row.getDouble(1);
	// "br2n" contains no regex metacharacters, so a literal replace is equivalent to replaceAll.
	StringReader document = new StringReader(row.getString(0).replace("br2n", ""));
	List<String> wordsList = new ArrayList<>();

	try (BulgarianAnalyzer analyzer = new BulgarianAnalyzer(BULGARIAN_STOP_WORDS_SET)) {
		TokenStream stream = analyzer.tokenStream("words", document);

		TokenFilter lowerFilter = new LowerCaseFilter(stream);
		TokenFilter numbers = new NumberFilter(lowerFilter);
		TokenFilter length = new LengthFilter(numbers, 3, 1000);
		TokenFilter stemmer = new BulgarianStemFilter(length);
		TokenFilter ngrams = new ShingleFilter(stemmer, 2, 3);

		// Closing the outermost filter closes the whole chain.
		try (TokenFilter filter = ngrams) {
			CharTermAttribute termAtt = filter.addAttribute(CharTermAttribute.class);
			filter.reset();
			while (filter.incrementToken()) {
				String word = termAtt.toString().replace(",", "(comma)").replaceAll("\n|\r", "");
				// NOTE(review): presumably skips shingles holding ShingleFilter's "_" filler token — confirm.
				if (word.contains("_")) {
					continue;
				}
				wordsList.add(word);
			}
			// The TokenStream consumer workflow requires end() after the last incrementToken()
			// and before close().
			filter.end();
		}
	}

	Multiset<String> words = ConcurrentHashMultiset.create(wordsList);

	return new Tuple2<>(label, words);
}
 
开发者ID:mhardalov,项目名称:news-credibility,代码行数:32,代码来源:TokenTransform.java

示例7: createComponents

import org.apache.lucene.analysis.core.LowerCaseFilter; //导入依赖的package包/类
/**
 * Builds the analysis chain: a keyword-mode {@code MockTokenizer}, followed by a
 * {@code LowerCaseFilter} and then a {@code GermanStemFilter}.
 *
 * @param fieldName not used when building the chain
 * @param reader the text to tokenize
 * @return components with the tokenizer as source and the stem filter as result
 */
@Override
protected TokenStreamComponents createComponents(String fieldName,
    Reader reader) {
  final Tokenizer source = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
  final LowerCaseFilter lowerCased = new LowerCaseFilter(source);
  final GermanStemFilter stemmed = new GermanStemFilter(lowerCased);
  return new TokenStreamComponents(source, stemmed);
}
 
开发者ID:europeana,项目名称:search,代码行数:8,代码来源:TestGermanStemFilter.java

示例8: testMultipleSources

import org.apache.lucene.analysis.core.LowerCaseFilter; //导入依赖的package包/类
/**
 * Feeds two TeeSinkTokenFilters (one over buffer1, one over buffer2) into the same pair of
 * sink streams, checks the tokens seen by each source and each sink, and finally checks that
 * a LowerCaseFilter over the cached first source yields the lower-cased tokens.
 */
public void testMultipleSources() throws Exception {
  // First source: whitespace-tokenized buffer1, with one sink per filter (dogFilter, theFilter).
  final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer1.toString()), MockTokenizer.WHITESPACE, false));
  final TeeSinkTokenFilter.SinkTokenStream dogDetector = tee1.newSinkTokenStream(dogFilter);
  final TeeSinkTokenFilter.SinkTokenStream theDetector = tee1.newSinkTokenStream(theFilter);
  tee1.reset();
  // source1 wraps tee1 in a CachingTokenFilter; it is consumed twice further down.
  final TokenStream source1 = new CachingTokenFilter(tee1);
  
  tee1.addAttribute(CheckClearAttributesAttribute.class);
  dogDetector.addAttribute(CheckClearAttributesAttribute.class);
  theDetector.addAttribute(CheckClearAttributesAttribute.class);

  // Second source: whitespace-tokenized buffer2, created with tee1's attribute factory and
  // attached to the same two sinks.
  MockTokenizer tokenizer = new MockTokenizer(tee1.getAttributeFactory(), new StringReader(buffer2.toString()), MockTokenizer.WHITESPACE, false);
  final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(tokenizer);
  tee2.addSinkTokenStream(dogDetector);
  tee2.addSinkTokenStream(theDetector);
  final TokenStream source2 = tee2;

  // Each source must produce its own full token list.
  assertTokenStreamContents(source1, tokens1);
  assertTokenStreamContents(source2, tokens2);

  // NOTE(review): the sinks presumably hold the matches accumulated from both sources — confirm.
  assertTokenStreamContents(theDetector, new String[]{"The", "the", "The", "the"});
  assertTokenStreamContents(dogDetector, new String[]{"Dogs", "Dogs"});
  
  // Reset source1 (already consumed above) and read it again through a LowerCaseFilter.
  source1.reset();
  TokenStream lowerCasing = new LowerCaseFilter(source1);
  // Expected output: tokens1 lower-cased with the locale-independent ROOT locale.
  String[] lowerCaseTokens = new String[tokens1.length];
  for (int i = 0; i < tokens1.length; i++)
    lowerCaseTokens[i] = tokens1[i].toLowerCase(Locale.ROOT);
  assertTokenStreamContents(lowerCasing, lowerCaseTokens);
}
 
开发者ID:europeana,项目名称:search,代码行数:31,代码来源:TestTeeSinkTokenFilter.java

示例9: createComponents

import org.apache.lucene.analysis.core.LowerCaseFilter; //导入依赖的package包/类
/**
 * Records that the analyzer was invoked (sets {@code called}) and builds a chain of a
 * {@code WhitespaceTokenizer} followed by a {@code LowerCaseFilter}.
 *
 * @param fieldName not used when building the chain
 * @return components with the tokenizer as source and the lower-casing filter as result
 */
@Override
protected TokenStreamComponents createComponents( String fieldName )
{
    called = true;
    final Tokenizer whitespace = new WhitespaceTokenizer();
    final LowerCaseFilter lowerCased = new LowerCaseFilter( whitespace );
    return new TokenStreamComponents( whitespace, lowerCased );
}
 
开发者ID:neo4j-contrib,项目名称:neo4j-lucene5-index,代码行数:8,代码来源:CustomAnalyzer.java

示例10: createComponents

import org.apache.lucene.analysis.core.LowerCaseFilter; //导入依赖的package包/类
/**
 * Builds a chain of a {@code LowerCaseTokenizer} followed by a {@code LowerCaseFilter}.
 *
 * @param fieldName not used when building the chain
 * @return components with the tokenizer as source and the filter as result
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final LowerCaseTokenizer tokenizer = new LowerCaseTokenizer();
    return new TokenStreamComponents(tokenizer, new LowerCaseFilter(tokenizer));
}
 
开发者ID:kiegroup,项目名称:appformer,代码行数:9,代码来源:FilenameAnalyzer.java

示例11: getAnalyzer

import org.apache.lucene.analysis.core.LowerCaseFilter; //导入依赖的package包/类
/**
 * Creates an analyzer based on a {@code KeywordTokenizer}, optionally followed by a
 * {@code LowerCaseFilter}.
 *
 * @param ignoreCase when true, the tokenizer output is passed through a {@code LowerCaseFilter}
 * @return a new analyzer instance
 */
protected static Analyzer getAnalyzer(final boolean ignoreCase) {
    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(final String fieldName) {
            final Tokenizer source = new KeywordTokenizer();
            TokenStream result = source;
            if (ignoreCase) {
                result = new LowerCaseFilter(source);
            }
            return new TokenStreamComponents(source, result);
        }
    };
}
 
开发者ID:codelibs,项目名称:elasticsearch-analysis-synonym,代码行数:11,代码来源:SynonymLoader.java

示例12: createComponents

import org.apache.lucene.analysis.core.LowerCaseFilter; //导入依赖的package包/类
/**
 * Builds the analysis chain for a field: a tokenizer chosen by field name, followed by a
 * {@code LowerCaseFilter}.
 *
 * <p>The field equal to {@code DocumentIndex.FIELD__KEYWORD} is tokenized with a
 * {@code NullTokenizer}; every other field uses a {@code LetterOrDigitTokenizer}.
 *
 * @param fieldName name of the field being analyzed
 * @param reader the text to tokenize
 * @return components with the tokenizer as source and the lower-casing filter as result
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final boolean keywordField = DocumentIndex.FIELD__KEYWORD.equals(fieldName);
    final Tokenizer source = keywordField
            ? new NullTokenizer(reader)
            : new LetterOrDigitTokenizer(reader);
    final LowerCaseFilter lowerCased = new LowerCaseFilter(Version.LUCENE_40, source);
    return new TokenStreamComponents(source, lowerCased);
}
 
开发者ID:imCodePartnerAB,项目名称:imcms,代码行数:12,代码来源:AnalyzerImpl.java

示例13: createComponents

import org.apache.lucene.analysis.core.LowerCaseFilter; //导入依赖的package包/类
/**
 * Builds a chain of a {@code LemmatizingTokenizer}, a {@code LowerCaseFilter} and a
 * {@code LemmaTokenFilter} (constructed with {@code true} as its second argument).
 *
 * @param fieldName not used when building the chain
 * @return components with the tokenizer as source and the lemma filter as result
 */
@SuppressWarnings("resource")
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new LemmatizingTokenizer();
    final TokenStream lowerCased = new LowerCaseFilter(source);
    // A KeywordRepeatFilter stage between the two filters is currently disabled:
    // new KeywordRepeatFilter(lowerCased)
    final TokenStream lemmas = new LemmaTokenFilter(lowerCased, true);
    return new TokenStreamComponents(source, lemmas);
}
 
开发者ID:shaie,项目名称:lucenelab,代码行数:10,代码来源:LemmatizingTokenizerDemo.java

示例14: inform

import org.apache.lucene.analysis.core.LowerCaseFilter; //导入依赖的package包/类
/**
 * Reads the factory arguments and loads the synonym map through {@code loader}.
 *
 * <p>Arguments read: {@code "ignoreCase"} (boolean, default false; also stored in the
 * {@code ignoreCase} field), {@code "tokenizerFactory"} and {@code "format"}. Synonym entries
 * are tokenized with the named tokenizer factory when given, otherwise with a
 * {@code WhitespaceTokenizer}, and are lower-cased when {@code ignoreCase} is true.
 *
 * <p>A format of {@code null} or {@code "solr"} loads Solr synonyms; {@code "wordnet"} loads
 * WordNet synonyms.
 *
 * @param loader resource loader used for the tokenizer factory and the synonyms
 * @throws IOException wrapping any {@code ParseException} raised while loading the synonyms
 * @throws IllegalArgumentException if the format is any other value
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  // The anonymous analyzer below captures this local, not the field.
  final boolean lowerCase = getBoolean("ignoreCase", false);
  this.ignoreCase = lowerCase;

  final String factoryName = args.get("tokenizerFactory");
  final TokenizerFactory factory;
  if (factoryName != null) {
    factory = loadTokenizerFactory(loader, factoryName);
  } else {
    factory = null;
  }

  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      final Tokenizer source;
      if (factory != null) {
        source = factory.create(reader);
      } else {
        source = new WhitespaceTokenizer(Version.LUCENE_31, reader);
      }

      TokenStream result = source;
      if (lowerCase) {
        result = new LowerCaseFilter(Version.LUCENE_31, source);
      }
      return new TokenStreamComponents(source, result);
    }
  };

  final String format = args.get("format");
  final boolean solrFormat = format == null || "solr".equals(format);
  final boolean wordnetFormat = "wordnet".equals(format);
  if (!solrFormat && !wordnetFormat) {
    // TODO: somehow make this more pluggable
    throw new IllegalArgumentException("Unrecognized synonyms format: " + format);
  }

  try {
    if (solrFormat) {
      // TODO: expose dedup as a parameter?
      map = loadSolrSynonyms(loader, true, analyzer);
    } else {
      map = loadWordnetSynonyms(loader, true, analyzer);
    }
  } catch (ParseException e) {
    throw new IOException("Exception thrown while loading synonyms", e);
  }
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:34,代码来源:FSTSynonymFilterFactory.java

示例15: ThaiWordFilter

import org.apache.lucene.analysis.core.LowerCaseFilter; //导入依赖的package包/类
/**
 * Creates a new ThaiWordFilter with the specified match version.
 *
 * <p>For a {@code matchVersion} older than {@code Version.LUCENE_31} the input is wrapped in a
 * {@code LowerCaseFilter} first; otherwise it is used directly. The {@code handlePosIncr} flag
 * is set to true only for {@code LUCENE_31} or later.
 *
 * @param matchVersion version used to select the behavior described above
 * @param input the token stream to filter
 * @throws UnsupportedOperationException if {@code DBBI_AVAILABLE} is false
 */
public ThaiWordFilter(Version matchVersion, TokenStream input) {
  super(!matchVersion.onOrAfter(Version.LUCENE_31)
      ? new LowerCaseFilter(matchVersion, input)
      : input);
  if (!DBBI_AVAILABLE) {
    throw new UnsupportedOperationException("This JRE does not have support for Thai segmentation");
  }
  handlePosIncr = matchVersion.onOrAfter(Version.LUCENE_31);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:9,代码来源:ThaiWordFilter.java


注:本文中的org.apache.lucene.analysis.core.LowerCaseFilter类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。