This article collects typical usage examples of the Java class org.apache.lucene.analysis.core.LowerCaseFilter: what the class is for and how it is used in practice.
LowerCaseFilter belongs to the org.apache.lucene.analysis.core package. Fifteen code examples are shown below, sorted by popularity by default.
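Before the individual examples, here is a minimal self-contained sketch of the basic pattern: wrap a Tokenizer in a LowerCaseFilter and iterate the resulting TokenStream. This sketch assumes a Lucene version (roughly 5.x/6.x) where org.apache.lucene.analysis.core.LowerCaseFilter has a single-argument constructor; several examples below target the older 4.x API, which also takes a Version argument, and in Lucene 7+ the class moved to org.apache.lucene.analysis.LowerCaseFilter.

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class LowerCaseFilterDemo {
    public static void main(String[] args) throws Exception {
        Tokenizer tokenizer = new WhitespaceTokenizer();            // splits on whitespace
        tokenizer.setReader(new StringReader("Hello LUCENE World"));
        try (TokenStream stream = new LowerCaseFilter(tokenizer)) { // lowercases each token
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();                                         // required before incrementToken()
            while (stream.incrementToken()) {
                System.out.println(term.toString());                // hello / lucene / world
            }
            stream.end();
        }
    }
}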
Example 1: inform
import org.apache.lucene.analysis.core.LowerCaseFilter; // import the required package/class
@Override
public void inform(ResourceLoader loader) throws IOException {
    final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) : factory.create(reader);
            TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer;
            return new TokenStreamComponents(tokenizer, stream);
        }
    };
    try {
        String formatClass = format;
        if (format == null || format.equals("solr")) {
            formatClass = SolrSynonymParser.class.getName();
        } else if (format.equals("wordnet")) {
            formatClass = WordnetSynonymParser.class.getName();
        }
        // TODO: expose dedup as a parameter?
        map = loadSynonyms(loader, formatClass, true, analyzer);
    } catch (ParseException e) {
        throw new IOException("Error parsing synonyms file:", e);
    }
}
Example 2: inform
import org.apache.lucene.analysis.core.LowerCaseFilter; // import the required package/class
@Override
public void inform(ResourceLoader loader) throws IOException {
    final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_43, reader) : factory.create(reader);
            TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_43, tokenizer) : tokenizer;
            return new TokenStreamComponents(tokenizer, stream);
        }
    };
    try {
        String formatClass = format;
        if (format == null || format.equals("solr")) {
            formatClass = SolrSynonymParser.class.getName();
        } else if (format.equals("wordnet")) {
            formatClass = WordnetSynonymParser.class.getName();
        }
        // TODO: expose dedup as a parameter?
        map = loadSynonyms(loader, formatClass, true, analyzer);
    } catch (ParseException e) {
        throw new IOException("Error parsing synonyms file:", e);
    }
}
Example 3: ThaiWordFilter
import org.apache.lucene.analysis.core.LowerCaseFilter; // import the required package/class
/**
 * @deprecated Use {@link #ThaiWordFilter(TokenStream)}
 */
@Deprecated
public ThaiWordFilter(Version matchVersion, TokenStream input) {
    super(matchVersion.onOrAfter(Version.LUCENE_3_1) ?
        input : new LowerCaseFilter(matchVersion, input));
    if (!DBBI_AVAILABLE)
        throw new UnsupportedOperationException("This JRE does not have support for Thai segmentation");
    handlePosIncr = matchVersion.onOrAfter(Version.LUCENE_3_1);
}
Example 4: create
import org.apache.lucene.analysis.core.LowerCaseFilter; // import the required package/class
@Override
public LowerCaseFilter create(TokenStream input) {
    if (luceneMatchVersion == null) {
        return new LowerCaseFilter(input);
    }
    return new LowerCaseFilter(luceneMatchVersion, input);
}
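As a usage sketch for this factory (hedged: the empty parameter map and the someTokenizer variable are illustrative assumptions, not part of the example above), a LowerCaseFilterFactory is typically constructed from a String-to-String argument map and then applied to a token stream:

// Hypothetical usage of the factory shown above.
Map<String, String> args = new HashMap<>();                 // may carry e.g. "luceneMatchVersion"
LowerCaseFilterFactory factory = new LowerCaseFilterFactory(args);
TokenStream lowercased = factory.create(someTokenizer);     // someTokenizer is any Tokenizer/TokenStream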
Example 5: create
import org.apache.lucene.analysis.core.LowerCaseFilter; // import the required package/class
@Override
public TokenStream create(TokenStream tokenStream) {
    if (lang == null) {
        return new LowerCaseFilter(tokenStream);
    } else if (lang.equalsIgnoreCase("greek")) {
        return new GreekLowerCaseFilter(tokenStream);
    } else if (lang.equalsIgnoreCase("irish")) {
        return new IrishLowerCaseFilter(tokenStream);
    } else if (lang.equalsIgnoreCase("turkish")) {
        return new TurkishLowerCaseFilter(tokenStream);
    } else {
        throw new IllegalArgumentException("language [" + lang + "] not support for lower case");
    }
}
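The per-language branching exists because lowercasing is locale-sensitive: TurkishLowerCaseFilter preserves the dotted/dotless i distinction, GreekLowerCaseFilter also normalizes final sigma and some diacritics, and IrishLowerCaseFilter handles Irish initial mutations such as n- and t-prothesis. Plain Java already illustrates the Turkish pitfall:

// Why a generic LowerCaseFilter is wrong for Turkish text:
System.out.println("QUIT".toLowerCase(Locale.ROOT));        // quit
System.out.println("QUIT".toLowerCase(new Locale("tr")));   // quıt (dotless ı)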
Example 6: transform
import org.apache.lucene.analysis.core.LowerCaseFilter; // import the required package/class
public Tuple2<Double, Multiset<String>> transform(Row row) throws IOException {
    Double label = row.getDouble(1);
    StringReader document = new StringReader(row.getString(0).replaceAll("br2n", ""));
    List<String> wordsList = new ArrayList<>();
    try (BulgarianAnalyzer analyzer = new BulgarianAnalyzer(BULGARIAN_STOP_WORDS_SET)) {
        TokenStream stream = analyzer.tokenStream("words", document);
        TokenFilter lowerFilter = new LowerCaseFilter(stream);
        TokenFilter numbers = new NumberFilter(lowerFilter);
        TokenFilter length = new LengthFilter(numbers, 3, 1000);
        TokenFilter stemmer = new BulgarianStemFilter(length);
        TokenFilter ngrams = new ShingleFilter(stemmer, 2, 3);
        try (TokenFilter filter = ngrams) {
            Attribute termAtt = filter.addAttribute(CharTermAttribute.class);
            filter.reset();
            while (filter.incrementToken()) {
                String word = termAtt.toString().replace(",", "(comma)").replaceAll("\n|\r", "");
                if (word.contains("_")) {
                    continue;
                }
                wordsList.add(word);
            }
        }
    }
    Multiset<String> words = ConcurrentHashMultiset.create(wordsList);
    return new Tuple2<>(label, words);
}
Example 7: createComponents
import org.apache.lucene.analysis.core.LowerCaseFilter; // import the required package/class
@Override
protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
    Tokenizer t = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
    return new TokenStreamComponents(t,
        new GermanStemFilter(new LowerCaseFilter(t)));
}
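Note the ordering: the input is lowercased before GermanStemFilter runs, which matches Lucene's conventional German analysis chain, where stemming operates on lowercased terms.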
Example 8: testMultipleSources
import org.apache.lucene.analysis.core.LowerCaseFilter; // import the required package/class
public void testMultipleSources() throws Exception {
    final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer1.toString()), MockTokenizer.WHITESPACE, false));
    final TeeSinkTokenFilter.SinkTokenStream dogDetector = tee1.newSinkTokenStream(dogFilter);
    final TeeSinkTokenFilter.SinkTokenStream theDetector = tee1.newSinkTokenStream(theFilter);
    tee1.reset();
    final TokenStream source1 = new CachingTokenFilter(tee1);
    tee1.addAttribute(CheckClearAttributesAttribute.class);
    dogDetector.addAttribute(CheckClearAttributesAttribute.class);
    theDetector.addAttribute(CheckClearAttributesAttribute.class);
    MockTokenizer tokenizer = new MockTokenizer(tee1.getAttributeFactory(), new StringReader(buffer2.toString()), MockTokenizer.WHITESPACE, false);
    final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(tokenizer);
    tee2.addSinkTokenStream(dogDetector);
    tee2.addSinkTokenStream(theDetector);
    final TokenStream source2 = tee2;
    assertTokenStreamContents(source1, tokens1);
    assertTokenStreamContents(source2, tokens2);
    assertTokenStreamContents(theDetector, new String[]{"The", "the", "The", "the"});
    assertTokenStreamContents(dogDetector, new String[]{"Dogs", "Dogs"});
    source1.reset();
    TokenStream lowerCasing = new LowerCaseFilter(source1);
    String[] lowerCaseTokens = new String[tokens1.length];
    for (int i = 0; i < tokens1.length; i++)
        lowerCaseTokens[i] = tokens1[i].toLowerCase(Locale.ROOT);
    assertTokenStreamContents(lowerCasing, lowerCaseTokens);
}
Example 9: createComponents
import org.apache.lucene.analysis.core.LowerCaseFilter; // import the required package/class
@Override
protected TokenStreamComponents createComponents( String fieldName )
{
    called = true;
    Tokenizer source = new WhitespaceTokenizer();
    return new TokenStreamComponents( source, new LowerCaseFilter( source ) );
}
Example 10: createComponents
import org.apache.lucene.analysis.core.LowerCaseFilter; // import the required package/class
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final LowerCaseTokenizer src = new LowerCaseTokenizer();
    final TokenStream tok = new LowerCaseFilter(src);
    return new TokenStreamComponents(src, tok);
}
Example 11: getAnalyzer
import org.apache.lucene.analysis.core.LowerCaseFilter; // import the required package/class
protected static Analyzer getAnalyzer(final boolean ignoreCase) {
    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(final String fieldName) {
            final Tokenizer tokenizer = new KeywordTokenizer();
            final TokenStream stream = ignoreCase ? new LowerCaseFilter(tokenizer) : tokenizer;
            return new TokenStreamComponents(tokenizer, stream);
        }
    };
}
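A brief usage sketch for this helper (hedged: the field name and input text are illustrative assumptions). Because KeywordTokenizer emits the entire input as one token, the case-insensitive analyzer yields a single lowercased token:

// Hypothetical usage of getAnalyzer(true).
Analyzer analyzer = getAnalyzer(true);
try (TokenStream ts = analyzer.tokenStream("field", "Hello World")) {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        System.out.println(term.toString());                // prints: hello world
    }
    ts.end();
}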
Example 12: createComponents
import org.apache.lucene.analysis.core.LowerCaseFilter; // import the required package/class
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer tokenizer;
    if (DocumentIndex.FIELD__KEYWORD.equals(fieldName)) {
        tokenizer = new NullTokenizer(reader);
    } else {
        tokenizer = new LetterOrDigitTokenizer(reader);
    }
    return new TokenStreamComponents(tokenizer, new LowerCaseFilter(Version.LUCENE_40, tokenizer));
}
Example 13: createComponents
import org.apache.lucene.analysis.core.LowerCaseFilter; // import the required package/class
@SuppressWarnings("resource")
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer tokenizer = new LemmatizingTokenizer();
    TokenStream stream = new LowerCaseFilter(tokenizer);
    // stream = new KeywordRepeatFilter(stream);
    stream = new LemmaTokenFilter(stream, true);
    return new TokenStreamComponents(tokenizer, stream);
}
Example 14: inform
import org.apache.lucene.analysis.core.LowerCaseFilter; // import the required package/class
@Override
public void inform(ResourceLoader loader) throws IOException {
    final boolean ignoreCase = getBoolean("ignoreCase", false);
    this.ignoreCase = ignoreCase;
    String tf = args.get("tokenizerFactory");
    final TokenizerFactory factory = tf == null ? null : loadTokenizerFactory(loader, tf);
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_31, reader) : factory.create(reader);
            TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_31, tokenizer) : tokenizer;
            return new TokenStreamComponents(tokenizer, stream);
        }
    };
    String format = args.get("format");
    try {
        if (format == null || format.equals("solr")) {
            // TODO: expose dedup as a parameter?
            map = loadSolrSynonyms(loader, true, analyzer);
        } else if (format.equals("wordnet")) {
            map = loadWordnetSynonyms(loader, true, analyzer);
        } else {
            // TODO: somehow make this more pluggable
            throw new IllegalArgumentException("Unrecognized synonyms format: " + format);
        }
    } catch (ParseException e) {
        throw new IOException("Exception thrown while loading synonyms", e);
    }
}
Example 15: ThaiWordFilter
import org.apache.lucene.analysis.core.LowerCaseFilter; // import the required package/class
/** Creates a new ThaiWordFilter with the specified match version. */
public ThaiWordFilter(Version matchVersion, TokenStream input) {
    super(matchVersion.onOrAfter(Version.LUCENE_31) ?
        input : new LowerCaseFilter(matchVersion, input));
    if (!DBBI_AVAILABLE)
        throw new UnsupportedOperationException("This JRE does not have support for Thai segmentation");
    handlePosIncr = matchVersion.onOrAfter(Version.LUCENE_31);
}