本文整理汇总了 Java 中 org.apache.lucene.analysis.CharArraySet 类的典型用法代码示例。如果您正苦于以下问题：Java CharArraySet 类的具体用法是什么？CharArraySet 怎么用？有哪些 CharArraySet 的使用示例？那么，这里精选的类代码示例或许可以为您提供帮助。
CharArraySet 类属于 org.apache.lucene.analysis 包，下文一共展示了 CharArraySet 类的 15 个代码示例，这些示例默认按受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: PatternAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * Builds a {@link PatternAnalyzer} from the {@code lowercase}, stop-word, {@code pattern} and
 * {@code flags} settings.
 *
 * @param indexSettings index-level settings; supplies the index-created version
 * @param env environment handed to the stop-word parser
 * @param name analyzer name, also used in the error message
 * @param settings analyzer settings to read from
 * @throws IllegalArgumentException if the resolved pattern string is {@code null}
 */
public PatternAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);

    // "lowercase" defaults to true when the setting is absent.
    boolean lowercase = settings.getAsBooleanLenientForPreEs6Indices(
        indexSettings.getIndexVersionCreated(), "lowercase", true, deprecationLogger);

    // The empty set is handed to the parser as the default stop-word set.
    CharArraySet stopWords =
        Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, CharArraySet.EMPTY_SET);

    String patternSource = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/);
    // NOTE(review): a non-null default is supplied above, so this guard looks unreachable —
    // confirm against Settings.get(key, default) before removing it.
    if (patternSource == null) {
        throw new IllegalArgumentException("Analyzer [" + name + "] of type pattern must have a `pattern` set");
    }

    analyzer = new PatternAnalyzer(Regex.compile(patternSource, settings.get("flags")), lowercase, stopWords);
}
示例2: parseStemExclusion
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * Resolves the {@code stem_exclusion} setting into a {@link CharArraySet}.
 *
 * <p>Resolution order: the literal value {@code _none_} yields the empty set; any other single
 * string value is split on commas; otherwise the array form of the setting is used; if neither
 * form is present, {@code defaultStemExclusion} is returned unchanged.
 *
 * @param settings settings to read {@code stem_exclusion} from
 * @param defaultStemExclusion value returned when the setting is absent
 * @return the resolved exclusion set (built with {@code ignoreCase = false})
 */
public static CharArraySet parseStemExclusion(Settings settings, CharArraySet defaultStemExclusion) {
    final String raw = settings.get("stem_exclusion");
    if ("_none_".equals(raw)) {
        return CharArraySet.EMPTY_SET;
    }
    if (raw != null) {
        // LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
        return new CharArraySet(Strings.commaDelimitedListToSet(raw), false);
    }

    final String[] listed = settings.getAsArray("stem_exclusion", null);
    // LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
    return listed == null ? defaultStemExclusion : new CharArraySet(Arrays.asList(listed), false);
}
示例3: parseWords
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * Resolves a word-list setting into a {@link CharArraySet}.
 *
 * <p>If the setting {@code name} has a string value, {@code _none_} yields the empty set and any
 * other value is comma-split and passed through {@code resolveNamedWords}. Otherwise the list
 * returned by {@code getWordList} is resolved the same way. If neither source yields anything,
 * {@code defaultWords} is returned.
 *
 * @param env environment passed to {@code getWordList}
 * @param settings settings to read from
 * @param name key of the setting holding the words
 * @param defaultWords value returned when no words are configured
 * @param namedWords named word sets passed to {@code resolveNamedWords}
 * @param ignoreCase case-sensitivity flag passed to {@code resolveNamedWords}
 * @return the resolved word set
 */
public static CharArraySet parseWords(Environment env, Settings settings, String name, CharArraySet defaultWords,
                                      Map<String, Set<?>> namedWords, boolean ignoreCase) {
    final String inlineValue = settings.get(name);
    if (inlineValue != null) {
        if ("_none_".equals(inlineValue)) {
            return CharArraySet.EMPTY_SET;
        }
        return resolveNamedWords(Strings.commaDelimitedListToSet(inlineValue), namedWords, ignoreCase);
    }

    final List<String> wordsFromList = getWordList(env, settings, name);
    if (wordsFromList == null) {
        return defaultWords;
    }
    return resolveNamedWords(wordsFromList, namedWords, ignoreCase);
}
示例4: testOverlappingAtBeginning
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * With overlapping phrases "new york" and "new york city" at the start of the input, expects the
 * single token {@code new_york_city} followed by {@code is} and {@code great}.
 */
@Test
public void testOverlappingAtBeginning() throws Exception {
    final CharArraySet phraseSets = new CharArraySet(Arrays.asList(
        "new york", "new york city", "city of new york"), false);
    final String input = "new york city is great";

    final WhitespaceTokenizer in = new WhitespaceTokenizer();
    in.setReader(new StringReader(input));

    // try-with-resources: the stream was previously never closed; now it is released even when
    // an assertion below fails.
    try (AutoPhrasingTokenFilter aptf = new AutoPhrasingTokenFilter(in, phraseSets, false)) {
        aptf.setReplaceWhitespaceWith('_');
        CharTermAttribute term = aptf.addAttribute(CharTermAttribute.class);
        aptf.reset();

        assertTrue(aptf.incrementToken());
        assertEquals("new_york_city", term.toString());

        assertTrue(aptf.incrementToken());
        assertEquals("is", term.toString());

        assertTrue(aptf.incrementToken());
        assertEquals("great", term.toString());
    }
}
示例5: testOverlappingAtEnd
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * With the phrase "city of new york" at the end of the input, expects {@code the}, {@code great}
 * and then the single token {@code city_of_new_york}.
 */
@Test
public void testOverlappingAtEnd() throws Exception {
    final CharArraySet phraseSets = new CharArraySet(Arrays.asList(
        "new york", "new york city", "city of new york"), false);
    final String input = "the great city of new york";

    final WhitespaceTokenizer in = new WhitespaceTokenizer();
    in.setReader(new StringReader(input));

    // try-with-resources: the stream was previously never closed; now it is released even when
    // an assertion below fails.
    try (AutoPhrasingTokenFilter aptf = new AutoPhrasingTokenFilter(in, phraseSets, false)) {
        aptf.setReplaceWhitespaceWith('_');
        CharTermAttribute term = aptf.addAttribute(CharTermAttribute.class);
        aptf.reset();

        assertTrue(aptf.incrementToken());
        assertEquals("the", term.toString());

        assertTrue(aptf.incrementToken());
        assertEquals("great", term.toString());

        assertTrue(aptf.incrementToken());
        assertEquals("city_of_new_york", term.toString());
    }
}
示例6: testIncompletePhrase
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * With input that only starts the phrase "new york city" ("some new york"), expects the three
 * plain tokens {@code some}, {@code new} and {@code york}.
 */
@Test
public void testIncompletePhrase() throws Exception {
    final CharArraySet phraseSets = new CharArraySet(Arrays.asList(
        "big apple", "new york city", "property tax", "three word phrase"), false);
    final String input = "some new york";

    final WhitespaceTokenizer in = new WhitespaceTokenizer();
    in.setReader(new StringReader(input));

    // try-with-resources: the stream was previously never closed; now it is released even when
    // an assertion below fails.
    try (AutoPhrasingTokenFilter aptf = new AutoPhrasingTokenFilter(in, phraseSets, false)) {
        aptf.setReplaceWhitespaceWith('_');
        CharTermAttribute term = aptf.addAttribute(CharTermAttribute.class);
        aptf.reset();

        assertTrue(aptf.incrementToken());
        assertEquals("some", term.toString());

        assertTrue(aptf.incrementToken());
        assertEquals("new", term.toString());

        assertTrue(aptf.incrementToken());
        assertEquals("york", term.toString());
    }
}
示例7: RomanianAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
public RomanianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new RomanianAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, RomanianAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
analyzer.setVersion(version);
}
示例8: BasqueAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
public BasqueAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new BasqueAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, BasqueAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
analyzer.setVersion(version);
}
示例9: StandardHtmlStripAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * Creates the {@link StandardHtmlStripAnalyzer} for this provider from the parsed stop words,
 * then applies {@code version} to it.
 *
 * @param indexSettings index-level settings; supplies the index-created version
 * @param env environment handed to the stop-word parser
 * @param name analyzer name
 * @param settings analyzer settings to read from
 */
public StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    // The empty set is handed to the parser as the default stop-word set.
    final CharArraySet configuredStopWords = Analysis.parseStopWords(
        env, indexSettings.getIndexVersionCreated(), settings, CharArraySet.EMPTY_SET);
    analyzer = new StandardHtmlStripAnalyzer(configuredStopWords);
    analyzer.setVersion(version);
}
示例10: IndonesianAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
public IndonesianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new IndonesianAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, IndonesianAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
analyzer.setVersion(version);
}
示例11: ArabicAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
public ArabicAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
arabicAnalyzer = new ArabicAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, ArabicAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
arabicAnalyzer.setVersion(version);
}
示例12: SnowballAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * Creates the {@link SnowballAnalyzer} for this provider.
 *
 * <p>The language is read from the {@code language} setting, falling back to the {@code name}
 * setting and finally to {@code "English"}. The default stop words for that language are looked
 * up in {@code DEFAULT_LANGUAGE_STOPWORDS}, with the empty set used when there is no entry.
 *
 * @param indexSettings index-level settings; supplies the index-created version
 * @param env environment handed to the stop-word parser
 * @param name analyzer name
 * @param settings analyzer settings to read from
 */
public SnowballAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);

    final String fallbackLanguage = settings.get("name", "English");
    final String language = settings.get("language", fallbackLanguage);

    final CharArraySet languageDefaults =
        DEFAULT_LANGUAGE_STOPWORDS.getOrDefault(language, CharArraySet.EMPTY_SET);
    final CharArraySet stopWords = Analysis.parseStopWords(
        env, indexSettings.getIndexVersionCreated(), settings, languageDefaults);

    analyzer = new SnowballAnalyzer(language, stopWords);
    analyzer.setVersion(version);
}
示例13: StopAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * Creates the {@link StopAnalyzer} for this provider from the parsed stop words, then applies
 * {@code version} to it.
 *
 * @param indexSettings index-level settings; supplies the index-created version
 * @param env environment handed to the stop-word parser
 * @param name analyzer name
 * @param settings analyzer settings to read from
 */
public StopAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    // StopAnalyzer.ENGLISH_STOP_WORDS_SET is handed to the parser as the default.
    final CharArraySet configuredStopWords = Analysis.parseStopWords(
        env, indexSettings.getIndexVersionCreated(), settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    stopAnalyzer = new StopAnalyzer(configuredStopWords);
    stopAnalyzer.setVersion(version);
}
示例14: SwedishAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
public SwedishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new SwedishAnalyzer(
Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, SwedishAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
);
analyzer.setVersion(version);
}
示例15: StandardAnalyzerProvider
import org.apache.lucene.analysis.CharArraySet; //导入依赖的package包/类
/**
 * Creates the {@link StandardAnalyzer} for this provider from the parsed stop words, then applies
 * {@code version} and the {@code max_token_length} setting to it.
 *
 * @param indexSettings index-level settings; supplies the index-created version
 * @param env environment handed to the stop-word parser
 * @param name analyzer name
 * @param settings analyzer settings to read from
 */
public StandardAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);

    // The empty set is handed to the parser as the default stop-word set.
    final CharArraySet configuredStopWords = Analysis.parseStopWords(
        env, indexSettings.getIndexVersionCreated(), settings, CharArraySet.EMPTY_SET);
    // Falls back to StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH when the setting is absent.
    final int tokenLengthLimit =
        settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);

    standardAnalyzer = new StandardAnalyzer(configuredStopWords);
    standardAnalyzer.setVersion(version);
    standardAnalyzer.setMaxTokenLength(tokenLengthLimit);
}