This article collects typical usage examples of the Java class org.apache.lucene.analysis.util.CharArraySet. If you are wondering what the CharArraySet class is for, how to use it, or where to find usage examples, the curated code examples below should help.
The CharArraySet class belongs to the org.apache.lucene.analysis.util package. Fifteen code examples of the class are shown below, ordered by popularity.
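Before the examples, here is a minimal, self-contained sketch of the core CharArraySet API (case-insensitive construction, add/contains, and the unmodifiable view used by the analyzers below); the class name and sample words are made up for illustration:
import org.apache.lucene.analysis.util.CharArraySet;

public class CharArraySetBasics {
  public static void main(String[] args) {
    // Initial capacity 8, ignoreCase=true: lookups are case-insensitive.
    CharArraySet stopWords = new CharArraySet(8, true);
    stopWords.add("the");
    stopWords.add("And");

    // contains() accepts a CharSequence or a char[] slice without allocating a String.
    System.out.println(stopWords.contains("THE"));        // true (case ignored)
    char[] buffer = "and or".toCharArray();
    System.out.println(stopWords.contains(buffer, 0, 3)); // true, checks "and"

    // Wrap the set to prevent further modification (a pattern used by the analyzers below).
    CharArraySet frozen = CharArraySet.unmodifiableSet(stopWords);
    System.out.println(frozen.size());                    // 2
  }
}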
Example 1: uniqueStems
import org.apache.lucene.analysis.util.CharArraySet; //import the required package/class
/**
* Find the unique stem(s) of the provided word
*
* @param word Word to find the stems for
* @param length Length of the word in the char buffer
* @return List of stems for the word
*/
public List<CharsRef> uniqueStems(char word[], int length) {
List<CharsRef> stems = stem(word, length);
if (stems.size() < 2) {
return stems;
}
CharArraySet terms = new CharArraySet(8, dictionary.ignoreCase);
List<CharsRef> deduped = new ArrayList<>();
for (CharsRef s : stems) {
if (!terms.contains(s)) {
deduped.add(s);
terms.add(s);
}
}
return deduped;
}
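A standalone sketch of the same dedup pattern outside the stemmer, assuming only the CharArraySet and CharsRef APIs shown above; the class name and sample stems are made up:
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.CharsRef;

public class UniqueStemsDemo {
  public static void main(String[] args) {
    // Simulated stemmer output containing a case-only duplicate.
    List<CharsRef> stems = Arrays.asList(new CharsRef("Run"), new CharsRef("run"), new CharsRef("runner"));

    CharArraySet terms = new CharArraySet(8, true); // ignoreCase=true, like dictionary.ignoreCase
    List<CharsRef> deduped = new ArrayList<>();
    for (CharsRef s : stems) {
      if (!terms.contains(s)) { // CharsRef implements CharSequence, so it can be checked directly
        deduped.add(s);
        terms.add(s);
      }
    }
    System.out.println(deduped); // [Run, runner]
  }
}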
Example 2: CapitalizationFilter
import org.apache.lucene.analysis.util.CharArraySet; //import the required package/class
/**
* Creates a CapitalizationFilter with the specified parameters.
* @param in input tokenstream
* @param onlyFirstWord if true, capitalize only the first word of each token; otherwise capitalize every word
* @param keep a set of keep words; words found in this set are not capitalized
* @param forceFirstLetter Force the first letter to be capitalized even if it is in the keep list.
* @param okPrefix do not change word capitalization if a word begins with something in this list.
* @param minWordLength how long a word needs to be for capitalization to be applied. If
* minWordLength is 3, "and" becomes "And" but "or" stays "or".
* @param maxWordCount if the token contains more than maxWordCount words, the capitalization is
* assumed to be correct.
* @param maxTokenLength the maximum token length; tokens this long or longer are left unchanged
*/
public CapitalizationFilter(TokenStream in, boolean onlyFirstWord, CharArraySet keep,
boolean forceFirstLetter, Collection<char[]> okPrefix, int minWordLength,
int maxWordCount, int maxTokenLength) {
super(in);
this.onlyFirstWord = onlyFirstWord;
this.keep = keep;
this.forceFirstLetter = forceFirstLetter;
this.okPrefix = okPrefix;
if (minWordLength < 0) {
throw new IllegalArgumentException("minWordLength must be greater than or equal to zero");
}
if (maxWordCount < 1) {
throw new IllegalArgumentException("maxWordCount must be greater than zero");
}
if (maxTokenLength < 1) {
throw new IllegalArgumentException("maxTokenLength must be greater than zero");
}
this.minWordLength = minWordLength;
this.maxWordCount = maxWordCount;
this.maxTokenLength = maxTokenLength;
}
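A minimal usage sketch of this constructor, assuming Lucene 5.x (no-arg WhitespaceTokenizer plus setReader) and the DEFAULT_MAX_* constants on CapitalizationFilter; the keep word and input text are made up:
import java.io.StringReader;
import java.util.Collections;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.CapitalizationFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;

public class CapitalizationDemo {
  public static void main(String[] args) throws Exception {
    // Words in the keep set are left untouched by the filter.
    CharArraySet keep = new CharArraySet(4, true);
    keep.add("iPhone");

    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("iPhone repair and service"));

    TokenStream stream = new CapitalizationFilter(tokenizer, true, keep,
        false, Collections.<char[]>emptyList(), 3,
        CapitalizationFilter.DEFAULT_MAX_WORD_COUNT,
        CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);

    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      System.out.println(term.toString()); // iPhone, Repair, And, Service
    }
    stream.end();
    stream.close();
  }
}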
Example 3: PatternAnalyzerProvider
import org.apache.lucene.analysis.util.CharArraySet; //import the required package/class
@Inject
public PatternAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettingsService.getSettings(), name, settings);
Version esVersion = Version.indexCreated(indexSettingsService.getSettings());
final CharArraySet defaultStopwords;
if (esVersion.onOrAfter(Version.V_1_0_0_RC1)) {
defaultStopwords = CharArraySet.EMPTY_SET;
} else {
defaultStopwords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
}
boolean lowercase = settings.getAsBoolean("lowercase", true);
CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords);
String sPattern = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/);
if (sPattern == null) {
throw new IllegalArgumentException("Analyzer [" + name + "] of type pattern must have a `pattern` set");
}
Pattern pattern = Regex.compile(sPattern, settings.get("flags"));
analyzer = new PatternAnalyzer(pattern, lowercase, stopWords);
}
Example 4: Lucene43CompoundWordTokenFilterBase
import org.apache.lucene.analysis.util.CharArraySet; //import the required package/class
protected Lucene43CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
super(input);
this.tokens=new LinkedList<>();
if (minWordSize < 0) {
throw new IllegalArgumentException("minWordSize cannot be negative");
}
this.minWordSize=minWordSize;
if (minSubwordSize < 0) {
throw new IllegalArgumentException("minSubwordSize cannot be negative");
}
this.minSubwordSize=minSubwordSize;
if (maxSubwordSize < 0) {
throw new IllegalArgumentException("maxSubwordSize cannot be negative");
}
this.maxSubwordSize=maxSubwordSize;
this.onlyLongestMatch=onlyLongestMatch;
this.dictionary = dictionary;
}
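Since this is an abstract base class, its use of the dictionary CharArraySet is easiest to see through a concrete subclass. A hedged sketch using DictionaryCompoundWordTokenFilter, assuming the version-less Lucene 5.x constructor; the class name and the German sample dictionary are made up:
import java.io.StringReader;
import java.util.Arrays;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;

public class CompoundDemo {
  public static void main(String[] args) throws Exception {
    // Dictionary of known subwords; ignoreCase=true so "Fussball" matches "fussball".
    CharArraySet dictionary = new CharArraySet(Arrays.asList("fuss", "ball", "verein"), true);

    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("Fussballverein"));

    // minWordSize=5, minSubwordSize=2, maxSubwordSize=15, onlyLongestMatch=false
    TokenStream stream = new DictionaryCompoundWordTokenFilter(
        tokenizer, dictionary, 5, 2, 15, false);

    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      System.out.println(term.toString()); // original token followed by the matched subwords
    }
    stream.end();
    stream.close();
  }
}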
Example 5: resolveNamedWords
import org.apache.lucene.analysis.util.CharArraySet; //import the required package/class
private static CharArraySet resolveNamedWords(Collection<String> words, Map<String, Set<?>> namedWords, boolean ignoreCase) {
if (namedWords == null) {
return new CharArraySet(words, ignoreCase);
}
CharArraySet setWords = new CharArraySet(words.size(), ignoreCase);
for (String word : words) {
if (namedWords.containsKey(word)) {
setWords.addAll(namedWords.get(word));
} else {
setWords.add(word);
}
}
return setWords;
}
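The method above expands placeholder names into their underlying word sets before everything lands in one CharArraySet. A standalone sketch of the same expansion logic; the class name, the "_english_" placeholder, and the sample sets are made up:
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.util.CharArraySet;

public class NamedWordsDemo {
  public static void main(String[] args) {
    Map<String, Set<?>> namedWords = new HashMap<>();
    namedWords.put("_english_", new HashSet<>(Arrays.asList("the", "and", "of")));

    List<String> words = Arrays.asList("_english_", "foo");

    CharArraySet setWords = new CharArraySet(words.size(), true);
    for (String word : words) {
      if (namedWords.containsKey(word)) {
        setWords.addAll(namedWords.get(word)); // expand the named set
      } else {
        setWords.add(word);                    // keep the literal word
      }
    }
    System.out.println(setWords.size()); // 4: the, and, of, foo
  }
}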
Example 6: FinnishAnalyzer
import org.apache.lucene.analysis.util.CharArraySet; //import the required package/class
/**
* @deprecated Use {@link #FinnishAnalyzer(CharArraySet,CharArraySet)}
*/
@Deprecated
public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));
}
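A short sketch of the replacement constructor mentioned in the @deprecated tag, assuming a Lucene version where the Version argument has been dropped; the class name and the stem-exclusion entry are made up:
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;

public class FinnishAnalyzerDemo {
  public static void main(String[] args) {
    CharArraySet stopwords = FinnishAnalyzer.getDefaultStopSet();

    // Words in the stem exclusion set bypass the Finnish stemmer.
    CharArraySet stemExclusions = new CharArraySet(4, true);
    stemExclusions.add("nokia");

    FinnishAnalyzer analyzer = new FinnishAnalyzer(stopwords, stemExclusions);
    System.out.println(analyzer.getStopwordSet().size());
    analyzer.close();
  }
}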
Example 7: EnglishAnalyzerProvider
import org.apache.lucene.analysis.util.CharArraySet; //import the required package/class
@Inject
public EnglishAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettingsService.getSettings(), name, settings);
analyzer = new EnglishAnalyzer(Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
analyzer.setVersion(version);
}
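Outside of Elasticsearch, the same analyzer can be built and consumed directly with plain Lucene. A minimal sketch assuming the version-less EnglishAnalyzer constructor; the class name, field name, exclusion word, and input text are made up:
import java.util.Arrays;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;

public class EnglishAnalyzerDemo {
  public static void main(String[] args) throws Exception {
    CharArraySet stopwords = EnglishAnalyzer.getDefaultStopSet();
    CharArraySet stemExclusions = new CharArraySet(Arrays.asList("elasticsearch"), true);

    try (EnglishAnalyzer analyzer = new EnglishAnalyzer(stopwords, stemExclusions);
         TokenStream stream = analyzer.tokenStream("body", "the quick foxes and elasticsearch")) {
      CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        // Stop words are removed, "foxes" is stemmed, "elasticsearch" is left untouched.
        System.out.println(term.toString());
      }
      stream.end();
    }
  }
}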
Example 8: SwedishAnalyzerProvider
import org.apache.lucene.analysis.util.CharArraySet; //import the required package/class
@Inject
public SwedishAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettingsService.getSettings(), name, settings);
analyzer = new SwedishAnalyzer(Analysis.parseStopWords(env, settings, SwedishAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
analyzer.setVersion(version);
}
Example 9: HungarianAnalyzerProvider
import org.apache.lucene.analysis.util.CharArraySet; //import the required package/class
@Inject
public HungarianAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettingsService.getSettings(), name, settings);
analyzer = new HungarianAnalyzer(Analysis.parseStopWords(env, settings, HungarianAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
analyzer.setVersion(version);
}
Example 10: EnglishAnalyzer
import org.apache.lucene.analysis.util.CharArraySet; //import the required package/class
/**
* @deprecated Use {@link #EnglishAnalyzer(CharArraySet,CharArraySet)}
*/
@Deprecated
public EnglishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));
}
Example 11: LatvianAnalyzer
import org.apache.lucene.analysis.util.CharArraySet; //import the required package/class
/**
* @deprecated Use {@link #LatvianAnalyzer(CharArraySet,CharArraySet)}
*/
@Deprecated
public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
super(matchVersion, stopwords);
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
matchVersion, stemExclusionSet));
}
Example 12: CzechAnalyzerProvider
import org.apache.lucene.analysis.util.CharArraySet; //import the required package/class
@Inject
public CzechAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettingsService.getSettings(), name, settings);
analyzer = new CzechAnalyzer(Analysis.parseStopWords(env, settings, CzechAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
analyzer.setVersion(version);
}
Example 13: getStopFilter
import org.apache.lucene.analysis.util.CharArraySet; //import the required package/class
private TokenStream getStopFilter(String lang, Set<String> metadataStopWords, TokenStream stream) {
if (metadataStopWords != null && !metadataStopWords.isEmpty()) {
return new StopFilter(stream, new CharArraySet(metadataStopWords, false));
} else {
try {
InputStream in = ClassLoader.getSystemResourceAsStream(lang.toLowerCase() + ".stopwords");
if (in != null) {
logger.debug("Loading Stop words for lang={}", lang);
CharArraySet stopWords = new CharArraySet(30, true);
try (BufferedReader bin = new BufferedReader(new InputStreamReader(in))) {
String line;
String[] parts;
while ((line = bin.readLine()) != null) {
parts = line.split(Pattern.quote("|"));
line = parts[0].trim();
if (line.length() > 0) {
stopWords.add(line);
}
}
return new StopFilter(stream, stopWords);
}
} else {
logger.warn("No stop words found for lang={}", lang);
}
} catch (Exception e) {
logger.error("Error creating stop filter for lang={}", lang, e);
}
}
return stream;
}
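A self-contained sketch of the StopFilter path above, using an in-memory stop word set instead of a .stopwords resource file and assuming the Lucene 5.x tokenizer API; the class name, stop words, and input text are made up:
import java.io.StringReader;
import java.util.Arrays;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;

public class StopFilterDemo {
  public static void main(String[] args) throws Exception {
    // ignoreCase=true so "The" is filtered as well as "the".
    CharArraySet stopWords = new CharArraySet(Arrays.asList("the", "a", "of"), true);

    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("The history of Lucene"));

    TokenStream stream = new StopFilter(tokenizer, stopWords);
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      System.out.println(term.toString()); // prints: history, Lucene
    }
    stream.end();
    stream.close();
  }
}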
Example 14: RomanianAnalyzerProvider
import org.apache.lucene.analysis.util.CharArraySet; //import the required package/class
@Inject
public RomanianAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettingsService.getSettings(), name, settings);
analyzer = new RomanianAnalyzer(Analysis.parseStopWords(env, settings, RomanianAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
analyzer.setVersion(version);
}
Example 15: BulgarianAnalyzerProvider
import org.apache.lucene.analysis.util.CharArraySet; //import the required package/class
@Inject
public BulgarianAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettingsService.getSettings(), name, settings);
analyzer = new BulgarianAnalyzer(Analysis.parseStopWords(env, settings, BulgarianAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
analyzer.setVersion(version);
}