

Java CharArraySet Class Code Examples

This article collects typical usage examples of the Java class org.apache.lucene.analysis.util.CharArraySet. If you are wondering what the CharArraySet class is for, how to use it, or are looking for concrete examples of it in real code, the selected samples below should help.


The CharArraySet class belongs to the org.apache.lucene.analysis.util package. The 15 code examples below show typical uses of the class, sorted by popularity by default.
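Before diving into the project examples, here is a minimal, self-contained sketch of the class itself. The stop words are made-up values, and the sketch assumes a Lucene release (roughly 4.10.x or 5.x) in which these CharArraySet constructors take no Version argument, matching the examples below.

import org.apache.lucene.analysis.util.CharArraySet;

import java.util.Arrays;

public class CharArraySetDemo {
  public static void main(String[] args) {
    // Build a case-insensitive set from a plain Java collection.
    CharArraySet stopWords = new CharArraySet(Arrays.asList("the", "and", "or"), true);
    stopWords.add("into");

    // Lookups accept String, CharSequence or a char[] slice.
    System.out.println(stopWords.contains("The"));                        // true (ignoreCase = true)
    System.out.println(stopWords.contains("lucene".toCharArray(), 0, 6)); // false

    // A read-only view, similar to what the analyzers below expose for their stop word sets.
    CharArraySet frozen = CharArraySet.unmodifiableSet(stopWords);
    System.out.println(frozen.size()); // 4
  }
}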

Example 1: uniqueStems

import org.apache.lucene.analysis.util.CharArraySet; // import the required package/class
/**
 * Find the unique stem(s) of the provided word
 * 
 * @param word Word to find the stems for
 * @param length Length of the word inside the buffer
 * @return List of stems for the word
 */
public List<CharsRef> uniqueStems(char word[], int length) {
  List<CharsRef> stems = stem(word, length);
  if (stems.size() < 2) {
    return stems;
  }
  CharArraySet terms = new CharArraySet(8, dictionary.ignoreCase);
  List<CharsRef> deduped = new ArrayList<>();
  for (CharsRef s : stems) {
    if (!terms.contains(s)) {
      deduped.add(s);
      terms.add(s);
    }
  }
  return deduped;
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 22, Source: Stemmer.java
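A side note on the dedup step in Example 1: org.apache.lucene.util.CharsRef implements CharSequence, so a CharArraySet can test and store stems directly without converting them to Strings. A minimal sketch of that interaction follows; the stem values are made up, and in the real code the ignoreCase flag would come from the Hunspell dictionary.

import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.CharsRef;

public class StemDedupSketch {
  public static void main(String[] args) {
    CharArraySet seen = new CharArraySet(8, /* ignoreCase */ false);
    CharsRef first = new CharsRef("walk");
    CharsRef duplicate = new CharsRef("walk");

    System.out.println(seen.contains(first));     // false, not seen yet
    seen.add(first);
    System.out.println(seen.contains(duplicate)); // true, so the duplicate stem would be skipped
  }
}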

Example 2: CapitalizationFilter

import org.apache.lucene.analysis.util.CharArraySet; // import the required package/class
/**
 * Creates a CapitalizationFilter with the specified parameters.
 * @param in input tokenstream 
 * @param onlyFirstWord should each word be capitalized or all of the words?
 * @param keep a keep word list.  Each word that should be kept separated by whitespace.
 * @param forceFirstLetter Force the first letter to be capitalized even if it is in the keep list.
 * @param okPrefix do not change word capitalization if a word begins with something in this list.
 * @param minWordLength how long the word needs to be to get capitalization applied.  If the
 *                      minWordLength is 3, "and" > "And" but "or" stays "or".
 * @param maxWordCount if the token contains more then maxWordCount words, the capitalization is
 *                     assumed to be correct.
 * @param maxTokenLength ???
 */
public CapitalizationFilter(TokenStream in, boolean onlyFirstWord, CharArraySet keep, 
    boolean forceFirstLetter, Collection<char[]> okPrefix, int minWordLength, 
    int maxWordCount, int maxTokenLength) {
  super(in);
  this.onlyFirstWord = onlyFirstWord;
  this.keep = keep;
  this.forceFirstLetter = forceFirstLetter;
  this.okPrefix = okPrefix;
  if (minWordLength < 0) {
    throw new IllegalArgumentException("minWordLength must be greater than or equal to zero");
  }
  if (maxWordCount < 1) {
    throw new IllegalArgumentException("maxWordCount must be greater than zero");
  }
  if (maxTokenLength < 1) {
    throw new IllegalArgumentException("maxTokenLength must be greater than zero");
  }
  this.minWordLength = minWordLength;
  this.maxWordCount = maxWordCount;
  this.maxTokenLength = maxTokenLength;
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 35, Source: CapitalizationFilter.java
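A small sketch of how the keep and okPrefix arguments above might be assembled. The concrete words are illustrative assumptions, not taken from the original project, and the filter itself is not constructed here since that would also require wiring up a TokenStream.

import org.apache.lucene.analysis.util.CharArraySet;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;

public class CapitalizationArgsSketch {
  public static void main(String[] args) {
    // Words that should keep their exact casing.
    CharArraySet keep = new CharArraySet(Arrays.asList("McDonald", "iPhone"), true);

    // Tokens starting with one of these prefixes are left untouched.
    Collection<char[]> okPrefix = new ArrayList<>();
    okPrefix.add("Mc".toCharArray());
    okPrefix.add("O'".toCharArray());

    System.out.println(keep.contains("mcdonald")); // true, ignoreCase = true
    System.out.println(okPrefix.size());           // 2
  }
}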

Example 3: PatternAnalyzerProvider

import org.apache.lucene.analysis.util.CharArraySet; // import the required package/class
@Inject
public PatternAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    Version esVersion = Version.indexCreated(indexSettingsService.getSettings());
    final CharArraySet defaultStopwords;
    if (esVersion.onOrAfter(Version.V_1_0_0_RC1)) {
        defaultStopwords = CharArraySet.EMPTY_SET;
    } else {
        defaultStopwords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    }
    boolean lowercase = settings.getAsBoolean("lowercase", true);
    CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords);

    String sPattern = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/);
    if (sPattern == null) {
        throw new IllegalArgumentException("Analyzer [" + name + "] of type pattern must have a `pattern` set");
    }
    Pattern pattern = Regex.compile(sPattern, settings.get("flags"));

    analyzer = new PatternAnalyzer(pattern, lowercase, stopWords);
}
 
Developer ID: baidu, Project: Elasticsearch, Lines: 23, Source: PatternAnalyzerProvider.java

Example 4: Lucene43CompoundWordTokenFilterBase

import org.apache.lucene.analysis.util.CharArraySet; // import the required package/class
protected Lucene43CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
  super(input);
  this.tokens=new LinkedList<>();
  if (minWordSize < 0) {
    throw new IllegalArgumentException("minWordSize cannot be negative");
  }
  this.minWordSize=minWordSize;
  if (minSubwordSize < 0) {
    throw new IllegalArgumentException("minSubwordSize cannot be negative");
  }
  this.minSubwordSize=minSubwordSize;
  if (maxSubwordSize < 0) {
    throw new IllegalArgumentException("maxSubwordSize cannot be negative");
  }
  this.maxSubwordSize=maxSubwordSize;
  this.onlyLongestMatch=onlyLongestMatch;
  this.dictionary = dictionary;
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 19, Source: Lucene43CompoundWordTokenFilterBase.java

Example 5: resolveNamedWords

import org.apache.lucene.analysis.util.CharArraySet; // import the required package/class
private static CharArraySet resolveNamedWords(Collection<String> words, Map<String, Set<?>> namedWords, boolean ignoreCase) {
    if (namedWords == null) {
        return new CharArraySet(words, ignoreCase);
    }
    CharArraySet setWords = new CharArraySet(words.size(), ignoreCase);
    for (String word : words) {
        if (namedWords.containsKey(word)) {
            setWords.addAll(namedWords.get(word));
        } else {
            setWords.add(word);
        }
    }
    return setWords;
}
 
Developer ID: baidu, Project: Elasticsearch, Lines: 15, Source: Analysis.java
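The helper in Example 5 lets a settings entry such as a named key stand for a whole predefined word set. A hypothetical illustration of that expansion follows; the "_english_" key and its members are made-up values, and the loop simply mirrors the method body, since resolveNamedWords itself is private to Analysis.

import org.apache.lucene.analysis.util.CharArraySet;

import java.util.*;

public class NamedWordsSketch {
  public static void main(String[] args) {
    Map<String, Set<?>> namedWords = new HashMap<>();
    namedWords.put("_english_", new HashSet<>(Arrays.asList("the", "and", "of")));

    List<String> words = Arrays.asList("_english_", "foo");

    // Same expansion as resolveNamedWords: named entries are replaced by
    // their members, everything else is added verbatim.
    CharArraySet setWords = new CharArraySet(words.size(), true);
    for (String word : words) {
      if (namedWords.containsKey(word)) {
        setWords.addAll(namedWords.get(word));
      } else {
        setWords.add(word);
      }
    }

    System.out.println(setWords.size()); // 4: the, and, of, foo
  }
}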

Example 6: FinnishAnalyzer

import org.apache.lucene.analysis.util.CharArraySet; // import the required package/class
/**
 * @deprecated Use {@link #FinnishAnalyzer(CharArraySet,CharArraySet)}
 */
@Deprecated
public FinnishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(matchVersion, stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
      matchVersion, stemExclusionSet));
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 10, Source: FinnishAnalyzer.java

Example 7: EnglishAnalyzerProvider

import org.apache.lucene.analysis.util.CharArraySet; // import the required package/class
@Inject
public EnglishAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new EnglishAnalyzer(Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet()),
                                   Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}
 
Developer ID: baidu, Project: Elasticsearch, Lines: 8, Source: EnglishAnalyzerProvider.java

Example 8: SwedishAnalyzerProvider

import org.apache.lucene.analysis.util.CharArraySet; // import the required package/class
@Inject
public SwedishAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new SwedishAnalyzer(Analysis.parseStopWords(env, settings, SwedishAnalyzer.getDefaultStopSet()),
                                   Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}
 
Developer ID: baidu, Project: Elasticsearch, Lines: 8, Source: SwedishAnalyzerProvider.java

Example 9: HungarianAnalyzerProvider

import org.apache.lucene.analysis.util.CharArraySet; // import the required package/class
@Inject
public HungarianAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new HungarianAnalyzer(Analysis.parseStopWords(env, settings, HungarianAnalyzer.getDefaultStopSet()),
                                     Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}
 
Developer ID: baidu, Project: Elasticsearch, Lines: 8, Source: HungarianAnalyzerProvider.java

Example 10: EnglishAnalyzer

import org.apache.lucene.analysis.util.CharArraySet; // import the required package/class
/**
 * @deprecated Use {@link #EnglishAnalyzer(CharArraySet,CharArraySet)}
 */
@Deprecated
public EnglishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(matchVersion, stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
      matchVersion, stemExclusionSet));
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 10, Source: EnglishAnalyzer.java
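For completeness, a minimal sketch of the replacement constructor that the deprecation note in Example 10 points to. The stem-exclusion entries are made-up values; the Version-less constructor itself is the one referenced by the @deprecated link.

import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;

import java.util.Arrays;

public class EnglishAnalyzerSketch {
  public static void main(String[] args) {
    CharArraySet stopwords = EnglishAnalyzer.getDefaultStopSet();
    CharArraySet stemExclusions = new CharArraySet(Arrays.asList("maven", "gradle"), true);

    // Version-less constructor referenced in the @deprecated note above.
    EnglishAnalyzer analyzer = new EnglishAnalyzer(stopwords, stemExclusions);
    System.out.println(analyzer.getStopwordSet().size());
    analyzer.close();
  }
}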

Example 11: LatvianAnalyzer

import org.apache.lucene.analysis.util.CharArraySet; // import the required package/class
/**
 * @deprecated Use {@link #LatvianAnalyzer(CharArraySet,CharArraySet)}
 */
@Deprecated
public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(matchVersion, stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
      matchVersion, stemExclusionSet));
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 10, Source: LatvianAnalyzer.java

Example 12: CzechAnalyzerProvider

import org.apache.lucene.analysis.util.CharArraySet; // import the required package/class
@Inject
public CzechAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new CzechAnalyzer(Analysis.parseStopWords(env, settings, CzechAnalyzer.getDefaultStopSet()),
                                 Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}
 
Developer ID: baidu, Project: Elasticsearch, Lines: 8, Source: CzechAnalyzerProvider.java

Example 13: getStopFilter

import org.apache.lucene.analysis.util.CharArraySet; // import the required package/class
private TokenStream getStopFilter(String lang, Set<String> metadataStopWords, TokenStream stream) {

        if (metadataStopWords != null && !metadataStopWords.isEmpty()) {
            return new StopFilter(stream, new CharArraySet(metadataStopWords, false));

        } else {
            try {
                InputStream in = ClassLoader.getSystemResourceAsStream(lang.toLowerCase() + ".stopwords");
                if (in != null) {
                    logger.debug("Loading Stop words for lang={}", lang);
                    CharArraySet stopWords = new CharArraySet(30, true);
                    try (BufferedReader bin = new BufferedReader(new InputStreamReader(in))) {
                        String line;
                        String[] parts;
                        while ((line = bin.readLine()) != null) {
                            parts = line.split(Pattern.quote("|"));
                            line = parts[0].trim();

                            if (line.length() > 0) {
                                stopWords.add(line);
                            }
                        }
                        return new StopFilter(stream, stopWords);
                    }
                } else {
                    logger.warn("No stop words found for lang={}", lang);
                }
            } catch (Exception e) {
                logger.error("Error creating stop filter for lang={}", lang, e);
            }
        }

        return stream;
    }
 
Developer ID: Lambda-3, Project: Indra, Lines: 35, Source: IndraAnalyzer.java
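The loader in Example 13 reads a classpath resource named <lang>.stopwords in which everything after a "|" on a line is treated as a comment, much like Snowball stop word lists. A minimal sketch of building the same kind of CharArraySet from an in-memory list instead of a resource; the lines are made-up values.

import org.apache.lucene.analysis.util.CharArraySet;

import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

public class StopWordFileSketch {
  public static void main(String[] args) {
    // Two lines as they might appear in a <lang>.stopwords resource.
    List<String> lines = Arrays.asList("the | definite article", "and");

    CharArraySet stopWords = new CharArraySet(30, true);
    for (String line : lines) {
      String word = line.split(Pattern.quote("|"))[0].trim();
      if (word.length() > 0) {
        stopWords.add(word);
      }
    }
    System.out.println(stopWords.contains("THE")); // true, ignoreCase = true
  }
}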

Example 14: RomanianAnalyzerProvider

import org.apache.lucene.analysis.util.CharArraySet; // import the required package/class
@Inject
public RomanianAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new RomanianAnalyzer(Analysis.parseStopWords(env, settings, RomanianAnalyzer.getDefaultStopSet()),
                                    Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}
 
Developer ID: baidu, Project: Elasticsearch, Lines: 8, Source: RomanianAnalyzerProvider.java

Example 15: BulgarianAnalyzerProvider

import org.apache.lucene.analysis.util.CharArraySet; // import the required package/class
@Inject
public BulgarianAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new BulgarianAnalyzer(Analysis.parseStopWords(env, settings, BulgarianAnalyzer.getDefaultStopSet()),
                                     Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}
 
Developer ID: baidu, Project: Elasticsearch, Lines: 8, Source: BulgarianAnalyzerProvider.java


Note: The org.apache.lucene.analysis.util.CharArraySet class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective developers; copyright of the source code remains with the original authors. For distribution and use, please refer to the license of the corresponding project. Do not reproduce without permission.