

Java StopAnalyzer Class Code Examples

This article collects typical usage examples of the Java class org.apache.lucene.analysis.core.StopAnalyzer. If you are wondering what the StopAnalyzer class does, how to use it, or simply want to see it in real code, the examples selected below should help.


The StopAnalyzer class belongs to the org.apache.lucene.analysis.core package. Fifteen code examples of the class are shown below, ordered by popularity by default; a short usage sketch of the class itself comes first.
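Before the examples, here is a minimal, self-contained sketch of StopAnalyzer on its own. It is not taken from any of the projects below; it assumes a Lucene 5.x/6.x-era API in which StopAnalyzer still lives in org.apache.lucene.analysis.core and accepts a CharArraySet of stop words (the same ENGLISH_STOP_WORDS_SET constant used throughout the examples; in later Lucene releases the class and the constant were relocated, so treat this as version-specific). The StopAnalyzerDemo class name and the sample sentence are made up for illustration.

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class StopAnalyzerDemo {
    public static void main(String[] args) throws IOException {
        // Build the analyzer with the built-in English stop word set.
        StopAnalyzer analyzer = new StopAnalyzer(StopAnalyzer.ENGLISH_STOP_WORDS_SET);

        // StopAnalyzer tokenizes on non-letter characters, lowercases each token,
        // and drops every token that appears in the stop word set.
        TokenStream stream = analyzer.tokenStream("field", "The quick brown fox and the lazy dog");
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);

        stream.reset();
        while (stream.incrementToken()) {
            System.out.println(term.toString()); // quick, brown, fox, lazy, dog
        }
        stream.end();
        stream.close();
        analyzer.close();
    }
}

The stop words "The", "and", and "the" are removed and the remaining tokens are lowercased; the Elasticsearch analyzer providers and token filter factories in the examples below essentially wrap this behavior and make the stop word set configurable.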

Example 1: StandardAnalyzerProvider

import org.apache.lucene.analysis.core.StopAnalyzer; // import the required package/class
public StandardAnalyzerProvider(Index index, Settings indexSettings, Environment env, String name, Settings settings) {
    super(index, indexSettings, name, settings);
    this.esVersion = Version.indexCreated(indexSettings);
    final CharArraySet defaultStopwords;
    if (esVersion.onOrAfter(Version.V_1_0_0_Beta1)) {
        defaultStopwords = CharArraySet.EMPTY_SET;
    } else {
        defaultStopwords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    }

    CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords);
    int maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
    standardAnalyzer = new StandardAnalyzer(stopWords);
    standardAnalyzer.setVersion(version);
    standardAnalyzer.setMaxTokenLength(maxTokenLength);
}
 
Developer: baidu, Project: Elasticsearch, Lines: 17, Source file: StandardAnalyzerProvider.java

Example 2: PatternAnalyzerProvider

import org.apache.lucene.analysis.core.StopAnalyzer; // import the required package/class
@Inject
public PatternAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    Version esVersion = Version.indexCreated(indexSettingsService.getSettings());
    final CharArraySet defaultStopwords;
    if (esVersion.onOrAfter(Version.V_1_0_0_RC1)) {
        defaultStopwords = CharArraySet.EMPTY_SET;
    } else {
        defaultStopwords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    }
    boolean lowercase = settings.getAsBoolean("lowercase", true);
    CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords);

    String sPattern = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/);
    if (sPattern == null) {
        throw new IllegalArgumentException("Analyzer [" + name + "] of type pattern must have a `pattern` set");
    }
    Pattern pattern = Regex.compile(sPattern, settings.get("flags"));

    analyzer = new PatternAnalyzer(pattern, lowercase, stopWords);
}
 
Developer: baidu, Project: Elasticsearch, Lines: 23, Source file: PatternAnalyzerProvider.java

Example 3: testRandomStrings

import org.apache.lucene.analysis.core.StopAnalyzer; // import the required package/class
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  Analyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  
  // dodge jre bug http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7104012
  final UncaughtExceptionHandler savedHandler = Thread.getDefaultUncaughtExceptionHandler();
  Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
    @Override
    public void uncaughtException(Thread thread, Throwable throwable) {
      assumeTrue("not failing due to jre bug ", !isJREBug7104012(throwable));
      // otherwise it's some other bug, pass to default handler
      savedHandler.uncaughtException(thread, throwable);
    }
  });
  
  try {
    Thread.getDefaultUncaughtExceptionHandler();
    checkRandomData(random(), a, 10000*RANDOM_MULTIPLIER);
  } catch (ArrayIndexOutOfBoundsException ex) {
    assumeTrue("not failing due to jre bug ", !isJREBug7104012(ex));
    throw ex; // otherwise rethrow
  } finally {
    Thread.setDefaultUncaughtExceptionHandler(savedHandler);
  }
}
 
Developer: europeana, Project: search, Lines: 26, Source file: PatternAnalyzerTest.java

Example 4: inform

import org.apache.lucene.analysis.core.StopAnalyzer; // import the required package/class
@Override
public void inform(ResourceLoader loader) throws IOException {
  String stopWordFiles = args.get("words");
  ignoreCase = getBoolean("ignoreCase",false);
  enablePositionIncrements = getBoolean("enablePositionIncrements",false);

  if (stopWordFiles != null) {
    if ("snowball".equalsIgnoreCase(args.get("format"))) {
      stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase);
    } else {
      stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
    }
  } else {
    stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
  }
}
 
Developer: pkarmstr, Project: NYBC, Lines: 17, Source file: StopFilterFactory.java

Example 5: main

import org.apache.lucene.analysis.core.StopAnalyzer; // import the required package/class
public static void main(String[] args) throws IOException {

    String theSentence =
        "this is the scientific article about chemicals like H20 C2H50H with concentration "
            + "of 3.99 kilograms and 0,123 micrograms also i have some CO2 gas n=3 x=45";
    StringReader reader = new StringReader(theSentence);
    Tokenizer whitespaceTokenizer = new WhitespaceTokenizer(reader);
    TokenStream tokenStream =
        new StopFilter(whitespaceTokenizer, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    tokenStream = new ScientificFiltering(tokenStream);

    final CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
    tokenStream.reset();

    while (tokenStream.incrementToken()) {
      System.out.println(charTermAttribute.toString());
    }

    tokenStream.end();
    tokenStream.close();
  }
 
Developer: MysterionRise, Project: information-retrieval-adventure, Lines: 22, Source file: SkippingNumbersPreservingChemicals.java

Example 6: StopAnalyzerProvider

import org.apache.lucene.analysis.core.StopAnalyzer; // import the required package/class
public StopAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    CharArraySet stopWords = Analysis.parseStopWords(
        env, indexSettings.getIndexVersionCreated(), settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    this.stopAnalyzer = new StopAnalyzer(stopWords);
    this.stopAnalyzer.setVersion(version);
}
 
Developer: justor, Project: elasticsearch_my, Lines: 8, Source file: StopAnalyzerProvider.java

Example 7: StopTokenFilterFactory

import org.apache.lucene.analysis.core.StopAnalyzer; // import the required package/class
public StopTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    this.ignoreCase =
        settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(), "ignore_case", false, deprecationLogger);
    this.removeTrailing = settings.getAsBooleanLenientForPreEs6Indices(
        indexSettings.getIndexVersionCreated(), "remove_trailing", true, deprecationLogger);
    this.stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
    if (settings.get("enable_position_increments") != null) {
        throw new IllegalArgumentException("enable_position_increments is not supported anymore. Please fix your analysis chain");
    }
}
 
Developer: justor, Project: elasticsearch_my, Lines: 12, Source file: StopTokenFilterFactory.java

Example 8: testNonWordPattern

import org.apache.lucene.analysis.core.StopAnalyzer; // import the required package/class
/**
 * Test PatternAnalyzer when it is configured with a non-word pattern.
 */
public void testNonWordPattern() throws IOException {
  // Split on non-letter pattern, do not lowercase, no stopwords
  PatternAnalyzer a = new PatternAnalyzer(Pattern.compile("\\W+"), false, null);
  assertAnalyzesTo(a, "The quick brown Fox,the abcd1234 (56.78) dc.",
                      new String[] { "The", "quick", "brown", "Fox", "the", "abcd1234", "56", "78", "dc" });

  // split on non-letter pattern, lowercase, english stopwords
  PatternAnalyzer b = new PatternAnalyzer(Pattern.compile("\\W+"), true,
                                          StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  assertAnalyzesTo(b, "The quick brown Fox,the abcd1234 (56.78) dc.",
                       new String[] { "quick", "brown", "fox", "abcd1234", "56", "78", "dc" });
}
 
Developer: justor, Project: elasticsearch_my, Lines: 16, Source file: PatternAnalyzerTests.java

Example 9: testWhitespacePattern

import org.apache.lucene.analysis.core.StopAnalyzer; // import the required package/class
/**
 * Test PatternAnalyzer when it is configured with a whitespace pattern.
 * Behavior can be similar to WhitespaceAnalyzer (depending upon options)
 */
public void testWhitespacePattern() throws IOException {
  // Split on whitespace patterns, do not lowercase, no stopwords
  PatternAnalyzer a = new PatternAnalyzer(Pattern.compile("\\s+"), false, null);
  assertAnalyzesTo(a, "The quick brown Fox,the abcd1234 (56.78) dc.",
                      new String[] { "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." });

  // Split on whitespace patterns, lowercase, english stopwords
  PatternAnalyzer b = new PatternAnalyzer(Pattern.compile("\\s+"), true,
                                          StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  assertAnalyzesTo(b, "The quick brown Fox,the abcd1234 (56.78) dc.",
                       new String[] { "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." });
}
 
Developer: justor, Project: elasticsearch_my, Lines: 17, Source file: PatternAnalyzerTests.java

Example 10: testCustomPattern

import org.apache.lucene.analysis.core.StopAnalyzer; // import the required package/class
/**
 * Test PatternAnalyzer when it is configured with a custom pattern. In this
 * case, text is tokenized on the comma ","
 */
public void testCustomPattern() throws IOException {
  // Split on comma, do not lowercase, no stopwords
  PatternAnalyzer a = new PatternAnalyzer(Pattern.compile(","), false, null);
  assertAnalyzesTo(a, "Here,Are,some,Comma,separated,words,",
                       new String[] { "Here", "Are", "some", "Comma", "separated", "words" });

  // split on comma, lowercase, english stopwords
  PatternAnalyzer b = new PatternAnalyzer(Pattern.compile(","), true,
                                           StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  assertAnalyzesTo(b, "Here,Are,some,Comma,separated,words,",
                       new String[] { "here", "some", "comma", "separated", "words" });
}
 
Developer: justor, Project: elasticsearch_my, Lines: 17, Source file: PatternAnalyzerTests.java

Example 11: inform

import org.apache.lucene.analysis.core.StopAnalyzer; // import the required package/class
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (commonWordFiles != null) {
    if ("snowball".equalsIgnoreCase(format)) {
      commonWords = getSnowballWordSet(loader, commonWordFiles, ignoreCase);
    } else {
      commonWords = getWordSet(loader, commonWordFiles, ignoreCase);
    }
  } else {
    commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
  }
}
 
Developer: lamsfoundation, Project: lams, Lines: 13, Source file: CommonGramsFilterFactory.java

Example 12: StandardHtmlStripAnalyzerProvider

import org.apache.lucene.analysis.core.StopAnalyzer; // import the required package/class
@Inject
public StandardHtmlStripAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env,  @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    this.esVersion = Version.indexCreated(indexSettingsService.getSettings());
    final CharArraySet defaultStopwords;
    if (esVersion.onOrAfter(Version.V_1_0_0_RC1)) {
        defaultStopwords = CharArraySet.EMPTY_SET;
    } else {
        defaultStopwords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    }
    CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords);
    analyzer = new StandardHtmlStripAnalyzer(stopWords);
    analyzer.setVersion(version);
}
 
Developer: baidu, Project: Elasticsearch, Lines: 15, Source file: StandardHtmlStripAnalyzerProvider.java

Example 13: StopAnalyzerProvider

import org.apache.lucene.analysis.core.StopAnalyzer; // import the required package/class
@Inject
public StopAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    this.stopAnalyzer = new StopAnalyzer(stopWords);
    this.stopAnalyzer.setVersion(version);
}
 
Developer: baidu, Project: Elasticsearch, Lines: 8, Source file: StopAnalyzerProvider.java

Example 14: StopTokenFilterFactory

import org.apache.lucene.analysis.core.StopAnalyzer; // import the required package/class
@Inject
public StopTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    this.ignoreCase = settings.getAsBoolean("ignore_case", false);
    this.removeTrailing = settings.getAsBoolean("remove_trailing", true);
    this.stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
    if (version.onOrAfter(Version.LUCENE_4_4) && settings.get("enable_position_increments") != null) {
        throw new IllegalArgumentException("enable_position_increments is not supported anymore as of Lucene 4.4 as it can create broken token streams."
                + " Please fix your analysis chain or use an older compatibility version (<= 4.3).");
    }
    this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", true);
}
 
Developer: baidu, Project: Elasticsearch, Lines: 13, Source file: StopTokenFilterFactory.java

Example 15: testPositionIncrements

import org.apache.lucene.analysis.core.StopAnalyzer; // import the required package/class
public void testPositionIncrements() throws Exception {
  final ThaiAnalyzer analyzer = new ThaiAnalyzer(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  assertAnalyzesTo(analyzer, "การที่ได้ต้อง the แสดงว่างานดี", 
      new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
      new int[] { 0, 3, 6, 9, 18, 22, 25, 28 },
      new int[] { 3, 6, 9, 13, 22, 25, 28, 30 },
      new int[] { 1, 1, 1, 1, 2, 1, 1, 1 });

  // case that a stopword is adjacent to thai text, with no whitespace
  assertAnalyzesTo(analyzer, "การที่ได้ต้องthe แสดงว่างานดี", 
      new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
      new int[] { 0, 3, 6, 9, 17, 21, 24, 27 },
      new int[] { 3, 6, 9, 13, 21, 24, 27, 29 },
      new int[] { 1, 1, 1, 1, 2, 1, 1, 1 });
}
 
Developer: europeana, Project: search, Lines: 16, Source file: TestThaiAnalyzer.java


Note: The org.apache.lucene.analysis.core.StopAnalyzer examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are taken from open-source projects contributed by their respective developers, and copyright of the source code remains with the original authors; for distribution and use, please refer to each project's License. Do not reproduce without permission.