Java StopAnalyzer.ENGLISH_STOP_WORDS_SET属性代码示例

本文整理汇总了Java中org.apache.lucene.analysis.core.StopAnalyzer.ENGLISH_STOP_WORDS_SET属性的典型用法代码示例。如果您正苦于以下问题：Java StopAnalyzer.ENGLISH_STOP_WORDS_SET属性的具体用法？Java StopAnalyzer.ENGLISH_STOP_WORDS_SET怎么用？Java StopAnalyzer.ENGLISH_STOP_WORDS_SET使用的例子？那么恭喜您，这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在类org.apache.lucene.analysis.core.StopAnalyzer的用法示例。

在下文中一共展示了StopAnalyzer.ENGLISH_STOP_WORDS_SET属性的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: StandardAnalyzerProvider

/**
 * Creates the provider and configures its {@code StandardAnalyzer}.
 *
 * <p>When the index was created on or after {@code Version.V_1_0_0_Beta1} the
 * fallback stop-word set is empty; for older indices it is
 * {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET}. The fallback is handed to
 * {@code Analysis.parseStopWords} together with the analyzer settings.
 *
 * @param index         index this provider belongs to (forwarded to the superclass)
 * @param indexSettings index-level settings, also used to read the creation version
 * @param env           environment passed to the stop-word parser
 * @param name          analyzer name (forwarded to the superclass)
 * @param settings      analyzer settings; {@code max_token_length} is read from here
 */
public StandardAnalyzerProvider(Index index, Settings indexSettings, Environment env, String name, Settings settings) {
    super(index, indexSettings, name, settings);
    this.esVersion = Version.indexCreated(indexSettings);

    // Pick the fallback stop words according to the version that created the index.
    final CharArraySet fallbackStopWords = esVersion.onOrAfter(Version.V_1_0_0_Beta1)
            ? CharArraySet.EMPTY_SET
            : StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    final CharArraySet effectiveStopWords = Analysis.parseStopWords(env, settings, fallbackStopWords);
    final int tokenLengthLimit = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);

    standardAnalyzer = new StandardAnalyzer(effectiveStopWords);
    standardAnalyzer.setVersion(version);
    standardAnalyzer.setMaxTokenLength(tokenLengthLimit);
}

开发者ID:baidu，项目名称:Elasticsearch，代码行数:16，代码来源:StandardAnalyzerProvider.java

示例2: PatternAnalyzerProvider

/**
 * Creates the provider and builds its {@code PatternAnalyzer} from the analyzer settings.
 *
 * <p>Settings read: {@code lowercase} (default {@code true}), {@code pattern}
 * (default {@code \W+}) and {@code flags}. The fallback stop-word set is empty for
 * indices created on or after {@code Version.V_1_0_0_RC1}, otherwise
 * {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET}.
 *
 * @throws IllegalArgumentException if the resolved {@code pattern} setting is {@code null}
 */
@Inject
public PatternAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    final Version createdVersion = Version.indexCreated(indexSettingsService.getSettings());
    final CharArraySet fallbackStopWords = createdVersion.onOrAfter(Version.V_1_0_0_RC1)
            ? CharArraySet.EMPTY_SET
            : StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    final boolean toLowerCase = settings.getAsBoolean("lowercase", true);
    final CharArraySet effectiveStopWords = Analysis.parseStopWords(env, settings, fallbackStopWords);

    final String regex = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/);
    if (regex == null) {
        throw new IllegalArgumentException("Analyzer [" + name + "] of type pattern must have a `pattern` set");
    }

    analyzer = new PatternAnalyzer(Regex.compile(regex, settings.get("flags")), toLowerCase, effectiveStopWords);
}

开发者ID:baidu，项目名称:Elasticsearch，代码行数:22，代码来源:PatternAnalyzerProvider.java

示例3: testRandomStrings

/**
 * Blasts random strings through a {@code PatternAnalyzer} that splits on a comma
 * pattern and uses {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET}.
 *
 * <p>A temporary default uncaught-exception handler is installed so that failures
 * recognised by {@code isJREBug7104012} turn into test assumptions instead of
 * errors; the previous handler is always restored afterwards.
 *
 * @throws Exception if the random-data check fails for any other reason
 */
public void testRandomStrings() throws Exception {
  Analyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  
  // dodge jre bug http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7104012
  final UncaughtExceptionHandler savedHandler = Thread.getDefaultUncaughtExceptionHandler();
  Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
    @Override
    public void uncaughtException(Thread thread, Throwable throwable) {
      assumeTrue("not failing due to jre bug ", !isJREBug7104012(throwable));
      // otherwise its some other bug, pass to default handler
      savedHandler.uncaughtException(thread, throwable);
    }
  });
  
  try {
    // The original also called Thread.getDefaultUncaughtExceptionHandler() here and
    // discarded the result; that call had no effect and has been dropped.
    checkRandomData(random(), a, 10000*RANDOM_MULTIPLIER);
  } catch (ArrayIndexOutOfBoundsException ex) {
    assumeTrue("not failing due to jre bug ", !isJREBug7104012(ex));
    throw ex; // otherwise rethrow
  } finally {
    // Always put the previous handler back, whatever the outcome.
    Thread.setDefaultUncaughtExceptionHandler(savedHandler);
  }
}

开发者ID:europeana，项目名称:search，代码行数:25，代码来源:PatternAnalyzerTest.java

示例4: inform

/**
 * Initialises the stop-word set from the factory arguments.
 *
 * <p>Reads the {@code words}, {@code ignoreCase}, {@code enablePositionIncrements}
 * and (only when {@code words} is present) {@code format} arguments. Without a
 * {@code words} argument the set is a copy of
 * {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET} built with the configured case handling.
 *
 * @param loader resource loader used to read the word files
 * @throws IOException if a word file cannot be loaded
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final String wordFiles = args.get("words");
  ignoreCase = getBoolean("ignoreCase",false);
  enablePositionIncrements = getBoolean("enablePositionIncrements",false);

  // No files configured: fall back to the built-in English stop words.
  if (wordFiles == null) {
    stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
    return;
  }

  final boolean snowballFormat = "snowball".equalsIgnoreCase(args.get("format"));
  if (snowballFormat) {
    stopWords = getSnowballWordSet(loader, wordFiles, ignoreCase);
  } else {
    stopWords = getWordSet(loader, wordFiles, ignoreCase);
  }
}

开发者ID:pkarmstr，项目名称:NYBC，代码行数:16，代码来源:StopFilterFactory.java

示例5: main

/**
 * Tokenizes a sample sentence on whitespace, removes the English stop words,
 * applies {@code ScientificFiltering} and prints each remaining term on its own line.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if the token stream fails while being consumed or closed
 */
public static void main(String[] args) throws IOException {

    String theSentence =
        "this is the scientific article about chemicals like H20 C2H50H with concentration "
            + "of 3.99 kilograms and 0,123 micrograms also i have some CO2 gas n=3 x=45";
    StringReader reader = new StringReader(theSentence);
    Tokenizer whitespaceTokenizer = new WhitespaceTokenizer(reader);
    TokenStream tokenStream =
        new StopFilter(whitespaceTokenizer, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    tokenStream = new ScientificFiltering(tokenStream);

    try {
        final CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();

        while (tokenStream.incrementToken()) {
            System.out.println(charTermAttribute.toString());
        }

        tokenStream.end();
    } finally {
        // Release the stream even when reset/incrementToken/end throws.
        tokenStream.close();
    }
}

开发者ID:MysterionRise，项目名称:information-retrieval-adventure，代码行数:21，代码来源:SkippingNumbersPreservingChemicals.java

示例6: testNonWordPattern

/**
 * Verifies PatternAnalyzer output when the split pattern is {@code \W+}.
 */
public void testNonWordPattern() throws IOException {
  final String text = "The quick brown Fox,the abcd1234 (56.78) dc.";

  // Case-preserving variant with no stop-word set.
  final String[] expectedRaw = { "The", "quick", "brown", "Fox", "the", "abcd1234", "56", "78", "dc" };
  PatternAnalyzer caseSensitive = new PatternAnalyzer(Pattern.compile("\\W+"), false, null);
  assertAnalyzesTo(caseSensitive, text, expectedRaw);

  // Lowercasing variant that also drops the English stop words.
  final String[] expectedFiltered = { "quick", "brown", "fox", "abcd1234", "56", "78", "dc" };
  PatternAnalyzer lowercasing = new PatternAnalyzer(Pattern.compile("\\W+"), true,
                                                    StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  assertAnalyzesTo(lowercasing, text, expectedFiltered);
}

开发者ID:justor，项目名称:elasticsearch_my，代码行数:15，代码来源:PatternAnalyzerTests.java

示例7: testWhitespacePattern

/**
 * Verifies PatternAnalyzer output when the split pattern is {@code \s+}.
 * Behavior can be similar to WhitespaceAnalyzer (depending upon options)
 */
public void testWhitespacePattern() throws IOException {
  final String text = "The quick brown Fox,the abcd1234 (56.78) dc.";

  // Case-preserving variant with no stop-word set.
  final String[] expectedRaw = { "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." };
  PatternAnalyzer caseSensitive = new PatternAnalyzer(Pattern.compile("\\s+"), false, null);
  assertAnalyzesTo(caseSensitive, text, expectedRaw);

  // Lowercasing variant that also drops the English stop words.
  final String[] expectedFiltered = { "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." };
  PatternAnalyzer lowercasing = new PatternAnalyzer(Pattern.compile("\\s+"), true,
                                                    StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  assertAnalyzesTo(lowercasing, text, expectedFiltered);
}

开发者ID:justor，项目名称:elasticsearch_my，代码行数:16，代码来源:PatternAnalyzerTests.java

示例8: testCustomPattern

/**
 * Verifies PatternAnalyzer output when the split pattern is a custom one;
 * here the text is tokenized on the comma ",".
 */
public void testCustomPattern() throws IOException {
  final String text = "Here,Are,some,Comma,separated,words,";

  // Case-preserving variant with no stop-word set.
  final String[] expectedRaw = { "Here", "Are", "some", "Comma", "separated", "words" };
  PatternAnalyzer caseSensitive = new PatternAnalyzer(Pattern.compile(","), false, null);
  assertAnalyzesTo(caseSensitive, text, expectedRaw);

  // Lowercasing variant that also drops the English stop words.
  final String[] expectedFiltered = { "here", "some", "comma", "separated", "words" };
  PatternAnalyzer lowercasing = new PatternAnalyzer(Pattern.compile(","), true,
                                                    StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  assertAnalyzesTo(lowercasing, text, expectedFiltered);
}

开发者ID:justor，项目名称:elasticsearch_my，代码行数:16，代码来源:PatternAnalyzerTests.java

示例9: inform

/**
 * Initialises {@code commonWords}.
 *
 * <p>When {@code commonWordFiles} is set, the words are loaded through the
 * snowball loader if {@code format} equals "snowball" (case-insensitively) and
 * through the plain word-set loader otherwise. When it is not set,
 * {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET} is used as is.
 *
 * @param loader resource loader used to read the word files
 * @throws IOException if a word file cannot be loaded
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  // Nothing configured: use the built-in English stop words directly.
  if (commonWordFiles == null) {
    commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    return;
  }

  final boolean snowballFormat = "snowball".equalsIgnoreCase(format);
  if (snowballFormat) {
    commonWords = getSnowballWordSet(loader, commonWordFiles, ignoreCase);
  } else {
    commonWords = getWordSet(loader, commonWordFiles, ignoreCase);
  }
}

开发者ID:lamsfoundation，项目名称:lams，代码行数:12，代码来源:CommonGramsFilterFactory.java

示例10: StandardHtmlStripAnalyzerProvider

/**
 * Creates the provider and configures its {@code StandardHtmlStripAnalyzer}.
 *
 * <p>The fallback stop-word set is empty for indices created on or after
 * {@code Version.V_1_0_0_RC1}, otherwise {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET};
 * it is handed to {@code Analysis.parseStopWords} together with the analyzer settings.
 */
@Inject
public StandardHtmlStripAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env,  @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    this.esVersion = Version.indexCreated(indexSettingsService.getSettings());

    // Pick the fallback stop words according to the version that created the index.
    final CharArraySet fallbackStopWords = esVersion.onOrAfter(Version.V_1_0_0_RC1)
            ? CharArraySet.EMPTY_SET
            : StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    final CharArraySet effectiveStopWords = Analysis.parseStopWords(env, settings, fallbackStopWords);

    analyzer = new StandardHtmlStripAnalyzer(effectiveStopWords);
    analyzer.setVersion(version);
}

开发者ID:baidu，项目名称:Elasticsearch，代码行数:14，代码来源:StandardHtmlStripAnalyzerProvider.java

示例11: testPositionIncrements

/**
 * Checks terms, start offsets, end offsets and position increments produced by a
 * {@code ThaiAnalyzer} built with the English stop-word set, for input in which the
 * word "the" appears between Thai text.
 */
public void testPositionIncrements() throws Exception {
  final ThaiAnalyzer analyzer = new ThaiAnalyzer(StopAnalyzer.ENGLISH_STOP_WORDS_SET);

  // Both inputs are expected to yield the same terms and position increments.
  final String[] expectedTerms = { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" };
  final int[] expectedIncrements = { 1, 1, 1, 1, 2, 1, 1, 1 };

  // stopword surrounded by whitespace
  final int[] spacedStarts = { 0, 3, 6, 9, 18, 22, 25, 28 };
  final int[] spacedEnds = { 3, 6, 9, 13, 22, 25, 28, 30 };
  assertAnalyzesTo(analyzer, "การที่ได้ต้อง the แสดงว่างานดี",
      expectedTerms, spacedStarts, spacedEnds, expectedIncrements);

  // case that a stopword is adjacent to thai text, with no whitespace
  final int[] adjacentStarts = { 0, 3, 6, 9, 17, 21, 24, 27 };
  final int[] adjacentEnds = { 3, 6, 9, 13, 21, 24, 27, 29 };
  assertAnalyzesTo(analyzer, "การที่ได้ต้องthe แสดงว่างานดี",
      expectedTerms, adjacentStarts, adjacentEnds, expectedIncrements);
}

开发者ID:europeana，项目名称:search，代码行数:15，代码来源:TestThaiAnalyzer.java

示例12: testNonWordPattern

/**
 * Verifies PatternAnalyzer output when it uses {@code PatternAnalyzer.NON_WORD_PATTERN}.
 * Behavior can be similar to SimpleAnalyzer (depending upon options)
 */
public void testNonWordPattern() throws IOException {
  final String text = "The quick brown Fox,the abcd1234 (56.78) dc.";

  // Case-preserving variant with no stop-word set.
  final String[] expectedRaw = { "The", "quick", "brown", "Fox", "the", "abcd", "dc" };
  PatternAnalyzer caseSensitive =
      new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, false, null);
  check(caseSensitive, text, expectedRaw);

  // Lowercasing variant that also drops the English stop words.
  final String[] expectedFiltered = { "quick", "brown", "fox", "abcd", "dc" };
  PatternAnalyzer lowercasing =
      new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, true,
          StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  check(lowercasing, text, expectedFiltered);
}

开发者ID:europeana，项目名称:search，代码行数:17，代码来源:PatternAnalyzerTest.java

示例13: testWhitespacePattern

/**
 * Verifies PatternAnalyzer output when it uses {@code PatternAnalyzer.WHITESPACE_PATTERN}.
 * Behavior can be similar to WhitespaceAnalyzer (depending upon options)
 */
public void testWhitespacePattern() throws IOException {
  final String text = "The quick brown Fox,the abcd1234 (56.78) dc.";

  // Case-preserving variant with no stop-word set.
  final String[] expectedRaw = { "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." };
  PatternAnalyzer caseSensitive =
      new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);
  check(caseSensitive, text, expectedRaw);

  // Lowercasing variant that also drops the English stop words.
  final String[] expectedFiltered = { "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." };
  PatternAnalyzer lowercasing =
      new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, true,
          StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  check(lowercasing, text, expectedFiltered);
}

开发者ID:europeana，项目名称:search，代码行数:17，代码来源:PatternAnalyzerTest.java

示例14: testCustomPattern

/**
 * Verifies PatternAnalyzer output when the split pattern is a custom one;
 * here the text is tokenized on the comma ",".
 */
public void testCustomPattern() throws IOException {
  final String text = "Here,Are,some,Comma,separated,words,";

  // Case-preserving variant with no stop-word set.
  final String[] expectedRaw = { "Here", "Are", "some", "Comma", "separated", "words" };
  PatternAnalyzer caseSensitive =
      new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), false, null);
  check(caseSensitive, text, expectedRaw);

  // Lowercasing variant that also drops the English stop words.
  final String[] expectedFiltered = { "here", "some", "comma", "separated", "words" };
  PatternAnalyzer lowercasing =
      new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true,
          StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  check(lowercasing, text, expectedFiltered);
}

开发者ID:europeana，项目名称:search，代码行数:16，代码来源:PatternAnalyzerTest.java

示例15: inform

/**
 * Initialises {@code commonWords} from the factory arguments.
 *
 * <p>Reads the {@code words}, {@code ignoreCase} and (only when {@code words} is
 * present) {@code format} arguments. Without a {@code words} argument,
 * {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET} is used as is.
 *
 * @param loader resource loader used to read the word files
 * @throws IOException if a word file cannot be loaded
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final String wordFiles = args.get("words");
  ignoreCase = getBoolean("ignoreCase", false);

  // Nothing configured: use the built-in English stop words directly.
  if (wordFiles == null) {
    commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    return;
  }

  final boolean snowballFormat = "snowball".equalsIgnoreCase(args.get("format"));
  if (snowballFormat) {
    commonWords = getSnowballWordSet(loader, wordFiles, ignoreCase);
  } else {
    commonWords = getWordSet(loader, wordFiles, ignoreCase);
  }
}

开发者ID:pkarmstr，项目名称:NYBC，代码行数:15，代码来源:CommonGramsFilterFactory.java

注：本文中的org.apache.lucene.analysis.core.StopAnalyzer.ENGLISH_STOP_WORDS_SET属性示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。