本文整理汇总了Java中org.apache.lucene.analysis.core.StopAnalyzer.ENGLISH_STOP_WORDS_SET属性的典型用法代码示例。如果您正苦于以下问题：Java StopAnalyzer.ENGLISH_STOP_WORDS_SET属性的具体用法？Java StopAnalyzer.ENGLISH_STOP_WORDS_SET怎么用？Java StopAnalyzer.ENGLISH_STOP_WORDS_SET使用的例子？那么恭喜您，这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在类org.apache.lucene.analysis.core.StopAnalyzer的用法示例。
在下文中一共展示了StopAnalyzer.ENGLISH_STOP_WORDS_SET属性的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: StandardAnalyzerProvider
/**
 * Creates the provider and configures its {@code StandardAnalyzer} from {@code settings}.
 *
 * <p>The fallback stop-word set depends on the version returned by
 * {@code Version.indexCreated(indexSettings)}: an empty set when that version is on or after
 * {@code V_1_0_0_Beta1}, the English stop words otherwise. The fallback is passed to
 * {@code Analysis.parseStopWords} together with the environment and settings.
 *
 * @param index         forwarded to the superclass constructor
 * @param indexSettings index-level settings; also used to determine the index creation version
 * @param env           environment handed to {@code Analysis.parseStopWords}
 * @param name          forwarded to the superclass constructor
 * @param settings      analyzer settings; {@code max_token_length} is read from here
 */
public StandardAnalyzerProvider(Index index, Settings indexSettings, Environment env, String name, Settings settings) {
    super(index, indexSettings, name, settings);
    this.esVersion = Version.indexCreated(indexSettings);

    // Pick the fallback stop words according to the index creation version.
    final CharArraySet defaultStopwords = esVersion.onOrAfter(Version.V_1_0_0_Beta1)
            ? CharArraySet.EMPTY_SET
            : StopAnalyzer.ENGLISH_STOP_WORDS_SET;

    final CharArraySet resolvedStopWords = Analysis.parseStopWords(env, settings, defaultStopwords);
    final int tokenLengthLimit = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);

    standardAnalyzer = new StandardAnalyzer(resolvedStopWords);
    standardAnalyzer.setVersion(version);
    standardAnalyzer.setMaxTokenLength(tokenLengthLimit);
}
示例2: PatternAnalyzerProvider
/**
 * Creates the provider and builds its {@code PatternAnalyzer} from {@code settings}.
 *
 * <p>Reads {@code lowercase} (default {@code true}), {@code pattern} (default {@code \W+}) and
 * {@code flags} from the settings. The fallback stop-word set is empty when the index creation
 * version is on or after {@code V_1_0_0_RC1}, and the English stop words otherwise.
 *
 * @throws IllegalArgumentException if the resolved {@code pattern} value is {@code null}
 */
@Inject
public PatternAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    final Version createdVersion = Version.indexCreated(indexSettingsService.getSettings());
    final CharArraySet defaultStopwords = createdVersion.onOrAfter(Version.V_1_0_0_RC1)
            ? CharArraySet.EMPTY_SET
            : StopAnalyzer.ENGLISH_STOP_WORDS_SET;

    final boolean lowercase = settings.getAsBoolean("lowercase", true);
    final CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords);

    final String patternSource = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/);
    if (patternSource == null) {
        throw new IllegalArgumentException("Analyzer [" + name + "] of type pattern must have a `pattern` set");
    }

    analyzer = new PatternAnalyzer(Regex.compile(patternSource, settings.get("flags")), lowercase, stopWords);
}
示例3: testRandomStrings
/**
 * Blasts random strings through a comma-splitting, lowercasing {@code PatternAnalyzer} that
 * uses the English stop words.
 *
 * <p>While the random data is being checked, a temporary default uncaught-exception handler is
 * installed. It turns failures identified by {@code isJREBug7104012} into test assumptions and
 * forwards everything else to the previously installed handler. The previous handler is
 * restored in the {@code finally} block.
 *
 * @throws Exception if the random-data check fails for a reason other than the JRE bug
 */
public void testRandomStrings() throws Exception {
    Analyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    // dodge jre bug http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7104012
    final UncaughtExceptionHandler savedHandler = Thread.getDefaultUncaughtExceptionHandler();
    Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
        @Override
        public void uncaughtException(Thread thread, Throwable throwable) {
            assumeTrue("not failing due to jre bug ", !isJREBug7104012(throwable));
            // otherwise its some other bug, pass to default handler
            savedHandler.uncaughtException(thread, throwable);
        }
    });

    try {
        checkRandomData(random(), a, 10000*RANDOM_MULTIPLIER);
    } catch (ArrayIndexOutOfBoundsException ex) {
        assumeTrue("not failing due to jre bug ", !isJREBug7104012(ex));
        throw ex; // otherwise rethrow
    } finally {
        // always put the original handler back, whatever happened above
        Thread.setDefaultUncaughtExceptionHandler(savedHandler);
    }
}
示例4: inform
/**
 * Reads the factory arguments and resolves the stop-word set.
 *
 * <p>When a {@code words} argument is present, the set is loaded through
 * {@code getSnowballWordSet} if {@code format} equals {@code "snowball"} (ignoring case), and
 * through {@code getWordSet} otherwise. Without a {@code words} argument, the set is a new
 * {@code CharArraySet} built from the English stop words with the configured
 * {@code ignoreCase} flag.
 *
 * @param loader resource loader passed on to the word-set loading helpers
 * @throws IOException declared by this method; presumably raised by the word-set helpers
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    final String wordFiles = args.get("words");
    ignoreCase = getBoolean("ignoreCase",false);
    enablePositionIncrements = getBoolean("enablePositionIncrements",false);

    if (wordFiles == null) {
        // no explicit word list configured: fall back to the English defaults
        stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
        return;
    }

    final boolean snowballFormat = "snowball".equalsIgnoreCase(args.get("format"));
    stopWords = snowballFormat
            ? getSnowballWordSet(loader, wordFiles, ignoreCase)
            : getWordSet(loader, wordFiles, ignoreCase);
}
示例5: main
/**
 * Tokenizes a fixed sample sentence on whitespace, removes English stop words, applies
 * {@code ScientificFiltering}, and prints each remaining term on its own line.
 *
 * <p>The token stream is managed with try-with-resources so that it is closed even when
 * {@code reset()}, {@code incrementToken()} or {@code end()} throws.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if consuming or closing the token stream fails
 */
public static void main(String[] args) throws IOException {
    String theSentence =
            "this is the scientific article about chemicals like H20 C2H50H with concentration "
                    + "of 3.99 kilograms and 0,123 micrograms also i have some CO2 gas n=3 x=45";
    StringReader reader = new StringReader(theSentence);
    Tokenizer whitespaceTokenizer = new WhitespaceTokenizer(reader);

    // Closing the outermost stream is what the original did on the success path;
    // try-with-resources makes the same close happen on failure paths too.
    try (TokenStream tokenStream = new ScientificFiltering(
            new StopFilter(whitespaceTokenizer, StopAnalyzer.ENGLISH_STOP_WORDS_SET))) {
        final CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);

        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            System.out.println(charTermAttribute.toString());
        }
        tokenStream.end();
    }
}
开发者ID:MysterionRise,项目名称:information-retrieval-adventure,代码行数:21,代码来源:SkippingNumbersPreservingChemicals.java
示例6: testNonWordPattern
/**
 * Exercises PatternAnalyzer with the non-word pattern {@code \W+}, once without
 * lowercasing or stop words and once with lowercasing plus the English stop words.
 */
public void testNonWordPattern() throws IOException {
    final String sentence = "The quick brown Fox,the abcd1234 (56.78) dc.";

    // Split on non-letter pattern, do not lowercase, no stopwords
    final PatternAnalyzer plain = new PatternAnalyzer(Pattern.compile("\\W+"), false, null);
    final String[] plainTokens =
            { "The", "quick", "brown", "Fox", "the", "abcd1234", "56", "78", "dc" };
    assertAnalyzesTo(plain, sentence, plainTokens);

    // split on non-letter pattern, lowercase, english stopwords
    final PatternAnalyzer filtered =
            new PatternAnalyzer(Pattern.compile("\\W+"), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    final String[] filteredTokens =
            { "quick", "brown", "fox", "abcd1234", "56", "78", "dc" };
    assertAnalyzesTo(filtered, sentence, filteredTokens);
}
示例7: testWhitespacePattern
/**
 * Exercises PatternAnalyzer with the whitespace pattern {@code \s+}.
 * Behavior can be similar to WhitespaceAnalyzer (depending upon options)
 */
public void testWhitespacePattern() throws IOException {
    final String sentence = "The quick brown Fox,the abcd1234 (56.78) dc.";

    // Split on whitespace patterns, do not lowercase, no stopwords
    final PatternAnalyzer plain = new PatternAnalyzer(Pattern.compile("\\s+"), false, null);
    final String[] plainTokens =
            { "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." };
    assertAnalyzesTo(plain, sentence, plainTokens);

    // Split on whitespace patterns, lowercase, english stopwords
    final PatternAnalyzer filtered =
            new PatternAnalyzer(Pattern.compile("\\s+"), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    final String[] filteredTokens =
            { "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." };
    assertAnalyzesTo(filtered, sentence, filteredTokens);
}
示例8: testCustomPattern
/**
 * Exercises PatternAnalyzer with a custom pattern: the text is tokenized on the
 * comma ",".
 */
public void testCustomPattern() throws IOException {
    final String csv = "Here,Are,some,Comma,separated,words,";

    // Split on comma, do not lowercase, no stopwords
    final PatternAnalyzer plain = new PatternAnalyzer(Pattern.compile(","), false, null);
    final String[] plainTokens =
            { "Here", "Are", "some", "Comma", "separated", "words" };
    assertAnalyzesTo(plain, csv, plainTokens);

    // split on comma, lowercase, english stopwords
    final PatternAnalyzer filtered =
            new PatternAnalyzer(Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    final String[] filteredTokens =
            { "here", "some", "comma", "separated", "words" };
    assertAnalyzesTo(filtered, csv, filteredTokens);
}
示例9: inform
/**
 * Resolves the common-word set.
 *
 * <p>If {@code commonWordFiles} is set, the words are loaded through
 * {@code getSnowballWordSet} when {@code format} equals {@code "snowball"} (ignoring case) and
 * through {@code getWordSet} otherwise. If it is not set, the English stop-word set is used
 * directly.
 *
 * @param loader resource loader passed on to the word-set loading helpers
 * @throws IOException declared by this method; presumably raised by the word-set helpers
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    if (commonWordFiles == null) {
        // nothing configured: use the shared English default set as-is
        commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
        return;
    }

    commonWords = "snowball".equalsIgnoreCase(format)
            ? getSnowballWordSet(loader, commonWordFiles, ignoreCase)
            : getWordSet(loader, commonWordFiles, ignoreCase);
}
示例10: StandardHtmlStripAnalyzerProvider
/**
 * Creates the provider and configures its {@code StandardHtmlStripAnalyzer}.
 *
 * <p>The fallback stop-word set is empty when the index creation version is on or after
 * {@code V_1_0_0_RC1}, and the English stop words otherwise. The fallback is passed to
 * {@code Analysis.parseStopWords} together with the environment and settings.
 */
@Inject
public StandardHtmlStripAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    this.esVersion = Version.indexCreated(indexSettingsService.getSettings());

    // Pick the fallback stop words according to the index creation version.
    final CharArraySet defaultStopwords = esVersion.onOrAfter(Version.V_1_0_0_RC1)
            ? CharArraySet.EMPTY_SET
            : StopAnalyzer.ENGLISH_STOP_WORDS_SET;

    final CharArraySet resolvedStopWords = Analysis.parseStopWords(env, settings, defaultStopwords);
    analyzer = new StandardHtmlStripAnalyzer(resolvedStopWords);
    analyzer.setVersion(version);
}
示例11: testPositionIncrements
/**
 * Checks what a ThaiAnalyzer configured with the English stop words emits for Thai text that
 * contains the word "the", both separated by whitespace and directly adjacent to Thai text.
 * The three integer arrays are presumably start offsets, end offsets and position increments,
 * in the order {@code assertAnalyzesTo} expects them.
 */
public void testPositionIncrements() throws Exception {
    final ThaiAnalyzer thai = new ThaiAnalyzer(StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    // stopword surrounded by whitespace
    final String spacedInput = "การที่ได้ต้อง the แสดงว่างานดี";
    final String[] spacedTerms = { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" };
    final int[] spacedStarts = { 0, 3, 6, 9, 18, 22, 25, 28 };
    final int[] spacedEnds = { 3, 6, 9, 13, 22, 25, 28, 30 };
    final int[] spacedIncrements = { 1, 1, 1, 1, 2, 1, 1, 1 };
    assertAnalyzesTo(thai, spacedInput, spacedTerms, spacedStarts, spacedEnds, spacedIncrements);

    // case that a stopword is adjacent to thai text, with no whitespace
    final String adjacentInput = "การที่ได้ต้องthe แสดงว่างานดี";
    final String[] adjacentTerms = { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" };
    final int[] adjacentStarts = { 0, 3, 6, 9, 17, 21, 24, 27 };
    final int[] adjacentEnds = { 3, 6, 9, 13, 21, 24, 27, 29 };
    final int[] adjacentIncrements = { 1, 1, 1, 1, 2, 1, 1, 1 };
    assertAnalyzesTo(thai, adjacentInput, adjacentTerms, adjacentStarts, adjacentEnds, adjacentIncrements);
}
示例12: testNonWordPattern
/**
 * Exercises PatternAnalyzer with {@code PatternAnalyzer.NON_WORD_PATTERN}.
 * Behavior can be similar to SimpleAnalyzer (depending upon options)
 */
public void testNonWordPattern() throws IOException {
    final String sentence = "The quick brown Fox,the abcd1234 (56.78) dc.";

    // Split on non-letter pattern, do not lowercase, no stopwords
    final PatternAnalyzer plain =
            new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, false, null);
    final String[] plainTokens = { "The", "quick", "brown", "Fox", "the", "abcd", "dc" };
    check(plain, sentence, plainTokens);

    // split on non-letter pattern, lowercase, english stopwords
    final PatternAnalyzer filtered = new PatternAnalyzer(
            TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    final String[] filteredTokens = { "quick", "brown", "fox", "abcd", "dc" };
    check(filtered, sentence, filteredTokens);
}
示例13: testWhitespacePattern
/**
 * Exercises PatternAnalyzer with {@code PatternAnalyzer.WHITESPACE_PATTERN}.
 * Behavior can be similar to WhitespaceAnalyzer (depending upon options)
 */
public void testWhitespacePattern() throws IOException {
    final String sentence = "The quick brown Fox,the abcd1234 (56.78) dc.";

    // Split on whitespace patterns, do not lowercase, no stopwords
    final PatternAnalyzer plain =
            new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);
    final String[] plainTokens =
            { "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." };
    check(plain, sentence, plainTokens);

    // Split on whitespace patterns, lowercase, english stopwords
    final PatternAnalyzer filtered = new PatternAnalyzer(
            TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    final String[] filteredTokens =
            { "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." };
    check(filtered, sentence, filteredTokens);
}
示例14: testCustomPattern
/**
 * Exercises PatternAnalyzer with a custom pattern: the text is tokenized on the
 * comma ",".
 */
public void testCustomPattern() throws IOException {
    final String csv = "Here,Are,some,Comma,separated,words,";

    // Split on comma, do not lowercase, no stopwords
    final PatternAnalyzer plain =
            new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), false, null);
    final String[] plainTokens = { "Here", "Are", "some", "Comma", "separated", "words" };
    check(plain, csv, plainTokens);

    // split on comma, lowercase, english stopwords
    final PatternAnalyzer filtered = new PatternAnalyzer(
            TEST_VERSION_CURRENT, Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    final String[] filteredTokens = { "here", "some", "comma", "separated", "words" };
    check(filtered, csv, filteredTokens);
}
示例15: inform
/**
 * Reads the factory arguments and resolves the common-word set.
 *
 * <p>When a {@code words} argument is present, the set is loaded through
 * {@code getSnowballWordSet} if {@code format} equals {@code "snowball"} (ignoring case), and
 * through {@code getWordSet} otherwise. Without a {@code words} argument, the English
 * stop-word set is used directly.
 *
 * @param loader resource loader passed on to the word-set loading helpers
 * @throws IOException declared by this method; presumably raised by the word-set helpers
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    final String wordFiles = args.get("words");
    ignoreCase = getBoolean("ignoreCase", false);

    if (wordFiles == null) {
        // nothing configured: use the shared English default set as-is
        commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
        return;
    }

    final boolean snowballFormat = "snowball".equalsIgnoreCase(args.get("format"));
    commonWords = snowballFormat
            ? getSnowballWordSet(loader, wordFiles, ignoreCase)
            : getWordSet(loader, wordFiles, ignoreCase);
}