This article collects typical usage examples of the Java property org.apache.lucene.analysis.MockTokenizer.WHITESPACE. If you are wondering what MockTokenizer.WHITESPACE is for, how to use it, or where to find it used in practice, the curated examples below may help. You can also explore its containing class, org.apache.lucene.analysis.MockTokenizer, for more context.
The following shows 15 code examples of the MockTokenizer.WHITESPACE property, sorted by popularity by default.
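Before the examples, here is a minimal sketch of what the property does: MockTokenizer.WHITESPACE is a tokenization pattern that makes the mock tokenizer split its input on whitespace, much like WhitespaceTokenizer. The snippet below is illustrative only; the sample text and variable names are not taken from the examples that follow, and it assumes Lucene's test-framework (which provides MockTokenizer) is on the classpath.

    Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false); // false = do not lowercase tokens
    t.setReader(new StringReader("Hello Lucene world"));
    t.reset();
    CharTermAttribute term = t.addAttribute(CharTermAttribute.class);
    while (t.incrementToken()) {
        System.out.println(term.toString()); // prints: Hello, Lucene, world
    }
    t.end();
    t.close();

The second constructor argument controls lowercasing; most examples below pass false, so tokens keep their original case.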
Example 1: testSimple
public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new UniqueTokenFilter(t));
        }
    };

    TokenStream test = analyzer.tokenStream("test", "this test with test");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("this"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("test"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("with"));

    // the second "test" is dropped: UniqueTokenFilter emits each term only once
    assertThat(test.incrementToken(), equalTo(false));
}
Example 2: testRandomStrings
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
    for (int i = 0; i < 10; i++) {
        final int min = TestUtil.nextInt(random(), 2, 10);
        final int max = TestUtil.nextInt(random(), min, 20);
        Analyzer a = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
                return new TokenStreamComponents(tokenizer,
                    new NGramTokenFilter(tokenizer, min, max));
            }
        };
        checkRandomData(random(), a, 200 * RANDOM_MULTIPLIER, 20);
    }
}
Example 3: testWordComponentWithLessThanMinimumLength
public void testWordComponentWithLessThanMinimumLength() throws Exception {
    CharArraySet dict = makeDictionary("abc", "d", "efg");

    Tokenizer tokenizer = new MockTokenizer(new StringReader("abcdefg"), MockTokenizer.WHITESPACE, false);
    DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(
        tokenizer,
        dict,
        CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
        CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
        CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);

    // since "d" is shorter than the minimum subword size, it should not be added to the token stream
    assertTokenStreamContents(tf,
        new String[] { "abcdefg", "abc", "efg" },
        new int[] { 0, 0, 0 },
        new int[] { 7, 7, 7 },
        new int[] { 1, 0, 0 }
    );
}
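The makeDictionary helper used above is not shown in this snippet; it comes from Lucene's compound-word-filter test class. A minimal version (an assumption based on the CharArraySet API, not a verbatim copy of the test source) could look like:

    private static CharArraySet makeDictionary(String... dictionary) {
        // build a case-insensitive set of dictionary words for the compound word filter
        return new CharArraySet(Arrays.asList(dictionary), true);
    }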
Example 4: testOutputHangsOffEnd
public void testOutputHangsOffEnd() throws Exception {
    b = new SynonymMap.Builder(true);
    final boolean keepOrig = false;
    // b hangs off the end (no input token under it):
    add("a", "a b", keepOrig);
    tokensIn = new MockTokenizer(new StringReader("a"),
                                 MockTokenizer.WHITESPACE,
                                 true);
    tokensIn.reset();
    assertTrue(tokensIn.incrementToken());
    assertFalse(tokensIn.incrementToken());
    tokensIn.end();
    tokensIn.close();

    tokensOut = new SynonymFilter(tokensIn,
                                  b.build(),
                                  true);
    termAtt = tokensOut.addAttribute(CharTermAttribute.class);
    posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
    offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);
    posLenAtt = tokensOut.addAttribute(PositionLengthAttribute.class);

    // Make sure endOffset inherits from previous input token:
    verify("a", "a b:1");
}
Example 5: testFuzzySlopeExtendability
public void testFuzzySlopeExtendability() throws ParseException {
    QueryParser qp = new QueryParser("a", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
        @Override
        Query handleBareFuzzy(String qfield, Token fuzzySlop, String termImage)
                throws ParseException {
            if (fuzzySlop.image.endsWith("€")) {
                float fms = fuzzyMinSim;
                try {
                    fms = Float.valueOf(fuzzySlop.image.substring(1, fuzzySlop.image.length() - 1)).floatValue();
                } catch (Exception ignored) { }
                float value = Float.parseFloat(termImage);
                return getRangeQuery(qfield, Float.toString(value - fms / 2.f), Float.toString(value + fms / 2.f), true, true);
            }
            return super.handleBareFuzzy(qfield, fuzzySlop, termImage);
        }
    };
    // "12.45~1€" parses as value 12.45 with slop 1, i.e. the range [11.95 TO 12.95]
    assertEquals(qp.parse("a:[11.95 TO 12.95]"), qp.parse("12.45~1€"));
}
Example 6: testSimple
public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new TruncateTokenFilter(t, 3));
        }
    };

    TokenStream test = analyzer.tokenStream("test", "a bb ccc dddd eeeee");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("a"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("bb"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("ccc"));

    // tokens longer than 3 characters are truncated by TruncateTokenFilter
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("ddd"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("eee"));

    assertThat(test.incrementToken(), equalTo(false));
}
Example 7: testUsingPackagedWordNetReader
@Test
public void testUsingPackagedWordNetReader() throws IOException {
    Map<String, String> args = new HashMap<>();
    LemmatizerFilterFactory factory = new LemmatizerFilterFactory(args);
    StringReader reader = new StringReader("it better works");
    final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    in.setReader(reader);
    TokenStream stream = factory.create(in);
    assertTokenStreamContents(stream, new String[] { "it", "good", "work" });
}
Example 8: testWithSamplePhrase
@Test
public void testWithSamplePhrase() throws IOException {
    StringReader reader = new StringReader("it better works");
    final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    in.setReader(reader);
    TokenStream stream = new LemmatizerFilter(in, new WordNetLemmatizer(new PackagedWordNetReader("wordnet.zip"), new RTrie()));
    assertTokenStreamContents(stream, new String[] { "it", "good", "work" });
}
Example 9: testUsingPackagedWordNetReaderFromFilterFactory
@Test
public void testUsingPackagedWordNetReaderFromFilterFactory() throws IOException {
    Map<String, String> args = new HashMap<>();
    LemmatizerFilterFactory factory = new LemmatizerFilterFactory(args);
    StringReader reader = new StringReader("it better works");
    final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    in.setReader(reader);
    TokenStream stream = factory.create(in);
    assertTokenStreamContents(stream, new String[] { "it", "good", "work" });
}
Example 10: testUsingDirectoryWordNetReaderWithDummyPathShouldFailSilently
@Test
public void testUsingDirectoryWordNetReaderWithDummyPathShouldFailSilently() throws IOException {
    Map<String, String> args = new HashMap<>();
    args.put("dictPath", "/tmp");
    LemmatizerFilterFactory factory = new LemmatizerFilterFactory(args);
    StringReader reader = new StringReader("it better works");
    final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    in.setReader(reader);
    TokenStream stream = factory.create(in);
    // no dictionary can be loaded from the dummy path, so the tokens pass through unchanged
    assertTokenStreamContents(stream, new String[] { "it", "better", "works" });
}
Example 11: testStemming
/**
 * Ensure the filter actually stems text.
 */
public void testStemming() throws Exception {
    Reader reader = new StringReader("dogs");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = tokenFilterFactory("PorterStem").create(stream);
    assertTokenStreamContents(stream, new String[] { "dog" });
}
Example 12: testReplaceByEmpty
public void testReplaceByEmpty() throws Exception {
    Reader reader = new StringReader("aa bb cc");
    reader = charFilterFactory("PatternReplace",
        "pattern", "(aa)\\s+(bb)\\s+(cc)").create(reader);
    // with no replacement configured, the whole match is removed, leaving no tokens
    TokenStream ts = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    assertTokenStreamContents(ts, new String[] {});
}
Example 13: testStripFirst
public void testStripFirst() throws Exception {
    String input = "aabfooaabfooabfoob ab caaaaaaaaab";
    TokenStream ts = new PatternReplaceFilter(
        new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
        Pattern.compile("a*b"),
        null, false); // null replacement, all=false: only the first match in each token is stripped
    assertTokenStreamContents(ts,
        new String[] { "fooaabfooabfoob", "", "c" });
}
Example 14: testCapitalization
public void testCapitalization() throws Exception {
    Reader reader = new StringReader("kiTTEN");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = tokenFilterFactory("Capitalization",
        "keep", "and the it BIG",
        "onlyFirstWord", "true").create(stream);
    assertTokenStreamContents(stream, new String[] { "Kitten" });
}
Example 15: testDefaults
/**
 * If no words are provided, then a set of English default stopwords is used.
 */
public void testDefaults() throws Exception {
    CommonGramsQueryFilterFactory factory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery");
    CharArraySet words = factory.getCommonWords();
    assertTrue("words is null and it shouldn't be", words != null);
    assertTrue(words.contains("the"));
    Tokenizer tokenizer = new MockTokenizer(new StringReader("testing the factory"), MockTokenizer.WHITESPACE, false);
    TokenStream stream = factory.create(tokenizer);
    assertTokenStreamContents(stream,
        new String[] { "testing_the", "the_factory" });
}