本文整理汇总了Java中org.apache.lucene.analysis.core.WhitespaceTokenizer.setReader方法的典型用法代码示例。如果您正苦于以下问题:Java WhitespaceTokenizer.setReader方法的具体用法?Java WhitespaceTokenizer.setReader怎么用?Java WhitespaceTokenizer.setReader使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.analysis.core.WhitespaceTokenizer
的用法示例。
在下文中一共展示了WhitespaceTokenizer.setReader方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testReset
import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
public void testReset() throws Exception {
    final String input = "How the s a brown s cow d like A B thing?";
    WhitespaceTokenizer source = new WhitespaceTokenizer(new StringReader(input));
    CommonGramsFilter commonGrams = new CommonGramsFilter(source, commonWords);
    CharTermAttribute termAtt = commonGrams.addAttribute(CharTermAttribute.class);

    // First pass: unigrams interleaved with the common-word bigrams.
    commonGrams.reset();
    for (String expected : new String[] {"How", "How_the", "the", "the_s"}) {
        assertTrue(commonGrams.incrementToken());
        assertEquals(expected, termAtt.toString());
    }

    // Close the stream, point the tokenizer at a fresh reader, and verify
    // that reset() restarts token production from the beginning.
    commonGrams.close();
    source.setReader(new StringReader(input));
    commonGrams.reset();
    assertTrue(commonGrams.incrementToken());
    assertEquals("How", termAtt.toString());
}
示例2: testQueryReset
import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
public void testQueryReset() throws Exception {
    final String input = "How the s a brown s cow d like A B thing?";
    WhitespaceTokenizer source = new WhitespaceTokenizer(new StringReader(input));
    CommonGramsFilter commonGrams = new CommonGramsFilter(source, commonWords);
    CommonGramsQueryFilter queryFilter = new CommonGramsQueryFilter(commonGrams);
    // As in the original test, the term attribute is fetched from the tokenizer.
    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);

    // First pass: the query filter keeps only the bigram forms.
    queryFilter.reset();
    for (String expected : new String[] {"How_the", "the_s"}) {
        assertTrue(queryFilter.incrementToken());
        assertEquals(expected, termAtt.toString());
    }

    // Close, swap in a fresh reader, and verify reset() restarts the stream.
    queryFilter.close();
    source.setReader(new StringReader(input));
    queryFilter.reset();
    assertTrue(queryFilter.incrementToken());
    assertEquals("How_the", termAtt.toString());
}
示例3: countTokensInText
import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
/**
 * Counts whitespace-separated tokens in {@code text}.
 *
 * <p>Best-effort: if tokenization fails, the error is logged and the number
 * of tokens counted so far (possibly 0) is returned.</p>
 *
 * @param text the text to tokenize; must not be null
 * @return the number of tokens produced by the whitespace tokenizer
 */
private int countTokensInText(String text) {
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader(text));
    int tokens = 0;
    try {
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
            ++tokens;
        }
        // The TokenStream contract requires end() after the stream is
        // exhausted and before close(); the original omitted this call.
        tokenizer.end();
    } catch (Exception e) {
        // Deliberate best-effort: log and fall through to return the
        // partial count rather than propagating.
        LOGGER.error("Error while tokenizing text. Returning.", e);
    } finally {
        IOUtils.closeQuietly(tokenizer);
    }
    return tokens;
}
示例4: testReset
import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
public void testReset() throws Exception {
    final String input = "How the s a brown s cow d like A B thing?";
    WhitespaceTokenizer tokenizer =
        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CommonGramsFilter filter = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
    CharTermAttribute term = filter.addAttribute(CharTermAttribute.class);

    // Consume the first few tokens: unigrams interleaved with bigrams.
    filter.reset();
    for (String expected : new String[] {"How", "How_the", "the", "the_s"}) {
        assertTrue(filter.incrementToken());
        assertEquals(expected, term.toString());
    }

    // Re-point the tokenizer at fresh input; reset() must restart the stream.
    tokenizer.setReader(new StringReader(input));
    filter.reset();
    assertTrue(filter.incrementToken());
    assertEquals("How", term.toString());
}
示例5: testQueryReset
import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
public void testQueryReset() throws Exception {
    final String input = "How the s a brown s cow d like A B thing?";
    WhitespaceTokenizer tokenizer =
        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CommonGramsFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
    CommonGramsQueryFilter query = new CommonGramsQueryFilter(grams);
    // As in the original test, the attribute is obtained via the tokenizer.
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);

    // First pass: only bigram forms survive the query filter.
    query.reset();
    assertTrue(query.incrementToken());
    assertEquals("How_the", term.toString());
    assertTrue(query.incrementToken());
    assertEquals("the_s", term.toString());

    // Fresh reader + reset() must restart the stream from the beginning.
    tokenizer.setReader(new StringReader(input));
    query.reset();
    assertTrue(query.incrementToken());
    assertEquals("How_the", term.toString());
}
示例6: testOverlappingAtBeginning
import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
@Test
public void testOverlappingAtBeginning() throws Exception {
    final CharArraySet phraseSets = new CharArraySet(Arrays.asList(
        "new york", "new york city", "city of new york"), false);
    final WhitespaceTokenizer source = new WhitespaceTokenizer();
    source.setReader(new StringReader("new york city is great"));

    AutoPhrasingTokenFilter phraser = new AutoPhrasingTokenFilter(source, phraseSets, false);
    phraser.setReplaceWhitespaceWith('_');
    CharTermAttribute term = phraser.addAttribute(CharTermAttribute.class);

    // The phrase match at the start of the input is emitted as one token;
    // the remaining words pass through unchanged.
    phraser.reset();
    for (String expected : new String[] {"new_york_city", "is", "great"}) {
        assertTrue(phraser.incrementToken());
        assertEquals(expected, term.toString());
    }
}
示例7: testOverlappingAtEnd
import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
@Test
public void testOverlappingAtEnd() throws Exception {
    final CharArraySet phraseSets = new CharArraySet(Arrays.asList(
        "new york", "new york city", "city of new york"), false);
    final WhitespaceTokenizer source = new WhitespaceTokenizer();
    source.setReader(new StringReader("the great city of new york"));

    AutoPhrasingTokenFilter phraser = new AutoPhrasingTokenFilter(source, phraseSets, false);
    phraser.setReplaceWhitespaceWith('_');
    CharTermAttribute term = phraser.addAttribute(CharTermAttribute.class);

    // Leading words pass through; the phrase match at the end of the input
    // is emitted as a single underscore-joined token.
    phraser.reset();
    for (String expected : new String[] {"the", "great", "city_of_new_york"}) {
        assertTrue(phraser.incrementToken());
        assertEquals(expected, term.toString());
    }
}
示例8: testIncompletePhrase
import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
@Test
public void testIncompletePhrase() throws Exception {
    final CharArraySet phraseSets = new CharArraySet(Arrays.asList(
        "big apple", "new york city", "property tax", "three word phrase"), false);
    final WhitespaceTokenizer source = new WhitespaceTokenizer();
    source.setReader(new StringReader("some new york"));

    AutoPhrasingTokenFilter phraser = new AutoPhrasingTokenFilter(source, phraseSets, false);
    phraser.setReplaceWhitespaceWith('_');
    CharTermAttribute term = phraser.addAttribute(CharTermAttribute.class);

    // "new york" is only a prefix of a configured phrase ("new york city"),
    // so no phrase is formed and the individual words are emitted.
    phraser.reset();
    for (String expected : new String[] {"some", "new", "york"}) {
        assertTrue(phraser.incrementToken());
        assertEquals(expected, term.toString());
    }
}
示例9: testTypical
import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
// Verifies that ConcatenateFilter joins all tokens of the input into a single
// token ("new york city"), typed "shingle", spanning the whole input.
public void testTypical() throws IOException {
String NYC = "new york city";
WhitespaceTokenizer stream = new WhitespaceTokenizer();
stream.setReader(new StringReader(NYC));
ConcatenateFilter filter = new ConcatenateFilter(stream);
try {
// Expected: one token covering offsets [0, NYC.length()), posIncr 1,
// type "shingle", final offset == NYC.length().
assertTokenStreamContents(filter, new String[]{NYC},
new int[]{0}, new int[]{NYC.length()}, new String[]{"shingle"},
new int[]{1}, null, NYC.length(), true);
} catch (AssertionError e) {
//assertTokenStreamContents tries to test if tokenStream.end() was implemented correctly.
// It's manner of checking this is imperfect and incompatible with
// ConcatenateFilter. Specifically it modifies a special attribute *after* incrementToken(),
// which is weird. To the best of my ability, end() appears to be implemented correctly.
// Only swallow the one known-spurious failure; any other AssertionError
// is a real test failure and must propagate.
if (!e.getMessage().equals("super.end()/clearAttributes() was not called correctly in end()"))
throw e;
}
}
示例10: testReset
import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
public void testReset() throws Exception {
    final String input = "How the s a brown s cow d like A B thing?";
    WhitespaceTokenizer wt =
        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
    CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);

    // Consume a few tokens from the first pass.
    cgf.reset();
    assertTrue(cgf.incrementToken());
    assertEquals("How", term.toString());
    assertTrue(cgf.incrementToken());
    assertEquals("How_the", term.toString());
    assertTrue(cgf.incrementToken());
    assertEquals("the", term.toString());
    assertTrue(cgf.incrementToken());
    assertEquals("the_s", term.toString());

    // Close, give the tokenizer a fresh reader, and confirm reset() restarts.
    cgf.close();
    wt.setReader(new StringReader(input));
    cgf.reset();
    assertTrue(cgf.incrementToken());
    assertEquals("How", term.toString());
}
示例11: testQueryReset
import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
public void testQueryReset() throws Exception {
    final String input = "How the s a brown s cow d like A B thing?";
    WhitespaceTokenizer wt =
        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CommonGramsQueryFilter nsf =
        new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords));
    // As in the original test, the term attribute is taken from the tokenizer.
    CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);

    // First pass: only the bigram forms come through the query filter.
    nsf.reset();
    for (String expected : new String[] {"How_the", "the_s"}) {
        assertTrue(nsf.incrementToken());
        assertEquals(expected, term.toString());
    }

    // Close, fresh reader, reset: the stream must start over.
    nsf.close();
    wt.setReader(new StringReader(input));
    nsf.reset();
    assertTrue(nsf.incrementToken());
    assertEquals("How_the", term.toString());
}
示例12: testReset
import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
public void testReset() throws Exception {
    WhitespaceTokenizer source = new WhitespaceTokenizer(new StringReader("abcde"));
    EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(source, 1, 3);
    final String[] grams = {"a", "ab", "abc"};
    final int[] starts = {0, 0, 0};
    final int[] ends = {5, 5, 5};
    // Consume once, then re-point the tokenizer at fresh input: after a
    // reset the filter must produce the identical token run.
    assertTokenStreamContents(filter, grams, starts, ends);
    source.setReader(new StringReader("abcde"));
    assertTokenStreamContents(filter, grams, starts, ends);
}
示例13: testReset
import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
public void testReset() throws Exception {
    WhitespaceTokenizer source = new WhitespaceTokenizer(new StringReader("abcde"));
    NGramTokenFilter filter = new NGramTokenFilter(source, 1, 1);
    final String[] grams = {"a", "b", "c", "d", "e"};
    final int[] starts = {0, 0, 0, 0, 0};
    final int[] ends = {5, 5, 5, 5, 5};
    final int[] increments = {1, 0, 0, 0, 0};
    // Consume once, then feed fresh input: the filter must emit the same
    // unigrams (with identical offsets/increments) after the reset.
    assertTokenStreamContents(filter, grams, starts, ends, increments);
    source.setReader(new StringReader("abcde"));
    assertTokenStreamContents(filter, grams, starts, ends, increments);
}
示例14: testReset
import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
public void testReset() throws Exception {
    WhitespaceTokenizer source =
        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
    EdgeNGramTokenFilter filter =
        new EdgeNGramTokenFilter(source, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
    final String[] grams = {"a", "ab", "abc"};
    final int[] starts = {0, 0, 0};
    final int[] ends = {1, 2, 3};
    // First consumption, then fresh input: the token run must repeat
    // identically after the reset.
    assertTokenStreamContents(filter, grams, starts, ends);
    source.setReader(new StringReader("abcde"));
    assertTokenStreamContents(filter, grams, starts, ends);
}
示例15: testReset
import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
public void testReset() throws Exception {
    WhitespaceTokenizer source =
        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
    NGramTokenFilter filter = new NGramTokenFilter(source, 1, 1);
    final String[] grams = {"a", "b", "c", "d", "e"};
    final int[] starts = {0, 1, 2, 3, 4};
    final int[] ends = {1, 2, 3, 4, 5};
    // Consume once, then swap in a fresh reader: the unigram run with the
    // same offsets must be produced again after the reset.
    assertTokenStreamContents(filter, grams, starts, ends);
    source.setReader(new StringReader("abcde"));
    assertTokenStreamContents(filter, grams, starts, ends);
}