This article collects typical usage examples of the Java method org.apache.lucene.analysis.MockTokenizer.setReader. If you are wondering what MockTokenizer.setReader does or how to use it, the hand-picked examples below may help. You can also explore the containing class, org.apache.lucene.analysis.MockTokenizer, for more background.
The following section shows 13 code examples of the MockTokenizer.setReader method.
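Before the individual examples, here is a minimal sketch of the canonical setReader lifecycle (the input string is illustrative; the call order setReader, reset, incrementToken, end, close is the standard Lucene TokenStream contract):
import java.io.StringReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
tokenizer.setReader(new StringReader("hello world")); // attach input before reset()
CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
tokenizer.reset();
while (tokenizer.incrementToken()) {
System.out.println(term.toString()); // prints "hello", then "world"
}
tokenizer.end();
tokenizer.close();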
Example 1: testReset
import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
public void testReset() throws Exception {
CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz",
"Aufgabe", "Überwachung");
MockTokenizer wsTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
wsTokenizer.setEnableChecks(false); // we will reset in a strange place
wsTokenizer.setReader(new StringReader("Rindfleischüberwachungsgesetz"));
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(
wsTokenizer, dict,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
CharTermAttribute termAtt = tf.getAttribute(CharTermAttribute.class);
tf.reset();
assertTrue(tf.incrementToken());
assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
assertTrue(tf.incrementToken());
assertEquals("Rind", termAtt.toString());
tf.end();
tf.close();
wsTokenizer.setReader(new StringReader("Rindfleischüberwachungsgesetz"));
tf.reset();
assertTrue(tf.incrementToken());
assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
}
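A note on setEnableChecks(false): MockTokenizer normally asserts that its consumer follows the TokenStream contract, and this test intentionally re-reads the stream at an unusual point, so the checks must be switched off. For contrast, a minimal sketch of the contract-compliant reuse pattern (the tokenizer and filter names are illustrative):
tokenizer.setReader(new StringReader("first input"));
filter.reset();
while (filter.incrementToken()) { /* consume every token */ }
filter.end();
filter.close(); // close() must precede setReader() on reuse
tokenizer.setReader(new StringReader("second input"));
filter.reset();
// ... consume, then end() and close() again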
Example 2: create
import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
/**
 * Test for {@link JdbcStopFilterFactory#create(TokenStream)}.
 */
@Test
public void create() throws Exception {
Map<String, String> args = new HashMap<>();
args.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, Version.LATEST.toString());
args.put(JdbcReaderFactoryParams.DATASOURCE, "java:comp/env/dataSource");
args.put(JdbcReaderFactoryParams.SQL, "select stopword from stopwords");
// The no-arg MockTokenizer defaults to whitespace tokenization with lowercasing.
MockTokenizer tokenizer = new MockTokenizer();
tokenizer.setReader(new StringReader("test1 somestring test2 anotherstring"));
JdbcStopFilterFactory factory = new JdbcStopFilterFactory(args);
factory.inform(new ClasspathResourceLoader(getClass().getClassLoader()));
try (TokenStream stream = factory.create(tokenizer)) {
CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class);
stream.reset();
assertTrue(stream.incrementToken());
assertEquals("test1", attribute.toString());
assertTrue(stream.incrementToken());
assertEquals("test2", attribute.toString());
assertFalse(stream.incrementToken());
stream.end();
}
}
Example 3: testUsingPackagedWordNetReader
import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
@Test
public void testUsingPackagedWordNetReader() throws IOException {
Map<String, String> args = new HashMap<>();
LemmatizerFilterFactory factory = new LemmatizerFilterFactory(args);
StringReader reader = new StringReader("it better works");
final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
in.setReader(reader);
TokenStream stream = factory.create(in);
assertTokenStreamContents(stream, new String[] { "it", "good", "work" });
}
Example 4: testWithSamplePhrase
import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
@Test
public void testWithSamplePhrase() throws IOException {
StringReader reader = new StringReader("it better works");
final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
in.setReader(reader);
TokenStream stream = new LemmatizerFilter(in, new WordNetLemmatizer(new PackagedWordNetReader("wordnet.zip"), new RTrie()));
assertTokenStreamContents(stream, new String[] { "it", "good", "work" });
}
Example 5: testUsingPackagedWordNetReaderFromFilterFactory
import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
@Test
public void testUsingPackagedWordNetReaderFromFilterFactory() throws IOException {
Map<String, String> args = new HashMap<>();
LemmatizerFilterFactory factory = new LemmatizerFilterFactory(args);
StringReader reader = new StringReader("it better works");
final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
in.setReader(reader);
TokenStream stream = factory.create(in);
assertTokenStreamContents(stream, new String[] { "it", "good", "work" });
}
Example 6: testUsingDirectoryWordNetReaderWithDummyPathShouldFailSilently
import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
@Test
public void testUsingDirectoryWordNetReaderWithDummyPathShouldFailSilently() throws IOException {
Map<String, String> args = new HashMap<>();
args.put("dictPath", "/tmp");
LemmatizerFilterFactory factory = new LemmatizerFilterFactory(args);
StringReader reader = new StringReader("it better works");
final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
in.setReader(reader);
TokenStream stream = factory.create(in);
assertTokenStreamContents(stream, new String[] { "it", "better", "works" });
}
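Note what this test asserts: with a bogus dictPath the tokens come back unchanged ("it", "better", "works"), so the factory degrades to a pass-through rather than throwing, which is exactly the "fail silently" behavior the test name advertises.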
Example 7: testPrefix
import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
public void testPrefix() throws IOException {
StringReader reader = new StringReader("test_and_of_for_the");
final MockTokenizer in = new MockTokenizer(MockTokenizer.KEYWORD, false);
in.setReader(reader);
TokenStream stream = new ShinglesStopFilter(in, stopwords, "_");
assertTokenStreamContents(stream, new String[]{"test"});
}
Example 8: testStopAtSuffix
import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
public void testStopAtSuffix() throws IOException {
StringReader reader = new StringReader("the_test_and_of_trend_for_the");
final MockTokenizer in = new MockTokenizer(MockTokenizer.KEYWORD, false);
in.setReader(reader);
TokenStream stream = new ShinglesStopFilter(in, stopwords, "_");
assertTokenStreamContents(stream, new String[]{"the_test_and_of_trend"});
}
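Taken together, Examples 7 and 8 suggest what ShinglesStopFilter does with a KEYWORD token: it strips the trailing run of "_"-joined stopwords, keeping everything up to and including the last non-stopword ("trend" in Example 8).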
Example 9: createTokenStream
import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
private TokenStream createTokenStream(final String text, String replacement) throws IOException {
Reader cs = new ProlongedSoundMarkCharFilter(new StringReader(text),
replacement.charAt(0));
MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
tokenizer.setReader(cs);
return tokenizer;
}
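A hypothetical use of this helper (the input string and printed output are assumptions, since the exact mapping performed by ProlongedSoundMarkCharFilter is project-specific):
TokenStream ts = createTokenStream("コーヒー", "-"); // replace the prolonged sound mark with '-'
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
ts.reset();
while (ts.incrementToken()) {
System.out.println(term.toString());
}
ts.end();
ts.close();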
Example 10: before
import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
@Before
public void before() throws IOException {
Map<String, String> params = new HashMap<>();
params.put("minGramSize", "1");
params.put("maxGramSize", "20");
MockTokenizer tokenizer = new MockTokenizer();
tokenizer.setReader(new StringReader("zhongguoren shixi xuexi"));
this.filter = (PinyinNGramTokenFilter) new PinyinNGramTokenFilterFactory(params).create(tokenizer);
}
Example 11: before
import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
@Before
public void before() throws IOException {
Map<String, String> params = new HashMap<>();
MockTokenizer tokenizer = new MockTokenizer();
tokenizer.setReader(new StringReader("和平 重量 and 中国"));
this.filter = (PinyinTransformTokenFilter) new PinyinTransformTokenFilterFactory(params).create(tokenizer);
}
Example 12: create
import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
/**
* Test for {@link JdbcSynonymFilterFactory#create(TokenStream)}.
*/
@Test
public void create() throws Exception {
Map<String, String> args = new HashMap<>();
args.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, Version.LATEST.toString());
args.put(JdbcReaderFactoryParams.DATASOURCE, "java:comp/env/dataSource");
args.put(JdbcReaderFactoryParams.SQL, "select synonyms from synonyms");
// The no-arg MockTokenizer defaults to whitespace tokenization with lowercasing.
MockTokenizer tokenizer = new MockTokenizer();
tokenizer.setReader(new StringReader("test1 test2"));
JdbcSynonymFilterFactory factory = new JdbcSynonymFilterFactory(args);
factory.inform(new ClasspathResourceLoader(getClass().getClassLoader()));
try (TokenStream stream = factory.create(tokenizer)) {
CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class);
stream.reset();
assertTrue(stream.incrementToken());
assertEquals("testA", attribute.toString());
assertTrue(stream.incrementToken());
assertEquals("testB", attribute.toString());
assertTrue(stream.incrementToken());
assertEquals("testC", attribute.toString());
assertTrue(stream.incrementToken());
assertEquals("testD", attribute.toString());
assertFalse(stream.incrementToken());
stream.end();
}
}
Example 13: createTokenStream
import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
private TokenStream createTokenStream(final String text) throws IOException {
Reader cs = new IterationMarkCharFilter(new StringReader(text));
MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
tokenizer.setReader(cs);
return tokenizer;
}
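Examples 9 and 13 illustrate the same chaining idea: setReader accepts any Reader, including a CharFilter, so character-level normalization can run before tokenization. A minimal sketch of that pattern using only standard Lucene classes (MappingCharFilter stands in for the project-specific filters; the mapping and input are illustrative):
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;

NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
builder.add("ー", "-"); // map the prolonged sound mark to a plain hyphen
Reader filtered = new MappingCharFilter(builder.build(), new StringReader("コーヒー"));
MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
tokenizer.setReader(filtered); // the tokenizer sees the already-normalized characters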