

Java MockTokenizer.setReader Method Code Examples

This article collects typical usage examples of the Java method org.apache.lucene.analysis.MockTokenizer.setReader. If you are wondering what MockTokenizer.setReader does, how to call it, or want to see it used in real code, the curated examples below may help. You can also explore further usage examples of org.apache.lucene.analysis.MockTokenizer, the class this method belongs to.


Listed below are 13 code examples of MockTokenizer.setReader, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
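Before the numbered examples, here is a minimal, self-contained sketch of the typical MockTokenizer.setReader workflow (this snippet is not taken from any of the projects below, and the class name MockTokenizerSetReaderSketch is purely illustrative): construct the tokenizer, attach the input with setReader, then follow the standard TokenStream contract of reset, incrementToken, end and close.

import java.io.StringReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class MockTokenizerSetReaderSketch {
    public static void main(String[] args) throws Exception {
        // Whitespace tokenizer; the second argument disables lower-casing.
        MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        // setReader attaches the input text and must be called before reset().
        tokenizer.setReader(new StringReader("Hello MockTokenizer setReader"));

        CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
            System.out.println(termAtt.toString()); // prints each whitespace-separated token
        }
        tokenizer.end();
        tokenizer.close();
    }
}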

Example 1: testReset

import org.apache.lucene.analysis.MockTokenizer; // import the class this method depends on
public void testReset() throws Exception {
  CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz",
      "Aufgabe", "Überwachung");

  MockTokenizer wsTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  wsTokenizer.setEnableChecks(false); // we will reset in a strange place
  wsTokenizer.setReader(new StringReader("Rindfleischüberwachungsgesetz"));
  DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(
      wsTokenizer, dict,
      CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
  
  CharTermAttribute termAtt = tf.getAttribute(CharTermAttribute.class);
  tf.reset();
  assertTrue(tf.incrementToken());
  assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
  assertTrue(tf.incrementToken());
  assertEquals("Rind", termAtt.toString());
  tf.end();
  tf.close();
  wsTokenizer.setReader(new StringReader("Rindfleischüberwachungsgesetz"));
  tf.reset();
  assertTrue(tf.incrementToken());
  assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
}
 
Developer: europeana, Project: search, Lines: 27, Source file: TestCompoundWordTokenFilter.java

Example 2: create

import org.apache.lucene.analysis.MockTokenizer; // import the class this method depends on
/**
 * Test for {@link JdbcStopFilterFactory#create(TokenStream)}.
 */
@Test
public void create() throws Exception {
   Map<String, String> args = new HashMap<>();
   args.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, Version.LATEST.toString());
   args.put(JdbcReaderFactoryParams.DATASOURCE, "java:comp/env/dataSource");
   args.put(JdbcReaderFactoryParams.SQL, "select stopword from stopwords");

   // Whitespace tokenizer that also lower-cases tokens (the MockTokenizer default).
   MockTokenizer tokenizer = new MockTokenizer();
   tokenizer.setReader(new StringReader("test1 somestring test2 anotherstring"));

   JdbcStopFilterFactory factory = new JdbcStopFilterFactory(args);
   factory.inform(new ClasspathResourceLoader(getClass().getClassLoader()));

   try (TokenStream stream = factory.create(tokenizer)) {
      CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      assertTrue(stream.incrementToken());
      assertEquals("test1", attribute.toString());
      assertTrue(stream.incrementToken());
      assertEquals("test2", attribute.toString());
      assertFalse(stream.incrementToken());
      stream.end();
   }
}
 
Developer: shopping24, Project: solr-jdbc, Lines: 29, Source file: JdbcStopFilterFactoryTest.java

Example 3: testUsingPackagedWordNetReader

import org.apache.lucene.analysis.MockTokenizer; // import the class this method depends on
@Test
public void testUsingPackagedWordNetReader() throws IOException {
    Map<String, String> args = new HashMap<>();
    LemmatizerFilterFactory factory = new LemmatizerFilterFactory(args);
    StringReader reader = new StringReader("it better works");
    final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    in.setReader(reader);
    TokenStream stream = factory.create(in);
    assertTokenStreamContents(stream, new String[] { "it", "good", "work" });
}
 
Developer: nicholasding, Project: solr-lemmatizer, Lines: 11, Source file: LemmatizerFilterFactoryTest.java
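A side note, not part of the project above: the assertTokenStreamContents helper used in this and several of the following examples comes from Lucene's test-framework class BaseTokenStreamTestCase; it drives reset, incrementToken, end and close for you, which is why those tests only need to call setReader. A minimal sketch of how it pairs with setReader (class and method names are illustrative, assuming the lucene-test-framework dependency is on the classpath):

import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;

public class SetReaderAssertionSketch extends BaseTokenStreamTestCase {
    public void testWhitespaceTokens() throws Exception {
        MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        tokenizer.setReader(new StringReader("it better works"));
        // assertTokenStreamContents runs the full reset()/incrementToken()/end()/close()
        // cycle and compares each emitted term against the expected array.
        assertTokenStreamContents(tokenizer, new String[] { "it", "better", "works" });
    }
}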

Example 4: testWithSamplePhrase

import org.apache.lucene.analysis.MockTokenizer; // import the class this method depends on
@Test
public void testWithSamplePhrase() throws IOException {
    StringReader reader = new StringReader("it better works");
    final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    in.setReader(reader);
    TokenStream stream = new LemmatizerFilter(in, new WordNetLemmatizer(new PackagedWordNetReader("wordnet.zip"), new RTrie()));
    assertTokenStreamContents(stream, new String[] { "it", "good", "work" });
}
 
Developer: nicholasding, Project: solr-lemmatizer, Lines: 9, Source file: LemmatizerFilterTest.java

Example 5: testUsingPackagedWordNetReaderFromFilterFactory

import org.apache.lucene.analysis.MockTokenizer; // import the class this method depends on
@Test
public void testUsingPackagedWordNetReaderFromFilterFactory() throws IOException {
    Map<String, String> args = new HashMap<>();
    LemmatizerFilterFactory factory = new LemmatizerFilterFactory(args);

    StringReader reader = new StringReader("it better works");
    final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    in.setReader(reader);
    TokenStream stream = factory.create(in);
    assertTokenStreamContents(stream, new String[] { "it", "good", "work" });
}
 
Developer: nicholasding, Project: solr-lemmatizer, Lines: 12, Source file: LemmatizerFilterTest.java

Example 6: testUsingDirectoryWordNetReaderWithDummyPathShouldFailSilently

import org.apache.lucene.analysis.MockTokenizer; // import the class this method depends on
@Test
public void testUsingDirectoryWordNetReaderWithDummyPathShouldFailSilently() throws IOException {
    Map<String, String> args = new HashMap<>();
    args.put("dictPath", "/tmp");
    LemmatizerFilterFactory factory = new LemmatizerFilterFactory(args);

    StringReader reader = new StringReader("it better works");
    final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    in.setReader(reader);
    TokenStream stream = factory.create(in);
    assertTokenStreamContents(stream, new String[] { "it", "better", "works" });
}
 
Developer: nicholasding, Project: solr-lemmatizer, Lines: 13, Source file: LemmatizerFilterTest.java

Example 7: testPrefix

import org.apache.lucene.analysis.MockTokenizer; // import the class this method depends on
public void testPrefix() throws IOException {
    StringReader reader = new StringReader("test_and_of_for_the");

    final MockTokenizer in = new MockTokenizer(MockTokenizer.KEYWORD, false);
    in.setReader(reader);

    TokenStream stream = new ShinglesStopFilter(in, stopwords, "_");
    assertTokenStreamContents(stream, new String[]{"test"});
}
 
Developer: spyk, Project: shingle-stop-filter, Lines: 10, Source file: ShingleStopFilterTest.java

Example 8: testStopAtSuffix

import org.apache.lucene.analysis.MockTokenizer; // import the class this method depends on
public void testStopAtSuffix() throws IOException {
    StringReader reader = new StringReader("the_test_and_of_trend_for_the");

    final MockTokenizer in = new MockTokenizer(MockTokenizer.KEYWORD, false);
    in.setReader(reader);

    TokenStream stream = new ShinglesStopFilter(in, stopwords, "_");
    assertTokenStreamContents(stream, new String[]{"the_test_and_of_trend"});
}
 
Developer: spyk, Project: shingle-stop-filter, Lines: 10, Source file: ShingleStopFilterTest.java

Example 9: createTokeStream

import org.apache.lucene.analysis.MockTokenizer; // import the class this method depends on
private TokenStream createTokeStream(final String text, String replacement) throws IOException {
    Reader cs = new ProlongedSoundMarkCharFilter(new StringReader(text),
            replacement.charAt(0));
    MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    tokenizer.setReader(cs);
    return tokenizer;
}
 
Developer: codelibs, Project: analyzers-ja, Lines: 8, Source file: ProlongedSoundMarkCharFilterTest.java
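The example above passes a CharFilter to setReader rather than a plain StringReader; setReader accepts any Reader, so character-level filtering can be chained in front of the tokenizer. Below is a minimal sketch of the same pattern using Lucene's stock HTMLStripCharFilter in place of the project-specific ProlongedSoundMarkCharFilter (the class name CharFilterSetReaderSketch is illustrative):

import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class CharFilterSetReaderSketch {
    public static void main(String[] args) throws Exception {
        // The CharFilter rewrites the character stream before tokenization.
        Reader filtered = new HTMLStripCharFilter(new StringReader("<b>hello</b> world"));
        MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        tokenizer.setReader(filtered); // any Reader works, including a CharFilter chain

        CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
            System.out.println(termAtt.toString()); // expected: "hello", then "world"
        }
        tokenizer.end();
        tokenizer.close();
    }
}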

Example 10: before

import org.apache.lucene.analysis.MockTokenizer; // import the class this method depends on
@Before
public void before() throws IOException {
	Map<String, String> params = new HashMap<String, String>();
	params.put("minGramSize", "1");
	params.put("maxGramSize", "20");

	MockTokenizer tokenizer = new MockTokenizer();
	tokenizer.setReader(new StringReader("zhongguoren shixi xuexi"));
	this.filter = (PinyinNGramTokenFilter) new PinyinNGramTokenFilterFactory(params).create(tokenizer);
}
 
Developer: liangbaolin, Project: pinyinAnalyzer, Lines: 11, Source file: TestPinyinNGramTokenFilter.java

Example 11: before

import org.apache.lucene.analysis.MockTokenizer; // import the class this method depends on
@Before
public void before() throws IOException {
	Map<String, String> params = new HashMap<String, String>();
	
	MockTokenizer tokenizer = new MockTokenizer();
	tokenizer.setReader(new StringReader("和平 重量 and 中国"))  ;
	this.filter = (PinyinTransformTokenFilter) new PinyinTransformTokenFilterFactory(params).create(tokenizer);
}
 
Developer: liangbaolin, Project: pinyinAnalyzer, Lines: 9, Source file: TestPinyinTransformTokenFilter.java

Example 12: create

import org.apache.lucene.analysis.MockTokenizer; // import the class this method depends on
/**
 * Test for {@link JdbcSynonymFilterFactory#create(TokenStream)}.
 */
@Test
public void create() throws Exception {
   Map<String, String> args = new HashMap<>();
   args.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, Version.LATEST.toString());
   args.put(JdbcReaderFactoryParams.DATASOURCE, "java:comp/env/dataSource");
   args.put(JdbcReaderFactoryParams.SQL, "select synonyms from synonyms");

   // Whitespace tokenizer that also lower-cases tokens (the MockTokenizer default).
   MockTokenizer tokenizer = new MockTokenizer();
   tokenizer.setReader(new StringReader("test1 test2"));

   JdbcSynonymFilterFactory factory = new JdbcSynonymFilterFactory(args);
   factory.inform(new ClasspathResourceLoader(getClass().getClassLoader()));

   try (TokenStream stream = factory.create(tokenizer)) {
      CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      assertTrue(stream.incrementToken());
      assertEquals("testA", attribute.toString());
      assertTrue(stream.incrementToken());
      assertEquals("testB", attribute.toString());
      assertTrue(stream.incrementToken());
      assertEquals("testC", attribute.toString());
      assertTrue(stream.incrementToken());
      assertEquals("testD", attribute.toString());
      assertFalse(stream.incrementToken());
      stream.end();
   }
}
 
Developer: shopping24, Project: solr-jdbc, Lines: 33, Source file: JdbcSynonymFilterFactoryTest.java

Example 13: createTokeStream

import org.apache.lucene.analysis.MockTokenizer; // import the class this method depends on
private TokenStream createTokeStream(final String text) throws IOException {
    Reader cs = new IterationMarkCharFilter(new StringReader(text));
    MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    tokenizer.setReader(cs);
    return tokenizer;
}
 
Developer: codelibs, Project: analyzers-ja, Lines: 7, Source file: IterationMarkCharFilterTest.java


Note: The org.apache.lucene.analysis.MockTokenizer.setReader examples in this article were compiled by 纯净天空 from open source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open source projects contributed by various developers, and copyright of the source code remains with the original authors. Please consult each project's license before redistributing or using the code; do not reproduce this article without permission.