当前位置: 首页>>代码示例>>Java>>正文


Java WhitespaceTokenizer.setReader方法代码示例

本文整理汇总了Java中org.apache.lucene.analysis.core.WhitespaceTokenizer.setReader方法的典型用法代码示例。如果您正苦于以下问题:Java WhitespaceTokenizer.setReader方法的具体用法?Java WhitespaceTokenizer.setReader怎么用?Java WhitespaceTokenizer.setReader使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.lucene.analysis.core.WhitespaceTokenizer的用法示例。


在下文中一共展示了WhitespaceTokenizer.setReader方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testReset

import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
/**
 * Verifies that a CommonGramsFilter replays the stream from the start after
 * its source tokenizer is given a fresh reader and the filter is reset.
 */
public void testReset() throws Exception {
  final String input = "How the s a brown s cow d like A B thing?";
  WhitespaceTokenizer wt = new WhitespaceTokenizer(new StringReader(input));
  CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);

  CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
  // Consume the first few tokens: unigrams interleaved with common-grams.
  cgf.reset();
  for (String expected : new String[] {"How", "How_the", "the", "the_s"}) {
    assertTrue(cgf.incrementToken());
    assertEquals(expected, term.toString());
  }
  cgf.close();

  // A fresh reader plus reset() must restart tokenization from the top.
  wt.setReader(new StringReader(input));
  cgf.reset();
  assertTrue(cgf.incrementToken());
  assertEquals("How", term.toString());
}
 
开发者ID:europeana,项目名称:search,代码行数:23,代码来源:CommonGramsFilterTest.java

示例2: testQueryReset

import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
/**
 * Verifies that a CommonGramsQueryFilter chain replays from the start after
 * the underlying tokenizer gets a new reader and the chain is reset.
 */
public void testQueryReset() throws Exception {
  final String input = "How the s a brown s cow d like A B thing?";
  WhitespaceTokenizer wt = new WhitespaceTokenizer(new StringReader(input));
  CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
  CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);

  CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
  // Query filter emits only the common-gram forms.
  nsf.reset();
  for (String expected : new String[] {"How_the", "the_s"}) {
    assertTrue(nsf.incrementToken());
    assertEquals(expected, term.toString());
  }
  nsf.close();

  // Fresh reader + reset() must restart the chain from the first gram.
  wt.setReader(new StringReader(input));
  nsf.reset();
  assertTrue(nsf.incrementToken());
  assertEquals("How_the", term.toString());
}
 
开发者ID:europeana,项目名称:search,代码行数:20,代码来源:CommonGramsFilterTest.java

示例3: countTokensInText

import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
/**
 * Counts the whitespace-separated tokens in the given text.
 *
 * @param text the text to tokenize
 * @return the number of tokens, or the number counted so far if tokenization
 *         fails (best-effort; the error is logged, never propagated)
 */
private int countTokensInText(String text) {
    int tokens = 0;
    // try-with-resources replaces the old finally { IOUtils.closeQuietly(...) }
    // pattern; the broad catch below also swallows close() failures, so the
    // best-effort contract of the original is preserved.
    try (WhitespaceTokenizer tokenizer = new WhitespaceTokenizer()) {
        tokenizer.setReader(new StringReader(text));
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
            ++tokens;
        }
        // The TokenStream workflow requires end() after the last
        // incrementToken() and before close(); the original omitted it.
        tokenizer.end();
    } catch (Exception e) {
        LOGGER.error("Error while tokenizing text. Returning.", e);
    }
    return tokens;
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:17,代码来源:DatasetAnalyzer.java

示例4: testReset

import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
/**
 * Verifies that a CommonGramsFilter replays the stream from the start after
 * its source tokenizer is given a fresh reader and the filter is reset.
 */
public void testReset() throws Exception {
  final String input = "How the s a brown s cow d like A B thing?";
  WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
  CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);

  CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
  // Consume the first few tokens: unigrams interleaved with common-grams.
  cgf.reset();
  for (String expected : new String[] {"How", "How_the", "the", "the_s"}) {
    assertTrue(cgf.incrementToken());
    assertEquals(expected, term.toString());
  }

  // A fresh reader plus reset() must restart tokenization from the top.
  wt.setReader(new StringReader(input));
  cgf.reset();
  assertTrue(cgf.incrementToken());
  assertEquals("How", term.toString());
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:22,代码来源:CommonGramsFilterTest.java

示例5: testQueryReset

import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
/**
 * Verifies that a CommonGramsQueryFilter chain replays from the start after
 * the underlying tokenizer gets a new reader and the chain is reset.
 */
public void testQueryReset() throws Exception {
  final String input = "How the s a brown s cow d like A B thing?";
  WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
  CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
  CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);

  CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
  // Query filter emits only the common-gram forms.
  nsf.reset();
  for (String expected : new String[] {"How_the", "the_s"}) {
    assertTrue(nsf.incrementToken());
    assertEquals(expected, term.toString());
  }

  // Fresh reader + reset() must restart the chain from the first gram.
  wt.setReader(new StringReader(input));
  nsf.reset();
  assertTrue(nsf.incrementToken());
  assertEquals("How_the", term.toString());
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:19,代码来源:CommonGramsFilterTest.java

示例6: testOverlappingAtBeginning

import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
@Test
public void testOverlappingAtBeginning() throws Exception {
    final CharArraySet phraseSets = new CharArraySet(Arrays.asList(
            "new york", "new york city", "city of new york"), false);

    final String input = "new york city is great";

    StringReader reader = new StringReader(input);
    final WhitespaceTokenizer in = new WhitespaceTokenizer();
    in.setReader(reader);

    AutoPhrasingTokenFilter aptf = new AutoPhrasingTokenFilter(in, phraseSets, false);
    aptf.setReplaceWhitespaceWith('_');
    CharTermAttribute term = aptf.addAttribute(CharTermAttribute.class);
    aptf.reset();

    assertTrue(aptf.incrementToken());
    assertEquals("new_york_city", term.toString());
    assertTrue(aptf.incrementToken());
    assertEquals("is", term.toString());
    assertTrue(aptf.incrementToken());
    assertEquals("great", term.toString());
}
 
开发者ID:jprante,项目名称:elasticsearch-plugin-bundle,代码行数:24,代码来源:AutoPhrasingTokenFilterTest.java

示例7: testOverlappingAtEnd

import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
@Test
public void testOverlappingAtEnd() throws Exception {
    final CharArraySet phraseSets = new CharArraySet(Arrays.asList(
            "new york", "new york city", "city of new york"), false);

    final String input = "the great city of new york";

    StringReader reader = new StringReader(input);
    final WhitespaceTokenizer in = new WhitespaceTokenizer();
    in.setReader(reader);

    AutoPhrasingTokenFilter aptf = new AutoPhrasingTokenFilter(in, phraseSets, false);
    aptf.setReplaceWhitespaceWith('_');
    CharTermAttribute term = aptf.addAttribute(CharTermAttribute.class);
    aptf.reset();

    assertTrue(aptf.incrementToken());
    assertEquals("the", term.toString());
    assertTrue(aptf.incrementToken());
    assertEquals("great", term.toString());
    assertTrue(aptf.incrementToken());
    assertEquals("city_of_new_york", term.toString());
}
 
开发者ID:jprante,项目名称:elasticsearch-plugin-bundle,代码行数:24,代码来源:AutoPhrasingTokenFilterTest.java

示例8: testIncompletePhrase

import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
@Test
public void testIncompletePhrase() throws Exception {
    final CharArraySet phraseSets = new CharArraySet(Arrays.asList(
            "big apple", "new york city", "property tax", "three word phrase"), false);

    final String input = "some new york";

    StringReader reader = new StringReader(input);
    final WhitespaceTokenizer in = new WhitespaceTokenizer();
    in.setReader(reader);

    AutoPhrasingTokenFilter aptf = new AutoPhrasingTokenFilter(in, phraseSets, false);
    aptf.setReplaceWhitespaceWith('_');
    CharTermAttribute term = aptf.addAttribute(CharTermAttribute.class);
    aptf.reset();

    assertTrue(aptf.incrementToken());
    assertEquals("some", term.toString());
    assertTrue(aptf.incrementToken());
    assertEquals("new", term.toString());
    assertTrue(aptf.incrementToken());
    assertEquals("york", term.toString());
}
 
开发者ID:jprante,项目名称:elasticsearch-plugin-bundle,代码行数:24,代码来源:AutoPhrasingTokenFilterTest.java

示例9: testTypical

import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
/**
 * Verifies that ConcatenateFilter joins all whitespace-separated tokens into
 * a single token of type "shingle" spanning the entire input, with position
 * increment 1 and offsets covering the whole string.
 */
public void testTypical() throws IOException {
  String NYC = "new york city";
  WhitespaceTokenizer stream = new WhitespaceTokenizer();
  stream.setReader(new StringReader(NYC));
  ConcatenateFilter filter = new ConcatenateFilter(stream);
  try {
    assertTokenStreamContents(filter, new String[]{NYC},
        new int[]{0}, new int[]{NYC.length()}, new String[]{"shingle"},
        new int[]{1}, null, NYC.length(), true);
  } catch (AssertionError e) {
    //assertTokenStreamContents tries to test if tokenStream.end() was implemented correctly.
    // It's manner of checking this is imperfect and incompatible with
    // ConcatenateFilter. Specifically it modifies a special attribute *after* incrementToken(),
    // which is weird. To the best of my ability, end() appears to be implemented correctly.
    // NOTE(review): this filters out exactly one known-spurious failure by its
    // message text; any other AssertionError is rethrown as a real failure.
    if (!e.getMessage().equals("super.end()/clearAttributes() was not called correctly in end()"))
      throw e;
  }
}
 
开发者ID:OpenSextant,项目名称:SolrTextTagger,代码行数:19,代码来源:ConcatenateFilterTest.java

示例10: testReset

import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
/**
 * Verifies that a CommonGramsFilter replays the stream from the start after
 * its source tokenizer is given a fresh reader and the filter is reset.
 */
public void testReset() throws Exception {
  final String input = "How the s a brown s cow d like A B thing?";
  WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
  CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);

  CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
  // Consume the first few tokens: unigrams interleaved with common-grams.
  cgf.reset();
  for (String expected : new String[] {"How", "How_the", "the", "the_s"}) {
    assertTrue(cgf.incrementToken());
    assertEquals(expected, term.toString());
  }
  cgf.close();

  // A fresh reader plus reset() must restart tokenization from the top.
  wt.setReader(new StringReader(input));
  cgf.reset();
  assertTrue(cgf.incrementToken());
  assertEquals("How", term.toString());
}
 
开发者ID:jimaguere,项目名称:Maskana-Gestor-de-Conocimiento,代码行数:23,代码来源:CommonGramsFilterTest.java

示例11: testQueryReset

import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
/**
 * Verifies that a CommonGramsQueryFilter chain replays from the start after
 * the underlying tokenizer gets a new reader and the chain is reset.
 */
public void testQueryReset() throws Exception {
  final String input = "How the s a brown s cow d like A B thing?";
  WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
  CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
  CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);

  CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
  // Query filter emits only the common-gram forms.
  nsf.reset();
  for (String expected : new String[] {"How_the", "the_s"}) {
    assertTrue(nsf.incrementToken());
    assertEquals(expected, term.toString());
  }
  nsf.close();

  // Fresh reader + reset() must restart the chain from the first gram.
  wt.setReader(new StringReader(input));
  nsf.reset();
  assertTrue(nsf.incrementToken());
  assertEquals("How_the", term.toString());
}
 
开发者ID:jimaguere,项目名称:Maskana-Gestor-de-Conocimiento,代码行数:20,代码来源:CommonGramsFilterTest.java

示例12: testReset

import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
/**
 * Verifies that an EdgeNGramTokenFilter produces the identical token stream
 * again after its source tokenizer is handed a fresh reader.
 */
public void testReset() throws Exception {
  WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(new StringReader("abcde"));
  EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(tokenizer, 1, 3);
  final String[] expectedTerms = {"a", "ab", "abc"};
  final int[] expectedStarts = {0, 0, 0};
  final int[] expectedEnds = {5, 5, 5};
  assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds);
  // Second pass over a new reader must yield exactly the same grams.
  tokenizer.setReader(new StringReader("abcde"));
  assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds);
}
 
开发者ID:europeana,项目名称:search,代码行数:8,代码来源:EdgeNGramTokenFilterTest.java

示例13: testReset

import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
/**
 * Verifies that an NGramTokenFilter produces the identical token stream
 * again after its source tokenizer is handed a fresh reader.
 */
public void testReset() throws Exception {
  WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(new StringReader("abcde"));
  NGramTokenFilter filter = new NGramTokenFilter(tokenizer, 1, 1);
  final String[] expectedTerms = {"a", "b", "c", "d", "e"};
  final int[] expectedStarts = {0, 0, 0, 0, 0};
  final int[] expectedEnds = {5, 5, 5, 5, 5};
  final int[] expectedPosIncs = {1, 0, 0, 0, 0};
  assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds, expectedPosIncs);
  // Second pass over a new reader must yield exactly the same grams.
  tokenizer.setReader(new StringReader("abcde"));
  assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds, expectedPosIncs);
}
 
开发者ID:europeana,项目名称:search,代码行数:8,代码来源:NGramTokenFilterTest.java

示例14: testReset

import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
/**
 * Verifies that a front-side EdgeNGramTokenFilter produces the identical
 * token stream again after its source tokenizer is handed a fresh reader.
 */
public void testReset() throws Exception {
  WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
  EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(tokenizer, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
  final String[] expectedTerms = {"a", "ab", "abc"};
  final int[] expectedStarts = {0, 0, 0};
  final int[] expectedEnds = {1, 2, 3};
  assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds);
  // Second pass over a new reader must yield exactly the same grams.
  tokenizer.setReader(new StringReader("abcde"));
  assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:8,代码来源:EdgeNGramTokenFilterTest.java

示例15: testReset

import org.apache.lucene.analysis.core.WhitespaceTokenizer; //导入方法依赖的package包/类
/**
 * Verifies that an NGramTokenFilter produces the identical token stream
 * again after its source tokenizer is handed a fresh reader.
 */
public void testReset() throws Exception {
  WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
  NGramTokenFilter filter = new NGramTokenFilter(tokenizer, 1, 1);
  final String[] expectedTerms = {"a", "b", "c", "d", "e"};
  final int[] expectedStarts = {0, 1, 2, 3, 4};
  final int[] expectedEnds = {1, 2, 3, 4, 5};
  assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds);
  // Second pass over a new reader must yield exactly the same grams.
  tokenizer.setReader(new StringReader("abcde"));
  assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:8,代码来源:NGramTokenFilterTest.java


注:本文中的org.apache.lucene.analysis.core.WhitespaceTokenizer.setReader方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。