

Java MockTokenizer.WHITESPACE Field Code Examples

This article collects typical usage examples of the MockTokenizer.WHITESPACE field from org.apache.lucene.analysis.MockTokenizer, part of Lucene's test framework. WHITESPACE is a CharacterRunAutomaton constant that configures a MockTokenizer to split its input on whitespace, mimicking WhitespaceTokenizer. If you are wondering what MockTokenizer.WHITESPACE is for or how to use it, the curated examples below should help; they also illustrate the broader usage of the enclosing MockTokenizer class.


The sections below present 15 code examples that use the MockTokenizer.WHITESPACE field, ordered by popularity.
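Before diving into the examples, here is a minimal, self-contained sketch of typical usage. This is a sketch under stated assumptions: it assumes the lucene-test-framework dependency and Lucene 5.0+, where a Tokenizer is constructed without a Reader and receives its input via setReader; the class and method names are hypothetical, not taken from any of the projects below. Note that the examples in this article target different Lucene versions: the older ones pass a Reader directly to the MockTokenizer constructor, while the newer ones call setReader after construction, as the sketch does.

import java.io.StringReader;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;

public class MockWhitespaceSketchTest extends BaseTokenStreamTestCase {

    public void testWhitespaceAutomaton() throws Exception {
        // MockTokenizer.WHITESPACE is a CharacterRunAutomaton constant that
        // makes the tokenizer split its input on whitespace, mimicking
        // WhitespaceTokenizer; the second argument controls lowercasing.
        MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        tokenizer.setReader(new StringReader("Hello Lucene World"));

        // Drives the full reset/incrementToken/end/close lifecycle and
        // compares the produced terms against the expected array.
        assertTokenStreamContents(tokenizer, new String[] { "Hello", "Lucene", "World" });
    }
}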

Example 1: testSimple

public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new UniqueTokenFilter(t));
        }
    };

    TokenStream test = analyzer.tokenStream("test", "this test with test");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("this"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("test"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("with"));

    assertThat(test.incrementToken(), equalTo(false));
}
 
Developer: justor, Project: elasticsearch_my, Lines: 23, Source: UniqueTokenFilterTests.java

Example 2: testRandomStrings

/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  for (int i = 0; i < 10; i++) {
    final int min = TestUtil.nextInt(random(), 2, 10);
    final int max = TestUtil.nextInt(random(), min, 20);
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, 
            new NGramTokenFilter(tokenizer, min, max));
      }    
    };
    checkRandomData(random(), a, 200*RANDOM_MULTIPLIER, 20);
  }
}
 
Developer: europeana, Project: search, Lines: 16, Source: NGramTokenFilterTest.java

Example 3: testWordComponentWithLessThanMinimumLength

public void testWordComponentWithLessThanMinimumLength() throws Exception {
  CharArraySet dict = makeDictionary("abc", "d", "efg");

  Tokenizer tokenizer = new MockTokenizer(new StringReader("abcdefg"), MockTokenizer.WHITESPACE, false);
  DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(
    tokenizer,
    dict,
    CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
    CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
    CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);

  // Since "d" is shorter than the minimum subword size, it is not added to the token stream.
  assertTokenStreamContents(tf,
    new String[] { "abcdefg", "abc", "efg" },
    new int[] { 0, 0, 0 },
    new int[] { 7, 7, 7 },
    new int[] { 1, 0, 0 });
}
 
Developer: europeana, Project: search, Lines: 22, Source: TestCompoundWordTokenFilter.java

Example 4: testOutputHangsOffEnd

public void testOutputHangsOffEnd() throws Exception {
  b = new SynonymMap.Builder(true);
  final boolean keepOrig = false;
  // b hangs off the end (no input token under it):
  add("a", "a b", keepOrig);
  tokensIn = new MockTokenizer(new StringReader("a"),
                               MockTokenizer.WHITESPACE,
                               true);
  tokensIn.reset();
  assertTrue(tokensIn.incrementToken());
  assertFalse(tokensIn.incrementToken());
  tokensIn.end();
  tokensIn.close();

  tokensOut = new SynonymFilter(tokensIn,
                                b.build(),
                                true);
  termAtt = tokensOut.addAttribute(CharTermAttribute.class);
  posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
  offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);
  posLenAtt = tokensOut.addAttribute(PositionLengthAttribute.class);

  // Make sure endOffset inherits from previous input token:
  verify("a", "a b:1");
}
 
Developer: europeana, Project: search, Lines: 25, Source: TestSynonymMapFilter.java

Example 5: testFuzzySlopeExtendability

public void testFuzzySlopeExtendability() throws ParseException {
  QueryParser qp = new QueryParser("a",  new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {

    @Override
    Query handleBareFuzzy(String qfield, Token fuzzySlop, String termImage)
        throws ParseException {
      
      if (fuzzySlop.image.endsWith("€")) {
        float fms = fuzzyMinSim;
        try {
          fms = Float.parseFloat(fuzzySlop.image.substring(1, fuzzySlop.image.length() - 1));
        } catch (Exception ignored) { }
        float value = Float.parseFloat(termImage);
        return getRangeQuery(qfield, Float.toString(value-fms/2.f), Float.toString(value+fms/2.f), true, true);
      }
      return super.handleBareFuzzy(qfield, fuzzySlop, termImage);
    }
    
  };
  assertEquals(qp.parse("a:[11.95 TO 12.95]"), qp.parse("12.45~1€"));
}
 
Developer: europeana, Project: search, Lines: 21, Source: TestQueryParser.java

Example 6: testSimple

public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new TruncateTokenFilter(t, 3));
        }
    };

    TokenStream test = analyzer.tokenStream("test", "a bb ccc dddd eeeee");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("a"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("bb"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("ccc"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("ddd"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("eee"));

    assertThat(test.incrementToken(), equalTo(false));
}
 
Developer: justor, Project: elasticsearch_my, Lines: 29, Source: TruncateTokenFilterTests.java

Example 7: testUsingPackagedWordNetReader

@Test
public void testUsingPackagedWordNetReader() throws IOException {
    Map<String, String> args = new HashMap<>();
    LemmatizerFilterFactory factory = new LemmatizerFilterFactory(args);
    StringReader reader = new StringReader("it better works");
    final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    in.setReader(reader);
    TokenStream stream = factory.create(in);
    assertTokenStreamContents(stream, new String[] { "it", "good", "work" });
}
 
Developer: nicholasding, Project: solr-lemmatizer, Lines: 10, Source: LemmatizerFilterFactoryTest.java

Example 8: testWithSamplePhrase

@Test
public void testWithSamplePhrase() throws IOException {
    StringReader reader = new StringReader("it better works");
    final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    in.setReader(reader);
    TokenStream stream = new LemmatizerFilter(in, new WordNetLemmatizer(new PackagedWordNetReader("wordnet.zip"), new RTrie()));
    assertTokenStreamContents(stream, new String[] { "it", "good", "work" });
}
 
Developer: nicholasding, Project: solr-lemmatizer, Lines: 8, Source: LemmatizerFilterTest.java

Example 9: testUsingPackagedWordNetReaderFromFilterFactory

@Test
public void testUsingPackagedWordNetReaderFromFilterFactory() throws IOException {
    Map<String, String> args = new HashMap<>();
    LemmatizerFilterFactory factory = new LemmatizerFilterFactory(args);

    StringReader reader = new StringReader("it better works");
    final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    in.setReader(reader);
    TokenStream stream = factory.create(in);
    assertTokenStreamContents(stream, new String[] { "it", "good", "work" });
}
 
Developer: nicholasding, Project: solr-lemmatizer, Lines: 11, Source: LemmatizerFilterTest.java

Example 10: testUsingDirectoryWordNetReaderWithDummyPathShouldFailSilently

@Test
public void testUsingDirectoryWordNetReaderWithDummyPathShouldFailSilently() throws IOException {
    Map<String, String> args = new HashMap<>();
    args.put("dictPath", "/tmp");
    LemmatizerFilterFactory factory = new LemmatizerFilterFactory(args);

    StringReader reader = new StringReader("it better works");
    final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    in.setReader(reader);
    TokenStream stream = factory.create(in);
    assertTokenStreamContents(stream, new String[] { "it", "better", "works" });
}
 
Developer: nicholasding, Project: solr-lemmatizer, Lines: 12, Source: LemmatizerFilterTest.java

Example 11: testStemming

/**
 * Ensure the filter actually stems text.
 */
public void testStemming() throws Exception {
  Reader reader = new StringReader("dogs");
  TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
  stream = tokenFilterFactory("PorterStem").create(stream);
  assertTokenStreamContents(stream, new String[] { "dog" });
}
 
Developer: europeana, Project: search, Lines: 9, Source: TestPorterStemFilterFactory.java
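A note on the helpers used in this and the following factory examples: tokenFilterFactory and charFilterFactory come from Lucene's test framework (these tests extend BaseTokenStreamFactoryTestCase) and, roughly, instantiate the named factory with the given key/value arguments; assertTokenStreamContents comes from BaseTokenStreamTestCase and exhausts the stream, comparing each emitted token (and, where given, offsets and position increments) against the expected arrays.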

Example 12: testReplaceByEmpty

public void testReplaceByEmpty() throws Exception {
  Reader reader = new StringReader("aa bb cc");
  reader = charFilterFactory("PatternReplace",
      "pattern", "(aa)\\s+(bb)\\s+(cc)").create(reader);
  TokenStream ts = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
  assertTokenStreamContents(ts, new String[] {});
}
 
Developer: europeana, Project: search, Lines: 7, Source: TestPatternReplaceCharFilterFactory.java

Example 13: testStripFirst

public void testStripFirst() throws Exception {
  String input = "aabfooaabfooabfoob ab caaaaaaaaab";
  TokenStream ts = new PatternReplaceFilter
          (new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
                  Pattern.compile("a*b"),
                  null, false);
  assertTokenStreamContents(ts,
      new String[] { "fooaabfooabfoob", "", "c" });
}
 
Developer: europeana, Project: search, Lines: 9, Source: TestPatternReplaceFilter.java

Example 14: testCapitalization

public void testCapitalization() throws Exception {
  Reader reader = new StringReader("kiTTEN");
  TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
  stream = tokenFilterFactory("Capitalization",
      "keep", "and the it BIG",
      "onlyFirstWord", "true").create(stream);
  assertTokenStreamContents(stream, new String[] { "Kitten" });
}
 
Developer: europeana, Project: search, Lines: 8, Source: TestCapitalizationFilterFactory.java

Example 15: testDefaults

/**
 * If no words are provided, then a set of english default stopwords is used.
 */
public void testDefaults() throws Exception {
  CommonGramsQueryFilterFactory factory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery");
  CharArraySet words = factory.getCommonWords();
  assertTrue("words is null and it shouldn't be", words != null);
  assertTrue(words.contains("the"));
  Tokenizer tokenizer = new MockTokenizer(new StringReader("testing the factory"), MockTokenizer.WHITESPACE, false);
  TokenStream stream = factory.create(tokenizer);
  assertTokenStreamContents(stream, 
      new String[] { "testing_the", "the_factory" });
}
 
Developer: europeana, Project: search, Lines: 13, Source: TestCommonGramsQueryFilterFactory.java


Note: The org.apache.lucene.analysis.MockTokenizer.WHITESPACE examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, who retain copyright; consult each project's license before redistributing or reusing the code. Please do not reproduce this article without permission.