

Java MockTokenizer.setEnableChecks Method Code Examples

This article collects typical usage examples of the Java method org.apache.lucene.analysis.MockTokenizer.setEnableChecks. If you are wondering how MockTokenizer.setEnableChecks is used in practice, or looking for concrete examples of it, the curated code samples below may help. You can also explore further usage examples of the containing class, org.apache.lucene.analysis.MockTokenizer.


The sections below present 13 code examples of the MockTokenizer.setEnableChecks method, sorted by popularity by default.
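Before the examples, the following minimal sketch illustrates what the method controls. This is only an illustrative sketch, not taken from any of the projects below; it assumes the same Lucene 4.x test-framework API that the examples use, and the class name SetEnableChecksSketch is made up for demonstration. MockTokenizer.setEnableChecks(boolean) toggles MockTokenizer's validation of the consumer workflow: reset(), then incrementToken() until it returns false, then end() and close(). With checks enabled, violating that order typically fails the test, which is why several examples below pass a consumeAll flag to this method.

import java.io.StringReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class SetEnableChecksSketch {
  public static void main(String[] args) throws Exception {
    MockTokenizer tokenizer = new MockTokenizer(new StringReader("hello world"), MockTokenizer.WHITESPACE, false);
    // With checks enabled, MockTokenizer verifies that the consumer follows the
    // TokenStream contract: reset(), incrementToken() until false, end(), close().
    tokenizer.setEnableChecks(true);
    CharTermAttribute term = tokenizer.getAttribute(CharTermAttribute.class);
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
      System.out.println(term.toString());
    }
    tokenizer.end();
    tokenizer.close();
  }
}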

Example 1: testMaxPosition3WithSynomyms

import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
public void testMaxPosition3WithSynomyms() throws IOException {
  for (final boolean consumeAll : new boolean[]{true, false}) {
    MockTokenizer tokenizer = new MockTokenizer(new StringReader("one two three four five"), MockTokenizer.WHITESPACE, false);
    // if we are consuming all tokens, we can use the checks, otherwise we can't
    tokenizer.setEnableChecks(consumeAll);

    SynonymMap.Builder builder = new SynonymMap.Builder(true);
    builder.add(new CharsRef("one"), new CharsRef("first"), true);
    builder.add(new CharsRef("one"), new CharsRef("alpha"), true);
    builder.add(new CharsRef("one"), new CharsRef("beguine"), true);
    CharsRefBuilder multiWordCharsRef = new CharsRefBuilder();
    SynonymMap.Builder.join(new String[]{"and", "indubitably", "single", "only"}, multiWordCharsRef);
    builder.add(new CharsRef("one"), multiWordCharsRef.get(), true);
    SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, multiWordCharsRef);
    builder.add(new CharsRef("two"), multiWordCharsRef.get(), true);
    SynonymMap synonymMap = builder.build();
    TokenStream stream = new SynonymFilter(tokenizer, synonymMap, true);
    stream = new LimitTokenPositionFilter(stream, 3, consumeAll);

    // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3.
    assertTokenStreamContents(stream,
        new String[]{"one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger"},
        new int[]{1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0});
  }
}
 
Developer: europeana, Project: search, Lines of code: 26, Source file: TestLimitTokenPositionFilter.java

Example 2: testReset

import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
public void testReset() throws Exception {
  CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz",
      "Aufgabe", "Überwachung");

  MockTokenizer wsTokenizer = new MockTokenizer(new StringReader("Rindfleischüberwachungsgesetz"), MockTokenizer.WHITESPACE, false);
  wsTokenizer.setEnableChecks(false); // we will reset in a strange place
  wsTokenizer.setReader(new StringReader("Rindfleischüberwachungsgesetz"));
  DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(
      wsTokenizer, dict,
      CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
  
  CharTermAttribute termAtt = tf.getAttribute(CharTermAttribute.class);
  tf.reset();
  assertTrue(tf.incrementToken());
  assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
  assertTrue(tf.incrementToken());
  assertEquals("Rind", termAtt.toString());
  tf.end();
  tf.close();
  wsTokenizer.setReader(new StringReader("Rindfleischüberwachungsgesetz"));
  tf.reset();
  assertTrue(tf.incrementToken());
  assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
}
 
Developer: europeana, Project: search, Lines of code: 27, Source file: TestCompoundWordTokenFilter.java

Example 3: testMaxPosition1WithShingles

import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
public void testMaxPosition1WithShingles() throws Exception {
  for (final boolean consumeAll : new boolean[]{true, false}) {
    Reader reader = new StringReader("one two three four five");
    MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    // if we are consuming all tokens, we can use the checks, otherwise we can't
    tokenizer.setEnableChecks(consumeAll);
    TokenStream stream = tokenizer;
    stream = tokenFilterFactory("Shingle",
        "minShingleSize", "2",
        "maxShingleSize", "3",
        "outputUnigrams", "true").create(stream);
    stream = tokenFilterFactory("LimitTokenPosition",
        LimitTokenPositionFilterFactory.MAX_TOKEN_POSITION_KEY, "1",
        LimitTokenPositionFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll)
    ).create(stream);
    assertTokenStreamContents(stream, new String[]{"one", "one two", "one two three"});
  }
}
 
Developer: europeana, Project: search, Lines of code: 19, Source file: TestLimitTokenPositionFilterFactory.java

Example 4: createComponents

import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
  MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
  tokenizer.setEnableChecks(true);
  TokenStream next;
  if (numStopChars != 0) {
    next = new TokenEater(preserveHoles, tokenizer, numStopChars);
  } else {
    next = tokenizer;
  }
  return new TokenStreamComponents(tokenizer, next);
}
 
Developer: europeana, Project: search, Lines of code: 13, Source file: FuzzySuggesterTest.java

Example 5: createComponents

import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
  MockTokenizer tokenizer = new MockTokenizer(MockUTF16TermAttributeImpl.UTF16_TERM_ATTRIBUTE_FACTORY, reader, MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
  tokenizer.setEnableChecks(true);
  TokenStream next;
  if (numStopChars != 0) {
    next = new TokenEater(preserveHoles, tokenizer, numStopChars);
  } else {
    next = tokenizer;
  }
  return new TokenStreamComponents(tokenizer, next);
}
 
Developer: europeana, Project: search, Lines of code: 13, Source file: AnalyzingSuggesterTest.java

Example 6: testMissingPayload

import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
public void testMissingPayload() throws Exception {
  Directory dir = newDirectory();

  // MockAnalyzer minus maybePayload else it sometimes stuffs in an 8-byte payload!
  Analyzer a = new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true, 100);
        tokenizer.setEnableChecks(true);
        MockTokenFilter filt = new MockTokenFilter(tokenizer, MockTokenFilter.EMPTY_STOPSET);
        return new TokenStreamComponents(tokenizer, filt);
      }
    };
  IndexWriterConfig iwc = newIndexWriterConfig(a);
  iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  Document doc = new Document();
  doc.add(newTextField("id", "id", Field.Store.NO));
  try {
    w.addDocument(doc);
    w.commit();
    fail("didn't hit expected exception");
  } catch (IllegalArgumentException iae) {
    // expected
  }
           
  w.close();
  dir.close();
}
 
Developer: europeana, Project: search, Lines of code: 30, Source file: TestIDVersionPostingsFormat.java

Example 7: testLongestOnly

import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
/** simple test for longestOnly option */
public void testLongestOnly() throws IOException {
  MockTokenizer tokenizer = new MockTokenizer(new StringReader("lucene is awesome"));
  tokenizer.setEnableChecks(true);
  HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary, true, true);
  assertTokenStreamContents(filter, new String[]{"lucene", "is", "awesome"}, new int[] {1, 1, 1});
}
 
Developer: europeana, Project: search, Lines of code: 8, Source file: TestHunspellStemFilter.java

Example 8: test

import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
public void test() throws Exception {
  for (final boolean consumeAll : new boolean[]{true, false}) {
    MockTokenizer tokenizer = new MockTokenizer(new StringReader("A1 B2 C3 D4 E5 F6"), MockTokenizer.WHITESPACE, false);
    tokenizer.setEnableChecks(consumeAll);
    TokenStream stream = new LimitTokenCountFilter(tokenizer, 3, consumeAll);
    assertTokenStreamContents(stream, new String[]{"A1", "B2", "C3"});
  }
}
 
Developer: europeana, Project: search, Lines of code: 9, Source file: TestLimitTokenCountFilter.java

Example 9: test

import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
public void test() throws Exception {
  for (final boolean consumeAll : new boolean[]{true, false}) {
    Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
    MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    tokenizer.setEnableChecks(consumeAll);
    TokenStream stream = tokenizer;
    stream = tokenFilterFactory("LimitTokenCount",
        LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY, "3",
        LimitTokenCountFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll)
    ).create(stream);
    assertTokenStreamContents(stream, new String[]{"A1", "B2", "C3"});
  }
}
 
Developer: europeana, Project: search, Lines of code: 14, Source file: TestLimitTokenCountFilterFactory.java

Example 10: testMaxPosition1

import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
public void testMaxPosition1() throws Exception {
  for (final boolean consumeAll : new boolean[]{true, false}) {
    Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
    MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    // if we are consuming all tokens, we can use the checks, otherwise we can't
    tokenizer.setEnableChecks(consumeAll);
    TokenStream stream = tokenizer;
    stream = tokenFilterFactory("LimitTokenPosition",
        LimitTokenPositionFilterFactory.MAX_TOKEN_POSITION_KEY, "1",
        LimitTokenPositionFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll)
    ).create(stream);
    assertTokenStreamContents(stream, new String[]{"A1"});
  }
}
 
Developer: europeana, Project: search, Lines of code: 15, Source file: TestLimitTokenPositionFilterFactory.java

Example 11: testMaxPosition2

import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
public void testMaxPosition2() throws IOException {
  for (final boolean consumeAll : new boolean[]{true, false}) {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        // if we are consuming all tokens, we can use the checks, otherwise we can't
        tokenizer.setEnableChecks(consumeAll);
        return new TokenStreamComponents(tokenizer, new LimitTokenPositionFilter(tokenizer, 2, consumeAll));
      }
    };

    // don't use assertAnalyzesTo here, as the end offset is not the end of the string (unless consumeAll is true, in which case its correct)!
    assertTokenStreamContents(a.tokenStream("dummy", "1  2     3  4  5"),
        new String[]{"1", "2"}, new int[]{0, 3}, new int[]{1, 4}, consumeAll ? 16 : null);
    assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")),
        new String[]{"1", "2"}, new int[]{0, 2}, new int[]{1, 3}, consumeAll ? 9 : null);

    // less than the limit, ensure we behave correctly
    assertTokenStreamContents(a.tokenStream("dummy", "1  "),
        new String[]{"1"}, new int[]{0}, new int[]{1}, consumeAll ? 3 : null);

    // equal to limit
    assertTokenStreamContents(a.tokenStream("dummy", "1  2  "),
        new String[]{"1", "2"}, new int[]{0, 3}, new int[]{1, 4}, consumeAll ? 6 : null);
  }
}
 
Developer: europeana, Project: search, Lines of code: 28, Source file: TestLimitTokenPositionFilter.java

Example 12: testExceptionJustBeforeFlush

import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
public void testExceptionJustBeforeFlush() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = RandomIndexWriter.mockIndexWriter(dir, 
                                                    newIndexWriterConfig(new MockAnalyzer(random()))
                                                      .setMaxBufferedDocs(2), 
                                                    new TestPoint1());
  Document doc = new Document();
  doc.add(newTextField("field", "a field", Field.Store.YES));
  w.addDocument(doc);

  Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
    @Override
    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
      MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
      return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
    }
  };

  Document crashDoc = new Document();
  crashDoc.add(newTextField("crash", "do it on token 4", Field.Store.YES));
  try {
    w.addDocument(crashDoc, analyzer);
    fail("did not hit expected exception");
  } catch (IOException ioe) {
    // expected
  }
  w.addDocument(doc);
  w.close();
  dir.close();
}
 
Developer: europeana, Project: search, Lines of code: 32, Source file: TestIndexWriterExceptions.java

Example 13: create

import org.apache.lucene.analysis.MockTokenizer; // import the package/class this method depends on
@Override
public MockTokenizer create(AttributeFactory factory, Reader input) {
  MockTokenizer t = new MockTokenizer(factory, input, pattern, false);
  t.setEnableChecks(enableChecks);
  return t;
}
 
Developer: europeana, Project: search, Lines of code: 7, Source file: MockTokenizerFactory.java
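A pattern worth calling out across Examples 1, 3, and 8 through 11: the checks are enabled only when the wrapping filter consumes every token, because an early-terminating filter such as LimitTokenCountFilter otherwise leaves the tokenizer only partially drained. The sketch below restates that pattern outside a test harness. It is illustrative only, assumes the same Lucene 4.x API used in the examples above, and the class name ConsumeAllChecksSketch is made up for demonstration.

import java.io.StringReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class ConsumeAllChecksSketch {
  public static void main(String[] args) throws Exception {
    for (boolean consumeAll : new boolean[]{true, false}) {
      MockTokenizer tokenizer = new MockTokenizer(new StringReader("A1 B2 C3 D4"), MockTokenizer.WHITESPACE, false);
      // The workflow checks expect the stream to be fully drained before end();
      // LimitTokenCountFilter stops after two tokens unless consumeAll is true,
      // so the checks can only be left enabled when consumeAll is true.
      tokenizer.setEnableChecks(consumeAll);
      TokenStream stream = new LimitTokenCountFilter(tokenizer, 2, consumeAll);
      CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        System.out.println(term.toString());
      }
      stream.end();
      stream.close();
    }
  }
}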


Note: The org.apache.lucene.analysis.MockTokenizer.setEnableChecks examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their respective developers, and copyright remains with the original authors; when distributing or using the code, please follow the corresponding project's license. Do not reproduce this article without permission.