当前位置: 首页>>代码示例>>Java>>正文


Java MockTokenizer.reset方法代码示例

本文整理汇总了Java中org.apache.lucene.analysis.MockTokenizer.reset方法的典型用法代码示例。如果您正苦于以下问题：Java MockTokenizer.reset方法的具体用法？Java MockTokenizer.reset怎么用？Java MockTokenizer.reset使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.analysis.MockTokenizer的用法示例。


在下文中一共展示了MockTokenizer.reset方法的4个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testOutputHangsOffEnd

import org.apache.lucene.analysis.MockTokenizer; //导入方法依赖的package包/类
/**
 * Registers a rule mapping the single input token "a" to the two-token
 * output "a b", so the trailing output token "b" has no input token
 * beneath it, then checks the filter output for the input "a".
 *
 * <p>Before the filter is built, the tokenizer is driven once through its
 * full lifecycle over the one-token text "a": reset, exactly one token,
 * exhaustion, end, close.
 */
public void testOutputHangsOffEnd() throws Exception {
  b = new SynonymMap.Builder(true);

  // Third argument is keepOrig=false. The output token "b" hangs off the
  // end of the input (there is no input token under it).
  add("a", "a b", false);

  // Run the tokenizer to completion once; it must yield exactly one token.
  tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
  tokensIn.reset();
  assertTrue(tokensIn.incrementToken());
  assertFalse(tokensIn.incrementToken());
  tokensIn.end();
  tokensIn.close();

  // Filter under test, plus the attribute handles stored in the shared fields.
  tokensOut = new SynonymFilter(tokensIn, b.build(), true);
  termAtt = tokensOut.addAttribute(CharTermAttribute.class);
  posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
  offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);
  posLenAtt = tokensOut.addAttribute(PositionLengthAttribute.class);

  // The end offset of the hanging token must be inherited from the
  // previous input token.
  verify("a", "a b:1");
}
 
开发者ID:europeana,项目名称:search,代码行数:26,代码来源:TestSynonymMapFilter.java

示例2: testBasic2

import org.apache.lucene.analysis.MockTokenizer; //导入方法依赖的package包/类
/**
 * Registers two multi-token synonym rules ("aaa" and "bbb") and verifies the
 * filter output for two five-word inputs.
 *
 * <p>{@code keepOriginal} is a compile-time toggle passed to every
 * {@code add} call; it also selects which pair of expected strings is
 * checked. It is currently {@code false}.
 */
public void testBasic2() throws Exception {
  final boolean keepOriginal = false;

  b = new SynonymMap.Builder(true);
  add("aaa", "aaaa1 aaaa2 aaaa3", keepOriginal);
  add("bbb", "bbbb1 bbbb2", keepOriginal);

  // Run the tokenizer to completion once over the one-token text "a".
  tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
  tokensIn.reset();
  assertTrue(tokensIn.incrementToken());
  assertFalse(tokensIn.incrementToken());
  tokensIn.end();
  tokensIn.close();

  tokensOut = new SynonymFilter(tokensIn, b.build(), true);
  termAtt = tokensOut.addAttribute(CharTermAttribute.class);
  posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
  posLenAtt = tokensOut.addAttribute(PositionLengthAttribute.class);
  offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);

  // Pick the expectation matching the toggle, then verify "bbb" before "aaa".
  final String expectedForBbb = keepOriginal
      ? "xyzzy bbb/bbbb1 pot/bbbb2 of gold"
      : "xyzzy bbbb1 pot/bbbb2 of gold";
  final String expectedForAaa = keepOriginal
      ? "xyzzy aaa/aaaa1 pot/aaaa2 of/aaaa3 gold"
      : "xyzzy aaaa1 pot/aaaa2 of/aaaa3 gold";

  verify("xyzzy bbb pot of gold", expectedForBbb);
  verify("xyzzy aaa pot of gold", expectedForAaa);
}
 
开发者ID:europeana,项目名称:search,代码行数:31,代码来源:TestSynonymMapFilter.java

示例3: testBasic

import org.apache.lucene.analysis.MockTokenizer; //导入方法依赖的package包/类
/**
 * Registers a mix of single- and multi-word synonym rules and verifies the
 * filter output for a series of inputs, also checking the filter's capture
 * count after selected verifications.
 *
 * <p>The third argument to {@code add} appears to be the keep-original flag
 * (sibling tests pass a variable named {@code keepOrig} there); rules below
 * mix {@code true} and {@code false}.
 */
public void testBasic() throws Exception {
  b = new SynonymMap.Builder(true);
  add("a", "foo", true);
  add("a b", "bar fee", true);
  add("b c", "dog collar", true);
  add("c d", "dog harness holder extras", true);
  add("m c e", "dog barks loudly", false);
  add("i j k", "feep", true);

  // Two separate rules registered for the same input "e f":
  add("e f", "foo bar", false);
  add("e f", "baz bee", false);

  // Single-token inputs, one with and one without the flag set:
  add("z", "boo", false);
  add("y", "bee", true);

  // Drive the tokenizer once through reset / one token / exhaustion / end /
  // close over the one-token text "a" before wrapping it in the filter.
  tokensIn = new MockTokenizer(new StringReader("a"),
                               MockTokenizer.WHITESPACE,
                               true);
  tokensIn.reset();
  assertTrue(tokensIn.incrementToken());
  assertFalse(tokensIn.incrementToken());
  tokensIn.end();
  tokensIn.close();

  // Filter under test; attribute handles are stored in the shared fields.
  tokensOut = new SynonymFilter(tokensIn,
                                   b.build(),
                                   true);
  termAtt = tokensOut.addAttribute(CharTermAttribute.class);
  posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
  posLenAtt = tokensOut.addAttribute(PositionLengthAttribute.class);
  offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);

  verify("a b c", "a/bar b/fee c");

  // syn output extends beyond input tokens
  verify("x a b c d", "x a/bar b/fee c/dog d/harness holder extras");

  verify("a b a", "a/bar b/fee a/foo");

  // outputs that add to one another:
  verify("c d c d", "c/dog d/harness c/holder/dog d/extras/harness holder extras");

  // two outputs for same input
  verify("e f", "foo/baz bar/bee");

  // verify multi-word / single-output offsets:
  verify("g i j k g", "g i/feep:7_3 j k g");

  // mixed keepOrig true/false:
  verify("a m c e x", "a/foo dog barks loudly x");
  verify("c d m c e x", "c/dog d/harness holder/dog extras/barks loudly x");
  // After the multi-word match above, the filter must report captures.
  assertTrue(tokensOut.getCaptureCount() > 0);

  // no captureStates when no syns matched
  verify("p q r s t", "p q r s t");
  assertEquals(0, tokensOut.getCaptureCount());

  // no captureStates when only single-input syns, w/ no
  // lookahead needed, matched
  verify("p q z y t", "p q boo y/bee t");
  assertEquals(0, tokensOut.getCaptureCount());
}
 
开发者ID:europeana,项目名称:search,代码行数:63,代码来源:TestSynonymMapFilter.java

示例4: testRandom

import org.apache.lucene.analysis.MockTokenizer; //导入方法依赖的package包/类
/**
 * Randomized test: builds a random document and a random set of synonym
 * rules, computes the expected output with {@code slowSynMatcher}, and
 * verifies the filter against it.
 *
 * <p>Rules sharing the same input string are collected into one
 * {@code OneSyn} entry, whose keep-original flag is chosen once, when the
 * entry is first created. When the builder was created with dedup enabled,
 * {@code pruneDups} is applied to the rule list before the expectation is
 * computed.
 */
public void testRandom() throws Exception {
  final int alphabetLen = TestUtil.nextInt(random(), 2, 7);
  final int documentLen = atLeast(3000);
  final String doc = getRandomString('a', alphabetLen, documentLen);
  if (VERBOSE) {
    System.out.println("TEST: doc=" + doc);
  }

  final int ruleCount = atLeast(5);
  final Map<String,OneSyn> rulesByInput = new HashMap<>();
  final List<OneSyn> rules = new ArrayList<>();
  final boolean dedupOutputs = random().nextBoolean();
  if (VERBOSE) {
    System.out.println("  dedup=" + dedupOutputs);
  }

  b = new SynonymMap.Builder(dedupOutputs);
  for (int i = 0; i < ruleCount; i++) {
    final int inputLen = TestUtil.nextInt(random(), 1, 5);
    final String input = getRandomString('a', alphabetLen, inputLen).trim();

    // Reuse the entry for an input seen before; otherwise create one and
    // fix its keep-original flag now.
    OneSyn rule = rulesByInput.get(input);
    if (rule == null) {
      rule = new OneSyn();
      rule.in = input;
      rule.out = new ArrayList<>();
      rule.keepOrig = random().nextBoolean();
      rules.add(rule);
      rulesByInput.put(input, rule);
    }

    final int outputLen = TestUtil.nextInt(random(), 1, 5);
    final String output = getRandomString('0', 10, outputLen).trim();
    rule.out.add(output);
    add(input, output, rule.keepOrig);
    if (VERBOSE) {
      System.out.println("  syns[" + i + "] = " + rule.in + " -> " + rule.out + " keepOrig=" + rule.keepOrig);
    }
  }

  // Run the tokenizer to completion once over the one-token text "a".
  tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
  tokensIn.reset();
  assertTrue(tokensIn.incrementToken());
  assertFalse(tokensIn.incrementToken());
  tokensIn.end();
  tokensIn.close();

  tokensOut = new SynonymFilter(tokensIn, b.build(), true);
  termAtt = tokensOut.addAttribute(CharTermAttribute.class);
  posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
  posLenAtt = tokensOut.addAttribute(PositionLengthAttribute.class);
  offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);

  if (dedupOutputs) {
    pruneDups(rules);
  }

  final String expectedOutput = slowSynMatcher(doc, rules, 5);
  if (VERBOSE) {
    System.out.println("TEST: expected=" + expectedOutput);
  }

  verify(doc, expectedOutput);
}
 
开发者ID:europeana,项目名称:search,代码行数:72,代码来源:TestSynonymMapFilter.java


注:本文中的org.apache.lucene.analysis.MockTokenizer.reset方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。