

Java MockTokenizer Class Code Examples

This article collects typical usage examples of the Java class org.apache.lucene.analysis.MockTokenizer. If you have been wondering what MockTokenizer is for, or how to use it in practice, the examples selected below should help.


The MockTokenizer class belongs to the org.apache.lucene.analysis package. Fifteen code examples of the class are shown below, sorted by popularity by default.
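Before the examples, some orientation: MockTokenizer ships with Lucene's lucene-test-framework artifact rather than lucene-core. It is a deterministic tokenizer for tests that also asserts the TokenStream contract (reset before the first incrementToken, then end and close), so a consumer that skips a step fails loudly. The sketch below is a minimal, illustrative example (the class name MockTokenizerDemo is invented for this sketch); it assumes a Lucene 5.x+ API where a Tokenizer receives its input via setReader, whereas several later examples use the older 4.x constructor that takes a Reader directly.

import java.io.StringReader;

import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class MockTokenizerDemo {
    public static void main(String[] args) throws Exception {
        // MockTokenizer.WHITESPACE splits on whitespace; the boolean flag
        // controls whether tokens are lowercased.
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        tokenizer.setReader(new StringReader("Hello MockTokenizer world"));

        CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
        tokenizer.reset();                    // required before the first incrementToken()
        while (tokenizer.incrementToken()) {  // prints: Hello / MockTokenizer / world
            System.out.println(term.toString());
        }
        tokenizer.end();                      // required by the TokenStream contract
        tokenizer.close();
    }
}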

Example 1: testSimple

import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new UniqueTokenFilter(t));
        }
    };

    TokenStream test = analyzer.tokenStream("test", "this test with test");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("this"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("test"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("with"));

    assertThat(test.incrementToken(), equalTo(false));
}
 
Author: justor | Project: elasticsearch_my | Lines: 24 | Source: UniqueTokenFilterTests.java

Example 2: testBackwardsCompatibilityEdgeNgramTokenFilter

import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testBackwardsCompatibilityEdgeNgramTokenFilter() throws Exception {
    int iters = scaledRandomIntBetween(20, 100);
    for (int i = 0; i < iters; i++) {
        final Index index = new Index("test", "_na_");
        final String name = "ngr";
        Version v = randomVersion(random());
        Builder builder = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3);
        boolean reverse = random().nextBoolean();
        if (reverse) {
            builder.put("side", "back");
        }
        Settings settings = builder.build();
        Settings indexSettings = newAnalysisSettingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, v.id).build();
        Tokenizer tokenizer = new MockTokenizer();
        tokenizer.setReader(new StringReader("foo bar"));
        TokenStream edgeNGramTokenFilter = new EdgeNGramTokenFilterFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create(tokenizer);
        if (reverse) {
            assertThat(edgeNGramTokenFilter, instanceOf(ReverseStringFilter.class));
        } else {
            assertThat(edgeNGramTokenFilter, instanceOf(EdgeNGramTokenFilter.class));
        }
    }
}
 
Author: justor | Project: elasticsearch_my | Lines: 24 | Source: NGramTokenizerFactoryTests.java
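Background for the reverse branch: in these older Elasticsearch versions, side=back edge n-grams appear to be implemented by reversing the stream, taking front edge n-grams, and reversing again, so the outermost wrapper the factory returns is a ReverseStringFilter. That is why the assertion checks for ReverseStringFilter rather than EdgeNGramTokenFilter when reverse is true.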

Example 3: testCountPositions

import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testCountPositions() throws IOException {
    // We're looking to make sure that we:
    Token t1 = new Token();      // Don't count tokens without an increment
    t1.setPositionIncrement(0);
    Token t2 = new Token();
    t2.setPositionIncrement(1);  // Count normal tokens with one increment
    Token t3 = new Token();
    t3.setPositionIncrement(2);  // Count funny tokens with more than one increment
    int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them
    Token[] tokens = new Token[] {t1, t2, t3};
    Collections.shuffle(Arrays.asList(tokens), random());
    final TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
    // TODO: we have no CannedAnalyzer?
    Analyzer analyzer = new Analyzer() {
            @Override
            public TokenStreamComponents createComponents(String fieldName) {
                return new TokenStreamComponents(new MockTokenizer(), tokenStream);
            }
        };
    assertThat(TokenCountFieldMapper.countPositions(analyzer, "", ""), equalTo(7));
}
 
Author: justor | Project: elasticsearch_my | Lines: 22 | Source: TokenCountFieldMapperTests.java
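Sanity-checking the expected count: countPositions sums every token's position increment plus the stream's final increment, which here gives 0 + 1 + 2 + 4 = 7. Because it is a plain sum, shuffling the token order does not change the result.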

Example 4: testRecursion3

import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testRecursion3() throws Exception {
  b = new SynonymMap.Builder(true);
  final boolean keepOrig = true;
  add("zoo zoo", "zoo", keepOrig);
  final SynonymMap map = b.build();
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
    }
  };
  
  assertAnalyzesTo(a, "zoo zoo $ zoo",
      new String[] { "zoo", "zoo", "zoo", "$", "zoo" },
      new int[] { 1, 0, 1, 1, 1 });
}
 
Author: europeana | Project: search | Lines: 18 | Source: TestSynonymMapFilter.java
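Reading the expected increments: the rule maps the phrase "zoo zoo" to the single token "zoo", and with keepOrig=true the synonym is stacked at the position of the first original "zoo". That is why the second output token has a position increment of 0 while every other token advances by 1.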

Example 5: testInvalidArguments

import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
/**
 * Test that invalid arguments result in an exception
 */
public void testInvalidArguments() throws Exception {
  for (final String arg : new String[]{"minWordLength", "maxTokenLength", "maxWordCount"}) {
    try {
      Reader reader = new StringReader("foo foobar super-duper-trooper");
      TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

      tokenFilterFactory("Capitalization",
          "keep", "and the it BIG",
          "onlyFirstWord", "false",
          arg, "-3",
          "okPrefix", "McK",
          "forceFirstLetter", "true").create(stream);
      fail();
    } catch (IllegalArgumentException expected) {
      assertTrue(expected.getMessage().contains(arg + " must be greater than or equal to zero")
          || expected.getMessage().contains(arg + " must be greater than zero"));
    }
  }
}
 
Author: europeana | Project: search | Lines: 23 | Source: TestCapitalizationFilterFactory.java
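A note on the assertion: the loop feeds the illegal value -3 to each numeric argument in turn and expects the factory to reject it. The test accepts either message variant, presumably because some of these parameters must be strictly positive while others only need to be non-negative.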

Example 6: testRandomString

import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testRandomString() throws Exception {
  Analyzer a = new Analyzer() {

    @Override
    protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader,
          MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer,
          new PatternCaptureGroupTokenFilter(tokenizer, false,
              Pattern.compile("((..)(..))")));
    }
  };

  checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
}
 
Author: europeana | Project: search | Lines: 17 | Source: TestPatternCaptureGroupTokenFilter.java

Example 7: setUp

import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
/**
 * Initializes the tests by adding 4 identical documents to the index.
 */
@Override
public void setUp() throws Exception {
  super.setUp();
  // create test index
  mDirectory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory, 
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))
          .setMergePolicy(newLogMergePolicy()).setSimilarity(new DefaultSimilarity()));
  addDocument(writer, "1", "I think it should work.");
  addDocument(writer, "2", "I think it should work.");
  addDocument(writer, "3", "I think it should work.");
  addDocument(writer, "4", "I think it should work.");
  reader = writer.getReader();
  writer.close();
  searcher = newSearcher(reader);
  searcher.setSimilarity(new DefaultSimilarity());
}
 
Author: europeana | Project: search | Lines: 21 | Source: TestSpansAdvanced.java

Example 8: testHyphenationCompoundWordsDELongestMatch

import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testHyphenationCompoundWordsDELongestMatch() throws Exception {
  CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv");

  InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
  HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
      .getHyphenationTree(is);

  // the word basket will not be added due to the longest match option
  HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
      new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), 
      hyphenator, dict,
      CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true);
  assertTokenStreamContents(tf, 
      new String[] { "basketballkurv", "basketball", "ball", "kurv" },
      new int[] { 1, 0, 0, 0 }
  );

}
 
Author: europeana | Project: search | Lines: 20 | Source: TestCompoundWordTokenFilter.java

Example 9: test

import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void test() throws IOException {

    PrefixAwareTokenFilter ts;

    ts = new PrefixAwareTokenFilter(
        new SingleTokenTokenStream(createToken("a", 0, 1)),
        new SingleTokenTokenStream(createToken("b", 0, 1)));
    assertTokenStreamContents(ts, 
        new String[] { "a", "b" },
        new int[] { 0, 1 },
        new int[] { 1, 2 });

    // prefix and suffix using 2x prefix

    ts = new PrefixAwareTokenFilter(new SingleTokenTokenStream(createToken("^", 0, 0)),
        new MockTokenizer(new StringReader("hello world"), MockTokenizer.WHITESPACE, false));
    ts = new PrefixAwareTokenFilter(ts, new SingleTokenTokenStream(createToken("$", 0, 0)));

    assertTokenStreamContents(ts,
        new String[] { "^", "hello", "world", "$" },
        new int[] { 0, 0, 6, 11 },
        new int[] { 0, 5, 11, 11 });
  }
 
Author: europeana | Project: search | Lines: 24 | Source: TestPrefixAwareTokenFilter.java

Example 10: checkPhraseQuery

import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
private float checkPhraseQuery(Document doc, PhraseQuery query, int slop, int expectedNumResults) throws Exception {
  query.setSlop(slop);

  Directory ramDir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), ramDir, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
  writer.addDocument(doc);

  IndexReader reader = writer.getReader();

  IndexSearcher searcher = newSearcher(reader);
  MaxFreqCollector c = new MaxFreqCollector();
  searcher.search(query, c);
  assertEquals("slop: "+slop+"  query: "+query+"  doc: "+doc+"  Wrong number of hits", expectedNumResults, c.totalHits);

  //QueryUtils.check(query,searcher);
  writer.close();
  reader.close();
  ramDir.close();

  // returns the max Scorer.freq() found, because even though norms are omitted, many index stats are different
  // with these different tokens/distributions/lengths.. otherwise this test is very fragile.
  return c.max; 
}
 
Author: europeana | Project: search | Lines: 24 | Source: TestSloppyPhraseQuery.java

Example 11: testTokenEndingWithWordComponentOfMinimumLength

import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testTokenEndingWithWordComponentOfMinimumLength() throws Exception {
  CharArraySet dict = makeDictionary("ab", "cd", "ef");

  Tokenizer tokenizer = new MockTokenizer(new StringReader("abcdef"), MockTokenizer.WHITESPACE, false);
  // decompound the MockTokenizer output against the dictionary
  DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(
    tokenizer,
    dict,
    CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
    CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
    CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);

  assertTokenStreamContents(tf,
    new String[] { "abcdef", "ab", "cd", "ef" },
    new int[] { 0, 0, 0, 0},
    new int[] { 6, 6, 6, 6},
    new int[] { 1, 0, 0, 0}
    );
}
 
Author: europeana | Project: search | Lines: 22 | Source: TestCompoundWordTokenFilter.java
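Note the offset arrays: every subword reports the start/end offsets (0/6) of the whole compound, since compound-word filters give decompounded parts the offsets of the original token. Only the compound itself carries a position increment of 1; the subwords are stacked at the same position with increment 0.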

Example 12: beforeClass

import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
@BeforeClass
public static void beforeClass() throws Exception {
  String[] data = new String[] { "A 1 2 3 4 5 6", "Z       4 5 6", null,
      "B   2   4 5 6", "Y     3   5 6", null, "C     3     6",
      "X       4 5 6" };

  small = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), small, 
      newIndexWriterConfig(
          new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)).setMergePolicy(newLogMergePolicy()));

  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setTokenized(false);
  for (int i = 0; i < data.length; i++) {
    Document doc = new Document();
    doc.add(newField("id", String.valueOf(i), customType));// Field.Keyword("id",String.valueOf(i)));
    doc.add(newField("all", "all", customType));// Field.Keyword("all","all"));
    if (null != data[i]) {
      doc.add(newTextField("data", data[i], Field.Store.YES));// Field.Text("data",data[i]));
    }
    writer.addDocument(doc);
  }

  reader = writer.getReader();
  writer.close();
}
 
Author: europeana | Project: search | Lines: 27 | Source: TestMultiTermConstantScore.java

Example 13: getOriginalValues

import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
private Map<String,Float> getOriginalValues() throws IOException {
  Map<String,Float> originalValues = new HashMap<>();
  MoreLikeThis mlt = new MoreLikeThis(reader);
  mlt.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
  mlt.setMinDocFreq(1);
  mlt.setMinTermFreq(1);
  mlt.setMinWordLen(1);
  mlt.setFieldNames(new String[] {"text"});
  mlt.setBoost(true);
  BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(
      "lucene release"));
  List<BooleanClause> clauses = query.clauses();

  for (BooleanClause clause : clauses) {
    TermQuery tq = (TermQuery) clause.getQuery();
    originalValues.put(tq.getTerm().text(), tq.getBoost());
  }
  return originalValues;
}
 
Author: europeana | Project: search | Lines: 20 | Source: TestMoreLikeThis.java

Example 14: testMultiValues

import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testMultiValues() throws Exception {
  MoreLikeThis mlt = new MoreLikeThis(reader);
  mlt.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
  mlt.setMinDocFreq(1);
  mlt.setMinTermFreq(1);
  mlt.setMinWordLen(1);
  mlt.setFieldNames(new String[] {"text"});

  BooleanQuery query = (BooleanQuery) mlt.like("text",
      new StringReader("lucene"), new StringReader("lucene release"),
      new StringReader("apache"), new StringReader("apache lucene"));
  List<BooleanClause> clauses = query.clauses();
  assertEquals("Expected 2 clauses only!", 2, clauses.size());
  for (BooleanClause clause : clauses) {
    Term term = ((TermQuery) clause.getQuery()).getTerm();
    assertTrue(Arrays.asList(new Term("text", "lucene"), new Term("text", "apache")).contains(term));
  }
}
 
Author: europeana | Project: search | Lines: 19 | Source: TestMoreLikeThis.java

Example 15: testRandom2GraphAfter

import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testRandom2GraphAfter() throws Exception {
  final int numIters = atLeast(3);
  Random random = random();
  for (int i = 0; i < numIters; i++) {
    b = new SynonymMap.Builder(random.nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random.nextBoolean();
    
    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        TokenStream syns = new SynonymFilter(tokenizer, map, ignoreCase);
        TokenStream graph = new MockGraphTokenFilter(random(), syns);
        return new TokenStreamComponents(tokenizer, graph);
      }
    };

    checkRandomData(random, analyzer, 100);
  }
}
 
Author: europeana | Project: search | Lines: 26 | Source: TestSynonymMapFilter.java


Note: The org.apache.lucene.analysis.MockTokenizer examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects and remain the copyright of their original authors; consult the corresponding project's license before using or redistributing them, and do not reproduce this article without permission.