当前位置: 首页>>代码示例>>Java>>正文


Java MockTokenizer.SIMPLE属性代码示例

本文整理汇总了Java中org.apache.lucene.analysis.MockTokenizer.SIMPLE属性的典型用法代码示例。如果您正苦于以下问题:Java MockTokenizer.SIMPLE属性的具体用法?Java MockTokenizer.SIMPLE怎么用?Java MockTokenizer.SIMPLE使用的例子?那么, 这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在org.apache.lucene.analysis.MockTokenizer的用法示例。


在下文中一共展示了MockTokenizer.SIMPLE属性的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testTransposition2

/** Checks that the transposed misspelling "seevntene" is corrected to "seventeen". */
public void testTransposition2() throws Exception {
  DirectSpellChecker spellChecker = new DirectSpellChecker();
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory,
      new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));

  // Index the English words for 0..19 so that "seventeen" is present in the index.
  for (int docNum = 0; docNum < 20; docNum++) {
    Document document = new Document();
    document.add(newTextField("numbers", English.intToEnglish(docNum), Field.Store.NO));
    indexWriter.addDocument(document);
  }

  IndexReader reader = indexWriter.getReader();

  Term misspelled = new Term("numbers", "seevntene");
  SuggestWord[] suggestions =
      spellChecker.suggestSimilar(misspelled, 2, reader, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
  assertEquals(1, suggestions.length);
  assertEquals("seventeen", suggestions[0].string);

  reader.close();
  indexWriter.close();
  directory.close();
}
 
开发者ID:europeana,项目名称:search,代码行数:23,代码来源:TestDirectSpellChecker.java

示例2: testRandom2

/**
 * Simple random smoke test: does not verify correctness, only that the
 * synonym stream neither throws nor misbehaves on randomly built maps.
 */
public void testRandom2() throws Exception {
  final int iterations = atLeast(3);
  for (int iter = 0; iter < iterations; iter++) {
    b = new SynonymMap.Builder(random().nextBoolean());
    final int entryCount = atLeast(10);
    for (int entry = 0; entry < entryCount; entry++) {
      add(randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
    }
    final SynonymMap synonyms = b.build();
    final boolean caseInsensitive = random().nextBoolean();

    // Analyzer chaining a SIMPLE MockTokenizer into the synonym filter under test.
    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        return new TokenStreamComponents(
            tokenizer, new SynonymFilter(tokenizer, synonyms, caseInsensitive));
      }
    };

    checkRandomData(random(), analyzer, 100);
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:25,代码来源:TestSynonymMapFilter.java

示例3: testTransposition

/** Checks that the transposed misspelling "fvie" is corrected to "five". */
public void testTransposition() throws Exception {
  DirectSpellChecker spellChecker = new DirectSpellChecker();
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory,
      new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));

  // Index the English words for 0..19 so that "five" is present in the index.
  for (int docNum = 0; docNum < 20; docNum++) {
    Document document = new Document();
    document.add(newTextField("numbers", English.intToEnglish(docNum), Field.Store.NO));
    indexWriter.addDocument(document);
  }

  IndexReader reader = indexWriter.getReader();

  SuggestWord[] suggestions = spellChecker.suggestSimilar(
      new Term("numbers", "fvie"), 1, reader, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
  assertEquals(1, suggestions.length);
  assertEquals("five", suggestions[0].string);

  reader.close();
  indexWriter.close();
  directory.close();
}
 
开发者ID:europeana,项目名称:search,代码行数:23,代码来源:TestDirectSpellChecker.java

示例4: testBogusField

/** Asking for suggestions against a nonexistent field must yield no suggestions. */
public void testBogusField() throws Exception {
  DirectSpellChecker spellChecker = new DirectSpellChecker();
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory,
      new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));

  for (int docNum = 0; docNum < 20; docNum++) {
    Document document = new Document();
    document.add(newTextField("numbers", English.intToEnglish(docNum), Field.Store.NO));
    indexWriter.addDocument(document);
  }

  IndexReader reader = indexWriter.getReader();

  // The field name matches nothing in the index, so no suggestions can be produced.
  SuggestWord[] suggestions = spellChecker.suggestSimilar(
      new Term("bogusFieldBogusField", "fvie"), 2, reader, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
  assertEquals(0, suggestions.length);

  reader.close();
  indexWriter.close();
  directory.close();
}
 
开发者ID:europeana,项目名称:search,代码行数:22,代码来源:TestDirectSpellChecker.java

示例5: testInvalidAnalyzesToNothingOutput

/** Parsing a rule whose output side analyzes to nothing must raise ParseException. */
@Test(expected=ParseException.class)
public void testInvalidAnalyzesToNothingOutput() throws Exception {
  SolrSynonymParser parser =
      new SolrSynonymParser(true, true, new MockAnalyzer(random(), MockTokenizer.SIMPLE, false));
  // "1" produces no tokens under the SIMPLE (letters-only) tokenizer, so the rule is invalid.
  parser.parse(new StringReader("a => 1"));
}
 
开发者ID:europeana,项目名称:search,代码行数:7,代码来源:TestSolrSynonymParser.java

示例6: getAnalyzer

/** Returns an analyzer built on MockTokenizer.SIMPLE with lower-casing enabled, no filters. */
public static Analyzer getAnalyzer(){
   return new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
         final Tokenizer source = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
         // The tokenizer is both the source and the sink: no token filters are applied.
         return new TokenStreamComponents(source, source);
      }
   };
}
 
开发者ID:europeana,项目名称:search,代码行数:9,代码来源:FuzzyTermOnShortTermsTest.java

示例7: testWildcardInConstantScore

/**
 * Highlights a wildcard query wrapped in a ConstantScoreQuery and asserts that
 * PostingsHighlighter still emits &lt;b&gt; markup around the matching terms in both docs.
 */
public void testWildcardInConstantScore() throws Exception {
  Directory dir = newDirectory();
  // use simpleanalyzer for more natural tokenization (else "test." is a token)
  final Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  // Index offsets in the postings; PostingsHighlighter reads them at highlight time.
  FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
  offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Field body = new Field("body", "", offsetsType);
  Document doc = new Document();
  doc.add(body);

  // Two documents, each containing a term that matches the wildcard "te*".
  body.setStringValue("This is a test.");
  iw.addDocument(doc);
  body.setStringValue("Test a one sentence document.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher searcher = newSearcher(ir);
  // Override supplies the index-time analyzer to the highlighter.
  PostingsHighlighter highlighter = new PostingsHighlighter() {
    @Override
    protected Analyzer getIndexAnalyzer(String field) {
      return analyzer;
    }
  };
  ConstantScoreQuery query = new ConstantScoreQuery(new WildcardQuery(new Term("body", "te*")));
  TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits);
  String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

  ir.close();
  dir.close();
}
 
开发者ID:europeana,项目名称:search,代码行数:40,代码来源:TestMultiTermHighlighting.java

示例8: testStartPositions

/** Verifies SpanFirstQuery start-position matching when a stop filter removes leading tokens. */
public void testStartPositions() throws Exception {
  Directory directory = newDirectory();

  // mimic StopAnalyzer
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("the|a|of").toAutomaton());
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);

  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, analyzer);
  Document first = new Document();
  first.add(newTextField("field", "the quick brown fox", Field.Store.NO));
  writer.addDocument(first);
  Document second = new Document();
  second.add(newTextField("field", "quick brown fox", Field.Store.NO));
  writer.addDocument(second);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  // user queries on "starts-with quick"
  SpanQuery startsWithQuick = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 1);
  assertEquals(1, searcher.search(startsWithQuick, 10).totalHits);

  // user queries on "starts-with the quick"
  SpanQuery withinTwo = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 2);
  SpanQuery difference = new SpanNotQuery(withinTwo, startsWithQuick);
  assertEquals(1, searcher.search(difference, 10).totalHits);

  writer.close();
  reader.close();
  directory.close();
}
 
开发者ID:europeana,项目名称:search,代码行数:31,代码来源:TestSpanFirstQuery.java

示例9: testSpanWildcard

/**
 * Highlights a wildcard query wrapped in SpanMultiTermQueryWrapper and asserts that
 * PostingsHighlighter marks the matching terms in both documents.
 */
public void testSpanWildcard() throws Exception {
  Directory dir = newDirectory();
  // use simpleanalyzer for more natural tokenization (else "test." is a token)
  final Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  
  // Index offsets in the postings; PostingsHighlighter reads them at highlight time.
  FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
  offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Field body = new Field("body", "", offsetsType);
  Document doc = new Document();
  doc.add(body);
  
  // Two documents, each containing a term that matches the wildcard "te*".
  body.setStringValue("This is a test.");
  iw.addDocument(doc);
  body.setStringValue("Test a one sentence document.");
  iw.addDocument(doc);
  
  IndexReader ir = iw.getReader();
  iw.close();
  
  IndexSearcher searcher = newSearcher(ir);
  // Override supplies the index-time analyzer to the highlighter.
  PostingsHighlighter highlighter = new PostingsHighlighter() {
    @Override
    protected Analyzer getIndexAnalyzer(String field) {
      return analyzer;
    }
  };
  Query query = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
  TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits);
  String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
  
  ir.close();
  dir.close();
}
 
开发者ID:europeana,项目名称:search,代码行数:40,代码来源:TestMultiTermHighlighting.java

示例10: testInvalidAnalyzesToNothingInput

/** Parsing a rule whose input side analyzes to nothing must raise ParseException. */
@Test(expected=ParseException.class)
public void testInvalidAnalyzesToNothingInput() throws Exception {
  SolrSynonymParser parser =
      new SolrSynonymParser(true, true, new MockAnalyzer(random(), MockTokenizer.SIMPLE, false));
  // "1" produces no tokens under the SIMPLE (letters-only) tokenizer, so the rule is invalid.
  parser.parse(new StringReader("1 => a"));
}
 
开发者ID:europeana,项目名称:search,代码行数:7,代码来源:TestSolrSynonymParser.java

示例11: testCuriousGeorge

/**
 * Regression test: highlighting the phrase "curious george" must not duplicate a matched
 * term in the snippet (the assertion rejects "&lt;b&gt;Curious&lt;/b&gt;Curious").
 */
public void testCuriousGeorge() throws Exception {
  String text = "It’s the formula for success for preschoolers—Curious George and fire trucks! " + 
                "Curious George and the Firefighters is a story based on H. A. and Margret Rey’s " +
                "popular primate and painted in the original watercolor and charcoal style. " + 
                "Firefighters are a famously brave lot, but can they withstand a visit from one curious monkey?";
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
  // Index offsets in the postings; PostingsHighlighter reads them at highlight time.
  FieldType positionsType = new FieldType(TextField.TYPE_STORED);
  positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Field body = new Field("body", text, positionsType);
  Document document = new Document();
  document.add(body);
  iw.addDocument(document);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher searcher = newSearcher(ir);
  // Exact phrase query over the two adjacent terms.
  PhraseQuery query = new PhraseQuery();
  query.add(new Term("body", "curious"));
  query.add(new Term("body", "george"));
  TopDocs topDocs = searcher.search(query, 10);
  assertEquals(1, topDocs.totalHits);
  PostingsHighlighter highlighter = new PostingsHighlighter();
  String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
  assertEquals(1, snippets.length);
  // The matched term must appear once, not duplicated around the markup.
  assertFalse(snippets[0].contains("<b>Curious</b>Curious"));
  ir.close();
  dir.close();
}
 
开发者ID:europeana,项目名称:search,代码行数:29,代码来源:TestPostingsHighlighter.java

示例12: testRandomHuge

/** Simple random test like testRandom2, but feeds larger documents (up to 1024 chars)
 *  through the synonym filter to exercise bigger token streams.
 */
public void testRandomHuge() throws Exception {
  Random random = random();
  final int numIters = atLeast(3);
  for (int i = 0; i < numIters; i++) {
    // 'b' is the shared SynonymMap.Builder field; rebuilt fresh each iteration.
    b = new SynonymMap.Builder(random.nextBoolean());
    final int numEntries = atLeast(10);
    if (VERBOSE) {
      System.out.println("TEST: iter=" + i + " numEntries=" + numEntries);
    }
    for (int j = 0; j < numEntries; j++) {
      add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random.nextBoolean();
    
    // Analyzer chaining a SIMPLE MockTokenizer into the synonym filter under test.
    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
      }
    };

    // 100 iterations with documents up to 1024 characters long.
    checkRandomData(random, analyzer, 100, 1024);
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:28,代码来源:TestSynonymMapFilter.java

示例13: testBooleanMustNot

/**
 * A MUST_NOT clause nested inside a SHOULD sub-query must not be highlighted:
 * the snippet may not contain "&lt;b&gt;both&lt;/b&gt;".
 */
public void testBooleanMustNot() throws Exception {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
  // Index offsets in the postings; PostingsHighlighter reads them at highlight time.
  FieldType positionsType = new FieldType(TextField.TYPE_STORED);
  positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Field body = new Field("body", "This sentence has both terms.  This sentence has only terms.", positionsType);
  Document document = new Document();
  document.add(body);
  iw.addDocument(document);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher searcher = newSearcher(ir);
  // Outer query: SHOULD "terms", plus a nested sub-query holding MUST_NOT "both".
  BooleanQuery query = new BooleanQuery();
  query.add(new TermQuery(new Term("body", "terms")), BooleanClause.Occur.SHOULD);
  BooleanQuery query2 = new BooleanQuery();
  query.add(query2, BooleanClause.Occur.SHOULD);
  query2.add(new TermQuery(new Term("body", "both")), BooleanClause.Occur.MUST_NOT);
  TopDocs topDocs = searcher.search(query, 10);
  assertEquals(1, topDocs.totalHits);
  // Integer.MAX_VALUE-1 is the maximum passage length, so the whole body fits one passage.
  PostingsHighlighter highlighter = new PostingsHighlighter(Integer.MAX_VALUE-1);
  String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
  assertEquals(1, snippets.length);
  assertFalse(snippets[0].contains("<b>both</b>"));
  ir.close();
  dir.close();
}
 
开发者ID:europeana,项目名称:search,代码行数:27,代码来源:TestPostingsHighlighter.java

示例14: testRanking

/** 
 * indexes a bunch of gibberish, and then highlights top(n).
 * asserts that top(n) highlights is a subset of top(n+1) up to some max N
 */
// TODO: this only tests single-valued fields. we should also index multiple values per field!
public void testRanking() throws Exception {
  // number of documents: we will check each one
  final int numDocs = atLeast(100);
  // number of top-N snippets, we will check 1 .. N
  final int maxTopN = 5;
  // maximum number of elements to put in a sentence.
  final int maxSentenceLength = 10;
  // maximum number of sentences in a document
  final int maxNumSentences = 20;
  
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
  // Reuse a single Document/Field pair across all adds; only the values change per doc.
  Document document = new Document();
  Field id = new StringField("id", "", Field.Store.NO);
  // Index offsets in the postings; PostingsHighlighter reads them at highlight time.
  FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
  offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Field body = new Field("body", "", offsetsType);
  document.add(id);
  document.add(body);
  
  // Build each document from a random number of random sentences.
  for (int i = 0; i < numDocs; i++) {
    StringBuilder bodyText = new StringBuilder();
    int numSentences = TestUtil.nextInt(random(), 1, maxNumSentences);
    for (int j = 0; j < numSentences; j++) {
      bodyText.append(newSentence(random(), maxSentenceLength));
    }
    body.setStringValue(bodyText.toString());
    id.setStringValue(Integer.toString(i));
    iw.addDocument(document);
  }
  
  // Verify the top-N subset property for every indexed document.
  IndexReader ir = iw.getReader();
  IndexSearcher searcher = newSearcher(ir);
  for (int i = 0; i < numDocs; i++) {
    checkDocument(searcher, i, maxTopN);
  }
  iw.close();
  ir.close();
  dir.close();
}
 
开发者ID:europeana,项目名称:search,代码行数:45,代码来源:TestPostingsHighlighterRanking.java

示例15: testOnePrefix

/**
 * Highlights a single PrefixQuery and checks the matches are marked; also checks that a
 * prefix on a different ("bogus") field produces plain, unhighlighted snippets.
 */
public void testOnePrefix() throws Exception {
  Directory dir = newDirectory();
  // use simpleanalyzer for more natural tokenization (else "test." is a token)
  final Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  
  // Index offsets in the postings; PostingsHighlighter reads them at highlight time.
  FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
  offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Field body = new Field("body", "", offsetsType);
  Document doc = new Document();
  doc.add(body);
  
  // Two documents, each containing a term starting with the prefix "te".
  body.setStringValue("This is a test.");
  iw.addDocument(doc);
  body.setStringValue("Test a one sentence document.");
  iw.addDocument(doc);
  
  IndexReader ir = iw.getReader();
  iw.close();
  
  IndexSearcher searcher = newSearcher(ir);
  // Override supplies the index-time analyzer to the highlighter.
  PostingsHighlighter highlighter = new PostingsHighlighter() {
    @Override
    protected Analyzer getIndexAnalyzer(String field) {
      return analyzer;
    }
  };
  Query query = new PrefixQuery(new Term("body", "te"));
  TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits);
  String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
  
  // wrong field
  BooleanQuery bq = new BooleanQuery();
  bq.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
  bq.add(new PrefixQuery(new Term("bogus", "te")), BooleanClause.Occur.SHOULD);
  topDocs = searcher.search(bq, null, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits);
  snippets = highlighter.highlight("body", bq, searcher, topDocs);
  assertEquals(2, snippets.length);
  // No markup expected: the prefix targeted a field that does not exist.
  assertEquals("This is a test.", snippets[0]);
  assertEquals("Test a one sentence document.", snippets[1]);
  
  ir.close();
  dir.close();
}
 
开发者ID:europeana,项目名称:search,代码行数:51,代码来源:TestMultiTermHighlighting.java


注:本文中的org.apache.lucene.analysis.MockTokenizer.SIMPLE属性示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。