

Java MockTokenizer.KEYWORD Field Code Examples

This article collects typical usage examples of the org.apache.lucene.analysis.MockTokenizer.KEYWORD field in Java. If you are wondering how MockTokenizer.KEYWORD is used, what it is for, or what real code that uses it looks like, the curated examples below should help. You can also browse further usage examples of the enclosing class, org.apache.lucene.analysis.MockTokenizer.


A total of 15 code examples of the MockTokenizer.KEYWORD field are shown below, sorted by popularity by default.
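For context: MockTokenizer.KEYWORD is a pattern constant from Lucene's test framework that makes MockTokenizer emit the entire input as a single token (it acts like KeywordTokenizer), which is why the suggester and filter tests below use it to keep multi-word inputs intact. Here is a minimal sketch of that behavior, assuming the Lucene 4.x test-framework API used throughout this page; the class name and test name are illustrative and not taken from any of the examples:

import java.io.StringReader;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;

public class MockKeywordSketchTest extends BaseTokenStreamTestCase {

  public void testKeywordEmitsWholeInputAsOneToken() throws Exception {
    // With the KEYWORD pattern the tokenizer does not split on whitespace,
    // so the whole string comes back as a single token; 'false' disables lowercasing.
    MockTokenizer tokenizer =
        new MockTokenizer(new StringReader("foo bar baz"), MockTokenizer.KEYWORD, false);
    assertTokenStreamContents(tokenizer, new String[] { "foo bar baz" });
  }
}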

Example 1: testRandomEdits

public void testRandomEdits() throws IOException {
  List<Input> keys = new ArrayList<>();
  int numTerms = atLeast(100);
  for (int i = 0; i < numTerms; i++) {
    keys.add(new Input("boo" + TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
  }
  keys.add(new Input("foo bar boo far", 12));
  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
  FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, true, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
                                                0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE);
  suggester.build(new InputArrayIterator(keys));
  int numIters = atLeast(10);
  for (int i = 0; i < numIters; i++) {
    String addRandomEdit = addRandomEdit("foo bar boo", FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX);
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence(addRandomEdit, random()), false, 2);
    assertEquals(addRandomEdit, 1, results.size());
    assertEquals("foo bar boo far", results.get(0).key.toString());
    assertEquals(12, results.get(0).value, 0.01F);  
  }
}
 
Developer ID: europeana, Project: search, Lines: 20, Source: FuzzySuggesterTest.java

Example 2: testNonLatinRandomEdits

public void testNonLatinRandomEdits() throws IOException {
  List<Input> keys = new ArrayList<>();
  int numTerms = atLeast(100);
  for (int i = 0; i < numTerms; i++) {
    keys.add(new Input("буу" + TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
  }
  keys.add(new Input("фуу бар буу фар", 12));
  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
  FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, true, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
      0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, true);
  suggester.build(new InputArrayIterator(keys));
  int numIters = atLeast(10);
  for (int i = 0; i < numIters; i++) {
    String addRandomEdit = addRandomEdit("фуу бар буу", 0);
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence(addRandomEdit, random()), false, 2);
    assertEquals(addRandomEdit, 1, results.size());
    assertEquals("фуу бар буу фар", results.get(0).key.toString());
    assertEquals(12, results.get(0).value, 0.01F);
  }
}
 
Developer ID: europeana, Project: search, Lines: 20, Source: FuzzySuggesterTest.java

Example 3: MockTokenizerFactory

/** Creates a new MockTokenizerFactory */
public MockTokenizerFactory(Map<String,String> args) {
  super(args);
  String patternArg = get(args, "pattern", Arrays.asList("keyword", "simple", "whitespace"));
  if ("keyword".equalsIgnoreCase(patternArg)) {
    pattern = MockTokenizer.KEYWORD;
  } else if ("simple".equalsIgnoreCase(patternArg)) {
    pattern = MockTokenizer.SIMPLE;
  } else {
    pattern = MockTokenizer.WHITESPACE;
  }
  
  enableChecks = getBoolean(args, "enableChecks", true);
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
 
Developer ID: europeana, Project: search, Lines: 17, Source: MockTokenizerFactory.java

Example 4: testPrefix

public void testPrefix() throws IOException {
    StringReader reader = new StringReader("test_and_of_for_the");

    final MockTokenizer in = new MockTokenizer(MockTokenizer.KEYWORD, false);
    in.setReader(reader);

    TokenStream stream = new ShinglesStopFilter(in, stopwords, "_");
    assertTokenStreamContents(stream, new String[]{"test"});
}
 
Developer ID: spyk, Project: shingle-stop-filter, Lines: 9, Source: ShingleStopFilterTest.java

Example 5: testStopAtSuffix

public void testStopAtSuffix() throws IOException {
    StringReader reader = new StringReader("the_test_and_of_trend_for_the");

    final MockTokenizer in = new MockTokenizer(MockTokenizer.KEYWORD, false);
    in.setReader(reader);

    TokenStream stream = new ShinglesStopFilter(in, stopwords, "_");
    assertTokenStreamContents(stream, new String[]{"the_test_and_of_trend"});
}
 
Developer ID: spyk, Project: shingle-stop-filter, Lines: 9, Source: ShingleStopFilterTest.java

Example 6: testKeepIgnoreCase2

public void testKeepIgnoreCase2() throws Exception {
  Reader reader = new StringReader("kiTTEN");
  TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
  stream = tokenFilterFactory("Capitalization",
      "keep", "kitten",
      "keepIgnoreCase", "true",
      "onlyFirstWord", "true",
      "forceFirstLetter", "false").create(stream);

  assertTokenStreamContents(stream, new String[] { "kiTTEN" });
}
 
Developer ID: europeana, Project: search, Lines: 11, Source: TestCapitalizationFilterFactory.java

Example 7: testEmpty

public void testEmpty() throws Exception {
  FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
  suggester.build(new InputArrayIterator(new Input[0]));

  List<LookupResult> result = suggester.lookup("a", false, 20);
  assertTrue(result.isEmpty());
}
 
Developer ID: europeana, Project: search, Lines: 7, Source: FuzzySuggesterTest.java

Example 8: testIterationMarksWithKeywordTokenizer

public void testIterationMarksWithKeywordTokenizer() throws IOException {
  final String text = "時々馬鹿々々しいところゞゝゝミスヾ";
  JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(new HashMap<String,String>());
  CharFilter filter = filterFactory.create(new StringReader(text));
  TokenStream tokenStream = new MockTokenizer(filter, MockTokenizer.KEYWORD, false);
  assertTokenStreamContents(tokenStream, new String[]{"時時馬鹿馬鹿しいところどころミスズ"});
}
 
Developer ID: europeana, Project: search, Lines: 7, Source: TestJapaneseIterationMarkCharFilterFactory.java

Example 9: testCustomAttribute

public void testCustomAttribute() throws IOException {
  TokenStream stream = new MockTokenizer(new StringReader("D'Angelo"), MockTokenizer.KEYWORD, false);
  stream = new PatternKeywordMarkerFilter(stream, Pattern.compile(".*"));
  stream = new BeiderMorseFilter(stream, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true));
  KeywordAttribute keyAtt = stream.addAttribute(KeywordAttribute.class);
  stream.reset();
  int i = 0;
  while(stream.incrementToken()) {
    assertTrue(keyAtt.isKeyword());
    i++;
  }
  assertEquals(12, i);
  stream.end();
  stream.close();
}
 
Developer ID: europeana, Project: search, Lines: 15, Source: TestBeiderMorseFilter.java

Example 10: testEscapedStuff

/** parse a syn file with some escaped syntax chars */
public void testEscapedStuff() throws Exception {
  String testFile = 
    "a\\=>a => b\\=>b\n" +
    "a\\,a => b\\,b";
  SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
  parser.parse(new StringReader(testFile));
  final SynonymMap map = parser.build();
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
      return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
    }
  };
  
  assertAnalyzesTo(analyzer, "ball", 
      new String[] { "ball" },
      new int[] { 1 });
  
  assertAnalyzesTo(analyzer, "a=>a",
      new String[] { "b=>b" },
      new int[] { 1 });
  
  assertAnalyzesTo(analyzer, "a,a",
      new String[] { "b,b" },
      new int[] { 1 });
}
 
Developer ID: europeana, Project: search, Lines: 28, Source: TestSolrSynonymParser.java

Example 11: testKeepIgnoreCase

public void testKeepIgnoreCase() throws Exception {
  Reader reader = new StringReader("kiTTEN");
  TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
  stream = tokenFilterFactory("Capitalization",
      "keep", "kitten",
      "keepIgnoreCase", "true",
      "onlyFirstWord", "true",
      "forceFirstLetter", "true").create(stream);

  assertTokenStreamContents(stream, new String[] { "KiTTEN" });
}
 
Developer ID: europeana, Project: search, Lines: 11, Source: TestCapitalizationFilterFactory.java

Example 12: test2

public void test2() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
  addDoc("LANGE", writer);
  addDoc("LUETH", writer);
  addDoc("PIRSING", writer);
  addDoc("RIEGEL", writer);
  addDoc("TRZECZIAK", writer);
  addDoc("WALKER", writer);
  addDoc("WBR", writer);
  addDoc("WE", writer);
  addDoc("WEB", writer);
  addDoc("WEBE", writer);
  addDoc("WEBER", writer);
  addDoc("WEBERE", writer);
  addDoc("WEBREE", writer);
  addDoc("WEBEREI", writer);
  addDoc("WBRE", writer);
  addDoc("WITTKOPF", writer);
  addDoc("WOJNAROWSKI", writer);
  addDoc("WRICKE", writer);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  writer.close();

  FuzzyQuery query = new FuzzyQuery(new Term("field", "WEBER"), 2, 1);
  //query.setRewriteMethod(FuzzyQuery.SCORING_BOOLEAN_QUERY_REWRITE);
  ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
  assertEquals(8, hits.length);

  reader.close();
  directory.close();
}
 
Developer ID: europeana, Project: search, Lines: 34, Source: TestFuzzyQuery.java

Example 13: testCapitalization7

public void testCapitalization7() throws Exception {
  Reader reader = new StringReader("Hello thEre my Name is Ryan");
  TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
  stream = tokenFilterFactory("Capitalization",
      "keep", "and the it BIG",
      "onlyFirstWord", "true",
      "forceFirstLetter", "true").create(stream);
  assertTokenStreamContents(stream, new String[] { "Hello there my name is ryan" });
}
 
Developer ID: europeana, Project: search, Lines: 9, Source: TestCapitalizationFilterFactory.java

Example 14: testCapitalization13

public void testCapitalization13() throws Exception {
  Reader reader = new StringReader("the The the");
  TokenStream stream = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
  stream = tokenFilterFactory("Capitalization",
      "keep", "and the it BIG",
      "onlyFirstWord", "false",
      "minWordLength", "3",
      "okPrefix", "McK",
      "forceFirstLetter", "true").create(stream);
  assertTokenStreamContents(stream, new String[] { "The The the" });
}
 
Developer ID: europeana, Project: search, Lines: 11, Source: TestCapitalizationFilterFactory.java

Example 15: testKeyword

/** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */
public void testKeyword() throws Exception {
  Iterable<Input> keys = shuffle(
      new Input("foo", 50),
      new Input("bar", 10),
      new Input("barbar", 10),
      new Input("barbar", 12),
      new Input("barbara", 6),
      new Input("bar", 5),
      new Input("barbara", 1)
  );

  AnalyzingSuggester suggester = new AnalyzingSuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
  suggester.build(new InputArrayIterator(keys));
  
  // top N of 2, but only foo is available
  List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("f", random()), false, 2);
  assertEquals(1, results.size());
  assertEquals("foo", results.get(0).key.toString());
  assertEquals(50, results.get(0).value, 0.01F);
  
  // top N of 1 for 'bar': we return this even though
  // barbar is higher because exactFirst is enabled:
  results = suggester.lookup(TestUtil.stringToCharSequence("bar", random()), false, 1);
  assertEquals(1, results.size());
  assertEquals("bar", results.get(0).key.toString());
  assertEquals(10, results.get(0).value, 0.01F);
  
  // top N Of 2 for 'b'
  results = suggester.lookup(TestUtil.stringToCharSequence("b", random()), false, 2);
  assertEquals(2, results.size());
  assertEquals("barbar", results.get(0).key.toString());
  assertEquals(12, results.get(0).value, 0.01F);
  assertEquals("bar", results.get(1).key.toString());
  assertEquals(10, results.get(1).value, 0.01F);
  
  // top N of 3 for 'ba'
  results = suggester.lookup(TestUtil.stringToCharSequence("ba", random()), false, 3);
  assertEquals(3, results.size());
  assertEquals("barbar", results.get(0).key.toString());
  assertEquals(12, results.get(0).value, 0.01F);
  assertEquals("bar", results.get(1).key.toString());
  assertEquals(10, results.get(1).value, 0.01F);
  assertEquals("barbara", results.get(2).key.toString());
  assertEquals(6, results.get(2).value, 0.01F);
}
 
Developer ID: europeana, Project: search, Lines: 46, Source: AnalyzingSuggesterTest.java


Note: The org.apache.lucene.analysis.MockTokenizer.KEYWORD examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects and copyright remains with their original authors; distribution and use must follow the corresponding project's license. Do not reproduce without permission.