本文整理汇总了Java中org.apache.lucene.analysis.MockTokenizer.KEYWORD属性的典型用法代码示例。如果您正苦于以下问题:Java MockTokenizer.KEYWORD属性的具体用法?Java MockTokenizer.KEYWORD怎么用?Java MockTokenizer.KEYWORD使用的例子?那么, 这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在类org.apache.lucene.analysis.MockTokenizer
的用法示例。
在下文中一共展示了MockTokenizer.KEYWORD属性的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testRandomEdits
/**
 * Builds a FuzzySuggester over random "boo"-prefixed noise keys plus one known
 * key, then verifies that lookups of randomly-edited variants of "foo bar boo"
 * still surface "foo bar boo far" as the single suggestion.
 */
public void testRandomEdits() throws IOException {
  final List<Input> inputs = new ArrayList<>();
  final int termCount = atLeast(100);
  for (int term = 0; term < termCount; term++) {
    // Noise keys; none of these should ever win the fuzzy lookup below.
    inputs.add(new Input("boo" + TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
  }
  // The one key every lookup in this test is expected to return.
  inputs.add(new Input("foo bar boo far", 12));
  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
  FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, true, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
    0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE);
  suggester.build(new InputArrayIterator(inputs));
  final int rounds = atLeast(10);
  for (int round = 0; round < rounds; round++) {
    String query = addRandomEdit("foo bar boo", FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX);
    List<LookupResult> hits = suggester.lookup(TestUtil.stringToCharSequence(query, random()), false, 2);
    // The edited query itself is the failure message, so a miss is diagnosable.
    assertEquals(query, 1, hits.size());
    assertEquals("foo bar boo far", hits.get(0).key.toString());
    assertEquals(12, hits.get(0).value, 0.01F);
  }
}
示例2: testNonLatinRandomEdits
/**
 * Same scenario as {@code testRandomEdits}, but with Cyrillic keys and
 * unicodeAware explicitly enabled, exercising fuzzy matching on code points
 * outside the Latin range.
 */
public void testNonLatinRandomEdits() throws IOException {
  final List<Input> inputs = new ArrayList<>();
  final int termCount = atLeast(100);
  for (int term = 0; term < termCount; term++) {
    // Cyrillic noise keys that should never match the lookup below.
    inputs.add(new Input("буу" + TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
  }
  // The single expected suggestion.
  inputs.add(new Input("фуу бар буу фар", 12));
  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
  FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, true, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
    0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, true);
  suggester.build(new InputArrayIterator(inputs));
  final int rounds = atLeast(10);
  for (int round = 0; round < rounds; round++) {
    String query = addRandomEdit("фуу бар буу", 0);
    List<LookupResult> hits = suggester.lookup(TestUtil.stringToCharSequence(query, random()), false, 2);
    assertEquals(query, 1, hits.size());
    assertEquals("фуу бар буу фар", hits.get(0).key.toString());
    assertEquals(12, hits.get(0).value, 0.01F);
  }
}
示例3: MockTokenizerFactory
/**
 * Creates a new MockTokenizerFactory.
 *
 * Recognized args: "pattern" (one of keyword/simple/whitespace, matched
 * case-insensitively; anything else falls back to whitespace) and
 * "enableChecks" (boolean, defaults to true). Each lookup consumes its
 * entry from {@code args}, so leftovers indicate unknown parameters.
 */
public MockTokenizerFactory(Map<String,String> args) {
  super(args);
  final String requested = get(args, "pattern", Arrays.asList("keyword", "simple", "whitespace"));
  if ("keyword".equalsIgnoreCase(requested)) {
    pattern = MockTokenizer.KEYWORD;
  } else if ("simple".equalsIgnoreCase(requested)) {
    pattern = MockTokenizer.SIMPLE;
  } else {
    // Default: whitespace tokenization.
    pattern = MockTokenizer.WHITESPACE;
  }
  enableChecks = getBoolean(args, "enableChecks", true);
  // Anything still present was never consumed above and is unsupported.
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
示例4: testPrefix
/** A trailing run of '_'-joined stopwords is stripped, leaving only the "test" prefix. */
public void testPrefix() throws IOException {
  final MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
  tokenizer.setReader(new StringReader("test_and_of_for_the"));
  TokenStream filtered = new ShinglesStopFilter(tokenizer, stopwords, "_");
  assertTokenStreamContents(filtered, new String[]{"test"});
}
示例5: testStopAtSuffix
/** Stripping stops at the last non-stopword ("trend"); leading stopwords are kept. */
public void testStopAtSuffix() throws IOException {
  final MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
  tokenizer.setReader(new StringReader("the_test_and_of_trend_for_the"));
  TokenStream filtered = new ShinglesStopFilter(tokenizer, stopwords, "_");
  assertTokenStreamContents(filtered, new String[]{"the_test_and_of_trend"});
}
示例6: testKeepIgnoreCase2
/**
 * With keepIgnoreCase=true the keep-list entry "kitten" matches "kiTTEN",
 * and with forceFirstLetter=false the token passes through completely unchanged.
 */
public void testKeepIgnoreCase2() throws Exception {
  TokenStream stream = new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.KEYWORD, false);
  stream = tokenFilterFactory("Capitalization",
      "keep", "kitten",
      "keepIgnoreCase", "true",
      "onlyFirstWord", "true",
      "forceFirstLetter", "false").create(stream);
  assertTokenStreamContents(stream, new String[] { "kiTTEN" });
}
示例7: testEmpty
/** A suggester built from zero inputs returns no results for any lookup. */
public void testEmpty() throws Exception {
  FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
  suggester.build(new InputArrayIterator(new Input[0]));
  List<LookupResult> hits = suggester.lookup("a", false, 20);
  assertTrue(hits.isEmpty());
}
示例8: testIterationMarksWithKeywordTokenizer
/**
 * The iteration-mark char filter normalizes kanji/kana repetition marks
 * (々, ゞ, ゝ, ヾ) before tokenization; with a keyword tokenizer the whole
 * normalized string comes out as one token.
 */
public void testIterationMarksWithKeywordTokenizer() throws IOException {
  final String text = "時々馬鹿々々しいところゞゝゝミスヾ";
  JapaneseIterationMarkCharFilterFactory filterFactory = new JapaneseIterationMarkCharFilterFactory(new HashMap<String,String>());
  CharFilter filter = filterFactory.create(new StringReader(text));
  TokenStream tokenStream = new MockTokenizer(filter, MockTokenizer.KEYWORD, false);
  assertTokenStreamContents(tokenStream, new String[]{"時時馬鹿馬鹿しいところどころミスズ"});
}
示例9: testCustomAttribute
/**
 * Marks the input as a keyword via PatternKeywordMarkerFilter and checks that
 * BeiderMorseFilter propagates the keyword attribute onto every one of the
 * 12 phonetic variants it emits for "D'Angelo".
 */
public void testCustomAttribute() throws IOException {
  TokenStream stream = new MockTokenizer(new StringReader("D'Angelo"), MockTokenizer.KEYWORD, false);
  stream = new PatternKeywordMarkerFilter(stream, Pattern.compile(".*"));
  stream = new BeiderMorseFilter(stream, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true));
  KeywordAttribute keyAtt = stream.addAttribute(KeywordAttribute.class);
  stream.reset();
  int emitted = 0;
  while (stream.incrementToken()) {
    // Every phonetic form must retain the keyword flag set upstream.
    assertTrue(keyAtt.isKeyword());
    emitted++;
  }
  assertEquals(12, emitted);
  stream.end();
  stream.close();
}
示例10: testEscapedStuff
/**
 * Parse a synonym file whose entries escape the Solr synonym syntax
 * characters ('=&gt;' and ','), and verify that the escaped forms are
 * treated as literal token text on both sides of each mapping.
 */
public void testEscapedStuff() throws Exception {
  String synonyms =
    "a\\=>a => b\\=>b\n" +
    "a\\,a => b\\,b";
  SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
  parser.parse(new StringReader(synonyms));
  final SynonymMap map = parser.build();
  // Keyword tokenization keeps each query string as a single token,
  // so the synonym filter sees the escaped characters verbatim.
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer source = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
      return new TokenStreamComponents(source, new SynonymFilter(source, map, false));
    }
  };
  // Unmapped input passes through untouched.
  assertAnalyzesTo(analyzer, "ball",
      new String[] { "ball" },
      new int[] { 1 });
  assertAnalyzesTo(analyzer, "a=>a",
      new String[] { "b=>b" },
      new int[] { 1 });
  assertAnalyzesTo(analyzer, "a,a",
      new String[] { "b,b" },
      new int[] { 1 });
}
示例11: testKeepIgnoreCase
/**
 * keepIgnoreCase=true matches "kiTTEN" against keep-entry "kitten", but
 * forceFirstLetter=true still uppercases the leading character: "KiTTEN".
 */
public void testKeepIgnoreCase() throws Exception {
  TokenStream stream = new MockTokenizer(new StringReader("kiTTEN"), MockTokenizer.KEYWORD, false);
  stream = tokenFilterFactory("Capitalization",
      "keep", "kitten",
      "keepIgnoreCase", "true",
      "onlyFirstWord", "true",
      "forceFirstLetter", "true").create(stream);
  assertTokenStreamContents(stream, new String[] { "KiTTEN" });
}
示例12: test2
/**
 * Indexes a fixed set of surnames and checks that a FuzzyQuery for "WEBER"
 * with maxEdits=2 and prefixLength=1 matches exactly the 8 documents within
 * edit distance 2 that share the 'W' prefix.
 */
public void test2() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
  // Same documents, same order as a hand-written call list would produce.
  final String[] terms = {
      "LANGE", "LUETH", "PIRSING", "RIEGEL", "TRZECZIAK", "WALKER",
      "WBR", "WE", "WEB", "WEBE", "WEBER", "WEBERE",
      "WEBREE", "WEBEREI", "WBRE", "WITTKOPF", "WOJNAROWSKI", "WRICKE",
  };
  for (String term : terms) {
    addDoc(term, writer);
  }
  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  writer.close();
  FuzzyQuery query = new FuzzyQuery(new Term("field", "WEBER"), 2, 1);
  //query.setRewriteMethod(FuzzyQuery.SCORING_BOOLEAN_QUERY_REWRITE);
  ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
  assertEquals(8, hits.length);
  reader.close();
  directory.close();
}
示例13: testCapitalization7
/**
 * With a keyword tokenizer the whole sentence is one token: onlyFirstWord
 * capitalizes just its first letter while every other letter is lowercased.
 */
public void testCapitalization7() throws Exception {
  TokenStream stream = new MockTokenizer(new StringReader("Hello thEre my Name is Ryan"), MockTokenizer.KEYWORD, false);
  stream = tokenFilterFactory("Capitalization",
      "keep", "and the it BIG",
      "onlyFirstWord", "true",
      "forceFirstLetter", "true").create(stream);
  assertTokenStreamContents(stream, new String[] { "Hello there my name is ryan" });
}
示例14: testCapitalization13
/**
 * "the" is on the keep list (preserved as-is), but forceFirstLetter=true
 * still uppercases the very first character of the token: "The The the".
 */
public void testCapitalization13() throws Exception {
  TokenStream stream = new MockTokenizer(new StringReader("the The the"), MockTokenizer.KEYWORD, false);
  stream = tokenFilterFactory("Capitalization",
      "keep", "and the it BIG",
      "onlyFirstWord", "false",
      "minWordLength", "3",
      "okPrefix", "McK",
      "forceFirstLetter", "true").create(stream);
  assertTokenStreamContents(stream, new String[] { "The The the" });
}
示例15: testKeyword
/** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */
public void testKeyword() throws Exception {
  // Duplicate keys ("bar", "barbar", "barbara") verify that the highest
  // weight per surface form wins; shuffle ensures order-independence.
  Iterable<Input> inputs = shuffle(
      new Input("foo", 50),
      new Input("bar", 10),
      new Input("barbar", 10),
      new Input("barbar", 12),
      new Input("barbara", 6),
      new Input("bar", 5),
      new Input("barbara", 1)
  );
  AnalyzingSuggester suggester = new AnalyzingSuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
  suggester.build(new InputArrayIterator(inputs));

  // top N of 2, but only "foo" starts with 'f'.
  List<LookupResult> hits = suggester.lookup(TestUtil.stringToCharSequence("f", random()), false, 2);
  assertEquals(1, hits.size());
  assertEquals("foo", hits.get(0).key.toString());
  assertEquals(50, hits.get(0).value, 0.01F);

  // top N of 1 for 'bar': we return this even though
  // barbar is higher because exactFirst is enabled:
  hits = suggester.lookup(TestUtil.stringToCharSequence("bar", random()), false, 1);
  assertEquals(1, hits.size());
  assertEquals("bar", hits.get(0).key.toString());
  assertEquals(10, hits.get(0).value, 0.01F);

  // top N of 2 for 'b': ordered by weight, barbar(12) then bar(10).
  hits = suggester.lookup(TestUtil.stringToCharSequence("b", random()), false, 2);
  assertEquals(2, hits.size());
  assertEquals("barbar", hits.get(0).key.toString());
  assertEquals(12, hits.get(0).value, 0.01F);
  assertEquals("bar", hits.get(1).key.toString());
  assertEquals(10, hits.get(1).value, 0.01F);

  // top N of 3 for 'ba': barbar(12), bar(10), barbara(6).
  hits = suggester.lookup(TestUtil.stringToCharSequence("ba", random()), false, 3);
  assertEquals(3, hits.size());
  assertEquals("barbar", hits.get(0).key.toString());
  assertEquals(12, hits.get(0).value, 0.01F);
  assertEquals("bar", hits.get(1).key.toString());
  assertEquals(10, hits.get(1).value, 0.01F);
  assertEquals("barbara", hits.get(2).key.toString());
  assertEquals(6, hits.get(2).value, 0.01F);
}