This article collects typical usage examples of the Java field org.apache.lucene.analysis.MockTokenizer.SIMPLE. If you have been wondering what MockTokenizer.SIMPLE is for, how to use it, or where to find examples of it in use, the curated code examples here should help. You can also explore further usage examples of its enclosing class, org.apache.lucene.analysis.MockTokenizer.
The following presents 15 code examples of the MockTokenizer.SIMPLE field, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
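Before the examples, a quick orientation (not part of the original listing): MockTokenizer.SIMPLE is a CharacterRunAutomaton constant in Lucene's test framework that makes a MockTokenizer behave roughly like LetterTokenizer, emitting runs of letters. The minimal sketch below shows the two ways the field appears in the examples that follow, passed to a MockAnalyzer or directly to a MockTokenizer; the class and method names here are illustrative only, and the lucene-test-framework jar is assumed to be on the classpath.

import java.io.Reader;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;

public class MockTokenizerSimpleSketch {
  // An analyzer over runs of letters; the third argument enables lowercasing.
  public static Analyzer letterAnalyzer(Random random) {
    return new MockAnalyzer(random, MockTokenizer.SIMPLE, true);
  }

  // The same automaton passed directly to a MockTokenizer.
  public static Tokenizer letterTokenizer(Reader reader) {
    return new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
  }
}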
Example 1: testTransposition2
public void testTransposition2() throws Exception {
  DirectSpellChecker spellChecker = new DirectSpellChecker();
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
      new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
  // index the numbers "zero" through "nineteen" as English words
  for (int i = 0; i < 20; i++) {
    Document doc = new Document();
    doc.add(newTextField("numbers", English.intToEnglish(i), Field.Store.NO));
    writer.addDocument(doc);
  }
  IndexReader ir = writer.getReader();
  // "seevntene" should be corrected to "seventeen" despite the transposition
  SuggestWord[] similar = spellChecker.suggestSimilar(
      new Term("numbers", "seevntene"), 2, ir,
      SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
  assertEquals(1, similar.length);
  assertEquals("seventeen", similar[0].string);
  ir.close();
  writer.close();
  dir.close();
}
Example 2: testRandom2
/** Simple random test; doesn't verify correctness,
 *  only that no exceptions are thrown and the stream doesn't misbehave.
 */
public void testRandom2() throws Exception {
  final int numIters = atLeast(3);
  for (int i = 0; i < numIters; i++) {
    // "b", "add" and "randomNonEmptyString" are a field and helper methods
    // defined elsewhere in the test class
    b = new SynonymMap.Builder(random().nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random().nextBoolean();
    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
      }
    };
    checkRandomData(random(), analyzer, 100);
  }
}
Example 3: testTransposition
public void testTransposition() throws Exception {
  DirectSpellChecker spellChecker = new DirectSpellChecker();
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
      new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
  for (int i = 0; i < 20; i++) {
    Document doc = new Document();
    doc.add(newTextField("numbers", English.intToEnglish(i), Field.Store.NO));
    writer.addDocument(doc);
  }
  IndexReader ir = writer.getReader();
  SuggestWord[] similar = spellChecker.suggestSimilar(
      new Term("numbers", "fvie"), 1, ir,
      SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
  assertEquals(1, similar.length);
  assertEquals("five", similar[0].string);
  ir.close();
  writer.close();
  dir.close();
}
Example 4: testBogusField
public void testBogusField() throws Exception {
  DirectSpellChecker spellChecker = new DirectSpellChecker();
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
      new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
  for (int i = 0; i < 20; i++) {
    Document doc = new Document();
    doc.add(newTextField("numbers", English.intToEnglish(i), Field.Store.NO));
    writer.addDocument(doc);
  }
  IndexReader ir = writer.getReader();
  // a field that does not exist in the index yields no suggestions
  SuggestWord[] similar = spellChecker.suggestSimilar(
      new Term("bogusFieldBogusField", "fvie"), 2, ir,
      SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
  assertEquals(0, similar.length);
  ir.close();
  writer.close();
  dir.close();
}
Example 5: testInvalidAnalyzesToNothingOutput
/** Parse a synonym file with bad syntax: the output "1" contains no letters,
 *  so the MockTokenizer.SIMPLE analyzer reduces it to nothing and the parser
 *  must throw a ParseException.
 */
@Test(expected=ParseException.class)
public void testInvalidAnalyzesToNothingOutput() throws Exception {
  String testFile = "a => 1";
  SolrSynonymParser parser = new SolrSynonymParser(true, true,
      new MockAnalyzer(random(), MockTokenizer.SIMPLE, false));
  parser.parse(new StringReader(testFile));
}
Example 6: getAnalyzer
public static Analyzer getAnalyzer() {
  return new Analyzer() {
    @Override
    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
      // no filters: the tokenizer is also the final token stream
      return new TokenStreamComponents(tokenizer, tokenizer);
    }
  };
}
Example 7: testWildcardInConstantScore
public void testWildcardInConstantScore() throws Exception {
  Directory dir = newDirectory();
  // use simpleanalyzer for more natural tokenization (else "test." is a token)
  final Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
  offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Field body = new Field("body", "", offsetsType);
  Document doc = new Document();
  doc.add(body);
  body.setStringValue("This is a test.");
  iw.addDocument(doc);
  body.setStringValue("Test a one sentence document.");
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher searcher = newSearcher(ir);
  PostingsHighlighter highlighter = new PostingsHighlighter() {
    @Override
    protected Analyzer getIndexAnalyzer(String field) {
      return analyzer;
    }
  };
  ConstantScoreQuery query = new ConstantScoreQuery(new WildcardQuery(new Term("body", "te*")));
  TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits);
  String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
  ir.close();
  dir.close();
}
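The comment in Example 7 about "more natural tokenization" (it recurs in Examples 9 and 15) is worth unpacking: under MockTokenizer.SIMPLE, punctuation never becomes part of a token, so "test." is indexed as the token "test" and the wildcard te* matches it. A hedged sketch of the difference, written as it would sit inside a BaseTokenStreamTestCase subclass (the test name is illustrative; assertAnalyzesTo and random() come from the test framework):

public void testSimpleVersusWhitespace() throws Exception {
  // SIMPLE keeps only runs of letters; with lowercasing on,
  // "This is a test." analyzes to [this, is, a, test]
  Analyzer simple = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  assertAnalyzesTo(simple, "This is a test.",
      new String[] {"this", "is", "a", "test"});
  // WHITESPACE splits on whitespace only, so the trailing
  // period stays attached: [This, is, a, test.]
  Analyzer whitespace = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  assertAnalyzesTo(whitespace, "This is a test.",
      new String[] {"This", "is", "a", "test."});
}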
Example 8: testStartPositions
public void testStartPositions() throws Exception {
  Directory dir = newDirectory();
  // mimic StopAnalyzer
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|a|of").toAutomaton());
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, analyzer);
  Document doc = new Document();
  doc.add(newTextField("field", "the quick brown fox", Field.Store.NO));
  writer.addDocument(doc);
  Document doc2 = new Document();
  doc2.add(newTextField("field", "quick brown fox", Field.Store.NO));
  writer.addDocument(doc2);
  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  // user queries on "starts-with quick"
  SpanQuery sfq = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 1);
  assertEquals(1, searcher.search(sfq, 10).totalHits);
  // user queries on "starts-with the quick"
  SpanQuery include = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 2);
  sfq = new SpanNotQuery(include, sfq);
  assertEquals(1, searcher.search(sfq, 10).totalHits);
  writer.close();
  reader.close();
  dir.close();
}
Example 9: testSpanWildcard
public void testSpanWildcard() throws Exception {
  Directory dir = newDirectory();
  // use simpleanalyzer for more natural tokenization (else "test." is a token)
  final Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
  offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Field body = new Field("body", "", offsetsType);
  Document doc = new Document();
  doc.add(body);
  body.setStringValue("This is a test.");
  iw.addDocument(doc);
  body.setStringValue("Test a one sentence document.");
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher searcher = newSearcher(ir);
  PostingsHighlighter highlighter = new PostingsHighlighter() {
    @Override
    protected Analyzer getIndexAnalyzer(String field) {
      return analyzer;
    }
  };
  Query query = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
  TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits);
  String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
  ir.close();
  dir.close();
}
Example 10: testInvalidAnalyzesToNothingInput
/** Parse a synonym file with bad syntax: this time the input "1" analyzes to
 *  nothing under MockTokenizer.SIMPLE, again raising a ParseException.
 */
@Test(expected=ParseException.class)
public void testInvalidAnalyzesToNothingInput() throws Exception {
  String testFile = "1 => a";
  SolrSynonymParser parser = new SolrSynonymParser(true, true,
      new MockAnalyzer(random(), MockTokenizer.SIMPLE, false));
  parser.parse(new StringReader(testFile));
}
Example 11: testCuriousGeorge
public void testCuriousGeorge() throws Exception {
  String text = "It’s the formula for success for preschoolers—Curious George and fire trucks! " +
      "Curious George and the Firefighters is a story based on H. A. and Margret Rey’s " +
      "popular primate and painted in the original watercolor and charcoal style. " +
      "Firefighters are a famously brave lot, but can they withstand a visit from one curious monkey?";
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
  FieldType positionsType = new FieldType(TextField.TYPE_STORED);
  positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Field body = new Field("body", text, positionsType);
  Document document = new Document();
  document.add(body);
  iw.addDocument(document);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher searcher = newSearcher(ir);
  PhraseQuery query = new PhraseQuery();
  query.add(new Term("body", "curious"));
  query.add(new Term("body", "george"));
  TopDocs topDocs = searcher.search(query, 10);
  assertEquals(1, topDocs.totalHits);
  PostingsHighlighter highlighter = new PostingsHighlighter();
  String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
  assertEquals(1, snippets.length);
  // adjacent highlighted terms must not be glued together in the snippet
  assertFalse(snippets[0].contains("<b>Curious</b>Curious"));
  ir.close();
  dir.close();
}
Example 12: testRandomHuge
/** simple random test like testRandom2, but for larger docs
 */
public void testRandomHuge() throws Exception {
  Random random = random();
  final int numIters = atLeast(3);
  for (int i = 0; i < numIters; i++) {
    b = new SynonymMap.Builder(random.nextBoolean());
    final int numEntries = atLeast(10);
    if (VERBOSE) {
      System.out.println("TEST: iter=" + i + " numEntries=" + numEntries);
    }
    for (int j = 0; j < numEntries; j++) {
      add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random.nextBoolean();
    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
      }
    };
    checkRandomData(random, analyzer, 100, 1024);
  }
}
Example 13: testBooleanMustNot
public void testBooleanMustNot() throws Exception {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
  FieldType positionsType = new FieldType(TextField.TYPE_STORED);
  positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Field body = new Field("body", "This sentence has both terms. This sentence has only terms.", positionsType);
  Document document = new Document();
  document.add(body);
  iw.addDocument(document);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher searcher = newSearcher(ir);
  BooleanQuery query = new BooleanQuery();
  query.add(new TermQuery(new Term("body", "terms")), BooleanClause.Occur.SHOULD);
  BooleanQuery query2 = new BooleanQuery();
  query.add(query2, BooleanClause.Occur.SHOULD);
  query2.add(new TermQuery(new Term("body", "both")), BooleanClause.Occur.MUST_NOT);
  TopDocs topDocs = searcher.search(query, 10);
  assertEquals(1, topDocs.totalHits);
  PostingsHighlighter highlighter = new PostingsHighlighter(Integer.MAX_VALUE-1);
  String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
  assertEquals(1, snippets.length);
  // a term that appears only under MUST_NOT must not be highlighted
  assertFalse(snippets[0].contains("<b>both</b>"));
  ir.close();
  dir.close();
}
Example 14: testRanking
/**
 * Indexes a bunch of gibberish, and then highlights top(n).
 * Asserts that the top(n) highlights are a subset of top(n+1), up to some max N.
 */
// TODO: this only tests single-valued fields. we should also index multiple values per field!
public void testRanking() throws Exception {
  // number of documents: we will check each one
  final int numDocs = atLeast(100);
  // number of top-N snippets, we will check 1 .. N
  final int maxTopN = 5;
  // maximum number of elements to put in a sentence.
  final int maxSentenceLength = 10;
  // maximum number of sentences in a document
  final int maxNumSentences = 20;
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
  Document document = new Document();
  Field id = new StringField("id", "", Field.Store.NO);
  FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
  offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Field body = new Field("body", "", offsetsType);
  document.add(id);
  document.add(body);
  for (int i = 0; i < numDocs; i++) {
    StringBuilder bodyText = new StringBuilder();
    int numSentences = TestUtil.nextInt(random(), 1, maxNumSentences);
    for (int j = 0; j < numSentences; j++) {
      bodyText.append(newSentence(random(), maxSentenceLength));
    }
    body.setStringValue(bodyText.toString());
    id.setStringValue(Integer.toString(i));
    iw.addDocument(document);
  }
  IndexReader ir = iw.getReader();
  IndexSearcher searcher = newSearcher(ir);
  for (int i = 0; i < numDocs; i++) {
    checkDocument(searcher, i, maxTopN);
  }
  iw.close();
  ir.close();
  dir.close();
}
Example 15: testOnePrefix
public void testOnePrefix() throws Exception {
  Directory dir = newDirectory();
  // use simpleanalyzer for more natural tokenization (else "test." is a token)
  final Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
  offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Field body = new Field("body", "", offsetsType);
  Document doc = new Document();
  doc.add(body);
  body.setStringValue("This is a test.");
  iw.addDocument(doc);
  body.setStringValue("Test a one sentence document.");
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher searcher = newSearcher(ir);
  PostingsHighlighter highlighter = new PostingsHighlighter() {
    @Override
    protected Analyzer getIndexAnalyzer(String field) {
      return analyzer;
    }
  };
  Query query = new PrefixQuery(new Term("body", "te"));
  TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits);
  String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
  // wrong field
  BooleanQuery bq = new BooleanQuery();
  bq.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
  bq.add(new PrefixQuery(new Term("bogus", "te")), BooleanClause.Occur.SHOULD);
  topDocs = searcher.search(bq, null, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits);
  snippets = highlighter.highlight("body", bq, searcher, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("This is a test.", snippets[0]);
  assertEquals("Test a one sentence document.", snippets[1]);
  ir.close();
  dir.close();
}