This article collects typical usage examples of the Java method org.apache.lucene.analysis.MockTokenizer.setEnableChecks. If you are unsure what MockTokenizer.setEnableChecks does, how to call it, or want to see it used in real code, the curated method examples below should help. You can also look further into the containing class, org.apache.lucene.analysis.MockTokenizer, for more context.
Below are 13 code examples of MockTokenizer.setEnableChecks, ordered roughly by popularity.
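Before the examples, a quick orientation: setEnableChecks(boolean) tells MockTokenizer whether to enforce the TokenStream workflow contract on its consumer (reset(), then incrementToken() until the stream is exhausted, then end() and close()). The checks must be turned off whenever a downstream filter may legitimately stop early. The sketch below illustrates that pattern against the Lucene 4.x-style API used throughout this page; the sample text, the method name, and the LimitTokenCountFilter wrapper are assumptions for illustration only, not taken from the examples that follow.

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;

// Minimal sketch: the filter stops after two tokens and never exhausts the tokenizer,
// so MockTokenizer's workflow checks would (correctly) complain and must be disabled.
void limitTokenCountSketch() throws IOException {
  MockTokenizer tokenizer = new MockTokenizer(new StringReader("a b c d"), MockTokenizer.WHITESPACE, false);
  tokenizer.setEnableChecks(false); // the consumer below does not read the whole stream
  TokenStream stream = new LimitTokenCountFilter(tokenizer, 2, false);
  stream.reset();
  while (stream.incrementToken()) {
    // at most two tokens are produced here
  }
  stream.end();
  stream.close();
}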
Example 1: testMaxPosition3WithSynomyms
import org.apache.lucene.analysis.MockTokenizer; // import the package/class the method depends on

public void testMaxPosition3WithSynomyms() throws IOException {
  for (final boolean consumeAll : new boolean[]{true, false}) {
    MockTokenizer tokenizer = new MockTokenizer(new StringReader("one two three four five"), MockTokenizer.WHITESPACE, false);
    // if we are consuming all tokens, we can use the checks, otherwise we can't
    tokenizer.setEnableChecks(consumeAll);
    SynonymMap.Builder builder = new SynonymMap.Builder(true);
    builder.add(new CharsRef("one"), new CharsRef("first"), true);
    builder.add(new CharsRef("one"), new CharsRef("alpha"), true);
    builder.add(new CharsRef("one"), new CharsRef("beguine"), true);
    CharsRefBuilder multiWordCharsRef = new CharsRefBuilder();
    SynonymMap.Builder.join(new String[]{"and", "indubitably", "single", "only"}, multiWordCharsRef);
    builder.add(new CharsRef("one"), multiWordCharsRef.get(), true);
    SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, multiWordCharsRef);
    builder.add(new CharsRef("two"), multiWordCharsRef.get(), true);
    SynonymMap synonymMap = builder.build();
    TokenStream stream = new SynonymFilter(tokenizer, synonymMap, true);
    stream = new LimitTokenPositionFilter(stream, 3, consumeAll);
    // "only", the 4th word of the multi-word synonym "and indubitably single only", is not emitted, since its position is greater than 3.
    assertTokenStreamContents(stream,
        new String[]{"one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger"},
        new int[]{1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0});
  }
}
Example 2: testReset
import org.apache.lucene.analysis.MockTokenizer; // import the package/class the method depends on

public void testReset() throws Exception {
  CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz",
      "Aufgabe", "Überwachung");

  MockTokenizer wsTokenizer = new MockTokenizer(new StringReader("Rindfleischüberwachungsgesetz"), MockTokenizer.WHITESPACE, false);
  wsTokenizer.setEnableChecks(false); // we will reset in a strange place
  wsTokenizer.setReader(new StringReader("Rindfleischüberwachungsgesetz"));
  DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(
      wsTokenizer, dict,
      CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
  CharTermAttribute termAtt = tf.getAttribute(CharTermAttribute.class);
  tf.reset();
  assertTrue(tf.incrementToken());
  assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
  assertTrue(tf.incrementToken());
  assertEquals("Rind", termAtt.toString());
  tf.end();
  tf.close();
  wsTokenizer.setReader(new StringReader("Rindfleischüberwachungsgesetz"));
  tf.reset();
  assertTrue(tf.incrementToken());
  assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
}
Example 3: testMaxPosition1WithShingles
import org.apache.lucene.analysis.MockTokenizer; // import the package/class the method depends on

public void testMaxPosition1WithShingles() throws Exception {
  for (final boolean consumeAll : new boolean[]{true, false}) {
    Reader reader = new StringReader("one two three four five");
    MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    // if we are consuming all tokens, we can use the checks, otherwise we can't
    tokenizer.setEnableChecks(consumeAll);
    TokenStream stream = tokenizer;
    stream = tokenFilterFactory("Shingle",
        "minShingleSize", "2",
        "maxShingleSize", "3",
        "outputUnigrams", "true").create(stream);
    stream = tokenFilterFactory("LimitTokenPosition",
        LimitTokenPositionFilterFactory.MAX_TOKEN_POSITION_KEY, "1",
        LimitTokenPositionFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll)
        ).create(stream);
    assertTokenStreamContents(stream, new String[]{"one", "one two", "one two three"});
  }
}
Example 4: createComponents
import org.apache.lucene.analysis.MockTokenizer; // import the package/class the method depends on

@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
  MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
  tokenizer.setEnableChecks(true); // keep the TokenStream workflow checks enabled
  TokenStream next;
  if (numStopChars != 0) {
    next = new TokenEater(preserveHoles, tokenizer, numStopChars);
  } else {
    next = tokenizer;
  }
  return new TokenStreamComponents(tokenizer, next);
}
Example 5: createComponents
import org.apache.lucene.analysis.MockTokenizer; // import the package/class the method depends on

@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
  MockTokenizer tokenizer = new MockTokenizer(MockUTF16TermAttributeImpl.UTF16_TERM_ATTRIBUTE_FACTORY, reader, MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
  tokenizer.setEnableChecks(true); // keep the TokenStream workflow checks enabled
  TokenStream next;
  if (numStopChars != 0) {
    next = new TokenEater(preserveHoles, tokenizer, numStopChars);
  } else {
    next = tokenizer;
  }
  return new TokenStreamComponents(tokenizer, next);
}
Example 6: testMissingPayload
import org.apache.lucene.analysis.MockTokenizer; // import the package/class the method depends on

public void testMissingPayload() throws Exception {
  Directory dir = newDirectory();

  // MockAnalyzer minus maybePayload, else it sometimes stuffs in an 8-byte payload!
  Analyzer a = new Analyzer() {
    @Override
    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
      MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true, 100);
      tokenizer.setEnableChecks(true);
      MockTokenFilter filt = new MockTokenFilter(tokenizer, MockTokenFilter.EMPTY_STOPSET);
      return new TokenStreamComponents(tokenizer, filt);
    }
  };
  IndexWriterConfig iwc = newIndexWriterConfig(a);
  iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  Document doc = new Document();
  doc.add(newTextField("id", "id", Field.Store.NO));
  try {
    w.addDocument(doc);
    w.commit();
    fail("didn't hit expected exception");
  } catch (IllegalArgumentException iae) {
    // expected
  }
  w.close();
  dir.close();
}
Example 7: testLongestOnly
import org.apache.lucene.analysis.MockTokenizer; // import the package/class the method depends on

/** simple test for longestOnly option */
public void testLongestOnly() throws IOException {
  MockTokenizer tokenizer = new MockTokenizer(new StringReader("lucene is awesome"));
  tokenizer.setEnableChecks(true);
  HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary, true, true);
  assertTokenStreamContents(filter, new String[]{"lucene", "is", "awesome"}, new int[] {1, 1, 1});
}
Example 8: test
import org.apache.lucene.analysis.MockTokenizer; // import the package/class the method depends on

public void test() throws Exception {
  for (final boolean consumeAll : new boolean[]{true, false}) {
    MockTokenizer tokenizer = new MockTokenizer(new StringReader("A1 B2 C3 D4 E5 F6"), MockTokenizer.WHITESPACE, false);
    // the checks are only valid when all tokens are consumed
    tokenizer.setEnableChecks(consumeAll);
    TokenStream stream = new LimitTokenCountFilter(tokenizer, 3, consumeAll);
    assertTokenStreamContents(stream, new String[]{"A1", "B2", "C3"});
  }
}
Example 9: test
import org.apache.lucene.analysis.MockTokenizer; // import the package/class the method depends on

public void test() throws Exception {
  for (final boolean consumeAll : new boolean[]{true, false}) {
    Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
    MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    // the checks are only valid when all tokens are consumed
    tokenizer.setEnableChecks(consumeAll);
    TokenStream stream = tokenizer;
    stream = tokenFilterFactory("LimitTokenCount",
        LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY, "3",
        LimitTokenCountFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll)
        ).create(stream);
    assertTokenStreamContents(stream, new String[]{"A1", "B2", "C3"});
  }
}
Example 10: testMaxPosition1
import org.apache.lucene.analysis.MockTokenizer; // import the package/class the method depends on

public void testMaxPosition1() throws Exception {
  for (final boolean consumeAll : new boolean[]{true, false}) {
    Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
    MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    // if we are consuming all tokens, we can use the checks, otherwise we can't
    tokenizer.setEnableChecks(consumeAll);
    TokenStream stream = tokenizer;
    stream = tokenFilterFactory("LimitTokenPosition",
        LimitTokenPositionFilterFactory.MAX_TOKEN_POSITION_KEY, "1",
        LimitTokenPositionFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll)
        ).create(stream);
    assertTokenStreamContents(stream, new String[]{"A1"});
  }
}
Example 11: testMaxPosition2
import org.apache.lucene.analysis.MockTokenizer; // import the package/class the method depends on

public void testMaxPosition2() throws IOException {
  for (final boolean consumeAll : new boolean[]{true, false}) {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        // if we are consuming all tokens, we can use the checks, otherwise we can't
        tokenizer.setEnableChecks(consumeAll);
        return new TokenStreamComponents(tokenizer, new LimitTokenPositionFilter(tokenizer, 2, consumeAll));
      }
    };

    // don't use assertAnalyzesTo here, as the end offset is not the end of the string (unless consumeAll is true, in which case it's correct)!
    // note: the input strings below contain runs of spaces, which is why the expected offsets and final end offsets (16, 3, 6) come out as they do
    assertTokenStreamContents(a.tokenStream("dummy", "1  2     3  4  5"),
        new String[]{"1", "2"}, new int[]{0, 3}, new int[]{1, 4}, consumeAll ? 16 : null);
    assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")),
        new String[]{"1", "2"}, new int[]{0, 2}, new int[]{1, 3}, consumeAll ? 9 : null);

    // less than the limit, ensure we behave correctly
    assertTokenStreamContents(a.tokenStream("dummy", "1  "),
        new String[]{"1"}, new int[]{0}, new int[]{1}, consumeAll ? 3 : null);

    // equal to limit
    assertTokenStreamContents(a.tokenStream("dummy", "1  2  "),
        new String[]{"1", "2"}, new int[]{0, 3}, new int[]{1, 4}, consumeAll ? 6 : null);
  }
}
Example 12: testExceptionJustBeforeFlush
import org.apache.lucene.analysis.MockTokenizer; // import the package/class the method depends on

public void testExceptionJustBeforeFlush() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = RandomIndexWriter.mockIndexWriter(dir,
      newIndexWriterConfig(new MockAnalyzer(random()))
          .setMaxBufferedDocs(2),
      new TestPoint1());
  Document doc = new Document();
  doc.add(newTextField("field", "a field", Field.Store.YES));
  w.addDocument(doc);

  Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
    @Override
    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
      MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
      return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
    }
  };

  Document crashDoc = new Document();
  crashDoc.add(newTextField("crash", "do it on token 4", Field.Store.YES));
  try {
    w.addDocument(crashDoc, analyzer);
    fail("did not hit expected exception");
  } catch (IOException ioe) {
    // expected
  }
  w.addDocument(doc);
  w.close();
  dir.close();
}
Example 13: create
import org.apache.lucene.analysis.MockTokenizer; // import the package/class the method depends on

@Override
public MockTokenizer create(AttributeFactory factory, Reader input) {
  MockTokenizer t = new MockTokenizer(factory, input, pattern, false);
  t.setEnableChecks(enableChecks);
  return t;
}