This article collects typical usage examples of the Java class org.apache.lucene.analysis.MockTokenizer. If you are wondering what MockTokenizer is for, how to use it, or what real code that uses it looks like, the curated examples below should help.
The MockTokenizer class belongs to the org.apache.lucene.analysis package (it ships with Lucene's test framework). The sections below show 15 code examples of the class, ordered by popularity.
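Before the examples, here is a minimal, self-contained sketch of the basic MockTokenizer pattern (not taken from any of the examples below). It assumes a recent Lucene test-framework on the classpath, where MockTokenizer is constructed without a Reader and the input is supplied via setReader(); the class name MockTokenizerSketch is made up for illustration. Besides tokenizing deterministically (WHITESPACE, SIMPLE, or KEYWORD modes), MockTokenizer also checks that callers follow the reset()/incrementToken()/end()/close() workflow, which is why the tests below prefer it over a real tokenizer.

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class MockTokenizerSketch {
  public static void main(String[] args) throws IOException {
    // WHITESPACE mode splits on whitespace; the second argument controls lower-casing.
    Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    tokenizer.setReader(new StringReader("this test with test"));

    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    tokenizer.reset();                        // MockTokenizer verifies the consume workflow
    while (tokenizer.incrementToken()) {
      System.out.println(term.toString());   // prints: this, test, with, test
    }
    tokenizer.end();
    tokenizer.close();
  }
}

In the older Lucene 4.x API used by some of the examples below, the Reader is instead passed to the constructor: new MockTokenizer(reader, MockTokenizer.WHITESPACE, false).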
Example 1: testSimple
import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testSimple() throws IOException {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(t, new UniqueTokenFilter(t));
    }
  };

  TokenStream test = analyzer.tokenStream("test", "this test with test");
  test.reset();
  CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
  assertThat(test.incrementToken(), equalTo(true));
  assertThat(termAttribute.toString(), equalTo("this"));

  assertThat(test.incrementToken(), equalTo(true));
  assertThat(termAttribute.toString(), equalTo("test"));

  assertThat(test.incrementToken(), equalTo(true));
  assertThat(termAttribute.toString(), equalTo("with"));

  assertThat(test.incrementToken(), equalTo(false));
}
Example 2: testBackwardsCompatibilityEdgeNgramTokenFilter
import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testBackwardsCompatibilityEdgeNgramTokenFilter() throws Exception {
  int iters = scaledRandomIntBetween(20, 100);
  for (int i = 0; i < iters; i++) {
    final Index index = new Index("test", "_na_");
    final String name = "ngr";
    Version v = randomVersion(random());
    Builder builder = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3);
    boolean reverse = random().nextBoolean();
    if (reverse) {
      builder.put("side", "back");
    }
    Settings settings = builder.build();
    Settings indexSettings = newAnalysisSettingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, v.id).build();
    Tokenizer tokenizer = new MockTokenizer();
    tokenizer.setReader(new StringReader("foo bar"));
    TokenStream edgeNGramTokenFilter = new EdgeNGramTokenFilterFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create(tokenizer);
    if (reverse) {
      assertThat(edgeNGramTokenFilter, instanceOf(ReverseStringFilter.class));
    } else {
      assertThat(edgeNGramTokenFilter, instanceOf(EdgeNGramTokenFilter.class));
    }
  }
}
Example 3: testCountPositions
import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testCountPositions() throws IOException {
  // We're looking to make sure that we:
  Token t1 = new Token();      // Don't count tokens without an increment
  t1.setPositionIncrement(0);
  Token t2 = new Token();
  t2.setPositionIncrement(1);  // Count normal tokens with one increment
  Token t3 = new Token();
  t3.setPositionIncrement(2);  // Count funny tokens with more than one increment
  int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them
  Token[] tokens = new Token[] {t1, t2, t3};
  Collections.shuffle(Arrays.asList(tokens), random());
  final TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
  // TODO: we have no CannedAnalyzer?
  Analyzer analyzer = new Analyzer() {
    @Override
    public TokenStreamComponents createComponents(String fieldName) {
      return new TokenStreamComponents(new MockTokenizer(), tokenStream);
    }
  };
  // Increments 0 + 1 + 2 from the shuffled tokens, plus the final increment of 4, give 7 positions.
  assertThat(TokenCountFieldMapper.countPositions(analyzer, "", ""), equalTo(7));
}
Example 4: testRecursion3
import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testRecursion3() throws Exception {
  b = new SynonymMap.Builder(true);
  final boolean keepOrig = true;
  add("zoo zoo", "zoo", keepOrig);
  final SynonymMap map = b.build();
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
    }
  };

  assertAnalyzesTo(a, "zoo zoo $ zoo",
      new String[] { "zoo", "zoo", "zoo", "$", "zoo" },
      new int[] { 1, 0, 1, 1, 1 });
}
Example 5: testInvalidArguments
import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
/**
 * Test that invalid arguments result in an exception.
 */
public void testInvalidArguments() throws Exception {
  for (final String arg : new String[]{"minWordLength", "maxTokenLength", "maxWordCount"}) {
    try {
      Reader reader = new StringReader("foo foobar super-duper-trooper");
      TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      tokenFilterFactory("Capitalization",
          "keep", "and the it BIG",
          "onlyFirstWord", "false",
          arg, "-3",
          "okPrefix", "McK",
          "forceFirstLetter", "true").create(stream);
      fail();
    } catch (IllegalArgumentException expected) {
      assertTrue(expected.getMessage().contains(arg + " must be greater than or equal to zero")
          || expected.getMessage().contains(arg + " must be greater than zero"));
    }
  }
}
Example 6: testRandomString
import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testRandomString() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer,
          new PatternCaptureGroupTokenFilter(tokenizer, false,
              Pattern.compile("((..)(..))")));
    }
  };

  checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
}
Example 7: setUp
import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
/**
 * Initializes the tests by adding 4 identical documents to the index.
 */
@Override
public void setUp() throws Exception {
  super.setUp();

  // create test index
  mDirectory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), mDirectory,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))
          .setMergePolicy(newLogMergePolicy()).setSimilarity(new DefaultSimilarity()));
  addDocument(writer, "1", "I think it should work.");
  addDocument(writer, "2", "I think it should work.");
  addDocument(writer, "3", "I think it should work.");
  addDocument(writer, "4", "I think it should work.");
  reader = writer.getReader();
  writer.close();

  searcher = newSearcher(reader);
  searcher.setSimilarity(new DefaultSimilarity());
}
Example 8: testHyphenationCompoundWordsDELongestMatch
import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testHyphenationCompoundWordsDELongestMatch() throws Exception {
  CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv");

  InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
  HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);

  // the word basket will not be added due to the longest match option
  HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
      new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
      hyphenator, dict,
      CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true);
  assertTokenStreamContents(tf,
      new String[] { "basketballkurv", "basketball", "ball", "kurv" },
      new int[] { 1, 0, 0, 0 }
  );
}
Example 9: test
import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void test() throws IOException {
  PrefixAwareTokenFilter ts;

  ts = new PrefixAwareTokenFilter(
      new SingleTokenTokenStream(createToken("a", 0, 1)),
      new SingleTokenTokenStream(createToken("b", 0, 1)));
  assertTokenStreamContents(ts,
      new String[] { "a", "b" },
      new int[] { 0, 1 },
      new int[] { 1, 2 });

  // prefix and suffix using 2x prefix
  ts = new PrefixAwareTokenFilter(new SingleTokenTokenStream(createToken("^", 0, 0)),
      new MockTokenizer(new StringReader("hello world"), MockTokenizer.WHITESPACE, false));
  ts = new PrefixAwareTokenFilter(ts, new SingleTokenTokenStream(createToken("$", 0, 0)));
  assertTokenStreamContents(ts,
      new String[] { "^", "hello", "world", "$" },
      new int[] { 0, 0, 6, 11 },
      new int[] { 0, 5, 11, 11 });
}
Example 10: checkPhraseQuery
import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
private float checkPhraseQuery(Document doc, PhraseQuery query, int slop, int expectedNumResults) throws Exception {
  query.setSlop(slop);

  Directory ramDir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), ramDir, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
  writer.addDocument(doc);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  MaxFreqCollector c = new MaxFreqCollector();
  searcher.search(query, c);
  assertEquals("slop: " + slop + " query: " + query + " doc: " + doc + " Wrong number of hits", expectedNumResults, c.totalHits);

  //QueryUtils.check(query,searcher);
  writer.close();
  reader.close();
  ramDir.close();

  // returns the max Scorer.freq() found, because even though norms are omitted, many index stats are different
  // with these different tokens/distributions/lengths.. otherwise this test is very fragile.
  return c.max;
}
Example 11: testTokenEndingWithWordComponentOfMinimumLength
import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testTokenEndingWithWordComponentOfMinimumLength() throws Exception {
  CharArraySet dict = makeDictionary("ab", "cd", "ef");

  // The original snippet built an unused MockTokenizer and then fed a separate
  // WhitespaceTokenizer to the filter; feeding the MockTokenizer directly is equivalent here.
  Tokenizer tokenizer = new MockTokenizer(new StringReader("abcdef"), MockTokenizer.WHITESPACE, false);
  DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(
      tokenizer,
      dict,
      CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);

  assertTokenStreamContents(tf,
      new String[] { "abcdef", "ab", "cd", "ef" },
      new int[] { 0, 0, 0, 0 },
      new int[] { 6, 6, 6, 6 },
      new int[] { 1, 0, 0, 0 }
  );
}
Example 12: beforeClass
import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
@BeforeClass
public static void beforeClass() throws Exception {
  String[] data = new String[] { "A 1 2 3 4 5 6", "Z 4 5 6", null,
                                 "B 2 4 5 6", "Y 3 5 6", null, "C 3 6",
                                 "X 4 5 6" };

  small = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), small,
      newIndexWriterConfig(
          new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)).setMergePolicy(newLogMergePolicy()));

  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setTokenized(false);

  for (int i = 0; i < data.length; i++) {
    Document doc = new Document();
    doc.add(newField("id", String.valueOf(i), customType)); // Field.Keyword("id",String.valueOf(i)));
    doc.add(newField("all", "all", customType));            // Field.Keyword("all","all"));
    if (null != data[i]) {
      doc.add(newTextField("data", data[i], Field.Store.YES)); // Field.Text("data",data[i]));
    }
    writer.addDocument(doc);
  }

  reader = writer.getReader();
  writer.close();
}
Example 13: getOriginalValues
import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
private Map<String, Float> getOriginalValues() throws IOException {
  Map<String, Float> originalValues = new HashMap<>();
  MoreLikeThis mlt = new MoreLikeThis(reader);
  mlt.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
  mlt.setMinDocFreq(1);
  mlt.setMinTermFreq(1);
  mlt.setMinWordLen(1);
  mlt.setFieldNames(new String[] {"text"});
  mlt.setBoost(true);

  BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader("lucene release"));
  List<BooleanClause> clauses = query.clauses();
  for (BooleanClause clause : clauses) {
    TermQuery tq = (TermQuery) clause.getQuery();
    originalValues.put(tq.getTerm().text(), tq.getBoost());
  }
  return originalValues;
}
Example 14: testMultiValues
import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testMultiValues() throws Exception {
  MoreLikeThis mlt = new MoreLikeThis(reader);
  mlt.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
  mlt.setMinDocFreq(1);
  mlt.setMinTermFreq(1);
  mlt.setMinWordLen(1);
  mlt.setFieldNames(new String[] {"text"});

  BooleanQuery query = (BooleanQuery) mlt.like("text",
      new StringReader("lucene"), new StringReader("lucene release"),
      new StringReader("apache"), new StringReader("apache lucene"));
  List<BooleanClause> clauses = query.clauses();

  assertEquals("Expected 2 clauses only!", 2, clauses.size());
  for (BooleanClause clause : clauses) {
    Term term = ((TermQuery) clause.getQuery()).getTerm();
    assertTrue(Arrays.asList(new Term("text", "lucene"), new Term("text", "apache")).contains(term));
  }
}
Example 15: testRandom2GraphAfter
import org.apache.lucene.analysis.MockTokenizer; // import the required package/class
public void testRandom2GraphAfter() throws Exception {
  final int numIters = atLeast(3);
  Random random = random();
  for (int i = 0; i < numIters; i++) {
    b = new SynonymMap.Builder(random.nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random.nextBoolean();

    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        TokenStream syns = new SynonymFilter(tokenizer, map, ignoreCase);
        TokenStream graph = new MockGraphTokenFilter(random(), syns);
        return new TokenStreamComponents(tokenizer, graph);
      }
    };

    checkRandomData(random, analyzer, 100);
  }
}