本文整理汇总了Java中org.apache.lucene.util.automaton.CharacterRunAutomaton类的典型用法代码示例。如果您正苦于以下问题:Java CharacterRunAutomaton类的具体用法?Java CharacterRunAutomaton怎么用?Java CharacterRunAutomaton使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
CharacterRunAutomaton类属于org.apache.lucene.util.automaton包,在下文中一共展示了CharacterRunAutomaton类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: checkCondition
import org.apache.lucene.util.automaton.CharacterRunAutomaton; //导入依赖的package包/类
/**
 * Checks whether the concatenation of two character ranges satisfies the
 * dictionary condition pattern identified by {@code condition}
 * (condition 0 is the "always true" pattern).
 */
// note: this is pretty stupid, we really should subtract strip from the condition up front and just check the stem
// but this is a little bit more complicated.
private boolean checkCondition(int condition, char c1[], int c1off, int c1len, char c2[], int c2off, int c2len) {
  if (condition == 0) {
    return true;
  }
  CharacterRunAutomaton automaton = dictionary.patterns.get(condition);
  // run the automaton over the first chunk, then the second; reject on a dead state
  int state = advance(automaton, automaton.getInitialState(), c1, c1off, c1len);
  if (state == -1) {
    return false;
  }
  state = advance(automaton, state, c2, c2off, c2len);
  return state != -1 && automaton.isAccept(state);
}

/** Steps {@code automaton} over chars[off, off+len); returns the resulting state, or -1 once rejected. */
private static int advance(CharacterRunAutomaton automaton, int state, char[] chars, int off, int len) {
  for (int i = off; i < off + len && state != -1; i++) {
    state = automaton.step(state, chars[i]);
  }
  return state;
}
示例2: testMaxSizeEndHighlight
import org.apache.lucene.util.automaton.CharacterRunAutomaton; //导入依赖的package包/类
public void testMaxSizeEndHighlight() throws Exception {
  TestHighlightRunner runner = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      // stop word automaton matching "in" and "it"
      CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
      String text = "this is a text with searchterm in it";
      TermQuery query = new TermQuery(new Term("text", "searchterm"));
      SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
      Highlighter highlighter = getHighlighter(query, "text", formatter);
      highlighter.setTextFragmenter(new NullFragmenter());
      // 36 characters == the full text, so the tail after the match must survive
      highlighter.setMaxDocCharsToAnalyze(36);
      String match = highlighter.getBestFragment(
          new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords), "text", text);
      assertTrue(
          "Matched text should contain remainder of text after highlighted query ",
          match.endsWith("in it"));
    }
  };
  runner.start();
}
示例3: setUp
import org.apache.lucene.util.automaton.CharacterRunAutomaton; //导入依赖的package包/类
@Override
public void setUp() throws Exception {
  super.setUp();
  // Two-state automaton accepting exactly one code point that this JVM's
  // Character.isLetter considers a letter.
  Automaton oneLetter = new Automaton();
  int start = oneLetter.createState();
  int end = oneLetter.createState();
  oneLetter.setAccept(end, true);
  for (int cp = 0; cp <= 0x10FFFF; cp++) {
    if (Character.isLetter(cp)) {
      oneLetter.addTransition(start, end, cp);
    }
  }
  // jvmLetter matches any run of such letters
  jvmLetter = new CharacterRunAutomaton(Operations.repeat(oneLetter));
}
示例4: testBoost
import org.apache.lucene.util.automaton.CharacterRunAutomaton; //导入依赖的package包/类
// Verifies that boost syntax (term^boost) parses and applies correctly, even
// when the boosted term is a stop word removed by the analyzer.
public void testBoost() throws Exception {
// "on" is the single stop word recognized by this analyzer
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
PrecedenceQueryParser qp = new PrecedenceQueryParser();
qp.setAnalyzer(oneStopAnalyzer);
// boosting a stop word must still produce a non-null query
Query q = qp.parse("on^1.0", "field");
assertNotNull(q);
q = qp.parse("\"hello\"^2.0", "field");
assertNotNull(q);
// delta of 0.5 is generous; the boost just needs to be ~2.0
assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
q = qp.parse("hello^2.0", "field");
assertNotNull(q);
assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
q = qp.parse("\"on\"^1.0", "field");
assertNotNull(q);
// also exercise the standard English stop set via a second parser
q = getParser(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)).parse("the^3",
"field");
assertNotNull(q);
}
示例5: testBoost
import org.apache.lucene.util.automaton.CharacterRunAutomaton; //导入依赖的package包/类
// Verifies boost syntax (term^boost) with StandardQueryParser, including the
// case where the boosted term is entirely removed as a stop word.
public void testBoost() throws Exception {
// "on" is the single stop word recognized by this analyzer
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
StandardQueryParser qp = new StandardQueryParser();
qp.setAnalyzer(oneStopAnalyzer);
// boosting a stop word must still produce a non-null query
Query q = qp.parse("on^1.0", "field");
assertNotNull(q);
q = qp.parse("\"hello\"^2.0", "field");
assertNotNull(q);
// delta of 0.5 is generous; the boost just needs to be ~2.0
assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
q = qp.parse("hello^2.0", "field");
assertNotNull(q);
assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
q = qp.parse("\"on\"^1.0", "field");
assertNotNull(q);
StandardQueryParser qp2 = new StandardQueryParser();
qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));
q = qp2.parse("the^3", "field");
// "the" is a stop word so the result is an empty query:
assertNotNull(q);
assertEquals("", q.toString());
// the boost on a fully-removed term is discarded (reset to the default 1.0)
assertEquals(1.0f, q.getBoost(), 0.01f);
}
示例6: testStopwords
import org.apache.lucene.util.automaton.CharacterRunAutomaton; //导入依赖的package包/类
// Verifies stop-word removal during parsing: clauses consisting solely of
// stop words are dropped, collapsing or emptying the resulting query.
public void testStopwords() throws Exception {
StandardQueryParser qp = new StandardQueryParser();
// "the" and "foo" are the stop words for this test
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
qp.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));
// both clauses are stop words -> empty BooleanQuery
Query result = qp.parse("a:the OR a:foo", "a");
assertNotNull("result is null and it shouldn't be", result);
assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: "
+ 0, ((BooleanQuery) result).clauses().size() == 0);
// one surviving clause collapses to a plain TermQuery
result = qp.parse("a:woo OR a:the", "a");
assertNotNull("result is null and it shouldn't be", result);
assertTrue("result is not a TermQuery", result instanceof TermQuery);
// stop-word-only sub-clauses are dropped, leaving two top-level clauses
result = qp.parse(
"(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)",
"a");
assertNotNull("result is null and it shouldn't be", result);
assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
if (VERBOSE)
System.out.println("Result: " + result);
assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: "
+ 2, ((BooleanQuery) result).clauses().size() == 2);
}
示例7: testBoost
import org.apache.lucene.util.automaton.CharacterRunAutomaton; //导入依赖的package包/类
// Verifies boost syntax (term^boost) via the shared parser-configuration
// helpers, including the case where the boosted term is a stop word.
public void testBoost() throws Exception {
  // analyzer whose only stop word is "on"
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on"));
  CommonQueryParserConfiguration config =
      getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords));
  Query q = getQuery("on^1.0", config);
  assertNotNull(q);
  q = getQuery("\"hello\"^2.0", config);
  assertNotNull(q);
  // delta of 0.5 is generous; the boost just needs to be ~2.0
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = getQuery("hello^2.0", config);
  assertNotNull(q);
  assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
  q = getQuery("\"on\"^1.0", config);
  assertNotNull(q);
  // second parser uses the standard English stop set
  CommonQueryParserConfiguration config2 =
      getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));
  q = getQuery("the^3", config2);
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertEquals("", q.toString());
  assertEquals(1.0f, q.getBoost(), 0.01f);
}
示例8: testStopwords
import org.apache.lucene.util.automaton.CharacterRunAutomaton; //导入依赖的package包/类
// Verifies stop-word removal through the shared parser-configuration helpers:
// stop-word-only clauses are dropped from the parsed query.
public void testStopwords() throws Exception {
// "the" and "foo" are the stop words for this test
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));
// both clauses are stop words -> empty BooleanQuery
Query result = getQuery("field:the OR field:foo",qp);
assertNotNull("result is null and it shouldn't be", result);
assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 0, ((BooleanQuery) result).clauses().size() == 0);
// one surviving clause collapses to a plain TermQuery
result = getQuery("field:woo OR field:the",qp);
assertNotNull("result is null and it shouldn't be", result);
assertTrue("result is not a TermQuery", result instanceof TermQuery);
// stop-word-only sub-clauses are dropped, leaving two top-level clauses
result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)",qp);
assertNotNull("result is null and it shouldn't be", result);
assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
if (VERBOSE) System.out.println("Result: " + result);
assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2, ((BooleanQuery) result).clauses().size() == 2);
}
示例9: testTwoChars
import org.apache.lucene.util.automaton.CharacterRunAutomaton; //导入依赖的package包/类
/** Test a configuration where two characters makes a term */
public void testTwoChars() throws Exception {
  // tokenizer emits one term per two input characters
  CharacterRunAutomaton single =
      new CharacterRunAutomaton(new RegExp("..").toAutomaton());
  Analyzer a = new MockAnalyzer(random(), single, false);
  assertAnalyzesTo(a, "foobar",
      new String[] { "fo", "ob", "ar"},
      new int[] { 0, 2, 4 },
      new int[] { 2, 4, 6 }
  );
  // make sure when last term is a "partial" match that end() is correct
  assertTokenStreamContents(a.tokenStream("bogus", "fooba"),
      new String[] { "fo", "ob" },
      new int[] { 0, 2 },
      new int[] { 2, 4 },
      new int[] { 1, 1 },
      // fix: new Integer(int) is deprecated (Java 9+); valueOf uses the cache
      Integer.valueOf(5)
  );
  checkRandomData(random(), a, 100);
}
示例10: testThreeChars
import org.apache.lucene.util.automaton.CharacterRunAutomaton; //导入依赖的package包/类
/** Test a configuration where three characters makes a term */
public void testThreeChars() throws Exception {
  // tokenizer emits one term per three input characters
  CharacterRunAutomaton single =
      new CharacterRunAutomaton(new RegExp("...").toAutomaton());
  Analyzer a = new MockAnalyzer(random(), single, false);
  assertAnalyzesTo(a, "foobar",
      new String[] { "foo", "bar"},
      new int[] { 0, 3 },
      new int[] { 3, 6 }
  );
  // make sure when last term is a "partial" match that end() is correct
  assertTokenStreamContents(a.tokenStream("bogus", "fooba"),
      new String[] { "foo" },
      new int[] { 0 },
      new int[] { 3 },
      new int[] { 1 },
      // fix: new Integer(int) is deprecated (Java 9+); valueOf uses the cache
      Integer.valueOf(5)
  );
  checkRandomData(random(), a, 100);
}
示例11: testUppercase
import org.apache.lucene.util.automaton.CharacterRunAutomaton; //导入依赖的package包/类
/** Test a configuration where word starts with one uppercase */
public void testUppercase() throws Exception {
  // tokens are one uppercase letter followed by zero or more lowercase letters
  CharacterRunAutomaton tokenPattern =
      new CharacterRunAutomaton(new RegExp("[A-Z][a-z]*").toAutomaton());
  Analyzer analyzer = new MockAnalyzer(random(), tokenPattern, false);
  // a run of capitals splits into single-letter tokens
  assertAnalyzesTo(analyzer, "FooBarBAZ",
      new String[] { "Foo", "Bar", "B", "A", "Z"},
      new int[] { 0, 3, 6, 7, 8 },
      new int[] { 3, 6, 7, 8, 9 }
  );
  // a leading lowercase character is skipped entirely
  assertAnalyzesTo(analyzer, "aFooBar",
      new String[] { "Foo", "Bar" },
      new int[] { 1, 4 },
      new int[] { 4, 7 }
  );
  checkRandomData(random(), analyzer, 100);
}
示例12: testRandomRegexps
import org.apache.lucene.util.automaton.CharacterRunAutomaton; //导入依赖的package包/类
/** blast some random strings through differently configured tokenizers */
public void testRandomRegexps() throws Exception {
int iters = atLeast(30);
for (int i = 0; i < iters; i++) {
// random token-boundary DFA; Integer.MAX_VALUE presumably lifts the
// determinized-state limit — TODO confirm against CharacterRunAutomaton ctor
final CharacterRunAutomaton dfa = new CharacterRunAutomaton(AutomatonTestUtil.randomAutomaton(random()), Integer.MAX_VALUE);
final boolean lowercase = random().nextBoolean();
// random max token length between 0 and 500
final int limit = TestUtil.nextInt(random(), 0, 500);
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer t = new MockTokenizer(reader, dfa, lowercase, limit);
return new TokenStreamComponents(t, t);
}
};
checkRandomData(random(), a, 100);
a.close();
}
}
示例13: testBoost
import org.apache.lucene.util.automaton.CharacterRunAutomaton; //导入依赖的package包/类
// Verifies boost syntax against the newer BoostQuery API: boosted terms yield
// a BoostQuery wrapper, and a fully-removed stop word yields no boost at all.
@Test
public void testBoost() throws Exception {
// "on" is the only stop word recognized by this analyzer
CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on"));
Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
QueryParser qp = getParserConfig(oneStopAnalyzer);
// boosting a stop word must still produce a non-null query
Query q = getQuery("on^1.0", qp);
Assert.assertNotNull(q);
q = getQuery("\"hello\"^2.0", qp);
Assert.assertNotNull(q);
// delta of 0.5 is generous; the boost just needs to be ~2.0
Assert.assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);
q = getQuery("hello^2.0", qp);
Assert.assertNotNull(q);
Assert.assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);
q = getQuery("\"on\"^1.0", qp);
Assert.assertNotNull(q);
Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
QueryParser qp2 = getParserConfig(a2);
q = getQuery("the^3", qp2);
// "the" is a stop word so the result is an empty query:
Assert.assertNotNull(q);
assertMatchNoDocsQuery(q);
// the boost on a removed term is discarded, so no BoostQuery wrapper remains
Assert.assertFalse(q instanceof BoostQuery);
}
示例14: testStopwords
import org.apache.lucene.util.automaton.CharacterRunAutomaton; //导入依赖的package包/类
// Verifies stop-word removal against the newer query API, where an all-stop-word
// input may produce MatchNoDocsQuery and boosts are wrapped in BoostQuery.
@Test
public void testStopwords() throws Exception {
// "the" and "foo" are the stop words for this test
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
QueryParser qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));
// both clauses are stop words -> empty BooleanQuery or MatchNoDocsQuery
Query result = getQuery("field:the OR field:foo", qp);
Assert.assertNotNull("result is null and it shouldn't be", result);
Assert.assertTrue("result is not a BooleanQuery",
result instanceof BooleanQuery || result instanceof MatchNoDocsQuery);
if (result instanceof BooleanQuery) {
Assert.assertEquals(0, ((BooleanQuery) result).clauses().size());
}
// one surviving clause collapses to a plain TermQuery
result = getQuery("field:woo OR field:the", qp);
Assert.assertNotNull("result is null and it shouldn't be", result);
Assert.assertTrue("result is not a TermQuery", result instanceof TermQuery);
// the ^2 boost wraps the whole expression in a BoostQuery; unwrap it first
result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", qp);
Assert.assertNotNull("result is null and it shouldn't be", result);
Assert.assertTrue("result is not a BoostQuery", result instanceof BoostQuery);
result = ((BoostQuery) result).getQuery();
Assert.assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
if (VERBOSE)
System.out.println("Result: " + result);
// stop-word-only sub-clauses are dropped, leaving two top-level clauses
Assert.assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2,
((BooleanQuery) result).clauses().size() == 2);
}
示例15: testMaxSizeEndHighlight
import org.apache.lucene.util.automaton.CharacterRunAutomaton; //导入依赖的package包/类
public void testMaxSizeEndHighlight() throws Exception {
  TestHighlightRunner runner = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      // stop word automaton matching "in" and "it"
      CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
      String text = "this is a text with searchterm in it";
      TermQuery query = new TermQuery(new Term("text", "searchterm"));
      SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
      Highlighter highlighter = getHighlighter(query, "text", formatter);
      highlighter.setTextFragmenter(new NullFragmenter());
      // 36 characters == the full text, so the tail after the match must survive
      highlighter.setMaxDocCharsToAnalyze(36);
      String match = highlighter.getBestFragment(
          new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords, true), "text", text);
      assertTrue(
          "Matched text should contain remainder of text after highlighted query ",
          match.endsWith("in it"));
    }
  };
  runner.start();
}