This article collects typical usage examples of the Java class org.apache.lucene.analysis.BaseTokenStreamTestCase: what the class is for and how it is used in practice.
BaseTokenStreamTestCase belongs to the org.apache.lucene.analysis package. 15 code examples are shown below, ordered by popularity.
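Note: the snippets below are test methods lifted out of their classes, so the analyzer field `a` that most of them reference is not shown. A minimal sketch of the assumed surrounding class (the class name and analyzer choice here are hypothetical):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

// BaseTokenStreamTestCase is a JUnit base class from Lucene's test-framework;
// concrete tests extend it and exercise an analyzer under test.
public class MyAnalyzerTest extends BaseTokenStreamTestCase {
  // StandardAnalyzer has a no-arg constructor from Lucene 5.x on.
  private final Analyzer a = new StandardAnalyzer();

  public void testSimple() throws Exception {
    // assertAnalyzesTo consumes the stream and checks tokens (and optionally types) in order.
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "Foo Bar", new String[] { "foo", "bar" });
  }
}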
Example 1: testMailtoSchemeEmails
import org.apache.lucene.analysis.BaseTokenStreamTestCase; // import the required package/class
public void testMailtoSchemeEmails () throws Exception {
// See LUCENE-3880
BaseTokenStreamTestCase.assertAnalyzesTo(a, "mailto:[email protected]",
new String[] {"mailto", "[email protected]"},
new String[] { "<ALPHANUM>", "<EMAIL>" });
// TODO: Support full mailto: scheme URIs. See RFC 6068: http://tools.ietf.org/html/rfc6068
BaseTokenStreamTestCase.assertAnalyzesTo
(a, "mailto:[email protected],[email protected][email protected]"
+ "&subject=Subjectivity&body=Corpusivity%20or%20something%20like%20that",
new String[] { "mailto",
"[email protected]",
// TODO: recognize ',' address delimiter. Also, see examples of ';' delimiter use at: http://www.mailto.co.uk/
",[email protected]",
"[email protected]", // TODO: split field keys/values
"subject", "Subjectivity",
"body", "Corpusivity", "20or", "20something","20like", "20that" }, // TODO: Hex decoding + re-tokenization
new String[] { "<ALPHANUM>",
"<EMAIL>",
"<EMAIL>",
"<EMAIL>",
"<ALPHANUM>", "<ALPHANUM>",
"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
}
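The <EMAIL> token types and the case-preserving output above ("Subjectivity" stays capitalized) suggest this test runs directly against UAX29URLEmailTokenizer with no lowercasing filter. A plausible definition of `a`, sketched rather than taken from the original test class:

Analyzer a = new Analyzer() {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    // UAX29URLEmailTokenizer behaves like StandardTokenizer but also emits <URL> and <EMAIL> types.
    Tokenizer tokenizer = new UAX29URLEmailTokenizer(newAttributeFactory());
    return new TokenStreamComponents(tokenizer);
  }
};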
Example 2: testMailtoSchemeEmails
import org.apache.lucene.analysis.BaseTokenStreamTestCase; // import the required package/class
public void testMailtoSchemeEmails () throws Exception {
// See LUCENE-3880
BaseTokenStreamTestCase.assertAnalyzesTo(a, "MAILTO:[email protected]",
new String[] {"mailto", "[email protected]"},
new String[] { "<ALPHANUM>", "<EMAIL>" });
// TODO: Support full mailto: scheme URIs. See RFC 6068: http://tools.ietf.org/html/rfc6068
BaseTokenStreamTestCase.assertAnalyzesTo
(a, "mailto:[email protected],[email protected][email protected]"
+ "&subject=Subjectivity&body=Corpusivity%20or%20something%20like%20that",
new String[] { "mailto",
"[email protected]",
// TODO: recognize ',' address delimiter. Also, see examples of ';' delimiter use at: http://www.mailto.co.uk/
",[email protected]",
"[email protected]", // TODO: split field keys/values
"subject", "subjectivity",
"body", "corpusivity", "20or", "20something","20like", "20that" }, // TODO: Hex decoding + re-tokenization
new String[] { "<ALPHANUM>",
"<EMAIL>",
"<EMAIL>",
"<EMAIL>",
"<ALPHANUM>", "<ALPHANUM>",
"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
}
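This variant differs from Example 1 only in that its expected tokens are lowercased ("subjectivity", "corpusivity"), so the analyzer under test presumably appends a LowerCaseFilter to the same tokenizer. A sketch of the likely createComponents body (LowerCaseFilter's single-argument constructor, as in Lucene 5+):

Tokenizer tokenizer = new UAX29URLEmailTokenizer(newAttributeFactory());
return new TokenStreamComponents(tokenizer, new LowerCaseFilter(tokenizer));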
Example 3: testThreadSafety
import org.apache.lucene.analysis.BaseTokenStreamTestCase; // import the required package/class
private void testThreadSafety(TokenFilterFactory factory) throws IOException {
final Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer();
return new TokenStreamComponents(tokenizer, factory.create(tokenizer));
}
};
BaseTokenStreamTestCase.checkRandomData(random(), analyzer, 100);
}
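A hypothetical call site for this helper, assuming Lucene's analysis-common factories (the empty map passes no factory arguments); checkRandomData drives the analyzer from several threads, which is what makes this a thread-safety test:

testThreadSafety(new LowerCaseFilterFactory(new HashMap<String, String>()));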
Example 4: testStandardAnalyzer
import org.apache.lucene.analysis.BaseTokenStreamTestCase; // import the required package/class
public void testStandardAnalyzer() throws IOException {
Analyzer analyzer = new JiebaAnalyzer();
checkRandomData(new Random(0), analyzer, 1);
System.out.println(BaseTokenStreamTestCase.toString(analyzer, "工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作"));
System.out.println("==============");
System.out.println(BaseTokenStreamTestCase.toString(analyzer, "hello world,this is my first program"));
System.out.println("==============");
System.out.println(BaseTokenStreamTestCase.toString(analyzer, "这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。"));
}
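If your Lucene test-framework version does not provide BaseTokenStreamTestCase.toString(Analyzer, String), an equivalent token dump is easy to hand-roll using the standard TokenStream contract (a sketch; dumpTokens is not a library method):

static String dumpTokens(Analyzer analyzer, String text) throws IOException {
  StringBuilder out = new StringBuilder();
  try (TokenStream ts = analyzer.tokenStream("field", text)) {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();                       // required before the first incrementToken()
    while (ts.incrementToken()) {
      if (out.length() > 0) out.append(' ');
      out.append(term);
    }
    ts.end();                         // required after the last incrementToken()
  }
  return out.toString();
}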
Example 5: testAnalyzerFactory
import org.apache.lucene.analysis.BaseTokenStreamTestCase; // import the required package/class
public void testAnalyzerFactory() throws Exception {
String text = "Fortieth, Quarantième, Cuadragésimo";
Benchmark benchmark = execBenchmark(getAnalyzerFactoryConfig
("ascii folded, pattern replaced, standard tokenized, downcased, bigrammed.'analyzer'",
"positionIncrementGap:100,offsetGap:1111,"
+"MappingCharFilter(mapping:'test-mapping-ISOLatin1Accent-partial.txt'),"
+"PatternReplaceCharFilterFactory(pattern:'e(\\\\\\\\S*)m',replacement:\"$1xxx$1\"),"
+"StandardTokenizer,LowerCaseFilter,NGramTokenFilter(minGramSize:2,maxGramSize:2)"));
BaseTokenStreamTestCase.assertAnalyzesTo(benchmark.getRunData().getAnalyzer(), text,
new String[] { "fo", "or", "rt", "ti", "ie", "et", "th",
"qu", "ua", "ar", "ra", "an", "nt", "ti", "ix", "xx", "xx", "xe",
"cu", "ua", "ad", "dr", "ra", "ag", "gs", "si", "ix", "xx", "xx", "xs", "si", "io"});
}
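The factory config string above wires the chain MappingCharFilter -> PatternReplaceCharFilter -> StandardTokenizer -> LowerCaseFilter -> NGramTokenFilter(2,2). Roughly the same chain can be built directly with Lucene's CustomAnalyzer; a sketch, assuming the mapping file is resolvable by the builder's classpath resource loader (note the regex escaping differs from the benchmark-config form):

Analyzer equivalent = CustomAnalyzer.builder()
    .addCharFilter("mapping", "mapping", "test-mapping-ISOLatin1Accent-partial.txt")
    .addCharFilter("patternReplace", "pattern", "e(\\S*)m", "replacement", "$1xxx$1")
    .withTokenizer("standard")
    .addTokenFilter("lowercase")
    .addTokenFilter("ngram", "minGramSize", "2", "maxGramSize", "2")
    .build();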
Example 6: testHugeDoc
import org.apache.lucene.analysis.BaseTokenStreamTestCase; // import the required package/class
public void testHugeDoc() throws IOException {
StringBuilder sb = new StringBuilder();
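// 4094 spaces: just under a typical 4096-char buffer, so "testing 1234" straddles the tokenizer's internal buffer boundary (presumably the point of this test)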
char whitespace[] = new char[4094];
Arrays.fill(whitespace, ' ');
sb.append(whitespace);
sb.append("testing 1234");
String input = sb.toString();
UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(newAttributeFactory(), new StringReader(input));
BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
}
Example 7: testLUCENE1545
import org.apache.lucene.analysis.BaseTokenStreamTestCase; // import the required package/class
public void testLUCENE1545() throws Exception {
/*
* Standard analyzer does not correctly tokenize combining character U+0364 COMBINING LATIN SMALL LETTER E.
* The word "moͤchte" is incorrectly tokenized into "mo" "chte", the combining character is lost.
* Expected result is only one token "moͤchte".
*/
BaseTokenStreamTestCase.assertAnalyzesTo(a, "moͤchte", new String[] { "moͤchte" });
}
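Because U+0364 is a combining mark, the "moͤchte" literal can silently lose it in copy/paste; the same assertion in a copy-safe, escaped form:

BaseTokenStreamTestCase.assertAnalyzesTo(a, "mo\u0364chte", new String[] { "mo\u0364chte" });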
Example 8: testApostrophesSA
import org.apache.lucene.analysis.BaseTokenStreamTestCase; // import the required package/class
public void testApostrophesSA() throws Exception {
// internal apostrophes: O'Reilly, you're, O'Reilly's
BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly", new String[]{"O'Reilly"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "you're", new String[]{"you're"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "she's", new String[]{"she's"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "Jim's", new String[]{"Jim's"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "don't", new String[]{"don't"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly's", new String[]{"O'Reilly's"});
}
Example 9: testVariousTextSA
import org.apache.lucene.analysis.BaseTokenStreamTestCase; // import the required package/class
public void testVariousTextSA() throws Exception {
// various
BaseTokenStreamTestCase.assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"C", "embedded", "developers", "wanted"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "FOO", "BAR"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[]{"foo", "bar", "FOO", "BAR"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"QUOTED", "word"});
}
Example 10: testHugeDoc
import org.apache.lucene.analysis.BaseTokenStreamTestCase; // import the required package/class
public void testHugeDoc() throws IOException {
StringBuilder sb = new StringBuilder();
char whitespace[] = new char[4094];
Arrays.fill(whitespace, ' ');
sb.append(whitespace);
sb.append("testing 1234");
String input = sb.toString();
BaseTokenStreamTestCase.assertAnalyzesTo(a, input, new String[]{"testing", "1234"});
}
Example 11: testLUCENE1545
import org.apache.lucene.analysis.BaseTokenStreamTestCase; // import the required package/class
public void testLUCENE1545() throws Exception {
/*
* Standard analyzer does not correctly tokenize combining character U+0364 COMBINING LATIN SMALL LETTER E.
* The word "moͤchte" is incorrectly tokenized into "mo" "chte", the combining character is lost.
* Expected result is only one token "moͤchte".
*/
BaseTokenStreamTestCase.assertAnalyzesTo(a, "moͤchte", new String[] { "moͤchte" });
}
Example 12: testApostrophesSA
import org.apache.lucene.analysis.BaseTokenStreamTestCase; // import the required package/class
public void testApostrophesSA() throws Exception {
// internal apostrophes: O'Reilly, you're, O'Reilly's
BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly", new String[]{"o'reilly"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "you're", new String[]{"you're"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "she's", new String[]{"she's"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "Jim's", new String[]{"jim's"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "don't", new String[]{"don't"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly's", new String[]{"o'reilly's"});
}
Example 13: testNumericSA
import org.apache.lucene.analysis.BaseTokenStreamTestCase; // import the required package/class
public void testNumericSA() throws Exception {
// floating point, serial, model numbers, ip addresses, etc.
BaseTokenStreamTestCase.assertAnalyzesTo(a, "21.35", new String[]{"21.35"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"r2d2", "c3po"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
}
Example 14: testVariousTextSA
import org.apache.lucene.analysis.BaseTokenStreamTestCase; // import the required package/class
public void testVariousTextSA() throws Exception {
// various
BaseTokenStreamTestCase.assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"c", "embedded", "developers", "wanted"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "foo", "bar"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[]{"foo", "bar", "foo", "bar"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"quoted", "word"});
}
Example 15: testHugeDoc
import org.apache.lucene.analysis.BaseTokenStreamTestCase; // import the required package/class
public void testHugeDoc() throws IOException {
StringBuilder sb = new StringBuilder();
char whitespace[] = new char[4094];
Arrays.fill(whitespace, ' ');
sb.append(whitespace);
sb.append("testing 1234");
String input = sb.toString();
StandardTokenizer tokenizer = new StandardTokenizer(new StringReader(input));
BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
}