本文整理汇总了Java中org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer类的典型用法代码示例。如果您正苦于以下问题:Java UAX29URLEmailTokenizer类的具体用法?Java UAX29URLEmailTokenizer怎么用?Java UAX29URLEmailTokenizer使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
UAX29URLEmailTokenizer类属于org.apache.lucene.analysis.standard包,在下文中一共展示了UAX29URLEmailTokenizer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testCombiningMarksBackwards
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Checks the single-term output for characters followed by a combining mark when the
 * tokenizer is constructed with {@code Version.LUCENE_3_1}. The trailing comments record,
 * per script, whether the expected output reflects the old bug or correct behavior.
 *
 * @deprecated remove this and sophisticated backwards layer in 5.0
 */
@Deprecated
public void testCombiningMarksBackwards() throws Exception {
  final Analyzer legacyAnalyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      return new TokenStreamComponents(new UAX29URLEmailTokenizer(Version.LUCENE_3_1, reader));
    }
  };
  checkOneTerm(legacyAnalyzer, "ざ", "さ"); // hiragana Bug
  checkOneTerm(legacyAnalyzer, "ザ", "ザ"); // katakana Works
  checkOneTerm(legacyAnalyzer, "壹゙", "壹"); // ideographic Bug
  checkOneTerm(legacyAnalyzer, "아゙", "아゙"); // hangul Works
}
示例2: testCombiningMarksBackwards
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Runs four one-term checks against a tokenizer built with {@code Version.LUCENE_31},
 * each feeding a character followed by a combining mark. The inline comments note which
 * scripts are expected to show the old bug and which are expected to work.
 *
 * @deprecated remove this and sophisticated backwards layer in 5.0
 */
@Deprecated
public void testCombiningMarksBackwards() throws Exception {
  final Analyzer backCompat = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      final Tokenizer source = new UAX29URLEmailTokenizer(Version.LUCENE_31, reader);
      return new TokenStreamComponents(source);
    }
  };
  checkOneTerm(backCompat, "ざ", "さ"); // hiragana Bug
  checkOneTerm(backCompat, "ザ", "ザ"); // katakana Works
  checkOneTerm(backCompat, "壹゙", "壹"); // ideographic Bug
  checkOneTerm(backCompat, "아゙", "아゙"); // hangul Works
}
示例3: testLongEMAILatomText
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Tokenizes a long random string drawn from the e-mail atom alphabet twice with the same
 * tokenizer instance: once with its initial settings and once after setting a randomly
 * chosen maximum token length. Each pass must yield at least one token.
 */
public void testLongEMAILatomText() throws Exception {
  // EMAILatomText = [A-Za-z0-9!#$%&'*+-/=?\^_`{|}~]
  final char[] alphabet =
      "!#$%&'*+,-./0123456789=?ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz{|}~".toCharArray();
  final StringBuilder randomText = new StringBuilder();
  final int length = TestUtil.nextInt(random(), 100 * 1024, 3 * 1024 * 1024);
  int remaining = length;
  while (remaining-- > 0) {
    randomText.append(alphabet[random().nextInt(alphabet.length)]);
  }
  final String text = randomText.toString();

  // First pass: tokenizer as constructed.
  final UAX29URLEmailTokenizer ts = new UAX29URLEmailTokenizer(new StringReader(text));
  int firstPassTokens = 0;
  ts.reset();
  while (ts.incrementToken()) {
    firstPassTokens++;
  }
  ts.end();
  ts.close();
  assertTrue(firstPassTokens > 0);

  // Second pass: same instance, new max token length, fresh reader over the same text.
  final int newBufferSize = TestUtil.nextInt(random(), 200, 8192);
  ts.setMaxTokenLength(newBufferSize);
  ts.setReader(new StringReader(text));
  int secondPassTokens = 0;
  ts.reset();
  while (ts.incrementToken()) {
    secondPassTokens++;
  }
  ts.end();
  ts.close();
  assertTrue(secondPassTokens > 0);
}
示例4: testHugeDoc
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Feeds the tokenizer 4094 spaces followed by {@code "testing 1234"} and expects exactly
 * the two tokens {@code "testing"} and {@code "1234"}.
 */
public void testHugeDoc() throws IOException {
  // NOTE(review): 4094 presumably places the text near an internal buffer boundary — confirm.
  final char[] padding = new char[4094];
  Arrays.fill(padding, ' ');
  final String input = new StringBuilder().append(padding).append("testing 1234").toString();

  final String[] expectedTerms = { "testing", "1234" };
  final UAX29URLEmailTokenizer tokenizer =
      new UAX29URLEmailTokenizer(newAttributeFactory(), new StringReader(input));
  BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, expectedTerms);
}
示例5: UAX29URLEmailTokenizer
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Builds analysis components consisting solely of a {@code UAX29URLEmailTokenizer} that
 * reads from {@code reader} and uses the attribute factory from {@code newAttributeFactory()}.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  return new TokenStreamComponents(new UAX29URLEmailTokenizer(newAttributeFactory(), reader));
}
示例6: incrementToken
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Advances the wrapped stream to the next token whose type is the tokenizer's URL type,
 * skipping every token of any other type.
 *
 * @return {@code true} when positioned on a URL token; {@code false} once the wrapped
 *     stream has no more tokens
 * @throws java.io.IOException if the wrapped stream fails while advancing
 */
@Override
public final boolean incrementToken() throws java.io.IOException {
  final String urlType = UAX29URLEmailTokenizer.TOKEN_TYPES[UAX29URLEmailTokenizer.URL];
  while (input.incrementToken()) {
    // Compare by value rather than by reference: identity only holds while both sides
    // are the very same String instance. Constant-first keeps a null type from throwing.
    if (urlType.equals(typeAtt.type())) {
      return true;
    }
  }
  return false;
}
示例7: createComponents
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Builds a chain of a {@code UAX29URLEmailTokenizer} (with its maximum token length raised
 * to {@code Integer.MAX_VALUE}) followed by a {@code URLFilter}.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  final UAX29URLEmailTokenizer source = new UAX29URLEmailTokenizer(newAttributeFactory(), reader);
  // Tokenize arbitrary length URLs
  source.setMaxTokenLength(Integer.MAX_VALUE);
  return new TokenStreamComponents(source, new URLFilter(source));
}
示例8: testMailtoBackwards
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Checks how a tokenizer built with {@code Version.LUCENE_3_4} handles a {@code mailto:}
 * address: the expected output splits it at the {@code @} into {@code "mailto:test"} and
 * {@code "example.org"}.
 *
 * @deprecated remove this and sophisticated backwards layer in 5.0
 */
@Deprecated
public void testMailtoBackwards() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_3_4, reader);
      return new TokenStreamComponents(tokenizer);
    }
  };
  // The input must be a real mailto: address; the expected tokens are its two halves
  // around the '@'.
  assertAnalyzesTo(a, "mailto:test@example.org",
      new String[] { "mailto:test", "example.org" });
}
示例9: testVersion36
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Simple check with a tokenizer built with {@code Version.LUCENE_3_6}: U+08E6 is expected
 * to split the word around it, and the trailing e-mail address is expected to come out as
 * a single token.
 *
 * @deprecated uses older unicode (6.0). simple test to make sure it's basically working
 */
@Deprecated
public void testVersion36() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_3_6, reader);
      return new TokenStreamComponents(tokenizer);
    }
  };
  // The last word must be a syntactically valid e-mail address so it can be emitted whole.
  assertAnalyzesTo(a, "this is just a t\u08E6st lucene@apache.org", // new combining mark in 6.1
      new String[] { "this", "is", "just", "a", "t", "st", "lucene@apache.org" });
}
示例10: testVersion40
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Simple check with a tokenizer built with {@code Version.LUCENE_4_0}: U+061C is expected
 * to split the word around it, and the trailing e-mail address is expected to come out as
 * a single token.
 *
 * @deprecated uses older unicode (6.1). simple test to make sure it's basically working
 */
@Deprecated
public void testVersion40() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_4_0, reader);
      return new TokenStreamComponents(tokenizer);
    }
  };
  // U+061C is a new combining mark in 6.3, found using "[[\p{WB:Format}\p{WB:Extend}]&[^\p{Age:6.2}]]"
  // on the online UnicodeSet utility: <http://unicode.org/cldr/utility/list-unicodeset.jsp>
  // The last word must be a syntactically valid e-mail address so it can be emitted whole.
  assertAnalyzesTo(a, "this is just a t\u061Cst lucene@apache.org",
      new String[] { "this", "is", "just", "a", "t", "st", "lucene@apache.org" });
}
示例11: testHugeDoc
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Tokenizes 4094 spaces followed by {@code "testing 1234"} with a tokenizer built for
 * {@code TEST_VERSION_CURRENT} and expects the tokens {@code "testing"} and {@code "1234"}.
 */
public void testHugeDoc() throws IOException {
  // NOTE(review): the 4094-space prefix presumably targets an internal buffer edge — confirm.
  final char[] blanks = new char[4094];
  Arrays.fill(blanks, ' ');

  final StringBuilder doc = new StringBuilder();
  doc.append(blanks).append("testing 1234");

  final StringReader source = new StringReader(doc.toString());
  final UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, source);
  BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
}
示例12: UAX29URLEmailTokenizer
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Builds analysis components consisting solely of a {@code UAX29URLEmailTokenizer}
 * constructed with {@code TEST_VERSION_CURRENT} over {@code reader}.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  return new TokenStreamComponents(new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader));
}
示例13: createComponents
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Builds a chain of a {@code UAX29URLEmailTokenizer} for {@code TEST_VERSION_CURRENT}
 * (maximum token length raised to {@code Integer.MAX_VALUE}) followed by a {@code URLFilter}.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  final UAX29URLEmailTokenizer urlSource = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader);
  // Tokenize arbitrary length URLs
  urlSource.setMaxTokenLength(Integer.MAX_VALUE);
  final TokenFilter urlsOnly = new URLFilter(urlSource);
  return new TokenStreamComponents(urlSource, urlsOnly);
}
示例14: testMailtoBackwards
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Checks how a tokenizer built with {@code Version.LUCENE_34} handles a {@code mailto:}
 * address: the expected output splits it at the {@code @} into {@code "mailto:test"} and
 * {@code "example.org"}.
 *
 * @deprecated remove this and sophisticated backwards layer in 5.0
 */
@Deprecated
public void testMailtoBackwards() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_34, reader);
      return new TokenStreamComponents(tokenizer);
    }
  };
  // The input must be a real mailto: address; the expected tokens are its two halves
  // around the '@'.
  assertAnalyzesTo(a, "mailto:test@example.org",
      new String[] { "mailto:test", "example.org" });
}
示例15: testVersion36
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Simple check with a tokenizer built with {@code Version.LUCENE_36}: U+08E6 is expected
 * to split the word around it, and the trailing e-mail address is expected to come out as
 * a single token.
 *
 * @deprecated uses older unicode (6.0). simple test to make sure it's basically working
 */
@Deprecated
public void testVersion36() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_36, reader);
      return new TokenStreamComponents(tokenizer);
    }
  };
  // The last word must be a syntactically valid e-mail address so it can be emitted whole.
  assertAnalyzesTo(a, "this is just a t\u08E6st lucene@apache.org", // new combining mark in 6.1
      new String[] { "this", "is", "just", "a", "t", "st", "lucene@apache.org" });
}