This article collects typical usage examples of the Java class org.apache.lucene.analysis.core.LowerCaseTokenizer. If you are wondering what LowerCaseTokenizer is for, how to use it, or what working code looks like, the curated examples below should help.
The LowerCaseTokenizer class belongs to the org.apache.lucene.analysis.core package. 15 code examples are shown below, ordered by popularity.
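Before the individual examples, here is a minimal, self-contained sketch of what the tokenizer does: it splits text on non-letter characters and lower-cases every token in a single pass. Note that the examples below target different Lucene releases; this sketch assumes a Lucene 4.10.x-style constructor that takes a Reader directly (as in Examples 2-7). Earlier 4.x releases also take a Version argument (as in Examples 8-15), while 5.x and later use a no-argument constructor with the input supplied via setReader (as in Example 1).
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class LowerCaseTokenizerDemo {
  public static void main(String[] args) throws IOException {
    // "Fischen Trinken" is split on the space and lower-cased: [fischen, trinken]
    Tokenizer tokenizer = new LowerCaseTokenizer(new StringReader("Fischen Trinken"));
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    tokenizer.reset();                       // mandatory before the first incrementToken()
    while (tokenizer.incrementToken()) {
      System.out.println(term.toString());   // prints "fischen", then "trinken"
    }
    tokenizer.end();
    tokenizer.close();
  }
}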
Example 1: url
import org.apache.lucene.analysis.core.LowerCaseTokenizer; // import the required package/class
public static void url(String id, URL u, String url_in, SpimeDB db, float pri) {
  DObject p = db.get(id);
  Long whenCached = p != null ? p.get("url_cached") : null;
  try {
    // re-crawl only when there is no cached copy or the remote resource is newer
    if (whenCached == null || whenCached < u.openConnection().getLastModified()) {
      String urlString = u.toString();
      // tokenize the URL string itself into lower-case keywords
      Set<String> keywords = parseKeywords(new LowerCaseTokenizer(), urlString);
      MutableNObject n = new MutableNObject(id)
          .withTags(keywords.toArray(new String[keywords.size()]))
          .put("url_in", url_in)
          .put("url", urlString);
      //logger.info("crawl {}", n);
      db.addAsync(pri, n);
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
}
Example 2: testWithKeywordAttribute
import org.apache.lucene.analysis.core.LowerCaseTokenizer; // import the required package/class
public void testWithKeywordAttribute() throws IOException {
  CharArraySet set = new CharArraySet(1, true);
  set.add("fischen");
  // tokens found in the keyword set are marked and passed through the stemmer unchanged
  GermanStemFilter filter = new GermanStemFilter(
      new SetKeywordMarkerFilter(new LowerCaseTokenizer(new StringReader(
          "Fischen Trinken")), set));
  assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
}
Example 3: testWithKeywordAttribute
import org.apache.lucene.analysis.core.LowerCaseTokenizer; // import the required package/class
public void testWithKeywordAttribute() throws IOException {
  CharArraySet set = new CharArraySet(1, true);
  set.add("Brasília");
  // the set is case-insensitive, so lower-cased "brasília" is protected from stemming
  BrazilianStemFilter filter = new BrazilianStemFilter(
      new SetKeywordMarkerFilter(new LowerCaseTokenizer(new StringReader(
          "Brasília Brasilia")), set));
  assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
}
Example 4: testReadSupplementaryChars
import org.apache.lucene.analysis.core.LowerCaseTokenizer; // import the required package/class
public void testReadSupplementaryChars() throws IOException {
  StringBuilder builder = new StringBuilder();
  // create random input
  int num = 1024 + random().nextInt(1024);
  num *= RANDOM_MULTIPLIER;
  for (int i = 1; i < num; i++) {
    builder.append("\ud801\udc1cabc");
    if ((i % 10) == 0) {
      builder.append(" ");
    }
  }
  // the internal buffer size is 1024; make sure a surrogate pair sits right at the border
  builder.insert(1023, "\ud801\udc1c");
  Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory(), new StringReader(builder.toString()));
  assertTokenStreamContents(tokenizer, builder.toString().toLowerCase(Locale.ROOT).split(" "));
}
Example 5: testExtendCharBuffer
import org.apache.lucene.analysis.core.LowerCaseTokenizer; // import the required package/class
public void testExtendCharBuffer() throws IOException {
  for (int i = 0; i < 40; i++) {
    StringBuilder builder = new StringBuilder();
    for (int j = 0; j < 1 + i; j++) {
      builder.append("a");
    }
    // shift the surrogate pair one position per iteration to probe buffer-extension boundaries
    builder.append("\ud801\udc1cabc");
    Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory(), new StringReader(builder.toString()));
    assertTokenStreamContents(tokenizer, new String[] { builder.toString().toLowerCase(Locale.ROOT) });
  }
}
Example 6: testMaxWordLength
import org.apache.lucene.analysis.core.LowerCaseTokenizer; // import the required package/class
public void testMaxWordLength() throws IOException {
  StringBuilder builder = new StringBuilder();
  for (int i = 0; i < 255; i++) {
    builder.append("A");
  }
  // 255 is the maximum token length, so a 510-char run is emitted as two tokens
  Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory(), new StringReader(builder.toString() + builder.toString()));
  assertTokenStreamContents(tokenizer, new String[] { builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT) });
}
Example 7: testMaxWordLengthWithSupplementary
import org.apache.lucene.analysis.core.LowerCaseTokenizer; // import the required package/class
public void testMaxWordLengthWithSupplementary() throws IOException {
  StringBuilder builder = new StringBuilder();
  for (int i = 0; i < 254; i++) {
    builder.append("A");
  }
  // 254 BMP chars plus one supplementary code point reach the 255-code-point token limit
  builder.append("\ud801\udc1c");
  Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory(), new StringReader(builder.toString() + builder.toString()));
  assertTokenStreamContents(tokenizer, new String[] { builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT) });
}
Example 8: testWithKeywordAttribute
import org.apache.lucene.analysis.core.LowerCaseTokenizer; // import the required package/class
public void testWithKeywordAttribute() throws IOException {
  // older Lucene 4.x API: analysis constructors take a Version argument
  CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
  set.add("fischen");
  GermanStemFilter filter = new GermanStemFilter(
      new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
          "Fischen Trinken")), set));
  assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
}
Example 9: testWithKeywordAttribute
import org.apache.lucene.analysis.core.LowerCaseTokenizer; // import the required package/class
public void testWithKeywordAttribute() throws IOException {
  CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
  set.add("Brasília");
  BrazilianStemFilter filter = new BrazilianStemFilter(
      new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
          "Brasília Brasilia")), set));
  assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
}
Example 10: testReadSupplementaryChars
import org.apache.lucene.analysis.core.LowerCaseTokenizer; // import the required package/class
public void testReadSupplementaryChars() throws IOException {
  StringBuilder builder = new StringBuilder();
  // create random input
  int num = 1024 + random().nextInt(1024);
  num *= RANDOM_MULTIPLIER;
  for (int i = 1; i < num; i++) {
    builder.append("\ud801\udc1cabc");
    if ((i % 10) == 0) {
      builder.append(" ");
    }
  }
  // the internal buffer size is 1024; make sure a surrogate pair sits right at the border
  builder.insert(1023, "\ud801\udc1c");
  Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString()));
  assertTokenStreamContents(tokenizer, builder.toString().toLowerCase(Locale.ROOT).split(" "));
}
Example 11: testExtendCharBuffer
import org.apache.lucene.analysis.core.LowerCaseTokenizer; // import the required package/class
public void testExtendCharBuffer() throws IOException {
  for (int i = 0; i < 40; i++) {
    StringBuilder builder = new StringBuilder();
    for (int j = 0; j < 1 + i; j++) {
      builder.append("a");
    }
    builder.append("\ud801\udc1cabc");
    Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString()));
    assertTokenStreamContents(tokenizer, new String[] { builder.toString().toLowerCase(Locale.ROOT) });
  }
}
Example 12: testMaxWordLength
import org.apache.lucene.analysis.core.LowerCaseTokenizer; // import the required package/class
public void testMaxWordLength() throws IOException {
  StringBuilder builder = new StringBuilder();
  for (int i = 0; i < 255; i++) {
    builder.append("A");
  }
  Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
  assertTokenStreamContents(tokenizer, new String[] { builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT) });
}
Example 13: testMaxWordLengthWithSupplementary
import org.apache.lucene.analysis.core.LowerCaseTokenizer; // import the required package/class
public void testMaxWordLengthWithSupplementary() throws IOException {
  StringBuilder builder = new StringBuilder();
  for (int i = 0; i < 254; i++) {
    builder.append("A");
  }
  builder.append("\ud801\udc1c");
  Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
  assertTokenStreamContents(tokenizer, new String[] { builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT) });
}
Example 14: testWithKeywordAttribute
import org.apache.lucene.analysis.core.LowerCaseTokenizer; // import the required package/class
public void testWithKeywordAttribute() throws IOException {
  CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
  set.add("fischen");
  GermanStemFilter filter = new GermanStemFilter(
      new SetKeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
          "Fischen Trinken")), set));
  assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
}
Example 15: testWithKeywordAttribute
import org.apache.lucene.analysis.core.LowerCaseTokenizer; // import the required package/class
public void testWithKeywordAttribute() throws IOException {
  CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
  set.add("Brasília");
  BrazilianStemFilter filter = new BrazilianStemFilter(
      new SetKeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(
          "Brasília Brasilia")), set));
  assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
}