当前位置: 首页>>代码示例>>Java>>正文


Java LowerCaseTokenizer类代码示例

本文整理汇总了Java中org.apache.lucene.analysis.core.LowerCaseTokenizer的典型用法代码示例。如果您正苦于以下问题:Java LowerCaseTokenizer类的具体用法?Java LowerCaseTokenizer怎么用?Java LowerCaseTokenizer使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


LowerCaseTokenizer类属于org.apache.lucene.analysis.core包,在下文中一共展示了LowerCaseTokenizer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: url

import org.apache.lucene.analysis.core.LowerCaseTokenizer; //导入依赖的package包/类
/**
 * Indexes a URL into the database if it is new or has changed since it was
 * last cached.
 *
 * @param id     database id under which the URL's record is stored
 * @param u      the URL to (re-)crawl
 * @param url_in provenance: the URL this one was discovered in
 * @param db     target database
 * @param pri    priority passed through to the async add
 */
public static void url(String id, URL u, String url_in, SpimeDB db, float pri) {
    DObject p = db.get(id);
    // Timestamp from the previous crawl, or null if this id was never stored.
    Long whenCached = p != null ? p.get("url_cached") : null;
    try {
        // Re-index only when there is no cached copy or the remote resource
        // reports a newer last-modified time than what we cached.
        if (whenCached == null || whenCached < u.openConnection().getLastModified()) {
            String urlString = u.toString();
            Set<String> keywords = parseKeywords(new LowerCaseTokenizer(), urlString);

            MutableNObject n = new MutableNObject(id)
                    // zero-length array idiom: JVM allocates the right size,
                    // avoids the racy/slower pre-sized toArray form
                    .withTags(keywords.toArray(new String[0]))
                    .put("url_in", url_in)
                    .put("url", urlString);

            db.addAsync(pri, n);
        }
    } catch (IOException e) {
        // NOTE(review): best-effort crawl — failure is reported and swallowed.
        // Consider routing through the project's logger instead of stderr.
        e.printStackTrace();
    }
}
 
开发者ID:automenta,项目名称:spimedb,代码行数:24,代码来源:Crawl.java

示例2: testWithKeywordAttribute

import org.apache.lucene.analysis.core.LowerCaseTokenizer; //导入依赖的package包/类
/**
 * Verifies that the German stemmer respects the keyword attribute:
 * "fischen" is protected from stemming while "Trinken" is stemmed to "trink".
 */
public void testWithKeywordAttribute() throws IOException {
  CharArraySet protectedWords = new CharArraySet(1, true);
  protectedWords.add("fischen");
  LowerCaseTokenizer source = new LowerCaseTokenizer(new StringReader("Fischen Trinken"));
  SetKeywordMarkerFilter marked = new SetKeywordMarkerFilter(source, protectedWords);
  GermanStemFilter filter = new GermanStemFilter(marked);
  assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
}
 
开发者ID:europeana,项目名称:search,代码行数:9,代码来源:TestGermanAnalyzer.java

示例3: testWithKeywordAttribute

import org.apache.lucene.analysis.core.LowerCaseTokenizer; //导入依赖的package包/类
/**
 * Verifies that the Brazilian stemmer respects the keyword attribute:
 * "Brasília" is protected from stemming while "Brasilia" is stemmed.
 */
public void testWithKeywordAttribute() throws IOException {
  CharArraySet protectedWords = new CharArraySet(1, true);
  protectedWords.add("Brasília");
  LowerCaseTokenizer source = new LowerCaseTokenizer(new StringReader("Brasília Brasilia"));
  SetKeywordMarkerFilter marked = new SetKeywordMarkerFilter(source, protectedWords);
  BrazilianStemFilter filter = new BrazilianStemFilter(marked);
  assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
}
 
开发者ID:europeana,项目名称:search,代码行数:9,代码来源:TestBrazilianStemmer.java

示例4: testReadSupplementaryChars

import org.apache.lucene.analysis.core.LowerCaseTokenizer; //导入依赖的package包/类
/**
 * Feeds random input containing supplementary characters and places a
 * surrogate pair exactly at the tokenizer's internal 1024-char buffer
 * boundary, checking that tokens still come out correctly lowercased.
 */
public void testReadSupplementaryChars() throws IOException {
  // Random input size, scaled by the framework's multiplier.
  int count = (1024 + random().nextInt(1024)) * RANDOM_MULTIPLIER;
  StringBuilder input = new StringBuilder();
  for (int i = 1; i < count; i++) {
    input.append("\ud801\udc1cabc");
    if ((i % 10) == 0) {
      input.append(" ");
    }
  }
  // Internal buffer size is 1024: force a surrogate pair straddling the border.
  input.insert(1023, "\ud801\udc1c");
  String text = input.toString();
  Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory(), new StringReader(text));
  assertTokenStreamContents(tokenizer, text.toLowerCase(Locale.ROOT).split(" "));
}
 
开发者ID:europeana,项目名称:search,代码行数:16,代码来源:TestCharTokenizers.java

示例5: testExtendCharBuffer

import org.apache.lucene.analysis.core.LowerCaseTokenizer; //导入依赖的package包/类
/**
 * Slides a supplementary character across every offset from 1 to 40 by
 * growing a prefix of 'a's, verifying the tokenizer's buffer extension
 * keeps the surrogate pair intact.
 */
public void testExtendCharBuffer() throws IOException {
  for (int prefixLen = 1; prefixLen <= 40; prefixLen++) {
    StringBuilder input = new StringBuilder();
    for (int j = 0; j < prefixLen; j++) {
      input.append('a');
    }
    input.append("\ud801\udc1cabc");
    String text = input.toString();
    Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory(), new StringReader(text));
    assertTokenStreamContents(tokenizer, new String[] { text.toLowerCase(Locale.ROOT) });
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:12,代码来源:TestCharTokenizers.java

示例6: testMaxWordLength

import org.apache.lucene.analysis.core.LowerCaseTokenizer; //导入依赖的package包/类
/**
 * Two back-to-back 255-char runs: the tokenizer must split at its maximum
 * word length and emit both halves, lowercased.
 */
public void testMaxWordLength() throws IOException {
  StringBuilder run = new StringBuilder();
  for (int i = 0; i < 255; i++) {
    run.append('A');
  }
  String half = run.toString();
  Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory(), new StringReader(half + half));
  String expected = half.toLowerCase(Locale.ROOT);
  assertTokenStreamContents(tokenizer, new String[] { expected, expected });
}
 
开发者ID:europeana,项目名称:search,代码行数:10,代码来源:TestCharTokenizers.java

示例7: testMaxWordLengthWithSupplementary

import org.apache.lucene.analysis.core.LowerCaseTokenizer; //导入依赖的package包/类
/**
 * Like testMaxWordLength, but the 255th code point of each run is a
 * supplementary character, so the max-word-length split must not cut a
 * surrogate pair in half.
 */
public void testMaxWordLengthWithSupplementary() throws IOException {
  StringBuilder run = new StringBuilder();
  for (int i = 0; i < 254; i++) {
    run.append('A');
  }
  run.append("\ud801\udc1c");
  String half = run.toString();
  Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory(), new StringReader(half + half));
  String expected = half.toLowerCase(Locale.ROOT);
  assertTokenStreamContents(tokenizer, new String[] { expected, expected });
}
 
开发者ID:europeana,项目名称:search,代码行数:11,代码来源:TestCharTokenizers.java

示例8: testWithKeywordAttribute

import org.apache.lucene.analysis.core.LowerCaseTokenizer; //导入依赖的package包/类
/**
 * Verifies that the German stemmer respects the keyword attribute:
 * "fischen" is protected from stemming while "Trinken" is stemmed to "trink".
 */
public void testWithKeywordAttribute() throws IOException {
  CharArraySet protectedWords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
  protectedWords.add("fischen");
  LowerCaseTokenizer source =
      new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Fischen Trinken"));
  GermanStemFilter filter =
      new GermanStemFilter(new KeywordMarkerFilter(source, protectedWords));
  assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:9,代码来源:TestGermanAnalyzer.java

示例9: testWithKeywordAttribute

import org.apache.lucene.analysis.core.LowerCaseTokenizer; //导入依赖的package包/类
/**
 * Verifies that the Brazilian stemmer respects the keyword attribute:
 * "Brasília" is protected from stemming while "Brasilia" is stemmed.
 */
public void testWithKeywordAttribute() throws IOException {
  CharArraySet protectedWords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
  protectedWords.add("Brasília");
  LowerCaseTokenizer source =
      new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Brasília Brasilia"));
  BrazilianStemFilter filter =
      new BrazilianStemFilter(new KeywordMarkerFilter(source, protectedWords));
  assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:9,代码来源:TestBrazilianStemmer.java

示例10: testReadSupplementaryChars

import org.apache.lucene.analysis.core.LowerCaseTokenizer; //导入依赖的package包/类
/**
 * Feeds random input containing supplementary characters and places a
 * surrogate pair exactly at the tokenizer's internal 1024-char buffer
 * boundary, checking that tokens still come out correctly lowercased.
 */
public void testReadSupplementaryChars() throws IOException {
  // Random input size, scaled by the framework's multiplier.
  int count = (1024 + random().nextInt(1024)) * RANDOM_MULTIPLIER;
  StringBuilder input = new StringBuilder();
  for (int i = 1; i < count; i++) {
    input.append("\ud801\udc1cabc");
    if ((i % 10) == 0) {
      input.append(" ");
    }
  }
  // Internal buffer size is 1024: force a surrogate pair straddling the border.
  input.insert(1023, "\ud801\udc1c");
  String text = input.toString();
  Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
  assertTokenStreamContents(tokenizer, text.toLowerCase(Locale.ROOT).split(" "));
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:16,代码来源:TestCharTokenizers.java

示例11: testExtendCharBuffer

import org.apache.lucene.analysis.core.LowerCaseTokenizer; //导入依赖的package包/类
/**
 * Slides a supplementary character across every offset from 1 to 40 by
 * growing a prefix of 'a's, verifying the tokenizer's buffer extension
 * keeps the surrogate pair intact.
 */
public void testExtendCharBuffer() throws IOException {
  for (int prefixLen = 1; prefixLen <= 40; prefixLen++) {
    StringBuilder input = new StringBuilder();
    for (int j = 0; j < prefixLen; j++) {
      input.append('a');
    }
    input.append("\ud801\udc1cabc");
    String text = input.toString();
    Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
    assertTokenStreamContents(tokenizer, new String[] { text.toLowerCase(Locale.ROOT) });
  }
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:12,代码来源:TestCharTokenizers.java

示例12: testMaxWordLength

import org.apache.lucene.analysis.core.LowerCaseTokenizer; //导入依赖的package包/类
/**
 * Two back-to-back 255-char runs: the tokenizer must split at its maximum
 * word length and emit both halves, lowercased.
 */
public void testMaxWordLength() throws IOException {
  StringBuilder run = new StringBuilder();
  for (int i = 0; i < 255; i++) {
    run.append('A');
  }
  String half = run.toString();
  Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(half + half));
  String expected = half.toLowerCase(Locale.ROOT);
  assertTokenStreamContents(tokenizer, new String[] { expected, expected });
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:10,代码来源:TestCharTokenizers.java

示例13: testMaxWordLengthWithSupplementary

import org.apache.lucene.analysis.core.LowerCaseTokenizer; //导入依赖的package包/类
/**
 * Like testMaxWordLength, but the 255th code point of each run is a
 * supplementary character, so the max-word-length split must not cut a
 * surrogate pair in half.
 */
public void testMaxWordLengthWithSupplementary() throws IOException {
  StringBuilder run = new StringBuilder();
  for (int i = 0; i < 254; i++) {
    run.append('A');
  }
  run.append("\ud801\udc1c");
  String half = run.toString();
  Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(half + half));
  String expected = half.toLowerCase(Locale.ROOT);
  assertTokenStreamContents(tokenizer, new String[] { expected, expected });
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:11,代码来源:TestCharTokenizers.java

示例14: testWithKeywordAttribute

import org.apache.lucene.analysis.core.LowerCaseTokenizer; //导入依赖的package包/类
/**
 * Verifies that the German stemmer respects the keyword attribute:
 * "fischen" is protected from stemming while "Trinken" is stemmed to "trink".
 */
public void testWithKeywordAttribute() throws IOException {
  CharArraySet protectedWords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
  protectedWords.add("fischen");
  LowerCaseTokenizer source =
      new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Fischen Trinken"));
  SetKeywordMarkerFilter marked = new SetKeywordMarkerFilter(source, protectedWords);
  GermanStemFilter filter = new GermanStemFilter(marked);
  assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
}
 
开发者ID:jimaguere,项目名称:Maskana-Gestor-de-Conocimiento,代码行数:9,代码来源:TestGermanAnalyzer.java

示例15: testWithKeywordAttribute

import org.apache.lucene.analysis.core.LowerCaseTokenizer; //导入依赖的package包/类
/**
 * Verifies that the Brazilian stemmer respects the keyword attribute:
 * "Brasília" is protected from stemming while "Brasilia" is stemmed.
 */
public void testWithKeywordAttribute() throws IOException {
  CharArraySet protectedWords = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
  protectedWords.add("Brasília");
  LowerCaseTokenizer source =
      new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader("Brasília Brasilia"));
  SetKeywordMarkerFilter marked = new SetKeywordMarkerFilter(source, protectedWords);
  BrazilianStemFilter filter = new BrazilianStemFilter(marked);
  assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
}
 
开发者ID:jimaguere,项目名称:Maskana-Gestor-de-Conocimiento,代码行数:9,代码来源:TestBrazilianStemmer.java


注:本文中的org.apache.lucene.analysis.core.LowerCaseTokenizer类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。