当前位置: 首页>>代码示例>>Java>>正文


Java UAX29URLEmailTokenizer类代码示例

本文整理汇总了Java中org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer的典型用法代码示例。如果您正苦于以下问题:Java UAX29URLEmailTokenizer类的具体用法?Java UAX29URLEmailTokenizer怎么用?Java UAX29URLEmailTokenizer使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


UAX29URLEmailTokenizer类属于org.apache.lucene.analysis.standard包,在下文中一共展示了UAX29URLEmailTokenizer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testCombiningMarksBackwards

import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Runs base characters followed by a combining voiced-sound mark through a tokenizer
 * constructed with {@code Version.LUCENE_3_1} and checks the single resulting term.
 *
 * <p>The combining mark is written as the escape {@code \u3099} (COMBINING KATAKANA-HIRAGANA
 * VOICED SOUND MARK) instead of a raw character, so the decomposed form cannot be silently
 * folded into a precomposed character by an editor or a copy/paste step.
 *
 * @deprecated remove this and sophisticated backwards layer in 5.0
 */
@Deprecated
public void testCombiningMarksBackwards() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      // The analysis chain consists of the tokenizer alone.
      Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_3_1, reader);
      return new TokenStreamComponents(tokenizer);
    }
  };
  // The trailing Bug/Works labels record whether the mark is dropped from or kept on the term.
  checkOneTerm(a, "さ\u3099", "さ"); // hiragana Bug
  checkOneTerm(a, "サ\u3099", "サ\u3099"); // katakana Works
  checkOneTerm(a, "壹\u3099", "壹"); // ideographic Bug
  checkOneTerm(a, "아\u3099", "아\u3099"); // hangul Works
}
 
开发者ID:europeana,项目名称:search,代码行数:18,代码来源:TestUAX29URLEmailTokenizer.java

示例2: testCombiningMarksBackwards

import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Runs base characters followed by a combining voiced-sound mark through a tokenizer
 * constructed with {@code Version.LUCENE_31} and checks the single resulting term.
 *
 * <p>The combining mark is written as the escape {@code \u3099} (COMBINING KATAKANA-HIRAGANA
 * VOICED SOUND MARK) instead of a raw character, so the decomposed form cannot be silently
 * folded into a precomposed character by an editor or a copy/paste step.
 *
 * @deprecated remove this and sophisticated backwards layer in 5.0
 */
@Deprecated
public void testCombiningMarksBackwards() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      // The analysis chain consists of the tokenizer alone.
      Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_31, reader);
      return new TokenStreamComponents(tokenizer);
    }
  };
  // The trailing Bug/Works labels record whether the mark is dropped from or kept on the term.
  checkOneTerm(a, "さ\u3099", "さ"); // hiragana Bug
  checkOneTerm(a, "サ\u3099", "サ\u3099"); // katakana Works
  checkOneTerm(a, "壹\u3099", "壹"); // ideographic Bug
  checkOneTerm(a, "아\u3099", "아\u3099"); // hangul Works
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:18,代码来源:TestUAX29URLEmailTokenizer.java

示例3: testLongEMAILatomText

import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Tokenizes a long random string built from the characters listed below, first with the
 * tokenizer's initial settings and then again after changing the maximum token length,
 * asserting that each pass yields at least one token.
 */
public void testLongEMAILatomText() throws Exception {
  // EMAILatomText = [A-Za-z0-9!#$%&'*+-/=?\^_`{|}~]
  final char[] alphabet
      = "!#$%&'*+,-./0123456789=?ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz{|}~".toCharArray();
  final int length = TestUtil.nextInt(random(), 100 * 1024, 3 * 1024 * 1024);
  final StringBuilder sb = new StringBuilder();
  for (int remaining = length; remaining > 0; --remaining) {
    int pick = random().nextInt(alphabet.length);
    sb.append(alphabet[pick]);
  }
  final String text = sb.toString();

  // First pass: tokenizer as constructed.
  UAX29URLEmailTokenizer ts = new UAX29URLEmailTokenizer(new StringReader(text));
  int firstPassTokens = 0;
  ts.reset();
  while (ts.incrementToken()) {
    firstPassTokens += 1;
  }
  ts.end();
  ts.close();
  assertTrue(firstPassTokens > 0);

  // Second pass: same tokenizer instance, same text, randomly chosen max token length.
  int secondPassTokens = 0;
  int newBufferSize = TestUtil.nextInt(random(), 200, 8192);
  ts.setMaxTokenLength(newBufferSize);
  ts.setReader(new StringReader(text));
  ts.reset();
  while (ts.incrementToken()) {
    secondPassTokens += 1;
  }
  ts.end();
  ts.close();
  assertTrue(secondPassTokens > 0);
}
 
开发者ID:europeana,项目名称:search,代码行数:33,代码来源:TestUAX29URLEmailTokenizer.java

示例4: testHugeDoc

import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Tokenizes "testing 1234" preceded by 4094 spaces and expects exactly the two tokens
 * "testing" and "1234".
 *
 * @throws IOException if tokenization fails
 */
public void testHugeDoc() throws IOException {
  // NOTE(review): 4094 presumably pushes the text across an internal buffer boundary - confirm.
  final char[] padding = new char[4094];
  Arrays.fill(padding, ' ');
  final String input = new String(padding) + "testing 1234";
  final String[] expected = { "testing", "1234" };
  UAX29URLEmailTokenizer tokenizer =
      new UAX29URLEmailTokenizer(newAttributeFactory(), new StringReader(input));
  BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, expected);
}
 
开发者ID:europeana,项目名称:search,代码行数:11,代码来源:TestUAX29URLEmailTokenizer.java

示例5: UAX29URLEmailTokenizer

import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Builds an analysis chain made of a single {@code UAX29URLEmailTokenizer} over {@code reader}.
 *
 * @param fieldName name of the field being analyzed (not used here)
 * @param reader source of the text to tokenize
 * @return components wrapping only the tokenizer
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  return new TokenStreamComponents(new UAX29URLEmailTokenizer(newAttributeFactory(), reader));
}
 
开发者ID:europeana,项目名称:search,代码行数:8,代码来源:TestUAX29URLEmailTokenizer.java

示例6: incrementToken

import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Advances the wrapped stream until it reaches a token whose type is the URL token type,
 * skipping every token of any other type.
 *
 * @return {@code true} if a URL-typed token was reached; {@code false} once the wrapped
 *     stream has no more tokens
 * @throws java.io.IOException if the wrapped stream throws while advancing
 */
@Override
public final boolean incrementToken() throws java.io.IOException {
  final String urlType = UAX29URLEmailTokenizer.TOKEN_TYPES[UAX29URLEmailTokenizer.URL];
  while (input.incrementToken()) {
    // Compare by value: reference equality on strings only holds while every producer
    // hands out the very same String instance.
    if (urlType.equals(typeAtt.type())) {
      return true;
    }
  }
  return false;
}
 
开发者ID:europeana,项目名称:search,代码行数:12,代码来源:TestUAX29URLEmailTokenizer.java

示例7: createComponents

import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Builds an analysis chain of a {@code UAX29URLEmailTokenizer} followed by a {@code URLFilter}.
 *
 * @param fieldName name of the field being analyzed (not used here)
 * @param reader source of the text to tokenize
 * @return components with the tokenizer as source and the URL filter as the end of the chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  final UAX29URLEmailTokenizer source = new UAX29URLEmailTokenizer(newAttributeFactory(), reader);
  // Tokenize arbitrary length URLs
  source.setMaxTokenLength(Integer.MAX_VALUE);
  return new TokenStreamComponents(source, new URLFilter(source));
}
 
开发者ID:europeana,项目名称:search,代码行数:8,代码来源:TestUAX29URLEmailTokenizer.java

示例8: testMailtoBackwards

import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Checks how a {@code mailto:} address is tokenized when the tokenizer is constructed with
 * {@code Version.LUCENE_3_4}: the expected output splits the text at the {@code '@'} into
 * "mailto:test" and "example.org".
 *
 * @deprecated remove this and sophisticated backwards layer in 5.0
 */
@Deprecated
public void testMailtoBackwards()  throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_3_4, reader);
      return new TokenStreamComponents(tokenizer);
    }
  };
  // The input must be a real address; the expected tokens are its two halves around '@'.
  assertAnalyzesTo(a, "mailto:test@example.org",
      new String[] { "mailto:test", "example.org" });
}
 
开发者ID:europeana,项目名称:search,代码行数:14,代码来源:TestUAX29URLEmailTokenizer.java

示例9: testVersion36

import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Simple check that a tokenizer constructed with {@code Version.LUCENE_3_6} is basically
 * working: U+08E6 is expected to split "t\u08E6st" into "t" and "st", and the e-mail
 * address is expected to come out as a single token.
 *
 * @deprecated uses older unicode (6.0). simple test to make sure its basically working
 */
@Deprecated
public void testVersion36() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_3_6, reader);
      return new TokenStreamComponents(tokenizer);
    }
  };
  // The address only has to be syntactically valid and identical in input and expectation.
  assertAnalyzesTo(a, "this is just a t\u08E6st lucene@apache.org", // new combining mark in 6.1
      new String[] { "this", "is", "just", "a", "t", "st", "lucene@apache.org" });
}
 
开发者ID:europeana,项目名称:search,代码行数:14,代码来源:TestUAX29URLEmailTokenizer.java

示例10: testVersion40

import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Simple check that a tokenizer constructed with {@code Version.LUCENE_4_0} is basically
 * working: U+061C is expected to split "t\u061Cst" into "t" and "st", and the e-mail
 * address is expected to come out as a single token.
 *
 * @deprecated uses older unicode (6.1). simple test to make sure its basically working
 */
@Deprecated
public void testVersion40() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_4_0, reader);
      return new TokenStreamComponents(tokenizer);
    }
  };
  // U+061C is a new combining mark in 6.3, found using "[[\p{WB:Format}\p{WB:Extend}]&[^\p{Age:6.2}]]"
  // on the online UnicodeSet utility: <http://unicode.org/cldr/utility/list-unicodeset.jsp>
  // The address only has to be syntactically valid and identical in input and expectation.
  assertAnalyzesTo(a, "this is just a t\u061Cst lucene@apache.org",
      new String[] { "this", "is", "just", "a", "t", "st", "lucene@apache.org" });
}
 
开发者ID:europeana,项目名称:search,代码行数:16,代码来源:TestUAX29URLEmailTokenizer.java

示例11: testHugeDoc

import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Tokenizes "testing 1234" preceded by 4094 spaces and expects exactly the two tokens
 * "testing" and "1234".
 *
 * @throws IOException if tokenization fails
 */
public void testHugeDoc() throws IOException {
  // NOTE(review): 4094 presumably pushes the text across an internal buffer boundary - confirm.
  final char[] padding = new char[4094];
  Arrays.fill(padding, ' ');
  final String input = new String(padding) + "testing 1234";
  final String[] expected = { "testing", "1234" };
  UAX29URLEmailTokenizer tokenizer =
      new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
  BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, expected);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:11,代码来源:TestUAX29URLEmailTokenizer.java

示例12: UAX29URLEmailTokenizer

import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Builds an analysis chain made of a single {@code UAX29URLEmailTokenizer} over {@code reader},
 * constructed with {@code TEST_VERSION_CURRENT}.
 *
 * @param fieldName name of the field being analyzed (not used here)
 * @param reader source of the text to tokenize
 * @return components wrapping only the tokenizer
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  return new TokenStreamComponents(new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader));
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:8,代码来源:TestUAX29URLEmailTokenizer.java

示例13: createComponents

import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Builds an analysis chain of a {@code UAX29URLEmailTokenizer} (constructed with
 * {@code TEST_VERSION_CURRENT}) followed by a {@code URLFilter}.
 *
 * @param fieldName name of the field being analyzed (not used here)
 * @param reader source of the text to tokenize
 * @return components with the tokenizer as source and the URL filter as the end of the chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  final UAX29URLEmailTokenizer source = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader);
  // Tokenize arbitrary length URLs
  source.setMaxTokenLength(Integer.MAX_VALUE);
  return new TokenStreamComponents(source, new URLFilter(source));
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:8,代码来源:TestUAX29URLEmailTokenizer.java

示例14: testMailtoBackwards

import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Checks how a {@code mailto:} address is tokenized when the tokenizer is constructed with
 * {@code Version.LUCENE_34}: the expected output splits the text at the {@code '@'} into
 * "mailto:test" and "example.org".
 *
 * @deprecated remove this and sophisticated backwards layer in 5.0
 */
@Deprecated
public void testMailtoBackwards()  throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_34, reader);
      return new TokenStreamComponents(tokenizer);
    }
  };
  // The input must be a real address; the expected tokens are its two halves around '@'.
  assertAnalyzesTo(a, "mailto:test@example.org",
      new String[] { "mailto:test", "example.org" });
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:14,代码来源:TestUAX29URLEmailTokenizer.java

示例15: testVersion36

import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; //导入依赖的package包/类
/**
 * Simple check that a tokenizer constructed with {@code Version.LUCENE_36} is basically
 * working: U+08E6 is expected to split "t\u08E6st" into "t" and "st", and the e-mail
 * address is expected to come out as a single token.
 *
 * @deprecated uses older unicode (6.0). simple test to make sure its basically working
 */
@Deprecated
public void testVersion36() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new UAX29URLEmailTokenizer(Version.LUCENE_36, reader);
      return new TokenStreamComponents(tokenizer);
    }
  };
  // The address only has to be syntactically valid and identical in input and expectation.
  assertAnalyzesTo(a, "this is just a t\u08E6st lucene@apache.org", // new combining mark in 6.1
      new String[] { "this", "is", "just", "a", "t", "st", "lucene@apache.org" });
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:14,代码来源:TestUAX29URLEmailTokenizer.java


注:本文中的org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。