当前位置: 首页>>代码示例>>Java>>正文


Java ASCIIFoldingFilterFactory类代码示例

本文整理汇总了Java中org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory的典型用法代码示例。如果您正苦于以下问题:Java ASCIIFoldingFilterFactory类的具体用法?Java ASCIIFoldingFilterFactory怎么用?Java ASCIIFoldingFilterFactory使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


ASCIIFoldingFilterFactory类属于org.apache.lucene.analysis.miscellaneous包,在下文中一共展示了ASCIIFoldingFilterFactory类的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testDefaultCopiedToMulti

import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory; //导入依赖的package包/类
/**
 * Checks the multi-term analyzer of the "content_ws" field: it must be a
 * TokenizerChain built on a KeywordTokenizerFactory, every token filter
 * factory in it must be an ASCIIFoldingFilterFactory or a
 * LowerCaseFilterFactory, and its char filter factories must be null.
 */
@Test
public void testDefaultCopiedToMulti() {
  TextField textType = (TextField) h.getCore().getLatestSchema().getField("content_ws").getType();
  Analyzer multiTermAnalyzer = textType.getMultiTermAnalyzer();
  assertTrue(multiTermAnalyzer instanceof TokenizerChain);

  // Safe to cast from here on: the instanceof assertion above has passed.
  TokenizerChain chain = (TokenizerChain) multiTermAnalyzer;
  assertTrue(chain.getTokenizerFactory() instanceof KeywordTokenizerFactory);

  for (TokenFilterFactory filterFactory : chain.getTokenFilterFactories()) {
    boolean foldsAscii = filterFactory instanceof ASCIIFoldingFilterFactory;
    boolean lowerCases = filterFactory instanceof LowerCaseFilterFactory;
    assertTrue(foldsAscii || lowerCases);
  }

  assertTrue(chain.getCharFilterFactories() == null);
}
 
开发者ID:europeana,项目名称:search,代码行数:15,代码来源:MultiTermTest.java

示例2: testASCIIFolding

import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory; //导入依赖的package包/类
/**
 * Runs the single token "Česká" through a filter created by
 * ASCIIFoldingFilterFactory (initialised with no arguments) and expects the
 * stream to contain exactly the token "Ceska".
 */
public void testASCIIFolding() throws Exception {
  Tokenizer source = new MockTokenizer(new StringReader("Česká"), MockTokenizer.WHITESPACE, false);

  ASCIIFoldingFilterFactory foldingFactory = new ASCIIFoldingFilterFactory();
  foldingFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  // Explicit type witness keeps the empty map typed as Map<String, String>.
  foldingFactory.init(Collections.<String, String>emptyMap());

  TokenStream folded = foldingFactory.create(source);
  assertTokenStreamContents(folded, new String[] { "Ceska" });
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:14,代码来源:TestStandardFactories.java

示例3: testDefaultCopiedToMulti

import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory; //导入依赖的package包/类
/**
 * Looks up the "content_ws" field and verifies its multi-term analyzer:
 * a TokenizerChain with a KeywordTokenizerFactory, token filter factories
 * limited to ASCIIFoldingFilterFactory and LowerCaseFilterFactory, and
 * null char filter factories.
 */
@Test
public void testDefaultCopiedToMulti() {
  final SchemaField contentWs = h.getCore().getSchema().getField("content_ws");
  final Analyzer multiTerm = ((TextField) contentWs.getType()).getMultiTermAnalyzer();
  assertTrue(multiTerm instanceof TokenizerChain);

  final TokenizerChain chain = (TokenizerChain) multiTerm;
  assertTrue(chain.getTokenizerFactory() instanceof KeywordTokenizerFactory);

  for (final TokenFilterFactory candidate : chain.getTokenFilterFactories()) {
    if (candidate instanceof ASCIIFoldingFilterFactory) {
      continue;
    }
    // Anything that is not ASCII folding has to be the lower-case filter.
    assertTrue(candidate instanceof LowerCaseFilterFactory);
  }

  assertTrue(chain.getCharFilterFactories() == null);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:15,代码来源:MultiTermTest.java

示例4: registerWithPrefix

import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory; //导入依赖的package包/类
/**
 * Registers five analyzer definitions on the given builder. Each definition is
 * named by concatenating {@code prefix} with one of the
 * {@code HibernateSearchAnalyzer} constants (KEYWORD, KEYWORD_CLEAN, TEXT,
 * TEXT_STEMMING, TEXT_SORT).
 *
 * @param prefix string prepended to every analyzer name registered here
 * @param builder registry builder that receives the analyzer definitions
 */
protected void registerWithPrefix(String prefix, LuceneAnalyzerDefinitionRegistryBuilder builder) {
	// KEYWORD: keyword tokenizer only, no token filters.
	builder.analyzer(prefix + HibernateSearchAnalyzer.KEYWORD).tokenizer(KeywordTokenizerFactory.class);
	
	// KEYWORD_CLEAN: keyword tokenizer, then ASCII folding and lower-casing.
	builder.analyzer(prefix + HibernateSearchAnalyzer.KEYWORD_CLEAN).tokenizer(KeywordTokenizerFactory.class)
		.tokenFilter(ASCIIFoldingFilterFactory.class)
		.tokenFilter(LowerCaseFilterFactory.class);
	
	// TEXT: whitespace tokenizer, ASCII folding, word delimiter filter
	// (parameters below), then lower-casing.
	builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT).tokenizer(WhitespaceTokenizerFactory.class)
			.tokenFilter(ASCIIFoldingFilterFactory.class)
			.tokenFilter(WordDelimiterFilterFactory.class)
					.param("generateWordParts", "1")
					.param("generateNumberParts", "1")
					.param("catenateWords", "0")
					.param("catenateNumbers", "0")
					.param("catenateAll", "0")
					.param("splitOnCaseChange", "0")
					.param("splitOnNumerics", "0")
					.param("preserveOriginal", "1")
			.tokenFilter(LowerCaseFilterFactory.class);
	
	// TEXT_STEMMING: same chain and word delimiter parameters as TEXT, with a
	// CoreFrenchMinimalStemFilterFactory appended as the last filter.
	builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT_STEMMING).tokenizer(WhitespaceTokenizerFactory.class)
			.tokenFilter(ASCIIFoldingFilterFactory.class)
			.tokenFilter(WordDelimiterFilterFactory.class)
					.param("generateWordParts", "1")
					.param("generateNumberParts", "1")
					.param("catenateWords", "0")
					.param("catenateNumbers", "0")
					.param("catenateAll", "0")
					.param("splitOnCaseChange", "0")
					.param("splitOnNumerics", "0")
					.param("preserveOriginal", "1")
			.tokenFilter(LowerCaseFilterFactory.class)
			.tokenFilter(CoreFrenchMinimalStemFilterFactory.class);
	
	// TEXT_SORT: keyword tokenizer, ASCII folding, lower-casing, two pattern
	// replacements, then a trim filter.
	builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT_SORT).tokenizer(KeywordTokenizerFactory.class)
			.tokenFilter(ASCIIFoldingFilterFactory.class)
			.tokenFilter(LowerCaseFilterFactory.class)
			// NOTE(review): as written this regex is a capturing group that matches
			// the literal character sequence '-&.,() in that exact order, not any one
			// of those characters. A character class may have been intended; confirm
			// before changing, since it would alter the values produced for sorting.
			.tokenFilter(PatternReplaceFilterFactory.class)
					.param("pattern", "('-&\\.,\\(\\))")
					.param("replacement", " ")
					.param("replace", "all")
			// Replaces every character that is not a digit 0-9, a Unicode letter or
			// a space with the empty string.
			.tokenFilter(PatternReplaceFilterFactory.class)
					.param("pattern", "([^0-9\\p{L} ])")
					.param("replacement", "")
					.param("replace", "all")
			.tokenFilter(TrimFilterFactory.class);
	
}
 
开发者ID:openwide-java,项目名称:owsi-core-parent,代码行数:49,代码来源:CoreLuceneAnalyzersDefinitionProvider.java


注:本文中的org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。