当前位置: 首页>>代码示例>>Java>>正文


Java WordDelimiterFilterFactory类代码示例

本文整理汇总了Java中org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory的典型用法代码示例。如果您正苦于以下问题:Java WordDelimiterFilterFactory类的具体用法?Java WordDelimiterFilterFactory怎么用?Java WordDelimiterFilterFactory使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


WordDelimiterFilterFactory类属于org.apache.lucene.analysis.miscellaneous包,在下文中一共展示了WordDelimiterFilterFactory类的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getSearchMapping

import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory; //导入依赖的package包/类
/**
 * Builds the Hibernate Search programmatic mapping that declares the Lucene
 * analyzer definitions used by this application (autocomplete edge-gram,
 * phonetic, n-gram, standard, exact, and concept-parent-PIDs analyzers).
 *
 * NOTE: in this fluent API every {@code .param(...)} call configures the most
 * recently added tokenizer/filter, and {@code .tokenizerParam(...)} configures
 * the tokenizer of the current analyzerDef — statement order is significant.
 *
 * @return the fully configured {@link SearchMapping} registered via {@code @Factory}
 */
@Factory
public SearchMapping getSearchMapping() {
	SearchMapping mapping = new SearchMapping();

	// Edge analyzer: the whole input is captured as a single token (regex group 1),
	// lowercased, stop-filtered, then expanded into 3..50-char edge n-grams.
	mapping.analyzerDef("autocompleteEdgeAnalyzer", PatternTokenizerFactory.class)
			.tokenizerParam("pattern", "(.*)")
			.tokenizerParam("group", "1")
			.filter(LowerCaseFilterFactory.class)
			.filter(StopFilterFactory.class)
			.filter(EdgeNGramFilterFactory.class)
			.param("minGramSize", "3")
			.param("maxGramSize", "50")
		// Phonetic analyzer: DoubleMetaphone encoding plus English Snowball stemming.
		.analyzerDef("autocompletePhoneticAnalyzer", StandardTokenizerFactory.class)
			.filter(StandardFilterFactory.class)
			.filter(StopFilterFactory.class)
			.filter(PhoneticFilterFactory.class)
			.param("encoder", "DoubleMetaphone")
			.filter(SnowballPorterFilterFactory.class)
			.param("language", "English")
		// N-gram analyzer: word-delimited, lowercased, 3..20-char interior n-grams.
		.analyzerDef("autocompleteNGramAnalyzer", StandardTokenizerFactory.class)
			.filter(WordDelimiterFilterFactory.class)
			.filter(LowerCaseFilterFactory.class)
			.filter(NGramFilterFactory.class)
			.param("minGramSize", "3")
			.param("maxGramSize", "20")
		// Plain lowercased standard tokenization.
		.analyzerDef("standardAnalyzer", StandardTokenizerFactory.class)
			.filter(LowerCaseFilterFactory.class)
		// Exact analyzer: standard tokenization, no filtering at all.
		.analyzerDef("exactAnalyzer", StandardTokenizerFactory.class)
		// Whitespace-only split, used for the space-separated parent-PID list.
		.analyzerDef("conceptParentPidsAnalyzer", WhitespaceTokenizerFactory.class);

	return mapping;
}
 
开发者ID:jamesagnew,项目名称:hapi-fhir,代码行数:33,代码来源:LuceneSearchMappingFactory.java

示例2: IAViewTextGenAnalyser

import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory; //导入依赖的package包/类
/**
 * Creates an analyser of the given type, wired with the supplied synonym and
 * word-delimiter filter factories.
 *
 * @param synonymFilterFactory        factory producing the synonym token filter
 * @param wordDelimiterFilterFactory  factory producing the word-delimiter token filter
 * @param analyzerType                which analyzer variant to build
 */
public IAViewTextGenAnalyser(SynonymFilterFactory synonymFilterFactory,
                             WordDelimiterFilterFactory wordDelimiterFilterFactory, AnalyzerType analyzerType) {
    this.synonymFilterFactory = synonymFilterFactory;
    this.wordDelimiterFilterFactory = wordDelimiterFilterFactory;
    this.analyzerType = analyzerType;
}
 
开发者ID:nationalarchives,项目名称:taxonomy,代码行数:11,代码来源:IAViewTextGenAnalyser.java

示例3: IAViewTextCasNoPuncAnalyser

import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory; //导入依赖的package包/类
/**
 * Creates a case-sensitive, punctuation-stripping analyser of the given type,
 * wired with the supplied synonym and word-delimiter filter factories.
 *
 * @param synonymFilterFactory        factory producing the synonym token filter
 * @param wordDelimiterFilterFactory  factory producing the word-delimiter token filter
 * @param analyzerType                which analyzer variant to build
 */
public IAViewTextCasNoPuncAnalyser(SynonymFilterFactory synonymFilterFactory,
                                   WordDelimiterFilterFactory wordDelimiterFilterFactory, AnalyzerType analyzerType) {
    this.synonymFilterFactory = synonymFilterFactory;
    this.wordDelimiterFilterFactory = wordDelimiterFilterFactory;
    this.analyzerType = analyzerType;
}
 
开发者ID:nationalarchives,项目名称:taxonomy,代码行数:11,代码来源:IAViewTextCasNoPuncAnalyser.java

示例4: IAViewTextNoCasNoPuncAnalyser

import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory; //导入依赖的package包/类
/**
 * Creates a case-insensitive, punctuation-stripping analyser of the given type,
 * wired with the supplied synonym and word-delimiter filter factories.
 *
 * @param synonymFilterFactory        factory producing the synonym token filter
 * @param wordDelimiterFilterFactory  factory producing the word-delimiter token filter
 * @param analyzerType                which analyzer variant to build
 */
public IAViewTextNoCasNoPuncAnalyser(SynonymFilterFactory synonymFilterFactory,
                                     WordDelimiterFilterFactory wordDelimiterFilterFactory, AnalyzerType analyzerType) {
    this.synonymFilterFactory = synonymFilterFactory;
    this.wordDelimiterFilterFactory = wordDelimiterFilterFactory;
    this.analyzerType = analyzerType;
}
 
开发者ID:nationalarchives,项目名称:taxonomy,代码行数:11,代码来源:IAViewTextNoCasNoPuncAnalyser.java

示例5: registerWithPrefix

import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory; //导入依赖的package包/类
/**
 * Registers the standard set of analyzer definitions under the given name prefix.
 *
 * NOTE: in this fluent builder every {@code .param(...)} call configures the most
 * recently added tokenFilter, so the indentation mirrors that binding and the
 * statement order is significant.
 *
 * @param prefix  string prepended to every registered analyzer name
 * @param builder registry builder the analyzer definitions are added to
 */
protected void registerWithPrefix(String prefix, LuceneAnalyzerDefinitionRegistryBuilder builder) {
	// KEYWORD: the entire input as one token, untouched.
	builder.analyzer(prefix + HibernateSearchAnalyzer.KEYWORD).tokenizer(KeywordTokenizerFactory.class);
	
	// KEYWORD_CLEAN: one token, ASCII-folded and lowercased.
	builder.analyzer(prefix + HibernateSearchAnalyzer.KEYWORD_CLEAN).tokenizer(KeywordTokenizerFactory.class)
		.tokenFilter(ASCIIFoldingFilterFactory.class)
		.tokenFilter(LowerCaseFilterFactory.class);
	
	// TEXT: whitespace-split, folded, word-delimited (originals preserved), lowercased.
	builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT).tokenizer(WhitespaceTokenizerFactory.class)
			.tokenFilter(ASCIIFoldingFilterFactory.class)
			.tokenFilter(WordDelimiterFilterFactory.class)
					.param("generateWordParts", "1")
					.param("generateNumberParts", "1")
					.param("catenateWords", "0")
					.param("catenateNumbers", "0")
					.param("catenateAll", "0")
					.param("splitOnCaseChange", "0")
					.param("splitOnNumerics", "0")
					.param("preserveOriginal", "1")
			.tokenFilter(LowerCaseFilterFactory.class);
	
	// TEXT_STEMMING: same pipeline as TEXT plus French minimal stemming.
	builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT_STEMMING).tokenizer(WhitespaceTokenizerFactory.class)
			.tokenFilter(ASCIIFoldingFilterFactory.class)
			.tokenFilter(WordDelimiterFilterFactory.class)
					.param("generateWordParts", "1")
					.param("generateNumberParts", "1")
					.param("catenateWords", "0")
					.param("catenateNumbers", "0")
					.param("catenateAll", "0")
					.param("splitOnCaseChange", "0")
					.param("splitOnNumerics", "0")
					.param("preserveOriginal", "1")
			.tokenFilter(LowerCaseFilterFactory.class)
			.tokenFilter(CoreFrenchMinimalStemFilterFactory.class);
	
	// TEXT_SORT: single normalized token for sorting — folded, lowercased,
	// punctuation replaced/stripped, then trimmed.
	builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT_SORT).tokenizer(KeywordTokenizerFactory.class)
			.tokenFilter(ASCIIFoldingFilterFactory.class)
			.tokenFilter(LowerCaseFilterFactory.class)
			.tokenFilter(PatternReplaceFilterFactory.class)
					// NOTE(review): "('-&\.,\(\))" is a regex GROUP, not a character
					// class — it only matches the literal sequence '-&.,() as a whole.
					// Possibly ['-&\.,\(\)] was intended; confirm before changing.
					.param("pattern", "('-&\\.,\\(\\))")
					.param("replacement", " ")
					.param("replace", "all")
			.tokenFilter(PatternReplaceFilterFactory.class)
					// Strip anything that is not a digit, a Unicode letter, or a space.
					.param("pattern", "([^0-9\\p{L} ])")
					.param("replacement", "")
					.param("replace", "all")
			.tokenFilter(TrimFilterFactory.class);
	
}
 
开发者ID:openwide-java,项目名称:owsi-core-parent,代码行数:49,代码来源:CoreLuceneAnalyzersDefinitionProvider.java

示例6: testCustomTypes

import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory; //导入依赖的package包/类
/**
 * Verifies {@link WordDelimiterFilterFactory} behaviour with default settings
 * versus a custom "types" character-type mapping file.
 */
@Test
public void testCustomTypes() throws Exception {
  String input = "I borrowed $5,400.00 at 25% interest-rate";
  ResourceLoader resourceLoader = new SolrResourceLoader("solr/collection1");

  Map<String,String> settings = new HashMap<>();
  settings.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
  settings.put("generateWordParts", "1");
  settings.put("generateNumberParts", "1");
  settings.put("catenateWords", "1");
  settings.put("catenateNumbers", "1");
  settings.put("catenateAll", "0");
  settings.put("splitOnCaseChange", "1");

  // Default behaviour: no custom type table supplied.
  WordDelimiterFilterFactory factoryDefault = new WordDelimiterFilterFactory(settings);
  factoryDefault.inform(resourceLoader);

  TokenStream stream = factoryDefault.create(
      new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false));
  BaseTokenStreamTestCase.assertTokenStreamContents(stream,
      new String[] { "I", "borrowed", "5", "540000", "400", "00", "at", "25", "interest", "interestrate", "rate" });

  stream = factoryDefault.create(
      new MockTokenizer(new StringReader("foo\u200Dbar"), MockTokenizer.WHITESPACE, false));
  BaseTokenStreamTestCase.assertTokenStreamContents(stream,
      new String[] { "foo", "foobar", "bar" });

  // Custom behaviour: same settings plus a user-supplied type mapping file.
  // A fresh map is required because the factory constructor consumes its args.
  settings = new HashMap<>();
  settings.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
  settings.put("generateWordParts", "1");
  settings.put("generateNumberParts", "1");
  settings.put("catenateWords", "1");
  settings.put("catenateNumbers", "1");
  settings.put("catenateAll", "0");
  settings.put("splitOnCaseChange", "1");
  settings.put("types", "wdftypes.txt");
  WordDelimiterFilterFactory factoryCustom = new WordDelimiterFilterFactory(settings);
  factoryCustom.inform(resourceLoader);

  stream = factoryCustom.create(
      new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false));
  BaseTokenStreamTestCase.assertTokenStreamContents(stream,
      new String[] { "I", "borrowed", "$5,400.00", "at", "25%", "interest", "interestrate", "rate" });

  // A char > 0x7F exercises the enlarged byte[] type table.
  stream = factoryCustom.create(
      new MockTokenizer(new StringReader("foo\u200Dbar"), MockTokenizer.WHITESPACE, false));
  BaseTokenStreamTestCase.assertTokenStreamContents(stream,
      new String[] { "foo\u200Dbar" });
}
 
开发者ID:europeana,项目名称:search,代码行数:54,代码来源:TestWordDelimiterFilterFactory.java

示例7: testCustomTypes

import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory; //导入依赖的package包/类
/**
 * Verifies {@link WordDelimiterFilterFactory} behaviour with default settings
 * versus a custom "types" character-type mapping file, using the legacy
 * init/inform factory lifecycle.
 */
@Test
public void testCustomTypes() throws Exception {
  String input = "I borrowed $5,400.00 at 25% interest-rate";
  WordDelimiterFilterFactory factoryDefault = new WordDelimiterFilterFactory();
  ResourceLoader resourceLoader = new SolrResourceLoader("solr/collection1");
  Map<String,String> settings = new HashMap<String,String>();
  settings.put("generateWordParts", "1");
  settings.put("generateNumberParts", "1");
  settings.put("catenateWords", "1");
  settings.put("catenateNumbers", "1");
  settings.put("catenateAll", "0");
  settings.put("splitOnCaseChange", "1");

  // Default behaviour: no custom type table supplied.
  factoryDefault.init(settings);
  factoryDefault.inform(resourceLoader);

  TokenStream stream = factoryDefault.create(
      new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false));
  BaseTokenStreamTestCase.assertTokenStreamContents(stream,
      new String[] { "I", "borrowed", "5", "400", "00", "540000", "at", "25", "interest", "rate", "interestrate" });

  stream = factoryDefault.create(
      new MockTokenizer(new StringReader("foo\u200Dbar"), MockTokenizer.WHITESPACE, false));
  BaseTokenStreamTestCase.assertTokenStreamContents(stream,
      new String[] { "foo", "bar", "foobar" });

  // Custom behaviour: reuse the same settings map, adding a type mapping file.
  WordDelimiterFilterFactory factoryCustom = new WordDelimiterFilterFactory();
  settings.put("types", "wdftypes.txt");
  factoryCustom.init(settings);
  factoryCustom.inform(resourceLoader);

  stream = factoryCustom.create(
      new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false));
  BaseTokenStreamTestCase.assertTokenStreamContents(stream,
      new String[] { "I", "borrowed", "$5,400.00", "at", "25%", "interest", "rate", "interestrate" });

  // A char > 0x7F exercises the enlarged byte[] type table.
  stream = factoryCustom.create(
      new MockTokenizer(new StringReader("foo\u200Dbar"), MockTokenizer.WHITESPACE, false));
  BaseTokenStreamTestCase.assertTokenStreamContents(stream,
      new String[] { "foo\u200Dbar" });
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:47,代码来源:TestWordDelimiterFilterFactory.java


注:本文中的org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。