当前位置: 首页>>代码示例>>Java>>正文


Java HyphenationTree类代码示例

本文整理汇总了Java中org.apache.lucene.analysis.compound.hyphenation.HyphenationTree的典型用法代码示例。如果您正苦于以下问题：Java HyphenationTree类的具体用法？Java HyphenationTree怎么用？Java HyphenationTree使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


HyphenationTree类属于org.apache.lucene.analysis.compound.hyphenation包，在下文中一共展示了HyphenationTree类的13个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testHyphenationCompoundWordsDA

import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; //导入依赖的package包/类
/**
 * Feeds a Danish sentence ending in the compound "læsehest" through the
 * hyphenation-based decompounder, backed by a two-word dictionary, and
 * checks the emitted terms together with the expected int values per token.
 */
public void testHyphenationCompoundWordsDA() throws Exception {
  final CharArraySet subwordDictionary = makeDictionary("læse", "hest");

  // Danish hyphenation grammar, resolved relative to this test class.
  final String grammarUrl = getClass().getResource("da_UTF8.xml").toExternalForm();
  final HyphenationTree danishPatterns =
      HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(grammarUrl));

  final MockTokenizer whitespaceTokens = new MockTokenizer(
      new StringReader("min veninde som er lidt af en læsehest"),
      MockTokenizer.WHITESPACE,
      false);
  final HyphenationCompoundWordTokenFilter decompounder =
      new HyphenationCompoundWordTokenFilter(
          whitespaceTokens,
          danishPatterns,
          subwordDictionary,
          CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
          CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
          CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
          false);

  final String[] expectedTerms = {
      "min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest" };
  // NOTE(review): presumably position increments (0 = stacked on the compound) - confirm
  // against the assertTokenStreamContents overload in the test base class.
  final int[] expectedIncrements = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 };
  assertTokenStreamContents(decompounder, expectedTerms, expectedIncrements);
}
 
开发者ID:europeana,项目名称:search,代码行数:19,代码来源:TestCompoundWordTokenFilter.java

示例2: testHyphenationCompoundWordsDELongestMatch

import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; //导入依赖的package包/类
/**
 * Decompounds "basketballkurv" with the final boolean constructor argument
 * (the longest-match option) set to {@code true} and a maximum subword
 * size of 40, then checks the emitted terms.
 */
public void testHyphenationCompoundWordsDELongestMatch() throws Exception {
  final CharArraySet subwordDictionary = makeDictionary("basketball", "basket", "ball", "kurv");

  // Hyphenation grammar, resolved relative to this test class.
  final String grammarUrl = getClass().getResource("da_UTF8.xml").toExternalForm();
  final HyphenationTree patterns =
      HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(grammarUrl));

  final MockTokenizer singleCompound = new MockTokenizer(
      new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false);

  // "basket" is in the dictionary but must not show up in the output:
  // the longest match option suppresses it.
  final HyphenationCompoundWordTokenFilter decompounder =
      new HyphenationCompoundWordTokenFilter(
          singleCompound,
          patterns,
          subwordDictionary,
          CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
          CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
          40,
          true);

  final String[] expectedTerms = { "basketballkurv", "basketball", "ball", "kurv" };
  final int[] expectedIncrements = { 1, 0, 0, 0 };
  assertTokenStreamContents(decompounder, expectedTerms, expectedIncrements);
}
 
开发者ID:europeana,项目名称:search,代码行数:20,代码来源:TestCompoundWordTokenFilter.java

示例3: testHyphenationCompoundWordsDA

import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; //导入依赖的package包/类
/**
 * Version-aware variant: runs a Danish sentence ending in the compound
 * "læsehest" through the decompounder created with
 * {@code TEST_VERSION_CURRENT} and checks the emitted terms together with
 * the expected int values per token.
 */
public void testHyphenationCompoundWordsDA() throws Exception {
  final CharArraySet subwordDictionary = makeDictionary("læse", "hest");

  // Danish hyphenation grammar, resolved relative to this test class.
  final String grammarUrl = getClass().getResource("da_UTF8.xml").toExternalForm();
  final HyphenationTree danishPatterns =
      HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(grammarUrl));

  final MockTokenizer whitespaceTokens = new MockTokenizer(
      new StringReader("min veninde som er lidt af en læsehest"),
      MockTokenizer.WHITESPACE,
      false);
  final HyphenationCompoundWordTokenFilter decompounder =
      new HyphenationCompoundWordTokenFilter(
          TEST_VERSION_CURRENT,
          whitespaceTokens,
          danishPatterns,
          subwordDictionary,
          CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
          CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
          CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
          false);

  final String[] expectedTerms = {
      "min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest" };
  // NOTE(review): presumably position increments (0 = stacked on the compound) - confirm
  // against the assertTokenStreamContents overload in the test base class.
  final int[] expectedIncrements = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 };
  assertTokenStreamContents(decompounder, expectedTerms, expectedIncrements);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:19,代码来源:TestCompoundWordTokenFilter.java

示例4: testHyphenationCompoundWordsDELongestMatch

import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; //导入依赖的package包/类
/**
 * Version-aware variant: decompounds "basketballkurv" with the final
 * boolean constructor argument (the longest-match option) set to
 * {@code true} and a maximum subword size of 40, then checks the emitted
 * terms.
 */
public void testHyphenationCompoundWordsDELongestMatch() throws Exception {
  final CharArraySet subwordDictionary = makeDictionary("basketball", "basket", "ball", "kurv");

  // Hyphenation grammar, resolved relative to this test class.
  final String grammarUrl = getClass().getResource("da_UTF8.xml").toExternalForm();
  final HyphenationTree patterns =
      HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(grammarUrl));

  final MockTokenizer singleCompound = new MockTokenizer(
      new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false);

  // "basket" is in the dictionary but must not show up in the output:
  // the longest match option suppresses it.
  final HyphenationCompoundWordTokenFilter decompounder =
      new HyphenationCompoundWordTokenFilter(
          TEST_VERSION_CURRENT,
          singleCompound,
          patterns,
          subwordDictionary,
          CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
          CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
          40,
          true);

  final String[] expectedTerms = { "basketballkurv", "basketball", "ball", "kurv" };
  final int[] expectedIncrements = { 1, 0, 0, 0 };
  assertTokenStreamContents(decompounder, expectedTerms, expectedIncrements);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:20,代码来源:TestCompoundWordTokenFilter.java

示例5: HyphenationCompoundWordTokenFilter

import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; //导入依赖的package包/类
/**
 * Creates a filter with the default size limits by delegating to the
 * eight-argument constructor with {@code DEFAULT_MIN_WORD_SIZE},
 * {@code DEFAULT_MIN_SUBWORD_SIZE}, {@code DEFAULT_MAX_SUBWORD_SIZE} and
 * {@code false} as the final argument.
 *
 * @param matchVersion the version value forwarded unchanged to the delegate constructor
 * @param input the {@link TokenStream} forwarded to the delegate constructor
 * @param hyphenator the {@link HyphenationTree} forwarded to the delegate constructor
 * @param dictionary the {@link CharArraySet} forwarded to the delegate constructor
 * @deprecated Use {@link #HyphenationCompoundWordTokenFilter(TokenStream,HyphenationTree,CharArraySet)}
 */
@Deprecated
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
    HyphenationTree hyphenator, CharArraySet dictionary) {
  // Pure convenience overload: fills in the default limits and nothing else.
  this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
      DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:10,代码来源:HyphenationCompoundWordTokenFilter.java

示例6: create

import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; //导入依赖的package包/类
/**
 * Produces a {@link HyphenationTree} loaded from the {@code da_UTF8.xml}
 * resource that lives next to {@code TestCompoundWordTokenFilter}.
 * The {@code random} argument is not consulted.
 */
@Override
public Object create(Random random) {
  // TODO: make nastier
  try {
    final String grammarUrl =
        TestCompoundWordTokenFilter.class.getResource("da_UTF8.xml").toExternalForm();
    return Lucene43HyphenationCompoundWordTokenFilter.getHyphenationTree(
        new InputSource(grammarUrl));
  } catch (Exception ex) {
    // Any failure is handed to Rethrow.rethrow.
    Rethrow.rethrow(ex);
    return null; // unreachable code
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:12,代码来源:TestRandomChains.java

示例7: create

import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; //导入依赖的package包/类
/**
 * Produces a {@link HyphenationTree} loaded from the {@code da_UTF8.xml}
 * resource that lives next to {@code TestCompoundWordTokenFilter}.
 * The {@code random} argument is not consulted.
 */
@Override
public Object create(Random random) {
  // TODO: make nastier
  try {
    final String grammarUrl =
        TestCompoundWordTokenFilter.class.getResource("da_UTF8.xml").toExternalForm();
    return HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(grammarUrl));
  } catch (Exception ex) {
    // Any failure is handed to Rethrow.rethrow.
    Rethrow.rethrow(ex);
    return null; // unreachable code
  }
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:12,代码来源:TestRandomChains.java

示例8: testHyphenationOnly

import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; //导入依赖的package包/类
/**
 * Exercises the filter without any dictionary. In that mode a lot of
 * nonsense fragments can come out; the min/max subword size is what keeps
 * them in check, so the same input is run under three different bounds.
 */
public void testHyphenationOnly() throws Exception {
  final String grammarUrl = getClass().getResource("da_UTF8.xml").toExternalForm();
  final HyphenationTree patterns =
      HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(grammarUrl));

  // Subword bounds: min=2, max=4
  final HyphenationCompoundWordTokenFilter shortFragments =
      new HyphenationCompoundWordTokenFilter(
          new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
          patterns,
          CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
          2,
          4);
  final String[] expectedShort = { "basketballkurv", "ba", "sket", "bal", "ball", "kurv" };
  assertTokenStreamContents(shortFragments, expectedShort);

  // Subword bounds: min=4, max=6
  final HyphenationCompoundWordTokenFilter mediumFragments =
      new HyphenationCompoundWordTokenFilter(
          new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
          patterns,
          CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
          4,
          6);
  final String[] expectedMedium = {
      "basketballkurv", "basket", "sket", "ball", "lkurv", "kurv" };
  assertTokenStreamContents(mediumFragments, expectedMedium);

  // Subword bounds: min=4, max=10
  final HyphenationCompoundWordTokenFilter longFragments =
      new HyphenationCompoundWordTokenFilter(
          new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
          patterns,
          CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
          4,
          10);
  final String[] expectedLong = {
      "basketballkurv", "basket", "basketbal", "basketball", "sket",
      "sketbal", "sketball", "ball", "ballkurv", "lkurv", "kurv" };
  assertTokenStreamContents(longFragments, expectedLong);
}
 
开发者ID:europeana,项目名称:search,代码行数:48,代码来源:TestCompoundWordTokenFilter.java

示例9: testHyphenationOnly

import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; //导入依赖的package包/类
/**
 * Version-aware variant of the dictionary-free test. Without a dictionary
 * a lot of nonsense fragments can come out; the min/max subword size is
 * what keeps them in check, so the same input is run under three
 * different bounds.
 */
public void testHyphenationOnly() throws Exception {
  final String grammarUrl = getClass().getResource("da_UTF8.xml").toExternalForm();
  final HyphenationTree patterns =
      HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(grammarUrl));

  // Subword bounds: min=2, max=4
  final HyphenationCompoundWordTokenFilter shortFragments =
      new HyphenationCompoundWordTokenFilter(
          TEST_VERSION_CURRENT,
          new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
          patterns,
          CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
          2,
          4);
  final String[] expectedShort = { "basketballkurv", "ba", "sket", "bal", "ball", "kurv" };
  assertTokenStreamContents(shortFragments, expectedShort);

  // Subword bounds: min=4, max=6
  final HyphenationCompoundWordTokenFilter mediumFragments =
      new HyphenationCompoundWordTokenFilter(
          TEST_VERSION_CURRENT,
          new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
          patterns,
          CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
          4,
          6);
  final String[] expectedMedium = {
      "basketballkurv", "basket", "sket", "ball", "lkurv", "kurv" };
  assertTokenStreamContents(mediumFragments, expectedMedium);

  // Subword bounds: min=4, max=10
  final HyphenationCompoundWordTokenFilter longFragments =
      new HyphenationCompoundWordTokenFilter(
          TEST_VERSION_CURRENT,
          new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
          patterns,
          CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
          4,
          10);
  final String[] expectedLong = {
      "basketballkurv", "basket", "basketbal", "basketball", "sket",
      "sketbal", "sketball", "ball", "ballkurv", "lkurv", "kurv" };
  assertTokenStreamContents(longFragments, expectedLong);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:48,代码来源:TestCompoundWordTokenFilter.java

示例10: Lucene43HyphenationCompoundWordTokenFilter

import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; //导入依赖的package包/类
/**
 * Creates a new {@link Lucene43HyphenationCompoundWordTokenFilter} instance.
 * <p>
 * Every argument except {@code hyphenator} is forwarded to the superclass
 * constructor; the hyphenator is stored on this instance.
 *
 * @param input
 *          the {@link TokenStream} to process
 * @param hyphenator
 *          the hyphenation pattern tree to use for hyphenation
 * @param dictionary
 *          the word dictionary to match against.
 * @param minWordSize
 *          only words longer than this get processed
 * @param minSubwordSize
 *          only subwords longer than this get to the output stream
 * @param maxSubwordSize
 *          only subwords shorter than this get to the output stream
 * @param onlyLongestMatch
 *          Add only the longest matching subword to the stream
 */
public Lucene43HyphenationCompoundWordTokenFilter(TokenStream input,
                                                  HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize,
                                                  int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
  // Dictionary and size limits are passed up to the superclass constructor.
  super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
      onlyLongestMatch);

  // The hyphenator is the only state this constructor keeps itself.
  this.hyphenator = hyphenator;
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:27,代码来源:Lucene43HyphenationCompoundWordTokenFilter.java

示例11: getHyphenationTree

import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; //导入依赖的package包/类
/**
 * Builds a {@link HyphenationTree} and populates it from the given source.
 *
 * @param hyphenationSource the InputSource pointing to the XML grammar
 * @return a freshly created tree with the patterns from
 *         {@code hyphenationSource} loaded into it
 * @throws IOException If there is a low-level I/O error.
 */
public static HyphenationTree getHyphenationTree(InputSource hyphenationSource)
    throws IOException {
  final HyphenationTree patterns = new HyphenationTree();
  // Populate the empty tree from the supplied source before handing it out.
  patterns.loadPatterns(hyphenationSource);
  return patterns;
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:14,代码来源:Lucene43HyphenationCompoundWordTokenFilter.java

示例12: getHyphenationTree

import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; //导入依赖的package包/类
/**
 * Builds a {@link HyphenationTree} and populates it from the given source.
 *
 * @param hyphenationSource the InputSource pointing to the XML grammar
 * @return a freshly created tree with the patterns from
 *         {@code hyphenationSource} loaded into it
 * @throws IOException If there is a low-level I/O error.
 */
public static HyphenationTree getHyphenationTree(InputSource hyphenationSource)
    throws IOException {
  final HyphenationTree patterns = new HyphenationTree();
  // Populate the empty tree from the supplied source before handing it out.
  patterns.loadPatterns(hyphenationSource);
  return patterns;
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:14,代码来源:HyphenationCompoundWordTokenFilter.java

示例13: HyphenationCompoundWordTokenFilter

import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; //导入依赖的package包/类
/**
 * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
 * <p>
 * Every argument except {@code hyphenator} is forwarded to the superclass
 * constructor; the hyphenator is stored on this instance.
 * 
 * @param matchVersion
 *          Lucene version to enable correct Unicode 4.0 behavior in the
 *          dictionaries if Version &gt; 3.0. See <a
 *          href="CompoundWordTokenFilterBase.html#version"
 *          >CompoundWordTokenFilterBase</a> for details.
 * @param input
 *          the {@link TokenStream} to process
 * @param hyphenator
 *          the hyphenation pattern tree to use for hyphenation
 * @param dictionary
 *          the word dictionary to match against.
 * @param minWordSize
 *          only words longer than this get processed
 * @param minSubwordSize
 *          only subwords longer than this get to the output stream
 * @param maxSubwordSize
 *          only subwords shorter than this get to the output stream
 * @param onlyLongestMatch
 *          Add only the longest matching subword to the stream
 */
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
    HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize,
    int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
  // Version, dictionary and size limits are passed up to the superclass constructor.
  super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
      onlyLongestMatch);

  // The hyphenator is the only state this constructor keeps itself.
  this.hyphenator = hyphenator;
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:32,代码来源:HyphenationCompoundWordTokenFilter.java


注:本文中的org.apache.lucene.analysis.compound.hyphenation.HyphenationTree类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。