当前位置: 首页>>代码示例>>Java>>正文


Java UnicodeUtil.newString方法代码示例

本文整理汇总了Java中org.apache.lucene.util.UnicodeUtil.newString方法的典型用法代码示例。如果您正苦于以下问题:Java UnicodeUtil.newString方法的具体用法?Java UnicodeUtil.newString怎么用?Java UnicodeUtil.newString使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.util.UnicodeUtil的用法示例。


在下文中一共展示了UnicodeUtil.newString方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: initAutomata

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Tops up the cached list of Levenshtein automata, if possible.
 * <p>
 * Starting at the current size of the list returned by
 * {@code dfaAtt.automata()}, one compiled automaton is appended for each
 * distance up to and including {@code maxDistance}. Nothing is appended when
 * the list already holds more than {@code maxDistance} entries, or when
 * {@code maxDistance} exceeds
 * {@code LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param maxDistance largest edit distance for which an automaton is wanted
 * @return the list obtained from {@code dfaAtt.automata()}, possibly extended
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> cached = dfaAtt.automata();

  // Guard: either everything requested is already cached, or the request is
  // beyond what LevenshteinAutomata declares as supported.
  if (cached.size() > maxDistance
      || maxDistance > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    return cached;
  }

  // The builder only sees the code points after the constant prefix ...
  final int suffixLength = termText.length - realPrefixLength;
  final String suffix = UnicodeUtil.newString(termText, realPrefixLength, suffixLength);
  final LevenshteinAutomata builder = new LevenshteinAutomata(suffix, transpositions);

  // ... while the prefix itself is handed to toAutomaton for every distance.
  final String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);

  int distance = cached.size();
  while (distance <= maxDistance) {
    final Automaton automaton = builder.toAutomaton(distance, prefix);
    cached.add(new CompiledAutomaton(automaton, true, false));
    distance++;
  }
  return cached;
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:19,代码来源:FuzzyTermsEnum.java

示例2: initAutomata

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Initializes Levenshtein DFAs up to {@code maxDistance}, if possible.
 * <p>
 * Starting at the current size of the list returned by
 * {@code dfaAtt.automata()}, one compiled automaton is appended for each
 * distance up to and including {@code maxDistance}. When
 * {@code realPrefixLength} is positive, each automaton is first concatenated
 * behind an automaton matching the constant prefix of the term. Nothing is
 * appended when the list already holds more than {@code maxDistance} entries,
 * or when {@code maxDistance} exceeds
 * {@code LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param maxDistance largest edit distance for which an automaton is wanted
 * @return the list obtained from {@code dfaAtt.automata()}, possibly extended
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
  if (runAutomata.size() <= maxDistance &&
      maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    // The builder only sees the code points after the constant prefix.
    final String suffix =
      UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
    final LevenshteinAutomata builder = new LevenshteinAutomata(suffix, transpositions);

    // The prefix text does not depend on the distance, so decode it once
    // instead of once per loop iteration. null means "no prefix".
    final String prefixText = realPrefixLength > 0
      ? UnicodeUtil.newString(termText, 0, realPrefixLength)
      : null;

    for (int i = runAutomata.size(); i <= maxDistance; i++) {
      Automaton a = builder.toAutomaton(i);
      if (prefixText != null) {
        // NOTE(review): a fresh prefix automaton is still built per iteration
        // because it is not visible here whether concatenate() may modify its
        // operands; confirm before sharing a single instance.
        a = BasicOperations.concatenate(BasicAutomata.makeString(prefixText), a);
      }
      runAutomata.add(new CompiledAutomaton(a, true, false));
    }
  }
  return runAutomata;
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:24,代码来源:FuzzyTermsEnum.java

示例3: LinearFuzzyTermsEnum

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Creates the enumeration on top of {@code terms.iterator(null)}.
 * <p>
 * The code points of {@code termText} that follow the first
 * {@code realPrefixLength} entries are copied into {@code text}; the leading
 * {@code realPrefixLength} code points are converted to a {@link BytesRef}
 * and passed to {@code setInitialSeekTerm}. Two work arrays, {@code d} and
 * {@code p}, are allocated one element longer than {@code text}.
 *
 * @throws IOException If there is a low-level I/O error.
 */
public LinearFuzzyTermsEnum() throws IOException {
  super(terms.iterator(null));

  // Non-prefix part of the term, kept as raw code points.
  final int suffixLength = termLength - realPrefixLength;
  this.text = new int[suffixLength];
  System.arraycopy(termText, realPrefixLength, this.text, 0, this.text.length);

  // Constant prefix of the term, used as the initial seek term below.
  prefixBytesRef = new BytesRef(UnicodeUtil.newString(termText, 0, realPrefixLength));

  final int rowLength = this.text.length + 1;
  this.d = new int[rowLength];
  this.p = new int[rowLength];

  setInitialSeekTerm(prefixBytesRef);
}
 
开发者ID:europeana,项目名称:search,代码行数:23,代码来源:SlowFuzzyTermsEnum.java

示例4: inputToString

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Renders {@code term} for display.
 * <p>
 * When {@code isValidUnicode} is false, only {@code term.toString()} is
 * returned. Otherwise the term is decoded to text, via
 * {@code toBytesRef(term).utf8ToString()} when {@code inputMode} is 0 and via
 * {@code UnicodeUtil.newString} for any other mode, and the result is that
 * text, a single space, then the term's own string form.
 *
 * @param inputMode 0 selects the utf8 decoding path; any other value selects utf32
 * @param term the term to render
 * @param isValidUnicode whether a decoded form should be included
 * @return the display string described above
 */
static String inputToString(int inputMode, IntsRef term, boolean isValidUnicode) {
  if (!isValidUnicode) {
    return term.toString();
  }

  final boolean utf8 = inputMode == 0;
  final String decoded = utf8
      ? toBytesRef(term).utf8ToString()
      : UnicodeUtil.newString(term.ints, term.offset, term.length);
  return decoded + " " + term;
}
 
开发者ID:europeana,项目名称:search,代码行数:12,代码来源:FSTTester.java

示例5: assertAutomaton

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Checks that the character-based and byte-based run automata built from
 * {@code automaton} give the same answer on a batch of test strings.
 * <p>
 * Each round randomly picks either an arbitrary random unicode string or a
 * string produced by {@code RandomAcceptedStrings}, then compares
 * {@code CharacterRunAutomaton.run} on the string with
 * {@code ByteRunAutomaton.run} on its UTF-8 bytes.
 *
 * @param automaton the automaton under test
 * @throws Exception rethrown if the code points cannot be turned into a string
 */
private void assertAutomaton(Automaton automaton) throws Exception {
  final CharacterRunAutomaton charMatcher = new CharacterRunAutomaton(automaton);
  final ByteRunAutomaton byteMatcher = new ByteRunAutomaton(automaton);
  final AutomatonTestUtil.RandomAcceptedStrings acceptedSource =
      new AutomatonTestUtil.RandomAcceptedStrings(automaton);

  for (int remaining = atLeast(1000); remaining > 0; remaining--) {
    final String candidate;
    if (!random().nextBoolean()) {
      // will be accepted
      final int[] codepoints = acceptedSource.getRandomAcceptedString(random());
      try {
        candidate = UnicodeUtil.newString(codepoints, 0, codepoints.length);
      } catch (Exception e) {
        // Dump the offending code points before propagating the failure.
        System.out.println(codepoints.length + " codepoints:");
        for (int codepoint : codepoints) {
          System.out.println("  " + Integer.toHexString(codepoint));
        }
        throw e;
      }
    } else {
      // likely not accepted
      candidate = TestUtil.randomUnicodeString(random());
    }

    final byte[] utf8 = candidate.getBytes(StandardCharsets.UTF_8);
    assertEquals(charMatcher.run(candidate), byteMatcher.run(utf8, 0, utf8.length));
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:29,代码来源:TestUTF32ToUTF8.java

示例6: assertAutomaton

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Checks that the character-based and byte-based run automata built from
 * {@code automaton} agree on a batch of test strings.
 * <p>
 * Each round randomly uses either an arbitrary random unicode string or a
 * string produced by {@code RandomAcceptedStrings}, and compares the result of
 * {@code CharacterRunAutomaton.run} on the string with the result of
 * {@code ByteRunAutomaton.run} on its "UTF-8" encoded bytes.
 *
 * @param automaton the automaton under test
 * @throws Exception rethrown if the code points cannot be turned into a string
 */
private void assertAutomaton(Automaton automaton) throws Exception {
  final CharacterRunAutomaton byChars = new CharacterRunAutomaton(automaton);
  final ByteRunAutomaton byBytes = new ByteRunAutomaton(automaton);
  final AutomatonTestUtil.RandomAcceptedStrings generator =
      new AutomatonTestUtil.RandomAcceptedStrings(automaton);

  final int rounds = atLeast(1000);
  int round = 0;
  while (round < rounds) {
    final String sample;
    if (random().nextBoolean()) {
      // likely not accepted
      sample = _TestUtil.randomUnicodeString(random());
    } else {
      // will be accepted
      final int[] points = generator.getRandomAcceptedString(random());
      try {
        sample = UnicodeUtil.newString(points, 0, points.length);
      } catch (Exception e) {
        // Print the code points that failed to convert, then propagate.
        System.out.println(points.length + " codepoints:");
        for (int point : points) {
          System.out.println("  " + Integer.toHexString(point));
        }
        throw e;
      }
    }

    final byte[] encoded = sample.getBytes("UTF-8");
    assertEquals(byChars.run(sample), byBytes.run(encoded, 0, encoded.length));
    round++;
  }
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:29,代码来源:TestUTF32ToUTF8.java

示例7: testNonBMPChar

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Feeds the single code point U+1D122 through a {@code MappingCharFilter}
 * built on {@code normMap} and asserts that the whitespace tokenizer emits
 * one token, "fclef", with start offset 0, end offset 2 and final offset 2.
 */
public void testNonBMPChar() throws Exception {
  // 0x1D122 is above 0xFFFF, i.e. a code point outside the Basic Multilingual Plane.
  final int[] codePoints = {0x1D122};
  final String input = UnicodeUtil.newString(codePoints, 0, 1);

  final CharFilter filtered = new MappingCharFilter(normMap, new StringReader(input));
  final TokenStream tokens = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);

  final String[] expectedTerms = {"fclef"};
  final int[] expectedStarts = {0};
  final int[] expectedEnds = {2};
  assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, 2);
}
 
开发者ID:europeana,项目名称:search,代码行数:6,代码来源:TestMappingCharFilter.java


注:本文中的org.apache.lucene.util.UnicodeUtil.newString方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。