当前位置: 首页>>代码示例>>Java>>正文


Java Util.toUTF32方法代码示例

本文整理汇总了Java中org.apache.lucene.util.fst.Util.toUTF32方法的典型用法代码示例。如果您正苦于以下问题:Java Util.toUTF32方法的具体用法是什么?Java Util.toUTF32怎么用?有哪些Java Util.toUTF32的使用例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.util.fst.Util的用法示例。


在下文中一共展示了Util.toUTF32方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: affixFST

import org.apache.lucene.util.fst.Util; //导入方法依赖的package包/类
/**
 * Builds an FST from the given affix table: each key is converted with
 * {@code Util.toUTF32} and added together with an {@code IntsRef} holding the
 * integers mapped to that key.
 *
 * @param affixes affix strings mapped to the integers to store as their output;
 *     entries are added in the map's sorted key order
 * @return the FST produced by the builder's {@code finish()}
 * @throws IOException declared for the builder calls (presumably raised while
 *     adding to or finishing the FST)
 */
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
  Builder<IntsRef> fstBuilder =
      new Builder<>(FST.INPUT_TYPE.BYTE4, IntSequenceOutputs.getSingleton());
  // One scratch buffer is reused for every key conversion.
  IntsRefBuilder keyScratch = new IntsRefBuilder();

  for (Map.Entry<String,List<Integer>> affix : affixes.entrySet()) {
    Util.toUTF32(affix.getKey(), keyScratch);

    // Copy the boxed integers into a freshly sized IntsRef.
    List<Integer> ids = affix.getValue();
    IntsRef packed = new IntsRef(ids.size());
    for (Integer id : ids) {
      packed.ints[packed.length] = id;
      packed.length++;
    }

    fstBuilder.add(keyScratch.get(), packed);
  }

  return fstBuilder.finish();
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:16,代码来源:Dictionary.java

示例2: testFiniteStringsEatsStack

import org.apache.lucene.util.fst.Util; //导入方法依赖的package包/类
/**
 * Builds the union of two automata, each accepting one random 50000-char
 * string, and verifies that enumerating its finite strings returns exactly
 * those two strings.
 */
public void testFiniteStringsEatsStack() {
  char[] buffer = new char[50000];

  // The buffer is refilled for the second string, so each String copy must be
  // taken before the next fill.
  TestUtil.randomFixedLengthUnicodeString(random(), buffer, 0, buffer.length);
  String first = new String(buffer);
  TestUtil.randomFixedLengthUnicodeString(random(), buffer, 0, buffer.length);
  String second = new String(buffer);

  Automaton union = Operations.union(Automata.makeString(first), Automata.makeString(second));
  Set<IntsRef> found = getFiniteStrings(union, -1, false);
  assertEquals(2, found.size());

  // Both inputs must be present in the result, in their UTF-32 form.
  IntsRefBuilder codePoints = new IntsRefBuilder();
  for (String expected : new String[] {first, second}) {
    Util.toUTF32(expected.toCharArray(), 0, expected.length(), codePoints);
    assertTrue(found.contains(codePoints.get()));
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:16,代码来源:TestOperations.java

示例3: testSingletonNoLimit

import org.apache.lucene.util.fst.Util; //导入方法依赖的package包/类
/**
 * Checks that a single-string automaton enumerates to exactly that string
 * when {@code -1} is passed as the limit argument.
 */
public void testSingletonNoLimit() {
  final String word = "foobar";

  Set<IntsRef> finiteStrings = Operations.getFiniteStrings(Automata.makeString(word), -1);
  assertEquals(1, finiteStrings.size());

  // Convert the expected word the same way so it can be looked up in the set.
  IntsRefBuilder codePoints = new IntsRefBuilder();
  Util.toUTF32(word.toCharArray(), 0, word.length(), codePoints);
  assertTrue(finiteStrings.contains(codePoints.get()));
}
 
开发者ID:europeana,项目名称:search,代码行数:8,代码来源:TestOperations.java

示例4: testSingletonLimit1

import org.apache.lucene.util.fst.Util; //导入方法依赖的package包/类
/**
 * Checks that a single-string automaton enumerates to exactly that string
 * when {@code 1} is passed as the limit argument.
 */
public void testSingletonLimit1() {
  final String word = "foobar";

  Set<IntsRef> finiteStrings = Operations.getFiniteStrings(Automata.makeString(word), 1);
  assertEquals(1, finiteStrings.size());

  // Convert the expected word the same way so it can be looked up in the set.
  IntsRefBuilder codePoints = new IntsRefBuilder();
  Util.toUTF32(word.toCharArray(), 0, word.length(), codePoints);
  assertTrue(finiteStrings.contains(codePoints.get()));
}
 
开发者ID:europeana,项目名称:search,代码行数:8,代码来源:TestOperations.java

示例5: matchToken

import org.apache.lucene.util.fst.Util; //导入方法依赖的package包/类
/**
 * Walks the FST from {@code asc} along the labels obtained from {@code text}
 * via {@code Util.toUTF32}, accumulating arc outputs along the way.
 *
 * <p>{@code asc} is passed to {@code findTargetArc} as both the arc to follow
 * from and the arc to fill, so it is updated in place on every step.
 *
 * @param asc  arc to start from; reused as the current arc during the walk
 * @param text token whose converted labels are followed
 * @return {@code asc} with its output set to the accumulated output, or
 *     {@code null} as soon as a label has no target arc
 * @throws IOException declared for the FST lookup
 */
private FST.Arc<Long> matchToken(FST.Arc<Long> asc, CharsRef text) throws IOException {
	IntsRef labels = Util.toUTF32(text, scratchInfs);
	Long accumulated = asc.output;

	int step = 0;
	while (step < labels.length) {
		int label = labels.ints[labels.offset + step];
		boolean missing = fst.findTargetArc(label, asc, asc, fstReader) == null;
		if (missing) {
			return null;
		}
		// asc now holds the arc just followed; fold its output into the total.
		accumulated = fst.outputs.add(accumulated, asc.output);
		++step;
	}

	asc.output = accumulated;
	return asc;
}
 
开发者ID:thihy,项目名称:cc-analysis,代码行数:13,代码来源:CcWordsFilter.java

示例6: testRandomFiniteStrings1

import org.apache.lucene.util.fst.Util; //导入方法依赖的package包/类
/**
 * Generates a batch of random simple strings, unions their automata,
 * optionally applies one randomly chosen transformation (minimize,
 * determinize, or remove dead states), and verifies that enumerating the
 * finite strings gives back exactly the generated set. On mismatch both sets
 * are printed side by side in sorted order before failing.
 */
public void testRandomFiniteStrings1() {
  final int count = atLeast(100);
  if (VERBOSE) {
    System.out.println("TEST: numStrings=" + count);
  }

  Set<IntsRef> expected = new HashSet<>();
  List<Automaton> parts = new ArrayList<>();
  IntsRefBuilder codePoints = new IntsRefBuilder();
  for (int n = 0; n < count; n++) {
    String word = TestUtil.randomSimpleString(random(), 1, 200);
    parts.add(Automata.makeString(word));
    Util.toUTF32(word.toCharArray(), 0, word.length(), codePoints);
    // toIntsRef() is used here rather than get() because the scratch builder
    // is reused on the next iteration.
    expected.add(codePoints.toIntsRef());
    if (VERBOSE) {
      System.out.println("  add string=" + word);
    }
  }

  // TODO: we could sometimes use
  // DaciukMihovAutomatonBuilder here

  // TODO: what other random things can we do here...
  Automaton union = Operations.union(parts);
  // Each branch draws a fresh random boolean only if the previous one was
  // false, so the automaton may also be left untouched.
  if (random().nextBoolean()) {
    union = MinimizationOperations.minimize(union, 1000000);
    if (VERBOSE) {
      System.out.println("TEST: a.minimize numStates=" + union.getNumStates());
    }
  } else if (random().nextBoolean()) {
    if (VERBOSE) {
      System.out.println("TEST: a.determinize");
    }
    union = Operations.determinize(union, 1000000);
  } else if (random().nextBoolean()) {
    if (VERBOSE) {
      System.out.println("TEST: a.removeDeadStates");
    }
    union = Operations.removeDeadStates(union);
  }

  Set<IntsRef> actual = getFiniteStrings(union, -1, true);
  if (expected.equals(actual)) {
    return;
  }

  // Mismatch: dump both sides in sorted order to ease comparison.
  System.out.println("strings.size()=" + expected.size() + " actual.size=" + actual.size());
  List<IntsRef> sortedExpected = new ArrayList<>(expected);
  Collections.sort(sortedExpected);
  List<IntsRef> sortedActual = new ArrayList<>(actual);
  Collections.sort(sortedActual);
  int shared = Math.min(sortedExpected.size(), sortedActual.size());
  for (int row = 0; row < shared; row++) {
    System.out.println("  i=" + row + " string=" + toString(sortedExpected.get(row)) + " actual=" + toString(sortedActual.get(row)));
  }
  fail("wrong strings found");
}
 
开发者ID:europeana,项目名称:search,代码行数:57,代码来源:TestOperations.java


注:本文中的org.apache.lucene.util.fst.Util.toUTF32方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。