本文整理汇总了Java中org.apache.lucene.util.UnicodeUtil.newString方法的典型用法代码示例。如果您正苦于以下问题:Java UnicodeUtil.newString方法的具体用法?Java UnicodeUtil.newString怎么用?Java UnicodeUtil.newString使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.util.UnicodeUtil
的用法示例。
在下文中一共展示了UnicodeUtil.newString方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: initAutomata
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/** Builds the Levenshtein DFAs for edit distances up to {@code maxDistance}, reusing any cached ones. */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> cached = dfaAtt.automata();
  //System.out.println("cached automata size: " + cached.size());
  // Only build when the cache is missing entries and the distance is supported.
  if (cached.size() <= maxDistance
      && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    // The builder covers only the suffix after the constant prefix; the prefix
    // is handed to toAutomaton so matches share it literally.
    final String suffix =
        UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
    final LevenshteinAutomata builder = new LevenshteinAutomata(suffix, transpositions);
    final String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
    for (int distance = cached.size(); distance <= maxDistance; distance++) {
      //System.out.println("compute automaton n=" + distance);
      cached.add(new CompiledAutomaton(builder.toAutomaton(distance, prefix), true, false));
    }
  }
  return cached;
}
示例2: initAutomata
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/** Builds Levenshtein DFAs up to {@code maxDistance} when supported, caching them on the attribute. */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> cached = dfaAtt.automata();
  //System.out.println("cached automata size: " + cached.size());
  // Only build when the cache is missing entries and the distance is supported.
  if (cached.size() <= maxDistance
      && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    // The builder covers only the suffix after the constant prefix.
    final String suffix =
        UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
    final LevenshteinAutomata builder = new LevenshteinAutomata(suffix, transpositions);
    for (int distance = cached.size(); distance <= maxDistance; distance++) {
      //System.out.println("compute automaton n=" + distance);
      Automaton a = builder.toAutomaton(distance);
      if (realPrefixLength > 0) {
        // Prepend the constant prefix so every match carries it literally.
        final Automaton prefix =
            BasicAutomata.makeString(UnicodeUtil.newString(termText, 0, realPrefixLength));
        a = BasicOperations.concatenate(prefix, a);
      }
      cached.add(new CompiledAutomaton(a, true, false));
    }
  }
  return cached;
}
示例3: LinearFuzzyTermsEnum
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Enumerates all terms from the specified <code>reader</code> that share a prefix of
 * length <code>prefixLength</code> with <code>term</code> and whose fuzzy similarity
 * exceeds <code>minSimilarity</code>.
 * <p>
 * Once the constructor returns, the enumeration already points at the first
 * valid term, if any exists.
 *
 * @throws IOException If there is a low-level I/O error.
 */
public LinearFuzzyTermsEnum() throws IOException {
  super(terms.iterator(null));
  // Code points after the shared prefix — the edit-distance comparison runs on these.
  this.text = new int[termLength - realPrefixLength];
  System.arraycopy(termText, realPrefixLength, text, 0, text.length);
  // Seek straight to the first term that carries the constant prefix.
  final String prefixString = UnicodeUtil.newString(termText, 0, realPrefixLength);
  prefixBytesRef = new BytesRef(prefixString);
  // d/p sized text.length + 1 — presumably the two rolling rows of the
  // edit-distance table; NOTE(review): confirm against the distance routine.
  this.d = new int[this.text.length + 1];
  this.p = new int[this.text.length + 1];
  setInitialSeekTerm(prefixBytesRef);
}
示例4: inputToString
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/** Renders a term for diagnostics, decoding it per {@code inputMode} when it is valid Unicode. */
static String inputToString(int inputMode, IntsRef term, boolean isValidUnicode) {
  if (!isValidUnicode) {
    return term.toString();
  }
  // Mode 0 interprets the ints as UTF-8 bytes; any other mode as UTF-32 code points.
  final String decoded =
      inputMode == 0
          ? toBytesRef(term).utf8ToString()
          : UnicodeUtil.newString(term.ints, term.offset, term.length);
  return decoded + " " + term;
}
示例5: assertAutomaton
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/** Checks that the char-level and byte-level runs of {@code automaton} agree on random strings. */
private void assertAutomaton(Automaton automaton) throws Exception {
  final CharacterRunAutomaton charRun = new CharacterRunAutomaton(automaton);
  final ByteRunAutomaton byteRun = new ByteRunAutomaton(automaton);
  final AutomatonTestUtil.RandomAcceptedStrings accepted =
      new AutomatonTestUtil.RandomAcceptedStrings(automaton);
  final int iterations = atLeast(1000);
  for (int iter = 0; iter < iterations; iter++) {
    String candidate;
    if (random().nextBoolean()) {
      // Arbitrary string — most likely rejected by the automaton.
      candidate = TestUtil.randomUnicodeString(random());
    } else {
      // Draw a string the automaton is guaranteed to accept.
      final int[] codePoints = accepted.getRandomAcceptedString(random());
      try {
        candidate = UnicodeUtil.newString(codePoints, 0, codePoints.length);
      } catch (Exception e) {
        // Dump the offending code points before rethrowing, for easier debugging.
        System.out.println(codePoints.length + " codepoints:");
        for (int j = 0; j < codePoints.length; j++) {
          System.out.println(" " + Integer.toHexString(codePoints[j]));
        }
        throw e;
      }
    }
    final byte[] utf8 = candidate.getBytes(StandardCharsets.UTF_8);
    assertEquals(charRun.run(candidate), byteRun.run(utf8, 0, utf8.length));
  }
}
示例6: assertAutomaton
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/** Checks that the char-level and byte-level runs of {@code automaton} agree on random strings. */
private void assertAutomaton(Automaton automaton) throws Exception {
  final CharacterRunAutomaton charRun = new CharacterRunAutomaton(automaton);
  final ByteRunAutomaton byteRun = new ByteRunAutomaton(automaton);
  final AutomatonTestUtil.RandomAcceptedStrings accepted =
      new AutomatonTestUtil.RandomAcceptedStrings(automaton);
  final int iterations = atLeast(1000);
  for (int iter = 0; iter < iterations; iter++) {
    String candidate;
    if (random().nextBoolean()) {
      // Arbitrary string — most likely rejected by the automaton.
      candidate = _TestUtil.randomUnicodeString(random());
    } else {
      // Draw a string the automaton is guaranteed to accept.
      final int[] codePoints = accepted.getRandomAcceptedString(random());
      try {
        candidate = UnicodeUtil.newString(codePoints, 0, codePoints.length);
      } catch (Exception e) {
        // Dump the offending code points before rethrowing, for easier debugging.
        System.out.println(codePoints.length + " codepoints:");
        for (int j = 0; j < codePoints.length; j++) {
          System.out.println(" " + Integer.toHexString(codePoints[j]));
        }
        throw e;
      }
    }
    final byte[] utf8 = candidate.getBytes("UTF-8");
    assertEquals(charRun.run(candidate), byteRun.run(utf8, 0, utf8.length));
  }
}
示例7: testNonBMPChar
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/** Verifies the mapping filter handles a supplementary (non-BMP) character, U+1D122. */
public void testNonBMPChar() throws Exception {
  // Build a one-code-point input above the BMP (surrogate pair in UTF-16).
  final String input = UnicodeUtil.newString(new int[] {0x1D122}, 0, 1);
  final CharFilter filter = new MappingCharFilter(normMap, new StringReader(input));
  final TokenStream tokens = new MockTokenizer(filter, MockTokenizer.WHITESPACE, false);
  // The surrogate pair occupies two chars, hence the end offset of 2.
  assertTokenStreamContents(tokens, new String[]{"fclef"}, new int[]{0}, new int[]{2}, 2);
}