本文整理汇总了Java中org.apache.lucene.util.UnicodeUtil.UTF8toUTF32方法的典型用法代码示例。如果您正苦于以下问题:Java UnicodeUtil.UTF8toUTF32方法的具体用法?Java UnicodeUtil.UTF8toUTF32怎么用?Java UnicodeUtil.UTF8toUTF32使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.util.UnicodeUtil
的用法示例。
在下文中一共展示了UnicodeUtil.UTF8toUTF32方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: build
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
* Returns an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
* @return an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
* @throws IOException if an {@link IOException} occurs;
*/
public StemmerOverrideMap build() throws IOException {
ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
org.apache.lucene.util.fst.Builder<BytesRef> builder = new org.apache.lucene.util.fst.Builder<BytesRef>(
FST.INPUT_TYPE.BYTE4, outputs);
final int[] sort = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
IntsRef intsSpare = new IntsRef();
final int size = hash.size();
for (int i = 0; i < size; i++) {
int id = sort[i];
BytesRef bytesRef = hash.get(id, spare);
UnicodeUtil.UTF8toUTF32(bytesRef, intsSpare);
builder.add(intsSpare, new BytesRef(outputValues.get(id)));
}
return new StemmerOverrideMap(builder.finish(), ignoreCase);
}
示例2: accept
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
* The termCompare method in FuzzyTermEnum uses Levenshtein distance to
* calculate the distance between the given term and the comparing term.
*/
@Override
protected final AcceptStatus accept(BytesRef term) {
if (StringHelper.startsWith(term, prefixBytesRef)) {
UnicodeUtil.UTF8toUTF32(term, utf32);
final float similarity = similarity(utf32.ints, realPrefixLength, utf32.length - realPrefixLength);
if (similarity > minSimilarity) {
boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
return AcceptStatus.YES;
} else return AcceptStatus.NO;
} else {
return AcceptStatus.END;
}
}
示例3: accept
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
* <p>The termCompare method in FuzzyTermEnum uses Levenshtein distance to
* calculate the distance between the given term and the comparing term.
* </p>
* <p>If the minSimilarity is >= 1.0, this uses the maxEdits as the comparison.
* Otherwise, this method uses the following logic to calculate similarity.
* <pre>
* similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
* </pre>
* where distance is the Levenshtein distance for the two words.
* </p>
*
*/
@Override
protected final AcceptStatus accept(BytesRef term) {
if (StringHelper.startsWith(term, prefixBytesRef)) {
UnicodeUtil.UTF8toUTF32(term, utf32);
final int distance = calcDistance(utf32.ints, realPrefixLength, utf32.length - realPrefixLength);
//Integer.MIN_VALUE is the sentinel that Levenshtein stopped early
if (distance == Integer.MIN_VALUE){
return AcceptStatus.NO;
}
//no need to calc similarity, if raw is true and distance > maxEdits
if (raw == true && distance > maxEdits){
return AcceptStatus.NO;
}
final float similarity = calcSimilarity(distance, (utf32.length - realPrefixLength), text.length);
//if raw is true, then distance must also be <= maxEdits by now
//given the previous if statement
if (raw == true ||
(raw == false && similarity > minSimilarity)) {
boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
return AcceptStatus.YES;
} else {
return AcceptStatus.NO;
}
} else {
return AcceptStatus.END;
}
}