本文整理汇总了Java中org.apache.lucene.util.UnicodeUtil.UTF8toUTF16方法的典型用法代码示例。如果您正苦于以下问题:Java UnicodeUtil.UTF8toUTF16方法的具体用法?Java UnicodeUtil.UTF8toUTF16怎么用?Java UnicodeUtil.UTF8toUTF16使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.util.UnicodeUtil的用法示例。
在下文中一共展示了UnicodeUtil.UTF8toUTF16方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: build
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Builds a minimal, deterministic automaton from a sorted collection of {@link BytesRef}
 * terms encoded as UTF-8. The terms must be supplied in binary sort order.
 *
 * @param input the UTF-8 terms to add, in binary sort order
 * @return the automaton finished from the converted builder state
 */
public static Automaton build(Collection<BytesRef> input) {
  final DaciukMihovAutomatonBuilder builder = new DaciukMihovAutomatonBuilder();
  char[] buffer = new char[0];
  final CharsRef scratch = new CharsRef();
  for (BytesRef term : input) {
    // Reuse one char buffer across terms, growing it with the term's byte length.
    buffer = ArrayUtil.grow(buffer, term.length);
    final int decodedLength = UnicodeUtil.UTF8toUTF16(term, buffer);
    scratch.chars = buffer;
    scratch.length = decodedLength;
    builder.add(scratch);
  }

  final Automaton.Builder result = new Automaton.Builder();
  convert(result, builder.complete(), new IdentityHashMap<State,Integer>());
  return result.finish();
}
示例2: evaluate
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Lower-cases the string value carried by the first argument.
 *
 * <p>The value is obtained as a UTF-8 {@code BytesRef}, decoded to UTF-16, lower-cased in
 * place through {@code charUtils}, and re-encoded as UTF-8.
 *
 * @param args the inputs; only {@code args[0]} is read
 * @return a new {@code BytesRef} with the lower-cased text, or {@code null} when the first
 *     argument's value is {@code null}
 */
@Override
public BytesRef evaluate(Input<Object>... args) {
  final Object raw = args[0].value();
  if (raw == null) {
    return null;
  }

  final BytesRef utf8 = BytesRefs.toBytesRef(raw);

  // The char buffer is sized by the UTF-8 byte count of the input.
  final char[] utf16 = new char[utf8.length];
  final int charCount = UnicodeUtil.UTF8toUTF16(utf8.bytes, utf8.offset, utf8.length, utf16);
  charUtils.toLowerCase(utf16, 0, charCount);

  final byte[] encoded = new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * charCount];
  final int byteCount = UnicodeUtil.UTF16toUTF8(utf16, 0, charCount, encoded);
  return new BytesRef(encoded, 0, byteCount);
}
示例3: evaluate
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Upper-cases the string value carried by the first argument.
 *
 * <p>Steps: fetch the value as a UTF-8 {@code BytesRef}, decode it to UTF-16, upper-case the
 * chars in place through {@code charUtils}, then encode the result back to UTF-8.
 *
 * @param args the inputs; only {@code args[0]} is read
 * @return a new {@code BytesRef} with the upper-cased text, or {@code null} when the first
 *     argument's value is {@code null}
 */
@Override
public BytesRef evaluate(Input<Object>... args) {
  final Object value = args[0].value();
  if (value == null) {
    return null;
  }

  final BytesRef source = BytesRefs.toBytesRef(value);

  // Decode: the scratch char array has as many slots as the input has bytes.
  final char[] scratch = new char[source.length];
  final int decoded = UnicodeUtil.UTF8toUTF16(source.bytes, source.offset, source.length, scratch);

  charUtils.toUpperCase(scratch, 0, decoded);

  // Encode: reserve the maximum UTF-8 size per char for the output.
  final byte[] output = new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * decoded];
  final int written = UnicodeUtil.UTF16toUTF8(scratch, 0, decoded, output);
  return new BytesRef(output, 0, written);
}
示例4: addTermFrequencies
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Accumulates the terms of a term vector, together with their frequencies, into
 * {@code termFreqMap}. Terms for which {@code isNoiseWord} returns {@code true} are skipped.
 *
 * @param termFreqMap map from term text to its running frequency counter; updated in place
 * @param vector the terms and their frequencies for a doc/field
 * @throws IOException declared for the term enumeration calls
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
  final TermsEnum terms = vector.iterator(null);
  final CharsRef scratch = new CharsRef();
  for (BytesRef utf8 = terms.next(); utf8 != null; utf8 = terms.next()) {
    UnicodeUtil.UTF8toUTF16(utf8, scratch);
    final String word = scratch.toString();
    if (!isNoiseWord(word)) {
      final int freq = (int) terms.totalTermFreq();
      Int counter = termFreqMap.get(word);
      if (counter != null) {
        // Term seen before: add to its running total.
        counter.x += freq;
      } else {
        // First time this term is seen: register a counter seeded with its frequency.
        counter = new Int();
        termFreqMap.put(word, counter);
        counter.x = freq;
      }
    }
  }
}
示例5: build
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Rebuilds the ternary tree rooted at {@code root} from the given term/weight iterator.
 *
 * <p>All terms are buffered first. If the iterator's comparator is not the
 * UTF8-sorted-as-UTF16 comparator, the iterator is wrapped so that terms come out in that
 * order.
 *
 * @param tfit the source of terms and weights
 * @throws IOException declared for the iterator calls
 */
@Override
public void build(TermFreqIterator tfit) throws IOException {
  root = new TernaryTreeNode();

  // Make sure the input is sorted and the comparator uses UTF16 sort order.
  if (tfit.getComparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) {
    tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
  }

  final ArrayList<String> keys = new ArrayList<String>();
  final ArrayList<Number> weights = new ArrayList<Number>();
  final CharsRef decoded = new CharsRef();
  for (BytesRef term = tfit.next(); term != null; term = tfit.next()) {
    decoded.grow(term.length);
    UnicodeUtil.UTF8toUTF16(term.bytes, term.offset, term.length, decoded);
    keys.add(decoded.toString());
    weights.add(Long.valueOf(tfit.weight()));
  }

  final Object[] keyArray = keys.toArray();
  final Object[] weightArray = weights.toArray();
  autocomplete.balancedTree(keyArray, weightArray, 0, keys.size() - 1, root);
}
示例6: build
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Builds a minimal, deterministic automaton from a sorted collection of {@link BytesRef}
 * terms encoded as UTF-8. The terms must be supplied in binary sort order.
 *
 * @param input the UTF-8 terms to add, in binary sort order
 * @return a new automaton whose initial state is the converted builder result and whose
 *     {@code deterministic} flag is set
 */
public static Automaton build(Collection<BytesRef> input) {
  final DaciukMihovAutomatonBuilder builder = new DaciukMihovAutomatonBuilder();

  // A single CharsRef is reused as the decoding target for every term.
  final CharsRef decoded = new CharsRef();
  for (BytesRef term : input) {
    UnicodeUtil.UTF8toUTF16(term, decoded);
    builder.add(decoded);
  }

  final Automaton automaton = new Automaton();
  automaton.initial =
      convert(
          builder.complete(),
          new IdentityHashMap<State,org.apache.lucene.util.automaton.State>());
  automaton.deterministic = true;
  return automaton;
}
示例7: decompressString
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Decompresses a byte range previously returned by compressString and turns the result back
 * into a {@link String}.
 *
 * @param value the array holding the compressed data
 * @param offset start of the compressed data, as passed on to {@code decompress}
 * @param length length of the compressed data, as passed on to {@code decompress}
 * @return the string decoded from the decompressed UTF-8 bytes
 * @throws DataFormatException declared by this method; presumably raised by
 *     {@code decompress} on malformed data (confirm against that method)
 */
public static String decompressString(byte[] value, int offset, int length) throws DataFormatException {
  final byte[] utf8 = decompress(value, offset, length);

  // The char buffer gets one slot per decompressed byte.
  final char[] utf16 = new char[utf8.length];
  final int charCount = UnicodeUtil.UTF8toUTF16(utf8, 0, utf8.length, utf16);
  return new String(utf16, 0, charCount);
}
示例8: codePoint
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Decodes the single byte at {@code index} of {@code contents.bytes} through
 * {@code UnicodeUtil.UTF8toUTF16} and returns the first resulting char as an int.
 *
 * <p>NOTE(review): exactly one byte is handed to the decoder, so a byte that is part of a
 * multi-byte UTF-8 sequence is presumably not decoded as intended; confirm the expected
 * semantics of {@code index}.
 *
 * @param index position in {@code contents.bytes} of the byte to decode
 * @return the first decoded char, masked to its low 16 bits
 */
@Override
public int codePoint(int index) {
  //FIXME: is this the correct behaviour?
  final byte single = this.contents.bytes[index];
  this.tmpByte[0] = single;
  UnicodeUtil.UTF8toUTF16(this.tmpByte, 0, 1, this.tmpChar);
  final char first = this.tmpChar[0];
  return first & 0xFFFF;
}
示例9: marshalStringSortValue
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Marshals a string-based field value by decoding it from UTF-8 into a {@link String}.
 *
 * @param value the value to marshal; cast to {@code BytesRef} when non-null
 * @return the decoded string, or {@code null} when {@code value} is {@code null}
 */
protected static Object marshalStringSortValue(Object value) {
  if (value != null) {
    final CharsRef decoded = new CharsRef();
    final BytesRef utf8 = (BytesRef) value;
    UnicodeUtil.UTF8toUTF16(utf8, decoded);
    return decoded.toString();
  }
  return null;
}
示例10: marshalSortValue
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Marshals a sort value by decoding it from UTF-8 and passing the resulting string to
 * {@code NumberUtils.SortableStr2int}.
 *
 * @param value the value to marshal; cast to {@code BytesRef} when non-null
 * @return the result of {@code NumberUtils.SortableStr2int}, or {@code null} when
 *     {@code value} is {@code null}
 */
@Override
public Object marshalSortValue(Object value) {
  if (value != null) {
    final CharsRef decoded = new CharsRef();
    final BytesRef utf8 = (BytesRef) value;
    UnicodeUtil.UTF8toUTF16(utf8, decoded);
    final String sortable = decoded.toString();
    return NumberUtils.SortableStr2int(sortable);
  }
  return null;
}
示例11: marshalSortValue
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Marshals a sort value by decoding it from UTF-8 and passing the resulting string to
 * {@code NumberUtils.SortableStr2long}.
 *
 * @param value the value to marshal; cast to {@code BytesRef} when non-null
 * @return the result of {@code NumberUtils.SortableStr2long}, or {@code null} when
 *     {@code value} is {@code null}
 */
@Override
public Object marshalSortValue(Object value) {
  if (value != null) {
    final CharsRef decoded = new CharsRef();
    final BytesRef utf8 = (BytesRef) value;
    UnicodeUtil.UTF8toUTF16(utf8, decoded);
    final String sortable = decoded.toString();
    return NumberUtils.SortableStr2long(sortable);
  }
  return null;
}
示例12: marshalSortValue
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Marshals a sort value by decoding it from UTF-8 and passing the resulting string to
 * {@code NumberUtils.SortableStr2float}.
 *
 * @param value the value to marshal; cast to {@code BytesRef} when non-null
 * @return the result of {@code NumberUtils.SortableStr2float}, or {@code null} when
 *     {@code value} is {@code null}
 */
@Override
public Object marshalSortValue(Object value) {
  if (value != null) {
    final CharsRef decoded = new CharsRef();
    final BytesRef utf8 = (BytesRef) value;
    UnicodeUtil.UTF8toUTF16(utf8, decoded);
    final String sortable = decoded.toString();
    return NumberUtils.SortableStr2float(sortable);
  }
  return null;
}
示例13: marshalSortValue
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Marshals a sort value by decoding it from UTF-8 and passing the resulting string to
 * {@code NumberUtils.SortableStr2double}.
 *
 * @param value the value to marshal; cast to {@code BytesRef} when non-null
 * @return the result of {@code NumberUtils.SortableStr2double}, or {@code null} when
 *     {@code value} is {@code null}
 */
@Override
public Object marshalSortValue(Object value) {
  if (value != null) {
    final CharsRef decoded = new CharsRef();
    final BytesRef utf8 = (BytesRef) value;
    UnicodeUtil.UTF8toUTF16(utf8, decoded);
    final String sortable = decoded.toString();
    return NumberUtils.SortableStr2double(sortable);
  }
  return null;
}
示例14: serializeSearchGroup
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Serializes search groups into a {@code NamedList} that maps each group value (as a string,
 * or {@code null}) to its array of converted sort values.
 *
 * <p>For each sort value, the schema field named by the matching entry of
 * {@code groupSort.getSort()} is looked up. When such a field exists and the value is a
 * {@code BytesRef} or a {@code String}, the value is replaced by the object the field type
 * produces from its readable form; otherwise the value is kept unchanged.
 *
 * @param data the search groups to serialize
 * @param groupSort the sort whose fields correspond positionally to each group's sort values
 * @return the populated named list
 */
private NamedList serializeSearchGroup(Collection<SearchGroup<BytesRef>> data, Sort groupSort) {
  final NamedList<Comparable[]> serialized = new NamedList<Comparable[]>();
  final CharsRef scratch = new CharsRef();

  for (SearchGroup<BytesRef> group : data) {
    final Comparable[] converted = new Comparable[group.sortValues.length];

    for (int i = 0; i < group.sortValues.length; i++) {
      Comparable current = (Comparable) group.sortValues[i];

      // Resolve the schema field for this sort position, if the sort entry names one.
      final String sortFieldName = groupSort.getSort()[i].getField();
      SchemaField schemaField = null;
      if (sortFieldName != null) {
        schemaField = searcher.getSchema().getFieldOrNull(sortFieldName);
      }

      if (schemaField != null) {
        final FieldType type = schemaField.getType();

        // Obtain the indexed string form; stays null for any other value type.
        String indexed = null;
        if (current instanceof BytesRef) {
          UnicodeUtil.UTF8toUTF16((BytesRef) current, scratch);
          indexed = scratch.toString();
        } else if (current instanceof String) {
          indexed = (String) current;
        }

        if (indexed != null) {
          current = (Comparable) type.toObject(schemaField.createField(type.indexedToReadable(indexed), 1.0f));
        }
      }

      converted[i] = current;
    }

    String key = null;
    if (group.groupValue != null) {
      key = group.groupValue.utf8ToString();
    }
    serialized.add(key, converted);
  }

  return serialized;
}
示例15: collect
import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Offers a term and its count to the queue when the count exceeds the current minimum.
 *
 * @param term the UTF-8 term
 * @param count the count associated with the term
 * @return always {@code false}
 */
@Override
public boolean collect(BytesRef term, int count) {
  // NOTE: we use c>min rather than c>=min as an optimization because we are going in
  // index order, so we already know that the keys are ordered. This can be very
  // important if a lot of the counts are repeated (like zero counts would be).
  if (count <= min) {
    return false;
  }

  UnicodeUtil.UTF8toUTF16(term, spare);
  final String label = spare.toString();
  queue.add(new SimpleFacets.CountPair<String,Integer>(label, count));

  // Once the queue has reached maxsize, raise the threshold to its last entry's value.
  if (queue.size() >= maxsize) {
    min = queue.last().val;
  }
  return false;
}