当前位置: 首页>>代码示例>>Java>>正文


Java UnicodeUtil.UTF8toUTF16方法代码示例

本文整理汇总了Java中org.apache.lucene.util.UnicodeUtil.UTF8toUTF16方法的典型用法代码示例。如果您正苦于以下问题：Java UnicodeUtil.UTF8toUTF16方法的具体用法？Java UnicodeUtil.UTF8toUTF16怎么用？Java UnicodeUtil.UTF8toUTF16使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.util.UnicodeUtil的用法示例。


在下文中一共展示了UnicodeUtil.UTF8toUTF16方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: build

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Builds a minimal, deterministic automaton from UTF-8 encoded terms.
 *
 * @param input the terms as {@link BytesRef}s; they must already be in binary sort order
 * @return the finished automaton
 */
public static Automaton build(Collection<BytesRef> input) {
  final DaciukMihovAutomatonBuilder dmBuilder = new DaciukMihovAutomatonBuilder();

  // A single UTF-16 buffer and a single CharsRef view are reused for every term.
  final CharsRef view = new CharsRef();
  char[] utf16 = new char[0];
  for (BytesRef term : input) {
    // Request room for one char per input byte before decoding.
    utf16 = ArrayUtil.grow(utf16, term.length);
    final int charCount = UnicodeUtil.UTF8toUTF16(term, utf16);
    view.chars = utf16;
    view.length = charCount;
    dmBuilder.add(view);
  }

  final Automaton.Builder result = new Automaton.Builder();
  convert(result, dmBuilder.complete(), new IdentityHashMap<State,Integer>());
  return result.finish();
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:25,代码来源:DaciukMihovAutomatonBuilder.java

示例2: evaluate

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Decodes the first argument to UTF-16, applies {@code charUtils.toLowerCase} in place,
 * and re-encodes the result as UTF-8.
 *
 * @param args function inputs; only {@code args[0]} is read
 * @return the converted value, or {@code null} when the input value is {@code null}
 */
@Override
public BytesRef evaluate(Input<Object>... args) {
    final Object raw = args[0].value();
    if (raw == null) {
        return null;
    }
    final BytesRef utf8In = BytesRefs.toBytesRef(raw);

    // Scratch array holds one char slot per input byte.
    final char[] utf16 = new char[utf8In.length];
    final int charCount =
        UnicodeUtil.UTF8toUTF16(utf8In.bytes, utf8In.offset, utf8In.length, utf16);
    charUtils.toLowerCase(utf16, 0, charCount);

    // Output buffer is sized for the maximum number of UTF-8 bytes per char.
    final byte[] utf8Out = new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * charCount];
    final int byteCount = UnicodeUtil.UTF16toUTF8(utf16, 0, charCount, utf8Out);
    return new BytesRef(utf8Out, 0, byteCount);
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:18,代码来源:LowerFunction.java

示例3: evaluate

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Decodes the first argument to UTF-16, applies {@code charUtils.toUpperCase} in place,
 * and re-encodes the result as UTF-8.
 *
 * @param args function inputs; only {@code args[0]} is read
 * @return the converted value, or {@code null} when the input value is {@code null}
 */
@Override
public BytesRef evaluate(Input<Object>... args) {
    final Object raw = args[0].value();
    if (raw == null) {
        return null;
    }
    final BytesRef utf8In = BytesRefs.toBytesRef(raw);

    // Scratch array holds one char slot per input byte.
    final char[] utf16 = new char[utf8In.length];
    final int charCount =
        UnicodeUtil.UTF8toUTF16(utf8In.bytes, utf8In.offset, utf8In.length, utf16);
    charUtils.toUpperCase(utf16, 0, charCount);

    // Output buffer is sized for the maximum number of UTF-8 bytes per char.
    final byte[] utf8Out = new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * charCount];
    final int byteCount = UnicodeUtil.UTF16toUTF8(utf16, 0, charCount, utf8Out);
    return new BytesRef(utf8Out, 0, byteCount);
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:18,代码来源:UpperFunction.java

示例4: addTermFrequencies

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Accumulates the terms of {@code vector} and their frequencies into {@code termFreqMap}.
 * Terms for which {@code isNoiseWord} returns true are skipped.
 *
 * @param termFreqMap map from term text to its running frequency counter; updated in place
 * @param vector terms (with frequencies) for a doc/field
 * @throws IOException if iterating the terms fails
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
  final TermsEnum terms = vector.iterator(null);
  final CharsRef utf16 = new CharsRef();
  for (BytesRef utf8 = terms.next(); utf8 != null; utf8 = terms.next()) {
    UnicodeUtil.UTF8toUTF16(utf8, utf16);
    final String term = utf16.toString();
    if (!isNoiseWord(term)) {
      // The long total is narrowed to int here.
      final int freq = (int) terms.totalTermFreq();

      final Int counter = termFreqMap.get(term);
      if (counter != null) {
        counter.x += freq;
      } else {
        // First sighting: register a fresh counter and set it to this frequency.
        final Int fresh = new Int();
        termFreqMap.put(term, fresh);
        fresh.x = freq;
      }
    }
  }
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:30,代码来源:MoreLikeThis.java

示例5: build

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Rebuilds the ternary tree from the given term/weight iterator.
 *
 * @param tfit source of terms and weights; wrapped in a sorting iterator when its comparator
 *     is not the UTF8-sorted-as-UTF16 comparator
 * @throws IOException if reading from the iterator fails
 */
@Override
public void build(TermFreqIterator tfit) throws IOException {
  root = new TernaryTreeNode();

  // Ensure the input is sorted and that the ordering is UTF-16 order.
  TermFreqIterator source = tfit;
  if (source.getComparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) {
    source = new SortedTermFreqIteratorWrapper(source, BytesRef.getUTF8SortedAsUTF16Comparator());
  }

  // Buffer every token and its weight before building the tree.
  final ArrayList<String> tokens = new ArrayList<String>();
  final ArrayList<Number> vals = new ArrayList<Number>();
  final CharsRef utf16 = new CharsRef();
  for (BytesRef utf8 = source.next(); utf8 != null; utf8 = source.next()) {
    utf16.grow(utf8.length);
    UnicodeUtil.UTF8toUTF16(utf8.bytes, utf8.offset, utf8.length, utf16);
    tokens.add(utf16.toString());
    vals.add(Long.valueOf(source.weight()));
  }

  final int lastIndex = tokens.size() - 1;
  autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, lastIndex, root);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:22,代码来源:TSTLookup.java

示例6: build

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Builds a minimal, deterministic automaton from UTF-8 encoded terms.
 *
 * @param input the terms as {@link BytesRef}s; they must already be in binary sort order
 * @return an automaton whose {@code deterministic} flag is set
 */
public static Automaton build(Collection<BytesRef> input) {
  final DaciukMihovAutomatonBuilder dmBuilder = new DaciukMihovAutomatonBuilder();

  // One CharsRef is reused as the UTF-16 scratch buffer for every term.
  final CharsRef utf16 = new CharsRef();
  for (BytesRef term : input) {
    UnicodeUtil.UTF8toUTF16(term, utf16);
    dmBuilder.add(utf16);
  }

  final Automaton result = new Automaton();
  result.initial =
      convert(
          dmBuilder.complete(),
          new IdentityHashMap<State,org.apache.lucene.util.automaton.State>());
  result.deterministic = true;
  return result;
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:21,代码来源:DaciukMihovAutomatonBuilder.java

示例7: decompressString

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Decompresses a byte range previously produced by compressString and decodes the
 * resulting UTF-8 bytes into a String.
 *
 * @param value array holding the compressed data
 * @param offset start of the compressed data within {@code value}
 * @param length number of compressed bytes
 * @return the decoded string
 * @throws DataFormatException propagated from {@code decompress}
 */
public static String decompressString(byte[] value, int offset, int length) throws DataFormatException {
  final byte[] utf8 = decompress(value, offset, length);
  // The UTF-16 scratch array has one char slot per decompressed byte.
  final char[] utf16 = new char[utf8.length];
  final int charCount = UnicodeUtil.UTF8toUTF16(utf8, 0, utf8.length, utf16);
  return new String(utf16, 0, charCount);
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:9,代码来源:CompressionTools.java

示例8: codePoint

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Decodes the single byte at {@code index} of the backing bytes through
 * {@code UnicodeUtil.UTF8toUTF16} and returns the first resulting char as an int.
 *
 * @param index position in {@code contents.bytes}
 * @return the decoded char value, masked to 16 bits
 */
@Override
public int codePoint(int index) {
	//FIXME: is this the correct behaviour?
	// NOTE(review): only one byte is handed to the decoder, so multi-byte UTF-8
	// sequences are not decoded as a unit here - confirm this is intended.
	final byte[] single = this.tmpByte;
	final char[] decoded = this.tmpChar;
	single[0] = this.contents.bytes[index];
	UnicodeUtil.UTF8toUTF16( single, 0, 1, decoded );
	final int firstChar = decoded[0] & 0xFFFF;
	return firstChar;
}
 
开发者ID:s4ke,项目名称:moar,代码行数:8,代码来源:ByteCharSeq.java

示例9: marshalStringSortValue

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Marshals a string-based field value by decoding it from UTF-8 into a String.
 *
 * @param value the sort value; cast to {@link BytesRef} when non-null
 * @return the decoded String, or {@code null} when {@code value} is {@code null}
 */
protected static Object marshalStringSortValue(Object value) {
  if (value == null) {
    return null;
  }
  final CharsRef utf16 = new CharsRef();
  final BytesRef utf8 = (BytesRef) value;
  UnicodeUtil.UTF8toUTF16(utf8, utf16);
  final String decoded = utf16.toString();
  return decoded;
}
 
开发者ID:europeana,项目名称:search,代码行数:12,代码来源:FieldType.java

示例10: marshalSortValue

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Decodes the UTF-8 sort value to a String and converts it with
 * {@code NumberUtils.SortableStr2int}.
 *
 * @param value the sort value; cast to {@link BytesRef} when non-null
 * @return the converted value, or {@code null} when {@code value} is {@code null}
 */
@Override
public Object marshalSortValue(Object value) {
  if (value == null) {
    return null;
  }
  final CharsRef utf16 = new CharsRef();
  final BytesRef utf8 = (BytesRef) value;
  UnicodeUtil.UTF8toUTF16(utf8, utf16);
  final String sortable = utf16.toString();
  return NumberUtils.SortableStr2int(sortable);
}
 
开发者ID:europeana,项目名称:search,代码行数:10,代码来源:SortableIntField.java

示例11: marshalSortValue

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Decodes the UTF-8 sort value to a String and converts it with
 * {@code NumberUtils.SortableStr2long}.
 *
 * @param value the sort value; cast to {@link BytesRef} when non-null
 * @return the converted value, or {@code null} when {@code value} is {@code null}
 */
@Override
public Object marshalSortValue(Object value) {
  if (value == null) {
    return null;
  }
  final CharsRef utf16 = new CharsRef();
  final BytesRef utf8 = (BytesRef) value;
  UnicodeUtil.UTF8toUTF16(utf8, utf16);
  final String sortable = utf16.toString();
  return NumberUtils.SortableStr2long(sortable);
}
 
开发者ID:europeana,项目名称:search,代码行数:10,代码来源:SortableLongField.java

示例12: marshalSortValue

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Decodes the UTF-8 sort value to a String and converts it with
 * {@code NumberUtils.SortableStr2float}.
 *
 * @param value the sort value; cast to {@link BytesRef} when non-null
 * @return the converted value, or {@code null} when {@code value} is {@code null}
 */
@Override
public Object marshalSortValue(Object value) {
  if (value == null) {
    return null;
  }
  final CharsRef utf16 = new CharsRef();
  final BytesRef utf8 = (BytesRef) value;
  UnicodeUtil.UTF8toUTF16(utf8, utf16);
  final String sortable = utf16.toString();
  return NumberUtils.SortableStr2float(sortable);
}
 
开发者ID:europeana,项目名称:search,代码行数:10,代码来源:SortableFloatField.java

示例13: marshalSortValue

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Decodes the UTF-8 sort value to a String and converts it with
 * {@code NumberUtils.SortableStr2double}.
 *
 * @param value the sort value; cast to {@link BytesRef} when non-null
 * @return the converted value, or {@code null} when {@code value} is {@code null}
 */
@Override
public Object marshalSortValue(Object value) {
  if (value == null) {
    return null;
  }
  final CharsRef utf16 = new CharsRef();
  final BytesRef utf8 = (BytesRef) value;
  UnicodeUtil.UTF8toUTF16(utf8, utf16);
  final String sortable = utf16.toString();
  return NumberUtils.SortableStr2double(sortable);
}
 
开发者ID:europeana,项目名称:search,代码行数:10,代码来源:SortableDoubleField.java

示例14: serializeSearchGroup

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Converts each search group into a named entry: the key is the group value decoded as a
 * String (or {@code null}), the value is the array of converted sort values.
 *
 * @param data the search groups to serialize
 * @param groupSort the sort whose i-th sort field corresponds to the i-th sort value
 * @return a list mapping group value to its converted sort values
 */
private NamedList serializeSearchGroup(Collection<SearchGroup<BytesRef>> data, Sort groupSort) {
  final NamedList<Comparable[]> serialized = new NamedList<Comparable[]>();
  final CharsRef utf16 = new CharsRef();

  for (SearchGroup<BytesRef> group : data) {
    final Comparable[] converted = new Comparable[group.sortValues.length];
    for (int i = 0; i < group.sortValues.length; i++) {
      Comparable value = (Comparable) group.sortValues[i];

      // Resolve the schema field for this sort position, if the sort names one.
      final String fieldName = groupSort.getSort()[i].getField();
      SchemaField field = null;
      if (fieldName != null) {
        field = searcher.getSchema().getFieldOrNull(fieldName);
      }

      if (field != null) {
        final FieldType type = field.getType();
        // Obtain the indexed form as a String; other value types pass through untouched.
        String indexed = null;
        if (value instanceof BytesRef) {
          UnicodeUtil.UTF8toUTF16((BytesRef) value, utf16);
          indexed = utf16.toString();
        } else if (value instanceof String) {
          indexed = (String) value;
        }
        if (indexed != null) {
          value = (Comparable) type.toObject(field.createField(type.indexedToReadable(indexed), 1.0f));
        }
      }
      converted[i] = value;
    }

    String groupKey = null;
    if (group.groupValue != null) {
      groupKey = group.groupValue.utf8ToString();
    }
    serialized.add(groupKey, converted);
  }

  return serialized;
}
 
开发者ID:netboynb,项目名称:search-core,代码行数:28,代码来源:SearchGroupsResultTransformer.java

示例15: collect

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Offers a term and its count to the queue when the count exceeds the current minimum.
 *
 * @param term the term bytes, decoded as UTF-8 when accepted
 * @param count the count for {@code term}
 * @return always {@code false}
 */
@Override
public boolean collect(BytesRef term, int count) {
  // NOTE: the comparison is strict (count > min, not >=) as an optimization: terms arrive in
  // index order, so the keys are already ordered. This can matter a lot when many counts
  // repeat (as zero counts would).
  if (count <= min) {
    return false;
  }

  UnicodeUtil.UTF8toUTF16(term, spare);
  final String decoded = spare.toString();
  queue.add(new SimpleFacets.CountPair<String,Integer>(decoded, count));
  if (queue.size() >= maxsize) {
    min = queue.last().val;
  }
  return false;
}
 
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:13,代码来源:PerSegmentSingleValuedFaceting.java


注:本文中的org.apache.lucene.util.UnicodeUtil.UTF8toUTF16方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。