当前位置: 首页>>代码示例>>Java>>正文


Java UnicodeUtil.UTF16toUTF8方法代码示例

本文整理汇总了Java中org.apache.lucene.util.UnicodeUtil.UTF16toUTF8方法的典型用法代码示例。如果您正苦于以下问题：Java UnicodeUtil.UTF16toUTF8方法的具体用法？Java UnicodeUtil.UTF16toUTF8怎么用？Java UnicodeUtil.UTF16toUTF8使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.util.UnicodeUtil的用法示例。


在下文中一共展示了UnicodeUtil.UTF16toUTF8方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: evaluate

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Produces the lower-cased form of the first argument's value.
 *
 * <p>The value is turned into UTF-8 bytes, decoded into a UTF-16 buffer,
 * lower-cased through {@code charUtils}, and encoded back to UTF-8.
 *
 * @param args the function arguments; only {@code args[0]} is read
 * @return the lower-cased value, or {@code null} when the argument value is {@code null}
 */
@Override
public BytesRef evaluate(Input<Object>... args) {
    final Object raw = args[0].value();
    if (raw == null) {
        return null;
    }

    final BytesRef source = BytesRefs.toBytesRef(raw);

    // The byte length is an upper bound for the number of decoded UTF-16 units.
    final char[] chars = new char[source.length];
    final int charCount =
            UnicodeUtil.UTF8toUTF16(source.bytes, source.offset, source.length, chars);
    charUtils.toLowerCase(chars, 0, charCount);

    // Worst-case sizing for the re-encoded output.
    final byte[] encoded = new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * charCount];
    final int byteCount = UnicodeUtil.UTF16toUTF8(chars, 0, charCount, encoded);
    return new BytesRef(encoded, 0, byteCount);
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:18,代码来源:LowerFunction.java

示例2: evaluate

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Produces the upper-cased form of the first argument's value.
 *
 * <p>The value is turned into UTF-8 bytes, decoded into a UTF-16 buffer,
 * upper-cased through {@code charUtils}, and encoded back to UTF-8.
 *
 * @param args the function arguments; only {@code args[0]} is read
 * @return the upper-cased value, or {@code null} when the argument value is {@code null}
 */
@Override
public BytesRef evaluate(Input<Object>... args) {
    final Object raw = args[0].value();
    if (raw == null) {
        return null;
    }

    final BytesRef source = BytesRefs.toBytesRef(raw);

    // The byte length is an upper bound for the number of decoded UTF-16 units.
    final char[] chars = new char[source.length];
    final int charCount =
            UnicodeUtil.UTF8toUTF16(source.bytes, source.offset, source.length, chars);
    charUtils.toUpperCase(chars, 0, charCount);

    // Worst-case sizing for the re-encoded output.
    final byte[] encoded = new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * charCount];
    final int byteCount = UnicodeUtil.UTF16toUTF8(chars, 0, charCount, encoded);
    return new BytesRef(encoded, 0, byteCount);
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:18,代码来源:UpperFunction.java

示例3: testRandomUnicodeStrings

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Round-trips 20-char UTF-16 buffers through {@code UnicodeUtil.UTF16toUTF8}
 * and {@code UnicodeUtil.UTF8toUTF16}.
 *
 * <p>For each iteration the buffers are populated by {@code fillUnicode}
 * (defined elsewhere; presumably it generates random content and reports
 * whether it contains illegal sequences -- confirm against its definition).
 * When no illegal input is reported, the UTF-8 output is additionally compared
 * byte by byte with the JDK's own UTF-8 encoding of the same chars.
 *
 * @throws Throwable if any assertion or conversion fails
 */
public void testRandomUnicodeStrings() throws Throwable {
  char[] buffer = new char[20];
  char[] expected = new char[20];

  BytesRef utf8 = new BytesRef(20);
  CharsRef utf16 = new CharsRef(20);

  int num = atLeast(100000);
  for (int iter = 0; iter < num; iter++) {
    boolean hasIllegal = fillUnicode(buffer, expected, 0, 20);

    UnicodeUtil.UTF16toUTF8(buffer, 0, 20, utf8);
    if (!hasIllegal) {
      // Cross-check against the JDK encoder only when the input was reported as legal.
      byte[] b = new String(buffer, 0, 20).getBytes("UTF-8");
      assertEquals(b.length, utf8.length);
      for (int i = 0; i < b.length; i++) {
        assertEquals(b[i], utf8.bytes[i]);
      }
    }

    UnicodeUtil.UTF8toUTF16(utf8.bytes, 0, utf8.length, utf16);
    // Expected value goes first so a failure message reads "expected:<20> but was:<n>".
    assertEquals(20, utf16.length);
    for (int i = 0; i < 20; i++) {
      assertEquals(expected[i], utf16.chars[i]);
    }
  }
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:26,代码来源:TestIndexWriterUnicode.java

示例4: add

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Registers an input string together with its stemmer override output in this builder.
 *
 * @param input the input char sequence
 * @param output the stemmer override output char sequence
 * @return <code>false</code> iff the input has already been added to this builder otherwise <code>true</code>.
 */
public boolean add(CharSequence input, CharSequence output) {
  final int inputLength = input.length();
  if (!ignoreCase) {
    UnicodeUtil.UTF16toUTF8(input, 0, inputLength, spare);
  } else {
    // lower-case the input one code point at a time into the reusable char buffer
    charsSpare.grow(inputLength);
    final char[] lowered = charsSpare.chars;
    int pos = 0;
    while (pos < inputLength) {
      final int lowerCodePoint = Character.toLowerCase(Character.codePointAt(input, pos));
      pos += Character.toChars(lowerCodePoint, lowered, pos);
    }
    UnicodeUtil.UTF16toUTF8(lowered, 0, inputLength, spare);
  }

  // a non-negative result from the hash means the key was not present before
  final boolean newlyAdded = hash.add(spare) >= 0;
  if (newlyAdded) {
    outputValues.add(output);
  }
  return newlyAdded;
}
 
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:29,代码来源:StemmerOverrideFilter.java

示例5: writeStr

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Writes a string in the form {@code s:<byteCount>:"<val>";}.
 *
 * @param name not used by this method
 * @param val the string to write; emitted unescaped
 * @param needsEscaping not used by this method
 * @throws IOException if the underlying writer fails
 */
@Override
public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
  // Serialized PHP strings are emitted without any escaping, but the size
  // written in front of them has to be the byte count, not the char count.
  final int charCount = val.length();
  utf8 = ArrayUtil.grow(utf8, charCount * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR);
  final int byteCount = UnicodeUtil.UTF16toUTF8(val, 0, charCount, utf8);
  final String sizeField = Integer.toString(byteCount);

  writer.write("s:");
  writer.write(sizeField);
  writer.write(":\"");
  writer.write(val);
  writer.write("\";");
}
 
开发者ID:europeana,项目名称:search,代码行数:14,代码来源:PHPSerializedResponseWriter.java

示例6: unmarshalStringSortValue

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Unmarshals a string-based field value by encoding it as UTF-8.
 *
 * @param value the value to convert; cast to {@code String} when non-null
 * @return a new {@code BytesRef} holding the UTF-8 encoding, or {@code null}
 *         when {@code value} is {@code null}
 */
protected static Object unmarshalStringSortValue(Object value) {
  if (value == null) {
    return null;
  }
  final BytesRef utf8 = new BytesRef();
  UnicodeUtil.UTF16toUTF8((String) value, utf8);
  return utf8;
}
 
开发者ID:europeana,项目名称:search,代码行数:13,代码来源:FieldType.java

示例7: unmarshalSortValue

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Converts {@code value.toString()} with {@code NumberUtils.int2sortableStr}
 * and encodes the resulting string as UTF-8.
 *
 * @param value the value to convert
 * @return a new {@code BytesRef} with the encoded sortable string, or
 *         {@code null} when {@code value} is {@code null}
 */
@Override
public Object unmarshalSortValue(Object value) {
  if (value == null) {
    return null;
  }
  final String sortable = NumberUtils.int2sortableStr(value.toString());
  final BytesRef encoded = new BytesRef();
  UnicodeUtil.UTF16toUTF8(sortable, encoded);
  return encoded;
}
 
开发者ID:europeana,项目名称:search,代码行数:11,代码来源:SortableIntField.java

示例8: unmarshalSortValue

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Converts {@code value.toString()} with {@code NumberUtils.long2sortableStr}
 * and encodes the resulting string as UTF-8.
 *
 * @param value the value to convert
 * @return a new {@code BytesRef} with the encoded sortable string, or
 *         {@code null} when {@code value} is {@code null}
 */
@Override
public Object unmarshalSortValue(Object value) {
  if (value == null) {
    return null;
  }
  final String sortable = NumberUtils.long2sortableStr(value.toString());
  final BytesRef encoded = new BytesRef();
  UnicodeUtil.UTF16toUTF8(sortable, encoded);
  return encoded;
}
 
开发者ID:europeana,项目名称:search,代码行数:11,代码来源:SortableLongField.java

示例9: unmarshalSortValue

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Converts {@code value.toString()} with {@code NumberUtils.float2sortableStr}
 * and encodes the resulting string as UTF-8.
 *
 * @param value the value to convert
 * @return a new {@code BytesRef} with the encoded sortable string, or
 *         {@code null} when {@code value} is {@code null}
 */
@Override
public Object unmarshalSortValue(Object value) {
  if (value == null) {
    return null;
  }
  final String sortable = NumberUtils.float2sortableStr(value.toString());
  final BytesRef encoded = new BytesRef();
  UnicodeUtil.UTF16toUTF8(sortable, encoded);
  return encoded;
}
 
开发者ID:europeana,项目名称:search,代码行数:11,代码来源:SortableFloatField.java

示例10: unmarshalSortValue

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Converts {@code value.toString()} with {@code NumberUtils.double2sortableStr}
 * and encodes the resulting string as UTF-8.
 *
 * @param value the value to convert
 * @return a new {@code BytesRef} with the encoded sortable string, or
 *         {@code null} when {@code value} is {@code null}
 */
@Override
public Object unmarshalSortValue(Object value) {
  if (value == null) {
    return null;
  }
  final String sortable = NumberUtils.double2sortableStr(value.toString());
  final BytesRef encoded = new BytesRef();
  UnicodeUtil.UTF16toUTF8(sortable, encoded);
  return encoded;
}
 
开发者ID:europeana,项目名称:search,代码行数:11,代码来源:SortableDoubleField.java

示例11: testAllUnicodeChars

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Round-trips every code point from U+0000 through U+10FFFF, skipping the
 * surrogate range U+D800..U+DFFF, through {@code UnicodeUtil.UTF16toUTF8} and
 * {@code UnicodeUtil.UTF8toUTF16}, and compares the results with the JDK's
 * own UTF-8 encoding and decoding of the same chars.
 *
 * @throws Throwable if any assertion or conversion fails
 */
public void testAllUnicodeChars() throws Throwable {

  BytesRef utf8 = new BytesRef(10);
  CharsRef utf16 = new CharsRef(10);
  char[] chars = new char[2];
  // Inclusive bound: U+10FFFF is the last code point and must be tested as well.
  for (int ch = 0; ch <= 0x0010FFFF; ch++) {

    if (ch == 0xd800) {
      // Skip invalid code points (jump over the surrogate range)
      ch = 0xe000;
    }

    int len = 0;
    if (ch <= 0xffff) {
      chars[len++] = (char) ch;
    } else {
      // Split a supplementary code point into a high/low surrogate pair.
      chars[len++] = (char) (((ch - 0x0010000) >> 10) + UnicodeUtil.UNI_SUR_HIGH_START);
      chars[len++] = (char) (((ch - 0x0010000) & 0x3FFL) + UnicodeUtil.UNI_SUR_LOW_START);
    }

    UnicodeUtil.UTF16toUTF8(chars, 0, len, utf8);

    String s1 = new String(chars, 0, len);
    String s2 = new String(utf8.bytes, 0, utf8.length, "UTF-8");
    assertEquals("codepoint " + ch, s1, s2);

    UnicodeUtil.UTF8toUTF16(utf8.bytes, 0, utf8.length, utf16);
    assertEquals("codepoint " + ch, s1, new String(utf16.chars, 0, utf16.length));

    byte[] b = s1.getBytes("UTF-8");
    assertEquals(utf8.length, b.length);
    for (int j = 0; j < utf8.length; j++) {
      assertEquals(utf8.bytes[j], b[j]);
    }
  }
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:35,代码来源:TestIndexWriterUnicode.java

示例12: writeStr

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Writes a string in the form {@code s:<byteCount>:"<val>";}.
 *
 * @param name not used by this method
 * @param val the string to write; emitted unescaped
 * @param needsEscaping not used by this method
 * @throws IOException if the underlying writer fails
 */
@Override
public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
  // Serialized PHP strings are emitted without any escaping, but the size
  // written in front of them has to be the byte count, not the char count.
  UnicodeUtil.UTF16toUTF8(val, 0, val.length(), utf8);
  final String sizeField = Integer.toString(utf8.length);

  writer.write("s:");
  writer.write(sizeField);
  writer.write(":\"");
  writer.write(val);
  writer.write("\";");
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:14,代码来源:PHPSerializedResponseWriter.java

示例13: unmarshalSortValue

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Converts {@code value.toString()} with {@code NumberUtils.int2sortableStr}
 * and encodes the whole resulting string as UTF-8.
 *
 * @param value the value to convert
 * @return a new {@code BytesRef} with the encoded sortable string, or
 *         {@code null} when {@code value} is {@code null}
 */
@Override
public Object unmarshalSortValue(Object value) {
  if (value == null) {
    return null;
  }
  final String sortable = NumberUtils.int2sortableStr(value.toString());
  final BytesRef encoded = new BytesRef();
  UnicodeUtil.UTF16toUTF8(sortable, 0, sortable.length(), encoded);
  return encoded;
}
 
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:11,代码来源:SortableIntField.java

示例14: unmarshalSortValue

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Converts {@code value.toString()} with {@code NumberUtils.float2sortableStr}
 * and encodes the whole resulting string as UTF-8.
 *
 * @param value the value to convert
 * @return a new {@code BytesRef} with the encoded sortable string, or
 *         {@code null} when {@code value} is {@code null}
 */
@Override
public Object unmarshalSortValue(Object value) {
  if (value == null) {
    return null;
  }
  final String sortable = NumberUtils.float2sortableStr(value.toString());
  final BytesRef encoded = new BytesRef();
  UnicodeUtil.UTF16toUTF8(sortable, 0, sortable.length(), encoded);
  return encoded;
}
 
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:11,代码来源:SortableFloatField.java

示例15: unmarshalStringSortValue

import org.apache.lucene.util.UnicodeUtil; //导入方法依赖的package包/类
/**
 * Unmarshals a string-based field value.
 */
protected static Object unmarshalStringSortValue(Object value) {
  if (value == null) {
    return null;
  }
  // Encode the full string as UTF-8 into a freshly allocated BytesRef.
  final BytesRef utf8 = new BytesRef();
  final String text = (String) value;
  UnicodeUtil.UTF16toUTF8(text, 0, text.length(), utf8);
  return utf8;
}
 
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:13,代码来源:FieldType.java


注：本文中的org.apache.lucene.util.UnicodeUtil.UTF16toUTF8方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。