当前位置: 首页>>代码示例>>Java>>正文


Java Dictionary.getMaxId方法代码示例

本文整理汇总了 Java 中 org.apache.parquet.column.Dictionary.getMaxId 方法的典型用法代码示例。如果您正在寻找以下问题的答案:Java Dictionary.getMaxId 方法的具体用法是什么?Java Dictionary.getMaxId 怎么用?有哪些 Java Dictionary.getMaxId 的使用例子?那么这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.apache.parquet.column.Dictionary 的用法示例。


下文一共展示了 Dictionary.getMaxId 方法的 14 个代码示例,这些示例默认按受欢迎程度排序。您可以为喜欢或觉得有用的代码点赞,您的评价将有助于系统推荐出更好的 Java 代码示例。

示例1: buildIntegerGlobalDictionary

import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Builds a sorted, de-duplicated global dictionary of 32-bit integer values.
 *
 * <p>Every id from 0 through {@code getMaxId()} of each supplied dictionary is decoded, the
 * values of {@code existingDict} (when non-null) are merged in, and the distinct values are
 * written in ascending order into a newly created single-column container.
 *
 * @param dictionaries dictionaries whose int values are collected
 * @param existingDict previously built dictionary to merge in, or {@code null} for none
 * @param columnDescriptor column whose path names the output field
 * @param bufferAllocator allocator used to create the returned container
 * @return container holding the merged values, with record count and schema set
 */
private static VectorContainer buildIntegerGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final String fieldName = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
  final Field field = new Field(fieldName, true, new ArrowType.Int(32, true), null);
  final VectorContainer input = new VectorContainer(bufferAllocator);
  final NullableIntVector intVector = input.addOrGet(field);
  intVector.allocateNew();

  // A sorted set both removes duplicates and fixes the output order.
  final SortedSet<Integer> distinct = Sets.newTreeSet();
  for (Dictionary dictionary : dictionaries) {
    final int maxId = dictionary.getMaxId();
    for (int id = 0; id <= maxId; id++) {
      distinct.add(dictionary.decodeToInt(id));
    }
  }

  if (existingDict != null) {
    final NullableIntVector existing = existingDict.getValueAccessorById(NullableIntVector.class, 0).getValueVector();
    final int existingCount = existingDict.getRecordCount();
    for (int row = 0; row < existingCount; row++) {
      distinct.add(existing.getAccessor().get(row));
    }
  }

  int written = 0;
  for (Integer value : distinct) {
    intVector.getMutator().setSafe(written, value);
    written++;
  }

  intVector.getMutator().setValueCount(written);
  input.setRecordCount(written);
  input.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return input;
}
 
开发者ID:dremio,项目名称:dremio-oss,代码行数:28,代码来源:GlobalDictionaryBuilder.java

示例2: buildLongGlobalDictionary

import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Builds a sorted, de-duplicated global dictionary of 64-bit integer values.
 *
 * <p>Every id from 0 through {@code getMaxId()} of each supplied dictionary is decoded, the
 * values of {@code existingDict} (when non-null) are merged in, and the distinct values are
 * written in ascending order into a newly created single-column container.
 *
 * @param dictionaries dictionaries whose long values are collected
 * @param existingDict previously built dictionary to merge in, or {@code null} for none
 * @param columnDescriptor column whose path names the output field
 * @param bufferAllocator allocator used to create the returned container
 * @return container holding the merged values, with record count and schema set
 */
private static VectorContainer buildLongGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final String fieldName = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
  final Field field = new Field(fieldName, true, new ArrowType.Int(64, true), null);
  final VectorContainer input = new VectorContainer(bufferAllocator);
  final NullableBigIntVector longVector = input.addOrGet(field);
  longVector.allocateNew();

  // A sorted set both removes duplicates and fixes the output order.
  final SortedSet<Long> distinct = Sets.newTreeSet();
  for (Dictionary dictionary : dictionaries) {
    final int maxId = dictionary.getMaxId();
    for (int id = 0; id <= maxId; id++) {
      distinct.add(dictionary.decodeToLong(id));
    }
  }

  if (existingDict != null) {
    final NullableBigIntVector existing = existingDict.getValueAccessorById(NullableBigIntVector.class, 0).getValueVector();
    final int existingCount = existingDict.getRecordCount();
    for (int row = 0; row < existingCount; row++) {
      distinct.add(existing.getAccessor().get(row));
    }
  }

  int written = 0;
  for (Long value : distinct) {
    longVector.getMutator().setSafe(written, value);
    written++;
  }

  longVector.getMutator().setValueCount(written);
  input.setRecordCount(written);
  input.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return input;
}
 
开发者ID:dremio,项目名称:dremio-oss,代码行数:28,代码来源:GlobalDictionaryBuilder.java

示例3: buildDoubleGlobalDictionary

import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Builds a sorted, de-duplicated global dictionary of double-precision values.
 *
 * <p>Every id from 0 through {@code getMaxId()} of each supplied dictionary is decoded, the
 * values of {@code existingDict} (when non-null) are merged in, and the distinct values are
 * written in ascending order into a newly created single-column container.
 *
 * @param dictionaries dictionaries whose double values are collected
 * @param existingDict previously built dictionary to merge in, or {@code null} for none
 * @param columnDescriptor column whose path names the output field
 * @param bufferAllocator allocator used to create the returned container
 * @return container holding the merged values, with record count and schema set
 */
private static VectorContainer buildDoubleGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final String fieldName = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
  final Field field = new Field(fieldName, true, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null);
  final VectorContainer input = new VectorContainer(bufferAllocator);
  final NullableFloat8Vector doubleVector = input.addOrGet(field);
  doubleVector.allocateNew();

  // A sorted set both removes duplicates and fixes the output order.
  final SortedSet<Double> distinct = Sets.newTreeSet();
  for (Dictionary dictionary : dictionaries) {
    final int maxId = dictionary.getMaxId();
    for (int id = 0; id <= maxId; id++) {
      distinct.add(dictionary.decodeToDouble(id));
    }
  }

  if (existingDict != null) {
    final NullableFloat8Vector existing = existingDict.getValueAccessorById(NullableFloat8Vector.class, 0).getValueVector();
    final int existingCount = existingDict.getRecordCount();
    for (int row = 0; row < existingCount; row++) {
      distinct.add(existing.getAccessor().get(row));
    }
  }

  int written = 0;
  for (Double value : distinct) {
    doubleVector.getMutator().setSafe(written, value);
    written++;
  }

  doubleVector.getMutator().setValueCount(written);
  input.setRecordCount(written);
  input.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return input;
}
 
开发者ID:dremio,项目名称:dremio-oss,代码行数:28,代码来源:GlobalDictionaryBuilder.java

示例4: buildFloatGlobalDictionary

import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Builds a sorted, de-duplicated global dictionary of single-precision values.
 *
 * <p>Every id from 0 through {@code getMaxId()} of each supplied dictionary is decoded, the
 * values of {@code existingDict} (when non-null) are merged in, and the distinct values are
 * written in ascending order into a newly created single-column container.
 *
 * @param dictionaries dictionaries whose float values are collected
 * @param existingDict previously built dictionary to merge in, or {@code null} for none
 * @param columnDescriptor column whose path names the output field
 * @param bufferAllocator allocator used to create the returned container
 * @return container holding the merged values, with record count and schema set
 */
private static VectorContainer buildFloatGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final String fieldName = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
  final Field field = new Field(fieldName, true, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), null);
  final VectorContainer input = new VectorContainer(bufferAllocator);
  final NullableFloat4Vector floatVector = input.addOrGet(field);
  floatVector.allocateNew();

  // A sorted set both removes duplicates and fixes the output order.
  final SortedSet<Float> distinct = Sets.newTreeSet();
  for (Dictionary dictionary : dictionaries) {
    final int maxId = dictionary.getMaxId();
    for (int id = 0; id <= maxId; id++) {
      distinct.add(dictionary.decodeToFloat(id));
    }
  }

  if (existingDict != null) {
    final NullableFloat4Vector existing = existingDict.getValueAccessorById(NullableFloat4Vector.class, 0).getValueVector();
    final int existingCount = existingDict.getRecordCount();
    for (int row = 0; row < existingCount; row++) {
      distinct.add(existing.getAccessor().get(row));
    }
  }

  int written = 0;
  for (Float value : distinct) {
    floatVector.getMutator().setSafe(written, value);
    written++;
  }

  floatVector.getMutator().setValueCount(written);
  input.setRecordCount(written);
  input.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return input;
}
 
开发者ID:dremio,项目名称:dremio-oss,代码行数:28,代码来源:GlobalDictionaryBuilder.java

示例5: buildBinaryGlobalDictionary

import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Builds a sorted, de-duplicated global dictionary of binary values.
 *
 * <p>Every id from 0 through {@code getMaxId()} of each supplied dictionary is decoded, the
 * values of {@code existingDict} (when non-null) are wrapped as {@code Binary} and merged in,
 * and the distinct values are written, in the set's iteration order, into a newly created
 * single-column container.
 *
 * @param dictionaries dictionaries whose binary values are collected
 * @param existingDict previously built dictionary to merge in, or {@code null} for none
 * @param columnDescriptor column whose path names the output field
 * @param bufferAllocator allocator used to create the returned container
 * @return container holding the merged values, with record count and schema set
 */
private static VectorContainer buildBinaryGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final String fieldName = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
  final Field field = new Field(fieldName, true, new ArrowType.Binary(), null);
  final VectorContainer input = new VectorContainer(bufferAllocator);
  final NullableVarBinaryVector binaryVector = input.addOrGet(field);
  binaryVector.allocateNew();

  // A sorted set both removes duplicates and fixes the output order.
  final SortedSet<Binary> distinct = new TreeSet<>();
  for (Dictionary dictionary : dictionaries) {
    final int maxId = dictionary.getMaxId();
    for (int id = 0; id <= maxId; id++) {
      distinct.add(dictionary.decodeToBinary(id));
    }
  }

  if (existingDict != null) {
    final NullableVarBinaryVector existing = existingDict.getValueAccessorById(NullableVarBinaryVector.class, 0).getValueVector();
    final int existingCount = existingDict.getRecordCount();
    for (int row = 0; row < existingCount; row++) {
      final byte[] stored = existing.getAccessor().get(row);
      distinct.add(Binary.fromConstantByteArray(stored));
    }
  }

  int written = 0;
  for (Binary value : distinct) {
    final byte[] data = value.getBytes();
    binaryVector.getMutator().setSafe(written, data, 0, data.length);
    written++;
  }

  binaryVector.getMutator().setValueCount(written);
  input.setRecordCount(written);
  input.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return input;
}
 
开发者ID:dremio,项目名称:dremio-oss,代码行数:29,代码来源:GlobalDictionaryBuilder.java

示例6: printDictionary

import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Prints every entry of a column's local dictionary to standard output.
 *
 * <p>A header line naming the column is printed first, followed by one {@code "id: value"} line
 * per dictionary id. The decode method and format are chosen from the column's primitive type;
 * types without a case print nothing.
 *
 * @param columnDescriptor column the dictionary belongs to; supplies the header and the type
 * @param localDictionary dictionary whose entries are printed
 */
public static void printDictionary(ColumnDescriptor columnDescriptor, Dictionary localDictionary) {
  System.out.println("Dictionary for column " + columnDescriptor.toString());
  // getMaxId() is the highest valid id, so the bound must be inclusive; the previous
  // exclusive bound silently dropped the last dictionary entry.
  for (int i = 0; i <= localDictionary.getMaxId(); ++i) {
    switch (columnDescriptor.getType()) {
      case INT32:
        System.out.println(format("%d: %d", i, localDictionary.decodeToInt(i)));
        break;
      case INT64:
        System.out.println(format("%d: %d", i, localDictionary.decodeToLong(i)));
        break;
      case INT96:
      case BINARY:
      case FIXED_LEN_BYTE_ARRAY:
        // NOTE(review): new String(byte[]) decodes with the platform default charset — confirm
        // that is intended for these byte-array-backed types.
        System.out.println(format("%d: %s", i, new String(localDictionary.decodeToBinary(i).getBytesUnsafe())));
        break;
      case FLOAT:
        System.out.println(format("%d: %f", i, localDictionary.decodeToFloat(i)));
        break;
      case DOUBLE:
        System.out.println(format("%d: %f", i, localDictionary.decodeToDouble(i)));
        break;
      case BOOLEAN:
        System.out.println(format("%d: %b", i, localDictionary.decodeToBoolean(i)));
        break;
      default:
        break;
    }
  }
}
 
开发者ID:dremio,项目名称:dremio-oss,代码行数:30,代码来源:LocalDictionariesReader.java

示例7: setDictionary

import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Decodes every entry of the given dictionary into a string value up front, storing the results
 * in {@code expandedDictionary} indexed by dictionary id.
 *
 * @param dictionary dictionary whose binary entries are expanded
 */
@Override
public void setDictionary(Dictionary dictionary)
{
    final int size = dictionary.getMaxId() + 1;
    expandedDictionary = new Value[size];
    int id = 0;
    while (id < size) {
        // getBytes() already returns a copied array, so ValueFactory#newString need not copy again.
        final byte[] raw = dictionary.decodeToBinary(id).getBytes();
        expandedDictionary[id] = ValueFactory.newString(raw);
        id++;
    }
}
 
开发者ID:CyberAgent,项目名称:embulk-input-parquet_hadoop,代码行数:11,代码来源:ParquetStringConverter.java

示例8: setDictionary

import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Decodes every entry of the given dictionary as an int and converts it with
 * {@code decimalFromLong}, storing the results in {@code expandedDictionary} indexed by
 * dictionary id.
 *
 * @param dictionary dictionary whose int entries are expanded
 */
@Override
public void setDictionary(Dictionary dictionary)
{
    final int size = dictionary.getMaxId() + 1;
    expandedDictionary = new Value[size];
    int id = 0;
    while (id < size) {
        final int unscaled = dictionary.decodeToInt(id);
        expandedDictionary[id] = decimalFromLong(unscaled);
        id++;
    }
}
 
开发者ID:CyberAgent,项目名称:embulk-input-parquet_hadoop,代码行数:9,代码来源:ParquetDecimalConverter.java

示例9: setDictionary

import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Decodes every binary entry of the given dictionary as a UTF-8 string and caches the results
 * in {@code _dict}, indexed by dictionary id.
 *
 * @param dictionary dictionary whose entries are expanded
 */
@Override
public void setDictionary(Dictionary dictionary) {
  final int size = dictionary.getMaxId() + 1;
  _dict = new String[size];
  int id = 0;
  while (id < size) {
    final Binary entry = dictionary.decodeToBinary(id);
    _dict[id] = entry.toStringUsingUTF8();
    id++;
  }
}
 
开发者ID:h2oai,项目名称:h2o-3,代码行数:8,代码来源:ChunkConverter.java

示例10: expandDictionary

import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Reads the dictionary page of a column chunk and returns its decoded values as a set.
 *
 * <p>Returns {@code null} when no value set can be produced: either the chunk has no dictionary
 * page, or the column type has no decode case here. Previously an unsupported type logged one
 * warning per dictionary entry and yielded an empty set, which is indistinguishable from
 * "the chunk contains no values". Callers already have to handle {@code null} for the
 * no-dictionary-page case.
 *
 * @param meta metadata of the column chunk whose dictionary is expanded
 * @param <T> value type the caller expects for this column
 * @return the dictionary values, or {@code null} if they are unavailable
 * @throws IOException declared for the dictionary page read
 */
@SuppressWarnings("unchecked")
private <T extends Comparable<T>> Set<T> expandDictionary(ColumnChunkMetaData meta) throws IOException {
  ColumnDescriptor col = new ColumnDescriptor(meta.getPath().toArray(), meta.getPrimitiveType(), -1, -1);
  DictionaryPage page = dictionaries.readDictionaryPage(col);

  // the chunk may not be dictionary-encoded
  if (page == null) {
    return null;
  }

  Dictionary dict = page.getEncoding().initDictionary(col, page);

  // Values are collected untyped; the element type is decided by the column type at runtime.
  Set<Object> dictSet = new HashSet<>();

  for (int i = 0; i <= dict.getMaxId(); i++) {
    switch (meta.getType()) {
      case BINARY:
        dictSet.add(dict.decodeToBinary(i));
        break;
      case INT32:
        dictSet.add(dict.decodeToInt(i));
        break;
      case INT64:
        dictSet.add(dict.decodeToLong(i));
        break;
      case FLOAT:
        dictSet.add(dict.decodeToFloat(i));
        break;
      case DOUBLE:
        dictSet.add(dict.decodeToDouble(i));
        break;
      default:
        // The type is the same for every entry, so warn once and report "no usable dictionary".
        LOG.warn("Unknown dictionary type {}", meta.getType());
        return null;
    }
  }

  // Unchecked by design: the caller chooses T to match the column's value type.
  return (Set<T>) (Set<?>) dictSet;
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:34,代码来源:DictionaryFilter.java

示例11: setDictionary

import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Translates every binary entry of the given dictionary with {@code translateEnumValue} and
 * caches the results in {@code dict}, indexed by dictionary id.
 *
 * @param dictionary dictionary whose entries are translated
 */
@Override
public void setDictionary(Dictionary dictionary) {
  final int size = dictionary.getMaxId() + 1;
  dict = new Descriptors.EnumValueDescriptor[size];
  int id = 0;
  while (id < size) {
    dict[id] = translateEnumValue(dictionary.decodeToBinary(id));
    id++;
  }
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:9,代码来源:ProtoMessageConverter.java

示例12: setDictionary

import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Decodes every binary entry of the given dictionary as a UTF-8 string and caches the results
 * in {@code dict}, indexed by dictionary id.
 *
 * @param dictionary dictionary whose entries are expanded
 */
@Override
public void setDictionary(Dictionary dictionary) {
  final int size = dictionary.getMaxId() + 1;
  dict = new String[size];
  int id = 0;
  while (id < size) {
    final Binary entry = dictionary.decodeToBinary(id);
    dict[id] = entry.toStringUsingUTF8();
    id++;
  }
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:8,代码来源:TupleConverter.java

示例13: setDictionary

import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Converts every binary entry of the given dictionary with {@code convert} and caches the
 * results in {@code dict}, indexed by dictionary id.
 *
 * @param dictionary dictionary whose entries are converted
 */
@Override
@SuppressWarnings("unchecked")
public void setDictionary(Dictionary dictionary) {
  final int size = dictionary.getMaxId() + 1;
  // Generic arrays cannot be created directly, hence the Object[] cast.
  dict = (T[]) new Object[size];
  int id = 0;
  while (id < size) {
    final Binary entry = dictionary.decodeToBinary(id);
    dict[id] = convert(entry);
    id++;
  }
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:9,代码来源:AvroConverters.java

示例14: run

import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Prints the dictionary of the configured column for every row group of a single Parquet file.
 *
 * <p>Exactly one target file is required. For each row group the dictionary page of
 * {@code column} is read and each entry is logged as {@code "id: value"}. A row group without a
 * dictionary page for the column is reported and skipped rather than failing with a
 * {@link NullPointerException}. The file reader is closed when the method exits, on both the
 * normal and the exceptional path.
 *
 * @return 0 on success
 * @throws IOException declared for opening and reading the file
 * @throws IllegalArgumentException if the target count is not exactly one, or the column's
 *     primitive type has no print case
 */
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
  Preconditions.checkArgument(targets != null && targets.size() >= 1,
      "A Parquet file is required.");
  Preconditions.checkArgument(targets.size() == 1,
      "Cannot process multiple Parquet files.");

  String source = targets.get(0);

  // try-with-resources: the reader holds an open file and was previously never closed.
  try (ParquetFileReader reader = ParquetFileReader.open(getConf(), qualifiedPath(source))) {
    MessageType schema = reader.getFileMetaData().getSchema();
    ColumnDescriptor descriptor = Util.descriptor(column, schema);
    PrimitiveType type = Util.primitive(column, schema);
    Preconditions.checkNotNull(type);

    DictionaryPageReadStore dictionaryReader;
    int rowGroup = 0;
    while ((dictionaryReader = reader.getNextDictionaryReader()) != null) {
      DictionaryPage page = dictionaryReader.readDictionaryPage(descriptor);

      if (page == null) {
        // The column chunk in this row group has no dictionary page.
        console.info("\nRow group {} has no dictionary for \"{}\"", rowGroup, column);
      } else {
        Dictionary dict = page.getEncoding().initDictionary(descriptor, page);

        console.info("\nRow group {} dictionary for \"{}\":", rowGroup, column);
        for (int i = 0; i <= dict.getMaxId(); i += 1) {
          switch (type.getPrimitiveTypeName()) {
            case BINARY:
              if (type.getOriginalType() == OriginalType.UTF8) {
                console.info("{}: {}", String.format("%6d", i),
                    Util.humanReadable(dict.decodeToBinary(i).toStringUsingUTF8(), 70));
              } else {
                console.info("{}: {}", String.format("%6d", i),
                    Util.humanReadable(dict.decodeToBinary(i).getBytesUnsafe(), 70));
              }
              break;
            case INT32:
              console.info("{}: {}", String.format("%6d", i),
                  dict.decodeToInt(i));
              break;
            case INT64:
              console.info("{}: {}", String.format("%6d", i),
                  dict.decodeToLong(i));
              break;
            case FLOAT:
              console.info("{}: {}", String.format("%6d", i),
                  dict.decodeToFloat(i));
              break;
            case DOUBLE:
              console.info("{}: {}", String.format("%6d", i),
                  dict.decodeToDouble(i));
              break;
            default:
              throw new IllegalArgumentException(
                  "Unknown dictionary type: " + type.getPrimitiveTypeName());
          }
        }
      }

      // Advance to the next row group whether or not this one had a dictionary.
      reader.skipNextRowGroup();

      rowGroup += 1;
    }
  }

  console.info("");

  return 0;
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:67,代码来源:ShowDictionaryCommand.java


注:本文中的org.apache.parquet.column.Dictionary.getMaxId方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。