本文整理汇总了Java中org.apache.parquet.column.Dictionary.getMaxId方法的典型用法代码示例。如果您正苦于以下问题：Java Dictionary.getMaxId方法的具体用法？Java Dictionary.getMaxId怎么用？Java Dictionary.getMaxId使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.parquet.column.Dictionary的用法示例。
在下文中一共展示了Dictionary.getMaxId方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: buildIntegerGlobalDictionary
import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Collects the distinct int values of several Parquet dictionaries, optionally merged with
 * the values of an existing dictionary container, into a new single-vector container.
 *
 * @param dictionaries dictionaries whose entries (ids 0 through {@code getMaxId()}) are decoded as ints
 * @param existingDict container whose vector 0 is read as a {@code NullableIntVector}; may be null
 * @param columnDescriptor column whose path provides the output field name
 * @param bufferAllocator allocator handed to the new container
 * @return a container with one nullable signed 32-bit vector holding the values in sorted-set order
 */
private static VectorContainer buildIntegerGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final String fieldName = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
  final Field field = new Field(fieldName, true, new ArrowType.Int(32, true), null);
  final VectorContainer input = new VectorContainer(bufferAllocator);
  final NullableIntVector intVector = input.addOrGet(field);
  intVector.allocateNew();

  // The sorted set drops duplicates and fixes the output order.
  final SortedSet<Integer> values = Sets.newTreeSet();
  for (Dictionary dictionary : dictionaries) {
    final int maxId = dictionary.getMaxId();
    for (int id = 0; id <= maxId; id++) {
      values.add(dictionary.decodeToInt(id));
    }
  }

  if (existingDict != null) {
    final NullableIntVector existingDictValues = existingDict.getValueAccessorById(NullableIntVector.class, 0).getValueVector();
    final int existingCount = existingDict.getRecordCount();
    for (int row = 0; row < existingCount; row++) {
      values.add(existingDictValues.getAccessor().get(row));
    }
  }

  // Write the merged values back out, one slot per distinct value.
  int recordCount = 0;
  for (Integer value : values) {
    intVector.getMutator().setSafe(recordCount, value);
    recordCount++;
  }
  intVector.getMutator().setValueCount(recordCount);
  input.setRecordCount(recordCount);
  input.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return input;
}
示例2: buildLongGlobalDictionary
import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Collects the distinct long values of several Parquet dictionaries, optionally merged with
 * the values of an existing dictionary container, into a new single-vector container.
 *
 * @param dictionaries dictionaries whose entries (ids 0 through {@code getMaxId()}) are decoded as longs
 * @param existingDict container whose vector 0 is read as a {@code NullableBigIntVector}; may be null
 * @param columnDescriptor column whose path provides the output field name
 * @param bufferAllocator allocator handed to the new container
 * @return a container with one nullable signed 64-bit vector holding the values in sorted-set order
 */
private static VectorContainer buildLongGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final String fieldName = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
  final Field field = new Field(fieldName, true, new ArrowType.Int(64, true), null);
  final VectorContainer input = new VectorContainer(bufferAllocator);
  final NullableBigIntVector longVector = input.addOrGet(field);
  longVector.allocateNew();

  // The sorted set drops duplicates and fixes the output order.
  final SortedSet<Long> values = Sets.newTreeSet();
  for (Dictionary dictionary : dictionaries) {
    final int maxId = dictionary.getMaxId();
    for (int id = 0; id <= maxId; id++) {
      values.add(dictionary.decodeToLong(id));
    }
  }

  if (existingDict != null) {
    final NullableBigIntVector existingDictValues = existingDict.getValueAccessorById(NullableBigIntVector.class, 0).getValueVector();
    final int existingCount = existingDict.getRecordCount();
    for (int row = 0; row < existingCount; row++) {
      values.add(existingDictValues.getAccessor().get(row));
    }
  }

  // Write the merged values back out, one slot per distinct value.
  int recordCount = 0;
  for (Long value : values) {
    longVector.getMutator().setSafe(recordCount, value);
    recordCount++;
  }
  longVector.getMutator().setValueCount(recordCount);
  input.setRecordCount(recordCount);
  input.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return input;
}
示例3: buildDoubleGlobalDictionary
import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Collects the distinct double values of several Parquet dictionaries, optionally merged with
 * the values of an existing dictionary container, into a new single-vector container.
 *
 * @param dictionaries dictionaries whose entries (ids 0 through {@code getMaxId()}) are decoded as doubles
 * @param existingDict container whose vector 0 is read as a {@code NullableFloat8Vector}; may be null
 * @param columnDescriptor column whose path provides the output field name
 * @param bufferAllocator allocator handed to the new container
 * @return a container with one nullable double-precision vector holding the values in sorted-set order
 */
private static VectorContainer buildDoubleGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final String fieldName = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
  final Field field = new Field(fieldName, true, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null);
  final VectorContainer input = new VectorContainer(bufferAllocator);
  final NullableFloat8Vector doubleVector = input.addOrGet(field);
  doubleVector.allocateNew();

  // The sorted set drops duplicates and fixes the output order.
  final SortedSet<Double> values = Sets.newTreeSet();
  for (Dictionary dictionary : dictionaries) {
    final int maxId = dictionary.getMaxId();
    for (int id = 0; id <= maxId; id++) {
      values.add(dictionary.decodeToDouble(id));
    }
  }

  if (existingDict != null) {
    final NullableFloat8Vector existingDictValues = existingDict.getValueAccessorById(NullableFloat8Vector.class, 0).getValueVector();
    final int existingCount = existingDict.getRecordCount();
    for (int row = 0; row < existingCount; row++) {
      values.add(existingDictValues.getAccessor().get(row));
    }
  }

  // Write the merged values back out, one slot per distinct value.
  int recordCount = 0;
  for (Double value : values) {
    doubleVector.getMutator().setSafe(recordCount, value);
    recordCount++;
  }
  doubleVector.getMutator().setValueCount(recordCount);
  input.setRecordCount(recordCount);
  input.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return input;
}
示例4: buildFloatGlobalDictionary
import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Collects the distinct float values of several Parquet dictionaries, optionally merged with
 * the values of an existing dictionary container, into a new single-vector container.
 *
 * @param dictionaries dictionaries whose entries (ids 0 through {@code getMaxId()}) are decoded as floats
 * @param existingDict container whose vector 0 is read as a {@code NullableFloat4Vector}; may be null
 * @param columnDescriptor column whose path provides the output field name
 * @param bufferAllocator allocator handed to the new container
 * @return a container with one nullable single-precision vector holding the values in sorted-set order
 */
private static VectorContainer buildFloatGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final String fieldName = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
  final Field field = new Field(fieldName, true, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), null);
  final VectorContainer input = new VectorContainer(bufferAllocator);
  final NullableFloat4Vector floatVector = input.addOrGet(field);
  floatVector.allocateNew();

  // The sorted set drops duplicates and fixes the output order.
  final SortedSet<Float> values = Sets.newTreeSet();
  for (Dictionary dictionary : dictionaries) {
    final int maxId = dictionary.getMaxId();
    for (int id = 0; id <= maxId; id++) {
      values.add(dictionary.decodeToFloat(id));
    }
  }

  if (existingDict != null) {
    final NullableFloat4Vector existingDictValues = existingDict.getValueAccessorById(NullableFloat4Vector.class, 0).getValueVector();
    final int existingCount = existingDict.getRecordCount();
    for (int row = 0; row < existingCount; row++) {
      values.add(existingDictValues.getAccessor().get(row));
    }
  }

  // Write the merged values back out, one slot per distinct value.
  int recordCount = 0;
  for (Float value : values) {
    floatVector.getMutator().setSafe(recordCount, value);
    recordCount++;
  }
  floatVector.getMutator().setValueCount(recordCount);
  input.setRecordCount(recordCount);
  input.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return input;
}
示例5: buildBinaryGlobalDictionary
import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Collects the distinct binary values of several Parquet dictionaries, optionally merged with
 * the values of an existing dictionary container, into a new single-vector container.
 *
 * @param dictionaries dictionaries whose entries (ids 0 through {@code getMaxId()}) are decoded as {@code Binary}
 * @param existingDict container whose vector 0 is read as a {@code NullableVarBinaryVector}; may be null
 * @param columnDescriptor column whose path provides the output field name
 * @param bufferAllocator allocator handed to the new container
 * @return a container with one nullable binary vector holding the values in sorted-set order
 */
private static VectorContainer buildBinaryGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final String fieldName = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
  final Field field = new Field(fieldName, true, new ArrowType.Binary(), null);
  final VectorContainer input = new VectorContainer(bufferAllocator);
  final NullableVarBinaryVector binaryVector = input.addOrGet(field);
  binaryVector.allocateNew();

  // The sorted set drops duplicates and fixes the output order.
  final SortedSet<Binary> values = new TreeSet<>();
  for (Dictionary dictionary : dictionaries) {
    final int maxId = dictionary.getMaxId();
    for (int id = 0; id <= maxId; id++) {
      values.add(dictionary.decodeToBinary(id));
    }
  }

  if (existingDict != null) {
    final NullableVarBinaryVector existingDictValues = existingDict.getValueAccessorById(NullableVarBinaryVector.class, 0).getValueVector();
    final int existingCount = existingDict.getRecordCount();
    for (int row = 0; row < existingCount; row++) {
      // Wrap the raw bytes so they can be compared with the decoded dictionary entries.
      values.add(Binary.fromConstantByteArray(existingDictValues.getAccessor().get(row)));
    }
  }

  // Write the merged values back out, one slot per distinct value.
  int recordCount = 0;
  for (Binary value : values) {
    final byte[] data = value.getBytes();
    binaryVector.getMutator().setSafe(recordCount, data, 0, data.length);
    recordCount++;
  }
  binaryVector.getMutator().setValueCount(recordCount);
  input.setRecordCount(recordCount);
  input.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return input;
}
示例6: printDictionary
import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Prints every entry of a column's dictionary to standard output, one {@code "id: value"}
 * line per entry, after a header line naming the column.
 *
 * <p>The value is decoded according to the column's primitive type. INT96, BINARY and
 * FIXED_LEN_BYTE_ARRAY entries are rendered as UTF-8 text. Entries of any other type
 * produce no output.
 *
 * @param columnDescriptor column the dictionary belongs to; supplies the header text and the type
 * @param localDictionary dictionary whose ids 0 through {@code getMaxId()} are printed
 */
public static void printDictionary(ColumnDescriptor columnDescriptor, Dictionary localDictionary) {
  System.out.println("Dictionary for column " + columnDescriptor.toString());
  // getMaxId() is the highest valid id, so the bound is inclusive; an exclusive bound
  // silently drops the last dictionary entry.
  final int maxId = localDictionary.getMaxId();
  for (int i = 0; i <= maxId; ++i) {
    switch (columnDescriptor.getType()) {
      case INT32:
        System.out.println(format("%d: %d", i, localDictionary.decodeToInt(i)));
        break;
      case INT64:
        System.out.println(format("%d: %d", i, localDictionary.decodeToLong(i)));
        break;
      case INT96:
      case BINARY:
      case FIXED_LEN_BYTE_ARRAY:
        // Decode with an explicit charset rather than the platform default.
        System.out.println(format("%d: %s", i, localDictionary.decodeToBinary(i).toStringUsingUTF8()));
        break;
      case FLOAT:
        System.out.println(format("%d: %f", i, localDictionary.decodeToFloat(i)));
        break;
      case DOUBLE:
        System.out.println(format("%d: %f", i, localDictionary.decodeToDouble(i)));
        break;
      case BOOLEAN:
        System.out.println(format("%d: %b", i, localDictionary.decodeToBoolean(i)));
        break;
      default:
        break;
    }
  }
}
示例7: setDictionary
import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Decodes every dictionary entry up front into {@code expandedDictionary}, indexed by
 * dictionary id, as string values.
 *
 * @param dictionary dictionary whose ids 0 through {@code getMaxId()} are decoded as binary
 */
@Override
public void setDictionary(Dictionary dictionary)
{
    final int entryCount = dictionary.getMaxId() + 1;
    expandedDictionary = new Value[entryCount];
    for (int id = 0; id < entryCount; id++) {
        // getBytes() already hands back a copied array, so ValueFactory#newString
        // does not need to copy it again.
        final byte[] utf8 = dictionary.decodeToBinary(id).getBytes();
        expandedDictionary[id] = ValueFactory.newString(utf8);
    }
}
示例8: setDictionary
import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Decodes every dictionary entry up front into {@code expandedDictionary}, indexed by
 * dictionary id, converting each int entry through {@code decimalFromLong}.
 *
 * @param dictionary dictionary whose ids 0 through {@code getMaxId()} are decoded as ints
 */
@Override
public void setDictionary(Dictionary dictionary)
{
    final int entryCount = dictionary.getMaxId() + 1;
    expandedDictionary = new Value[entryCount];
    for (int id = 0; id < entryCount; id++) {
        final int unscaled = dictionary.decodeToInt(id);
        expandedDictionary[id] = decimalFromLong(unscaled);
    }
}
示例9: setDictionary
import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Decodes every dictionary entry up front into {@code _dict}, indexed by dictionary id,
 * as UTF-8 strings.
 *
 * @param dictionary dictionary whose ids 0 through {@code getMaxId()} are decoded as binary
 */
@Override
public void setDictionary(Dictionary dictionary) {
  final int entryCount = dictionary.getMaxId() + 1;
  _dict = new String[entryCount];
  for (int id = 0; id < entryCount; id++) {
    final Binary entry = dictionary.decodeToBinary(id);
    _dict[id] = entry.toStringUsingUTF8();
  }
}
示例10: expandDictionary
import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Reads the dictionary page of a column chunk and returns the set of all values it contains.
 *
 * <p>Returns {@code null} when the dictionary cannot be expanded: either the chunk has no
 * dictionary page, or the column type is not one of BINARY, INT32, INT64, FLOAT or DOUBLE.
 * An unsupported type is reported with a single warning. It is deliberately not reported
 * as an empty set, because an empty set would claim that the dictionary holds no values.
 *
 * @param meta metadata of the column chunk whose dictionary is expanded
 * @param <T> element type the caller expects; the cast is unchecked
 * @return the decoded dictionary values, or {@code null} if they cannot be determined
 * @throws IOException declared for the dictionary page read
 */
@SuppressWarnings("unchecked")
private <T extends Comparable<T>> Set<T> expandDictionary(ColumnChunkMetaData meta) throws IOException {
  ColumnDescriptor col = new ColumnDescriptor(meta.getPath().toArray(), meta.getPrimitiveType(), -1, -1);
  DictionaryPage page = dictionaries.readDictionaryPage(col);

  // the chunk may not be dictionary-encoded
  if (page == null) {
    return null;
  }

  Dictionary dict = page.getEncoding().initDictionary(col, page);
  final int maxId = dict.getMaxId();
  final Set<Object> dictSet = new HashSet<Object>();

  // Dispatch on the type once, outside the loops, so an unsupported type is logged once
  // instead of once per dictionary entry.
  switch (meta.getType()) {
    case BINARY:
      for (int i = 0; i <= maxId; i++) {
        dictSet.add(dict.decodeToBinary(i));
      }
      break;
    case INT32:
      for (int i = 0; i <= maxId; i++) {
        dictSet.add(dict.decodeToInt(i));
      }
      break;
    case INT64:
      for (int i = 0; i <= maxId; i++) {
        dictSet.add(dict.decodeToLong(i));
      }
      break;
    case FLOAT:
      for (int i = 0; i <= maxId; i++) {
        dictSet.add(dict.decodeToFloat(i));
      }
      break;
    case DOUBLE:
      for (int i = 0; i <= maxId; i++) {
        dictSet.add(dict.decodeToDouble(i));
      }
      break;
    default:
      LOG.warn("Unknown dictionary type: {}", meta.getType());
      return null;
  }

  // Unchecked: the caller chooses T to match the column's type.
  return (Set<T>) (Set<?>) dictSet;
}
示例11: setDictionary
import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Decodes every dictionary entry up front into {@code dict}, indexed by dictionary id,
 * mapping each binary entry through {@code translateEnumValue}.
 *
 * @param dictionary dictionary whose ids 0 through {@code getMaxId()} are decoded as binary
 */
@Override
public void setDictionary(Dictionary dictionary) {
  final int entryCount = dictionary.getMaxId() + 1;
  dict = new Descriptors.EnumValueDescriptor[entryCount];
  for (int id = 0; id < entryCount; id++) {
    dict[id] = translateEnumValue(dictionary.decodeToBinary(id));
  }
}
示例12: setDictionary
import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Decodes every dictionary entry up front into {@code dict}, indexed by dictionary id,
 * as UTF-8 strings.
 *
 * @param dictionary dictionary whose ids 0 through {@code getMaxId()} are decoded as binary
 */
@Override
public void setDictionary(Dictionary dictionary) {
  final int entryCount = dictionary.getMaxId() + 1;
  dict = new String[entryCount];
  for (int id = 0; id < entryCount; id++) {
    final Binary entry = dictionary.decodeToBinary(id);
    dict[id] = entry.toStringUsingUTF8();
  }
}
示例13: setDictionary
import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Decodes every dictionary entry up front into {@code dict}, indexed by dictionary id,
 * mapping each binary entry through {@code convert}.
 *
 * @param dictionary dictionary whose ids 0 through {@code getMaxId()} are decoded as binary
 */
@Override
@SuppressWarnings("unchecked")
public void setDictionary(Dictionary dictionary) {
  final int entryCount = dictionary.getMaxId() + 1;
  // Generic arrays cannot be created directly, hence the Object[] allocation and cast.
  dict = (T[]) new Object[entryCount];
  for (int id = 0; id < entryCount; id++) {
    final Binary entry = dictionary.decodeToBinary(id);
    dict[id] = convert(entry);
  }
}
示例14: run
import org.apache.parquet.column.Dictionary; //导入方法依赖的package包/类
/**
 * Prints the dictionary of the selected column for every row group of a single Parquet file.
 *
 * <p>Each dictionary entry is logged as {@code "<id>: <value>"}. A row group that has no
 * dictionary page for the column is reported and skipped rather than failing.
 *
 * @return 0 once all row groups have been processed
 * @throws IOException if the file cannot be opened, read or closed
 * @throws IllegalArgumentException if the number of targets is not exactly one, or the
 *         column's primitive type is not BINARY, INT32, INT64, FLOAT or DOUBLE
 */
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
  Preconditions.checkArgument(targets != null && targets.size() >= 1,
      "A Parquet file is required.");
  Preconditions.checkArgument(targets.size() == 1,
      "Cannot process multiple Parquet files.");

  String source = targets.get(0);

  // try-with-resources releases the file handle even when validation or printing throws.
  try (ParquetFileReader reader = ParquetFileReader.open(getConf(), qualifiedPath(source))) {
    MessageType schema = reader.getFileMetaData().getSchema();
    ColumnDescriptor descriptor = Util.descriptor(column, schema);
    PrimitiveType type = Util.primitive(column, schema);
    Preconditions.checkNotNull(type);

    DictionaryPageReadStore dictionaryReader;
    int rowGroup = 0;
    while ((dictionaryReader = reader.getNextDictionaryReader()) != null) {
      DictionaryPage page = dictionaryReader.readDictionaryPage(descriptor);

      if (page == null) {
        // The column chunk in this row group is not dictionary-encoded.
        console.info("\nRow group {} has no dictionary for \"{}\"", rowGroup, column);
      } else {
        Dictionary dict = page.getEncoding().initDictionary(descriptor, page);

        console.info("\nRow group {} dictionary for \"{}\":", rowGroup, column);
        for (int i = 0; i <= dict.getMaxId(); i += 1) {
          final String id = String.format("%6d", i);
          switch (type.getPrimitiveTypeName()) {
            case BINARY:
              if (type.getOriginalType() == OriginalType.UTF8) {
                console.info("{}: {}", id,
                    Util.humanReadable(dict.decodeToBinary(i).toStringUsingUTF8(), 70));
              } else {
                console.info("{}: {}", id,
                    Util.humanReadable(dict.decodeToBinary(i).getBytesUnsafe(), 70));
              }
              break;
            case INT32:
              console.info("{}: {}", id, dict.decodeToInt(i));
              break;
            case INT64:
              console.info("{}: {}", id, dict.decodeToLong(i));
              break;
            case FLOAT:
              console.info("{}: {}", id, dict.decodeToFloat(i));
              break;
            case DOUBLE:
              console.info("{}: {}", id, dict.decodeToDouble(i));
              break;
            default:
              throw new IllegalArgumentException(
                  "Unknown dictionary type: " + type.getPrimitiveTypeName());
          }
        }
      }

      reader.skipNextRowGroup();

      rowGroup += 1;
    }
  }

  console.info("");

  return 0;
}