This article collects typical usage examples of the Java method parquet.hadoop.metadata.BlockMetaData.getStartingPos. If you are wondering what BlockMetaData.getStartingPos does or how to use it, the curated examples below should help. You can also explore further usage of the enclosing class parquet.hadoop.metadata.BlockMetaData.
Below are 4 code examples of BlockMetaData.getStartingPos, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java code samples.
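Before the examples, a quick orientation: BlockMetaData instances normally come from a Parquet file's footer, one per row group, and getStartingPos() returns the file offset at which that row group's first column chunk begins. A minimal, self-contained sketch using the classic pre-org.apache parquet-mr API (not part of the examples below; the command-line path argument and the printed format are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.metadata.BlockMetaData;
import parquet.hadoop.metadata.ParquetMetadata;

public class PrintRowGroupOffsets {
  public static void main(String[] args) throws Exception {
    // Read the footer; it carries one BlockMetaData per row group.
    ParquetMetadata footer = ParquetFileReader.readFooter(new Configuration(), new Path(args[0]));
    for (BlockMetaData block : footer.getBlocks()) {
      // File offset of the row group's first column chunk.
      System.out.println("row group starts at byte " + block.getStartingPos());
    }
  }
}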
Example 1: checkBelongingToANewHDFSBlock
import parquet.hadoop.metadata.BlockMetaData; // import the package/class this method depends on
/**
 * @param rowGroupMetadata metadata of the row group to check
 * @return true if the midpoint of the row group falls in a new HDFS block, in which case the
 *         currentHDFSBlock pointer is also moved to the index of the block containing the row group;
 *         false if the midpoint is still within the current HDFS block
 */
private boolean checkBelongingToANewHDFSBlock(BlockMetaData rowGroupMetadata) {
  boolean isNewHdfsBlock = false;
  long rowGroupMidPoint = rowGroupMetadata.getStartingPos() + (rowGroupMetadata.getCompressedSize() / 2);
  // if the midpoint is not in the current HDFS block any more, return true
  while (rowGroupMidPoint > getHDFSBlockEndingPosition(currentMidPointHDFSBlockIndex)) {
    isNewHdfsBlock = true;
    currentMidPointHDFSBlockIndex++;
    if (currentMidPointHDFSBlockIndex >= hdfsBlocks.length)
      throw new ParquetDecodingException("the row group is not in hdfs blocks in the file: midpoint of row groups is "
          + rowGroupMidPoint
          + ", the end of the hdfs block is "
          + getHDFSBlockEndingPosition(currentMidPointHDFSBlockIndex - 1));
  }
  while (rowGroupMetadata.getStartingPos() > getHDFSBlockEndingPosition(currentStartHdfsBlockIndex)) {
    currentStartHdfsBlockIndex++;
    if (currentStartHdfsBlockIndex >= hdfsBlocks.length)
      throw new ParquetDecodingException("The row group does not start in this file: row group offset is "
          + rowGroupMetadata.getStartingPos()
          + " but the end of hdfs blocks of file is "
          + getHDFSBlockEndingPosition(currentStartHdfsBlockIndex));
  }
  return isNewHdfsBlock;
}
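Example 1 relies on a getHDFSBlockEndingPosition helper that is not shown. A plausible minimal sketch of it, assuming hdfsBlocks is an array of org.apache.hadoop.fs.BlockLocation for the same file (both the field type and the helper body are assumptions, not taken from the example):

private long getHDFSBlockEndingPosition(int hdfsBlockIndex) {
  BlockLocation hdfsBlock = hdfsBlocks[hdfsBlockIndex];
  // Offset of the last byte covered by this HDFS block.
  return hdfsBlock.getOffset() + hdfsBlock.getLength() - 1;
}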
Example 2: generateSplitByDeprecatedConstructor
import parquet.hadoop.metadata.BlockMetaData; // import the package/class this method depends on
private List<ParquetInputSplit> generateSplitByDeprecatedConstructor(long min, long max) throws IOException {
  List<ParquetInputSplit> splits = new ArrayList<ParquetInputSplit>();
  List<ClientSideMetadataSplitStrategy.SplitInfo> splitInfos = ClientSideMetadataSplitStrategy
      .generateSplitInfo(blocks, hdfsBlocks, min, max);
  for (ClientSideMetadataSplitStrategy.SplitInfo splitInfo : splitInfos) {
    BlockMetaData lastRowGroup = splitInfo.getRowGroups().get(splitInfo.getRowGroupCount() - 1);
    long end = lastRowGroup.getStartingPos() + lastRowGroup.getTotalByteSize();
    ParquetInputSplit split = new ParquetInputSplit(fileStatus.getPath(),
        splitInfo.hdfsBlock.getOffset(), end, splitInfo.hdfsBlock.getHosts(),
        splitInfo.rowGroups, schema.toString(), null, null, extramd);
    splits.add(split);
  }
  return splits;
}
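As the method name suggests, this uses the deprecated ParquetInputSplit constructor, which embeds the row-group metadata and schema string in the split itself. Example 3 below builds a split with the newer constructor, which stores only the row groups' starting offsets (taken from getStartingPos), so the reader can re-read the footer and match row groups back up by offset.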
Example 3: getParquetInputSplit
import parquet.hadoop.metadata.BlockMetaData; // import the package/class this method depends on
public ParquetInputSplit getParquetInputSplit(FileStatus fileStatus, String requestedSchema, Map<String, String> readSupportMetadata) throws IOException {
  MessageType requested = MessageTypeParser.parseMessageType(requestedSchema);
  long length = 0;
  for (BlockMetaData block : this.getRowGroups()) {
    List<ColumnChunkMetaData> columns = block.getColumns();
    for (ColumnChunkMetaData column : columns) {
      if (requested.containsPath(column.getPath().toArray())) {
        length += column.getTotalSize();
      }
    }
  }
  BlockMetaData lastRowGroup = this.getRowGroups().get(this.getRowGroupCount() - 1);
  long end = lastRowGroup.getStartingPos() + lastRowGroup.getTotalByteSize();
  long[] rowGroupOffsets = new long[this.getRowGroupCount()];
  for (int i = 0; i < rowGroupOffsets.length; i++) {
    rowGroupOffsets[i] = this.getRowGroups().get(i).getStartingPos();
  }
  return new ParquetInputSplit(
      fileStatus.getPath(),
      hdfsBlock.getOffset(),
      end,
      length,
      hdfsBlock.getHosts(),
      rowGroupOffsets);
}
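One detail worth flagging in both Example 2 and Example 3: getTotalByteSize() reports the total uncompressed size of the row group's column data, while getCompressedSize() (used for the midpoint arithmetic in Example 1) reports the size on disk. For compressed files, end computed as getStartingPos() + getTotalByteSize() is therefore typically a generous upper bound on where the last row group ends in the file, not an exact byte position.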
Example 4: checkSorted
import parquet.hadoop.metadata.BlockMetaData; // import the package/class this method depends on
private static void checkSorted(List<BlockMetaData> rowGroupBlocks) {
  long previousOffset = 0L;
  for (BlockMetaData rowGroup : rowGroupBlocks) {
    long currentOffset = rowGroup.getStartingPos();
    if (currentOffset < previousOffset) {
      throw new ParquetDecodingException("row groups are not sorted: previous row group starts at "
          + previousOffset + ", current row group starts at " + currentOffset);
    }
    // Remember this offset so the next iteration compares against it;
    // without this update the check only ever compares against 0.
    previousOffset = currentOffset;
  }
}
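A typical call site, assuming a footer obtained as in the sketch at the top of this article:

checkSorted(footer.getBlocks());

Footers written by parquet-mr list row groups in file order, so this check is a cheap sanity guard; a violation points to corrupted or hand-assembled metadata.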