This article collects typical usage examples of the Java class org.apache.parquet.hadoop.ParquetInputSplit. If you are unsure what ParquetInputSplit is for or how to use it, the selected class code examples below may help.
The ParquetInputSplit class belongs to the org.apache.parquet.hadoop package. Six code examples of the class are shown below, sorted by popularity by default.
Example 1: getSplits

import org.apache.parquet.hadoop.ParquetInputSplit; // import the required package/class

@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  if (isTaskSideMetaData(job)) {
    // task-side metadata: let the superclass compute the splits without reading footers here
    return super.getSplits(job, numSplits);
  }
  List<Footer> footers = getFooters(job);
  List<ParquetInputSplit> splits = realInputFormat.getSplits(job, footers);
  if (splits == null) {
    return null;
  }
  // wrap each mapreduce-API ParquetInputSplit so it can be returned through the mapred API
  InputSplit[] resultSplits = new InputSplit[splits.size()];
  int i = 0;
  for (ParquetInputSplit split : splits) {
    resultSplits[i++] = new ParquetInputSplitWrapper(split);
  }
  return resultSplits;
}
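The isTaskSideMetaData(job) check above decides whether split planning reads footers on the client or defers row-group filtering to the tasks. In parquet-hadoop this is normally controlled by the parquet.task.side.metadata setting, exposed as ParquetInputFormat.TASK_SIDE_METADATA; a minimal sketch of toggling it, assuming that key is what the check reads:

import org.apache.hadoop.mapred.JobConf;
import org.apache.parquet.hadoop.ParquetInputFormat;

JobConf job = new JobConf();
// false: read footers on the client and build splits from them, taking the branch shown above;
// true: skip client-side footer reads and filter row groups inside the tasks instead
job.setBoolean(ParquetInputFormat.TASK_SIDE_METADATA, false);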
Example 2: getSplit

import org.apache.parquet.hadoop.ParquetInputSplit; // import the required package/class

/**
 * Gets a ParquetInputSplit corresponding to a split given by Hive.
 *
 * @param oldSplit the split given by Hive
 * @param conf the JobConf of the Hive job
 * @return a ParquetInputSplit corresponding to the oldSplit
 * @throws IOException if the config cannot be enhanced or if the footer cannot be read from the file
 */
protected ParquetInputSplit getSplit(
    final InputSplit oldSplit,
    final JobConf conf
    ) throws IOException {
  if (oldSplit instanceof FileSplit) {
    FileSplit fileSplit = (FileSplit) oldSplit;
    final long splitStart = fileSplit.getStart();
    final long splitLength = fileSplit.getLength();
    final Path finalPath = fileSplit.getPath();
    // push Hive's projections and filters into a cloned job configuration
    final JobConf cloneJob = hiveBinding.pushProjectionsAndFilters(conf, finalPath.getParent());
    // read the file footer, skipping per-row-group metadata
    final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(cloneJob, finalPath, SKIP_ROW_GROUPS);
    final FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
    final ReadContext readContext =
        new DataWritableReadSupport()
            .init(cloneJob, fileMetaData.getKeyValueMetaData(), fileMetaData.getSchema());
    // remember how many columns the Hive schema exposes (used later to size the value object)
    schemaSize = MessageTypeParser.parseMessageType(
        readContext.getReadSupportMetadata().get(DataWritableReadSupport.HIVE_SCHEMA_KEY)
        ).getFieldCount();
    return new ParquetInputSplit(
        finalPath,
        splitStart,
        splitStart + splitLength,
        splitLength,
        fileSplit.getLocations(),
        null);
  } else {
    throw new IllegalArgumentException("Unknown split type: " + oldSplit);
  }
}
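The SKIP_ROW_GROUPS filter used above is not defined in the snippet; in parquet-hadoop it is usually the statically imported constant from ParquetMetadataConverter. A minimal standalone sketch of the same footer read, assuming that constant and a placeholder file path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;

Configuration conf = new Configuration();
Path file = new Path("/tmp/example.parquet"); // placeholder path, not from the original example
// SKIP_ROW_GROUPS keeps the schema and key/value metadata but drops per-row-group metadata
ParquetMetadata footer =
    ParquetFileReader.readFooter(conf, file, ParquetMetadataConverter.SKIP_ROW_GROUPS);
System.out.println(footer.getFileMetaData().getSchema());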
Example 3: ParquetInputSplitWrapper

import org.apache.parquet.hadoop.ParquetInputSplit; // import the required package/class

public ParquetInputSplitWrapper(ParquetInputSplit realSplit) {
  this.realSplit = realSplit;
}
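Examples 3, 4 and 6 show the constructor and deserialization half of the wrapper; the rest of the old-API split contract is usually plain delegation. A minimal sketch, assuming the wrapper implements org.apache.hadoop.mapred.InputSplit and keeps the wrapped split in the realSplit field shown above:

@Override
public long getLength() {
  // ParquetInputSplit extends the mapreduce FileSplit, so the length is available directly
  return realSplit.getLength();
}

@Override
public String[] getLocations() throws IOException {
  return realSplit.getLocations();
}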
Example 4: readFields

import org.apache.parquet.hadoop.ParquetInputSplit; // import the required package/class

@Override
public void readFields(DataInput in) throws IOException {
  // deserialize into a fresh ParquetInputSplit using its no-arg constructor
  realSplit = new ParquetInputSplit();
  realSplit.readFields(in);
}
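readFields() above restores the wrapped split; its serialization counterpart is typically the mirror-image delegation. A minimal sketch, assuming the same realSplit field (ParquetInputSplit extends FileSplit, which is Writable):

@Override
public void write(final DataOutput out) throws IOException {
  realSplit.write(out);
}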
Example 5: ParquetRecordReaderWrapper

import org.apache.parquet.hadoop.ParquetInputSplit; // import the required package/class

public ParquetRecordReaderWrapper(
    final ParquetInputFormat<ArrayWritable> newInputFormat,
    final InputSplit oldSplit,
    final JobConf oldJobConf,
    final Reporter reporter,
    final HiveBinding hiveBinding)
    throws IOException, InterruptedException {
  this.splitLen = oldSplit.getLength();
  this.hiveBinding = hiveBinding;
  // convert the Hive (mapred) split into a ParquetInputSplit (see example 2)
  final ParquetInputSplit split = getSplit(oldSplit, oldJobConf);
  TaskAttemptID taskAttemptID = TaskAttemptID.forName(oldJobConf.get(IOConstants.MAPRED_TASK_ID));
  if (taskAttemptID == null) {
    taskAttemptID = new TaskAttemptID();
  }
  // create a TaskInputOutputContext
  final TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(oldJobConf, taskAttemptID);
  if (split != null) {
    try {
      realReader = newInputFormat.createRecordReader(split, taskContext);
      realReader.initialize(split, taskContext);
      // read once to gain access to key and value objects
      if (realReader.nextKeyValue()) {
        firstRecord = true;
        valueObj = realReader.getCurrentValue();
      } else {
        eof = true;
      }
    } catch (final InterruptedException e) {
      throw new IOException(e);
    }
  } else {
    realReader = null;
    eof = true;
  }
  if (valueObj == null) { // should initialize the value for createValue
    valueObj = new ArrayWritable(Writable.class, new Writable[schemaSize]);
  }
}
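The constructor above only reads the first record to initialize firstRecord, eof and valueObj; the method that consumes those flags is not part of this listing. A minimal sketch of how such a wrapper's next() typically uses them, assuming the field names from example 5 and a NullWritable key type (a common choice in Hive's Parquet integration):

@Override
public boolean next(final NullWritable key, final ArrayWritable value) throws IOException {
  if (eof) {
    return false;
  }
  try {
    if (firstRecord) {
      // the first record was already fetched in the constructor
      firstRecord = false;
    } else if (!realReader.nextKeyValue()) {
      eof = true;
      return false;
    }
    final ArrayWritable current = realReader.getCurrentValue();
    if (value != current) {
      // copy the columns into the caller-supplied row object
      value.set(current.get());
    }
    return true;
  } catch (final InterruptedException e) {
    throw new IOException(e);
  }
}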
Example 6: readFields

import org.apache.parquet.hadoop.ParquetInputSplit; // import the required package/class

@Override
public void readFields(DataInput in) throws IOException {
  // deserialize into a fresh ParquetInputSplit using its no-arg constructor
  realSplit = new ParquetInputSplit();
  realSplit.readFields(in);
}