This article collects typical usage examples of the Java class org.apache.parquet.hadoop.ParquetInputSplit. If you are wondering what ParquetInputSplit is for and how to use it, the curated class code examples below may help.
The ParquetInputSplit class belongs to the org.apache.parquet.hadoop package. Six code examples of the class are shown below, ordered by popularity.
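Before the examples, here is a minimal, self-contained sketch of the two operations they revolve around: building a ParquetInputSplit by hand (as in example 2) and round-tripping it through Hadoop's Writable serialization (as in examples 3 to 6). The 6-argument constructor shown matches the one used in example 2; exact signatures vary between parquet-mr versions, and the file path and host names are hypothetical, so treat this as an illustration rather than a reference.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.parquet.hadoop.ParquetInputSplit;

public class ParquetInputSplitSketch {
  public static void main(String[] args) throws IOException {
    // Describe the byte range [start, end) of one Parquet file that a single task should read.
    // Passing null for rowGroupOffsets lets the task-side reader resolve the row groups itself.
    ParquetInputSplit split = new ParquetInputSplit(
        new Path("/tmp/example.parquet"),  // hypothetical file path
        0L,                                // start offset
        128L * 1024 * 1024,                // end offset
        128L * 1024 * 1024,                // length
        new String[] { "host1", "host2" }, // preferred hosts (hypothetical)
        null);                             // row group offsets (null = resolve on the task side)

    // ParquetInputSplit is a Writable, which is what examples 3-6 rely on:
    // the wrapper serializes the real split and rebuilds it with readFields().
    DataOutputBuffer out = new DataOutputBuffer();
    split.write(out);

    ParquetInputSplit copy = new ParquetInputSplit();
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    copy.readFields(in);

    System.out.println(copy.getPath() + " [" + copy.getStart() + ", "
        + (copy.getStart() + copy.getLength()) + ")");
  }
}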
Example 1: getSplits
import org.apache.parquet.hadoop.ParquetInputSplit; // import the required package/class
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  if (isTaskSideMetaData(job)) {
    return super.getSplits(job, numSplits);
  }

  List<Footer> footers = getFooters(job);
  List<ParquetInputSplit> splits = realInputFormat.getSplits(job, footers);
  if (splits == null) {
    return null;
  }
  InputSplit[] resultSplits = new InputSplit[splits.size()];
  int i = 0;
  for (ParquetInputSplit split : splits) {
    resultSplits[i++] = new ParquetInputSplitWrapper(split);
  }
  return resultSplits;
}
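Example 1 only plans splits on the client when task-side metadata is disabled. Below is a minimal sketch of the driver-side toggle; the property name "parquet.task.side.metadata" matches ParquetInputFormat.TASK_SIDE_METADATA in recent parquet-mr releases, but verify it against the version you are using.

import org.apache.hadoop.mapred.JobConf;

public class TaskSideMetadataToggle {
  public static void main(String[] args) {
    // With task-side metadata enabled (the default in newer parquet-mr releases),
    // the wrapper in example 1 delegates to super.getSplits() and Parquet footers
    // are read inside the tasks rather than by the client planning the job.
    JobConf job = new JobConf();
    job.setBoolean("parquet.task.side.metadata", true);
    System.out.println("task side metadata: "
        + job.getBoolean("parquet.task.side.metadata", false));
  }
}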
Example 2: getSplit
import org.apache.parquet.hadoop.ParquetInputSplit; // import the required package/class
/**
 * gets a ParquetInputSplit corresponding to a split given by Hive
 *
 * @param oldSplit The split given by Hive
 * @param conf The JobConf of the Hive job
 * @return a ParquetInputSplit corresponding to the oldSplit
 * @throws IOException if the config cannot be enhanced or if the footer cannot be read from the file
 */
protected ParquetInputSplit getSplit(
    final InputSplit oldSplit,
    final JobConf conf
  ) throws IOException {
  if (oldSplit instanceof FileSplit) {
    FileSplit fileSplit = (FileSplit) oldSplit;
    final long splitStart = fileSplit.getStart();
    final long splitLength = fileSplit.getLength();
    final Path finalPath = fileSplit.getPath();
    final JobConf cloneJob = hiveBinding.pushProjectionsAndFilters(conf, finalPath.getParent());

    final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(cloneJob, finalPath, SKIP_ROW_GROUPS);
    final FileMetaData fileMetaData = parquetMetadata.getFileMetaData();

    final ReadContext readContext =
        new DataWritableReadSupport()
            .init(cloneJob, fileMetaData.getKeyValueMetaData(), fileMetaData.getSchema());
    schemaSize = MessageTypeParser.parseMessageType(
        readContext.getReadSupportMetadata().get(DataWritableReadSupport.HIVE_SCHEMA_KEY)
    ).getFieldCount();
    return new ParquetInputSplit(
        finalPath,
        splitStart,
        splitStart + splitLength,
        splitLength,
        fileSplit.getLocations(),
        null);
  } else {
    throw new IllegalArgumentException("Unknown split type: " + oldSplit);
  }
}
Example 3: ParquetInputSplitWrapper
import org.apache.parquet.hadoop.ParquetInputSplit; // import the required package/class
public ParquetInputSplitWrapper(ParquetInputSplit realSplit) {
  this.realSplit = realSplit;
}
Example 4: readFields
import org.apache.parquet.hadoop.ParquetInputSplit; // import the required package/class
@Override
public void readFields(DataInput in) throws IOException {
  realSplit = new ParquetInputSplit();
  realSplit.readFields(in);
}
Example 5: ParquetRecordReaderWrapper
import org.apache.parquet.hadoop.ParquetInputSplit; // import the required package/class
public ParquetRecordReaderWrapper(
    final ParquetInputFormat<ArrayWritable> newInputFormat,
    final InputSplit oldSplit,
    final JobConf oldJobConf,
    final Reporter reporter,
    final HiveBinding hiveBinding)
    throws IOException, InterruptedException {
  this.splitLen = oldSplit.getLength();
  this.hiveBinding = hiveBinding;

  final ParquetInputSplit split = getSplit(oldSplit, oldJobConf);

  TaskAttemptID taskAttemptID = TaskAttemptID.forName(oldJobConf.get(IOConstants.MAPRED_TASK_ID));
  if (taskAttemptID == null) {
    taskAttemptID = new TaskAttemptID();
  }

  // create a TaskInputOutputContext
  final TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(oldJobConf, taskAttemptID);

  if (split != null) {
    try {
      realReader = newInputFormat.createRecordReader(split, taskContext);
      realReader.initialize(split, taskContext);

      // read once to gain access to key and value objects
      if (realReader.nextKeyValue()) {
        firstRecord = true;
        valueObj = realReader.getCurrentValue();
      } else {
        eof = true;
      }
    } catch (final InterruptedException e) {
      throw new IOException(e);
    }
  } else {
    realReader = null;
    eof = true;
  }

  if (valueObj == null) { // Should initialize the value for createValue
    valueObj = new ArrayWritable(Writable.class, new Writable[schemaSize]);
  }
}
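Example 5 performs only the first nextKeyValue() call, to materialize a value object; the remaining rows are pulled the same way. Below is a minimal sketch of how the underlying new-API reader is typically drained. The reader parameter stands in for realReader and the helper class is an assumption for illustration, not part of the original wrapper.

import java.io.IOException;

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.mapreduce.RecordReader;

public final class DrainReaderSketch {
  // Reads every remaining row from an already-initialized reader, as created in example 5.
  static long drain(RecordReader<Void, ArrayWritable> reader) throws IOException, InterruptedException {
    long rows = 0;
    while (reader.nextKeyValue()) {
      ArrayWritable row = reader.getCurrentValue(); // one Hive row, one Writable per column
      rows++;
    }
    reader.close();
    return rows;
  }
}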
Example 6: readFields
import org.apache.parquet.hadoop.ParquetInputSplit; // import the required package/class
@Override
public void readFields(DataInput in) throws IOException {
  realSplit = new ParquetInputSplit();
  realSplit.readFields(in);
}