This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.getPaths. If you have been struggling with questions such as: what exactly does CombineFileSplit.getPaths do, how is CombineFileSplit.getPaths used, or where can I find examples of CombineFileSplit.getPaths, then the hand-picked method examples below may help. You can also explore further usage examples of the enclosing class org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.
Below, 8 code examples of the CombineFileSplit.getPaths method are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Java code examples.
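As a quick orientation before the examples: getPaths() returns one Path per file packed into the combined split, and its entries are parallel to the arrays returned by getStartOffsets() and getLengths(). The minimal sketch below is not taken from any of the projects quoted here; the class and method names are purely illustrative. It shows the usual way the three arrays are walked together:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class CombineFileSplitDump {
  // Print every file chunk packed into a combined split.
  public static void dumpSplit(CombineFileSplit split) {
    Path[] paths = split.getPaths();          // one entry per packed file
    long[] starts = split.getStartOffsets();  // byte offset of each chunk within its file
    long[] lengths = split.getLengths();      // length of each chunk in bytes
    for (int i = 0; i < paths.length; i++) {
      System.out.println(paths[i] + " offset=" + starts[i] + " length=" + lengths[i]);
    }
  }
}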
Example 1: initialize
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException {
  Configuration conf = context.getConfiguration();
  CombineFileSplit cSplit = (CombineFileSplit) split;
  Path[] path = cSplit.getPaths();
  long[] start = cSplit.getStartOffsets();
  long[] len = cSplit.getLengths();
  FileSystem fs = cSplit.getPath(0).getFileSystem(conf);
  long startTS = conf.getLong(RowInputFormat.START_TIME_MILLIS, 0L);
  long endTS = conf.getLong(RowInputFormat.END_TIME_MILLIS, 0L);
  this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, startTS, endTS);
  instantiateGfxdLoner(conf);
}
Example 2: FileQueue
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
/**
 * @param split Description of input sources.
 * @param conf Used to resolve FileSystem instances.
 */
public FileQueue(CombineFileSplit split, Configuration conf)
    throws IOException {
  this.conf = conf;
  paths = split.getPaths();
  startoffset = split.getStartOffsets();
  lengths = split.getLengths();
  nextSource();
}
Example 3: initialize
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  CombineFileSplit cSplit = (CombineFileSplit) split;
  Path[] path = cSplit.getPaths();
  long[] start = cSplit.getStartOffsets();
  long[] len = cSplit.getLengths();
  Configuration conf = context.getConfiguration();
  FileSystem fs = cSplit.getPath(0).getFileSystem(conf);
  this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, 0L, 0L);
}
Example 4: TotalTextRecordReader
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
/**
 * @param combineFileSplit the combined split to be processed
 * @param context holds task and system information
 * @param currentIndex index of the current file within the split
 */
public TotalTextRecordReader(CombineFileSplit combineFileSplit, TaskAttemptContext context, int currentIndex) {
  this.combineFileSplit = combineFileSplit;
  this.currentIndex = currentIndex;
  this.configuration = context.getConfiguration();
  this.totalLength = combineFileSplit.getPaths().length; // number of files packed into this split
  this.isFinished = false;
}
Example 5: initialize
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  CombineFileSplit split = (CombineFileSplit) inputSplit;
  paths = split.getPaths();
  configuration = taskAttemptContext.getConfiguration();
  id = -1;
}
Example 6: getSplits
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
/**
 * Get and combine all splits of .shp files.
 * @param job the job context
 * @return a single-element list holding one combined split of all .shp files
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  // get the original combined split.
  CombineFileSplit combineSplit = (CombineFileSplit) super.getSplits(job).get(0);
  Path[] paths = combineSplit.getPaths();
  // collect the indexes of all .shp files
  List<Integer> shpIds = new ArrayList<>();
  for (int i = 0; i < paths.length; ++i) {
    if (FilenameUtils.getExtension(paths[i].toString()).equals("shp")) {
      shpIds.add(i);
    }
  }
  // prepare parameters for constructing the new combined split
  Path[] shpPaths = new Path[shpIds.size()];
  long[] shpStarts = new long[shpIds.size()];
  long[] shpLengths = new long[shpIds.size()];
  for (int i = 0; i < shpIds.size(); ++i) {
    int id = shpIds.get(i);
    shpPaths[i] = combineSplit.getPath(id);
    shpStarts[i] = combineSplit.getOffset(id);
    shpLengths[i] = combineSplit.getLength(id);
  }
  // combine all .shp splits into one split.
  CombineFileSplit shpSplit = new CombineFileSplit(shpPaths, shpStarts, shpLengths, combineSplit.getLocations());
  List<InputSplit> shpSplits = new ArrayList<>();
  shpSplits.add(shpSplit);
  return shpSplits;
}
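Note on this example: it assumes that super.getSplits(job) packs all input files into a single combined split, which is why only element 0 is taken. The filtered shpPaths, shpStarts and shpLengths arrays must stay index-aligned with one another, hence getPath, getOffset and getLength are all read with the same id.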
Example 7: ParserPump
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
public ParserPump(CombineFileSplit split, TaskAttemptContext context) {
  this.context = context;
  this.paths = split.getPaths();
  this.size = split.getLength(); // total length in bytes of all files packed into the split
  this.skipInvalid = context.getConfiguration().getBoolean(SKIP_INVALID_PROPERTY, false);
}
Example 8: initialize
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
/**
 * Cut the combined split into one FileSplit each for the .shp, .shx and .dbf file.
 * @param split the combined split covering the .shp, .shx and .dbf files
 * @param context the task attempt context
 * @throws IOException
 * @throws InterruptedException
 */
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException
{
  CombineFileSplit fileSplit = (CombineFileSplit) split;
  Path[] paths = fileSplit.getPaths();
  for (int i = 0; i < paths.length; ++i) {
    String suffix = FilenameUtils.getExtension(paths[i].toString());
    if (suffix.equals(SHP_SUFFIX)) shpSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
    else if (suffix.equals(SHX_SUFFIX)) shxSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
    else if (suffix.equals(DBF_SUFFIX)) dbfSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
  }
  // if the .shp file doesn't exist, throw an IOException
  if (shpSplit == null) throw new IOException("Can't find .shp file.");
  else {
    if (shxSplit != null) {
      // the .shx index exists, so read the .shp file with its help
      // first read all index records into memory
      Path filePath = shxSplit.getPath();
      FileSystem fileSys = filePath.getFileSystem(context.getConfiguration());
      FSDataInputStream shxInputStream = fileSys.open(filePath);
      shxInputStream.skip(24);
      int shxFileLength = shxInputStream.readInt() * 2 - 100; // file length field is in 16-bit words; convert to bytes and exclude the 100-byte header
      // skip the remaining 72 bytes of the header
      shxInputStream.skip(72);
      byte[] bytes = new byte[shxFileLength];
      // read all index records into memory (the 100-byte header has already been skipped above)
      shxInputStream.readFully(bytes, 0, bytes.length);
      IntBuffer buffer = ByteBuffer.wrap(bytes).asIntBuffer();
      int[] indexes = new int[shxFileLength / 4];
      buffer.get(indexes);
      shapeFileReader = new ShapeFileReader(indexes);
    } else shapeFileReader = new ShapeFileReader(); // no index available, construct without parameters
    shapeFileReader.initialize(shpSplit, context);
  }
  if (dbfSplit != null) {
    dbfFileReader = new DbfFileReader();
    dbfFileReader.initialize(dbfSplit, context);
    hasDbf = true;
  } else hasDbf = false;
}
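A short note on the .shx arithmetic above, based on the ESRI shapefile specification: the 100-byte header stores the total file length at byte offset 24 as a big-endian count of 16-bit words, so readInt() * 2 - 100 is the size of the record section in bytes. Each index record is 8 bytes, a record offset followed by a content length, both also counted in 16-bit words, which is why the bytes are reinterpreted as an int[] of shxFileLength / 4 entries.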