This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.getLocations. If you are wondering what CombineFileSplit.getLocations does, how to call it, or want to see it in real code, the curated samples below may help. You can also read more about the enclosing class org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.
The following 7 code examples of CombineFileSplit.getLocations are sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java examples.
Example 1: cleanSplits
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the class/package this method depends on
/**
 * Set the number of locations in the split to SPLIT_MAX_NUM_LOCATIONS if it is larger than
 * SPLIT_MAX_NUM_LOCATIONS (MAPREDUCE-5186).
 */
private List<InputSplit> cleanSplits(List<InputSplit> splits) throws IOException {
  List<InputSplit> cleanedSplits = Lists.newArrayList();
  for (int i = 0; i < splits.size(); i++) {
    CombineFileSplit oldSplit = (CombineFileSplit) splits.get(i);
    String[] locations = oldSplit.getLocations();
    Preconditions.checkNotNull(locations, "CombineFileSplit.getLocations() returned null");
    if (locations.length > SPLIT_MAX_NUM_LOCATIONS) {
      locations = Arrays.copyOf(locations, SPLIT_MAX_NUM_LOCATIONS);
    }
    cleanedSplits
        .add(new CombineFileSplit(oldSplit.getPaths(), oldSplit.getStartOffsets(), oldSplit.getLengths(), locations));
  }
  return cleanedSplits;
}
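For context, here is a minimal sketch of where such a cleanSplits step typically sits: an input format that caps each split's location list before the splits reach the framework. The class name CappedCombineTextInputFormat and the limit of 10 are illustrative assumptions (the snippet above shows neither the enclosing class nor the value of SPLIT_MAX_NUM_LOCATIONS); the cap is folded directly into the getSplits override.
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat;

// Illustrative input format: same behavior as CombineTextInputFormat, but each
// split's location list is capped before the splits are handed to the framework.
public class CappedCombineTextInputFormat extends CombineTextInputFormat {

  // MAPREDUCE-5186: affected Hadoop versions reject splits with too many locations.
  private static final int SPLIT_MAX_NUM_LOCATIONS = 10; // assumed limit, for illustration

  @Override
  public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> capped = new ArrayList<>();
    for (InputSplit split : super.getSplits(job)) {
      CombineFileSplit oldSplit = (CombineFileSplit) split;
      String[] locations = oldSplit.getLocations();
      if (locations != null && locations.length > SPLIT_MAX_NUM_LOCATIONS) {
        locations = Arrays.copyOf(locations, SPLIT_MAX_NUM_LOCATIONS);
      }
      capped.add(new CombineFileSplit(oldSplit.getPaths(), oldSplit.getStartOffsets(),
          oldSplit.getLengths(), locations));
    }
    return capped;
  }
}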
Example 2: cleanSplits
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the class/package this method depends on
/**
 * Set the number of locations in the split to SPLIT_MAX_NUM_LOCATIONS if it is larger than
 * SPLIT_MAX_NUM_LOCATIONS (MAPREDUCE-5186).
 */
private static List<InputSplit> cleanSplits(List<InputSplit> splits) throws IOException {
  if (VersionInfo.getVersion().compareTo("2.3.0") >= 0) {
    // The issue was fixed in Hadoop 2.3.0; on newer versions the splits need no cleanup.
    return splits;
  }
  List<InputSplit> cleanedSplits = Lists.newArrayList();
  for (int i = 0; i < splits.size(); i++) {
    CombineFileSplit oldSplit = (CombineFileSplit) splits.get(i);
    String[] locations = oldSplit.getLocations();
    Preconditions.checkNotNull(locations, "CombineFileSplit.getLocations() returned null");
    if (locations.length > SPLIT_MAX_NUM_LOCATIONS) {
      locations = Arrays.copyOf(locations, SPLIT_MAX_NUM_LOCATIONS);
    }
    cleanedSplits.add(new CombineFileSplit(oldSplit.getPaths(), oldSplit.getStartOffsets(), oldSplit.getLengths(),
        locations));
  }
  return cleanedSplits;
}
Example 3: initialize
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the class/package this method depends on
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  CombineFileSplit combineSplit = (CombineFileSplit) split;
  Path path = combineSplit.getPath(splitIndex);
  this.fileName = path.getName();
  FileSplit fileSplit = new FileSplit(
      path,
      combineSplit.getOffset(splitIndex),
      combineSplit.getLength(splitIndex),
      combineSplit.getLocations());
  delegate.initialize(fileSplit, context);
}
Example 4: initialize
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the class/package this method depends on
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
    throws IOException, InterruptedException {
  CombineFileSplit combineSplit = (CombineFileSplit) inputSplit;
  FileSplit split = new FileSplit(
      combineSplit.getPath(splitIndex),
      combineSplit.getOffset(splitIndex),
      combineSplit.getLength(splitIndex),
      combineSplit.getLocations());
  // Initialize with the single FileSplit for the current index
  delegate.initialize(split, context);
}
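Examples 3 and 4 only show the initialize method of a per-file reader wrapper; the splitIndex field and the delegate come from elsewhere in the class. Below is a minimal, self-contained sketch of the surrounding pattern, assuming a LineRecordReader delegate and the class name LineRecordReaderWrapper (both illustrative, not taken from the snippets above). CombineFileRecordReader instantiates such a wrapper once per file in the combined split, passing the file's index as the third constructor argument.
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

// Illustrative wrapper: one LineRecordReader per file inside the combined split.
public class LineRecordReaderWrapper extends RecordReader<LongWritable, Text> {

  private final int splitIndex;
  private final LineRecordReader delegate = new LineRecordReader();

  // CombineFileRecordReader instantiates this class reflectively with exactly this signature.
  public LineRecordReaderWrapper(CombineFileSplit split, TaskAttemptContext context, Integer index) {
    this.splitIndex = index;
  }

  @Override
  public void initialize(InputSplit inputSplit, TaskAttemptContext context)
      throws IOException, InterruptedException {
    CombineFileSplit combineSplit = (CombineFileSplit) inputSplit;
    // Carve the single file at splitIndex out of the combined split, reusing its locations.
    FileSplit split = new FileSplit(
        combineSplit.getPath(splitIndex),
        combineSplit.getOffset(splitIndex),
        combineSplit.getLength(splitIndex),
        combineSplit.getLocations());
    delegate.initialize(split, context);
  }

  @Override public boolean nextKeyValue() throws IOException { return delegate.nextKeyValue(); }
  @Override public LongWritable getCurrentKey() { return delegate.getCurrentKey(); }
  @Override public Text getCurrentValue() { return delegate.getCurrentValue(); }
  @Override public float getProgress() throws IOException { return delegate.getProgress(); }
  @Override public void close() throws IOException { delegate.close(); }
}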
Example 5: getSplits
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the class/package this method depends on
/**
 * Get and combine all splits of the .shp files.
 * @param job the job context
 * @return a single combined split containing only the .shp entries
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  // Get the original combined split (assumes the parent class produced a single split).
  CombineFileSplit combineSplit = (CombineFileSplit) super.getSplits(job).get(0);
  Path[] paths = combineSplit.getPaths();
  // Collect the indexes of all .shp files.
  List<Integer> shpIds = new ArrayList<>();
  for (int i = 0; i < paths.length; ++i) {
    if (FilenameUtils.getExtension(paths[i].toString()).equals("shp")) {
      shpIds.add(i);
    }
  }
  // Prepare parameters for constructing the new combined split.
  Path[] shpPaths = new Path[shpIds.size()];
  long[] shpStarts = new long[shpIds.size()];
  long[] shpLengths = new long[shpIds.size()];
  for (int i = 0; i < shpIds.size(); ++i) {
    int id = shpIds.get(i);
    shpPaths[i] = combineSplit.getPath(id);
    shpStarts[i] = combineSplit.getOffset(id);
    shpLengths[i] = combineSplit.getLength(id);
  }
  // Combine all .shp entries into one split, reusing the original split's locations.
  CombineFileSplit shpSplit = new CombineFileSplit(shpPaths, shpStarts, shpLengths, combineSplit.getLocations());
  List<InputSplit> shpSplits = new ArrayList<>();
  shpSplits.add(shpSplit);
  return shpSplits;
}
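For completeness, a brief driver-side usage sketch, assuming this getSplits belongs to a format named ShapeFileInputFormat (the enclosing class is not shown above) and an input path passed on the command line:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class ShapeFileJobDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "read shapefiles");
    job.setJarByClass(ShapeFileJobDriver.class);
    // ShapeFileInputFormat is the assumed name of the format whose getSplits is shown above;
    // it combines all .shp entries into a single split.
    job.setInputFormatClass(ShapeFileInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    // ... set mapper/reducer and output path as usual, then submit:
    // System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}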
Example 6: SequenceFileRecordReaderWrapper
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the class/package this method depends on
@SuppressWarnings("unused")
public SequenceFileRecordReaderWrapper(CombineFileSplit split, TaskAttemptContext context, Integer index)
    throws IOException {
  fileSplit = new FileSplit(split.getPath(index), split.getOffset(index), split.getLength(index),
      split.getLocations());
  delegate = new SequenceFileInputFormat<Writable, Text>().createRecordReader(fileSplit, context);
}
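This constructor has exactly the (CombineFileSplit, TaskAttemptContext, Integer) signature that CombineFileRecordReader instantiates reflectively, one instance per file in the combined split. A minimal sketch of the wiring follows; the enclosing format name is an assumption, and the wrapper is assumed to extend RecordReader<Writable, Text> and delegate its remaining methods:
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

// Illustrative format that combines many sequence files into few splits and reads
// each file through the wrapper shown above.
public class CombineSequenceFileInputFormat extends CombineFileInputFormat<Writable, Text> {

  @Override
  public RecordReader<Writable, Text> createRecordReader(InputSplit split, TaskAttemptContext context)
      throws IOException {
    // One SequenceFileRecordReaderWrapper is created per file index in the combined split.
    return new CombineFileRecordReader<Writable, Text>((CombineFileSplit) split, context,
        SequenceFileRecordReaderWrapper.class);
  }
}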
Example 7: initialize
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the class/package this method depends on
/**
 * Cut the combined split into a FileSplit for each of .shp, .shx and .dbf.
 * @param split the combined split covering the shapefile parts
 * @param context the task attempt context
 * @throws IOException
 * @throws InterruptedException
 */
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  CombineFileSplit fileSplit = (CombineFileSplit) split;
  Path[] paths = fileSplit.getPaths();
  for (int i = 0; i < paths.length; ++i) {
    String suffix = FilenameUtils.getExtension(paths[i].toString());
    if (suffix.equals(SHP_SUFFIX)) shpSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
    else if (suffix.equals(SHX_SUFFIX)) shxSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
    else if (suffix.equals(DBF_SUFFIX)) dbfSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
  }
  // If the .shp file doesn't exist, throw an IOException.
  if (shpSplit == null) throw new IOException("Can't find .shp file.");
  else {
    if (shxSplit != null) {
      // A .shx index exists, so read all record offsets into memory first.
      Path filePath = shxSplit.getPath();
      FileSystem fileSys = filePath.getFileSystem(context.getConfiguration());
      FSDataInputStream shxInputStream = fileSys.open(filePath);
      shxInputStream.skip(24);
      // The file-length field at byte offset 24 is in 16-bit words: *2 converts to bytes,
      // -100 excludes the 100-byte header.
      int shxFileLength = shxInputStream.readInt() * 2 - 100;
      // Skip the remaining 72 bytes of the header (24 + 4 + 72 = 100).
      shxInputStream.skip(72);
      byte[] bytes = new byte[shxFileLength];
      // Read all index records into memory.
      shxInputStream.readFully(bytes, 0, bytes.length);
      shxInputStream.close();
      IntBuffer buffer = ByteBuffer.wrap(bytes).asIntBuffer();
      int[] indexes = new int[shxFileLength / 4];
      buffer.get(indexes);
      shapeFileReader = new ShapeFileReader(indexes);
    } else shapeFileReader = new ShapeFileReader(); // no index, construct without parameters
    shapeFileReader.initialize(shpSplit, context);
  }
  if (dbfSplit != null) {
    dbfFileReader = new DbfFileReader();
    dbfFileReader.initialize(dbfSplit, context);
    hasDbf = true;
  } else hasDbf = false;
}
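As a quick sanity check on the header arithmetic above (a worked example, assuming the standard ESRI shapefile layout): a .shx file with 25 records is 100 + 25 * 8 = 300 bytes long, so the length field at byte offset 24 holds 150, the length in 16-bit words. The code then computes 150 * 2 - 100 = 200 bytes of index records, reads them into a 200-byte buffer, and decodes 200 / 4 = 50 ints, i.e. one offset/length pair per record.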