

Java CombineFileSplit.getLocations Method Code Examples

This article collects typical usages and code examples of the Java method org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.getLocations. If you are wondering what CombineFileSplit.getLocations does, or how to use it in practice, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.


Seven code examples of the CombineFileSplit.getLocations method are shown below, ordered by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
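Before diving into the examples, here is a minimal, self-contained sketch of the method itself (the file names, offsets and host names are made up for illustration). getLocations() returns the hosts that store the blocks of the combined files; the scheduler uses them as locality hints.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class GetLocationsDemo {
  public static void main(String[] args) throws Exception {
    // Hypothetical paths, offsets, lengths and hosts, purely for illustration
    Path[] paths = { new Path("/data/a.avro"), new Path("/data/b.avro") };
    long[] starts = { 0L, 0L };
    long[] lengths = { 1024L, 2048L };
    String[] hosts = { "node1.example.com", "node2.example.com" };

    CombineFileSplit split = new CombineFileSplit(paths, starts, lengths, hosts);

    // getLocations() exposes the location hints the split was built with
    for (String host : split.getLocations()) {
      System.out.println(host);
    }
  }
}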

Example 1: cleanSplits

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the class the method depends on
/**
 * Truncate the split's location list to SPLIT_MAX_NUM_LOCATIONS if it is
 * longer than that limit (workaround for MAPREDUCE-5186).
 */
private List<InputSplit> cleanSplits(List<InputSplit> splits) throws IOException {
  List<InputSplit> cleanedSplits = Lists.newArrayList();

  for (int i = 0; i < splits.size(); i++) {
    CombineFileSplit oldSplit = (CombineFileSplit) splits.get(i);
    String[] locations = oldSplit.getLocations();

    Preconditions.checkNotNull(locations, "CombineFileSplit.getLocations() returned null");

    if (locations.length > SPLIT_MAX_NUM_LOCATIONS) {
      locations = Arrays.copyOf(locations, SPLIT_MAX_NUM_LOCATIONS);
    }

    cleanedSplits
        .add(new CombineFileSplit(oldSplit.getPaths(), oldSplit.getStartOffsets(), oldSplit.getLengths(), locations));
  }
  return cleanedSplits;
}
 
Developer ID: Hanmourang; Project: Gobblin; Lines: 23; Source: AvroKeyRecursiveCombineFileInputFormat.java
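cleanSplits is a private helper; the surrounding Gobblin source is not shown here, but a plausible call site (an assumption for illustration, not taken verbatim from the project) would apply it to the splits produced by the superclass:

// Hypothetical call site: clean the superclass's splits before returning them
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  return cleanSplits(super.getSplits(job));
}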

Example 2: cleanSplits

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the class the method depends on
/**
 * Truncate the split's location list to SPLIT_MAX_NUM_LOCATIONS if it is
 * longer than that limit (workaround for MAPREDUCE-5186).
 */
private static List<InputSplit> cleanSplits(List<InputSplit> splits) throws IOException {
  if (VersionInfo.getVersion().compareTo("2.3.0") >= 0) {
    // MAPREDUCE-5186 was fixed in Hadoop 2.3.0, so on newer versions the splits need no cleanup.
    // Note: this is a lexicographic string comparison (see the note after this example).
    return splits;
  }

  List<InputSplit> cleanedSplits = Lists.newArrayList();

  for (int i = 0; i < splits.size(); i++) {
    CombineFileSplit oldSplit = (CombineFileSplit) splits.get(i);
    String[] locations = oldSplit.getLocations();

    Preconditions.checkNotNull(locations, "CombineFileSplit.getLocations() returned null");

    if (locations.length > SPLIT_MAX_NUM_LOCATIONS) {
      locations = Arrays.copyOf(locations, SPLIT_MAX_NUM_LOCATIONS);
    }

    cleanedSplits.add(new CombineFileSplit(oldSplit.getPaths(), oldSplit.getStartOffsets(), oldSplit.getLengths(),
        locations));
  }
  return cleanedSplits;
}
 
Developer ID: apache; Project: incubator-gobblin; Lines: 28; Source: AvroKeyRecursiveCombineFileInputFormat.java
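One caveat with the version check above: String.compareTo is lexicographic, so for example "2.10.0" compares less than "2.3.0" and would be treated as if it predated the fix. A numeric comparison is more robust; here is a sketch of such a helper (hypothetical, assuming plain dotted numeric versions without qualifiers like "-SNAPSHOT"):

// Hypothetical helper: compare dotted version strings component by component
private static boolean isAtLeast(String version, String required) {
  String[] v = version.split("\\.");
  String[] r = required.split("\\.");
  int n = Math.max(v.length, r.length);
  for (int i = 0; i < n; i++) {
    int vi = i < v.length ? Integer.parseInt(v[i]) : 0;
    int ri = i < r.length ? Integer.parseInt(r[i]) : 0;
    if (vi != ri) return vi > ri;
  }
  return true; // equal versions count as "at least"
}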

Example 3: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the class the method depends on
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
		throws IOException, InterruptedException {
	CombineFileSplit combineSplit = (CombineFileSplit)split;
	Path path = combineSplit.getPath(splitIndex);
	this.fileName = path.getName();
	// Carve the single file at splitIndex out of the combined split,
	// reusing the combined split's location hints
	FileSplit fileSplit = new FileSplit(
			path,
			combineSplit.getOffset(splitIndex),
			combineSplit.getLength(splitIndex),
			combineSplit.getLocations());
	delegate.initialize(fileSplit, context);
}
 
Developer ID: conversant; Project: mara; Lines: 15; Source: CombineTextFileInputFormat.java

Example 4: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the class the method depends on
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
		throws IOException, InterruptedException {
	CombineFileSplit combineSplit = (CombineFileSplit)inputSplit;
	FileSplit split = new FileSplit(
			combineSplit.getPath(splitIndex),
			combineSplit.getOffset(splitIndex),
			combineSplit.getLength(splitIndex),
			combineSplit.getLocations());
	// Initialize with the single FileSplit for the current index
	delegate.initialize(split, context);
}
 
Developer ID: conversant; Project: mara; Lines: 13; Source: CombineAvroKeyFileInputFormat.java
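Examples 3 and 4 both read a splitIndex field that tells the wrapper which file inside the combined split it handles. That index is supplied by Hadoop's CombineFileRecordReader, which reflectively instantiates one wrapper per file. A minimal sketch of how such a wrapper is wired into an input format (MyCombineInputFormat and MyReaderWrapper are illustrative names; MyReaderWrapper would be a class like the ones shown in Examples 3, 4 and 6):

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class MyCombineInputFormat extends CombineFileInputFormat<LongWritable, Text> {
  @Override
  public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context)
      throws IOException {
    // CombineFileRecordReader constructs one MyReaderWrapper per file in the
    // combined split, passing the file's index as the Integer argument
    return new CombineFileRecordReader<>((CombineFileSplit) split, context, MyReaderWrapper.class);
  }
}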

Example 5: getSplits

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the class the method depends on
/**
 * Get the combined split from the superclass and rebuild it to contain only the .shp files.
 * @param job the job context
 * @return a single-element list holding the combined split of all .shp files
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    // get the original combined split (the superclass is assumed to produce a single split)
    CombineFileSplit combineSplit = (CombineFileSplit)super.getSplits(job).get(0);
    Path[] paths = combineSplit.getPaths();

    // collect the indexes of all .shp files
    List<Integer> shpIds = new ArrayList<>();
    for (int i = 0; i < paths.length; ++i) {
        if (FilenameUtils.getExtension(paths[i].toString()).equals("shp")) {
            shpIds.add(i);
        }
    }

    // prepare parameters for constructing new combine split
    Path[] shpPaths = new Path[shpIds.size()];
    long[] shpStarts = new long[shpIds.size()];
    long[] shpLengths = new long[shpIds.size()];

    for (int i = 0; i < shpIds.size(); ++i) {
        int id = shpIds.get(i);
        shpPaths[i] = combineSplit.getPath(id);
        shpStarts[i] = combineSplit.getOffset(id);
        shpLengths[i] = combineSplit.getLength(id);
    }

    // combine all .shp parts into one split, reusing the original location hints
    CombineFileSplit shpSplit = new CombineFileSplit(shpPaths, shpStarts, shpLengths, combineSplit.getLocations());
    List<InputSplit> shpSplits = new ArrayList<>();
    shpSplits.add(shpSplit);
    return shpSplits;
}
 
Developer ID: DataSystemsLab; Project: GeoSpark; Lines: 39; Source: BoundaryInputFormat.java

Example 6: SequenceFileRecordReaderWrapper

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the class the method depends on
@SuppressWarnings("unused")
public SequenceFileRecordReaderWrapper(CombineFileSplit split, TaskAttemptContext context, Integer index)
    throws IOException {
  fileSplit = new FileSplit(split.getPath(index), split.getOffset(index), split.getLength(index),
      split.getLocations());
  delegate = new SequenceFileInputFormat<Writable, Text>().createRecordReader(fileSplit, context);
}
 
Developer ID: apache; Project: incubator-blur; Lines: 8; Source: CsvBlurDriver.java
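The three-argument (CombineFileSplit, TaskAttemptContext, Integer) constructor is exactly the signature CombineFileRecordReader looks up via reflection, which is why it is annotated @SuppressWarnings("unused"): nothing in the source references it directly.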

Example 7: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the class the method depends on
/**
 * Cut the combined split into separate FileSplits for the .shp, .shx and .dbf files.
 * @param split the combined input split
 * @param context the task attempt context
 * @throws IOException
 * @throws InterruptedException
 */
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException
{
    CombineFileSplit fileSplit = (CombineFileSplit)split;
    Path[] paths = fileSplit.getPaths();
    for (int i = 0; i < paths.length; ++i) {
        String suffix = FilenameUtils.getExtension(paths[i].toString());
        if (suffix.equals(SHP_SUFFIX)) shpSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
        else if (suffix.equals(SHX_SUFFIX)) shxSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
        else if (suffix.equals(DBF_SUFFIX)) dbfSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
    }
    // if the .shp file doesn't exist, throw an IOException
    if (shpSplit == null) throw new IOException("Can't find .shp file.");
    else {
        if (shxSplit != null) {
            // the .shx index exists, so first read all record offsets into memory
            Path filePath = shxSplit.getPath();
            FileSystem fileSys = filePath.getFileSystem(context.getConfiguration());
            FSDataInputStream shxInputStream = fileSys.open(filePath);
            shxInputStream.skip(24);
            // the length field counts 16-bit words; convert to bytes and exclude the 100-byte header
            int shxFileLength = shxInputStream.readInt() * 2 - 100;
            // skip the remaining 72 bytes of the header
            shxInputStream.skip(72);
            byte[] bytes = new byte[shxFileLength];
            // read all index records into memory
            shxInputStream.readFully(bytes, 0, bytes.length);
            IntBuffer buffer = ByteBuffer.wrap(bytes).asIntBuffer();
            int[] indexes = new int[shxFileLength / 4];
            buffer.get(indexes);
            shapeFileReader = new ShapeFileReader(indexes);
        } else shapeFileReader = new ShapeFileReader(); // no index, construct without parameters
        shapeFileReader.initialize(shpSplit, context);
    }
    if (dbfSplit != null) {
        dbfFileReader = new DbfFileReader();
        dbfFileReader.initialize(dbfSplit, context);
        hasDbf = true;
    } else hasDbf = false;
}
 
Developer ID: DataSystemsLab; Project: GeoSpark; Lines: 49; Source: CombineShapeReader.java
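The magic numbers in the .shx handling above come from the ESRI shapefile header layout: the header is 100 bytes long, and the big-endian file length, measured in 16-bit words, is stored at byte offset 24. Hence skip(24) to reach the length field, readInt() * 2 - 100 for the size of the index records in bytes, and skip(72) to step over the rest of the header. A standalone sketch of reading just that field (the file name is illustrative):

import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;

public class ShxHeaderDemo {
  public static void main(String[] args) throws IOException {
    try (DataInputStream in = new DataInputStream(new FileInputStream("example.shx"))) {
      in.skipBytes(24);                           // skip to the file-length field
      int fileLengthWords = in.readInt();         // big-endian, in 16-bit words
      int indexBytes = fileLengthWords * 2 - 100; // bytes of index records after the 100-byte header
      System.out.println("index payload: " + indexBytes + " bytes");
    }
  }
}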


Note: The org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.getLocations method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors, and distribution and use must follow each project's license. Do not reproduce without permission.