

Java CombineFileSplit.getPaths Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.getPaths. If you are wondering what CombineFileSplit.getPaths does, how to use it, or want to see it in context, the curated examples below may help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.


The following shows 8 code examples of the CombineFileSplit.getPaths method, sorted by popularity by default.
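Before the examples, a minimal sketch of the method's contract may help: getPaths() returns one Path per file packed into the combined split, and its indexes line up with the parallel arrays returned by getStartOffsets() and getLengths(). The class and method names below are illustrative only, not taken from any of the projects cited in this article.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class CombineFileSplitPathsDemo {
    /** Prints every file chunk packed into a combined split. */
    public static void describe(CombineFileSplit split) {
        Path[] paths = split.getPaths();          // one entry per packed file
        long[] starts = split.getStartOffsets();  // parallel array: chunk start within each file
        long[] lengths = split.getLengths();      // parallel array: chunk length in bytes
        for (int i = 0; i < paths.length; i++) {
            System.out.printf("%s [offset=%d, length=%d]%n", paths[i], starts[i], lengths[i]);
        }
    }
}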

Example 1: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException {
  Configuration conf = context.getConfiguration();
  CombineFileSplit cSplit = (CombineFileSplit) split;
  Path[] path = cSplit.getPaths();
  long[] start = cSplit.getStartOffsets();
  long[] len = cSplit.getLengths();

  FileSystem fs = cSplit.getPath(0).getFileSystem(conf);

  long startTS = conf.getLong(RowInputFormat.START_TIME_MILLIS, 0L);
  long endTS = conf.getLong(RowInputFormat.END_TIME_MILLIS, 0L);
  this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, startTS, endTS);

  instantiateGfxdLoner(conf);
}
 
Developer ID: gemxd, Project: gemfirexd-oss, Lines: 18, Source: RowRecordReader.java

Example 2: FileQueue

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
/**
 * @param split Description of input sources.
 * @param conf Used to resolve FileSystem instances.
 */
public FileQueue(CombineFileSplit split, Configuration conf)
    throws IOException {
  this.conf = conf;
  paths = split.getPaths();
  startoffset = split.getStartOffsets();
  lengths = split.getLengths();
  nextSource();
}
 
Developer ID: naver, Project: hadoop, Lines: 13, Source: FileQueue.java

Example 3: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
throws IOException, InterruptedException {
  CombineFileSplit cSplit = (CombineFileSplit) split;
  Path[] path = cSplit.getPaths();
  long[] start = cSplit.getStartOffsets();
  long[] len = cSplit.getLengths();

  Configuration conf = context.getConfiguration();
  FileSystem fs = cSplit.getPath(0).getFileSystem(conf);
  
  this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, 0L, 0L);
}
 
Developer ID: gemxd, Project: gemfirexd-oss, Lines: 14, Source: AbstractGFRecordReader.java

Example 4: TotalTextRecordReader

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
/**
 * @param combineFileSplit the combined split to process
 * @param context          holds task and system information
 * @param currentIndex     index of the current file within the split
 */
public TotalTextRecordReader(CombineFileSplit combineFileSplit, TaskAttemptContext context, int currentIndex) {
    this.combineFileSplit = combineFileSplit;
    this.currentIndex = currentIndex;
    this.configuration = context.getConfiguration();
    this.totalLength = combineFileSplit.getPaths().length;
    this.isFinished = false;
}
 
Developer ID: Hope6537, Project: hope-tactical-equipment, Lines: 13, Source: TotalTextRecordReader.java
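Readers with this three-argument constructor are designed to be instantiated reflectively by Hadoop's CombineFileRecordReader, which creates one reader per packed file and locates the constructor using the boxed Integer type. A minimal sketch of how such a reader is typically wired up; the input format class name and the Text/Text key-value types are assumptions, not part of the hope-tactical-equipment project:

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class TotalTextInputFormat extends CombineFileInputFormat<Text, Text> {
    @Override
    public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context)
            throws IOException {
        // CombineFileRecordReader instantiates one TotalTextRecordReader per file,
        // passing the split, the context, and the file's index within the split
        // (assumes TotalTextRecordReader extends RecordReader<Text, Text>).
        return new CombineFileRecordReader<>((CombineFileSplit) split, context, TotalTextRecordReader.class);
    }
}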

Example 5: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    CombineFileSplit split = (CombineFileSplit)inputSplit;
    paths = split.getPaths();
    configuration = taskAttemptContext.getConfiguration();
    id = -1;
}
 
Developer ID: DataSystemsLab, Project: GeoSpark, Lines: 8, Source: BoundaryRecordReader.java

Example 6: getSplits

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
/**
 * Get all .shp entries from the combined split and return them as a single split.
 * @param job
 * @return
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    // get the original combined split (note: this assumes all input was packed into a single split)
    CombineFileSplit combineSplit = (CombineFileSplit)super.getSplits(job).get(0);
    Path[] paths = combineSplit.getPaths();

    // get the indexes of all .shp files
    List<Integer> shpIds = new ArrayList<>();
    for(int i = 0;i < paths.length; ++i){
        if(FilenameUtils.getExtension(paths[i].toString()).equals("shp")){
            shpIds.add(i);
        }
    }

    // prepare parameters for constructing new combine split
    Path[] shpPaths = new Path[shpIds.size()];
    long[] shpStarts = new long[shpIds.size()];
    long[] shpLengths = new long[shpIds.size()];

    for(int i = 0;i < shpIds.size(); ++i){
        int id = shpIds.get(i);
        shpPaths[i] = combineSplit.getPath(id);
        shpStarts[i] = combineSplit.getOffset(id);
        shpLengths[i] = combineSplit.getLength(id);
    }

    // combine all .shp entries into one split
    CombineFileSplit shpSplit = new CombineFileSplit(shpPaths, shpStarts, shpLengths, combineSplit.getLocations());
    List<InputSplit> shpSplits = new ArrayList<>();
    shpSplits.add(shpSplit);
    return shpSplits;
}
 
Developer ID: DataSystemsLab, Project: GeoSpark, Lines: 39, Source: BoundaryInputFormat.java
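The loop above relies on getPaths() indexes remaining valid for the per-index accessors getPath(i), getOffset(i), and getLength(i): the split stores parallel arrays. A compact, hypothetical helper (not part of GeoSpark) expressing the same filtering:

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.FilenameUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public final class SplitFilters {
    /** Returns the indexes of split entries whose path carries the given extension. */
    public static List<Integer> indexesWithExtension(CombineFileSplit split, String extension) {
        List<Integer> ids = new ArrayList<>();
        Path[] paths = split.getPaths();
        for (int i = 0; i < paths.length; i++) {
            if (FilenameUtils.getExtension(paths[i].toString()).equals(extension)) {
                ids.add(i);
            }
        }
        return ids;
    }
}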

Example 7: ParserPump

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
public ParserPump(CombineFileSplit split, TaskAttemptContext context) {
    this.context = context;
    this.paths = split.getPaths();
    this.size = split.getLength();
    this.skipInvalid = context.getConfiguration().getBoolean(SKIP_INVALID_PROPERTY, false);
}
 
Developer ID: Merck, Project: Halyard, Lines: 7, Source: HalyardBulkLoad.java
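Unlike the other examples, this one calls the no-argument getLength() rather than getLengths(): the former returns the total byte count across every file packed into the split (the sum of the per-file lengths), which suits a size counter. A small sketch of that invariant; the class and method names are chosen here for illustration:

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public final class SplitLengthCheck {
    /** Verifies that the no-arg getLength() equals the sum of getLengths(). */
    public static boolean totalMatches(CombineFileSplit split) {
        long total = 0;
        for (long len : split.getLengths()) {
            total += len;
        }
        return total == split.getLength();
    }
}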

Example 8: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
/**
 * Cut the combined split into FileSplits for the .shp, .shx, and .dbf files.
 * @param split
 * @param context
 * @throws IOException
 * @throws InterruptedException
 */
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException
{
    CombineFileSplit fileSplit = (CombineFileSplit)split;
    Path[] paths = fileSplit.getPaths();
    for(int i = 0;i < paths.length; ++i){
        String suffix = FilenameUtils.getExtension(paths[i].toString());
        if(suffix.equals(SHP_SUFFIX)) shpSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
        else if(suffix.equals(SHX_SUFFIX)) shxSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
        else if(suffix.equals(DBF_SUFFIX)) dbfSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
    }
    // if the .shp file doesn't exist, throw an IOException
    if(shpSplit == null) throw new IOException("Can't find .shp file.");
    else{
        if(shxSplit != null){
            // an index file exists, so read the .shp guided by the .shx:
            // first read all index records into memory
            Path filePath = shxSplit.getPath();
            FileSystem fileSys = filePath.getFileSystem(context.getConfiguration());
            FSDataInputStream shxInputStream = fileSys.open(filePath);
            shxInputStream.skip(24);
            int shxFileLength = shxInputStream.readInt() * 2 - 100; // length field is in 16-bit words; exclude the 100-byte header
            // skip the remaining 72 bytes of the header
            shxInputStream.skip(72);
            byte[] bytes = new byte[shxFileLength];
            // read all index records into memory (the 100-byte header has already been skipped)
            shxInputStream.readFully(bytes, 0, bytes.length);
            IntBuffer buffer = ByteBuffer.wrap(bytes).asIntBuffer();
            int[] indexes = new int[shxFileLength / 4];
            buffer.get(indexes);
            shapeFileReader = new ShapeFileReader(indexes);
        }else shapeFileReader = new ShapeFileReader(); // no index file, so use the no-arg constructor
        shapeFileReader.initialize(shpSplit, context);
    }
    if(dbfSplit != null){
        dbfFileReader = new DbfFileReader();
        dbfFileReader.initialize(dbfSplit, context);
        hasDbf = true;
    }else hasDbf = false;

}
 
Developer ID: DataSystemsLab, Project: GeoSpark, Lines: 49, Source: CombineShapeReader.java
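For reference, the header arithmetic in this example follows the shapefile specification: every .shp/.shx file starts with a 100-byte header, and the file-length field at byte offset 24 is expressed in 16-bit words, so readInt() * 2 - 100 yields the byte length of the index records that follow (the code skips 24 bytes, reads the 4-byte length field, then skips the remaining 72 header bytes). Each index record is 8 bytes, a 4-byte record offset plus a 4-byte content length, which is why shxFileLength / 4 ints are allocated. A worked example with an assumed length value:

public class ShxHeaderMath {
    public static void main(String[] args) {
        int fileLengthField = 154;               // assumed value of the field read at byte 24, in 16-bit words
        int totalBytes = fileLengthField * 2;    // 308 bytes for the whole .shx file
        int indexBytes = totalBytes - 100;       // 208 bytes of index records once the header is excluded
        System.out.println(indexBytes / 8 + " index records");  // 26 records of 8 bytes each
        System.out.println(indexBytes / 4 + " ints to read");   // 52 ints, matching new int[shxFileLength / 4]
    }
}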


Note: The org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.getPaths examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and distribution and use are subject to each project's license. Do not reproduce without permission.