

Java CombineFileSplit.getOffset Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.getOffset. If you have been wondering what exactly CombineFileSplit.getOffset does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also browse further usage examples of the enclosing class, org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.


Nine code examples of the CombineFileSplit.getOffset method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
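Before diving into the examples, a quick orientation: a CombineFileSplit packs chunks of several files into a single split, and for chunk index i the triple getPath(i), getOffset(i), getLength(i) describes which byte range of that file the split covers. The minimal sketch below (the class name SplitInspector is invented for illustration; the accessors are the real Hadoop API) prints those ranges:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

// Hypothetical helper class (illustration only): prints the per-file byte
// range of every chunk bundled into a CombineFileSplit.
public class SplitInspector {
  public static void describe(CombineFileSplit split) {
    for (int i = 0; i < split.getNumPaths(); i++) {
      Path path = split.getPath(i);       // i-th file in the combined split
      long start = split.getOffset(i);    // first byte of this file's chunk
      long length = split.getLength(i);   // number of bytes in the chunk
      System.out.printf("%s: bytes [%d, %d)%n", path, start, start + length);
    }
  }
}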

Example 1: CombineFileLineRecordReader

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class required by this method
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {
  
  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;
  
  //open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) {  // skip first line and re-establish "startOffset".
    startOffset += reader.readLine(new Text(), 0,
                (int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
 
Developer: naver, Project: hadoop, Lines: 24, Source: MultiFileWordCount.java
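A note on how such a reader is wired up (this is context, not part of the example above): a CombineFileInputFormat subclass hands the reader class to CombineFileRecordReader, which instantiates one delegate per file chunk through the (CombineFileSplit, TaskAttemptContext, Integer) constructor, exactly the signature used in Example 1. In the sketch below, MyLineRecordReader is a hypothetical stand-in for a per-file reader like the one above (in the real MultiFileWordCount the key type is a custom WordOffset, not LongWritable); CombineFileInputFormat and CombineFileRecordReader are the real Hadoop classes:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

// Hypothetical input format: delegates each file chunk of the combined
// split to a per-file reader (MyLineRecordReader is assumed to be defined
// elsewhere, e.g. with the constructor shown in Example 1).
public class MyCombineInputFormat extends CombineFileInputFormat<LongWritable, Text> {
  @Override
  public RecordReader<LongWritable, Text> createRecordReader(
      InputSplit split, TaskAttemptContext context) throws IOException {
    // CombineFileRecordReader walks the chunks of the CombineFileSplit and
    // invokes MyLineRecordReader(split, context, index) for each one; the
    // delegate then calls split.getOffset(index) as in the examples here.
    return new CombineFileRecordReader<LongWritable, Text>(
        (CombineFileSplit) split, context, MyLineRecordReader.class);
  }
}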

Example 2: CombineFileLineRecordReader

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class required by this method
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {
  
  fs = FileSystem.get(context.getConfiguration());
  this.path = split.getPath(index);
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;
  
  //open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) {  // skip first line and re-establish "startOffset".
    startOffset += reader.readLine(new Text(), 0,
                (int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
 
Developer: Nextzero, Project: hadoop-2.6.0-cdh5.4.3, Lines: 24, Source: MultiFileWordCount.java

Example 3: CombineFileLineRecordReader

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class required by this method
public CombineFileLineRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index) throws IOException 
{
	fs = FileSystem.get(context.getConfiguration());
	this.path = split.getPath(index);
	this.startOffset = split.getOffset(index);
	this.end = startOffset + split.getLength(index);
	boolean skipFirstLine = false;

	fileIn = fs.open(path); // open the file
	if (startOffset != 0) {
		skipFirstLine = true;
		--startOffset;
		fileIn.seek(startOffset);
	}
	reader = new LineReader(fileIn);
	if (skipFirstLine) // skip first line and re-establish "startOffset".
	{
		int readNum = reader.readLine(new Text(),0,(int) Math.min((long) Integer.MAX_VALUE, end - startOffset));
		startOffset += readNum;
	}
	this.pos = startOffset;
}
 
Developer: willddy, Project: bigdata_pattern, Lines: 23, Source: CombineFileLineRecordReader.java

Example 4: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class required by this method
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
		throws IOException, InterruptedException {
	CombineFileSplit combineSplit = (CombineFileSplit)split;
	Path path = combineSplit.getPath(splitIndex);
	this.fileName = path.getName();
	FileSplit fileSplit = new FileSplit(
			path,
			combineSplit.getOffset(splitIndex),
			combineSplit.getLength(splitIndex),
			combineSplit.getLocations());
	delegate.initialize(fileSplit, context);

}
 
Developer: conversant, Project: mara, Lines: 15, Source: CombineTextFileInputFormat.java

Example 5: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class required by this method
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
		throws IOException, InterruptedException {
	CombineFileSplit combineSplit = (CombineFileSplit)inputSplit;
	FileSplit split = new FileSplit(
			combineSplit.getPath(splitIndex),
			combineSplit.getOffset(splitIndex),
			combineSplit.getLength(splitIndex),
			combineSplit.getLocations());
	// Initialize with the single FileSplit for the current index
	delegate.initialize(split, context);
}
 
Developer: conversant, Project: mara, Lines: 13, Source: CombineAvroKeyFileInputFormat.java

Example 6: getSplits

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class required by this method
/**
 * get and combine all splits of .shp files
 * @param job
 * @return
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    // get original combine split.
    CombineFileSplit combineSplit = (CombineFileSplit)super.getSplits(job).get(0);
    Path[] paths = combineSplit.getPaths();

    // get indexes of all .shp files
    List<Integer> shpIds = new ArrayList<>();
    for(int i = 0;i < paths.length; ++i){
        if(FilenameUtils.getExtension(paths[i].toString()).equals("shp")){
            shpIds.add(i);
        }
    }

    // prepare parameters for constructing new combine split
    Path[] shpPaths = new Path[shpIds.size()];
    long[] shpStarts = new long[shpIds.size()];
    long[] shpLengths = new long[shpIds.size()];

    for(int i = 0;i < shpIds.size(); ++i){
        int id = shpIds.get(i);
        shpPaths[i] = combineSplit.getPath(id);
        shpStarts[i] = combineSplit.getOffset(id);
        shpLengths[i] = combineSplit.getLength(id);
    }

    //combine all .shp splits as one split.
    CombineFileSplit shpSplit = new CombineFileSplit(shpPaths, shpStarts, shpLengths, combineSplit.getLocations());
    List<InputSplit> shpSplits = new ArrayList<>();
    shpSplits.add(shpSplit);
    return shpSplits;
}
 
Developer: DataSystemsLab, Project: GeoSpark, Lines: 39, Source: BoundaryInputFormat.java

Example 7: SequenceFileRecordReaderWrapper

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class required by this method
@SuppressWarnings("unused")
public SequenceFileRecordReaderWrapper(CombineFileSplit split, TaskAttemptContext context, Integer index)
    throws IOException {
  fileSplit = new FileSplit(split.getPath(index), split.getOffset(index), split.getLength(index),
      split.getLocations());
  delegate = new SequenceFileInputFormat<Writable, Text>().createRecordReader(fileSplit, context);
}
 
Developer: apache, Project: incubator-blur, Lines: 8, Source: CsvBlurDriver.java

Example 8: CFRecordReader

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class required by this method
public CFRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index) throws IOException{
  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);

  fileIn = fs.open(path);
  reader = new LineReader(fileIn);
  this.pos = startOffset;
}
 
Developer: dryman, Project: Hadoop-CombineFileInputFormat, Lines: 11, Source: CFRecordReader.java

Example 9: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class required by this method
/**
 * carve the combined split into one FileSplit each for the .shp, .shx and .dbf files
 * @param split
 * @param context
 * @throws IOException
 * @throws InterruptedException
 */
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException
{
    CombineFileSplit fileSplit = (CombineFileSplit)split;
    Path[] paths = fileSplit.getPaths();
    for(int i = 0;i < paths.length; ++i){
        String suffix = FilenameUtils.getExtension(paths[i].toString());
        if(suffix.equals(SHP_SUFFIX)) shpSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
        else if(suffix.equals(SHX_SUFFIX)) shxSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
        else if(suffix.equals(DBF_SUFFIX)) dbfSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
    }
    // if the .shp file doesn't exist, throw an IOException
    if(shpSplit == null) throw new IOException("Can't find .shp file.");
    else{
        if(shxSplit != null){
            // the .shx index exists: use it to read the .shp records
            // first read all index records into memory
            Path filePath = shxSplit.getPath();
            FileSystem fileSys = filePath.getFileSystem(context.getConfiguration());
            FSDataInputStream shxInputStream = fileSys.open(filePath);
            shxInputStream.skip(24);
            // the header stores the file length in 16-bit words; convert to bytes and exclude the 100-byte header
            int shxFileLength = shxInputStream.readInt() * 2 - 100;
            // skip the remaining 72 bytes of the header
            shxInputStream.skip(72);
            byte[] bytes = new byte[shxFileLength];
            // read all index records into memory (the 100-byte header has already been skipped)
            shxInputStream.readFully(bytes, 0, bytes.length);
            IntBuffer buffer = ByteBuffer.wrap(bytes).asIntBuffer();
            int[] indexes = new int[shxFileLength / 4];
            buffer.get(indexes);
            shapeFileReader = new ShapeFileReader(indexes);
        }else shapeFileReader = new ShapeFileReader(); // no index file, construct without parameters
        shapeFileReader.initialize(shpSplit, context);
    }
    if(dbfSplit != null){
        dbfFileReader = new DbfFileReader();
        dbfFileReader.initialize(dbfSplit, context);
        hasDbf = true;
    }else hasDbf = false;

}
 
Developer: DataSystemsLab, Project: GeoSpark, Lines: 49, Source: CombineShapeReader.java


Note: The org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.getOffset examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers, and copyright of the source code remains with the original authors. Observe each project's license when distributing or using the code; do not reproduce this article without permission.