

Java CombineFileSplit.getLength Method Code Examples

This article compiles typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.getLength. If you have been wondering what CombineFileSplit.getLength does, how to call it, or what real-world usages look like, the curated code examples below should help. You can also explore further usage of the enclosing class, org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.


The sections below present 14 code examples of the CombineFileSplit.getLength method, ordered by popularity.
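
Before the individual examples, here is a minimal illustrative sketch (written for this page rather than taken from any of the projects below) of how the two overloads of getLength relate: getLength(int i) returns the number of bytes the combined split takes from its i-th file, while getLength() returns the total size of the split, i.e. the sum of the per-file lengths. The describeSplit helper name is hypothetical and used only for illustration.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

// Illustrative sketch: print each chunk of a combined split and check that the
// per-file lengths add up to the split's total length.
static void describeSplit(CombineFileSplit split) {
  long sum = 0L;
  for (int i = 0; i < split.getNumPaths(); i++) {
    Path path = split.getPath(i);        // i-th file in the combined split
    long offset = split.getOffset(i);    // start offset within that file
    long length = split.getLength(i);    // bytes assigned to this split from that file
    System.out.printf("%s offset=%d length=%d%n", path, offset, length);
    sum += length;
  }
  // getLength() with no argument returns the total length of the split.
  System.out.println("total=" + split.getLength() + " sum=" + sum);
}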

Example 1: CombineFileLineRecordReader

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {
  
  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;
  
  //open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) {  // skip first line and re-establish "startOffset".
    startOffset += reader.readLine(new Text(), 0,
                (int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
 
Developer: naver; Project: hadoop; Lines of code: 24; Source: MultiFileWordCount.java

Example 2: CombineFileLineRecordReader

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {
  
  fs = FileSystem.get(context.getConfiguration());
  this.path = split.getPath(index);
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;
  
  //open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) {  // skip first line and re-establish "startOffset".
    startOffset += reader.readLine(new Text(), 0,
                (int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
 
Developer: Nextzero; Project: hadoop-2.6.0-cdh5.4.3; Lines of code: 24; Source: MultiFileWordCount.java

Example 3: CombineFileLineRecordReader

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
public CombineFileLineRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index) throws IOException 
{
	fs = FileSystem.get(context.getConfiguration());
	this.path = split.getPath(index);
	this.startOffset = split.getOffset(index);
	this.end = startOffset + split.getLength(index);
	boolean skipFirstLine = false;

	fileIn = fs.open(path); // open the file
	if (startOffset != 0) {
		skipFirstLine = true;
		--startOffset;
		fileIn.seek(startOffset);
	}
	reader = new LineReader(fileIn);
	if (skipFirstLine) // skip first line and re-establish "startOffset".
	{
		int readNum = reader.readLine(new Text(),0,(int) Math.min((long) Integer.MAX_VALUE, end - startOffset));
		startOffset += readNum;
	}
	this.pos = startOffset;
}
 
Developer: willddy; Project: bigdata_pattern; Lines of code: 23; Source: CombineFileLineRecordReader.java

Example 4: checkSplitEq

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
void checkSplitEq(FileSystem fs, CombineFileSplit split, long bytes)
    throws Exception {
  long splitBytes = 0L;
  HashSet<Path> uniq = new HashSet<Path>();
  for (int i = 0; i < split.getNumPaths(); ++i) {
    splitBytes += split.getLength(i);
    assertTrue(
        split.getLength(i) <= fs.getFileStatus(split.getPath(i)).getLen());
    assertFalse(uniq.contains(split.getPath(i)));
    uniq.add(split.getPath(i));
  }
  assertEquals(bytes, splitBytes);
}
 
Developer: naver; Project: hadoop; Lines of code: 14; Source: TestFilePool.java

Example 5: testHfileSplitCompleteness

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
public void testHfileSplitCompleteness() throws Exception {
  cluster = initMiniCluster(CLUSTER_PORT, 1);

  int count = 40;
  HdfsSortedOplogOrganizer bucket1 = new HdfsSortedOplogOrganizer(
      regionManager, 1);
  ArrayList<TestEvent> items = new ArrayList<TestEvent>();
  for (int i = 0; i < count; i++) {
    items.add(new TestEvent(("key-" + i), ("value-" + System.nanoTime())));
  }
  bucket1.flush(items.iterator(), count);

  Configuration conf = hdfsStore.getFileSystem().getConf();
  GFInputFormat gfInputFormat = new GFInputFormat();
  Job job = Job.getInstance(conf, "test");

  conf = job.getConfiguration();
  conf.set(GFInputFormat.INPUT_REGION, getName());
  conf.set(GFInputFormat.HOME_DIR, testDataDir.getName());
  conf.setBoolean(GFInputFormat.CHECKPOINT, false);

  List<InputSplit> splits = gfInputFormat.getSplits(job);
  assertTrue(1 < splits.size());

  long lastBytePositionOfPrevious = 0;
  for (InputSplit inputSplit : splits) {
    CombineFileSplit split = (CombineFileSplit) inputSplit;
    assertEquals(1, split.getPaths().length);
    assertEquals(lastBytePositionOfPrevious, split.getOffset(0));
    lastBytePositionOfPrevious += split.getLength();
    assertEquals(1, split.getLocations().length);
  }

  Path bucketPath = new Path(regionPath, "1");
  Path hopPath = new Path(bucketPath, bucket1.getSortedOplogs().iterator()
      .next().get().getFileName());
  FileStatus status = hdfsStore.getFileSystem().getFileStatus(hopPath);
  assertEquals(status.getLen(), lastBytePositionOfPrevious);
}
 
Developer: gemxd; Project: gemfirexd-oss; Lines of code: 40; Source: GFInputFormatJUnitTest.java

Example 6: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
		throws IOException, InterruptedException {
	CombineFileSplit combineSplit = (CombineFileSplit)split;
	Path path = combineSplit.getPath(splitIndex);
	this.fileName = path.getName();
	FileSplit fileSplit = new FileSplit(
			path,
			combineSplit.getOffset(splitIndex),
			combineSplit.getLength(splitIndex),
			combineSplit.getLocations());
	delegate.initialize(fileSplit, context);

}
 
Developer: conversant; Project: mara; Lines of code: 15; Source: CombineTextFileInputFormat.java

Example 7: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
		throws IOException, InterruptedException {
	CombineFileSplit combineSplit = (CombineFileSplit)inputSplit;
	FileSplit split = new FileSplit(
			combineSplit.getPath(splitIndex),
			combineSplit.getOffset(splitIndex),
			combineSplit.getLength(splitIndex),
			combineSplit.getLocations());
	// Initialize with the single FileSplit for the current index
	delegate.initialize(split, context);
}
 
Developer: conversant; Project: mara; Lines of code: 13; Source: CombineAvroKeyFileInputFormat.java

Example 8: getSplits

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
/**
 * get and combine all splits of .shp files
 * @param job
 * @return
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    // get original combine split.
    CombineFileSplit combineSplit = (CombineFileSplit)super.getSplits(job).get(0);
    Path[] paths = combineSplit.getPaths();

    // get indexes of all .shp file
    List<Integer> shpIds = new ArrayList<>();
    for(int i = 0;i < paths.length; ++i){
        if(FilenameUtils.getExtension(paths[i].toString()).equals("shp")){
            shpIds.add(i);
        }
    }

    // prepare parameters for constructing new combine split
    Path[] shpPaths = new Path[shpIds.size()];
    long[] shpStarts = new long[shpIds.size()];
    long[] shpLengths = new long[shpIds.size()];

    for(int i = 0;i < shpIds.size(); ++i){
        int id = shpIds.get(i);
        shpPaths[i] = combineSplit.getPath(id);
        shpStarts[i] = combineSplit.getOffset(id);
        shpLengths[i] = combineSplit.getLength(id);
    }

    //combine all .shp splits as one split.
    CombineFileSplit shpSplit = new CombineFileSplit(shpPaths, shpStarts, shpLengths, combineSplit.getLocations());
    List<InputSplit> shpSplits = new ArrayList<>();
    shpSplits.add(shpSplit);
    return shpSplits;
}
 
Developer: DataSystemsLab; Project: GeoSpark; Lines of code: 39; Source: BoundaryInputFormat.java

Example 9: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
		throws IOException, InterruptedException {
	CombineFileSplit split = (CombineFileSplit) genericSplit;
	Configuration job = context.getConfiguration();
	this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength",
			Integer.MAX_VALUE);

	this.start = split.getStartOffsets()[idx];
	this.end = start + split.getLength();
	Path file = split.getPath(idx);
	this.compressionCodescs = new CompressionCodecFactory(job);
	final CompressionCodec codec = compressionCodescs.getCodec(file);

	FileSystem fs = file.getFileSystem(job);
	FSDataInputStream fileIn = fs.open(split.getPath(idx));
	boolean skipFirstLine = false;
	if (codec != null) {
		in = new LineReader(codec.createInputStream(fileIn), job);
		end = Long.MAX_VALUE;
	} else {
		if (start != 0) {
			skipFirstLine = true;
			--start;
			fileIn.seek(start);
		}
		in = new LineReader(fileIn, job);
	}
	if (skipFirstLine) {// skip first line and re-establish "start"
		start += in.readLine(new Text(), 0,
				(int) Math.min((long) Integer.MAX_VALUE, end - start));
	}
	this.pos = start;
}
 
Developer: makelove; Project: book-hadoop-hacks; Lines of code: 35; Source: CombineFileLineRecordReader.java

Example 10: SequenceFileRecordReaderWrapper

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
@SuppressWarnings("unused")
public SequenceFileRecordReaderWrapper(CombineFileSplit split, TaskAttemptContext context, Integer index)
    throws IOException {
  fileSplit = new FileSplit(split.getPath(index), split.getOffset(index), split.getLength(index),
      split.getLocations());
  delegate = new SequenceFileInputFormat<Writable, Text>().createRecordReader(fileSplit, context);
}
 
Developer: apache; Project: incubator-blur; Lines of code: 8; Source: CsvBlurDriver.java

Example 11: CFRecordReader

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
public CFRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index) throws IOException{
  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);

  fileIn = fs.open(path);
  reader = new LineReader(fileIn);
  this.pos = startOffset;
}
 
Developer: dryman; Project: Hadoop-CombineFileInputFormat; Lines of code: 11; Source: CFRecordReader.java

Example 12: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
@Override
public void initialize(InputSplit paramInputSplit,
		TaskAttemptContext paramTaskAttemptContext) throws IOException,
		InterruptedException {
	context = paramTaskAttemptContext;
	split = (CombineFileSplit) paramInputSplit;

	if (split.getLength() != 0) {
		initializeNextReader();
	}
}
 
Developer: Pivotal-Field-Engineering; Project: pmr-common; Lines of code: 12; Source: CombineTextInputFormat.java

Example 13: ParserPump

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
public ParserPump(CombineFileSplit split, TaskAttemptContext context) {
    this.context = context;
    this.paths = split.getPaths();
    this.size = split.getLength();
    this.skipInvalid = context.getConfiguration().getBoolean(SKIP_INVALID_PROPERTY, false);
}
 
Developer: Merck; Project: Halyard; Lines of code: 7; Source: HalyardBulkLoad.java

Example 14: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class this method depends on
/**
 * cut the combined split into FileSplit for .shp, .shx and .dbf
 * @param split
 * @param context
 * @throws IOException
 * @throws InterruptedException
 */
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException
{
    CombineFileSplit fileSplit = (CombineFileSplit)split;
    Path[] paths = fileSplit.getPaths();
    for(int i = 0;i < paths.length; ++i){
        String suffix = FilenameUtils.getExtension(paths[i].toString());
        if(suffix.equals(SHP_SUFFIX)) shpSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
        else if(suffix.equals(SHX_SUFFIX)) shxSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
        else if(suffix.equals(DBF_SUFFIX)) dbfSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
    }
    // if the .shp file doesn't exist, throw an IOException
    if(shpSplit == null) throw new IOException("Can't find .shp file.");
    else{
        if(shxSplit != null){
            // an index (.shx) file exists, so read the .shp using its record offsets
            // first read all indexes into memory
            Path filePath = shxSplit.getPath();
            FileSystem fileSys = filePath.getFileSystem(context.getConfiguration());
            FSDataInputStream shxInputStream = fileSys.open(filePath);
            shxInputStream.skip(24);
            int shxFileLength = shxInputStream.readInt() * 2 - 100; // file length in bytes, excluding the 100-byte header
            // skip the remaining 72 bytes of the header
            shxInputStream.skip(72);
            byte[] bytes = new byte[shxFileLength];
            // read all record index entries into memory (the 100-byte header has already been skipped)
            shxInputStream.readFully(bytes, 0, bytes.length);
            IntBuffer buffer = ByteBuffer.wrap(bytes).asIntBuffer();
            int[] indexes = new int[shxFileLength / 4];
            buffer.get(indexes);
            shapeFileReader = new ShapeFileReader(indexes);
        }else shapeFileReader = new ShapeFileReader(); // no index, construct with no parameter
        shapeFileReader.initialize(shpSplit, context);
    }
    if(dbfSplit != null){
        dbfFileReader = new DbfFileReader();
        dbfFileReader.initialize(dbfSplit, context);
        hasDbf = true;
    }else hasDbf = false;

}
 
Developer: DataSystemsLab; Project: GeoSpark; Lines of code: 49; Source: CombineShapeReader.java


Note: The org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.getLength examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors; copyright in the source code remains with the original authors, and redistribution or use should follow the corresponding project's license. Do not reproduce without permission.