This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.FileSplit.getStart. If you are unsure how FileSplit.getStart is used in practice, or are looking for concrete examples of calling it, the curated snippets below should help. You can also consult further usage examples of the enclosing class, org.apache.hadoop.mapreduce.lib.input.FileSplit.
The following presents 15 code examples of the FileSplit.getStart method, ordered roughly by popularity.
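Before the collected examples, here is a minimal, illustrative sketch of the pattern they all share: FileSplit.getStart() returns the byte offset of the split within its file, and together with getLength() it bounds the region a record reader is responsible for. The class and method names below are hypothetical and are not taken from any of the examples that follow.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class SplitBoundsExample {
    // Open the split's file and position the stream at the split's first byte.
    public static FSDataInputStream openAtSplitStart(FileSplit split, Configuration conf) throws IOException {
        long start = split.getStart();               // byte offset of this split within the file
        long end = start + split.getLength();        // exclusive end offset of the split
        Path file = split.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = fs.open(file);
        in.seek(start);                              // position at the split boundary
        System.out.println("Reading " + file + " bytes [" + start + ", " + end + ")");
        return in;                                   // a record reader would now read records in [start, end)
    }
}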
Example 1: XMLRecordReader
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
/**
 * Initializes the input resources and related parameters; this could also be done in the initialize() method instead.
 * @param inputSplit
 * @param context
 * @throws IOException
 */
public XMLRecordReader(InputSplit inputSplit, Configuration context) throws IOException {
/**
 * Get the start and end tags passed in via the configuration
 */
startTag = context.get(START_TAG_KEY).getBytes("UTF-8");
endTag = context.get(END_TAG_KEY).getBytes("UTF-8");
FileSplit fileSplit = (FileSplit) inputSplit;
/**
 * Get the start and end positions of the split
 */
start = fileSplit.getStart();
end = start + fileSplit.getLength();
Path file = fileSplit.getPath();
FileSystem fs = file.getFileSystem(context);
/**
 * Open an HDFS input stream for the split's file
 */
fsin = fs.open(fileSplit.getPath());
/**
 * Seek to the start of the split
 */
fsin.seek(start);
}
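For context only: a reader built around the constructor in Example 1 typically extracts each record by scanning forward for the start tag and buffering bytes until the end tag. The following is a hedged sketch of that pattern, not part of the original example; it assumes additional fields (a DataOutputBuffer named buffer plus LongWritable/Text key and value) that the snippet above does not show.

// Sketch only: assumes fields `DataOutputBuffer buffer`, `LongWritable key`, `Text value`
// in addition to fsin, start, end, startTag and endTag from the constructor above.
public boolean nextKeyValue() throws IOException {
    if (fsin.getPos() < end && readUntilMatch(startTag, false)) {
        try {
            buffer.write(startTag);
            if (readUntilMatch(endTag, true)) {
                key.set(fsin.getPos());
                value.set(buffer.getData(), 0, buffer.getLength());
                return true;
            }
        } finally {
            buffer.reset();
        }
    }
    return false;
}

// Read bytes until `match` is fully seen; when withinBlock is true, buffer the record content.
private boolean readUntilMatch(byte[] match, boolean withinBlock) throws IOException {
    int i = 0;
    while (true) {
        int b = fsin.read();
        if (b == -1) return false;              // end of file
        if (withinBlock) buffer.write(b);       // keep bytes that belong to the record
        if (b == match[i]) {
            i++;
            if (i >= match.length) return true; // tag fully matched
        } else {
            i = 0;
        }
        // outside a record, give up once the split boundary has been passed
        if (!withinBlock && i == 0 && fsin.getPos() >= end) return false;
    }
}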
Example 2: checkSplits
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
private void checkSplits(Path listFile, List<InputSplit> splits) throws IOException {
long lastEnd = 0;
// Verify that each split's start matches the previous split's end and
// that nothing is missed
for (InputSplit split : splits) {
FileSplit fileSplit = (FileSplit) split;
long start = fileSplit.getStart();
Assert.assertEquals(lastEnd, start);
lastEnd = start + fileSplit.getLength();
}
//Verify there is nothing more to read from the input file
SequenceFile.Reader reader
= new SequenceFile.Reader(cluster.getFileSystem().getConf(),
SequenceFile.Reader.file(listFile));
try {
reader.seek(lastEnd);
CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
Text srcRelPath = new Text();
Assert.assertFalse(reader.next(srcRelPath, srcFileStatus));
} finally {
IOUtils.closeStream(reader);
}
}
Example 3: SingleFastqRecordReader
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
public SingleFastqRecordReader(Configuration conf, FileSplit split) throws IOException {
file = split.getPath();
start = split.getStart();
end = start + split.getLength();
FileSystem fs = file.getFileSystem(conf);
FSDataInputStream fileIn = fs.open(file);
CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
CompressionCodec codec = codecFactory.getCodec(file);
if (codec == null) { // no codec. Uncompressed file.
positionAtFirstRecord(fileIn);
inputStream = fileIn;
} else {
// compressed file
if (start != 0) {
throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");
}
inputStream = codec.createInputStream(fileIn);
end = Long.MAX_VALUE; // read until the end of the file
}
lineReader = new LineReader(inputStream);
}
Example 4: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
final FileSplit split = (FileSplit) genericSplit;
final Configuration configuration = context.getConfiguration();
if (configuration.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null)
this.graphFilter = VertexProgramHelper.deserialize(ConfUtil.makeApacheConfiguration(configuration), Constants.GREMLIN_HADOOP_GRAPH_FILTER);
KryoShimServiceLoader.applyConfiguration(ConfUtil.makeApacheConfiguration(configuration));
this.gryoReader = HadoopPools.getGryoPool().takeReader();
long start = split.getStart();
final Path file = split.getPath();
if (null != new CompressionCodecFactory(configuration).getCodec(file)) {
throw new IllegalStateException("Compression is not supported for the (binary) Gryo format");
}
// open the file and seek to the start of the split
this.inputStream = file.getFileSystem(configuration).open(split.getPath());
this.splitLength = split.getLength();
if (this.splitLength > 0) this.splitLength -= (seekToHeader(this.inputStream, start) - start);
}
Example 5: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
FileSplit fileSplit = (FileSplit) split;
FSDataInputStream stream = FileSystem.get(context.getConfiguration()).open(fileSplit.getPath());
if (fileSplit.getStart() != 0) {
stream.seek(fileSplit.getStart());
}
remaining = fileSplit.getLength();
JsonFactory factory = new JsonFactory().disable(JsonFactory.Feature.CANONICALIZE_FIELD_NAMES);
parser = factory.createParser(new BufferedInputStream(stream));
parser.setCodec(new ObjectMapper());
parser.nextToken();
if (parser.currentToken() == JsonToken.START_ARRAY) {
parser.nextToken();
}
}
Example 6: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
@Override
public void initialize( final InputSplit inputSplit, final TaskAttemptContext context ) throws IOException, InterruptedException {
FileSplit fileSplit = (FileSplit)inputSplit;
Configuration config = context.getConfiguration();
Path path = fileSplit.getPath();
FileSystem fs = path.getFileSystem( config );
long fileLength = fs.getLength( path );
long start = fileSplit.getStart();
long length = fileSplit.getLength();
InputStream in = fs.open( path );
}
Example 7: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
throws IOException, InterruptedException {
FileSplit split = (FileSplit) genericSplit;
Configuration conf = context.getConfiguration();
SeekableInput in = new FsInput(split.getPath(), conf);
DatumReader<T> datumReader = new GenericDatumReader<T>();
this.reader = DataFileReader.openReader(in, datumReader);
reader.sync(split.getStart()); // sync to start
this.start = reader.tell();
this.end = split.getStart() + split.getLength();
}
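As a follow-up to Example 7: the companion nextKeyValue() of such an Avro reader usually stops once the reader has moved past the sync point at end. This is only a rough sketch under that assumption; the current field is hypothetical and does not appear in the snippet above.

// Sketch only: `current` is a hypothetical field of type T holding the last datum read.
@Override
public boolean nextKeyValue() throws IOException {
    if (!reader.hasNext() || reader.pastSync(end)) {
        return false;                   // no more records belonging to this split
    }
    current = reader.next(current);     // reuse the previous datum object where possible
    return true;
}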
Example 8: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
public void initialize(InputSplit genericSplit, TaskAttemptContext context) {
try {
FileSplit split = (FileSplit)genericSplit;
Configuration job = context.getConfiguration();
this.maxLineLength = job.getInt("mapreduce.input.linerecordreader.line.maxlength", 2147483647);
this.start = split.getStart();
this.end = this.start + split.getLength();
Path file = split.getPath();
FileSystem fs = file.getFileSystem(job);
this.fileIn = fs.open(file);
CompressionCodec codec = (new CompressionCodecFactory(job)).getCodec(file);
if(null != codec) {
this.isCompressedInput = true;
this.decompressor = CodecPool.getDecompressor(codec);
if(codec instanceof SplittableCompressionCodec) {
SplitCompressionInputStream cIn = ((SplittableCompressionCodec)codec).createInputStream(this.fileIn, this.decompressor, this.start, this.end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
this.in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
this.start = cIn.getAdjustedStart();
this.end = cIn.getAdjustedEnd();
this.filePosition = cIn;
} else {
this.in = new SplitLineReader(codec.createInputStream(this.fileIn, this.decompressor), job, this.recordDelimiterBytes);
this.filePosition = this.fileIn;
}
} else {
this.fileIn.seek(this.start);
this.in = new SplitLineReader(this.fileIn, job, this.recordDelimiterBytes);
this.filePosition = this.fileIn;
}
if(this.start != 0L) {
this.start += (long)this.in.readLine(new Text(), 0, this.maxBytesToConsume(this.start));
}
this.pos = this.start;
} catch (Exception ex) {
LOG.warn("Exception occurred during initialization {}", ex, ex);
}
}
Example 9: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
@Override
public void initialize(InputSplit is, TaskAttemptContext tac) throws IOException, InterruptedException {
FileSplit fileSplit = (FileSplit) is;
startTag = tac.getConfiguration().get(START_TAG_KEY).getBytes("utf-8");
endTag = tac.getConfiguration().get(END_TAG_KEY).getBytes("utf-8");
start = fileSplit.getStart();
end = start + fileSplit.getLength();
Path file = fileSplit.getPath();
FileSystem fs = file.getFileSystem(tac.getConfiguration());
fsin = fs.open(fileSplit.getPath());
fsin.seek(start);
}
Example 10: addIndexedSplits
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
private int addIndexedSplits(List<InputSplit> splits, int i,
List<InputSplit> newSplits, Configuration cfg) throws IOException {
Path file = ((FileSplit) splits.get(i)).getPath();
SplittingBAMIndex idx = new SplittingBAMIndex(file.getFileSystem(cfg)
.open(getIdxPath(file)));
int splitsEnd = splits.size();
for (int j = i; j < splitsEnd; j++) {
if (!file.equals(((FileSplit) splits.get(j)).getPath()))
splitsEnd = j;
}
for (int j = i; j < splitsEnd; j++) {
FileSplit fileSplit = (FileSplit) splits.get(j);
long start = fileSplit.getStart();
long end = start + fileSplit.getLength();
Long blockStart = idx.nextAlignment(start);
Long blockEnd = Long.valueOf(j == splitsEnd - 1 ? idx
.prevAlignment(end).longValue() | 0xFFFF : idx
.nextAlignment(end).longValue());
if (blockStart == null) {
throw new RuntimeException(
"Internal error or invalid index: no block start for "
+ start);
}
if (blockEnd == null) {
throw new RuntimeException(
"Internal error or invalid index: no block end for "
+ end);
}
newSplits.add(new FileVirtualSplit(file, blockStart.longValue(),
blockEnd.longValue(), fileSplit.getLocations()));
}
return splitsEnd;
}
Example 11: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
/***
* Initializes readers
*
* @param split Split to be used (assumed to be a file split)
* @param context context of the job
* @throws java.io.IOException in case of errors reading from the filestream provided by Hadoop
* @throws java.lang.InterruptedException in case of thread interruption
*
*/
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
FileSplit fSplit = (FileSplit)split;
// Initialize start and end of split
start = fSplit.getStart();
end = start + fSplit.getLength();
final Path file = fSplit.getPath();
codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
final FileSystem fs = file.getFileSystem(context.getConfiguration());
FSDataInputStream fileIn = fs.open(file);
// open stream
if (isCompressedInput()) { // decompress
decompressor = CodecPool.getDecompressor(codec);
if (codec instanceof SplittableCompressionCodec) {
final SplitCompressionInputStream cIn =((SplittableCompressionCodec)codec).createInputStream(fileIn, decompressor, start, end,SplittableCompressionCodec.READ_MODE.CONTINUOUS);
ebr = new EthereumBlockReader(cIn, this.maxSizeEthereumBlock,this.bufferSize,this.useDirectBuffer);
start = cIn.getAdjustedStart();
end = cIn.getAdjustedEnd();
filePosition = cIn; // take pos from compressed stream
} else {
ebr = new EthereumBlockReader(codec.createInputStream(fileIn,decompressor), this.maxSizeEthereumBlock,this.bufferSize,this.useDirectBuffer);
filePosition = fileIn;
}
} else {
fileIn.seek(start);
ebr = new EthereumBlockReader(fileIn, this.maxSizeEthereumBlock,this.bufferSize,this.useDirectBuffer);
filePosition = fileIn;
}
}
Example 12: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
/**
* Initializes reader
* @param split Split to use (assumed to be a file split)
* @param context context of the job
*
*
* @throws java.io.IOException in case of errors reading from the filestream provided by Hadoop
* @throws java.lang.InterruptedException in case of thread interruption
*
*/
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
FileSplit fSplit = (FileSplit)split;
// Initialize start and end of split
start = fSplit.getStart();
end = start + fSplit.getLength();
final Path file = fSplit.getPath();
codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
final FileSystem fs = file.getFileSystem(context.getConfiguration());
FSDataInputStream fileIn = fs.open(file);
// open stream
if (isCompressedInput()) { // decompress
decompressor = CodecPool.getDecompressor(codec);
if (codec instanceof SplittableCompressionCodec) {
final SplitCompressionInputStream cIn =((SplittableCompressionCodec)codec).createInputStream(fileIn, decompressor, start, end,SplittableCompressionCodec.READ_MODE.CONTINUOUS);
bbr = new BitcoinBlockReader(cIn, this.maxSizeBitcoinBlock,this.bufferSize,this.specificMagicByteArray,this.useDirectBuffer,this.readAuxPOW);
start = cIn.getAdjustedStart();
end = cIn.getAdjustedEnd();
filePosition = cIn; // take pos from compressed stream
} else {
bbr = new BitcoinBlockReader(codec.createInputStream(fileIn,decompressor), this.maxSizeBitcoinBlock,this.bufferSize,this.specificMagicByteArray,this.useDirectBuffer,readAuxPOW);
filePosition = fileIn;
}
} else {
fileIn.seek(start);
bbr = new BitcoinBlockReader(fileIn, this.maxSizeBitcoinBlock,this.bufferSize,this.specificMagicByteArray,this.useDirectBuffer,readAuxPOW);
filePosition = fileIn;
}
// seek to block start (for the case a block overlaps a split)
try {
bbr.seekBlockStart();
} catch (BitcoinBlockReadException bbre) {
LOG.error("Error reading Bitcoin blockchhain data");
LOG.error(bbre);
}
}
Example 13: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
throws IOException, InterruptedException
{
numberOfLinesToProcess = getNumLinesPerSplit(context);
FileSplit split = (FileSplit) genericSplit;
final Path file = split.getPath();
Configuration conf = context.getConfiguration();
this.maxLineLength = conf
.getInt("mapreduce.input.linerecordreader.line.maxlength", Integer.MAX_VALUE);
FileSystem fs = file.getFileSystem(conf);
start = split.getStart();
end = start + split.getLength();
boolean skipFirstLine = false;
FSDataInputStream filein = fs.open(split.getPath());
if (start != 0) {
skipFirstLine = true;
--start;
filein.seek(start);
}
in = new LineReader(filein, conf);
if (skipFirstLine) {
start += in.readLine(new Text(), 0,
(int) Math.min((long) Integer.MAX_VALUE, end - start));
}
this.pos = start;
}
Example 14: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
FileSplit split = (FileSplit) genericSplit;
Configuration job = context.getConfiguration();
this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
start = split.getStart();
end = start + split.getLength();
final Path file = split.getPath();
compressionCodecs = new CompressionCodecFactory(job);
final CompressionCodec codec = compressionCodecs.getCodec(file);
// open the file and seek to the start of the split
FileSystem fs = file.getFileSystem(job);
FSDataInputStream fileIn = fs.open(split.getPath());
boolean skipFirstLine = false;
if (codec != null) {
in = new LfLineReader(codec.createInputStream(fileIn), job);
end = Long.MAX_VALUE;
} else {
if (start != 0) {
skipFirstLine = true;
--start;
fileIn.seek(start);
}
in = new LfLineReader(fileIn, job);
}
if (skipFirstLine) { // skip first line and re-establish "start".
start += in.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
}
this.pos = start;
}
Example 15: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
@Override public void initialize(InputSplit spl, TaskAttemptContext ctx)
throws IOException
{
if(isInitialized)
close();
isInitialized = true;
final FileSplit split = (FileSplit)spl;
this.start = split.getStart();
this.end = start + split.getLength();
final Configuration conf = ContextUtil.getConfiguration(ctx);
final ValidationStringency stringency =
SAMHeaderReader.getValidationStringency(conf);
final Path file = split.getPath();
final FileSystem fs = file.getFileSystem(conf);
input = fs.open(file);
ValidationStringency origStringency = null;
try {
if (stringency != null) {
origStringency = SAMFileReader.getDefaultValidationStringency();
SAMFileReader.setDefaultValidationStringency(stringency);
}
final SAMFileHeader header =
new SAMFileReader(input, false).getFileHeader();
waInput = new WorkaroundingStream(input, header);
final boolean firstSplit = this.start == 0;
if (firstSplit) {
// Skip the header because we already have it, and adjust the start
// to match.
final int headerLength = waInput.getRemainingHeaderLength();
input.seek(headerLength);
this.start += headerLength;
} else
input.seek(--this.start);
// Creating the iterator causes reading from the stream, so make sure
// to start counting this early.
waInput.setLength(this.end - this.start);
iterator = new SAMFileReader(waInput, false).iterator();
if (!firstSplit) {
// Skip the first line, it'll be handled with the previous split.
try {
if (iterator.hasNext())
iterator.next();
} catch (SAMFormatException e) {}
}
} finally {
if (origStringency != null)
SAMFileReader.setDefaultValidationStringency(origStringency);
}
}