This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.FileSplit.getStart. If you are unsure how FileSplit.getStart is used in practice, or are looking for concrete examples of calling it, the curated snippets below should help. You can also consult further usage examples of the enclosing class, org.apache.hadoop.mapreduce.lib.input.FileSplit.
The following presents 15 code examples of the FileSplit.getStart method, ordered roughly by popularity.
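Before the collected examples, here is a minimal, illustrative sketch of the pattern they all share: FileSplit.getStart() returns the byte offset of the split within its file, and together with getLength() it bounds the region a record reader is responsible for. The class and method names below are hypothetical and are not taken from any of the examples that follow.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class SplitBoundsExample {
    // Open the split's file and position the stream at the split's first byte.
    public static FSDataInputStream openAtSplitStart(FileSplit split, Configuration conf) throws IOException {
        long start = split.getStart();               // byte offset of this split within the file
        long end = start + split.getLength();        // exclusive end offset of the split
        Path file = split.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = fs.open(file);
        in.seek(start);                              // position at the split boundary
        System.out.println("Reading " + file + " bytes [" + start + ", " + end + ")");
        return in;                                   // a record reader would now read records in [start, end)
    }
}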
Example 1: XMLRecordReader
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
/**
 * Initializes the input resources and related parameters; this could also be done in the initialize() method instead.
 * @param inputSplit
 * @param context
 * @throws IOException
 */
public XMLRecordReader(InputSplit inputSplit, Configuration context) throws IOException {
/**
 * Get the start and end tags passed in via the configuration
 */
startTag = context.get(START_TAG_KEY).getBytes("UTF-8");
endTag = context.get(END_TAG_KEY).getBytes("UTF-8");
FileSplit fileSplit = (FileSplit) inputSplit;
/**
 * Get the start and end positions of the split
 */
start = fileSplit.getStart();
end = start + fileSplit.getLength();
Path file = fileSplit.getPath();
FileSystem fs = file.getFileSystem(context);
/**
 * Open an HDFS input stream for the split's file
 */
fsin = fs.open(fileSplit.getPath());
/**
 * Seek to the start of the split
 */
fsin.seek(start);
}
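For context only: a reader built around the constructor in Example 1 typically extracts each record by scanning forward for the start tag and buffering bytes until the end tag. The following is a hedged sketch of that pattern, not part of the original example; it assumes additional fields (a DataOutputBuffer named buffer plus LongWritable/Text key and value) that the snippet above does not show.

// Sketch only: assumes fields `DataOutputBuffer buffer`, `LongWritable key`, `Text value`
// in addition to fsin, start, end, startTag and endTag from the constructor above.
public boolean nextKeyValue() throws IOException {
    if (fsin.getPos() < end && readUntilMatch(startTag, false)) {
        try {
            buffer.write(startTag);
            if (readUntilMatch(endTag, true)) {
                key.set(fsin.getPos());
                value.set(buffer.getData(), 0, buffer.getLength());
                return true;
            }
        } finally {
            buffer.reset();
        }
    }
    return false;
}

// Read bytes until `match` is fully seen; when withinBlock is true, buffer the record content.
private boolean readUntilMatch(byte[] match, boolean withinBlock) throws IOException {
    int i = 0;
    while (true) {
        int b = fsin.read();
        if (b == -1) return false;              // end of file
        if (withinBlock) buffer.write(b);       // keep bytes that belong to the record
        if (b == match[i]) {
            i++;
            if (i >= match.length) return true; // tag fully matched
        } else {
            i = 0;
        }
        // outside a record, give up once the split boundary has been passed
        if (!withinBlock && i == 0 && fsin.getPos() >= end) return false;
    }
}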
Example 2: checkSplits
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
private void checkSplits(Path listFile, List<InputSplit> splits) throws IOException {
long lastEnd = 0;
// Verify that each split's start matches the previous split's end and
// that nothing is missed
for (InputSplit split : splits) {
FileSplit fileSplit = (FileSplit) split;
long start = fileSplit.getStart();
Assert.assertEquals(lastEnd, start);
lastEnd = start + fileSplit.getLength();
}
//Verify there is nothing more to read from the input file
SequenceFile.Reader reader
= new SequenceFile.Reader(cluster.getFileSystem().getConf(),
SequenceFile.Reader.file(listFile));
try {
reader.seek(lastEnd);
CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
Text srcRelPath = new Text();
Assert.assertFalse(reader.next(srcRelPath, srcFileStatus));
} finally {
IOUtils.closeStream(reader);
}
}
Example 3: SingleFastqRecordReader
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
public SingleFastqRecordReader(Configuration conf, FileSplit split) throws IOException {
file = split.getPath();
start = split.getStart();
end = start + split.getLength();
FileSystem fs = file.getFileSystem(conf);
FSDataInputStream fileIn = fs.open(file);
CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
CompressionCodec codec = codecFactory.getCodec(file);
if (codec == null) { // no codec. Uncompressed file.
positionAtFirstRecord(fileIn);
inputStream = fileIn;
} else {
// compressed file
if (start != 0) {
throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");
}
inputStream = codec.createInputStream(fileIn);
end = Long.MAX_VALUE; // read until the end of the file
}
lineReader = new LineReader(inputStream);
}
Example 4: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
final FileSplit split = (FileSplit) genericSplit;
final Configuration configuration = context.getConfiguration();
if (configuration.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null)
this.graphFilter = VertexProgramHelper.deserialize(ConfUtil.makeApacheConfiguration(configuration), Constants.GREMLIN_HADOOP_GRAPH_FILTER);
KryoShimServiceLoader.applyConfiguration(ConfUtil.makeApacheConfiguration(configuration));
this.gryoReader = HadoopPools.getGryoPool().takeReader();
long start = split.getStart();
final Path file = split.getPath();
if (null != new CompressionCodecFactory(configuration).getCodec(file)) {
throw new IllegalStateException("Compression is not supported for the (binary) Gryo format");
}
// open the file and seek to the start of the split
this.inputStream = file.getFileSystem(configuration).open(split.getPath());
this.splitLength = split.getLength();
if (this.splitLength > 0) this.splitLength -= (seekToHeader(this.inputStream, start) - start);
}
Example 5: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
FileSplit fileSplit = (FileSplit) split;
FSDataInputStream stream = FileSystem.get(context.getConfiguration()).open(fileSplit.getPath());
if (fileSplit.getStart() != 0) {
stream.seek(fileSplit.getStart());
}
remaining = fileSplit.getLength();
JsonFactory factory = new JsonFactory().disable(JsonFactory.Feature.CANONICALIZE_FIELD_NAMES);
parser = factory.createParser(new BufferedInputStream(stream));
parser.setCodec(new ObjectMapper());
parser.nextToken();
if (parser.currentToken() == JsonToken.START_ARRAY) {
parser.nextToken();
}
}
Example 6: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
@Override
public void initialize( final InputSplit inputSplit, final TaskAttemptContext context ) throws IOException, InterruptedException {
FileSplit fileSplit = (FileSplit)inputSplit;
Configuration config = context.getConfiguration();
Path path = fileSplit.getPath();
FileSystem fs = path.getFileSystem( config );
long fileLength = fs.getLength( path );
long start = fileSplit.getStart();
long length = fileSplit.getLength();
InputStream in = fs.open( path );
}
Example 7: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
throws IOException, InterruptedException {
FileSplit split = (FileSplit) genericSplit;
Configuration conf = context.getConfiguration();
SeekableInput in = new FsInput(split.getPath(), conf);
DatumReader<T> datumReader = new GenericDatumReader<T>();
this.reader = DataFileReader.openReader(in, datumReader);
reader.sync(split.getStart()); // sync to start
this.start = reader.tell();
this.end = split.getStart() + split.getLength();
}
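As a follow-up to Example 7: the companion nextKeyValue() of such an Avro reader usually stops once the reader has moved past the sync point at end. This is only a rough sketch under that assumption; the current field is hypothetical and does not appear in the snippet above.

// Sketch only: `current` is a hypothetical field of type T holding the last datum read.
@Override
public boolean nextKeyValue() throws IOException {
    if (!reader.hasNext() || reader.pastSync(end)) {
        return false;                   // no more records belonging to this split
    }
    current = reader.next(current);     // reuse the previous datum object where possible
    return true;
}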
Example 8: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
public void initialize(InputSplit genericSplit, TaskAttemptContext context) {
try {
FileSplit split = (FileSplit)genericSplit;
Configuration job = context.getConfiguration();
this.maxLineLength = job.getInt("mapreduce.input.linerecordreader.line.maxlength", 2147483647);
this.start = split.getStart();
this.end = this.start + split.getLength();
Path file = split.getPath();
FileSystem fs = file.getFileSystem(job);
this.fileIn = fs.open(file);
CompressionCodec codec = (new CompressionCodecFactory(job)).getCodec(file);
if(null != codec) {
this.isCompressedInput = true;
this.decompressor = CodecPool.getDecompressor(codec);
if(codec instanceof SplittableCompressionCodec) {
SplitCompressionInputStream cIn = ((SplittableCompressionCodec)codec).createInputStream(this.fileIn, this.decompressor, this.start, this.end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
this.in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
this.start = cIn.getAdjustedStart();
this.end = cIn.getAdjustedEnd();
this.filePosition = cIn;
} else {
this.in = new SplitLineReader(codec.createInputStream(this.fileIn, this.decompressor), job, this.recordDelimiterBytes);
this.filePosition = this.fileIn;
}
} else {
this.fileIn.seek(this.start);
this.in = new SplitLineReader(this.fileIn, job, this.recordDelimiterBytes);
this.filePosition = this.fileIn;
}
if(this.start != 0L) {
this.start += (long)this.in.readLine(new Text(), 0, this.maxBytesToConsume(this.start));
}
this.pos = this.start;
} catch (Exception ex) {
LOG.warn("Exception occurred during initialization {}", ex, ex);
}
}
Example 9: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
@Override
public void initialize(InputSplit is, TaskAttemptContext tac) throws IOException, InterruptedException {
FileSplit fileSplit = (FileSplit) is;
startTag = tac.getConfiguration().get(START_TAG_KEY).getBytes("utf-8");
endTag = tac.getConfiguration().get(END_TAG_KEY).getBytes("utf-8");
start = fileSplit.getStart();
end = start + fileSplit.getLength();
Path file = fileSplit.getPath();
FileSystem fs = file.getFileSystem(tac.getConfiguration());
fsin = fs.open(fileSplit.getPath());
fsin.seek(start);
}
Example 10: addIndexedSplits
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
private int addIndexedSplits(List<InputSplit> splits, int i,
List<InputSplit> newSplits, Configuration cfg) throws IOException {
Path file = ((FileSplit) splits.get(i)).getPath();
SplittingBAMIndex idx = new SplittingBAMIndex(file.getFileSystem(cfg)
.open(getIdxPath(file)));
int splitsEnd = splits.size();
for (int j = i; j < splitsEnd; j++) {
if (!file.equals(((FileSplit) splits.get(j)).getPath()))
splitsEnd = j;
}
for (int j = i; j < splitsEnd; j++) {
FileSplit fileSplit = (FileSplit) splits.get(j);
long start = fileSplit.getStart();
long end = start + fileSplit.getLength();
Long blockStart = idx.nextAlignment(start);
Long blockEnd = Long.valueOf(j == splitsEnd - 1 ? idx
.prevAlignment(end).longValue() | 0xFFFF : idx
.nextAlignment(end).longValue());
if (blockStart == null) {
throw new RuntimeException(
"Internal error or invalid index: no block start for "
+ start);
}
if (blockEnd == null) {
throw new RuntimeException(
"Internal error or invalid index: no block end for "
+ end);
}
newSplits.add(new FileVirtualSplit(file, blockStart.longValue(),
blockEnd.longValue(), fileSplit.getLocations()));
}
return splitsEnd;
}
Example 11: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
/***
* Initializes readers
*
* @param split Split to be used (assumed to be a file split)
* @param context context of the job
* @throws java.io.IOException in case of errors reading from the filestream provided by Hadoop
* @throws java.lang.InterruptedException in case of thread interruption
*
*/
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
FileSplit fSplit = (FileSplit)split;
// Initialize start and end of split
start = fSplit.getStart();
end = start + fSplit.getLength();
final Path file = fSplit.getPath();
codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
final FileSystem fs = file.getFileSystem(context.getConfiguration());
FSDataInputStream fileIn = fs.open(file);
// open stream
if (isCompressedInput()) { // decompress
decompressor = CodecPool.getDecompressor(codec);
if (codec instanceof SplittableCompressionCodec) {
final SplitCompressionInputStream cIn =((SplittableCompressionCodec)codec).createInputStream(fileIn, decompressor, start, end,SplittableCompressionCodec.READ_MODE.CONTINUOUS);
ebr = new EthereumBlockReader(cIn, this.maxSizeEthereumBlock,this.bufferSize,this.useDirectBuffer);
start = cIn.getAdjustedStart();
end = cIn.getAdjustedEnd();
filePosition = cIn; // take pos from compressed stream
} else {
ebr = new EthereumBlockReader(codec.createInputStream(fileIn,decompressor), this.maxSizeEthereumBlock,this.bufferSize,this.useDirectBuffer);
filePosition = fileIn;
}
} else {
fileIn.seek(start);
ebr = new EthereumBlockReader(fileIn, this.maxSizeEthereumBlock,this.bufferSize,this.useDirectBuffer);
filePosition = fileIn;
}
}
Example 12: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
/**
* Initializes reader
* @param split Split to use (assumed to be a file split)
* @param context context of the job
*
*
* @throws java.io.IOException in case of errors reading from the filestream provided by Hadoop
* @throws java.lang.InterruptedException in case of thread interruption
*
*/
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
FileSplit fSplit = (FileSplit)split;
// Initialize start and end of split
start = fSplit.getStart();
end = start + fSplit.getLength();
final Path file = fSplit.getPath();
codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
final FileSystem fs = file.getFileSystem(context.getConfiguration());
FSDataInputStream fileIn = fs.open(file);
// open stream
if (isCompressedInput()) { // decompress
decompressor = CodecPool.getDecompressor(codec);
if (codec instanceof SplittableCompressionCodec) {
final SplitCompressionInputStream cIn =((SplittableCompressionCodec)codec).createInputStream(fileIn, decompressor, start, end,SplittableCompressionCodec.READ_MODE.CONTINUOUS);
bbr = new BitcoinBlockReader(cIn, this.maxSizeBitcoinBlock,this.bufferSize,this.specificMagicByteArray,this.useDirectBuffer,this.readAuxPOW);
start = cIn.getAdjustedStart();
end = cIn.getAdjustedEnd();
filePosition = cIn; // take pos from compressed stream
} else {
bbr = new BitcoinBlockReader(codec.createInputStream(fileIn,decompressor), this.maxSizeBitcoinBlock,this.bufferSize,this.specificMagicByteArray,this.useDirectBuffer,readAuxPOW);
filePosition = fileIn;
}
} else {
fileIn.seek(start);
bbr = new BitcoinBlockReader(fileIn, this.maxSizeBitcoinBlock,this.bufferSize,this.specificMagicByteArray,this.useDirectBuffer,readAuxPOW);
filePosition = fileIn;
}
// seek to block start (for the case a block overlaps a split)
try {
bbr.seekBlockStart();
} catch (BitcoinBlockReadException bbre) {
LOG.error("Error reading Bitcoin blockchhain data");
LOG.error(bbre);
}
}
Example 13: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
throws IOException, InterruptedException
{
numberOfLinesToProcess = getNumLinesPerSplit(context);
FileSplit split = (FileSplit) genericSplit;
final Path file = split.getPath();
Configuration conf = context.getConfiguration();
this.maxLineLength = conf
.getInt("mapreduce.input.linerecordreader.line.maxlength", Integer.MAX_VALUE);
FileSystem fs = file.getFileSystem(conf);
start = split.getStart();
end = start + split.getLength();
boolean skipFirstLine = false;
FSDataInputStream filein = fs.open(split.getPath());
if (start != 0) {
skipFirstLine = true;
--start;
filein.seek(start);
}
in = new LineReader(filein, conf);
if (skipFirstLine) {
start += in.readLine(new Text(), 0,
(int) Math.min((long) Integer.MAX_VALUE, end - start));
}
this.pos = start;
}
Example 14: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
FileSplit split = (FileSplit) genericSplit;
Configuration job = context.getConfiguration();
this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
start = split.getStart();
end = start + split.getLength();
final Path file = split.getPath();
compressionCodecs = new CompressionCodecFactory(job);
final CompressionCodec codec = compressionCodecs.getCodec(file);
// open the file and seek to the start of the split
FileSystem fs = file.getFileSystem(job);
FSDataInputStream fileIn = fs.open(split.getPath());
boolean skipFirstLine = false;
if (codec != null) {
in = new LfLineReader(codec.createInputStream(fileIn), job);
end = Long.MAX_VALUE;
} else {
if (start != 0) {
skipFirstLine = true;
--start;
fileIn.seek(start);
}
in = new LfLineReader(fileIn, job);
}
if (skipFirstLine) { // skip first line and re-establish "start".
start += in.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
}
this.pos = start;
}
Example 15: initialize
import org.apache.hadoop.mapreduce.lib.input.FileSplit; // import the package/class required by the method
@Override public void initialize(InputSplit spl, TaskAttemptContext ctx)
throws IOException
{
if(isInitialized)
close();
isInitialized = true;
final FileSplit split = (FileSplit)spl;
this.start = split.getStart();
this.end = start + split.getLength();
final Configuration conf = ContextUtil.getConfiguration(ctx);
final ValidationStringency stringency =
SAMHeaderReader.getValidationStringency(conf);
final Path file = split.getPath();
final FileSystem fs = file.getFileSystem(conf);
input = fs.open(file);
ValidationStringency origStringency = null;
try {
if (stringency != null) {
origStringency = SAMFileReader.getDefaultValidationStringency();
SAMFileReader.setDefaultValidationStringency(stringency);
}
final SAMFileHeader header =
new SAMFileReader(input, false).getFileHeader();
waInput = new WorkaroundingStream(input, header);
final boolean firstSplit = this.start == 0;
if (firstSplit) {
// Skip the header because we already have it, and adjust the start
// to match.
final int headerLength = waInput.getRemainingHeaderLength();
input.seek(headerLength);
this.start += headerLength;
} else
input.seek(--this.start);
// Creating the iterator causes reading from the stream, so make sure
// to start counting this early.
waInput.setLength(this.end - this.start);
iterator = new SAMFileReader(waInput, false).iterator();
if (!firstSplit) {
// Skip the first line, it'll be handled with the previous split.
try {
if (iterator.hasNext())
iterator.next();
} catch (SAMFormatException e) {}
}
} finally {
if (origStringency != null)
SAMFileReader.setDefaultValidationStringency(origStringency);
}
}