This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.getLength. If you are wondering what CombineFileSplit.getLength does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples of its enclosing class, org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.
Below are 14 code examples of the CombineFileSplit.getLength method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
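Before the examples, a quick orientation: CombineFileSplit offers two relevant overloads, getLength(int i), which returns the length of the i-th file chunk in the split, and getLength(), which returns the total length of all chunks. The minimal sketch below is not one of the 14 examples; the class and field names are illustrative assumptions, and it only shows how the two overloads typically appear together alongside getPath(index) and getOffset(index):

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

// Illustrative sketch only: how getLength(int) and getLength() are usually read from a CombineFileSplit.
public class SplitLengthSketch {
  public static void describe(CombineFileSplit split, TaskAttemptContext context, int index)
      throws IOException, InterruptedException {
    // Per-chunk length: pairs with getPath(index) and getOffset(index).
    Path path = split.getPath(index);
    long start = split.getOffset(index);
    long end = start + split.getLength(index);

    // Total length: the sum of all chunk lengths, handy for progress reporting.
    long totalBytes = split.getLength();

    FileSystem fs = path.getFileSystem(context.getConfiguration());
    FSDataInputStream in = fs.open(path);
    try {
      in.seek(start);
      // ... read records between start (inclusive) and end (exclusive) ...
    } finally {
      in.close();
    }
    System.out.println(path + ": [" + start + ", " + end + ") of " + totalBytes + " total bytes");
  }
}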
Example 1: CombineFileLineRecordReader
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //import the package/class this method depends on
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {
  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;
  // open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) { // skip first line and re-establish "startOffset".
    startOffset += reader.readLine(new Text(), 0,
        (int) Math.min((long) Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
Example 2: CombineFileLineRecordReader
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //import the package/class this method depends on
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {
  fs = FileSystem.get(context.getConfiguration());
  this.path = split.getPath(index);
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;
  // open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) { // skip first line and re-establish "startOffset".
    startOffset += reader.readLine(new Text(), 0,
        (int) Math.min((long) Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
Example 3: CombineFileLineRecordReader
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //import the package/class this method depends on
public CombineFileLineRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index) throws IOException
{
  fs = FileSystem.get(context.getConfiguration());
  this.path = split.getPath(index);
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;
  fileIn = fs.open(path); // open the file
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) // skip first line and re-establish "startOffset".
  {
    int readNum = reader.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - startOffset));
    startOffset += readNum;
  }
  this.pos = startOffset;
}
Example 4: checkSplitEq
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //import the package/class this method depends on
void checkSplitEq(FileSystem fs, CombineFileSplit split, long bytes)
    throws Exception {
  long splitBytes = 0L;
  HashSet<Path> uniq = new HashSet<Path>();
  for (int i = 0; i < split.getNumPaths(); ++i) {
    splitBytes += split.getLength(i);
    assertTrue(
        split.getLength(i) <= fs.getFileStatus(split.getPath(i)).getLen());
    assertFalse(uniq.contains(split.getPath(i)));
    uniq.add(split.getPath(i));
  }
  assertEquals(bytes, splitBytes);
}
Example 5: testHfileSplitCompleteness
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //import the package/class this method depends on
public void testHfileSplitCompleteness() throws Exception {
  cluster = initMiniCluster(CLUSTER_PORT, 1);
  int count = 40;
  HdfsSortedOplogOrganizer bucket1 = new HdfsSortedOplogOrganizer(
      regionManager, 1);
  ArrayList<TestEvent> items = new ArrayList<TestEvent>();
  for (int i = 0; i < count; i++) {
    items.add(new TestEvent(("key-" + i), ("value-" + System.nanoTime())));
  }
  bucket1.flush(items.iterator(), count);
  Configuration conf = hdfsStore.getFileSystem().getConf();
  GFInputFormat gfInputFormat = new GFInputFormat();
  Job job = Job.getInstance(conf, "test");
  conf = job.getConfiguration();
  conf.set(GFInputFormat.INPUT_REGION, getName());
  conf.set(GFInputFormat.HOME_DIR, testDataDir.getName());
  conf.setBoolean(GFInputFormat.CHECKPOINT, false);
  List<InputSplit> splits = gfInputFormat.getSplits(job);
  assertTrue(1 < splits.size());
  long lastBytePositionOfPrevious = 0;
  for (InputSplit inputSplit : splits) {
    CombineFileSplit split = (CombineFileSplit) inputSplit;
    assertEquals(1, split.getPaths().length);
    assertEquals(lastBytePositionOfPrevious, split.getOffset(0));
    lastBytePositionOfPrevious += split.getLength();
    assertEquals(1, split.getLocations().length);
  }
  Path bucketPath = new Path(regionPath, "1");
  Path hopPath = new Path(bucketPath, bucket1.getSortedOplogs().iterator()
      .next().get().getFileName());
  FileStatus status = hdfsStore.getFileSystem().getFileStatus(hopPath);
  assertEquals(status.getLen(), lastBytePositionOfPrevious);
}
Example 6: initialize
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //import the package/class this method depends on
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  CombineFileSplit combineSplit = (CombineFileSplit) split;
  Path path = combineSplit.getPath(splitIndex);
  this.fileName = path.getName();
  FileSplit fileSplit = new FileSplit(
      path,
      combineSplit.getOffset(splitIndex),
      combineSplit.getLength(splitIndex),
      combineSplit.getLocations());
  delegate.initialize(fileSplit, context);
}
Example 7: initialize
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //import the package/class this method depends on
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
    throws IOException, InterruptedException {
  CombineFileSplit combineSplit = (CombineFileSplit) inputSplit;
  FileSplit split = new FileSplit(
      combineSplit.getPath(splitIndex),
      combineSplit.getOffset(splitIndex),
      combineSplit.getLength(splitIndex),
      combineSplit.getLocations());
  // Initialize with the single FileSplit for the current index
  delegate.initialize(split, context);
}
Example 8: getSplits
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //import the package/class this method depends on
/**
 * Get and combine all splits of .shp files into a single split.
 * @param job the job context
 * @return a list containing one combined split that covers all .shp files
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  // get the original combined split.
  CombineFileSplit combineSplit = (CombineFileSplit) super.getSplits(job).get(0);
  Path[] paths = combineSplit.getPaths();
  // get the indexes of all .shp files
  List<Integer> shpIds = new ArrayList<>();
  for (int i = 0; i < paths.length; ++i) {
    if (FilenameUtils.getExtension(paths[i].toString()).equals("shp")) {
      shpIds.add(i);
    }
  }
  // prepare parameters for constructing the new combined split
  Path[] shpPaths = new Path[shpIds.size()];
  long[] shpStarts = new long[shpIds.size()];
  long[] shpLengths = new long[shpIds.size()];
  for (int i = 0; i < shpIds.size(); ++i) {
    int id = shpIds.get(i);
    shpPaths[i] = combineSplit.getPath(id);
    shpStarts[i] = combineSplit.getOffset(id);
    shpLengths[i] = combineSplit.getLength(id);
  }
  // combine all .shp splits into one split.
  CombineFileSplit shpSplit = new CombineFileSplit(shpPaths, shpStarts, shpLengths, combineSplit.getLocations());
  List<InputSplit> shpSplits = new ArrayList<>();
  shpSplits.add(shpSplit);
  return shpSplits;
}
Example 9: initialize
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //import the package/class this method depends on
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
    throws IOException, InterruptedException {
  CombineFileSplit split = (CombineFileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength",
      Integer.MAX_VALUE);
  this.start = split.getStartOffsets()[idx];
  this.end = start + split.getLength();
  Path file = split.getPath(idx);
  this.compressionCodescs = new CompressionCodecFactory(job);
  final CompressionCodec codec = compressionCodescs.getCodec(file);
  FileSystem fs = file.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(split.getPath(idx));
  boolean skipFirstLine = false;
  if (codec != null) {
    in = new LineReader(codec.createInputStream(fileIn), job);
    end = Long.MAX_VALUE;
  } else {
    if (start != 0) {
      skipFirstLine = true;
      --start;
      fileIn.seek(start);
    }
    in = new LineReader(fileIn, job);
  }
  if (skipFirstLine) { // skip first line and re-establish "start"
    start += in.readLine(new Text(), 0,
        (int) Math.min((long) Integer.MAX_VALUE, end - start));
  }
  this.pos = start;
}
Example 10: SequenceFileRecordReaderWrapper
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //import the package/class this method depends on
@SuppressWarnings("unused")
public SequenceFileRecordReaderWrapper(CombineFileSplit split, TaskAttemptContext context, Integer index)
    throws IOException {
  fileSplit = new FileSplit(split.getPath(index), split.getOffset(index), split.getLength(index),
      split.getLocations());
  delegate = new SequenceFileInputFormat<Writable, Text>().createRecordReader(fileSplit, context);
}
Example 11: CFRecordReader
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //import the package/class this method depends on
public CFRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index) throws IOException {
  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  fileIn = fs.open(path);
  reader = new LineReader(fileIn);
  this.pos = startOffset;
}
Example 12: initialize
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //import the package/class this method depends on
@Override
public void initialize(InputSplit paramInputSplit,
    TaskAttemptContext paramTaskAttemptContext) throws IOException,
    InterruptedException {
  context = paramTaskAttemptContext;
  split = (CombineFileSplit) paramInputSplit;
  if (split.getLength() != 0) {
    initializeNextReader();
  }
}
Example 13: ParserPump
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //import the package/class this method depends on
public ParserPump(CombineFileSplit split, TaskAttemptContext context) {
  this.context = context;
  this.paths = split.getPaths();
  this.size = split.getLength();
  this.skipInvalid = context.getConfiguration().getBoolean(SKIP_INVALID_PROPERTY, false);
}
Example 14: initialize
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //import the package/class this method depends on
/**
 * Cut the combined split into one FileSplit each for the .shp, .shx, and .dbf files.
 * @param split
 * @param context
 * @throws IOException
 * @throws InterruptedException
 */
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException
{
  CombineFileSplit fileSplit = (CombineFileSplit) split;
  Path[] paths = fileSplit.getPaths();
  for (int i = 0; i < paths.length; ++i) {
    String suffix = FilenameUtils.getExtension(paths[i].toString());
    if (suffix.equals(SHP_SUFFIX)) shpSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
    else if (suffix.equals(SHX_SUFFIX)) shxSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
    else if (suffix.equals(DBF_SUFFIX)) dbfSplit = new FileSplit(paths[i], fileSplit.getOffset(i), fileSplit.getLength(i), fileSplit.getLocations());
  }
  // if the shape file doesn't exist, throw an IOException
  if (shpSplit == null) throw new IOException("Can't find .shp file.");
  else {
    if (shxSplit != null) {
      // the shape file exists, so extract .shp together with .shx:
      // first read all indexes into memory
      Path filePath = shxSplit.getPath();
      FileSystem fileSys = filePath.getFileSystem(context.getConfiguration());
      FSDataInputStream shxInpuStream = fileSys.open(filePath);
      shxInpuStream.skip(24);
      int shxFileLength = shxInpuStream.readInt() * 2 - 100; // get length in bytes, excluding the header
      // skip the remaining 72 bytes of the header
      shxInpuStream.skip(72);
      byte[] bytes = new byte[shxFileLength];
      // read all index records into memory; the 100-byte header has already been skipped
      shxInpuStream.readFully(bytes, 0, bytes.length);
      IntBuffer buffer = ByteBuffer.wrap(bytes).asIntBuffer();
      int[] indexes = new int[shxFileLength / 4];
      buffer.get(indexes);
      shapeFileReader = new ShapeFileReader(indexes);
    } else shapeFileReader = new ShapeFileReader(); // no index, construct with no parameter
    shapeFileReader.initialize(shpSplit, context);
  }
  if (dbfSplit != null) {
    dbfFileReader = new DbfFileReader();
    dbfFileReader.initialize(dbfSplit, context);
    hasDbf = true;
  } else hasDbf = false;
}