This article collects and summarizes typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.getPath. If you are wondering what CombineFileSplit.getPath does, how to call it, or simply want working examples, the curated code samples below should help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.
The following presents 11 code examples of the CombineFileSplit.getPath method.
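All eleven examples share one pattern: a RecordReader whose three-argument constructor (CombineFileSplit, TaskAttemptContext, Integer) is invoked reflectively by Hadoop's CombineFileRecordReader, once per file chunk in the combined split, with getPath(index) selecting that chunk's file. Below is a minimal wiring sketch, assuming the CombineFileLineRecordReader from Example 1 emits LongWritable/Text pairs; the format class name is made up for illustration.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class MyCombineTextInputFormat extends CombineFileInputFormat<LongWritable, Text> {
  @Override
  public RecordReader<LongWritable, Text> createRecordReader(
      InputSplit split, TaskAttemptContext context) throws IOException {
    // CombineFileRecordReader instantiates one reader per file chunk in the
    // split, passing (split, context, index) to the class's constructor.
    return new CombineFileRecordReader<>(
        (CombineFileSplit) split, context, CombineFileLineRecordReader.class);
  }
}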
Example 1: CombineFileLineRecordReader
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {
  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;
  // open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    // A non-zero offset means the previous chunk owns the line that straddles
    // the boundary: back up one byte so the partial first line can be discarded.
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) { // skip first line and re-establish "startOffset"
    startOffset += reader.readLine(new Text(), 0,
        (int) Math.min((long) Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
Example 2: CombineFileLineRecordReader
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {
  // Note: FileSystem.get(conf) assumes the path lives on the default
  // filesystem; path.getFileSystem(conf), as in Example 1, is more robust.
  fs = FileSystem.get(context.getConfiguration());
  this.path = split.getPath(index);
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;
  // open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) { // skip first line and re-establish "startOffset"
    startOffset += reader.readLine(new Text(), 0,
        (int) Math.min((long) Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
Example 3: CombineFileLineRecordReader
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {
  fs = FileSystem.get(context.getConfiguration());
  this.path = split.getPath(index);
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;
  fileIn = fs.open(path); // open the file
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) { // skip first line and re-establish "startOffset"
    int readNum = reader.readLine(new Text(), 0,
        (int) Math.min((long) Integer.MAX_VALUE, end - startOffset));
    startOffset += readNum;
  }
  this.pos = startOffset;
}
Example 4: MDSCombineSpreadReader
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
public MDSCombineSpreadReader(final CombineFileSplit split, final TaskAttemptContext context,
    final Integer index) throws IOException {
  Configuration config = context.getConfiguration();
  Path path = split.getPath(index);
  FileSystem fs = path.getFileSystem(config);
  // getFileStatus(...).getLen() is the non-deprecated equivalent of FileSystem#getLength.
  long fileLength = fs.getFileStatus(path).getLen();
  InputStream in = fs.open(path);
  innerReader = new MDSSpreadReader();
  innerReader.setStream(in, fileLength, 0, fileLength);
}
Example 5: initialize
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  CombineFileSplit combineSplit = (CombineFileSplit) split;
  Path path = combineSplit.getPath(splitIndex);
  this.fileName = path.getName();
  // Carve the single file at splitIndex out of the combined split and hand it
  // to the wrapped record reader.
  FileSplit fileSplit = new FileSplit(
      path,
      combineSplit.getOffset(splitIndex),
      combineSplit.getLength(splitIndex),
      combineSplit.getLocations());
  delegate.initialize(fileSplit, context);
}
Example 6: initialize
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
    throws IOException, InterruptedException {
  CombineFileSplit combineSplit = (CombineFileSplit) inputSplit;
  FileSplit split = new FileSplit(
      combineSplit.getPath(splitIndex),
      combineSplit.getOffset(splitIndex),
      combineSplit.getLength(splitIndex),
      combineSplit.getLocations());
  // Initialize with the single FileSplit for the current index.
  delegate.initialize(split, context);
}
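Examples 5 and 6 show only the initialize method; the enclosing wrapper class is implied. Here is a self-contained sketch of what such a wrapper might look like, with assumed names (DelegatingLineRecordReader, splitIndex, delegate), delegating to Hadoop's standard LineRecordReader.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

public class DelegatingLineRecordReader extends RecordReader<LongWritable, Text> {
  private final int splitIndex;
  private final LineRecordReader delegate = new LineRecordReader();

  // CombineFileRecordReader instantiates readers through this exact signature.
  public DelegatingLineRecordReader(CombineFileSplit split, TaskAttemptContext context,
      Integer index) {
    this.splitIndex = index;
  }

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException {
    CombineFileSplit combineSplit = (CombineFileSplit) split;
    // Carve the single file at splitIndex out of the combined split.
    FileSplit fileSplit = new FileSplit(
        combineSplit.getPath(splitIndex),
        combineSplit.getOffset(splitIndex),
        combineSplit.getLength(splitIndex),
        combineSplit.getLocations());
    delegate.initialize(fileSplit, context);
  }

  @Override
  public boolean nextKeyValue() throws IOException { return delegate.nextKeyValue(); }
  @Override
  public LongWritable getCurrentKey() { return delegate.getCurrentKey(); }
  @Override
  public Text getCurrentValue() { return delegate.getCurrentValue(); }
  @Override
  public float getProgress() throws IOException { return delegate.getProgress(); }
  @Override
  public void close() throws IOException { delegate.close(); }
}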
Example 7: getSplits
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
/**
 * Get all splits of .shp files and combine them into a single split.
 *
 * @param job the job context
 * @return a singleton list holding the combined .shp split
 * @throws IOException if computing the underlying splits fails
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  // Get the original combined split. Note that only the first combined split
  // is taken, which assumes all input files were packed into a single one
  // (e.g. no maximum split size was configured).
  CombineFileSplit combineSplit = (CombineFileSplit) super.getSplits(job).get(0);
  Path[] paths = combineSplit.getPaths();
  // Collect the indexes of all .shp files.
  List<Integer> shpIds = new ArrayList<>();
  for (int i = 0; i < paths.length; ++i) {
    if (FilenameUtils.getExtension(paths[i].toString()).equals("shp")) {
      shpIds.add(i);
    }
  }
  // Prepare the parameters for constructing the new combined split.
  Path[] shpPaths = new Path[shpIds.size()];
  long[] shpStarts = new long[shpIds.size()];
  long[] shpLengths = new long[shpIds.size()];
  for (int i = 0; i < shpIds.size(); ++i) {
    int id = shpIds.get(i);
    shpPaths[i] = combineSplit.getPath(id);
    shpStarts[i] = combineSplit.getOffset(id);
    shpLengths[i] = combineSplit.getLength(id);
  }
  // Combine all .shp chunks into one split.
  CombineFileSplit shpSplit = new CombineFileSplit(shpPaths, shpStarts, shpLengths,
      combineSplit.getLocations());
  List<InputSplit> shpSplits = new ArrayList<>();
  shpSplits.add(shpSplit);
  return shpSplits;
}
Example 8: getSchema
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
private static Schema getSchema(CombineFileSplit split, TaskAttemptContext cx, Integer idx) throws IOException {
  // Prefer a schema configured on the job; fall back to reading it from the
  // header of the Avro data file at this split index.
  Schema schema = AvroJob.getInputKeySchema(cx.getConfiguration());
  if (schema != null) {
    return schema;
  }
  Path path = split.getPath(idx);
  FileSystem fs = path.getFileSystem(cx.getConfiguration());
  return AvroUtils.getSchemaFromDataFile(path, fs);
}
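For context, a hedged sketch of how the resolved schema might then be used to open the file at the same index with Avro's generic reader. FsInput and DataFileReader are standard Avro classes; the surrounding variables (split, context, index) are assumed to be in scope inside a record reader, and the enclosing method is assumed to declare IOException.

Schema schema = getSchema(split, context, index);
Path path = split.getPath(index);
DataFileReader<GenericRecord> avroReader = new DataFileReader<>(
    new FsInput(path, context.getConfiguration()),   // Hadoop-backed seekable input
    new GenericDatumReader<GenericRecord>(schema));  // decode records with the resolved schema
// ... iterate avroReader, then close() it when done.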
Example 9: initialize
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
    throws IOException, InterruptedException {
  CombineFileSplit split = (CombineFileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength",
      Integer.MAX_VALUE);
  this.start = split.getStartOffsets()[idx];
  // Use the length of this chunk; getLength() with no argument returns the
  // total length of the whole combined split.
  this.end = start + split.getLength(idx);
  Path file = split.getPath(idx);
  this.compressionCodecs = new CompressionCodecFactory(job);
  final CompressionCodec codec = compressionCodecs.getCodec(file);
  FileSystem fs = file.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(file);
  boolean skipFirstLine = false;
  if (codec != null) {
    // Compressed input cannot be seeked into, so read the stream to its end.
    in = new LineReader(codec.createInputStream(fileIn), job);
    end = Long.MAX_VALUE;
  } else {
    if (start != 0) {
      skipFirstLine = true;
      --start;
      fileIn.seek(start);
    }
    in = new LineReader(fileIn, job);
  }
  if (skipFirstLine) { // skip first line and re-establish "start"
    start += in.readLine(new Text(), 0,
        (int) Math.min((long) Integer.MAX_VALUE, end - start));
  }
  this.pos = start;
}
Example 10: SequenceFileRecordReaderWrapper
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
@SuppressWarnings("unused")
public SequenceFileRecordReaderWrapper(CombineFileSplit split, TaskAttemptContext context, Integer index)
    throws IOException {
  // Wrap the single file at this index as a FileSplit and delegate to the
  // standard SequenceFile record reader.
  fileSplit = new FileSplit(split.getPath(index), split.getOffset(index), split.getLength(index),
      split.getLocations());
  delegate = new SequenceFileInputFormat<Writable, Text>().createRecordReader(fileSplit, context);
}
Example 11: CFRecordReader
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the package/class the method depends on
public CFRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index) throws IOException {
  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  fileIn = fs.open(path);
  if (startOffset != 0) {
    // Seek to this chunk's offset; otherwise every chunk would read from the
    // start of the file. Unlike Example 1, this reader does not discard a
    // partial first line, so it assumes chunks start on record boundaries.
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  this.pos = startOffset;
}
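Finally, a hedged driver-side fragment (class and path names assumed, placed inside a main method) showing how a combine-file input format like the one sketched under the introduction would be attached to a job, with a cap on the combined split size:

Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "combine-small-files");
job.setInputFormatClass(MyCombineTextInputFormat.class); // hypothetical format from the intro sketch
FileInputFormat.addInputPath(job, new Path("/data/small-files")); // assumed input directory
// Cap each combined split; CombineFileInputFormat honors this setting
// ("mapreduce.input.fileinputformat.split.maxsize") when packing files.
FileInputFormat.setMaxInputSplitSize(job, 128L * 1024 * 1024); // 128 MB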