当前位置: 首页>>代码示例>>Java>>正文


Java CombineFileSplit.getPath方法代码示例

本文整理汇总了Java中org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.getPath方法的典型用法代码示例。如果您正苦于以下问题:Java CombineFileSplit.getPath方法的具体用法?Java CombineFileSplit.getPath怎么用?Java CombineFileSplit.getPath使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.hadoop.mapreduce.lib.input.CombineFileSplit的用法示例。


在下文中一共展示了CombineFileSplit.getPath方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: CombineFileLineRecordReader

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //导入方法依赖的package包/类
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {

  // Per-file view of the combined split: the path at 'index' and the byte
  // range [startOffset, end) this reader is responsible for.
  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);

  fileIn = fs.open(path);

  // A chunk that begins mid-file may also begin mid-line. Back up one byte,
  // then discard the partial line below and advance startOffset past it; the
  // previous chunk's reader owns that line.
  final boolean resumesMidFile = startOffset != 0;
  if (resumesMidFile) {
    fileIn.seek(--startOffset);
  }
  reader = new LineReader(fileIn);
  if (resumesMidFile) {
    int maxBytes = (int) Math.min((long) Integer.MAX_VALUE, end - startOffset);
    startOffset += reader.readLine(new Text(), 0, maxBytes);
  }
  this.pos = startOffset;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:24,代码来源:MultiFileWordCount.java

示例2: CombineFileLineRecordReader

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //导入方法依赖的package包/类
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {

  // FIX: resolve the per-file path first and obtain the FileSystem from the
  // path itself. FileSystem.get(conf) returns the *default* filesystem and
  // fails (or reads the wrong data) when the split's file lives on another
  // one (e.g. s3a://, viewfs://, har://).
  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;

  //open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    // Mid-file chunk: back up one byte so the partial first line can be
    // discarded below (it belongs to the previous chunk's reader).
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) {  // skip first line and re-establish "startOffset".
    startOffset += reader.readLine(new Text(), 0,
                (int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
 
开发者ID:Nextzero,项目名称:hadoop-2.6.0-cdh5.4.3,代码行数:24,代码来源:MultiFileWordCount.java

示例3: CombineFileLineRecordReader

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //导入方法依赖的package包/类
public CombineFileLineRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index) throws IOException 
{
	// FIX: derive the FileSystem from the split's path instead of
	// FileSystem.get(conf); the default filesystem may differ from the one
	// the file actually resides on (e.g. s3a://, viewfs://).
	this.path = split.getPath(index);
	fs = this.path.getFileSystem(context.getConfiguration());
	this.startOffset = split.getOffset(index);
	this.end = startOffset + split.getLength(index);
	boolean skipFirstLine = false;

	fileIn = fs.open(path); // open the file
	if (startOffset != 0) {
		// Mid-file chunk: back up one byte so the partial first line is
		// discarded below; the previous chunk's reader owns that line.
		skipFirstLine = true;
		--startOffset;
		fileIn.seek(startOffset);
	}
	reader = new LineReader(fileIn);
	if (skipFirstLine) // skip first line and re-establish "startOffset".
	{
		int readNum = reader.readLine(new Text(),0,(int) Math.min((long) Integer.MAX_VALUE, end - startOffset));
		startOffset += readNum;
	}
	this.pos = startOffset;
}
 
开发者ID:willddy,项目名称:bigdata_pattern,代码行数:23,代码来源:CombineFileLineRecordReader.java

示例4: MDSCombineSpreadReader

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //导入方法依赖的package包/类
public MDSCombineSpreadReader( final CombineFileSplit split , final TaskAttemptContext context , final Integer index ) throws IOException{
  Configuration config = context.getConfiguration();
  Path path = split.getPath( index );
  FileSystem fs = path.getFileSystem( config );
  // FIX: FileSystem.getLength(Path) is deprecated; query the file status
  // instead (this is exactly what the deprecated method delegates to).
  long fileLength = fs.getFileStatus( path ).getLen();
  InputStream in = fs.open( path );

  // Note: this reader deliberately ignores the split's offset/length and
  // always feeds the whole file [0, fileLength) to the inner reader.
  innerReader = new MDSSpreadReader();
  innerReader.setStream( in , fileLength , 0 , fileLength );
}
 
开发者ID:yahoojapan,项目名称:multiple-dimension-spread,代码行数:11,代码来源:MDSCombineSpreadReader.java

示例5: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //导入方法依赖的package包/类
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
		throws IOException, InterruptedException {
	// Project the combined split down to the single file at splitIndex,
	// remember its name, and hand that one-file FileSplit to the delegate.
	CombineFileSplit combined = (CombineFileSplit) split;
	Path filePath = combined.getPath(splitIndex);
	this.fileName = filePath.getName();
	delegate.initialize(
			new FileSplit(
					filePath,
					combined.getOffset(splitIndex),
					combined.getLength(splitIndex),
					combined.getLocations()),
			context);

}
 
开发者ID:conversant,项目名称:mara,代码行数:15,代码来源:CombineTextFileInputFormat.java

示例6: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //导入方法依赖的package包/类
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
		throws IOException, InterruptedException {
	// Carve the file at splitIndex out of the combined split and initialize
	// the wrapped reader with that single-file FileSplit.
	CombineFileSplit combined = (CombineFileSplit) inputSplit;
	FileSplit single = new FileSplit(
			combined.getPath(splitIndex),
			combined.getOffset(splitIndex),
			combined.getLength(splitIndex),
			combined.getLocations());
	delegate.initialize(single, context);
}
 
开发者ID:conversant,项目名称:mara,代码行数:13,代码来源:CombineAvroKeyFileInputFormat.java

示例7: getSplits

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //导入方法依赖的package包/类
/**
 * get and combine all splits of .shp files
 * @param job
 * @return
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    // The parent class combines everything into a single split; use it as
    // the source of all candidate files.
    CombineFileSplit combined = (CombineFileSplit) super.getSplits(job).get(0);
    Path[] allPaths = combined.getPaths();

    // Collect the indexes of every entry whose extension is exactly "shp"
    // (comparison is case-sensitive, matching the original behavior).
    List<Integer> shpIndexes = new ArrayList<>();
    for (int i = 0; i < allPaths.length; i++) {
        if ("shp".equals(FilenameUtils.getExtension(allPaths[i].toString()))) {
            shpIndexes.add(i);
        }
    }

    // Re-assemble the selected files into the arrays a CombineFileSplit needs.
    int count = shpIndexes.size();
    Path[] shpPaths = new Path[count];
    long[] shpStarts = new long[count];
    long[] shpLengths = new long[count];
    for (int i = 0; i < count; i++) {
        int id = shpIndexes.get(i);
        shpPaths[i] = combined.getPath(id);
        shpStarts[i] = combined.getOffset(id);
        shpLengths[i] = combined.getLength(id);
    }

    // Return a single combined split covering all .shp files.
    List<InputSplit> result = new ArrayList<>();
    result.add(new CombineFileSplit(shpPaths, shpStarts, shpLengths, combined.getLocations()));
    return result;
}
 
开发者ID:DataSystemsLab,项目名称:GeoSpark,代码行数:39,代码来源:BoundaryInputFormat.java

示例8: getSchema

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //导入方法依赖的package包/类
/** Returns the job-configured input key schema, or reads it from the data file at {@code idx}. */
private static Schema getSchema(CombineFileSplit split, TaskAttemptContext cx, Integer idx) throws IOException {
  // Prefer the schema explicitly configured on the job.
  Schema configured = AvroJob.getInputKeySchema(cx.getConfiguration());
  if (configured != null) {
    return configured;
  }

  // Fall back to the schema embedded in the Avro data file itself.
  Path file = split.getPath(idx);
  FileSystem fs = file.getFileSystem(cx.getConfiguration());
  return AvroUtils.getSchemaFromDataFile(file, fs);
}
 
开发者ID:apache,项目名称:incubator-gobblin,代码行数:11,代码来源:AvroKeyCombineFileRecordReader.java

示例9: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //导入方法依赖的package包/类
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
		throws IOException, InterruptedException {
	CombineFileSplit split = (CombineFileSplit) genericSplit;
	Configuration job = context.getConfiguration();
	this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength",
			Integer.MAX_VALUE);

	this.start = split.getStartOffsets()[idx];
	// FIX: use the length of the file at idx. split.getLength() (no arg)
	// returns the aggregate length of *all* files in the combined split,
	// which would make this reader run far past the end of its own chunk.
	this.end = start + split.getLength(idx);
	Path file = split.getPath(idx);
	this.compressionCodescs = new CompressionCodecFactory(job);
	final CompressionCodec codec = compressionCodescs.getCodec(file);

	FileSystem fs = file.getFileSystem(job);
	FSDataInputStream fileIn = fs.open(file);
	boolean skipFirstLine = false;
	if (codec != null) {
		// Compressed input is not splittable: decode the whole stream.
		in = new LineReader(codec.createInputStream(fileIn), job);
		end = Long.MAX_VALUE;
	} else {
		if (start != 0) {
			// Mid-file chunk: back up one byte so the partial first line
			// can be discarded below (the previous chunk owns it).
			skipFirstLine = true;
			--start;
			fileIn.seek(start);
		}
		in = new LineReader(fileIn, job);
	}
	if (skipFirstLine) {// skip first line and re-establish "start"
		start += in.readLine(new Text(), 0,
				(int) Math.min((long) Integer.MAX_VALUE, end - start));
	}
	this.pos = start;
}
 
开发者ID:makelove,项目名称:book-hadoop-hacks,代码行数:35,代码来源:CombineFileLineRecordReader.java

示例10: SequenceFileRecordReaderWrapper

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //导入方法依赖的package包/类
@SuppressWarnings("unused")
public SequenceFileRecordReaderWrapper(CombineFileSplit split, TaskAttemptContext context, Integer index)
    throws IOException {
  fileSplit = new FileSplit(split.getPath(index), split.getOffset(index), split.getLength(index),
      split.getLocations());
  delegate = new SequenceFileInputFormat<Writable, Text>().createRecordReader(fileSplit, context);
}
 
开发者ID:apache,项目名称:incubator-blur,代码行数:8,代码来源:CsvBlurDriver.java

示例11: CFRecordReader

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; //导入方法依赖的package包/类
public CFRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index) throws IOException{
  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);

  fileIn = fs.open(path);
  // FIX: position the stream at the chunk's start offset. The original set
  // pos = startOffset but left the stream at byte 0, so any chunk with a
  // non-zero offset would re-read the file from the beginning. (If this
  // input format never splits files, offsets are always 0 and this is a
  // no-op.) NOTE(review): unlike the other readers here, this one does not
  // skip a partial first line at a mid-file offset — confirm whether the
  // format guarantees whole-file chunks.
  if (startOffset != 0) {
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  this.pos = startOffset;
}
 
开发者ID:dryman,项目名称:Hadoop-CombineFileInputFormat,代码行数:11,代码来源:CFRecordReader.java


注:本文中的org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.getPath方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。