

Java CombineFileSplit Class Code Examples

This article collects typical usage examples of the Java class org.apache.hadoop.mapreduce.lib.input.CombineFileSplit. If you are wondering what the CombineFileSplit class is for, how to use it, or what it looks like in practice, the curated examples below may help.


The CombineFileSplit class belongs to the org.apache.hadoop.mapreduce.lib.input package. Twelve code examples of the class are shown below, sorted by popularity by default.
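Before turning to the examples collected from real projects, here is a minimal, self-contained sketch of how a CombineFileSplit is typically constructed and inspected. The file paths, offsets, lengths, and host names are made-up values for illustration only.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class CombineFileSplitSketch {
  public static void main(String[] args) throws Exception {
    // A CombineFileSplit packs chunks of several files into one split:
    // parallel arrays of (path, start offset, length), plus preferred hosts.
    Path[] paths   = { new Path("/data/part-0000"), new Path("/data/part-0001") }; // hypothetical paths
    long[] offsets = { 0L, 0L };
    long[] lengths = { 64L * 1024 * 1024, 32L * 1024 * 1024 };
    String[] hosts = { "node1", "node2" };                                          // hypothetical hosts

    CombineFileSplit split = new CombineFileSplit(paths, offsets, lengths, hosts);

    // Per-file accessors used throughout the examples below.
    for (int i = 0; i < split.getNumPaths(); i++) {
      System.out.printf("%s offset=%d length=%d%n",
          split.getPath(i), split.getOffset(i), split.getLength(i));
    }
    // Total number of bytes covered by the split.
    System.out.println("total length = " + split.getLength());
  }
}

The record readers in Examples 2, 10, and 12 below receive such a split together with an index and use getPath(index), getOffset(index), and getLength(index) to process exactly one of the packed files.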

Example 1: addCreatedSplit

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the required package/class
private void addCreatedSplit(List<InputSplit> splitList,
  Collection<String> locations,
  ArrayList<OneBlockInfo> validBlocks) {
  // create an input split
  Path[] fl = new Path[validBlocks.size()];
  long[] offset = new long[validBlocks.size()];
  long[] length = new long[validBlocks.size()];
  for (int i = 0; i < validBlocks.size(); i++) {
    fl[i] = validBlocks.get(i).onepath;
    offset[i] = validBlocks.get(i).offset;
    length[i] = validBlocks.get(i).length;
  }
  // add this split to the list that is returned
  CombineFileSplit thissplit = new CombineFileSplit(fl, offset,
    length, locations.toArray(new String[0]));
  splitList.add(thissplit);
}
 
Developer: Tencent, Project: angel, Lines of code: 18, Source: BalanceInputFormat.java

Example 2: CombineFileLineRecordReader

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the required package/class
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {
  
  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;
  
  //open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) {  // skip first line and re-establish "startOffset".
    startOffset += reader.readLine(new Text(), 0,
                (int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
 
Developer: naver, Project: hadoop, Lines of code: 24, Source: MultiFileWordCount.java

Example 3: GridmixSplit

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the required package/class
public GridmixSplit(CombineFileSplit cfsplit, int maps, int id,
    long inputBytes, long inputRecords, long outputBytes,
    long outputRecords, double[] reduceBytes, double[] reduceRecords,
    long[] reduceOutputBytes, long[] reduceOutputRecords)
    throws IOException {
  super(cfsplit);
  this.id = id;
  this.maps = maps;
  reduces = reduceBytes.length;
  this.inputRecords = inputRecords;
  this.outputBytes = outputBytes;
  this.outputRecords = outputRecords;
  this.reduceBytes = reduceBytes;
  this.reduceRecords = reduceRecords;
  nSpec = reduceOutputBytes.length;
  this.reduceOutputBytes = reduceOutputBytes;
  this.reduceOutputRecords = reduceOutputRecords;
}
 
Developer: naver, Project: hadoop, Lines of code: 19, Source: GridmixSplit.java

Example 4: LoadSplit

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the required package/class
public LoadSplit(CombineFileSplit cfsplit, int maps, int id, long inputBytes, 
                 long inputRecords, long outputBytes, long outputRecords, 
                 double[] reduceBytes, double[] reduceRecords, 
                 long[] reduceOutputBytes, long[] reduceOutputRecords,
                 ResourceUsageMetrics metrics,
                 ResourceUsageMetrics[] rMetrics)
throws IOException {
  super(cfsplit);
  this.id = id;
  this.maps = maps;
  reduces = reduceBytes.length;
  this.inputRecords = inputRecords;
  this.outputBytes = outputBytes;
  this.outputRecords = outputRecords;
  this.reduceBytes = reduceBytes;
  this.reduceRecords = reduceRecords;
  nSpec = reduceOutputBytes.length;
  this.reduceOutputBytes = reduceOutputBytes;
  this.reduceOutputRecords = reduceOutputRecords;
  this.mapMetrics = metrics;
  this.reduceMetrics = rMetrics;
}
 
Developer: naver, Project: hadoop, Lines of code: 23, Source: LoadSplit.java

Example 5: testRepeat

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the required package/class
@Test
public void testRepeat() throws Exception {
  final Configuration conf = new Configuration();
  Arrays.fill(loc, "");
  Arrays.fill(start, 0L);
  Arrays.fill(len, BLOCK);

  final ByteArrayOutputStream out = fillVerif();
  final FileQueue q =
    new FileQueue(new CombineFileSplit(paths, start, len, loc), conf);
  final byte[] verif = out.toByteArray();
  final byte[] check = new byte[2 * NFILES * BLOCK];
  q.read(check, 0, NFILES * BLOCK);
  assertArrayEquals(verif, Arrays.copyOf(check, NFILES * BLOCK));

  final byte[] verif2 = new byte[2 * NFILES * BLOCK];
  System.arraycopy(verif, 0, verif2, 0, verif.length);
  System.arraycopy(verif, 0, verif2, verif.length, verif.length);
  q.read(check, 0, 2 * NFILES * BLOCK);
  assertArrayEquals(verif2, check);

}
 
Developer: naver, Project: hadoop, Lines of code: 23, Source: TestFileQueue.java

Example 6: testUneven

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the required package/class
@Test
public void testUneven() throws Exception {
  final Configuration conf = new Configuration();
  Arrays.fill(loc, "");
  Arrays.fill(start, 0L);
  Arrays.fill(len, BLOCK);

  final int B2 = BLOCK / 2;
  for (int i = 0; i < NFILES; i += 2) {
    start[i] += B2;
    len[i] -= B2;
  }
  final FileQueue q =
    new FileQueue(new CombineFileSplit(paths, start, len, loc), conf);
  final ByteArrayOutputStream out = fillVerif();
  final byte[] verif = out.toByteArray();
  final byte[] check = new byte[NFILES / 2 * BLOCK + NFILES / 2 * B2];
  q.read(check, 0, verif.length);
  assertArrayEquals(verif, Arrays.copyOf(check, verif.length));
  q.read(check, 0, verif.length);
  assertArrayEquals(verif, Arrays.copyOf(check, verif.length));
}
 
Developer: naver, Project: hadoop, Lines of code: 23, Source: TestFileQueue.java

Example 7: getLoadSplit

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the required package/class
private LoadSplit getLoadSplit() throws Exception {

    Path[] files = {new Path("one"), new Path("two")};
    long[] start = {1, 2};
    long[] lengths = {100, 200};
    String[] locations = {"locOne", "loctwo"};

    CombineFileSplit cfSplit = new CombineFileSplit(files, start, lengths,
            locations);
    ResourceUsageMetrics metrics = new ResourceUsageMetrics();
    metrics.setCumulativeCpuUsage(200);
    ResourceUsageMetrics[] rMetrics = {metrics};

    double[] reduceBytes = {8.1d, 8.2d};
    double[] reduceRecords = {9.1d, 9.2d};
    long[] reduceOutputBytes = {101L, 102L};
    long[] reduceOutputRecords = {111L, 112L};

    return new LoadSplit(cfSplit, 2, 1, 4L, 5L, 6L, 7L,
            reduceBytes, reduceRecords, reduceOutputBytes, reduceOutputRecords,
            metrics, rMetrics);
  }
 
Developer: naver, Project: hadoop, Lines of code: 23, Source: TestGridMixClasses.java

Example 8: initialize

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the required package/class
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException {
  Configuration conf = context.getConfiguration();
  CombineFileSplit cSplit =  (CombineFileSplit) split;
  Path[] path = cSplit.getPaths();
  long[] start = cSplit.getStartOffsets();
  long[] len = cSplit.getLengths();
  
  FileSystem fs = cSplit.getPath(0).getFileSystem(conf);
  
  long startTS = conf.getLong(RowInputFormat.START_TIME_MILLIS, 0l);
  long endTS = conf.getLong(RowInputFormat.END_TIME_MILLIS, 0l);
  this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, startTS, endTS);

  instantiateGfxdLoner(conf);
}
 
Developer: gemxd, Project: gemfirexd-oss, Lines of code: 18, Source: RowRecordReader.java

Example 9: HDFSSplitIterator

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the required package/class
public HDFSSplitIterator(FileSystem fs, Path[] paths, long[] offsets, long[] lengths, long startTime, long endTime) throws IOException {
  this.fs = fs;
  this.split = new CombineFileSplit(paths, offsets, lengths, null);
  while(currentHopIndex < split.getNumPaths() && !fs.exists(split.getPath(currentHopIndex))){
    logger.warning(LocalizedStrings.HOPLOG_CLEANED_UP_BY_JANITOR, split.getPath(currentHopIndex));
    currentHopIndex++;
  }
  if(currentHopIndex == split.getNumPaths()){
    this.hoplog = null;
    iterator = null;
  } else {
    this.hoplog = getHoplog(fs,split.getPath(currentHopIndex));
    iterator = hoplog.getReader().scan(split.getOffset(currentHopIndex), split.getLength(currentHopIndex));
  }
  this.startTime = startTime;
  this.endTime = endTime;
}
 
Developer: gemxd, Project: gemfirexd-oss, Lines of code: 18, Source: HDFSSplitIterator.java

Example 10: CombineFileLineRecordReader

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the required package/class
public CombineFileLineRecordReader(CombineFileSplit split,
    TaskAttemptContext context, Integer index) throws IOException {
  
  fs = FileSystem.get(context.getConfiguration());
  this.path = split.getPath(index);
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;
  
  //open the file
  fileIn = fs.open(path);
  if (startOffset != 0) {
    skipFirstLine = true;
    --startOffset;
    fileIn.seek(startOffset);
  }
  reader = new LineReader(fileIn);
  if (skipFirstLine) {  // skip first line and re-establish "startOffset".
    startOffset += reader.readLine(new Text(), 0,
                (int)Math.min((long)Integer.MAX_VALUE, end - startOffset));
  }
  this.pos = startOffset;
}
 
Developer: Nextzero, Project: hadoop-2.6.0-cdh5.4.3, Lines of code: 24, Source: MultiFileWordCount.java

Example 11: cleanSplits

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the required package/class
/**
 * Set the number of locations in the split to SPLIT_MAX_NUM_LOCATIONS if it is larger than
 * SPLIT_MAX_NUM_LOCATIONS (MAPREDUCE-5186).
 */
private List<InputSplit> cleanSplits(List<InputSplit> splits) throws IOException {
  List<InputSplit> cleanedSplits = Lists.newArrayList();

  for (int i = 0; i < splits.size(); i++) {
    CombineFileSplit oldSplit = (CombineFileSplit) splits.get(i);
    String[] locations = oldSplit.getLocations();

    Preconditions.checkNotNull(locations, "CombineFileSplit.getLocations() returned null");

    if (locations.length > SPLIT_MAX_NUM_LOCATIONS) {
      locations = Arrays.copyOf(locations, SPLIT_MAX_NUM_LOCATIONS);
    }

    cleanedSplits
        .add(new CombineFileSplit(oldSplit.getPaths(), oldSplit.getStartOffsets(), oldSplit.getLengths(), locations));
  }
  return cleanedSplits;
}
 
Developer: Hanmourang, Project: Gobblin, Lines of code: 23, Source: AvroKeyRecursiveCombineFileInputFormat.java

Example 12: BinaryFileRecordReader

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; // import the required package/class
/**
 * Constructor
 */
public BinaryFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context, Integer pathToProcess) {
  processed = false;
  fileToRead = fileSplit.getPath(pathToProcess);
  fileLength = fileSplit.getLength(pathToProcess);
  config = context.getConfiguration();

  assert 0 == fileSplit.getOffset(pathToProcess);

  try {
    FileSystem dfs = FileSystem.get(config);
    assert dfs.getFileStatus(fileToRead).getLen() == fileLength;
  } catch (Exception e) {
    e.printStackTrace();
  }

  key = new Text(Path.getPathWithoutSchemeAndAuthority(fileToRead).toString());
  value = new BytesWritable();
}
 
Developer: hiiamok, Project: DISH, Lines of code: 24, Source: BinaryFileRecordReader.java


Note: The org.apache.hadoop.mapreduce.lib.input.CombineFileSplit class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets are taken from open-source projects contributed by their authors; copyright of the source code remains with the original authors, and distribution and use should follow each project's license. Do not reproduce without permission.