

Java RecordReader.getCurrentValue Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.RecordReader.getCurrentValue. If you are wondering how RecordReader.getCurrentValue is used in practice, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.mapreduce.RecordReader.


The following shows 6 code examples of the RecordReader.getCurrentValue method, sorted by popularity by default.
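Before the individual examples, the sketch below shows the canonical read loop in which getCurrentValue is used: a RecordReader obtained from an InputFormat is initialized with a split, advanced with nextKeyValue(), and each record's value is fetched with getCurrentValue(). The choice of input format (TextInputFormat), the method name, and the variable names here are illustrative assumptions and are not taken from the projects cited below.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

// Minimal sketch (assumed names): read every value from one split using
// TextInputFormat, whose records are <LongWritable offset, Text line>.
private static List<String> readValues(TextInputFormat format, InputSplit split,
    TaskAttemptContext context) throws Exception {
  List<String> values = new ArrayList<>();
  RecordReader<LongWritable, Text> reader = format.createRecordReader(split, context);
  try {
    reader.initialize(split, context);        // must be called before reading
    while (reader.nextKeyValue()) {           // advance to the next record
      Text value = reader.getCurrentValue();  // value of the current record
      values.add(value.toString());           // copy out: the Writable may be reused
    }
  } finally {
    reader.close();
  }
  return values;
}

Note that getCurrentValue() may return the same mutable Writable instance on every call, so callers that keep values across iterations should copy them out, as the examples below also do.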

Example 1: getSplits

import org.apache.hadoop.mapreduce.RecordReader; // import the package/class the method depends on
@Test
public void getSplits() throws Exception {
  S3MapReduceCpOptions options = getOptions();
  Configuration configuration = new Configuration();
  configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
  CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
      new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"), options);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());
  DynamicInputFormat<Text, CopyListingFileStatus> inputFormat = new DynamicInputFormat<>();
  List<InputSplit> splits = inputFormat.getSplits(jobContext);

  int nFiles = 0;
  int taskId = 0;

  for (InputSplit split : splits) {
    RecordReader<Text, CopyListingFileStatus> recordReader = inputFormat.createRecordReader(split, null);
    StubContext stubContext = new StubContext(jobContext.getConfiguration(), recordReader, taskId);
    final TaskAttemptContext taskAttemptContext = stubContext.getContext();

    recordReader.initialize(splits.get(0), taskAttemptContext);
    float previousProgressValue = 0f;
    while (recordReader.nextKeyValue()) {
      CopyListingFileStatus fileStatus = recordReader.getCurrentValue();
      String source = fileStatus.getPath().toString();
      assertTrue(expectedFilePaths.contains(source));
      final float progress = recordReader.getProgress();
      assertTrue(progress >= previousProgressValue);
      assertTrue(progress >= 0.0f);
      assertTrue(progress <= 1.0f);
      previousProgressValue = progress;
      ++nFiles;
    }
    assertTrue(recordReader.getProgress() == 1.0f);

    ++taskId;
  }

  Assert.assertEquals(expectedFilePaths.size(), nFiles);
}
 
Developer ID: HotelsDotCom, Project: circus-train, Lines of code: 41, Source: DynamicInputFormatTest.java

Example 2: readSplit

import org.apache.hadoop.mapreduce.RecordReader; // import the package/class the method depends on
private static List<String> readSplit(FixedLengthInputFormat format, 
                                      InputSplit split, 
                                      Job job) throws Exception {
  List<String> result = new ArrayList<String>();
  TaskAttemptContext context = MapReduceTestUtil.
      createDummyMapTaskAttemptContext(job.getConfiguration());
  RecordReader<LongWritable, BytesWritable> reader =
      format.createRecordReader(split, context);
  MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable>
      mcontext =
      new MapContextImpl<LongWritable, BytesWritable, LongWritable,
      BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(),
      reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
  LongWritable key;
  BytesWritable value;
  try {
    reader.initialize(split, mcontext);
    while (reader.nextKeyValue()) {
      key = reader.getCurrentKey();
      value = reader.getCurrentValue();
      result.add(new String(value.getBytes(), 0, value.getLength()));
    }
  } finally {
    reader.close();
  }
  return result;
}
 
Developer ID: naver, Project: hadoop, Lines of code: 28, Source: TestFixedLengthInputFormat.java

Example 3: doValidateSetupGenDC

import org.apache.hadoop.mapreduce.RecordReader; // import the package/class the method depends on
/**
 * Validate setupGenerateDistCacheData by validating <li>permissions of the
 * distributed cache directory and <li>content of the generated sequence file.
 * This includes validation of dist cache file paths and their file sizes.
 */
private void doValidateSetupGenDC(
    RecordReader<LongWritable, BytesWritable> reader, FileSystem fs,
    long[] sortedFileSizes) throws IOException, InterruptedException {

  // Validate permissions of dist cache directory
  Path distCacheDir = dce.getDistributedCacheDir();
  assertEquals(
      "Wrong permissions for distributed cache dir " + distCacheDir,
      fs.getFileStatus(distCacheDir).getPermission().getOtherAction()
          .and(FsAction.EXECUTE), FsAction.EXECUTE);

  // Validate the content of the sequence file generated by
  // dce.setupGenerateDistCacheData().
  LongWritable key = new LongWritable();
  BytesWritable val = new BytesWritable();
  for (int i = 0; i < sortedFileSizes.length; i++) {
    assertTrue("Number of files written to the sequence file by "
        + "setupGenerateDistCacheData is less than the expected.",
        reader.nextKeyValue());
    key = reader.getCurrentKey();
    val = reader.getCurrentValue();
    long fileSize = key.get();
    String file = new String(val.getBytes(), 0, val.getLength());

    // Dist Cache files should be sorted based on file size.
    assertEquals("Dist cache file size is wrong.", sortedFileSizes[i],
        fileSize);

    // Validate dist cache file path.

    // parent dir of dist cache file
    Path parent = new Path(file).getParent().makeQualified(fs.getUri(),fs.getWorkingDirectory());
    // should exist in dist cache dir
    assertTrue("Public dist cache file path is wrong.",
        distCacheDir.equals(parent));
  }
}
 
Developer ID: naver, Project: hadoop, Lines of code: 43, Source: TestDistCacheEmulation.java

Example 4: testBinary

import org.apache.hadoop.mapreduce.RecordReader; // import the package/class the method depends on
public void testBinary() throws IOException, InterruptedException {
  Job job = Job.getInstance();
  FileSystem fs = FileSystem.getLocal(job.getConfiguration());
  Path dir = new Path(System.getProperty("test.build.data",".") + "/mapred");
  Path file = new Path(dir, "testbinary.seq");
  Random r = new Random();
  long seed = r.nextLong();
  r.setSeed(seed);

  fs.delete(dir, true);
  FileInputFormat.setInputPaths(job, dir);

  Text tkey = new Text();
  Text tval = new Text();

  SequenceFile.Writer writer = new SequenceFile.Writer(fs,
    job.getConfiguration(), file, Text.class, Text.class);
  try {
    for (int i = 0; i < RECORDS; ++i) {
      tkey.set(Integer.toString(r.nextInt(), 36));
      tval.set(Long.toString(r.nextLong(), 36));
      writer.append(tkey, tval);
    }
  } finally {
    writer.close();
  }
  TaskAttemptContext context = MapReduceTestUtil.
    createDummyMapTaskAttemptContext(job.getConfiguration());
  InputFormat<BytesWritable,BytesWritable> bformat =
    new SequenceFileAsBinaryInputFormat();

  int count = 0;
  r.setSeed(seed);
  BytesWritable bkey = new BytesWritable();
  BytesWritable bval = new BytesWritable();
  Text cmpkey = new Text();
  Text cmpval = new Text();
  DataInputBuffer buf = new DataInputBuffer();
  FileInputFormat.setInputPaths(job, file);
  for (InputSplit split : bformat.getSplits(job)) {
    RecordReader<BytesWritable, BytesWritable> reader =
          bformat.createRecordReader(split, context);
    MapContext<BytesWritable, BytesWritable, BytesWritable, BytesWritable> 
      mcontext = new MapContextImpl<BytesWritable, BytesWritable,
        BytesWritable, BytesWritable>(job.getConfiguration(), 
        context.getTaskAttemptID(), reader, null, null, 
        MapReduceTestUtil.createDummyReporter(), 
        split);
    reader.initialize(split, mcontext);
    try {
      while (reader.nextKeyValue()) {
        bkey = reader.getCurrentKey();
        bval = reader.getCurrentValue();
        tkey.set(Integer.toString(r.nextInt(), 36));
        tval.set(Long.toString(r.nextLong(), 36));
        buf.reset(bkey.getBytes(), bkey.getLength());
        cmpkey.readFields(buf);
        buf.reset(bval.getBytes(), bval.getLength());
        cmpval.readFields(buf);
        assertTrue(
          "Keys don't match: " + "*" + cmpkey.toString() + ":" +
          tkey.toString() + "*",
          cmpkey.toString().equals(tkey.toString()));
        assertTrue(
          "Vals don't match: " + "*" + cmpval.toString() + ":" +
          tval.toString() + "*",
          cmpval.toString().equals(tval.toString()));
        ++count;
      }
    } finally {
      reader.close();
    }
  }
  assertEquals("Some records not found", RECORDS, count);
}
 
Developer ID: naver, Project: hadoop, Lines of code: 76, Source: TestMRSequenceFileAsBinaryInputFormat.java

Example 5: testFormat

import org.apache.hadoop.mapreduce.RecordReader; // import the package/class the method depends on
@Test(timeout=10000)
public void testFormat() throws IOException, InterruptedException {
  Job job = Job.getInstance(conf);

  Random random = new Random();
  long seed = random.nextLong();
  random.setSeed(seed);

  localFs.delete(workDir, true);
  FileInputFormat.setInputPaths(job, workDir);

  final int length = 10000;
  final int numFiles = 10;

  // create files with a variety of lengths
  createFiles(length, numFiles, random, job);

  TaskAttemptContext context = MapReduceTestUtil.
    createDummyMapTaskAttemptContext(job.getConfiguration());
  // create a combine split for the files
  InputFormat<IntWritable,BytesWritable> format =
    new CombineSequenceFileInputFormat<IntWritable,BytesWritable>();
  for (int i = 0; i < 3; i++) {
    int numSplits =
      random.nextInt(length/(SequenceFile.SYNC_INTERVAL/20)) + 1;
    LOG.info("splitting: requesting = " + numSplits);
    List<InputSplit> splits = format.getSplits(job);
    LOG.info("splitting: got =        " + splits.size());

    // we should have a single split as the length is comfortably smaller than
    // the block size
    assertEquals("We got more than one splits!", 1, splits.size());
    InputSplit split = splits.get(0);
    assertEquals("It should be CombineFileSplit",
      CombineFileSplit.class, split.getClass());

    // check the split
    BitSet bits = new BitSet(length);
    RecordReader<IntWritable,BytesWritable> reader =
      format.createRecordReader(split, context);
    MapContext<IntWritable,BytesWritable,IntWritable,BytesWritable> mcontext =
      new MapContextImpl<IntWritable,BytesWritable,IntWritable,BytesWritable>(job.getConfiguration(),
      context.getTaskAttemptID(), reader, null, null,
      MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);
    assertEquals("reader class is CombineFileRecordReader.",
      CombineFileRecordReader.class, reader.getClass());

    try {
      while (reader.nextKeyValue()) {
        IntWritable key = reader.getCurrentKey();
        BytesWritable value = reader.getCurrentValue();
        assertNotNull("Value should not be null.", value);
        final int k = key.get();
        LOG.debug("read " + k);
        assertFalse("Key in multiple partitions.", bits.get(k));
        bits.set(k);
      }
    } finally {
      reader.close();
    }
    assertEquals("Some keys in no partition.", length, bits.cardinality());
  }
}
 
Developer ID: naver, Project: hadoop, Lines of code: 65, Source: TestCombineSequenceFileInputFormat.java

Example 6: testFormat

import org.apache.hadoop.mapreduce.RecordReader; // import the package/class the method depends on
@Test(timeout=10000)
public void testFormat() throws Exception {
  Job job = Job.getInstance(new Configuration(defaultConf));

  Random random = new Random();
  long seed = random.nextLong();
  LOG.info("seed = " + seed);
  random.setSeed(seed);

  localFs.delete(workDir, true);
  FileInputFormat.setInputPaths(job, workDir);

  final int length = 10000;
  final int numFiles = 10;

  // create files with various lengths
  createFiles(length, numFiles, random);

  // create a combined split for the files
  CombineTextInputFormat format = new CombineTextInputFormat();
  for (int i = 0; i < 3; i++) {
    int numSplits = random.nextInt(length/20) + 1;
    LOG.info("splitting: requesting = " + numSplits);
    List<InputSplit> splits = format.getSplits(job);
    LOG.info("splitting: got =        " + splits.size());

    // we should have a single split as the length is comfortably smaller than
    // the block size
    assertEquals("We got more than one splits!", 1, splits.size());
    InputSplit split = splits.get(0);
    assertEquals("It should be CombineFileSplit",
      CombineFileSplit.class, split.getClass());

    // check the split
    BitSet bits = new BitSet(length);
    LOG.debug("split= " + split);
    TaskAttemptContext context = MapReduceTestUtil.
      createDummyMapTaskAttemptContext(job.getConfiguration());
    RecordReader<LongWritable, Text> reader =
      format.createRecordReader(split, context);
    assertEquals("reader class is CombineFileRecordReader.",
      CombineFileRecordReader.class, reader.getClass());
    MapContext<LongWritable,Text,LongWritable,Text> mcontext =
      new MapContextImpl<LongWritable,Text,LongWritable,Text>(job.getConfiguration(),
      context.getTaskAttemptID(), reader, null, null,
      MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);

    try {
      int count = 0;
      while (reader.nextKeyValue()) {
        LongWritable key = reader.getCurrentKey();
        assertNotNull("Key should not be null.", key);
        Text value = reader.getCurrentValue();
        final int v = Integer.parseInt(value.toString());
        LOG.debug("read " + v);
        assertFalse("Key in multiple partitions.", bits.get(v));
        bits.set(v);
        count++;
      }
      LOG.debug("split=" + split + " count=" + count);
    } finally {
      reader.close();
    }
    assertEquals("Some keys in no partition.", length, bits.cardinality());
  }
}
 
Developer ID: naver, Project: hadoop, Lines of code: 68, Source: TestCombineTextInputFormat.java


Note: The org.apache.hadoop.mapreduce.RecordReader.getCurrentValue examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by their respective developers; copyright of the source code remains with the original authors. For distribution and use, please refer to the corresponding project's license. Do not reproduce without permission.