本文整理汇总了Java中org.apache.hadoop.mapreduce.RecordReader.getCurrentValue方法的典型用法代码示例。如果您正苦于以下问题:Java RecordReader.getCurrentValue方法的具体用法?Java RecordReader.getCurrentValue怎么用?Java RecordReader.getCurrentValue使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.mapreduce.RecordReader
的用法示例。
在下文中一共展示了RecordReader.getCurrentValue方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getSplits
import org.apache.hadoop.mapreduce.RecordReader; //导入方法依赖的package包/类
@Test
public void getSplits() throws Exception {
S3MapReduceCpOptions options = getOptions();
Configuration configuration = new Configuration();
configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"), options);
JobContext jobContext = new JobContextImpl(configuration, new JobID());
DynamicInputFormat<Text, CopyListingFileStatus> inputFormat = new DynamicInputFormat<>();
List<InputSplit> splits = inputFormat.getSplits(jobContext);
int nFiles = 0;
int taskId = 0;
for (InputSplit split : splits) {
RecordReader<Text, CopyListingFileStatus> recordReader = inputFormat.createRecordReader(split, null);
StubContext stubContext = new StubContext(jobContext.getConfiguration(), recordReader, taskId);
final TaskAttemptContext taskAttemptContext = stubContext.getContext();
recordReader.initialize(splits.get(0), taskAttemptContext);
float previousProgressValue = 0f;
while (recordReader.nextKeyValue()) {
CopyListingFileStatus fileStatus = recordReader.getCurrentValue();
String source = fileStatus.getPath().toString();
assertTrue(expectedFilePaths.contains(source));
final float progress = recordReader.getProgress();
assertTrue(progress >= previousProgressValue);
assertTrue(progress >= 0.0f);
assertTrue(progress <= 1.0f);
previousProgressValue = progress;
++nFiles;
}
assertTrue(recordReader.getProgress() == 1.0f);
++taskId;
}
Assert.assertEquals(expectedFilePaths.size(), nFiles);
}
示例2: readSplit
import org.apache.hadoop.mapreduce.RecordReader; //导入方法依赖的package包/类
private static List<String> readSplit(FixedLengthInputFormat format,
InputSplit split,
Job job) throws Exception {
List<String> result = new ArrayList<String>();
TaskAttemptContext context = MapReduceTestUtil.
createDummyMapTaskAttemptContext(job.getConfiguration());
RecordReader<LongWritable, BytesWritable> reader =
format.createRecordReader(split, context);
MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable>
mcontext =
new MapContextImpl<LongWritable, BytesWritable, LongWritable,
BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(),
reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
LongWritable key;
BytesWritable value;
try {
reader.initialize(split, mcontext);
while (reader.nextKeyValue()) {
key = reader.getCurrentKey();
value = reader.getCurrentValue();
result.add(new String(value.getBytes(), 0, value.getLength()));
}
} finally {
reader.close();
}
return result;
}
示例3: doValidateSetupGenDC
import org.apache.hadoop.mapreduce.RecordReader; //导入方法依赖的package包/类
/**
* Validate setupGenerateDistCacheData by validating <li>permissions of the
* distributed cache directory and <li>content of the generated sequence file.
* This includes validation of dist cache file paths and their file sizes.
*/
private void doValidateSetupGenDC(
RecordReader<LongWritable, BytesWritable> reader, FileSystem fs,
long[] sortedFileSizes) throws IOException, InterruptedException {
// Validate permissions of dist cache directory
Path distCacheDir = dce.getDistributedCacheDir();
assertEquals(
"Wrong permissions for distributed cache dir " + distCacheDir,
fs.getFileStatus(distCacheDir).getPermission().getOtherAction()
.and(FsAction.EXECUTE), FsAction.EXECUTE);
// Validate the content of the sequence file generated by
// dce.setupGenerateDistCacheData().
LongWritable key = new LongWritable();
BytesWritable val = new BytesWritable();
for (int i = 0; i < sortedFileSizes.length; i++) {
assertTrue("Number of files written to the sequence file by "
+ "setupGenerateDistCacheData is less than the expected.",
reader.nextKeyValue());
key = reader.getCurrentKey();
val = reader.getCurrentValue();
long fileSize = key.get();
String file = new String(val.getBytes(), 0, val.getLength());
// Dist Cache files should be sorted based on file size.
assertEquals("Dist cache file size is wrong.", sortedFileSizes[i],
fileSize);
// Validate dist cache file path.
// parent dir of dist cache file
Path parent = new Path(file).getParent().makeQualified(fs.getUri(),fs.getWorkingDirectory());
// should exist in dist cache dir
assertTrue("Public dist cache file path is wrong.",
distCacheDir.equals(parent));
}
}
示例4: testBinary
import org.apache.hadoop.mapreduce.RecordReader; //导入方法依赖的package包/类
public void testBinary() throws IOException, InterruptedException {
Job job = Job.getInstance();
FileSystem fs = FileSystem.getLocal(job.getConfiguration());
Path dir = new Path(System.getProperty("test.build.data",".") + "/mapred");
Path file = new Path(dir, "testbinary.seq");
Random r = new Random();
long seed = r.nextLong();
r.setSeed(seed);
fs.delete(dir, true);
FileInputFormat.setInputPaths(job, dir);
Text tkey = new Text();
Text tval = new Text();
SequenceFile.Writer writer = new SequenceFile.Writer(fs,
job.getConfiguration(), file, Text.class, Text.class);
try {
for (int i = 0; i < RECORDS; ++i) {
tkey.set(Integer.toString(r.nextInt(), 36));
tval.set(Long.toString(r.nextLong(), 36));
writer.append(tkey, tval);
}
} finally {
writer.close();
}
TaskAttemptContext context = MapReduceTestUtil.
createDummyMapTaskAttemptContext(job.getConfiguration());
InputFormat<BytesWritable,BytesWritable> bformat =
new SequenceFileAsBinaryInputFormat();
int count = 0;
r.setSeed(seed);
BytesWritable bkey = new BytesWritable();
BytesWritable bval = new BytesWritable();
Text cmpkey = new Text();
Text cmpval = new Text();
DataInputBuffer buf = new DataInputBuffer();
FileInputFormat.setInputPaths(job, file);
for (InputSplit split : bformat.getSplits(job)) {
RecordReader<BytesWritable, BytesWritable> reader =
bformat.createRecordReader(split, context);
MapContext<BytesWritable, BytesWritable, BytesWritable, BytesWritable>
mcontext = new MapContextImpl<BytesWritable, BytesWritable,
BytesWritable, BytesWritable>(job.getConfiguration(),
context.getTaskAttemptID(), reader, null, null,
MapReduceTestUtil.createDummyReporter(),
split);
reader.initialize(split, mcontext);
try {
while (reader.nextKeyValue()) {
bkey = reader.getCurrentKey();
bval = reader.getCurrentValue();
tkey.set(Integer.toString(r.nextInt(), 36));
tval.set(Long.toString(r.nextLong(), 36));
buf.reset(bkey.getBytes(), bkey.getLength());
cmpkey.readFields(buf);
buf.reset(bval.getBytes(), bval.getLength());
cmpval.readFields(buf);
assertTrue(
"Keys don't match: " + "*" + cmpkey.toString() + ":" +
tkey.toString() + "*",
cmpkey.toString().equals(tkey.toString()));
assertTrue(
"Vals don't match: " + "*" + cmpval.toString() + ":" +
tval.toString() + "*",
cmpval.toString().equals(tval.toString()));
++count;
}
} finally {
reader.close();
}
}
assertEquals("Some records not found", RECORDS, count);
}
示例5: testFormat
import org.apache.hadoop.mapreduce.RecordReader; //导入方法依赖的package包/类
@Test(timeout=10000)
public void testFormat() throws IOException, InterruptedException {
Job job = Job.getInstance(conf);
Random random = new Random();
long seed = random.nextLong();
random.setSeed(seed);
localFs.delete(workDir, true);
FileInputFormat.setInputPaths(job, workDir);
final int length = 10000;
final int numFiles = 10;
// create files with a variety of lengths
createFiles(length, numFiles, random, job);
TaskAttemptContext context = MapReduceTestUtil.
createDummyMapTaskAttemptContext(job.getConfiguration());
// create a combine split for the files
InputFormat<IntWritable,BytesWritable> format =
new CombineSequenceFileInputFormat<IntWritable,BytesWritable>();
for (int i = 0; i < 3; i++) {
int numSplits =
random.nextInt(length/(SequenceFile.SYNC_INTERVAL/20)) + 1;
LOG.info("splitting: requesting = " + numSplits);
List<InputSplit> splits = format.getSplits(job);
LOG.info("splitting: got = " + splits.size());
// we should have a single split as the length is comfortably smaller than
// the block size
assertEquals("We got more than one splits!", 1, splits.size());
InputSplit split = splits.get(0);
assertEquals("It should be CombineFileSplit",
CombineFileSplit.class, split.getClass());
// check the split
BitSet bits = new BitSet(length);
RecordReader<IntWritable,BytesWritable> reader =
format.createRecordReader(split, context);
MapContext<IntWritable,BytesWritable,IntWritable,BytesWritable> mcontext =
new MapContextImpl<IntWritable,BytesWritable,IntWritable,BytesWritable>(job.getConfiguration(),
context.getTaskAttemptID(), reader, null, null,
MapReduceTestUtil.createDummyReporter(), split);
reader.initialize(split, mcontext);
assertEquals("reader class is CombineFileRecordReader.",
CombineFileRecordReader.class, reader.getClass());
try {
while (reader.nextKeyValue()) {
IntWritable key = reader.getCurrentKey();
BytesWritable value = reader.getCurrentValue();
assertNotNull("Value should not be null.", value);
final int k = key.get();
LOG.debug("read " + k);
assertFalse("Key in multiple partitions.", bits.get(k));
bits.set(k);
}
} finally {
reader.close();
}
assertEquals("Some keys in no partition.", length, bits.cardinality());
}
}
示例6: testFormat
import org.apache.hadoop.mapreduce.RecordReader; //导入方法依赖的package包/类
@Test(timeout=10000)
public void testFormat() throws Exception {
Job job = Job.getInstance(new Configuration(defaultConf));
Random random = new Random();
long seed = random.nextLong();
LOG.info("seed = " + seed);
random.setSeed(seed);
localFs.delete(workDir, true);
FileInputFormat.setInputPaths(job, workDir);
final int length = 10000;
final int numFiles = 10;
// create files with various lengths
createFiles(length, numFiles, random);
// create a combined split for the files
CombineTextInputFormat format = new CombineTextInputFormat();
for (int i = 0; i < 3; i++) {
int numSplits = random.nextInt(length/20) + 1;
LOG.info("splitting: requesting = " + numSplits);
List<InputSplit> splits = format.getSplits(job);
LOG.info("splitting: got = " + splits.size());
// we should have a single split as the length is comfortably smaller than
// the block size
assertEquals("We got more than one splits!", 1, splits.size());
InputSplit split = splits.get(0);
assertEquals("It should be CombineFileSplit",
CombineFileSplit.class, split.getClass());
// check the split
BitSet bits = new BitSet(length);
LOG.debug("split= " + split);
TaskAttemptContext context = MapReduceTestUtil.
createDummyMapTaskAttemptContext(job.getConfiguration());
RecordReader<LongWritable, Text> reader =
format.createRecordReader(split, context);
assertEquals("reader class is CombineFileRecordReader.",
CombineFileRecordReader.class, reader.getClass());
MapContext<LongWritable,Text,LongWritable,Text> mcontext =
new MapContextImpl<LongWritable,Text,LongWritable,Text>(job.getConfiguration(),
context.getTaskAttemptID(), reader, null, null,
MapReduceTestUtil.createDummyReporter(), split);
reader.initialize(split, mcontext);
try {
int count = 0;
while (reader.nextKeyValue()) {
LongWritable key = reader.getCurrentKey();
assertNotNull("Key should not be null.", key);
Text value = reader.getCurrentValue();
final int v = Integer.parseInt(value.toString());
LOG.debug("read " + v);
assertFalse("Key in multiple partitions.", bits.get(v));
bits.set(v);
count++;
}
LOG.debug("split=" + split + " count=" + count);
} finally {
reader.close();
}
assertEquals("Some keys in no partition.", length, bits.cardinality());
}
}