This article collects typical usage examples of the Java class org.apache.hadoop.mapreduce.RecordReader. If you have been wondering what RecordReader does, how to use it, or what it looks like in real code, the curated class examples below should help.
The RecordReader class belongs to the org.apache.hadoop.mapreduce package. The following section shows 15 code examples of the RecordReader class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
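Before looking at the individual examples, here is a minimal sketch of the read loop that most of them are built around: the InputFormat creates a RecordReader for a split, the reader is initialized, records are pulled with nextKeyValue(), and the reader is closed. Only the Hadoop API calls are real; MyInputFormat and the key/value types are placeholders for whatever your job actually uses.

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Minimal sketch, not taken from the examples below: MyInputFormat stands in
// for any InputFormat<LongWritable, Text> implementation.
static void readAll(MyInputFormat format, InputSplit split,
    TaskAttemptContext context) throws Exception {
  RecordReader<LongWritable, Text> reader =
      format.createRecordReader(split, context);
  try {
    reader.initialize(split, context);   // must be called before reading the first record
    while (reader.nextKeyValue()) {      // advance to the next (key, value) pair
      LongWritable key = reader.getCurrentKey();
      Text value = reader.getCurrentValue();
      // process key/value here ...
    }
  } finally {
    reader.close();                      // release the underlying streams
  }
}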
Example 1: createDBRecordReader
import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@Override
/** {@inheritDoc} */
protected RecordReader<LongWritable, T> createDBRecordReader(
    DBInputSplit split, Configuration conf) throws IOException {
  DBConfiguration dbConf = getDBConf();
  Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
  String dbProductName = getDBProductName();
  LOG.debug("Creating db record reader for db product: " + dbProductName);
  try {
    return new SQLServerDBRecordReader<T>(split, inputClass,
        conf, getConnection(), dbConf, dbConf.getInputConditions(),
        dbConf.getInputFieldNames(), dbConf.getInputTableName(),
        dbProductName);
  } catch (SQLException ex) {
    throw new IOException(ex);
  }
}
Example 2: createDBRecordReader
import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@Override
protected RecordReader<LongWritable, T> createDBRecordReader(
    DBInputSplit split, Configuration conf) throws IOException {
  DBConfiguration dbConf = getDBConf();
  @SuppressWarnings("unchecked")
  Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
  try {
    // Use DB2-specific db reader
    return new Db2DataDrivenDBRecordReader<T>(split, inputClass,
        conf, getConnection(), dbConf, dbConf.getInputConditions(),
        dbConf.getInputFieldNames(), dbConf.getInputTableName());
  } catch (SQLException ex) {
    throw new IOException(ex);
  }
}
Example 3: createDBRecordReader
import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
protected RecordReader<LongWritable, T> createDBRecordReader(
    DBInputSplit split, Configuration conf) throws IOException {
  DBConfiguration dbConf = getDBConf();
  @SuppressWarnings("unchecked")
  Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
  String dbProductName = getDBProductName();
  LOG.debug("Creating db record reader for db product: " + dbProductName);
  try {
    return new DataDrivenDBRecordReader<T>(split, inputClass,
        conf, getConnection(), dbConf, dbConf.getInputConditions(),
        dbConf.getInputFieldNames(), dbConf.getInputTableName(),
        dbProductName);
  } catch (SQLException ex) {
    throw new IOException(ex);
  }
}
Example 4: createDBRecordReader
import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@Override
protected RecordReader<LongWritable, T> createDBRecordReader(
    DBInputSplit split, Configuration conf) throws IOException {
  DBConfiguration dbConf = getDBConf();
  @SuppressWarnings("unchecked")
  Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
  try {
    // Use Oracle-specific db reader
    return new OracleDataDrivenDBRecordReader<T>(split, inputClass,
        conf, getConnection(), dbConf, dbConf.getInputConditions(),
        dbConf.getInputFieldNames(), dbConf.getInputTableName());
  } catch (SQLException ex) {
    throw new IOException(ex);
  }
}
Example 5: createChildReader
import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
/**
 * Actually instantiate the user's chosen RecordReader implementation.
 */
@SuppressWarnings("unchecked")
private void createChildReader() throws IOException, InterruptedException {
  LOG.debug("ChildSplit operates on: " + split.getPath(index));
  Configuration conf = context.getConfiguration();

  // Determine the file format we're reading.
  Class rrClass;
  if (ExportJobBase.isSequenceFiles(conf, split.getPath(index))) {
    rrClass = SequenceFileRecordReader.class;
  } else {
    rrClass = LineRecordReader.class;
  }

  // Create the appropriate record reader.
  this.rr = (RecordReader<LongWritable, Object>)
      ReflectionUtils.newInstance(rrClass, conf);
}
Example 6: createDBRecordReader
import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
/** {@inheritDoc} */
@Override
protected RecordReader<LongWritable, T> createDBRecordReader(
    DBInputSplit split, Configuration conf) throws IOException {
  DBConfiguration dbConf = getDBConf();
  @SuppressWarnings("unchecked")
  Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
  try {
    // Use Microsoft SQL Server specific db reader
    return new SqlServerRecordReader<T>(split, inputClass,
        conf, getConnection(), dbConf, dbConf.getInputConditions(),
        dbConf.getInputFieldNames(), dbConf.getInputTableName());
  } catch (SQLException ex) {
    throw new IOException(ex);
  }
}
Example 7: readSplit
import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
private static List<Text> readSplit(KeyValueTextInputFormat format,
    InputSplit split, Job job) throws IOException, InterruptedException {
  List<Text> result = new ArrayList<Text>();
  Configuration conf = job.getConfiguration();
  TaskAttemptContext context = MapReduceTestUtil
      .createDummyMapTaskAttemptContext(conf);
  RecordReader<Text, Text> reader = format.createRecordReader(split,
      MapReduceTestUtil.createDummyMapTaskAttemptContext(conf));
  MapContext<Text, Text, Text, Text> mcontext =
      new MapContextImpl<Text, Text, Text, Text>(conf,
          context.getTaskAttemptID(), reader, null, null,
          MapReduceTestUtil.createDummyReporter(),
          split);
  reader.initialize(split, mcontext);
  while (reader.nextKeyValue()) {
    result.add(new Text(reader.getCurrentValue()));
  }
  reader.close();
  return result;
}
Example 8: testReinit
import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@Test
public void testReinit() throws Exception {
  // Test that a split containing multiple files works correctly,
  // with the child RecordReader getting its initialize() method
  // called a second time.
  TaskAttemptID taskId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  Configuration conf = new Configuration();
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskId);

  // This will create a CombineFileRecordReader that itself contains a
  // DummyRecordReader.
  InputFormat inputFormat = new ChildRRInputFormat();

  Path[] files = { new Path("file1"), new Path("file2") };
  long[] lengths = { 1, 1 };
  CombineFileSplit split = new CombineFileSplit(files, lengths);

  RecordReader rr = inputFormat.createRecordReader(split, context);
  assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);

  // The first initialize() call comes from MapTask. We'll do it here.
  rr.initialize(split, context);

  // The first value is the first filename.
  assertTrue(rr.nextKeyValue());
  assertEquals("file1", rr.getCurrentValue().toString());

  // The inner RR will return false, because it only emits one (k, v) pair.
  // But there's another sub-split to process. This returns true to us.
  assertTrue(rr.nextKeyValue());

  // And the 2nd rr will have its initialize method called correctly.
  assertEquals("file2", rr.getCurrentValue().toString());

  // But after both child RRs have returned their singleton (k, v), this
  // should also return false.
  assertFalse(rr.nextKeyValue());
}
Example 9: readSplit
import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
private static List<Text> readSplit(InputFormat<LongWritable, Text> format,
    InputSplit split, Job job) throws IOException, InterruptedException {
  List<Text> result = new ArrayList<Text>();
  Configuration conf = job.getConfiguration();
  TaskAttemptContext context = MapReduceTestUtil
      .createDummyMapTaskAttemptContext(conf);
  RecordReader<LongWritable, Text> reader = format.createRecordReader(split,
      MapReduceTestUtil.createDummyMapTaskAttemptContext(conf));
  MapContext<LongWritable, Text, LongWritable, Text> mcontext =
      new MapContextImpl<LongWritable, Text, LongWritable, Text>(conf,
          context.getTaskAttemptID(), reader, null, null,
          MapReduceTestUtil.createDummyReporter(),
          split);
  reader.initialize(split, mcontext);
  while (reader.nextKeyValue()) {
    result.add(new Text(reader.getCurrentValue()));
  }
  return result;
}
Example 10: createDBRecordReader
import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@Override
protected RecordReader<LongWritable, T> createDBRecordReader(DBInputSplit split,
    Configuration conf) throws IOException {
  DBConfiguration dbConf = getDBConf();
  @SuppressWarnings("unchecked")
  Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
  try {
    // Use Oracle-specific db reader
    return new OracleDataDrivenDBRecordReader<T>(split, inputClass,
        conf, createConnection(), dbConf, dbConf.getInputConditions(),
        dbConf.getInputFieldNames(), dbConf.getInputTableName());
  } catch (SQLException ex) {
    throw new IOException(ex.getMessage());
  }
}
Example 11: addMapper
import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
/**
 * Add a mapper (the first mapper) that reads input from the input
 * context and writes to the queue.
 */
@SuppressWarnings("unchecked")
void addMapper(TaskInputOutputContext inputContext,
    ChainBlockingQueue<KeyValuePair<?, ?>> output, int index)
    throws IOException, InterruptedException {
  Configuration conf = getConf(index);
  Class<?> keyOutClass = conf.getClass(MAPPER_OUTPUT_KEY_CLASS, Object.class);
  Class<?> valueOutClass = conf.getClass(MAPPER_OUTPUT_VALUE_CLASS,
      Object.class);
  RecordReader rr = new ChainRecordReader(inputContext);
  RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass, output,
      conf);
  Mapper.Context mapperContext = createMapContext(rr, rw,
      (MapContext) inputContext, getConf(index));
  MapRunner runner = new MapRunner(mappers.get(index), mapperContext, rr, rw);
  threads.add(runner);
}
Example 12: createRecordReader
import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@Override
public RecordReader<LongWritable, LongWritable> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException, InterruptedException {
  int taskId = context.getTaskAttemptID().getTaskID().getId();
  int numMapTasks = context.getConfiguration().getInt(NUM_MAPS_KEY, NUM_MAPS);
  int numIterations = context.getConfiguration().getInt(NUM_IMPORT_ROUNDS_KEY,
      NUM_IMPORT_ROUNDS);
  int iteration = context.getConfiguration().getInt(ROUND_NUM_KEY, 0);
  taskId = taskId + iteration * numMapTasks;
  numMapTasks = numMapTasks * numIterations;
  // Ensure that the chainId is unique per task and across iterations.
  long chainId = Math.abs(new Random().nextLong());
  chainId = chainId - (chainId % numMapTasks) + taskId;
  LongWritable[] keys = new LongWritable[] { new LongWritable(chainId) };
  return new FixedRecordReader<LongWritable, LongWritable>(keys, keys);
}
Example 13: initReader
import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@SuppressWarnings({"rawtypes", "unchecked"})
public void initReader() throws IOException {
  try {
    Configuration conf = WorkerContext.get().getConf();
    String inputFormatClassName =
        conf.get(AngelConf.ANGEL_INPUTFORMAT_CLASS,
            AngelConf.DEFAULT_ANGEL_INPUTFORMAT_CLASS);
    Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormatClass =
        (Class<? extends org.apache.hadoop.mapreduce.InputFormat>) Class
            .forName(inputFormatClassName);
    org.apache.hadoop.mapreduce.InputFormat inputFormat =
        ReflectionUtils.newInstance(inputFormatClass, new JobConf(conf));
    MRTaskContext taskContext = new MRTaskContext(conf);
    org.apache.hadoop.mapreduce.RecordReader<KEY, VALUE> recordReader =
        inputFormat.createRecordReader(split, taskContext);
    recordReader.initialize(split, taskContext);
    setReader(new DFSReaderNewAPI(recordReader));
  } catch (Exception x) {
    LOG.error("init reader error ", x);
    throw new IOException(x);
  }
}
Example 14: createRecordReader
import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@Override
public RecordReader<LongWritable, Text> createRecordReader(
    InputSplit inputSplit, TaskAttemptContext context) {
  try {
    return new XMLRecordReader(inputSplit, context.getConfiguration());
  } catch (IOException e) {
    return null;
  }
}
Example 15: getSplits
import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@Test
public void getSplits() throws Exception {
  S3MapReduceCpOptions options = getOptions();
  Configuration configuration = new Configuration();
  configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
  CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
      new Path(cluster.getFileSystem().getUri().toString()
          + "/tmp/testDynInputFormat/fileList.seq"), options);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());
  DynamicInputFormat<Text, CopyListingFileStatus> inputFormat = new DynamicInputFormat<>();
  List<InputSplit> splits = inputFormat.getSplits(jobContext);

  int nFiles = 0;
  int taskId = 0;
  for (InputSplit split : splits) {
    RecordReader<Text, CopyListingFileStatus> recordReader =
        inputFormat.createRecordReader(split, null);
    StubContext stubContext =
        new StubContext(jobContext.getConfiguration(), recordReader, taskId);
    final TaskAttemptContext taskAttemptContext = stubContext.getContext();

    recordReader.initialize(splits.get(0), taskAttemptContext);
    float previousProgressValue = 0f;
    while (recordReader.nextKeyValue()) {
      CopyListingFileStatus fileStatus = recordReader.getCurrentValue();
      String source = fileStatus.getPath().toString();
      assertTrue(expectedFilePaths.contains(source));
      final float progress = recordReader.getProgress();
      assertTrue(progress >= previousProgressValue);
      assertTrue(progress >= 0.0f);
      assertTrue(progress <= 1.0f);
      previousProgressValue = progress;
      ++nFiles;
    }
    assertTrue(recordReader.getProgress() == 1.0f);
    ++taskId;
  }
  Assert.assertEquals(expectedFilePaths.size(), nFiles);
}