

Java RecordReader Class Code Examples

This article collects typical usage examples of the Java class org.apache.hadoop.mapreduce.RecordReader. If you are wondering what the RecordReader class does, how to use it, or are looking for concrete examples, the curated code samples below should help.


The RecordReader class belongs to the org.apache.hadoop.mapreduce package. Fifteen code examples using the class are shown below, sorted by popularity by default.
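
Before diving into the examples, here is a minimal sketch of what implementing the RecordReader contract looks like. It is not drawn from any of the projects cited below; the split type and the key/value choices are assumptions made purely for illustration. The framework calls initialize() once per split, then alternates nextKeyValue() with getCurrentKey()/getCurrentValue() until nextKeyValue() returns false, and finally calls close().

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

/** Hypothetical reader that emits exactly one (offset, path) record per split. */
public class SingleRecordReader extends RecordReader<LongWritable, Text> {

  private final LongWritable key = new LongWritable();
  private final Text value = new Text();
  private boolean emitted = false;

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException {
    // Derive the single record from the split; here we assume a FileSplit.
    FileSplit fileSplit = (FileSplit) split;
    key.set(fileSplit.getStart());
    value.set(fileSplit.getPath().toString());
  }

  @Override
  public boolean nextKeyValue() {
    if (emitted) {
      return false; // no more records in this split
    }
    emitted = true; // emit the single record exactly once
    return true;
  }

  @Override
  public LongWritable getCurrentKey() {
    return key;
  }

  @Override
  public Text getCurrentValue() {
    return value;
  }

  @Override
  public float getProgress() {
    return emitted ? 1.0f : 0.0f;
  }

  @Override
  public void close() {
    // nothing to release in this sketch
  }
}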

Example 1: createDBRecordReader

import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@Override
/** {@inheritDoc} */
protected RecordReader<LongWritable, T> createDBRecordReader(
    DBInputSplit split, Configuration conf) throws IOException {

  DBConfiguration dbConf = getDBConf();
  Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
  String dbProductName = getDBProductName();
  LOG.debug("Creating db record reader for db product: " + dbProductName);

  try {
    return new SQLServerDBRecordReader<T>(split, inputClass,
        conf, getConnection(), dbConf, dbConf.getInputConditions(),
        dbConf.getInputFieldNames(), dbConf.getInputTableName(),
        dbProductName);
  } catch (SQLException ex) {
    throw new IOException(ex);
  }
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 20, Source: SQLServerDBInputFormat.java

Example 2: createDBRecordReader

import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@Override
protected RecordReader<LongWritable, T> createDBRecordReader(
    DBInputSplit split, Configuration conf) throws IOException {

  DBConfiguration dbConf = getDBConf();
  @SuppressWarnings("unchecked")
  Class<T> inputClass = (Class<T>) (dbConf.getInputClass());

  try {
    // Use DB2-specific db reader
    return new Db2DataDrivenDBRecordReader<T>(split, inputClass,
        conf, getConnection(), dbConf, dbConf.getInputConditions(),
        dbConf.getInputFieldNames(), dbConf.getInputTableName());
  } catch (SQLException ex) {
    throw new IOException(ex);
  }
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 18, Source: Db2DataDrivenDBInputFormat.java

Example 3: createDBRecordReader

import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
protected RecordReader<LongWritable, T> createDBRecordReader(
    DBInputSplit split, Configuration conf) throws IOException {

  DBConfiguration dbConf = getDBConf();
  @SuppressWarnings("unchecked")
  Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
  String dbProductName = getDBProductName();

  LOG.debug("Creating db record reader for db product: " + dbProductName);

  try {
    return new DataDrivenDBRecordReader<T>(split, inputClass,
        conf, getConnection(), dbConf, dbConf.getInputConditions(),
        dbConf.getInputFieldNames(), dbConf.getInputTableName(),
        dbProductName);
  } catch (SQLException ex) {
    throw new IOException(ex);
  }
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 20, Source: DataDrivenDBInputFormat.java

Example 4: createDBRecordReader

import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@Override
protected RecordReader<LongWritable, T> createDBRecordReader(
    DBInputSplit split, Configuration conf) throws IOException {

  DBConfiguration dbConf = getDBConf();
  @SuppressWarnings("unchecked")
  Class<T> inputClass = (Class<T>) (dbConf.getInputClass());

  try {
    // Use Oracle-specific db reader
    return new OracleDataDrivenDBRecordReader<T>(split, inputClass,
        conf, getConnection(), dbConf, dbConf.getInputConditions(),
        dbConf.getInputFieldNames(), dbConf.getInputTableName());
  } catch (SQLException ex) {
    throw new IOException(ex);
  }
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 18, Source: OracleDataDrivenDBInputFormat.java

Example 5: createChildReader

import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
/**
 * Actually instantiate the user's chosen RecordReader implementation.
 */
@SuppressWarnings("unchecked")
private void createChildReader() throws IOException, InterruptedException {
  LOG.debug("ChildSplit operates on: " + split.getPath(index));

  Configuration conf = context.getConfiguration();

  // Determine the file format we're reading.
  Class rrClass;
  if (ExportJobBase.isSequenceFiles(conf, split.getPath(index))) {
    rrClass = SequenceFileRecordReader.class;
  } else {
    rrClass = LineRecordReader.class;
  }

  // Create the appropriate record reader.
  this.rr = (RecordReader<LongWritable, Object>)
      ReflectionUtils.newInstance(rrClass, conf);
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 22, Source: CombineShimRecordReader.java

Example 6: createDBRecordReader

import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
/** {@inheritDoc} */
@Override
protected RecordReader<LongWritable, T> createDBRecordReader(
    DBInputSplit split, Configuration conf) throws IOException {

  DBConfiguration dbConf = getDBConf();
  @SuppressWarnings("unchecked")
  Class<T> inputClass = (Class<T>) (dbConf.getInputClass());

  try {
    // Use Microsoft SQL Server specific db reader
    return new SqlServerRecordReader<T>(split, inputClass,
        conf, getConnection(), dbConf, dbConf.getInputConditions(),
        dbConf.getInputFieldNames(), dbConf.getInputTableName());
  } catch (SQLException ex) {
    throw new IOException(ex);
  }
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 19, Source: SqlServerInputFormat.java

Example 7: readSplit

import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
private static List<Text> readSplit(KeyValueTextInputFormat format, 
    InputSplit split, Job job) throws IOException, InterruptedException {
  List<Text> result = new ArrayList<Text>();
  Configuration conf = job.getConfiguration();
  TaskAttemptContext context = MapReduceTestUtil.
    createDummyMapTaskAttemptContext(conf);
  RecordReader<Text, Text> reader = format.createRecordReader(split, 
    MapReduceTestUtil.createDummyMapTaskAttemptContext(conf));
  MapContext<Text, Text, Text, Text> mcontext = 
    new MapContextImpl<Text, Text, Text, Text>(conf, 
    context.getTaskAttemptID(), reader, null, null,
    MapReduceTestUtil.createDummyReporter(), 
    split);
  reader.initialize(split, mcontext);
  while (reader.nextKeyValue()) {
    result.add(new Text(reader.getCurrentValue()));
  }
  reader.close();
  return result;
}
 
Developer: naver, Project: hadoop, Lines: 21, Source: TestMRKeyValueTextInputFormat.java

Example 8: testReinit

import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@Test
public void testReinit() throws Exception {
  // Test that a split containing multiple files works correctly,
  // with the child RecordReader getting its initialize() method
  // called a second time.
  TaskAttemptID taskId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  Configuration conf = new Configuration();
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskId);

  // This will create a CombineFileRecordReader that itself contains a
  // DummyRecordReader.
  InputFormat inputFormat = new ChildRRInputFormat();

  Path [] files = { new Path("file1"), new Path("file2") };
  long [] lengths = { 1, 1 };

  CombineFileSplit split = new CombineFileSplit(files, lengths);
  RecordReader rr = inputFormat.createRecordReader(split, context);
  assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);

  // first initialize() call comes from MapTask. We'll do it here.
  rr.initialize(split, context);

  // First value is first filename.
  assertTrue(rr.nextKeyValue());
  assertEquals("file1", rr.getCurrentValue().toString());

  // The inner RR will return false, because it only emits one (k, v) pair.
  // But there's another sub-split to process. This returns true to us.
  assertTrue(rr.nextKeyValue());
  
  // And the 2nd rr will have its initialize method called correctly.
  assertEquals("file2", rr.getCurrentValue().toString());
  
  // But after both child RR's have returned their singleton (k, v), this
  // should also return false.
  assertFalse(rr.nextKeyValue());
}
 
Developer: naver, Project: hadoop, Lines: 39, Source: TestCombineFileInputFormat.java

Example 9: readSplit

import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
private static List<Text> readSplit(InputFormat<LongWritable,Text> format,
  InputSplit split, Job job) throws IOException, InterruptedException {
  List<Text> result = new ArrayList<Text>();
  Configuration conf = job.getConfiguration();
  TaskAttemptContext context = MapReduceTestUtil.
    createDummyMapTaskAttemptContext(conf);
  RecordReader<LongWritable, Text> reader = format.createRecordReader(split,
    MapReduceTestUtil.createDummyMapTaskAttemptContext(conf));
  MapContext<LongWritable,Text,LongWritable,Text> mcontext =
    new MapContextImpl<LongWritable,Text,LongWritable,Text>(conf,
    context.getTaskAttemptID(), reader, null, null,
    MapReduceTestUtil.createDummyReporter(),
    split);
  reader.initialize(split, mcontext);
  while (reader.nextKeyValue()) {
    result.add(new Text(reader.getCurrentValue()));
  }
  return result;
}
 
Developer: naver, Project: hadoop, Lines: 20, Source: TestCombineTextInputFormat.java

Example 10: createDBRecordReader

import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@Override
protected RecordReader<LongWritable, T> createDBRecordReader(DBInputSplit split,
    Configuration conf) throws IOException {

  DBConfiguration dbConf = getDBConf();
  @SuppressWarnings("unchecked")
  Class<T> inputClass = (Class<T>) (dbConf.getInputClass());

  try {
    // Use Oracle-specific db reader
    return new OracleDataDrivenDBRecordReader<T>(split, inputClass,
        conf, createConnection(), dbConf, dbConf.getInputConditions(),
        dbConf.getInputFieldNames(), dbConf.getInputTableName());
  } catch (SQLException ex) {
    throw new IOException(ex.getMessage());
  }
}
 
Developer: naver, Project: hadoop, Lines: 18, Source: OracleDataDrivenDBInputFormat.java

Example 11: addMapper

import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
/**
 * Add mapper(the first mapper) that reads input from the input
 * context and writes to queue
 */
@SuppressWarnings("unchecked")
void addMapper(TaskInputOutputContext inputContext,
    ChainBlockingQueue<KeyValuePair<?, ?>> output, int index)
    throws IOException, InterruptedException {
  Configuration conf = getConf(index);
  Class<?> keyOutClass = conf.getClass(MAPPER_OUTPUT_KEY_CLASS, Object.class);
  Class<?> valueOutClass = conf.getClass(MAPPER_OUTPUT_VALUE_CLASS,
      Object.class);

  RecordReader rr = new ChainRecordReader(inputContext);
  RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass, output,
      conf);
  Mapper.Context mapperContext = createMapContext(rr, rw,
      (MapContext) inputContext, getConf(index));
  MapRunner runner = new MapRunner(mappers.get(index), mapperContext, rr, rw);
  threads.add(runner);
}
 
Developer: naver, Project: hadoop, Lines: 22, Source: Chain.java

Example 12: createRecordReader

import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@Override
public RecordReader<LongWritable, LongWritable> createRecordReader(InputSplit split,
  TaskAttemptContext context)
      throws IOException, InterruptedException {
  int taskId = context.getTaskAttemptID().getTaskID().getId();
  int numMapTasks = context.getConfiguration().getInt(NUM_MAPS_KEY, NUM_MAPS);
  int numIterations = context.getConfiguration().getInt(NUM_IMPORT_ROUNDS_KEY, NUM_IMPORT_ROUNDS);
  int iteration = context.getConfiguration().getInt(ROUND_NUM_KEY, 0);

  taskId = taskId + iteration * numMapTasks;
  numMapTasks = numMapTasks * numIterations;

  long chainId = Math.abs(new Random().nextLong());
  chainId = chainId - (chainId % numMapTasks) + taskId; // ensure that chainId is unique per task and across iterations
  LongWritable[] keys = new LongWritable[] {new LongWritable(chainId)};

  return new FixedRecordReader<LongWritable, LongWritable>(keys, keys);
}
 
Developer: fengchen8086, Project: ditb, Lines: 19, Source: IntegrationTestBulkLoad.java

Example 13: initReader

import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@SuppressWarnings({"rawtypes", "unchecked"})
public void initReader() throws IOException {
  try {
    Configuration conf = WorkerContext.get().getConf();
    String inputFormatClassName =
        conf.get(AngelConf.ANGEL_INPUTFORMAT_CLASS,
            AngelConf.DEFAULT_ANGEL_INPUTFORMAT_CLASS);

    Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormatClass =
        (Class<? extends org.apache.hadoop.mapreduce.InputFormat>) Class
            .forName(inputFormatClassName);

    org.apache.hadoop.mapreduce.InputFormat inputFormat =
        ReflectionUtils.newInstance(inputFormatClass,
            new JobConf(conf));

    MRTaskContext taskContext = new MRTaskContext(conf);
    org.apache.hadoop.mapreduce.RecordReader<KEY, VALUE> recordReader =
        inputFormat.createRecordReader(split, taskContext);

    recordReader.initialize(split, taskContext);
    setReader(new DFSReaderNewAPI(recordReader));
  } catch (Exception x) {
    LOG.error("init reader error ", x);
    throw new IOException(x);
  }
}
 
Developer: Tencent, Project: angel, Lines: 28, Source: DFSStorageNewAPI.java

Example 14: createRecordReader

import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@Override
public RecordReader<LongWritable, Text> createRecordReader(
        InputSplit inputSplit, TaskAttemptContext context) {
    try {
        return new XMLRecordReader(inputSplit, context.getConfiguration());
    } catch (IOException e) {
        return null;
    }
}
 
Developer: lzmhhh123, Project: Wikipedia-Index, Lines: 10, Source: XmlInputFormat.java

Example 15: getSplits

import org.apache.hadoop.mapreduce.RecordReader; // import the required package/class
@Test
public void getSplits() throws Exception {
  S3MapReduceCpOptions options = getOptions();
  Configuration configuration = new Configuration();
  configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
  CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
      new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"), options);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());
  DynamicInputFormat<Text, CopyListingFileStatus> inputFormat = new DynamicInputFormat<>();
  List<InputSplit> splits = inputFormat.getSplits(jobContext);

  int nFiles = 0;
  int taskId = 0;

  for (InputSplit split : splits) {
    RecordReader<Text, CopyListingFileStatus> recordReader = inputFormat.createRecordReader(split, null);
    StubContext stubContext = new StubContext(jobContext.getConfiguration(), recordReader, taskId);
    final TaskAttemptContext taskAttemptContext = stubContext.getContext();

    recordReader.initialize(splits.get(0), taskAttemptContext);
    float previousProgressValue = 0f;
    while (recordReader.nextKeyValue()) {
      CopyListingFileStatus fileStatus = recordReader.getCurrentValue();
      String source = fileStatus.getPath().toString();
      assertTrue(expectedFilePaths.contains(source));
      final float progress = recordReader.getProgress();
      assertTrue(progress >= previousProgressValue);
      assertTrue(progress >= 0.0f);
      assertTrue(progress <= 1.0f);
      previousProgressValue = progress;
      ++nFiles;
    }
    assertTrue(recordReader.getProgress() == 1.0f);

    ++taskId;
  }

  Assert.assertEquals(expectedFilePaths.size(), nFiles);
}
 
Developer: HotelsDotCom, Project: circus-train, Lines: 41, Source: DynamicInputFormatTest.java


Note: The org.apache.hadoop.mapreduce.RecordReader examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by various developers; copyright remains with the original authors, and redistribution and use should follow each project's license. Do not reproduce without permission.