当前位置: 首页>>代码示例>>Java>>正文


Java RecordReader类代码示例

本文整理汇总了Java中org.apache.hadoop.mapred.RecordReader的典型用法代码示例。如果您正苦于以下问题:Java RecordReader类的具体用法?Java RecordReader怎么用?Java RecordReader使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


RecordReader类属于org.apache.hadoop.mapred包,在下文中一共展示了RecordReader类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getRecordReader

import org.apache.hadoop.mapred.RecordReader; //导入依赖的package包/类
@Override
public RecordReader<NullWritable,ColumnAndIndex> getRecordReader( final InputSplit split, final JobConf job, final Reporter reporter ) throws IOException {
  FileSplit fileSplit = (FileSplit)split;
  Path path = fileSplit.getPath();
  FileSystem fs = path.getFileSystem( job );
  long fileLength = fs.getLength( path );
  long start = fileSplit.getStart();
  long length = fileSplit.getLength();
  InputStream in = fs.open( path );
  IJobReporter jobReporter = new HadoopJobReporter( reporter );
  jobReporter.setStatus( String.format( "Read file : %s" , path.toString() ) );
  HiveReaderSetting hiveConfig = new HiveReaderSetting( fileSplit , job );
  if ( hiveConfig.isVectorMode() ){
    IVectorizedReaderSetting vectorizedSetting = new HiveVectorizedReaderSetting( fileSplit , job , hiveConfig );
    return (RecordReader)new MDSHiveDirectVectorizedReader( in , fileLength , start , length , vectorizedSetting , jobReporter );
  }
  else{
    return new MDSHiveLineReader( in , fileLength , start , length , hiveConfig , jobReporter , spreadCounter );
  }
}
 
开发者ID:yahoojapan,项目名称:multiple-dimension-spread,代码行数:21,代码来源:MDSHiveLineInputFormat.java

示例2: initReader

import org.apache.hadoop.mapred.RecordReader; //导入依赖的package包/类
@SuppressWarnings({"rawtypes", "unchecked"})
public void initReader() throws IOException {
  try {
    Configuration conf = WorkerContext.get().getConf();
    String inputFormatClassName =
        conf.get(AngelConf.ANGEL_INPUTFORMAT_CLASS,
            AngelConf.DEFAULT_ANGEL_INPUTFORMAT_CLASS);

    Class<? extends org.apache.hadoop.mapred.InputFormat> inputFormatClass =
        (Class<? extends org.apache.hadoop.mapred.InputFormat>) Class
            .forName(inputFormatClassName);

    org.apache.hadoop.mapred.InputFormat inputFormat =
        ReflectionUtils.newInstance(inputFormatClass,
            new JobConf(conf));

    org.apache.hadoop.mapred.RecordReader<KEY, VALUE> recordReader =
        inputFormat.getRecordReader(split, new JobConf(conf), Reporter.NULL);

    setReader(new DFSReaderOldAPI(recordReader));
  } catch (Exception x) {
    LOG.error("init reader error ", x);
    throw new IOException(x);
  }
}
 
开发者ID:Tencent,项目名称:angel,代码行数:26,代码来源:DFSStorageOldAPI.java

示例3: getSample

import org.apache.hadoop.mapred.RecordReader; //导入依赖的package包/类
/**
 * From each split sampled, take the first numSamples / numSplits records.
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, JobConf job) throws IOException {
  InputSplit[] splits = inf.getSplits(job, job.getNumMapTasks());
  ArrayList<K> samples = new ArrayList<K>(numSamples);
  int splitsToSample = Math.min(maxSplitsSampled, splits.length);
  int splitStep = splits.length / splitsToSample;
  int samplesPerSplit = numSamples / splitsToSample;
  long records = 0;
  for (int i = 0; i < splitsToSample; ++i) {
    RecordReader<K,V> reader = inf.getRecordReader(splits[i * splitStep],
        job, Reporter.NULL);
    K key = reader.createKey();
    V value = reader.createValue();
    while (reader.next(key, value)) {
      samples.add(key);
      key = reader.createKey();
      ++records;
      if ((i+1) * samplesPerSplit <= records) {
        break;
      }
    }
    reader.close();
  }
  return (K[])samples.toArray();
}
 
开发者ID:naver,项目名称:hadoop,代码行数:29,代码来源:InputSampler.java

示例4: testDBInputFormat

import org.apache.hadoop.mapred.RecordReader; //导入依赖的package包/类
/**
 * test DBInputFormat class. Class should split result for chunks
 * @throws Exception
 */
@Test(timeout = 10000)
public void testDBInputFormat() throws Exception {
  JobConf configuration = new JobConf();
  setupDriver(configuration);
  
  DBInputFormat<NullDBWritable> format = new DBInputFormat<NullDBWritable>();
  format.setConf(configuration);
  format.setConf(configuration);
  DBInputFormat.DBInputSplit splitter = new DBInputFormat.DBInputSplit(1, 10);
  Reporter reporter = mock(Reporter.class);
  RecordReader<LongWritable, NullDBWritable> reader = format.getRecordReader(
      splitter, configuration, reporter);

  configuration.setInt(MRJobConfig.NUM_MAPS, 3);
  InputSplit[] lSplits = format.getSplits(configuration, 3);
  assertEquals(5, lSplits[0].getLength());
  assertEquals(3, lSplits.length);

  // test reader .Some simple tests
  assertEquals(LongWritable.class, reader.createKey().getClass());
  assertEquals(0, reader.getPos());
  assertEquals(0, reader.getProgress(), 0.001);
  reader.close();
}
 
开发者ID:naver,项目名称:hadoop,代码行数:29,代码来源:TestDBInputFormat.java

示例5: getRecordReader

import org.apache.hadoop.mapred.RecordReader; //导入依赖的package包/类
public RecordReader getRecordReader(InputSplit split, JobConf job,
  Reporter reporter) throws IOException {
  FileSplit fileSplit = (FileSplit) split;
  FileSystem fs = FileSystem.get(fileSplit.getPath().toUri(), job);
  FSDataInputStream is = fs.open(fileSplit.getPath());
  byte[] header = new byte[3];
  RecordReader reader = null;
  try {
    is.readFully(header);
  } catch (EOFException eof) {
    reader = textInputFormat.getRecordReader(split, job, reporter);
  } finally {
    is.close();
  }
  if (header[0] == 'S' && header[1] == 'E' && header[2] == 'Q') {
    reader = seqFileInputFormat.getRecordReader(split, job, reporter);
  } else {
    reader = textInputFormat.getRecordReader(split, job, reporter);
  }
  return reader;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:22,代码来源:AutoInputFormat.java

示例6: readEthereumBlockInputFormatBlock1346406Bzip2Compressed

import org.apache.hadoop.mapred.RecordReader; //导入依赖的package包/类
@Test
 public void readEthereumBlockInputFormatBlock1346406Bzip2Compressed() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
	JobConf job = new JobConf(defaultConf);
ClassLoader classLoader = getClass().getClassLoader();
String fileName="eth1346406.bin.bz2";
String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();	
Path file = new Path(fileNameBlock);
   FileInputFormat.setInputPaths(job, file);
   EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();
   format.configure(job);
   InputSplit[] inputSplits = format.getSplits(job,1);
 
   assertEquals( 1, inputSplits.length,"Only one split generated for genesis block");
   	RecordReader<BytesWritable, EthereumBlock> reader = format.getRecordReader(inputSplits[0], job, reporter);
assertNotNull( reader,"Format returned  null RecordReader");
	
BytesWritable key = new BytesWritable();	
EthereumBlock block = new EthereumBlock();
assertTrue( reader.next(key,block),"Input Split for block 1346406 contains at least one block");
	
assertEquals( 6, block.getEthereumTransactions().size(),"Block 1346406 must have 6 transactions");
   	assertFalse( reader.next(key,block),"No further blocks in block 1346406");
   	reader.close();
}
 
开发者ID:ZuInnoTe,项目名称:hadoopcryptoledger,代码行数:25,代码来源:EthereumFormatHadoopTest.java

示例7: readExcelInputFormatExcel2013SingleSheetEncryptedNegativeLowFootprint

import org.apache.hadoop.mapred.RecordReader; //导入依赖的package包/类
@Test
   public void readExcelInputFormatExcel2013SingleSheetEncryptedNegativeLowFootprint() throws IOException {
   	JobConf job = new JobConf(defaultConf);
   	ClassLoader classLoader = getClass().getClassLoader();
   	String fileName="excel2013encrypt.xlsx";
   	String fileNameSpreadSheet=classLoader.getResource(fileName).getFile();	
   	Path file = new Path(fileNameSpreadSheet);
   	FileInputFormat.setInputPaths(job, file);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");
// low footprint
job.set("hadoopoffice.read.lowFootprint", "true");
// for decryption simply set the password
job.set("hadoopoffice.read.security.crypt.password","test2");
  	ExcelFileInputFormat format = new ExcelFileInputFormat();
   	format.configure(job);
   	InputSplit[] inputSplits = format.getSplits(job,1);
   	assertEquals(1,inputSplits.length,"Only one split generated for Excel file");
   	RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);	
   	assertNull(reader,"Null record reader implies invalid password");
   }
 
开发者ID:ZuInnoTe,项目名称:hadoopoffice,代码行数:22,代码来源:OfficeFormatHadoopExcelTest.java

示例8: initNextReader

import org.apache.hadoop.mapred.RecordReader; //导入依赖的package包/类
/**
 * Initializes next reader if available, will close previous reader if any.
 *
 * @param job map / reduce job configuration.
 * @return true if new reader was initialized, false is no more readers are available
 * @throws ExecutionSetupException if could not init record reader
 */
protected boolean initNextReader(JobConf job) throws ExecutionSetupException {
  if (inputSplitsIterator.hasNext()) {
    if (reader != null) {
      closeReader();
    }
    InputSplit inputSplit = inputSplitsIterator.next();
    try {
      reader = (org.apache.hadoop.mapred.RecordReader<Object, Object>) job.getInputFormat().getRecordReader(inputSplit, job, Reporter.NULL);
      logger.trace("hive reader created: {} for inputSplit {}", reader.getClass().getName(), inputSplit.toString());
    } catch (Exception e) {
      throw new ExecutionSetupException("Failed to get o.a.hadoop.mapred.RecordReader from Hive InputFormat", e);
    }
    return true;
  }
  return false;
}
 
开发者ID:axbaretto,项目名称:drill,代码行数:24,代码来源:HiveAbstractReader.java

示例9: getRecordReader

import org.apache.hadoop.mapred.RecordReader; //导入依赖的package包/类
static RecordReader<NullWritable, DynamoDBItemWritable> getRecordReader(
    InputSplit inputSplit, JobConf job, Reporter reporter) throws IOException {
  // CombineFileSplit indicates the new export format which includes a manifest file
  if (inputSplit instanceof CombineFileSplit) {
    int version = job.getInt(DynamoDBConstants.EXPORT_FORMAT_VERSION, -1);
    if (version != ExportManifestRecordWriter.FORMAT_VERSION) {
      throw new IOException("Unknown version: " + job.get(DynamoDBConstants
          .EXPORT_FORMAT_VERSION));
    }
    return new ImportCombineFileRecordReader((CombineFileSplit) inputSplit, job, reporter);
  } else if (inputSplit instanceof FileSplit) {
    // FileSplit indicates the old data pipeline format which doesn't include a manifest file
    Path path = ((FileSplit) inputSplit).getPath();
    return new ImportRecordReader(job, path);
  } else {
    throw new IOException("Expecting CombineFileSplit or FileSplit but the input split type is:"
        + " " + inputSplit.getClass());
  }
}
 
开发者ID:awslabs,项目名称:emr-dynamodb-connector,代码行数:20,代码来源:ImportRecordReaderFactory.java

示例10: readEthereumBlockInputFormatBlock1346406

import org.apache.hadoop.mapred.RecordReader; //导入依赖的package包/类
@Test
 public void readEthereumBlockInputFormatBlock1346406() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
	JobConf job = new JobConf(defaultConf);
				ClassLoader classLoader = getClass().getClassLoader();
String fileName="eth1346406.bin";
String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();	
Path file = new Path(fileNameBlock);
   FileInputFormat.setInputPaths(job, file);
   EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();
   format.configure(job);
   InputSplit[] inputSplits = format.getSplits(job,1);
 
   assertEquals( 1, inputSplits.length,"Only one split generated for genesis block");
   	RecordReader<BytesWritable, EthereumBlock> reader = format.getRecordReader(inputSplits[0], job, reporter);
assertNotNull( reader,"Format returned  null RecordReader");
		
BytesWritable key = new BytesWritable();	
EthereumBlock block = new EthereumBlock();
assertTrue( reader.next(key,block),"Input Split for block 1346406 contains at least one block");
assertEquals( 6, block.getEthereumTransactions().size(),"Block 1346406 must have 6 transactions");
   	assertFalse( reader.next(key,block),"No further blocks in block 1346406");
   	reader.close();
}
 
开发者ID:ZuInnoTe,项目名称:hadoopcryptoledger,代码行数:24,代码来源:EthereumFormatHadoopTest.java

示例11: getSample

import org.apache.hadoop.mapred.RecordReader; //导入依赖的package包/类
/**
 * For each split sampled, emit when the ratio of the number of records
 * retained to the total record count is less than the specified
 * frequency.
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, JobConf job) throws IOException {
  InputSplit[] splits = inf.getSplits(job, job.getNumMapTasks());
  ArrayList<K> samples = new ArrayList<K>();
  int splitsToSample = Math.min(maxSplitsSampled, splits.length);
  int splitStep = splits.length / splitsToSample;
  long records = 0;
  long kept = 0;
  for (int i = 0; i < splitsToSample; ++i) {
    RecordReader<K,V> reader = inf.getRecordReader(splits[i * splitStep],
        job, Reporter.NULL);
    K key = reader.createKey();
    V value = reader.createValue();
    while (reader.next(key, value)) {
      ++records;
      if ((double) kept / records < freq) {
        ++kept;
        samples.add(key);
        key = reader.createKey();
      }
    }
    reader.close();
  }
  return (K[])samples.toArray();
}
 
开发者ID:yncxcw,项目名称:big-c,代码行数:31,代码来源:InputSampler.java

示例12: readExcelInputFormatExcel2003SingleSheetEncryptedNegativeLowFootprint

import org.apache.hadoop.mapred.RecordReader; //导入依赖的package包/类
@Test
   public void readExcelInputFormatExcel2003SingleSheetEncryptedNegativeLowFootprint() throws IOException {
   	JobConf job = new JobConf(defaultConf);
   	ClassLoader classLoader = getClass().getClassLoader();
   	String fileName="excel2003encrypt.xls";
   	String fileNameSpreadSheet=classLoader.getResource(fileName).getFile();	
   	Path file = new Path(fileNameSpreadSheet);
   	FileInputFormat.setInputPaths(job, file);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");

// low footprint
job.set("hadoopoffice.read.lowFootprint", "true");
// for decryption simply set the password
job.set("hadoopoffice.read.security.crypt.password","test2");
  	ExcelFileInputFormat format = new ExcelFileInputFormat();
   	format.configure(job);
   	InputSplit[] inputSplits = format.getSplits(job,1);
   	assertEquals(1,inputSplits.length,"Only one split generated for Excel file");
   	RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
   	assertNull(reader,"Null record reader implies invalid password");
   }
 
开发者ID:ZuInnoTe,项目名称:hadoopoffice,代码行数:23,代码来源:OfficeFormatHadoopExcelTest.java

示例13: readExcelInputFormatExcel2003Empty

import org.apache.hadoop.mapred.RecordReader; //导入依赖的package包/类
@Test
    public void readExcelInputFormatExcel2003Empty() throws IOException {
JobConf job = new JobConf(defaultConf);
    	ClassLoader classLoader = getClass().getClassLoader();
    	String fileName="excel2003empty.xls";
    	String fileNameSpreadSheet=classLoader.getResource(fileName).getFile();	
    	Path file = new Path(fileNameSpreadSheet);
    	FileInputFormat.setInputPaths(job, file);
	// set locale to the one of the test data
	job.set("hadoopoffice.locale.bcp47","de");
   	ExcelFileInputFormat format = new ExcelFileInputFormat();
    	format.configure(job);
    	InputSplit[] inputSplits = format.getSplits(job,1);
    	assertEquals(1, inputSplits.length,"Only one split generated for Excel file");
    	RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
	assertNotNull(reader,"Format returned  null RecordReader");
	Text spreadSheetKey = new Text();	
	ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
	assertTrue( reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 1");
	assertEquals(0,spreadSheetValue.get().length,"Input Split for Excel file contain row 1 and is empty");	
	assertFalse(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains no further row");	
    }
 
开发者ID:ZuInnoTe,项目名称:hadoopoffice,代码行数:23,代码来源:OfficeFormatHadoopExcelTest.java

示例14: readExcelInputFormatExcel2013Empty

import org.apache.hadoop.mapred.RecordReader; //导入依赖的package包/类
@Test
   public void readExcelInputFormatExcel2013Empty() throws IOException {
       JobConf job = new JobConf(defaultConf);
   	ClassLoader classLoader = getClass().getClassLoader();
   	String fileName="excel2013empty.xlsx";
   	String fileNameSpreadSheet=classLoader.getResource(fileName).getFile();	
   	Path file = new Path(fileNameSpreadSheet);
   	FileInputFormat.setInputPaths(job, file);
// set locale to the one of the test data
job.set("hadoopoffice.read.locale.bcp47","de");
  	ExcelFileInputFormat format = new ExcelFileInputFormat();
   	format.configure(job);
   	InputSplit[] inputSplits = format.getSplits(job,1);
   	assertEquals(1, inputSplits.length,"Only one split generated for Excel file");
   	RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
assertNotNull(reader,"Format returned  null RecordReader");
Text spreadSheetKey = new Text();	
ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
assertTrue( reader.next(spreadSheetKey,spreadSheetValue), "Input Split for Excel file contains row 1");
assertEquals(0,spreadSheetValue.get().length, "Input Split for Excel file contain row 1 and is empty");	
assertFalse(reader.next(spreadSheetKey,spreadSheetValue), "Input Split for Excel file contains no further row");		
   }
 
开发者ID:ZuInnoTe,项目名称:hadoopoffice,代码行数:23,代码来源:OfficeFormatHadoopExcelTest.java

示例15: readEthereumBlockInputFormatBlock3346406

import org.apache.hadoop.mapred.RecordReader; //导入依赖的package包/类
@Test
 public void readEthereumBlockInputFormatBlock3346406() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
JobConf job = new JobConf(defaultConf);
ClassLoader classLoader = getClass().getClassLoader();
String fileName="eth3346406.bin";
String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();	
Path file = new Path(fileNameBlock);
   FileInputFormat.setInputPaths(job, file);
   EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();
   format.configure(job);
   InputSplit[] inputSplits = format.getSplits(job,1);
 
   assertEquals( 1, inputSplits.length,"Only one split generated for genesis block");
   	RecordReader<BytesWritable, EthereumBlock> reader = format.getRecordReader(inputSplits[0], job, reporter);
assertNotNull( reader,"Format returned  null RecordReader");

BytesWritable key = new BytesWritable();	
EthereumBlock block = new EthereumBlock();
assertTrue( reader.next(key,block),"Input Split for block 3346406 contains at least one block");
assertEquals( 7, block.getEthereumTransactions().size(),"Block 3346406 must have 7 transactions");
   	assertFalse( reader.next(key,block),"No further blocks in block 3346406");
   	reader.close();
}
 
开发者ID:ZuInnoTe,项目名称:hadoopcryptoledger,代码行数:24,代码来源:EthereumFormatHadoopTest.java


注:本文中的org.apache.hadoop.mapred.RecordReader类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。