当前位置: 首页>>代码示例>>Java>>正文


Java RecordReader.createValue方法代码示例

本文整理汇总了Java中org.apache.hadoop.mapred.RecordReader.createValue方法的典型用法代码示例。如果您正苦于以下问题:Java RecordReader.createValue方法的具体用法?Java RecordReader.createValue怎么用?Java RecordReader.createValue使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.hadoop.mapred.RecordReader的用法示例。


在下文中一共展示了RecordReader.createValue方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getSample

import org.apache.hadoop.mapred.RecordReader; //导入方法依赖的package包/类
/**
 * From each split sampled, take the first numSamples / numSplits records.
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, JobConf job) throws IOException {
  InputSplit[] splits = inf.getSplits(job, job.getNumMapTasks());
  ArrayList<K> samples = new ArrayList<K>(numSamples);
  int splitsToSample = Math.min(maxSplitsSampled, splits.length);
  int splitStep = splits.length / splitsToSample;
  int samplesPerSplit = numSamples / splitsToSample;
  long records = 0;
  for (int i = 0; i < splitsToSample; ++i) {
    RecordReader<K,V> reader = inf.getRecordReader(splits[i * splitStep],
        job, Reporter.NULL);
    K key = reader.createKey();
    V value = reader.createValue();
    while (reader.next(key, value)) {
      samples.add(key);
      key = reader.createKey();
      ++records;
      if ((i+1) * samplesPerSplit <= records) {
        break;
      }
    }
    reader.close();
  }
  return (K[])samples.toArray();
}
 
开发者ID:naver,项目名称:hadoop,代码行数:29,代码来源:InputSampler.java

示例2: dumpTypedBytes

import org.apache.hadoop.mapred.RecordReader; //导入方法依赖的package包/类
/**
 * Dump given list of files to standard output as typed bytes.
 */
@SuppressWarnings("unchecked")
private int dumpTypedBytes(List<FileStatus> files) throws IOException {
  JobConf job = new JobConf(getConf()); 
  DataOutputStream dout = new DataOutputStream(System.out);
  AutoInputFormat autoInputFormat = new AutoInputFormat();
  for (FileStatus fileStatus : files) {
    FileSplit split = new FileSplit(fileStatus.getPath(), 0,
      fileStatus.getLen() * fileStatus.getBlockSize(),
      (String[]) null);
    RecordReader recReader = null;
    try {
      recReader = autoInputFormat.getRecordReader(split, job, Reporter.NULL);
      Object key = recReader.createKey();
      Object value = recReader.createValue();
      while (recReader.next(key, value)) {
        if (key instanceof Writable) {
          TypedBytesWritableOutput.get(dout).write((Writable) key);
        } else {
          TypedBytesOutput.get(dout).write(key);
        }
        if (value instanceof Writable) {
          TypedBytesWritableOutput.get(dout).write((Writable) value);
        } else {
          TypedBytesOutput.get(dout).write(value);
        }
      }
    } finally {
      if (recReader != null) {
        recReader.close();
      }
    }
  }
  dout.flush();
  return 0;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:39,代码来源:DumpTypedBytes.java

示例3: verifyWithMockedMapReduce

import org.apache.hadoop.mapred.RecordReader; //导入方法依赖的package包/类
private void verifyWithMockedMapReduce(JobConf job, int numRegions, int expectedNumSplits,
    byte[] startRow, byte[] stopRow) throws IOException, InterruptedException {
  TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
  InputSplit[] splits = tsif.getSplits(job, 0);

  Assert.assertEquals(expectedNumSplits, splits.length);

  HBaseTestingUtility.SeenRowTracker rowTracker =
    new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);

  for (int i = 0; i < splits.length; i++) {
    // validate input split
    InputSplit split = splits[i];
    Assert.assertTrue(split instanceof TableSnapshotInputFormat.TableSnapshotRegionSplit);

    // validate record reader
    OutputCollector collector = mock(OutputCollector.class);
    Reporter reporter = mock(Reporter.class);
    RecordReader<ImmutableBytesWritable, Result> rr = tsif.getRecordReader(split, job, reporter);

    // validate we can read all the data back
    ImmutableBytesWritable key = rr.createKey();
    Result value = rr.createValue();
    while (rr.next(key, value)) {
      verifyRowFromMap(key, value);
      rowTracker.addRow(key.copyBytes());
    }

    rr.close();
  }

  // validate all rows are seen
  rowTracker.validate();
}
 
开发者ID:fengchen8086,项目名称:ditb,代码行数:35,代码来源:TestTableSnapshotInputFormat.java

示例4: run

import org.apache.hadoop.mapred.RecordReader; //导入方法依赖的package包/类
/**
 * Run the map task.
 * @param input the set of inputs
 * @param output the object to collect the outputs of the map
 * @param reporter the object to update with status
 */
@SuppressWarnings("unchecked")
public void run(RecordReader<K1, V1> input, OutputCollector<K2, V2> output,
                Reporter reporter) throws IOException {
  Application<K1, V1, K2, V2> application = null;
  try {
    RecordReader<FloatWritable, NullWritable> fakeInput = 
      (!Submitter.getIsJavaRecordReader(job) && 
       !Submitter.getIsJavaMapper(job)) ? 
 (RecordReader<FloatWritable, NullWritable>) input : null;
    application = new Application<K1, V1, K2, V2>(job, fakeInput, output, 
                                                  reporter,
        (Class<? extends K2>) job.getOutputKeyClass(), 
        (Class<? extends V2>) job.getOutputValueClass());
  } catch (InterruptedException ie) {
    throw new RuntimeException("interrupted", ie);
  }
  DownwardProtocol<K1, V1> downlink = application.getDownlink();
  boolean isJavaInput = Submitter.getIsJavaRecordReader(job);
  downlink.runMap(reporter.getInputSplit(), 
                  job.getNumReduceTasks(), isJavaInput);
  boolean skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
  try {
    if (isJavaInput) {
      // allocate key & value instances that are re-used for all entries
      K1 key = input.createKey();
      V1 value = input.createValue();
      downlink.setInputTypes(key.getClass().getName(),
                             value.getClass().getName());
      
      while (input.next(key, value)) {
        // map pair to output
        downlink.mapItem(key, value);
        if(skipping) {
          //flush the streams on every record input if running in skip mode
          //so that we don't buffer other records surrounding a bad record.
          downlink.flush();
        }
      }
      downlink.endOfInput();
    }
    application.waitForFinish();
  } catch (Throwable t) {
    application.abort(t);
  } finally {
    application.cleanup();
  }
}
 
开发者ID:naver,项目名称:hadoop,代码行数:54,代码来源:PipesMapRunner.java

示例5: run

import org.apache.hadoop.mapred.RecordReader; //导入方法依赖的package包/类
public void run(RecordReader<K1, V1> input, OutputCollector<K2, V2> output,
                Reporter reporter)
  throws IOException {
  try {
    // allocate key & value instances these objects will not be reused
    // because execution of Mapper.map is not serialized.
    K1 key = input.createKey();
    V1 value = input.createValue();

    while (input.next(key, value)) {

      executorService.execute(new MapperInvokeRunable(key, value, output,
                              reporter));

      checkForExceptionsFromProcessingThreads();

      // Allocate new key & value instances as mapper is running in parallel
      key = input.createKey();
      value = input.createValue();
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("Finished dispatching all Mappper.map calls, job "
                + job.getJobName());
    }

    // Graceful shutdown of the Threadpool, it will let all scheduled
    // Runnables to end.
    executorService.shutdown();

    try {

      // Now waiting for all Runnables to end.
      while (!executorService.awaitTermination(100, TimeUnit.MILLISECONDS)) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Awaiting all running Mappper.map calls to finish, job "
                    + job.getJobName());
        }

        // NOTE: while Mapper.map dispatching has concluded there are still
        // map calls in progress and exceptions would be thrown.
        checkForExceptionsFromProcessingThreads();

      }

      // NOTE: it could be that a map call has had an exception after the
      // call for awaitTermination() returing true. And edge case but it
      // could happen.
      checkForExceptionsFromProcessingThreads();

    } catch (IOException ioEx) {
      // Forcing a shutdown of all thread of the threadpool and rethrowing
      // the IOException
      executorService.shutdownNow();
      throw ioEx;
    } catch (InterruptedException iEx) {
      throw new RuntimeException(iEx);
    }

  } finally {
    mapper.close();
  }
}
 
开发者ID:naver,项目名称:hadoop,代码行数:64,代码来源:MultithreadedMapRunner.java

示例6: internalInit

import org.apache.hadoop.mapred.RecordReader; //导入方法依赖的package包/类
public  void internalInit(Properties tableProperties, RecordReader<Object, Object> reader) {
  this.key = reader.createKey();
  this.value = reader.createValue();
}
 
开发者ID:dremio,项目名称:dremio-oss,代码行数:5,代码来源:HiveRecordReaders.java

示例7: testHiveInputFormat

import org.apache.hadoop.mapred.RecordReader; //导入方法依赖的package包/类
public void testHiveInputFormat() throws Exception {
  getConnection();
  Connection conn = startNetserverAndGetLocalNetConnection();
  
  Statement st = conn.createStatement();
  st.execute("create hdfsstore myhdfs namenode 'localhost' homedir '" + HDFS_DIR + "' batchtimeinterval 5000 milliseconds");
  st.execute("create table app.mytab1 (col1 int primary key, col2 varchar(100)) persistent hdfsstore (myhdfs) BUCKETS 1");

  PreparedStatement ps = conn.prepareStatement("insert into mytab1 values (?, ?)");
  int NUM_ENTRIES = 20;
  for(int i = 0; i < NUM_ENTRIES; i++) {
    ps.setInt(1, i);
    ps.setString(2, "Value-" + System.nanoTime());
    ps.execute();
  }
  //Wait for data to get to HDFS...
  String qname = HDFSStoreFactoryImpl.getEventQueueName("/APP/MYTAB1");
  st.execute("CALL SYS.WAIT_FOR_SENDER_QUEUE_FLUSH('" + qname + "', 1, 0)");
  
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  FileStatus[] list = fs.listStatus(new Path(HDFS_DIR + "/APP_MYTAB1/0/"));
  assertEquals(1, list.length);
  
  conf.set(RowInputFormat.INPUT_TABLE, "MYTAB1");
  conf.set(RowInputFormat.HOME_DIR, HDFS_DIR);
  
  JobConf job = new JobConf(conf);
  job.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
  GFXDHiveInputFormat ipformat = new GFXDHiveInputFormat();
  InputSplit[] splits = ipformat.getSplits(job, 2);
  assertEquals(1, splits.length);
  GFXDHiveSplit split = (GFXDHiveSplit) splits[0];
  assertEquals(1, split.getPaths().length);
  assertEquals(list[0].getPath().toString(), split.getPath(0).toString());
  assertEquals(0, split.getOffset(0));
  assertEquals(list[0].getLen(), split.getLength(0));

  RecordReader<Key, Row> rr = ipformat.getRecordReader(split, job, null);
  assertTrue("Row record reader should be an instace of GFXDHiveRowRecordReader " +
  		"but it is an instance of " + rr.getClass(),  (rr instanceof GFXDHiveRowRecordReader));
  Key key = rr.createKey();
  Row value = rr.createValue();

  int count = 0;
  while (rr.next(key, value)) {
    assertEquals(count++, value.getRowAsResultSet().getInt("col1"));
  }
  
  assertEquals(20, count);
  
  TestUtil.shutDown();
}
 
开发者ID:gemxd,项目名称:gemfirexd-oss,代码行数:54,代码来源:GFXDHiveInputFormatTest.java

示例8: testEventInputFormat

import org.apache.hadoop.mapred.RecordReader; //导入方法依赖的package包/类
public void testEventInputFormat() throws Exception {
  getConnection();
  Connection conn = startNetserverAndGetLocalNetConnection();
  
  Statement st = conn.createStatement();
  st.execute("create hdfsstore myhdfs namenode 'localhost' homedir '" + HDFS_DIR + "' batchtimeinterval 5000 milliseconds");
  st.execute("create table app.mytab1 (col1 int primary key, col2 varchar(100)) persistent hdfsstore (myhdfs) BUCKETS 1");

  PreparedStatement ps = conn.prepareStatement("insert into mytab1 values (?, ?)");
  int NUM_ENTRIES = 20;
  for(int i = 0; i < NUM_ENTRIES; i++) {
    ps.setInt(1, i);
    ps.setString(2, "Value-" + System.nanoTime());
    ps.execute();
  }
  //Wait for data to get to HDFS...
  String qname = HDFSStoreFactoryImpl.getEventQueueName("/APP/MYTAB1");
  st.execute("CALL SYS.WAIT_FOR_SENDER_QUEUE_FLUSH('" + qname + "', 1, 0)");
  
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  FileStatus[] list = fs.listStatus(new Path(HDFS_DIR + "/APP_MYTAB1/0/"));
  assertEquals(1, list.length);
  
  conf.set(RowInputFormat.INPUT_TABLE, "MYTAB1");
  conf.set(RowInputFormat.HOME_DIR, HDFS_DIR);
  
  JobConf job = new JobConf(conf);
  job.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
  RowInputFormat ipformat = new RowInputFormat();
  InputSplit[] splits = ipformat.getSplits(job, 2);
  assertEquals(1, splits.length);
  CombineFileSplit split = (CombineFileSplit) splits[0];
  assertEquals(1, split.getPaths().length);
  assertEquals(list[0].getPath().toString(), split.getPath(0).toString());
  assertEquals(0, split.getOffset(0));
  assertEquals(list[0].getLen(), split.getLength(0));

  RecordReader<Key, Row> rr = ipformat.getRecordReader(split, job, null);
  Key key = rr.createKey();
  Row value = rr.createValue();

  int count = 0;
  while (rr.next(key, value)) {
    assertEquals(count++, value.getRowAsResultSet().getInt("col1"));
  }
  
  assertEquals(20, count);
  
  TestUtil.shutDown();
}
 
开发者ID:gemxd,项目名称:gemfirexd-oss,代码行数:52,代码来源:EventInputFormatTest.java

示例9: testNoSecureHdfsCheck

import org.apache.hadoop.mapred.RecordReader; //导入方法依赖的package包/类
public void testNoSecureHdfsCheck() throws Exception {
  getConnection();
  Connection conn = startNetserverAndGetLocalNetConnection();
  
  
  Statement st = conn.createStatement();
  st.execute("create hdfsstore myhdfs namenode 'localhost' homedir '" + HDFS_DIR + "'  batchtimeinterval 5000 milliseconds");
  st.execute("create table app.mytab1 (col1 int primary key, col2 varchar(100)) persistent hdfsstore (myhdfs) BUCKETS 1");
  
  PreparedStatement ps = conn.prepareStatement("insert into mytab1 values (?, ?)");
  int NUM_ENTRIES = 20;
  for(int i = 0; i < NUM_ENTRIES; i++) {
    ps.setInt(1, i);
    ps.setString(2, "Value-" + System.nanoTime());
    ps.execute();
  }
  //Wait for data to get to HDFS...
  String qname = HDFSStoreFactoryImpl.getEventQueueName("/APP/MYTAB1");
  st.execute("CALL SYS.WAIT_FOR_SENDER_QUEUE_FLUSH('" + qname + "', 1, 0)");
  
  stopNetServer();
  FabricServiceManager.currentFabricServiceInstance().stop(new Properties());
  
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  FileStatus[] list = fs.listStatus(new Path(HDFS_DIR + "/APP_MYTAB1/0/"));
  assertEquals(1, list.length);
  
  conf.set(RowInputFormat.INPUT_TABLE, "MYTAB1");
  conf.set(RowInputFormat.HOME_DIR, HDFS_DIR);
  conf.set("hadoop.security.authentication", "kerberos");
  
  JobConf job = new JobConf(conf);
  job.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
  RowInputFormat ipformat = new RowInputFormat();
  InputSplit[] splits = ipformat.getSplits(job, 2);
  assertEquals(1, splits.length);
  CombineFileSplit split = (CombineFileSplit) splits[0];
  assertEquals(1, split.getPaths().length);
  assertEquals(list[0].getPath().toString(), split.getPath(0).toString());
  assertEquals(0, split.getOffset(0));
  assertEquals(list[0].getLen(), split.getLength(0));
  
  RecordReader<Key, Row> rr = ipformat.getRecordReader(split, job, null);
  Key key = rr.createKey();
  Row value = rr.createValue();
  
  int count = 0;
  while (rr.next(key, value)) {
    assertEquals(count++, value.getRowAsResultSet().getInt("col1"));
  }
  
  assertEquals(20, count);
  
  TestUtil.shutDown();
}
 
开发者ID:gemxd,项目名称:gemfirexd-oss,代码行数:57,代码来源:EventInputFormatTest.java

示例10: doTestRowSerDe

import org.apache.hadoop.mapred.RecordReader; //导入方法依赖的package包/类
private void doTestRowSerDe(boolean concurrencyChecks) throws Exception {
  getConnection();
  Connection conn = startNetserverAndGetLocalNetConnection();
  final long statTS = System.currentTimeMillis();
  Statement st = conn.createStatement();
  st.execute("create hdfsstore myhdfs namenode 'localhost' homedir '" + HDFS_DIR + "' batchtimeinterval 5000 milliseconds");
  String concurrency = "persistent ENABLE CONCURRENCY CHECKS";
  st.execute("create table app.mytab1 (col1 int primary key, col2 varchar(100)) partition by primary key buckets 1 hdfsstore (myhdfs) "
      +(concurrencyChecks ? concurrency : ""));

  PreparedStatement ps = conn.prepareStatement("insert into mytab1 values (?, ?)");
  ps.setInt(1, 1);
  ps.setString(2, "Value-1");
  ps.execute();
  
  //Wait for data to get to HDFS...
  String qname = HDFSStoreFactoryImpl.getEventQueueName("/APP/MYTAB1");
  st.execute("CALL SYS.WAIT_FOR_SENDER_QUEUE_FLUSH('" + qname + "', 1, 0)");
  
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  FileStatus[] list = fs.listStatus(new Path(HDFS_DIR + "/APP_MYTAB1/0/"));
  assertEquals(1, list.length);
  
  conf.set(RowInputFormat.INPUT_TABLE, "MYTAB1");
  conf.set(RowInputFormat.HOME_DIR, HDFS_DIR);
  
  JobConf job = new JobConf(conf);
  job.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
  RowInputFormat ipformat = new RowInputFormat();
  InputSplit[] splits = ipformat.getSplits(job, 2);
  assertEquals(1, splits.length);
  RecordReader<Key, Row> rr = ipformat.getRecordReader(splits[0], job, null);
  Key key = rr.createKey();
  Row value = rr.createValue();
  assertTrue(rr.next(key, value));
  assertEquals(1, value.getRowAsResultSet().getInt(1));
  assertEquals("Value-1", value.getRowAsResultSet().getString(2));
  assertTrue(value.getTimestamp() > statTS);
  assertFalse(value.getRowAsResultSet().next());
  
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataOutputStream dos = new DataOutputStream(baos);
  value.write(dos);
  dos.close();
  
  byte[] buf = baos.toByteArray();
  DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buf));
  Row row = new Row();
  row.readFields(dis);
  dis.close();
  
  assertEquals(1, row.getRowAsResultSet().getInt(1));
  assertEquals("Value-1", row.getRowAsResultSet().getString(2));
  assertFalse(value.getRowAsResultSet().next());
  
  TestUtil.shutDown();
}
 
开发者ID:gemxd,项目名称:gemfirexd-oss,代码行数:59,代码来源:EventInputFormatTest.java

示例11: deleteTest

import org.apache.hadoop.mapred.RecordReader; //导入方法依赖的package包/类
private void deleteTest( boolean writeOnly, boolean primaryKey, boolean isTransactional) throws Exception {
  //getConnection();
  Connection conn = null;
  if (isTransactional) {
    conn = getTxConnection();//startNetserverAndGetLocalNetConnection();
  } else {
    conn = getConnection();
  }
  
  Statement st = conn.createStatement();
  st.execute("create hdfsstore myhdfs namenode 'localhost' homedir '" + HDFS_DIR + "' batchtimeinterval 2000 milliseconds");
  String primaryKeyString = primaryKey ? "primary key" : "";
  st.execute("create table app.mytab1 (col1 int " + primaryKeyString + ", col2 varchar(100)) BUCKETS 1 persistent hdfsstore (myhdfs) " + (writeOnly? " WRITEONLY " : ""));
  

  PreparedStatement ps = conn.prepareStatement("insert into mytab1 values (?, ?)");
  for(int i = 0; i < 3; i++) {
    ps.setInt(1, i);
    ps.setString(2, "Value-" + System.nanoTime());
    ps.execute();
  }
  
  st.execute("delete from mytab1 where col1 = 1");
  //Wait for data to get to HDFS...
  String qname = HDFSStoreFactoryImpl.getEventQueueName("/APP/MYTAB1");
  st.execute("CALL SYS.WAIT_FOR_SENDER_QUEUE_FLUSH('" + qname + "', 1, 0)");
  
  TestUtil.shutDown();

  FileStatus[] list = null;
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  for (int i = 0; i < 20; i++) {
    list = fs.listStatus(new Path(HDFS_DIR + "/APP_MYTAB1/0/"));
    if (list.length == 1) {
      break;
    }
    Thread.sleep(500);
  }
  if (list.length != 1) {
    fail("unexpected files: " + java.util.Arrays.toString(list));
  }

  conf.set(RowInputFormat.INPUT_TABLE, "MYTAB1");
  conf.set(RowInputFormat.HOME_DIR, HDFS_DIR);
  
  JobConf job = new JobConf(conf);
  job.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
  RowInputFormat ipformat = new RowInputFormat();
  InputSplit[] splits = ipformat.getSplits(job, 2);
  RecordReader<Key, Row> rr = ipformat.getRecordReader(splits[0], job, null);
  Key key = rr.createKey();
  Row value = rr.createValue();

  rr.next(key, value); 
  assertEquals(0, value.getRowAsResultSet().getInt("col1"));
  
  if (!writeOnly) {
    rr.next(key, value);
    checkForDeletedRow(value, primaryKey);
  }
  rr.next(key, value); 
  assertEquals(1, value.getRowAsResultSet().getInt("col1"));
  
  rr.next(key, value); 
  assertEquals(2, value.getRowAsResultSet().getInt("col1"));
  
  if (writeOnly) {
    rr.next(key, value);
    checkForDeletedRow(value, primaryKey);
  }
  assertFalse(rr.next(key,  value));
  
  TestUtil.shutDown();
}
 
开发者ID:gemxd,项目名称:gemfirexd-oss,代码行数:76,代码来源:EventInputFormatTest.java


注:本文中的org.apache.hadoop.mapred.RecordReader.createValue方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。