This article collects typical usage examples of the Java method org.apache.hadoop.mapred.RecordReader.createKey. If you are wondering exactly what RecordReader.createKey does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also read further about the enclosing class, org.apache.hadoop.mapred.RecordReader.
The 12 code examples of RecordReader.createKey shown below are sorted by popularity by default; you can upvote the ones you find useful, which helps surface better Java examples.
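Before the collected examples, here is a minimal usage sketch of the createKey()/createValue()/next() pattern in the old (org.apache.hadoop.mapred) API. It is not taken from any of the projects below; the TextInputFormat, the single-split count, and the /tmp/input path are illustrative assumptions.

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class CreateKeyDemo {
  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    // Assumed input location; point this at any existing text file or directory.
    FileInputFormat.setInputPaths(job, new Path("/tmp/input"));
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(job);
    for (InputSplit split : inputFormat.getSplits(job, 1)) {
      RecordReader<LongWritable, Text> reader =
          inputFormat.getRecordReader(split, job, Reporter.NULL);
      try {
        // createKey()/createValue() allocate instances of the reader's key and
        // value types; next() then fills those same instances for each record.
        LongWritable key = reader.createKey();
        Text value = reader.createValue();
        while (reader.next(key, value)) {
          System.out.println(key.get() + "\t" + value);
        }
      } finally {
        reader.close();
      }
    }
  }
}

The same allocate-once, fill-in-place pattern appears in most of the examples below; samplers such as Example 1 call createKey() again inside the loop whenever they need to keep the previously filled key.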
Example 1: getSample
import org.apache.hadoop.mapred.RecordReader; // import the package/class this method depends on
/**
 * From each split sampled, take the first numSamples / numSplits records.
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, JobConf job) throws IOException {
  InputSplit[] splits = inf.getSplits(job, job.getNumMapTasks());
  ArrayList<K> samples = new ArrayList<K>(numSamples);
  int splitsToSample = Math.min(maxSplitsSampled, splits.length);
  int splitStep = splits.length / splitsToSample;
  int samplesPerSplit = numSamples / splitsToSample;
  long records = 0;
  for (int i = 0; i < splitsToSample; ++i) {
    RecordReader<K,V> reader = inf.getRecordReader(splits[i * splitStep],
        job, Reporter.NULL);
    K key = reader.createKey();
    V value = reader.createValue();
    while (reader.next(key, value)) {
      samples.add(key);
      key = reader.createKey();
      ++records;
      if ((i+1) * samplesPerSplit <= records) {
        break;
      }
    }
    reader.close();
  }
  return (K[])samples.toArray();
}
Example 2: dumpTypedBytes
import org.apache.hadoop.mapred.RecordReader; // import the package/class this method depends on
/**
 * Dump given list of files to standard output as typed bytes.
 */
@SuppressWarnings("unchecked")
private int dumpTypedBytes(List<FileStatus> files) throws IOException {
  JobConf job = new JobConf(getConf());
  DataOutputStream dout = new DataOutputStream(System.out);
  AutoInputFormat autoInputFormat = new AutoInputFormat();
  for (FileStatus fileStatus : files) {
    FileSplit split = new FileSplit(fileStatus.getPath(), 0,
        fileStatus.getLen() * fileStatus.getBlockSize(),
        (String[]) null);
    RecordReader recReader = null;
    try {
      recReader = autoInputFormat.getRecordReader(split, job, Reporter.NULL);
      Object key = recReader.createKey();
      Object value = recReader.createValue();
      while (recReader.next(key, value)) {
        if (key instanceof Writable) {
          TypedBytesWritableOutput.get(dout).write((Writable) key);
        } else {
          TypedBytesOutput.get(dout).write(key);
        }
        if (value instanceof Writable) {
          TypedBytesWritableOutput.get(dout).write((Writable) value);
        } else {
          TypedBytesOutput.get(dout).write(value);
        }
      }
    } finally {
      if (recReader != null) {
        recReader.close();
      }
    }
  }
  dout.flush();
  return 0;
}
Example 3: verifyWithMockedMapReduce
import org.apache.hadoop.mapred.RecordReader; // import the package/class this method depends on
private void verifyWithMockedMapReduce(JobConf job, int numRegions, int expectedNumSplits,
    byte[] startRow, byte[] stopRow) throws IOException, InterruptedException {
  TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
  InputSplit[] splits = tsif.getSplits(job, 0);
  Assert.assertEquals(expectedNumSplits, splits.length);
  HBaseTestingUtility.SeenRowTracker rowTracker =
      new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);
  for (int i = 0; i < splits.length; i++) {
    // validate input split
    InputSplit split = splits[i];
    Assert.assertTrue(split instanceof TableSnapshotInputFormat.TableSnapshotRegionSplit);
    // validate record reader
    OutputCollector collector = mock(OutputCollector.class);
    Reporter reporter = mock(Reporter.class);
    RecordReader<ImmutableBytesWritable, Result> rr = tsif.getRecordReader(split, job, reporter);
    // validate we can read all the data back
    ImmutableBytesWritable key = rr.createKey();
    Result value = rr.createValue();
    while (rr.next(key, value)) {
      verifyRowFromMap(key, value);
      rowTracker.addRow(key.copyBytes());
    }
    rr.close();
  }
  // validate all rows are seen
  rowTracker.validate();
}
Example 4: run
import org.apache.hadoop.mapred.RecordReader; // import the package/class this method depends on
/**
 * Run the map task.
 * @param input the set of inputs
 * @param output the object to collect the outputs of the map
 * @param reporter the object to update with status
 */
@SuppressWarnings("unchecked")
public void run(RecordReader<K1, V1> input, OutputCollector<K2, V2> output,
    Reporter reporter) throws IOException {
  Application<K1, V1, K2, V2> application = null;
  try {
    RecordReader<FloatWritable, NullWritable> fakeInput =
        (!Submitter.getIsJavaRecordReader(job) &&
         !Submitter.getIsJavaMapper(job)) ?
        (RecordReader<FloatWritable, NullWritable>) input : null;
    application = new Application<K1, V1, K2, V2>(job, fakeInput, output,
        reporter,
        (Class<? extends K2>) job.getOutputKeyClass(),
        (Class<? extends V2>) job.getOutputValueClass());
  } catch (InterruptedException ie) {
    throw new RuntimeException("interrupted", ie);
  }
  DownwardProtocol<K1, V1> downlink = application.getDownlink();
  boolean isJavaInput = Submitter.getIsJavaRecordReader(job);
  downlink.runMap(reporter.getInputSplit(),
      job.getNumReduceTasks(), isJavaInput);
  boolean skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
  try {
    if (isJavaInput) {
      // allocate key & value instances that are re-used for all entries
      K1 key = input.createKey();
      V1 value = input.createValue();
      downlink.setInputTypes(key.getClass().getName(),
          value.getClass().getName());
      while (input.next(key, value)) {
        // map pair to output
        downlink.mapItem(key, value);
        if (skipping) {
          // flush the streams on every record input if running in skip mode
          // so that we don't buffer other records surrounding a bad record.
          downlink.flush();
        }
      }
      downlink.endOfInput();
    }
    application.waitForFinish();
  } catch (Throwable t) {
    application.abort(t);
  } finally {
    application.cleanup();
  }
}
Example 5: run
import org.apache.hadoop.mapred.RecordReader; // import the package/class this method depends on
public void run(RecordReader<K1, V1> input, OutputCollector<K2, V2> output,
    Reporter reporter)
    throws IOException {
  try {
    // allocate key & value instances these objects will not be reused
    // because execution of Mapper.map is not serialized.
    K1 key = input.createKey();
    V1 value = input.createValue();
    while (input.next(key, value)) {
      executorService.execute(new MapperInvokeRunable(key, value, output,
          reporter));
      checkForExceptionsFromProcessingThreads();
      // Allocate new key & value instances as mapper is running in parallel
      key = input.createKey();
      value = input.createValue();
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Finished dispatching all Mappper.map calls, job "
          + job.getJobName());
    }
    // Graceful shutdown of the Threadpool, it will let all scheduled
    // Runnables to end.
    executorService.shutdown();
    try {
      // Now waiting for all Runnables to end.
      while (!executorService.awaitTermination(100, TimeUnit.MILLISECONDS)) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Awaiting all running Mappper.map calls to finish, job "
              + job.getJobName());
        }
        // NOTE: while Mapper.map dispatching has concluded there are still
        // map calls in progress and exceptions would be thrown.
        checkForExceptionsFromProcessingThreads();
      }
      // NOTE: it could be that a map call has had an exception after the
      // call for awaitTermination() returing true. And edge case but it
      // could happen.
      checkForExceptionsFromProcessingThreads();
    } catch (IOException ioEx) {
      // Forcing a shutdown of all thread of the threadpool and rethrowing
      // the IOException
      executorService.shutdownNow();
      throw ioEx;
    } catch (InterruptedException iEx) {
      throw new RuntimeException(iEx);
    }
  } finally {
    mapper.close();
  }
}
Example 6: internalInit
import org.apache.hadoop.mapred.RecordReader; // import the package/class this method depends on
@Override
public void internalInit(Properties tableProperties, RecordReader<Object, Object> reader) {
  key = reader.createKey();
  skipRecordsInspector = new SkipRecordsInspector(tableProperties, reader);
}
Example 7: internalInit
import org.apache.hadoop.mapred.RecordReader; // import the package/class this method depends on
public void internalInit(Properties tableProperties, RecordReader<Object, Object> reader) {
  this.key = reader.createKey();
  this.value = reader.createValue();
}
Example 8: testHiveInputFormat
import org.apache.hadoop.mapred.RecordReader; // import the package/class this method depends on
public void testHiveInputFormat() throws Exception {
  getConnection();
  Connection conn = startNetserverAndGetLocalNetConnection();
  Statement st = conn.createStatement();
  st.execute("create hdfsstore myhdfs namenode 'localhost' homedir '" + HDFS_DIR + "' batchtimeinterval 5000 milliseconds");
  st.execute("create table app.mytab1 (col1 int primary key, col2 varchar(100)) persistent hdfsstore (myhdfs) BUCKETS 1");
  PreparedStatement ps = conn.prepareStatement("insert into mytab1 values (?, ?)");
  int NUM_ENTRIES = 20;
  for(int i = 0; i < NUM_ENTRIES; i++) {
    ps.setInt(1, i);
    ps.setString(2, "Value-" + System.nanoTime());
    ps.execute();
  }
  //Wait for data to get to HDFS...
  String qname = HDFSStoreFactoryImpl.getEventQueueName("/APP/MYTAB1");
  st.execute("CALL SYS.WAIT_FOR_SENDER_QUEUE_FLUSH('" + qname + "', 1, 0)");
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  FileStatus[] list = fs.listStatus(new Path(HDFS_DIR + "/APP_MYTAB1/0/"));
  assertEquals(1, list.length);
  conf.set(RowInputFormat.INPUT_TABLE, "MYTAB1");
  conf.set(RowInputFormat.HOME_DIR, HDFS_DIR);
  JobConf job = new JobConf(conf);
  job.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
  GFXDHiveInputFormat ipformat = new GFXDHiveInputFormat();
  InputSplit[] splits = ipformat.getSplits(job, 2);
  assertEquals(1, splits.length);
  GFXDHiveSplit split = (GFXDHiveSplit) splits[0];
  assertEquals(1, split.getPaths().length);
  assertEquals(list[0].getPath().toString(), split.getPath(0).toString());
  assertEquals(0, split.getOffset(0));
  assertEquals(list[0].getLen(), split.getLength(0));
  RecordReader<Key, Row> rr = ipformat.getRecordReader(split, job, null);
  assertTrue("Row record reader should be an instace of GFXDHiveRowRecordReader " +
      "but it is an instance of " + rr.getClass(), (rr instanceof GFXDHiveRowRecordReader));
  Key key = rr.createKey();
  Row value = rr.createValue();
  int count = 0;
  while (rr.next(key, value)) {
    assertEquals(count++, value.getRowAsResultSet().getInt("col1"));
  }
  assertEquals(20, count);
  TestUtil.shutDown();
}
Example 9: testEventInputFormat
import org.apache.hadoop.mapred.RecordReader; // import the package/class this method depends on
public void testEventInputFormat() throws Exception {
  getConnection();
  Connection conn = startNetserverAndGetLocalNetConnection();
  Statement st = conn.createStatement();
  st.execute("create hdfsstore myhdfs namenode 'localhost' homedir '" + HDFS_DIR + "' batchtimeinterval 5000 milliseconds");
  st.execute("create table app.mytab1 (col1 int primary key, col2 varchar(100)) persistent hdfsstore (myhdfs) BUCKETS 1");
  PreparedStatement ps = conn.prepareStatement("insert into mytab1 values (?, ?)");
  int NUM_ENTRIES = 20;
  for(int i = 0; i < NUM_ENTRIES; i++) {
    ps.setInt(1, i);
    ps.setString(2, "Value-" + System.nanoTime());
    ps.execute();
  }
  //Wait for data to get to HDFS...
  String qname = HDFSStoreFactoryImpl.getEventQueueName("/APP/MYTAB1");
  st.execute("CALL SYS.WAIT_FOR_SENDER_QUEUE_FLUSH('" + qname + "', 1, 0)");
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  FileStatus[] list = fs.listStatus(new Path(HDFS_DIR + "/APP_MYTAB1/0/"));
  assertEquals(1, list.length);
  conf.set(RowInputFormat.INPUT_TABLE, "MYTAB1");
  conf.set(RowInputFormat.HOME_DIR, HDFS_DIR);
  JobConf job = new JobConf(conf);
  job.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
  RowInputFormat ipformat = new RowInputFormat();
  InputSplit[] splits = ipformat.getSplits(job, 2);
  assertEquals(1, splits.length);
  CombineFileSplit split = (CombineFileSplit) splits[0];
  assertEquals(1, split.getPaths().length);
  assertEquals(list[0].getPath().toString(), split.getPath(0).toString());
  assertEquals(0, split.getOffset(0));
  assertEquals(list[0].getLen(), split.getLength(0));
  RecordReader<Key, Row> rr = ipformat.getRecordReader(split, job, null);
  Key key = rr.createKey();
  Row value = rr.createValue();
  int count = 0;
  while (rr.next(key, value)) {
    assertEquals(count++, value.getRowAsResultSet().getInt("col1"));
  }
  assertEquals(20, count);
  TestUtil.shutDown();
}
Example 10: testNoSecureHdfsCheck
import org.apache.hadoop.mapred.RecordReader; // import the package/class this method depends on
public void testNoSecureHdfsCheck() throws Exception {
  getConnection();
  Connection conn = startNetserverAndGetLocalNetConnection();
  Statement st = conn.createStatement();
  st.execute("create hdfsstore myhdfs namenode 'localhost' homedir '" + HDFS_DIR + "' batchtimeinterval 5000 milliseconds");
  st.execute("create table app.mytab1 (col1 int primary key, col2 varchar(100)) persistent hdfsstore (myhdfs) BUCKETS 1");
  PreparedStatement ps = conn.prepareStatement("insert into mytab1 values (?, ?)");
  int NUM_ENTRIES = 20;
  for(int i = 0; i < NUM_ENTRIES; i++) {
    ps.setInt(1, i);
    ps.setString(2, "Value-" + System.nanoTime());
    ps.execute();
  }
  //Wait for data to get to HDFS...
  String qname = HDFSStoreFactoryImpl.getEventQueueName("/APP/MYTAB1");
  st.execute("CALL SYS.WAIT_FOR_SENDER_QUEUE_FLUSH('" + qname + "', 1, 0)");
  stopNetServer();
  FabricServiceManager.currentFabricServiceInstance().stop(new Properties());
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  FileStatus[] list = fs.listStatus(new Path(HDFS_DIR + "/APP_MYTAB1/0/"));
  assertEquals(1, list.length);
  conf.set(RowInputFormat.INPUT_TABLE, "MYTAB1");
  conf.set(RowInputFormat.HOME_DIR, HDFS_DIR);
  conf.set("hadoop.security.authentication", "kerberos");
  JobConf job = new JobConf(conf);
  job.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
  RowInputFormat ipformat = new RowInputFormat();
  InputSplit[] splits = ipformat.getSplits(job, 2);
  assertEquals(1, splits.length);
  CombineFileSplit split = (CombineFileSplit) splits[0];
  assertEquals(1, split.getPaths().length);
  assertEquals(list[0].getPath().toString(), split.getPath(0).toString());
  assertEquals(0, split.getOffset(0));
  assertEquals(list[0].getLen(), split.getLength(0));
  RecordReader<Key, Row> rr = ipformat.getRecordReader(split, job, null);
  Key key = rr.createKey();
  Row value = rr.createValue();
  int count = 0;
  while (rr.next(key, value)) {
    assertEquals(count++, value.getRowAsResultSet().getInt("col1"));
  }
  assertEquals(20, count);
  TestUtil.shutDown();
}
Example 11: doTestRowSerDe
import org.apache.hadoop.mapred.RecordReader; // import the package/class this method depends on
private void doTestRowSerDe(boolean concurrencyChecks) throws Exception {
  getConnection();
  Connection conn = startNetserverAndGetLocalNetConnection();
  final long statTS = System.currentTimeMillis();
  Statement st = conn.createStatement();
  st.execute("create hdfsstore myhdfs namenode 'localhost' homedir '" + HDFS_DIR + "' batchtimeinterval 5000 milliseconds");
  String concurrency = "persistent ENABLE CONCURRENCY CHECKS";
  st.execute("create table app.mytab1 (col1 int primary key, col2 varchar(100)) partition by primary key buckets 1 hdfsstore (myhdfs) "
      + (concurrencyChecks ? concurrency : ""));
  PreparedStatement ps = conn.prepareStatement("insert into mytab1 values (?, ?)");
  ps.setInt(1, 1);
  ps.setString(2, "Value-1");
  ps.execute();
  //Wait for data to get to HDFS...
  String qname = HDFSStoreFactoryImpl.getEventQueueName("/APP/MYTAB1");
  st.execute("CALL SYS.WAIT_FOR_SENDER_QUEUE_FLUSH('" + qname + "', 1, 0)");
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  FileStatus[] list = fs.listStatus(new Path(HDFS_DIR + "/APP_MYTAB1/0/"));
  assertEquals(1, list.length);
  conf.set(RowInputFormat.INPUT_TABLE, "MYTAB1");
  conf.set(RowInputFormat.HOME_DIR, HDFS_DIR);
  JobConf job = new JobConf(conf);
  job.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
  RowInputFormat ipformat = new RowInputFormat();
  InputSplit[] splits = ipformat.getSplits(job, 2);
  assertEquals(1, splits.length);
  RecordReader<Key, Row> rr = ipformat.getRecordReader(splits[0], job, null);
  Key key = rr.createKey();
  Row value = rr.createValue();
  assertTrue(rr.next(key, value));
  assertEquals(1, value.getRowAsResultSet().getInt(1));
  assertEquals("Value-1", value.getRowAsResultSet().getString(2));
  assertTrue(value.getTimestamp() > statTS);
  assertFalse(value.getRowAsResultSet().next());
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataOutputStream dos = new DataOutputStream(baos);
  value.write(dos);
  dos.close();
  byte[] buf = baos.toByteArray();
  DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buf));
  Row row = new Row();
  row.readFields(dis);
  dis.close();
  assertEquals(1, row.getRowAsResultSet().getInt(1));
  assertEquals("Value-1", row.getRowAsResultSet().getString(2));
  assertFalse(value.getRowAsResultSet().next());
  TestUtil.shutDown();
}
Example 12: deleteTest
import org.apache.hadoop.mapred.RecordReader; // import the package/class this method depends on
private void deleteTest(boolean writeOnly, boolean primaryKey, boolean isTransactional) throws Exception {
  //getConnection();
  Connection conn = null;
  if (isTransactional) {
    conn = getTxConnection();//startNetserverAndGetLocalNetConnection();
  } else {
    conn = getConnection();
  }
  Statement st = conn.createStatement();
  st.execute("create hdfsstore myhdfs namenode 'localhost' homedir '" + HDFS_DIR + "' batchtimeinterval 2000 milliseconds");
  String primaryKeyString = primaryKey ? "primary key" : "";
  st.execute("create table app.mytab1 (col1 int " + primaryKeyString + ", col2 varchar(100)) BUCKETS 1 persistent hdfsstore (myhdfs) " + (writeOnly? " WRITEONLY " : ""));
  PreparedStatement ps = conn.prepareStatement("insert into mytab1 values (?, ?)");
  for(int i = 0; i < 3; i++) {
    ps.setInt(1, i);
    ps.setString(2, "Value-" + System.nanoTime());
    ps.execute();
  }
  st.execute("delete from mytab1 where col1 = 1");
  //Wait for data to get to HDFS...
  String qname = HDFSStoreFactoryImpl.getEventQueueName("/APP/MYTAB1");
  st.execute("CALL SYS.WAIT_FOR_SENDER_QUEUE_FLUSH('" + qname + "', 1, 0)");
  TestUtil.shutDown();
  FileStatus[] list = null;
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  for (int i = 0; i < 20; i++) {
    list = fs.listStatus(new Path(HDFS_DIR + "/APP_MYTAB1/0/"));
    if (list.length == 1) {
      break;
    }
    Thread.sleep(500);
  }
  if (list.length != 1) {
    fail("unexpected files: " + java.util.Arrays.toString(list));
  }
  conf.set(RowInputFormat.INPUT_TABLE, "MYTAB1");
  conf.set(RowInputFormat.HOME_DIR, HDFS_DIR);
  JobConf job = new JobConf(conf);
  job.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
  RowInputFormat ipformat = new RowInputFormat();
  InputSplit[] splits = ipformat.getSplits(job, 2);
  RecordReader<Key, Row> rr = ipformat.getRecordReader(splits[0], job, null);
  Key key = rr.createKey();
  Row value = rr.createValue();
  rr.next(key, value);
  assertEquals(0, value.getRowAsResultSet().getInt("col1"));
  if (!writeOnly) {
    rr.next(key, value);
    checkForDeletedRow(value, primaryKey);
  }
  rr.next(key, value);
  assertEquals(1, value.getRowAsResultSet().getInt("col1"));
  rr.next(key, value);
  assertEquals(2, value.getRowAsResultSet().getInt("col1"));
  if (writeOnly) {
    rr.next(key, value);
    checkForDeletedRow(value, primaryKey);
  }
  assertFalse(rr.next(key, value));
  TestUtil.shutDown();
}