本文整理汇总了 Java 中 org.apache.hadoop.mapreduce.InputFormat.getSplits 方法的典型用法代码示例。如果您正苦于以下问题：Java InputFormat.getSplits 方法的具体用法？Java InputFormat.getSplits 怎么用？Java InputFormat.getSplits 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.apache.hadoop.mapreduce.InputFormat 的用法示例。
在下文中一共展示了InputFormat.getSplits方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: HadoopElementIterator
import org.apache.hadoop.mapreduce.InputFormat; //导入方法依赖的package包/类
/**
 * Creates the iterator for the given graph by asking the graph's configured input format for
 * its splits and creating one record reader per split.
 *
 * <p>When the input format is a {@code FileInputFormat}, the constructor returns early, without
 * creating any reader, if no input location is configured or if no graph data is found at the
 * configured location (both cases are treated as an empty graph).
 *
 * @param graph the Hadoop graph whose configuration selects the input format and input location
 * @throws IllegalStateException if anything fails while computing splits or creating readers;
 *         the original exception is kept as the cause
 */
public HadoopElementIterator(final HadoopGraph graph) {
    try {
        this.graph = graph;
        final Configuration configuration = ConfUtil.makeHadoopConfiguration(this.graph.configuration());
        final InputFormat<NullWritable, VertexWritable> inputFormat = ConfUtil.getReaderAsInputFormat(configuration);
        if (inputFormat instanceof FileInputFormat) {
            final Storage storage = FileSystemStorage.open(configuration);
            if (!this.graph.configuration().containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
                return; // there is no input location and thus, no data (empty graph)
            // Resolve the location exactly once: a second lookup costs another storage round trip
            // and could come back empty if the location changed between the check and the use.
            final String searchGraphLocation =
                    Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).orElse(null);
            if (null == searchGraphLocation)
                return; // there is no data at the input location (empty graph)
            configuration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, searchGraphLocation);
        }
        final List<InputSplit> splits = inputFormat.getSplits(new JobContextImpl(configuration, new JobID(UUID.randomUUID().toString(), 1)));
        for (final InputSplit split : splits) {
            this.readers.add(inputFormat.createRecordReader(split, new TaskAttemptContextImpl(configuration, new TaskAttemptID())));
        }
    } catch (final Exception e) {
        // The broad catch also covers InterruptedException; keep the interrupt visible to callers.
        if (e instanceof InterruptedException)
            Thread.currentThread().interrupt();
        throw new IllegalStateException(e.getMessage(), e);
    }
}
示例2: countRecords
import org.apache.hadoop.mapreduce.InputFormat; //导入方法依赖的package包/类
/**
 * Reads every split the job produces through a {@code SequenceFileInputFilter} and counts the
 * records the readers return.
 *
 * @param numSplits divisor applied to the length of {@code inFile} to derive the maximum split
 *        size; {@code 0} means a random positive value is chosen instead
 * @return the total number of records returned across all splits
 * @throws IOException propagated from the file system, the input format or the readers
 * @throws InterruptedException propagated from the input format or the readers
 */
private int countRecords(int numSplits)
    throws IOException, InterruptedException {
  final InputFormat<Text, BytesWritable> inputFormat =
      new SequenceFileInputFilter<Text, BytesWritable>();

  // Zero is the "pick one for me" marker; the random draw is always at least 1.
  final int splitCount = (numSplits != 0)
      ? numSplits
      : random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
  final long inputLength = fs.getFileStatus(inFile).getLen();
  FileInputFormat.setMaxInputSplitSize(job, inputLength / splitCount);

  final TaskAttemptContext attemptContext =
      MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());

  // Walk every split and tally what the filter lets through.
  int accepted = 0;
  for (InputSplit currentSplit : inputFormat.getSplits(job)) {
    final RecordReader<Text, BytesWritable> recordReader =
        inputFormat.createRecordReader(currentSplit, attemptContext);
    final MapContext<Text, BytesWritable, Text, BytesWritable> mapContext =
        new MapContextImpl<Text, BytesWritable, Text, BytesWritable>(
            job.getConfiguration(),
            attemptContext.getTaskAttemptID(),
            recordReader,
            null,
            null,
            MapReduceTestUtil.createDummyReporter(),
            currentSplit);
    recordReader.initialize(currentSplit, mapContext);
    try {
      while (recordReader.nextKeyValue()) {
        final String acceptedKey = recordReader.getCurrentKey().toString();
        LOG.info("Accept record " + acceptedKey);
        accepted++;
      }
    } finally {
      recordReader.close();
    }
  }
  return accepted;
}
示例3: getSample
import org.apache.hadoop.mapreduce.InputFormat; //导入方法依赖的package包/类
/**
 * From each split sampled, take the first numSamples / numSplits records.
 *
 * <p>At most {@code maxSplitsSampled} splits are read, in the order returned by
 * {@code inf.getSplits(job)}. The quota is cumulative: reading of split {@code i} stops as soon
 * as {@code (i + 1) * (numSamples / splitsToSample)} keys have been collected in total, so a
 * split that runs short is made up for by the splits that follow it.
 *
 * @param inf input format used to compute the splits and to create a reader for each of them
 * @param job job handed to {@code getSplits} and whose configuration is used to copy keys
 * @return copies of the sampled keys; an empty array when there is no split to sample. The
 *         array comes from {@code ArrayList.toArray()}, hence the unchecked cast.
 * @throws IOException propagated from the input format or a record reader
 * @throws InterruptedException propagated from the input format or a record reader
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, Job job)
    throws IOException, InterruptedException {
  List<InputSplit> splits = inf.getSplits(job);
  ArrayList<K> samples = new ArrayList<K>(numSamples);
  int splitsToSample = Math.min(maxSplitsSampled, splits.size());
  if (splitsToSample <= 0) {
    // Nothing to read; bail out before the division below hits a zero divisor.
    return (K[])samples.toArray();
  }
  int samplesPerSplit = numSamples / splitsToSample;
  long records = 0;
  for (int i = 0; i < splitsToSample; ++i) {
    TaskAttemptContext samplingContext = new TaskAttemptContextImpl(
        job.getConfiguration(), new TaskAttemptID());
    RecordReader<K,V> reader = inf.createRecordReader(
        splits.get(i), samplingContext);
    reader.initialize(splits.get(i), samplingContext);
    try {
      while (reader.nextKeyValue()) {
        // Copy the key so the stored sample does not share the reader's key instance.
        samples.add(ReflectionUtils.copy(job.getConfiguration(),
                                         reader.getCurrentKey(), null));
        ++records;
        if ((i+1) * samplesPerSplit <= records) {
          break;
        }
      }
    } finally {
      // Release the reader even when reading or copying a key fails.
      reader.close();
    }
  }
  return (K[])samples.toArray();
}
示例4: testBinary
import org.apache.hadoop.mapreduce.InputFormat; //导入方法依赖的package包/类
/**
 * Writes {@code RECORDS} pseudo-random {@code Text} key/value pairs to a local sequence file,
 * reads them back as raw bytes through {@code SequenceFileAsBinaryInputFormat}, and checks that
 * every pair decodes to the values that were written and that no record is missing.
 *
 * @throws IOException propagated from the file system, the writer or the readers
 * @throws InterruptedException propagated from the input format or the readers
 */
public void testBinary() throws IOException, InterruptedException {
  final Job job = Job.getInstance();
  final FileSystem fs = FileSystem.getLocal(job.getConfiguration());
  final Path dir = new Path(System.getProperty("test.build.data",".") + "/mapred");
  final Path file = new Path(dir, "testbinary.seq");

  // Seed explicitly so the exact same sequence can be replayed during verification.
  final Random rng = new Random();
  final long seed = rng.nextLong();
  rng.setSeed(seed);

  fs.delete(dir, true);
  FileInputFormat.setInputPaths(job, dir);

  // Phase 1: write the records.
  final Text expectedKey = new Text();
  final Text expectedVal = new Text();
  final SequenceFile.Writer writer = new SequenceFile.Writer(
      fs, job.getConfiguration(), file, Text.class, Text.class);
  try {
    int written = 0;
    while (written < RECORDS) {
      expectedKey.set(Integer.toString(rng.nextInt(), 36));
      expectedVal.set(Long.toString(rng.nextLong(), 36));
      writer.append(expectedKey, expectedVal);
      written++;
    }
  } finally {
    writer.close();
  }

  // Phase 2: read the records back as bytes and compare against the replayed sequence.
  final TaskAttemptContext attemptContext =
      MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
  final InputFormat<BytesWritable,BytesWritable> binaryFormat =
      new SequenceFileAsBinaryInputFormat();
  int matched = 0;
  rng.setSeed(seed);
  final Text decodedKey = new Text();
  final Text decodedVal = new Text();
  final DataInputBuffer buffer = new DataInputBuffer();
  FileInputFormat.setInputPaths(job, file);
  for (InputSplit currentSplit : binaryFormat.getSplits(job)) {
    final RecordReader<BytesWritable, BytesWritable> recordReader =
        binaryFormat.createRecordReader(currentSplit, attemptContext);
    final MapContext<BytesWritable, BytesWritable, BytesWritable, BytesWritable> mapContext =
        new MapContextImpl<BytesWritable, BytesWritable, BytesWritable, BytesWritable>(
            job.getConfiguration(),
            attemptContext.getTaskAttemptID(),
            recordReader,
            null,
            null,
            MapReduceTestUtil.createDummyReporter(),
            currentSplit);
    recordReader.initialize(currentSplit, mapContext);
    try {
      while (recordReader.nextKeyValue()) {
        final BytesWritable rawKey = recordReader.getCurrentKey();
        final BytesWritable rawVal = recordReader.getCurrentValue();

        // Regenerate what the writer produced for this position.
        expectedKey.set(Integer.toString(rng.nextInt(), 36));
        expectedVal.set(Long.toString(rng.nextLong(), 36));

        // Decode the raw bytes back into Text for comparison.
        buffer.reset(rawKey.getBytes(), rawKey.getLength());
        decodedKey.readFields(buffer);
        buffer.reset(rawVal.getBytes(), rawVal.getLength());
        decodedVal.readFields(buffer);

        final String actualKey = decodedKey.toString();
        final String wantedKey = expectedKey.toString();
        assertTrue(
            "Keys don't match: " + "*" + actualKey + ":" + wantedKey + "*",
            actualKey.equals(wantedKey));

        final String actualVal = decodedVal.toString();
        final String wantedVal = expectedVal.toString();
        assertTrue(
            "Vals don't match: " + "*" + actualVal + ":" + wantedVal + "*",
            actualVal.equals(wantedVal));

        matched++;
      }
    } finally {
      recordReader.close();
    }
  }
  assertEquals("Some records not found", RECORDS, matched);
}
示例5: testFormat
import org.apache.hadoop.mapreduce.InputFormat; //导入方法依赖的package包/类
/**
 * Checks that {@code CombineSequenceFileInputFormat} yields exactly one {@code CombineFileSplit}
 * for the generated input, that the split is read by a {@code CombineFileRecordReader}, and that
 * reading it produces {@code length} distinct keys with non-null values. The check is repeated
 * three times.
 *
 * @throws IOException propagated from the file system, the input format or the reader
 * @throws InterruptedException propagated from the input format or the reader
 */
@Test(timeout=10000)
public void testFormat() throws IOException, InterruptedException {
  final Job job = Job.getInstance(conf);

  final Random rng = new Random();
  final long seed = rng.nextLong();
  rng.setSeed(seed);

  localFs.delete(workDir, true);
  FileInputFormat.setInputPaths(job, workDir);

  final int length = 10000;
  final int numFiles = 10;

  // Populate the work directory with files of varying lengths.
  createFiles(length, numFiles, rng, job);

  final TaskAttemptContext attemptContext =
      MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());

  // The format under test, which combines the files into a split.
  final InputFormat<IntWritable,BytesWritable> combineFormat =
      new CombineSequenceFileInputFormat<IntWritable,BytesWritable>();

  for (int round = 0; round < 3; round++) {
    final int requested =
        rng.nextInt(length/(SequenceFile.SYNC_INTERVAL/20)) + 1;
    LOG.info("splitting: requesting = " + requested);

    final List<InputSplit> allSplits = combineFormat.getSplits(job);
    LOG.info("splitting: got = " + allSplits.size());

    // The data is comfortably smaller than the block size, so a single split is expected.
    assertEquals("We got more than one splits!", 1, allSplits.size());
    final InputSplit onlySplit = allSplits.get(0);
    assertEquals("It should be CombineFileSplit",
        CombineFileSplit.class, onlySplit.getClass());

    // Read the split and record every key that shows up.
    final BitSet seenKeys = new BitSet(length);
    final RecordReader<IntWritable,BytesWritable> recordReader =
        combineFormat.createRecordReader(onlySplit, attemptContext);
    final MapContext<IntWritable,BytesWritable,IntWritable,BytesWritable> mapContext =
        new MapContextImpl<IntWritable,BytesWritable,IntWritable,BytesWritable>(
            job.getConfiguration(),
            attemptContext.getTaskAttemptID(),
            recordReader,
            null,
            null,
            MapReduceTestUtil.createDummyReporter(),
            onlySplit);
    recordReader.initialize(onlySplit, mapContext);
    assertEquals("reader class is CombineFileRecordReader.",
        CombineFileRecordReader.class, recordReader.getClass());
    try {
      while (recordReader.nextKeyValue()) {
        final IntWritable currentKey = recordReader.getCurrentKey();
        final BytesWritable currentValue = recordReader.getCurrentValue();
        assertNotNull("Value should not be null.", currentValue);
        final int keyId = currentKey.get();
        LOG.debug("read " + keyId);
        // A key seen twice would mean it was served from more than one place.
        assertFalse("Key in multiple partitions.", seenKeys.get(keyId));
        seenKeys.set(keyId);
      }
    } finally {
      recordReader.close();
    }
    assertEquals("Some keys in no partition.", length, seenKeys.cardinality());
  }
}