This article collects typical usage examples of the Java class org.apache.hadoop.mapred.FileInputFormat. If you are wondering what FileInputFormat does, how to use it, or where to find working examples, the curated code samples below should help.
The FileInputFormat class belongs to the org.apache.hadoop.mapred package. Fifteen code examples of the class are shown below, ordered by popularity.
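Before the individual examples, here is a minimal sketch of the three FileInputFormat calls they exercise most often: setInputPaths (replace the configured input paths), addInputPath (append a single path), and getInputPaths (read them back). The job name and paths are made up purely for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;

// Sketch only: a JobConf with hypothetical input paths.
JobConf job = new JobConf(new Configuration());
job.setJobName("file-input-format-demo");
// Replace whatever input paths are currently configured with a single path.
FileInputFormat.setInputPaths(job, new Path("/data/in"));
// Append one more input path without touching the existing ones.
FileInputFormat.addInputPath(job, new Path("/data/more"));
// Read back everything configured so far.
Path[] inputs = FileInputFormat.getInputPaths(job);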
Example 1: runTests
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
/**
 * Run the test
 *
 * @throws IOException on error
 */
public static void runTests() throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);
  JobConf job = new JobConf(config, NNBench.class);
  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);
  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);
  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);
  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);
  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
Example 2: runIOTest
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
private void runIOTest(
    Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass,
    Path outputDir) throws IOException {
  JobConf job = new JobConf(config, TestDFSIO.class);
  FileInputFormat.setInputPaths(job, getControlDir(config));
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(mapperClass);
  job.setReducerClass(AccumulatingReducer.class);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example 3: configure
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);
  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(1);
  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up the input data: a single file containing two lines
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();
  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }
  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(outDir,
          new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    // make sure we get what we expect as the first line, and also
    // that we have two lines
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}
Example 4: createCopyJob
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
/**
 * Creates a simple copy job.
 *
 * @param indirs List of input directories.
 * @param outdir Output directory.
 * @return JobConf initialised for a simple copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
static JobConf createCopyJob(List<Path> indirs, Path outdir) throws Exception {
  Configuration defaults = new Configuration();
  JobConf theJob = new JobConf(defaults, TestJobControl.class);
  theJob.setJobName("DataMoveJob");
  FileInputFormat.setInputPaths(theJob, indirs.toArray(new Path[0]));
  theJob.setMapperClass(DataCopy.class);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(DataCopy.class);
  theJob.setNumMapTasks(12);
  theJob.setNumReduceTasks(4);
  return theJob;
}
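For illustration only, a caller might build and launch this copy job as sketched below; the input and output paths are hypothetical, and the snippet assumes the same imports as the example above plus java.util.Arrays and java.util.List.

// Hypothetical usage of createCopyJob: build the JobConf and run it synchronously.
List<Path> inputs = Arrays.asList(new Path("/data/in1"), new Path("/data/in2"));
Path output = new Path("/data/copy-out");
JobConf copyJob = createCopyJob(inputs, output);
JobClient.runJob(copyJob);  // blocks until the MapReduce job completes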
Example 5: validateInput
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
public void validateInput(JobConf job) throws IOException {
  // expecting exactly one path
  Path[] tableNames = FileInputFormat.getInputPaths(job);
  if (tableNames == null || tableNames.length > 1) {
    throw new IOException("expecting one table name");
  }
  // connected to table?
  if (getHTable() == null) {
    throw new IOException("could not connect to table '" +
        tableNames[0].getName() + "'");
  }
  // expecting at least one column
  String colArg = job.get(COLUMN_LIST);
  if (colArg == null || colArg.length() == 0) {
    throw new IOException("expecting at least one column");
  }
}
Example 6: getOldAPIJobconf
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
private static JobConf getOldAPIJobconf(Configuration configuration, String name,
    String input, String output) throws Exception {
  final JobConf jobConf = new JobConf(configuration);
  final FileSystem fs = FileSystem.get(configuration);
  if (fs.exists(new Path(output))) {
    fs.delete(new Path(output), true);
  }
  fs.close();
  jobConf.setJobName(name);
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(IntWritable.class);
  jobConf.setMapperClass(WordCountWithOldAPI.TokenizerMapperWithOldAPI.class);
  jobConf.setCombinerClass(WordCountWithOldAPI.IntSumReducerWithOldAPI.class);
  jobConf.setReducerClass(WordCountWithOldAPI.IntSumReducerWithOldAPI.class);
  jobConf.setInputFormat(SequenceFileInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(jobConf, new Path(input));
  FileOutputFormat.setOutputPath(jobConf, new Path(output));
  return jobConf;
}
Example 7: configure
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
public void configure(JobConf job) {
  // Set the mapper and reducers
  job.setMapperClass(TestMapper.class);
  // job.setReducerClass(TestReducer.class);
  // Set the output types of the mapper and reducer
  // job.setMapOutputKeyClass(IntWritable.class);
  // job.setMapOutputValueClass(NullWritable.class);
  // job.setOutputKeyClass(NullWritable.class);
  // job.setOutputValueClass(NullWritable.class);
  // Make sure this jar is included
  job.setJarByClass(TestMapper.class);
  // Specify the input and output data formats
  job.setInputFormat(TextInputFormat.class);
  job.setOutputFormat(NullOutputFormat.class);
  // Turn off speculative execution
  job.setMapSpeculativeExecution(false);
  job.setReduceSpeculativeExecution(false);
  // Add the job input path
  FileInputFormat.addInputPath(job, new Path(this.input_filename));
}
Example 8: readEthereumBlockInputFormatGenesisBlock
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
@Test
public void readEthereumBlockInputFormatGenesisBlock() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "ethgenesis.bin";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for genesis block");
  RecordReader<BytesWritable, EthereumBlock> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");
  BytesWritable key = new BytesWritable();
  EthereumBlock block = new EthereumBlock();
  assertTrue(reader.next(key, block), "Input Split for genesis block contains at least one block");
  assertEquals(0, block.getEthereumTransactions().size(), "Genesis Block must have 0 transactions");
  assertFalse(reader.next(key, block), "No further blocks in genesis Block");
  reader.close();
}
Example 9: main
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
  JobConf conf = new JobConf(WeatherData.class);
  conf.setJobName("temp");
  // Note: the mapper's output types are not the defaults, so they
  // must be declared explicitly.
  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(Text.class);
  conf.setMapperClass(MaxTemperatureMapper.class);
  conf.setReducerClass(MaxTemperatureReducer.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));
  JobClient.runJob(conf);
}
Example 10: readEthereumBlockInputFormatBlock3346406
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
@Test
public void readEthereumBlockInputFormatBlock3346406() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "eth3346406.bin";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for block 3346406");
  RecordReader<BytesWritable, EthereumBlock> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");
  BytesWritable key = new BytesWritable();
  EthereumBlock block = new EthereumBlock();
  assertTrue(reader.next(key, block), "Input Split for block 3346406 contains at least one block");
  assertEquals(7, block.getEthereumTransactions().size(), "Block 3346406 must have 7 transactions");
  assertFalse(reader.next(key, block), "No further blocks in block 3346406");
  reader.close();
}
Example 11: merge
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
public void merge(Path output, Path[] dbs, boolean normalize, boolean filter)
    throws Exception {
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("LinkDb merge: starting at " + sdf.format(start));
  JobConf job = createMergeJob(getConf(), output, normalize, filter);
  for (int i = 0; i < dbs.length; i++) {
    FileInputFormat.addInputPath(job, new Path(dbs[i], LinkDb.CURRENT_NAME));
  }
  JobClient.runJob(job);
  FileSystem fs = FileSystem.get(getConf());
  fs.mkdirs(output);
  fs.rename(FileOutputFormat.getOutputPath(job), new Path(output,
      LinkDb.CURRENT_NAME));
  long end = System.currentTimeMillis();
  LOG.info("LinkDb merge: finished at " + sdf.format(end) + ", elapsed: "
      + TimingUtil.elapsedTime(start, end));
}
Example 12: readEthereumBlockInputFormatBlock1346406GzipCompressed
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
@Test
public void readEthereumBlockInputFormatBlock1346406GzipCompressed() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "eth1346406.bin.gz";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for block 1346406");
  RecordReader<BytesWritable, EthereumBlock> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");
  BytesWritable key = new BytesWritable();
  EthereumBlock block = new EthereumBlock();
  assertTrue(reader.next(key, block), "Input Split for block 1346406 contains at least one block");
  assertEquals(6, block.getEthereumTransactions().size(), "Block 1346406 must have 6 transactions");
  assertFalse(reader.next(key, block), "No further blocks in block 1346406");
  reader.close();
}
Example 13: createJobConf
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
private JobConf createJobConf() throws IOException {
  JobConf conf = HdpBootstrap.hadoopConfig();
  conf.setInputFormat(EsInputFormat.class);
  conf.setOutputFormat(PrintStreamOutputFormat.class);
  conf.setOutputKeyClass(Text.class);
  boolean type = random.nextBoolean();
  Class<?> mapType = (type ? MapWritable.class : LinkedMapWritable.class);
  conf.setOutputValueClass(mapType);
  HadoopCfgUtils.setGenericOptions(conf);
  conf.set(ConfigurationOptions.ES_QUERY, query);
  conf.setNumReduceTasks(0);
  conf.set(ConfigurationOptions.ES_READ_METADATA, String.valueOf(readMetadata));
  conf.set(ConfigurationOptions.ES_READ_METADATA_VERSION, String.valueOf(true));
  conf.set(ConfigurationOptions.ES_OUTPUT_JSON, String.valueOf(readAsJson));
  QueryTestParams.provisionQueries(conf);
  FileInputFormat.setInputPaths(conf, new Path(TestUtils.sampleArtistsDat()));
  HdpBootstrap.addProperties(conf, TestSettings.TESTING_PROPS, false);
  return conf;
}
Example 14: readEthereumBlockInputFormatBlock1346406
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
@Test
public void readEthereumBlockInputFormatBlock1346406() throws IOException, EthereumBlockReadException, ParseException, InterruptedException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "eth1346406.bin";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  EthereumBlockFileInputFormat format = new EthereumBlockFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for block 1346406");
  RecordReader<BytesWritable, EthereumBlock> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");
  BytesWritable key = new BytesWritable();
  EthereumBlock block = new EthereumBlock();
  assertTrue(reader.next(key, block), "Input Split for block 1346406 contains at least one block");
  assertEquals(6, block.getEthereumTransactions().size(), "Block 1346406 must have 6 transactions");
  assertFalse(reader.next(key, block), "No further blocks in block 1346406");
  reader.close();
}
Example 15: setInputPaths
import org.apache.hadoop.mapred.FileInputFormat; // import the required package/class
/**
 * setInputPaths adds all the paths in the provided list to the JobConf object
 * as input paths for the job.
 *
 * @param job the job configuration to update
 * @param pathsToAdd the input paths to append
 */
public static void setInputPaths(JobConf job, List<Path> pathsToAdd) {
  Path[] addedPaths = FileInputFormat.getInputPaths(job);
  if (addedPaths == null) {
    addedPaths = new Path[0];
  }
  Path[] combined = new Path[addedPaths.length + pathsToAdd.size()];
  System.arraycopy(addedPaths, 0, combined, 0, addedPaths.length);
  int i = 0;
  for (Path p : pathsToAdd) {
    combined[addedPaths.length + (i++)] = p;
  }
  FileInputFormat.setInputPaths(job, combined);
}