This article collects typical usage examples of the Java class org.apache.hadoop.mapred.SequenceFileInputFormat. If you are unsure what SequenceFileInputFormat does or how to use it, the curated class examples below may help.
SequenceFileInputFormat belongs to the org.apache.hadoop.mapred package. Fifteen code examples of the class are shown below, sorted by popularity by default.
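Before the individual project examples, here is a minimal, self-contained sketch of the pattern they all share: configuring an old-API (org.apache.hadoop.mapred) JobConf to read SequenceFile input. The class name, the command-line input/output paths, the identity mapper/reducer, and the assumption that the SequenceFiles hold Text keys and Text values are illustrative choices for this sketch, not taken from any of the projects below.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SequenceFileInputFormatExample {
  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(SequenceFileInputFormatExample.class);
    job.setJobName("sequencefile-to-text");

    // Read SequenceFiles under args[0]; assumes the files store <Text, Text> records.
    job.setInputFormat(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));

    // Identity map/reduce: pass the records through unchanged.
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(IdentityReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Write the records back out as plain text under args[1].
    job.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    JobClient.runJob(job);
  }
}

As the examples below show, the same three calls recur everywhere: job.setInputFormat(SequenceFileInputFormat.class), FileInputFormat.setInputPaths(...), and JobClient.runJob(job).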
Example 1: runTests
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
/**
 * Run the test
 *
 * @throws IOException on error
 */
public static void runTests() throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);
  JobConf job = new JobConf(config, NNBench.class);
  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);
  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);
  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);
  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);
  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
Example 2: runIOTest
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
private void runIOTest(
    Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass,
    Path outputDir) throws IOException {
  JobConf job = new JobConf(config, TestDFSIO.class);
  FileInputFormat.setInputPaths(job, getControlDir(config));
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(mapperClass);
  job.setReducerClass(AccumulatingReducer.class);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example 3: joinAs
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/" + jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapreduce.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));
  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
Example 4: getOldAPIJobconf
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
private static JobConf getOldAPIJobconf(Configuration configuration, String name,
    String input, String output) throws Exception {
  final JobConf jobConf = new JobConf(configuration);
  final FileSystem fs = FileSystem.get(configuration);
  if (fs.exists(new Path(output))) {
    fs.delete(new Path(output), true);
  }
  fs.close();
  jobConf.setJobName(name);
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(IntWritable.class);
  jobConf.setMapperClass(WordCountWithOldAPI.TokenizerMapperWithOldAPI.class);
  jobConf.setCombinerClass(WordCountWithOldAPI.IntSumReducerWithOldAPI.class);
  jobConf.setReducerClass(WordCountWithOldAPI.IntSumReducerWithOldAPI.class);
  jobConf.setInputFormat(SequenceFileInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);
  FileInputFormat.setInputPaths(jobConf, new Path(input));
  FileOutputFormat.setOutputPath(jobConf, new Path(output));
  return jobConf;
}
Example 5: createMergeJob
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
public static JobConf createMergeJob(Configuration config, Path linkDb,
    boolean normalize, boolean filter) {
  Path newLinkDb = new Path("linkdb-merge-"
      + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
  JobConf job = new NutchJob(config);
  job.setJobName("linkdb merge " + linkDb);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(LinkDbFilter.class);
  job.setBoolean(LinkDbFilter.URL_NORMALIZING, normalize);
  job.setBoolean(LinkDbFilter.URL_FILTERING, filter);
  job.setReducerClass(LinkDbMerger.class);
  FileOutputFormat.setOutputPath(job, newLinkDb);
  job.setOutputFormat(MapFileOutputFormat.class);
  job.setBoolean("mapred.output.compress", true);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Inlinks.class);
  // https://issues.apache.org/jira/browse/NUTCH-1069
  job.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
  return job;
}
Example 6: writeTest
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
public static void writeTest(FileSystem fs, boolean fastCheck)
    throws Exception {
  fs.delete(DATA_DIR, true);
  fs.delete(WRITE_DIR, true);
  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);
  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(WriteMapper.class);
  job.setReducerClass(LongSumReducer.class);
  FileOutputFormat.setOutputPath(job, WRITE_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example 7: readTest
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
public static void readTest(FileSystem fs, boolean fastCheck)
    throws Exception {
  fs.delete(READ_DIR, true);
  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);
  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(ReadMapper.class);
  job.setReducerClass(LongSumReducer.class);
  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example 8: seekTest
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
public static void seekTest(FileSystem fs, boolean fastCheck)
    throws Exception {
  fs.delete(READ_DIR, true);
  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);
  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(SeekMapper.class);
  job.setReducerClass(LongSumReducer.class);
  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example 9: joinAs
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/" + jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapred.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));
  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
Example 10: runTests
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
/**
 * Run the test
 *
 * @throws IOException on error
 */
public static void runTests(Configuration config) throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);
  JobConf job = new JobConf(config, NNBench.class);
  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);
  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);
  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);
  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);
  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
Example 11: updateJobConf
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
private void updateJobConf(JobConf conf, Path inputPath, Path outputPath) {
  // set specific job config
  conf.setLong(NUMBER_OF_MAPS_KEY, nmaps);
  conf.setLong(NUMBER_OF_THREADS_KEY, nthreads);
  conf.setInt(BUFFER_SIZE_KEY, buffersize);
  conf.setLong(WRITER_DATARATE_KEY, datarate);
  conf.setLong("mapred.task.timeout", Long.MAX_VALUE);
  conf.set(OUTPUT_DIR_KEY, output);
  // set the output and input for the map reduce
  FileInputFormat.setInputPaths(conf, inputPath);
  FileOutputFormat.setOutputPath(conf, outputPath);
  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setNumReduceTasks(1);
  conf.setSpeculativeExecution(false);
}
Example 12: createMergeJob
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
public static JobConf createMergeJob(Configuration config, Path linkDb, boolean normalize, boolean filter) {
  Path newLinkDb =
      new Path("linkdb-merge-" +
          Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
  JobConf job = new NutchJob(config);
  job.setJobName("linkdb merge " + linkDb);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(LinkDbFilter.class);
  job.setBoolean(LinkDbFilter.URL_NORMALIZING, normalize);
  job.setBoolean(LinkDbFilter.URL_FILTERING, filter);
  job.setReducerClass(LinkDbMerger.class);
  FileOutputFormat.setOutputPath(job, newLinkDb);
  job.setOutputFormat(MapFileOutputFormat.class);
  job.setBoolean("mapred.output.compress", true);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Inlinks.class);
  // https://issues.apache.org/jira/browse/NUTCH-1069
  job.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
  return job;
}
Example 13: delete
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
public void delete(String crawldb, String solrUrl, boolean noCommit) throws IOException {
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("SolrClean: starting at " + sdf.format(start));
  JobConf job = new NutchJob(getConf());
  FileInputFormat.addInputPath(job, new Path(crawldb, CrawlDb.CURRENT_NAME));
  job.setBoolean("noCommit", noCommit);
  job.set(SolrConstants.SERVER_URL, solrUrl);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapOutputKeyClass(ByteWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setMapperClass(DBFilter.class);
  job.setReducerClass(SolrDeleter.class);
  JobClient.runJob(job);
  long end = System.currentTimeMillis();
  LOG.info("SolrClean: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
}
Example 14: runTests
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
/**
 * Run the test
 *
 * @throws IOException on error
 */
private void runTests() throws IOException {
  getConf().setLong("io.bytes.per.checksum", bytesPerChecksum);
  JobConf job = new JobConf(getConf(), NNBench.class);
  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);
  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);
  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);
  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);
  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
Example 15: IDMappingJob
import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
public static void IDMappingJob(String[] args) throws IOException {
  JobConf job = new JobConf();
  new GenericOptionsParser(job, args);
  job.setJarByClass(HybridDriver.class);
  job.setJobName("Converting binary similarity scores to text");
  job.setMapperClass(IDMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  Path inputPath = new Path(OUTPUT_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  SequenceFileInputFormat.setInputPaths(job, inputPath);
  Path outputPath = new Path("SimilarityScores");
  job.setOutputFormat(TextOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, outputPath);
  FileSystem.get(job).delete(outputPath, true);
  HashPagesDriver.prepareDistribCache(job, HashPagesDriver.IDS_FILE2); // remove not sure
  JobSubmitter.run(job, "BINARY TO TEXT", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}