

Java SequenceFileInputFormat Class Code Examples

This article collects typical usage examples of the Java class org.apache.hadoop.mapred.SequenceFileInputFormat. If you are unsure what the SequenceFileInputFormat class does, how to use it, or where to find working examples, the selected class code examples below may help.


The SequenceFileInputFormat class belongs to the org.apache.hadoop.mapred package. The sections below show 15 code examples of the SequenceFileInputFormat class, sorted by popularity by default.
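
Before the per-project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of how SequenceFileInputFormat is typically wired into a job using the old mapred API. The class name SequenceFileDumpDemo, the input/output paths taken from args, and the assumption that the SequenceFile keys and values are both Text are illustrative only.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SequenceFileDumpDemo {
  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(new Configuration(), SequenceFileDumpDemo.class);
    job.setJobName("sequencefile-dump-demo");

    // Read key/value records from SequenceFiles under the input directory (args[0]).
    job.setInputFormat(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));

    // Pass records through unchanged and write them back out as plain text (assumes Text/Text records).
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(IdentityReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    JobClient.runJob(job);
  }
}

As in the examples below, the essential calls are job.setInputFormat(SequenceFileInputFormat.class) together with FileInputFormat.setInputPaths(...); everything else is ordinary old-API job configuration.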

Example 1: runTests

import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
/**
 * Run the test
 * 
 * @throws IOException on error
 */
public static void runTests() throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);
  
  JobConf job = new JobConf(config, NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);
  
  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);
  
  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
 
Developer ID: naver, Project: hadoop, Lines of code: 30, Source file: NNBench.java

Example 2: runIOTest

import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
private void runIOTest(
        Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass, 
        Path outputDir) throws IOException {
  JobConf job = new JobConf(config, TestDFSIO.class);

  FileInputFormat.setInputPaths(job, getControlDir(config));
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(mapperClass);
  job.setReducerClass(AccumulatingReducer.class);

  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
 
Developer ID: naver, Project: hadoop, Lines of code: 18, Source file: TestDFSIO.java

Example 3: joinAs

import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapreduce.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
 
Developer ID: naver, Project: hadoop, Lines of code: 21, Source file: TestDatamerge.java

Example 4: getOldAPIJobconf

import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
private static JobConf getOldAPIJobconf(Configuration configuration, String name,
                                        String input, String output)
    throws Exception {
  final JobConf jobConf = new JobConf(configuration);
  final FileSystem fs = FileSystem.get(configuration);
  if (fs.exists(new Path(output))) {
    fs.delete(new Path(output), true);
  }
  fs.close();
  jobConf.setJobName(name);
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(IntWritable.class);
  jobConf.setMapperClass(WordCountWithOldAPI.TokenizerMapperWithOldAPI.class);
  jobConf.setCombinerClass(WordCountWithOldAPI.IntSumReducerWithOldAPI.class);
  jobConf.setReducerClass(WordCountWithOldAPI.IntSumReducerWithOldAPI.class);

  jobConf.setInputFormat(SequenceFileInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);

  FileInputFormat.setInputPaths(jobConf, new Path(input));
  FileOutputFormat.setOutputPath(jobConf, new Path(output));
  return jobConf;
}
 
Developer ID: aliyun-beta, Project: aliyun-oss-hadoop-fs, Lines of code: 24, Source file: OldAPICombinerTest.java

Example 5: createMergeJob

import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
public static JobConf createMergeJob(Configuration config, Path linkDb,
    boolean normalize, boolean filter) {
  Path newLinkDb = new Path("linkdb-merge-"
      + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  JobConf job = new NutchJob(config);
  job.setJobName("linkdb merge " + linkDb);

  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(LinkDbFilter.class);
  job.setBoolean(LinkDbFilter.URL_NORMALIZING, normalize);
  job.setBoolean(LinkDbFilter.URL_FILTERING, filter);
  job.setReducerClass(LinkDbMerger.class);

  FileOutputFormat.setOutputPath(job, newLinkDb);
  job.setOutputFormat(MapFileOutputFormat.class);
  job.setBoolean("mapred.output.compress", true);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Inlinks.class);

  // https://issues.apache.org/jira/browse/NUTCH-1069
  job.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  return job;
}
 
Developer ID: jorcox, Project: GeoCrawler, Lines of code: 27, Source file: LinkDbMerger.java

Example 6: writeTest

import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
public static void writeTest(FileSystem fs, boolean fastCheck)
  throws Exception {

  fs.delete(DATA_DIR, true);
  fs.delete(WRITE_DIR, true);
  
  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(WriteMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, WRITE_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
 
Developer ID: Nextzero, Project: hadoop-2.6.0-cdh5.4.3, Lines of code: 22, Source file: TestFileSystem.java

Example 7: readTest

import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
public static void readTest(FileSystem fs, boolean fastCheck)
  throws Exception {

  fs.delete(READ_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);


  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(ReadMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
 
Developer ID: Nextzero, Project: hadoop-2.6.0-cdh5.4.3, Lines of code: 22, Source file: TestFileSystem.java

Example 8: seekTest

import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
public static void seekTest(FileSystem fs, boolean fastCheck)
  throws Exception {

  fs.delete(READ_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job,CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(SeekMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
 
Developer ID: Nextzero, Project: hadoop-2.6.0-cdh5.4.3, Lines of code: 21, Source file: TestFileSystem.java

Example 9: joinAs

import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapred.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
 
Developer ID: Nextzero, Project: hadoop-2.6.0-cdh5.4.3, Lines of code: 21, Source file: TestDatamerge.java

Example 10: runTests

import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
/**
 * Run the test
 * 
 * @throws IOException on error
 */
public static void runTests(Configuration config) throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);
  
  JobConf job = new JobConf(config, NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);
  
  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);
  
  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
 
Developer ID: Nextzero, Project: hadoop-2.6.0-cdh5.4.3, Lines of code: 30, Source file: NNBench.java

Example 11: updateJobConf

import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
private void updateJobConf(JobConf conf, Path inputPath, Path outputPath) {
  // set specific job config
  conf.setLong(NUMBER_OF_MAPS_KEY, nmaps);
  conf.setLong(NUMBER_OF_THREADS_KEY, nthreads);
  conf.setInt(BUFFER_SIZE_KEY, buffersize);
  conf.setLong(WRITER_DATARATE_KEY, datarate);
  conf.setLong("mapred.task.timeout", Long.MAX_VALUE);
  conf.set(OUTPUT_DIR_KEY, output);
  
  // set the output and input for the map reduce
  FileInputFormat.setInputPaths(conf, inputPath);
  FileOutputFormat.setOutputPath(conf, outputPath);

  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setNumReduceTasks(1);
  conf.setSpeculativeExecution(false);
}
 
Developer ID: rhli, Project: hadoop-EAR, Lines of code: 20, Source file: DFSGeneralTest.java

Example 12: createMergeJob

import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
public static JobConf createMergeJob(Configuration config, Path linkDb, boolean normalize, boolean filter) {
  Path newLinkDb =
    new Path("linkdb-merge-" + 
             Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  JobConf job = new NutchJob(config);
  job.setJobName("linkdb merge " + linkDb);

  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(LinkDbFilter.class);
  job.setBoolean(LinkDbFilter.URL_NORMALIZING, normalize);
  job.setBoolean(LinkDbFilter.URL_FILTERING, filter);
  job.setReducerClass(LinkDbMerger.class);

  FileOutputFormat.setOutputPath(job, newLinkDb);
  job.setOutputFormat(MapFileOutputFormat.class);
  job.setBoolean("mapred.output.compress", true);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Inlinks.class);

  // https://issues.apache.org/jira/browse/NUTCH-1069
  job.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  return job;
}
 
Developer ID: yahoo, Project: anthelion, Lines of code: 27, Source file: LinkDbMerger.java

Example 13: delete

import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
public void delete(String crawldb, String solrUrl, boolean noCommit) throws IOException {
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("SolrClean: starting at " + sdf.format(start));

  JobConf job = new NutchJob(getConf());

  FileInputFormat.addInputPath(job, new Path(crawldb, CrawlDb.CURRENT_NAME));
  job.setBoolean("noCommit", noCommit);
  job.set(SolrConstants.SERVER_URL, solrUrl);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapOutputKeyClass(ByteWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setMapperClass(DBFilter.class);
  job.setReducerClass(SolrDeleter.class);

  JobClient.runJob(job);

  long end = System.currentTimeMillis();
  LOG.info("SolrClean: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
}
 
Developer ID: yahoo, Project: anthelion, Lines of code: 23, Source file: SolrClean.java

Example 14: runTests

import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
/**
 * Run the test
 * 
 * @throws IOException on error
 */
private void runTests() throws IOException {
  getConf().setLong("io.bytes.per.checksum", bytesPerChecksum);
  
  JobConf job = new JobConf(getConf(), NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);
  
  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);
  
  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
 
Developer ID: hopshadoop, Project: hops, Lines of code: 30, Source file: NNBench.java

Example 15: IDMappingJob

import org.apache.hadoop.mapred.SequenceFileInputFormat; // import the required package/class
public static void IDMappingJob(String[] args) throws IOException {

  JobConf job = new JobConf();
  new GenericOptionsParser(job, args);
  job.setJarByClass(HybridDriver.class);
  job.setJobName("Converting binary similarity scores to text");
  job.setMapperClass(IDMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  Path inputPath = new Path(OUTPUT_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  SequenceFileInputFormat.setInputPaths(job, inputPath);
  Path outputPath = new Path("SimilarityScores");
  job.setOutputFormat(TextOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, outputPath);
  FileSystem.get(job).delete(outputPath, true);
  HashPagesDriver.prepareDistribCache(job, HashPagesDriver.IDS_FILE2); //remove not sure
  JobSubmitter.run(job, "BINARY TO TEXT", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}
 
Developer ID: mahaucsb, Project: pss, Lines of code: 24, Source file: HybridDriver.java


Note: The org.apache.hadoop.mapred.SequenceFileInputFormat class examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective authors, and copyright in the source code remains with the original authors. Please follow the license of the corresponding project when distributing or using the code, and do not republish without permission.