

Java FileInputFormat Class Code Examples

This article collects typical usage examples of the Java class org.apache.hadoop.mapreduce.lib.input.FileInputFormat. If you have been wondering what the FileInputFormat class does, how to use it, or what it looks like in real projects, the curated examples below should help.


The FileInputFormat class belongs to the org.apache.hadoop.mapreduce.lib.input package. Fifteen code examples of the class are shown below, ordered by popularity.
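Before the collected examples, here is a minimal driver sketch (not taken from any of the projects below; the class name FileInputFormatSketch and the argument layout are placeholders) illustrating the two FileInputFormat calls that recur throughout: addInputPath, which appends one more input path per call, and setInputPaths, which replaces the entire configured input list at once.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FileInputFormatSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "FileInputFormatSketch");
        job.setJarByClass(FileInputFormatSketch.class);

        // addInputPath appends: after these two calls the job reads both inputs.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileInputFormat.addInputPath(job, new Path(args[1]));

        // setInputPaths would instead replace anything configured earlier:
        // FileInputFormat.setInputPaths(job, new Path(args[0]), new Path(args[1]));

        FileOutputFormat.setOutputPath(job, new Path(args[2]));

        // No mapper or reducer is set, so Hadoop runs the identity Mapper and
        // Reducer: the output holds each input line keyed by its byte offset.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}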

Example 1: run

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the required package/class
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = new Job(getConf(), "Text to Parquet");
    job.setJarByClass(getClass());
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(TextToParquetMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    AvroParquetOutputFormat.setSchema(job, SCHEMA);
    job.setOutputKeyClass(Void.class);
    job.setOutputValueClass(Group.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
 
Developer: mumuhadoop | Project: mumu-parquet | Lines: 22 | Source: MapReduceParquetMapReducer.java

Example 2: main

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	
	Job job = Job.getInstance(conf);
	job.setJobName("TF-IDFCount");
	job.setJarByClass(TF_IDF.class);
	
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(TextArrayWritable.class);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);
	
	job.setMapperClass(TF_IDFMap.class);
	job.setReducerClass(TF_IDFReduce.class);
	
	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileInputFormat.addInputPath(job, new Path(args[1]));
	FileOutputFormat.setOutputPath(job, new Path(args[2]));
	boolean wait = job.waitForCompletion(true);
	System.exit(wait ? 0 : 1);
}
 
Developer: lzmhhh123 | Project: Wikipedia-Index | Lines: 26 | Source: TF_IDF.java

Example 3: main

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	Job job = Job.getInstance(conf, "maxtemp");
	
	job.setMapperClass(MaxTempMapper.class);
	job.setReducerClass(MaxTempReducer.class);

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(FloatWritable.class);

	FileInputFormat.setInputPaths(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	if (!job.waitForCompletion(true))
		return;
}
 
Developer: aadishgoel2013 | Project: Hadoop-Codes | Lines: 17 | Source: MaxTempDriver.java

Example 4: configureInputFormat

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the required package/class
@Override
public void configureInputFormat(Job job, String tableName,
    String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {

  // Write a line of text into a file so that we can get
  // a record to the map task.
  Path dir = new Path(this.options.getTempDir());
  Path p = new Path(dir, "sqoop-dummy-import-job-file.txt");
  FileSystem fs = FileSystem.getLocal(this.options.getConf());
  if (fs.exists(p)) {
    boolean result = fs.delete(p, false);
    assertTrue("Couldn't delete temp file!", result);
  }

  BufferedWriter w = new BufferedWriter(
      new OutputStreamWriter(fs.create(p)));
  w.append("This is a line!");
  w.close();

  FileInputFormat.addInputPath(job, p);

  // And set the InputFormat itself.
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
}
 
Developer: aliyun | Project: aliyun-maxcompute-data-collectors | Lines: 26 | Source: TestImportJob.java

Example 5: createCopyJob

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the required package/class
/**
 * Creates a simple copy job.
 * 
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir, 
    Path... indirs) throws Exception {
  conf.setInt(MRJobConfig.NUM_MAPS, 3);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("DataMoveJob");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(DataCopyMapper.class);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(DataCopyReducer.class);
  theJob.setNumReduceTasks(1);
  return theJob;
}
 
Developer: naver | Project: hadoop | Lines: 25 | Source: MapReduceTestUtil.java

Example 6: createFailJob

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the required package/class
/**
 * Creates a simple fail job.
 * 
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a simple fail job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createFailJob(Configuration conf, Path outdir, 
    Path... indirs) throws Exception {
  FileSystem fs = outdir.getFileSystem(conf);
  if (fs.exists(outdir)) {
    fs.delete(outdir, true);
  }
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("Fail-Job");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(FailMapper.class);
  theJob.setReducerClass(Reducer.class);
  theJob.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  return theJob;
}
 
Developer: naver | Project: hadoop | Lines: 29 | Source: MapReduceTestUtil.java

Example 7: createJob

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the required package/class
public static Job createJob(Configuration conf, Path inDir, Path outDir, 
    int numInputFiles, int numReds, String input) throws IOException {
  Job job = Job.getInstance(conf);
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (fs.exists(inDir)) {
    fs.delete(inDir, true);
  }
  fs.mkdirs(inDir);
  for (int i = 0; i < numInputFiles; ++i) {
    DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
    file.writeBytes(input);
    file.close();
  }    

  FileInputFormat.setInputPaths(job, inDir);
  FileOutputFormat.setOutputPath(job, outDir);
  job.setNumReduceTasks(numReds);
  return job;
}
 
Developer: naver | Project: hadoop | Lines: 23 | Source: MapReduceTestUtil.java

Example 8: testInvalidMultiMapParallelism

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the required package/class
/**
 * Run a test with a misconfigured number of mappers.
 * Expect failure.
 */
@Test
public void testInvalidMultiMapParallelism() throws Exception {
  Job job = Job.getInstance();

  Path inputPath = createMultiMapsInput();
  Path outputPath = getOutputPath();

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);

  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }

  job.setMapperClass(StressMapper.class);
  job.setReducerClass(CountingReducer.class);
  job.setNumReduceTasks(1);
  LocalJobRunner.setLocalMaxRunningMaps(job, -6);
  FileInputFormat.addInputPath(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  boolean success = job.waitForCompletion(true);
  assertFalse("Job succeeded somehow", success);
}
 
Developer: naver | Project: hadoop | Lines: 29 | Source: TestLocalRunner.java

Example 9: main

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
    if(args.length != 2){
        System.err.println("Usage: MaxTemperatureWithCombiner <input path> <output path>");
        System.exit(-1);
    }

    Job job = new Job();
    job.setJarByClass(MaxTemperatureWithCombiner.class);
    job.setJobName("Max Temperature With Combiner");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTemperatureMapper.class);
    job.setCombinerClass(MaxTemperatureReducer.class);
    job.setReducerClass(MaxTemperatureReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Developer: myziyue | Project: learn-to-hadoop | Lines: 23 | Source: MaxTemperatureWithCombiner.java

Example 10: run

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the required package/class
public static void run(Configuration conf, Path inputPath, Path output, double params) throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = "calculating parameter";
    conf.set("params",String.valueOf(params));

    Job job = new Job(conf, jobName);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(indexToCountWritable.class);
    job.setOutputKeyClass(twoDimensionIndexWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(CalParamsMapper.class);
    job.setReducerClass(CalParamsReducer.class);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, output);

    job.setJarByClass(LDADriver.class);
    if (!job.waitForCompletion(true)) {
        throw new InterruptedException("calculating parameter failed");
    }
}
 
Developer: huyang1 | Project: LDA | Lines: 25 | Source: CalParamDriver.java

Example 11: main

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
    BasicConfigurator.configure();
    Configuration conf = new Configuration();
    conf.setQuietMode(true);

    Job job = Job.getInstance(conf, "WordCount");
    job.setJarByClass(HadoopWordCount.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1] + "_" + System.currentTimeMillis()));

    long t = System.currentTimeMillis();
    job.waitForCompletion(true);

    System.out.println("TotalTime=" + (System.currentTimeMillis() - t));
}
 
Developer: hazelcast | Project: big-data-benchmark | Lines: 27 | Source: HadoopWordCount.java

Example 12: cut

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the required package/class
public static void cut(String name, String in) throws Exception {
	Job job = createJob(name, in);
	job.setNumReduceTasks(0);
	boolean success = job.waitForCompletion(true);

	if (success) {
		// MapReduce reads an input and writes the result to a new location.
		// Hence it cannot modify data in place, and we have to replace the
		// input with the output.
		Path output = FileOutputFormat.getOutputPath(job);
		FileSystem fs = output.getFileSystem(job.getConfiguration());
		Path[] inputs = FileInputFormat.getInputPaths(job);
		for (int i = 0; i < inputs.length; i++) {
			fs.delete(inputs[i], true);
		}
		fs.rename(output, inputs[0]);
	}
}
 
Developer: amritbhat786 | Project: DocIT | Lines: 19 | Source: Cut.java
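Note that the cut example above relies on a createJob(name, in) helper that the snippet does not show. The sketch below is a plausible minimal reconstruction, assuming a hypothetical CutMapper class; the DocIT project's actual helper may differ. The one real constraint is that the output path must differ from the input path, because cut() deletes the inputs and renames the output over them after the job succeeds.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical reconstruction of the helper; not the project's actual code.
private static Job createJob(String name, String in) throws IOException {
	Job job = Job.getInstance(new Configuration(), name);
	job.setJarByClass(Cut.class);
	job.setMapperClass(CutMapper.class); // hypothetical mapper doing the "cut"
	FileInputFormat.addInputPath(job, new Path(in));
	// Write beside the input; cut() later deletes the input and renames this.
	FileOutputFormat.setOutputPath(job, new Path(in + ".tmp"));
	return job;
}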

Example 13: main

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	conf.set("xmlinput.start", "<page>");
	conf.set("xmlinput.end", "</page>");
	
	Job job = Job.getInstance(conf);
	job.setJobName("TermFrequencyCount");
	job.setJarByClass(TF.class);
	
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(IntArrayWritable.class);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);
	
	job.setMapperClass(TFMap.class);
	job.setReducerClass(TFReduce.class);
	
	job.setInputFormatClass(XmlInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));
	boolean wait = job.waitForCompletion(true);
	System.exit(wait ? 0 : 1);
}
 
Developer: lzmhhh123 | Project: Wikipedia-Index | Lines: 27 | Source: TF.java

Example 14: main

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	conf.set("xmlinput.start", "<page>");
	conf.set("xmlinput.end", "</page>");
	
	Job job = Job.getInstance(conf);
	job.setJobName("ExtractPages");
	job.setJarByClass(ExtractPage.class);
	
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(Text.class);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(Text.class);
	
	job.setMapperClass(ExtractPageMap.class);
	job.setReducerClass(ExtractPageReduce.class);
	
	job.setInputFormatClass(XmlInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));
	boolean wait = job.waitForCompletion(true);
	System.exit(wait ? 0 : 1);
}
 
Developer: lzmhhh123 | Project: Wikipedia-Index | Lines: 27 | Source: ExtractPage.java

Example 15: main

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	
	Job job = Job.getInstance(conf);
	job.setJobName("DocumentFrequencyCount");
	job.setJarByClass(DF.class);
	
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(IntWritable.class);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(IntWritable.class);
	
	job.setMapperClass(DFMap.class);
	job.setReducerClass(DFReduce.class);
	
	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));
	boolean wait = job.waitForCompletion(true);
	System.exit(wait ? 0 : 1);
}
 
Developer: lzmhhh123 | Project: Wikipedia-Index | Lines: 25 | Source: DF.java


Note: The org.apache.hadoop.mapreduce.lib.input.FileInputFormat class examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers, and copyright in the source code remains with the original authors. For distribution and use, refer to the corresponding project's license; do not reproduce without permission.