當前位置: 首頁>>代碼示例>>Java>>正文


Java TextInputFormat類代碼示例

本文整理匯總了Java中org.apache.hadoop.mapreduce.lib.input.TextInputFormat的典型用法代碼示例。如果您正苦於以下問題:Java TextInputFormat類的具體用法?Java TextInputFormat怎麽用?Java TextInputFormat使用的例子?那麽, 這裏精選的類代碼示例或許可以為您提供幫助。


TextInputFormat類屬於org.apache.hadoop.mapreduce.lib.input包,在下文中一共展示了TextInputFormat類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Java代碼示例。

示例1: main

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; //導入依賴的package包/類
// Driver for the TF-IDF counting job: wires up mapper/reducer, I/O formats
// and paths, then blocks until the job finishes.
// args[0], args[1] = input directories; args[2] = output directory.
public static void main(String[] args) throws Exception {
	Job job = Job.getInstance(new Configuration());
	job.setJobName("TF-IDFCount");
	job.setJarByClass(TF_IDF.class);

	job.setMapperClass(TF_IDFMap.class);
	job.setReducerClass(TF_IDFReduce.class);

	// Map emits (Text, TextArrayWritable); reduce emits (Text, DoubleWritable).
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(TextArrayWritable.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);

	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);

	// Two input directories feed the same mapper.
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileInputFormat.addInputPath(job, new Path(args[1]));
	FileOutputFormat.setOutputPath(job, new Path(args[2]));

	// Exit status mirrors job success so callers/scripts can chain on it.
	System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
開發者ID:lzmhhh123,項目名稱:Wikipedia-Index,代碼行數:26,代碼來源:TF_IDF.java

示例2: main

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; //導入依賴的package包/類
// Driver for the PageRank "unit sum" step: sums partial rank contributions
// and applies the damping-factor (beta) correction.
// args[0], args[1] = input paths; args[2] = output path; args[3] = beta.
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        // beta is the PageRank damping factor, read back by BetaMapper.
        conf.setFloat("beta", Float.parseFloat(args[3]));
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitSum.class);

        // Chain: PassMapper output is piped into BetaMapper inside one map task.
        ChainMapper.addMapper(job, PassMapper.class, Object.class, Text.class, Text.class, DoubleWritable.class, conf);
        ChainMapper.addMapper(job, BetaMapper.class, Text.class, DoubleWritable.class, Text.class, DoubleWritable.class, conf);

        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        // NOTE(review): MultipleInputs binds its own mapper to each path (via a
        // delegating mapper), which appears to supersede the ChainMapper setup
        // above — confirm which mapper configuration actually takes effect.
        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, PassMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, BetaMapper.class);

        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        // Blocks until completion; success/failure is not propagated to the shell.
        job.waitForCompletion(true);
    }
 
開發者ID:yogykwan,項目名稱:mapreduce-samples,代碼行數:21,代碼來源:UnitSum.java

示例3: main

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; //導入依賴的package包/類
// Driver for the word-count benchmark: runs the classic map/combine/reduce
// pipeline and prints total wall-clock time on completion.
// args[0] = input path; args[1] = output path prefix.
public static void main(String[] args) throws Exception {
    BasicConfigurator.configure();

    Configuration conf = new Configuration();
    conf.setQuietMode(true);

    Job job = Job.getInstance(conf, "WordCount");
    job.setJarByClass(HadoopWordCount.class);

    job.setMapperClass(Map.class);
    // The reducer doubles as a combiner to shrink map-side output.
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // Timestamp suffix keeps repeated benchmark runs from colliding on output.
    FileOutputFormat.setOutputPath(job, new Path(args[1] + "_" + System.currentTimeMillis()));

    long startMillis = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("TotalTime=" + (System.currentTimeMillis() - startMillis));
}
 
開發者ID:hazelcast,項目名稱:big-data-benchmark,代碼行數:27,代碼來源:HadoopWordCount.java

示例4: main

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; //導入依賴的package包/類
// Driver for the document-frequency job: counts, per term, the number of
// documents containing it.
// args[0] = input directory; args[1] = output directory.
public static void main(String[] args) throws Exception {
	Job job = Job.getInstance(new Configuration());
	job.setJobName("DocumentFrequencyCount");
	job.setJarByClass(DF.class);

	job.setMapperClass(DFMap.class);
	job.setReducerClass(DFReduce.class);

	// Both map and reduce emit (Text, IntWritable) pairs.
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(IntWritable.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(IntWritable.class);

	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);

	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	// Propagate job success/failure as the process exit code.
	System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
開發者ID:lzmhhh123,項目名稱:Wikipedia-Index,代碼行數:25,代碼來源:DF.java

示例5: main

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; //導入依賴的package包/類
// Driver for the MaxThreeLabel job.
// args[0] = input directory; args[1] = output directory.
public static void main(String[] args) throws Exception {
	Job job = Job.getInstance(new Configuration());
	job.setJobName("MaxThreeLabel");
	job.setJarByClass(MaxThreeLabel.class);

	job.setMapperClass(MaxThreeLabelMap.class);
	job.setReducerClass(MaxThreeLabelReduce.class);

	// Map emits (Text, TextArrayWritable); reduce emits (Text, Text).
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(TextArrayWritable.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(Text.class);

	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);

	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	// Propagate job success/failure as the process exit code.
	System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
開發者ID:lzmhhh123,項目名稱:Wikipedia-Index,代碼行數:25,代碼來源:MaxThreeLabel.java

示例6: createJob

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; //導入依賴的package包/類
/**
 * Builds a MapReduce job whose tasks can be made to fail on demand.
 *
 * @param failMappers  when true, mappers are flagged (via configuration) to fail
 * @param failReducers when true, reducers are flagged (via configuration) to fail
 * @param inputFile    input path read as plain text
 * @return the configured (not yet submitted) job
 * @throws IOException if the job instance cannot be created
 */
public Job createJob(boolean failMappers, boolean failReducers, Path inputFile) 
    throws IOException {
  Configuration conf = getConf();
  // The failure toggles travel through the configuration; the tasks read
  // them back at runtime to decide whether to fail.
  conf.setBoolean(FAIL_MAP, failMappers);
  conf.setBoolean(FAIL_REDUCE, failReducers);

  Job job = Job.getInstance(conf, "fail");
  job.setJobName("Fail job");
  job.setJarByClass(FailJob.class);
  // Speculative attempts would mask deliberate task failures, so disable them.
  job.setSpeculativeExecution(false);

  job.setMapperClass(FailMapper.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(FailReducer.class);

  job.setInputFormatClass(TextInputFormat.class);
  // The job produces no useful output — discard it all.
  job.setOutputFormatClass(NullOutputFormat.class);
  FileInputFormat.addInputPath(job, inputFile);
  return job;
}
 
開發者ID:naver,項目名稱:hadoop,代碼行數:19,代碼來源:FailJob.java

示例7: main

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; //導入依賴的package包/類
// Driver for the user-name permission check job. Paths are hard-coded:
// reads from "input", writes to "output"; job success becomes the exit code.
public static void main(String [] args) throws Exception
{
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "user name check");

  job.setJarByClass(UserNamePermission.class);
  job.setMapperClass(UserNamePermission.UserNameMapper.class);
  // The reducer is reused as a combiner.
  job.setCombinerClass(UserNamePermission.UserNameReducer.class);
  job.setReducerClass(UserNamePermission.UserNameReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  // A single reducer yields a single output file.
  job.setNumReduceTasks(1);

  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path("input"));
  FileOutputFormat.setOutputPath(job, new Path("output"));

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
開發者ID:naver,項目名稱:hadoop,代碼行數:22,代碼來源:UserNamePermission.java

示例8: setup

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; //導入依賴的package包/類
/**
 * Reads field-selection parameters from the job configuration before any
 * records are processed: the field separator, the map output key/value
 * spec, and whether the input key should be ignored.
 *
 * @param context task context providing the job configuration
 * @throws IOException if the configured input format class cannot be loaded
 * @throws InterruptedException declared by the Mapper#setup contract
 */
public void setup(Context context) 
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  // Separator between fields of a record; defaults to TAB.
  this.fieldSeparator = 
    conf.get(FieldSelectionHelper.DATA_FIELD_SEPERATOR, "\t");
  // Spec describing which fields form the map output key vs. value.
  this.mapOutputKeyValueSpec = 
    conf.get(FieldSelectionHelper.MAP_OUTPUT_KEY_VALUE_SPEC, "0-:");
  try {
    // With TextInputFormat the key is just the byte offset, so it carries
    // no field data and is ignored.
    this.ignoreInputKey = TextInputFormat.class.getCanonicalName().equals(
      context.getInputFormatClass().getCanonicalName());
  } catch (ClassNotFoundException e) {
    // Translate to IOException per this method's declared contract.
    throw new IOException("Input format class not found", e);
  }
  // Parse the spec into the key/value field lists; the return value is the
  // index from which all remaining fields go to the value.
  allMapValueFieldsFrom = FieldSelectionHelper.parseOutputKeyValueSpec(
    mapOutputKeyValueSpec, mapOutputKeyFieldList, mapOutputValueFieldList);
  LOG.info(FieldSelectionHelper.specToString(fieldSeparator,
    mapOutputKeyValueSpec, allMapValueFieldsFrom, mapOutputKeyFieldList,
    mapOutputValueFieldList) + "\nignoreInputKey:" + ignoreInputKey);
}
 
開發者ID:naver,項目名稱:hadoop,代碼行數:20,代碼來源:FieldSelectionMapper.java

示例9: main

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; //導入依賴的package包/類
/**
 * Driver for the co-occurrence-matrix x rating-matrix multiplication step.
 *
 * args[0] = co-occurrence matrix input, args[1] = ratings input,
 * args[2] = output directory.
 */
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();

	Job job = Job.getInstance(conf);
	job.setJarByClass(Multiplication.class);

	// Each input path is bound to its own mapper via MultipleInputs (which
	// installs a delegating mapper internally). The original code additionally
	// configured ChainMapper and then called setMapperClass() twice — the
	// second call silently overwrote the first, and MultipleInputs superseded
	// both — so those conflicting, dead configuration calls were removed.
	MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, CooccurrenceMapper.class);
	MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, RatingMapper.class);

	job.setReducerClass(MultiplicationReducer.class);

	// Both mappers emit (Text, Text); the reducer emits (Text, DoubleWritable).
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(Text.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);

	TextOutputFormat.setOutputPath(job, new Path(args[2]));

	// Propagate job success/failure instead of discarding it, consistent with
	// the other drivers in this collection.
	System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
開發者ID:yogykwan,項目名稱:mapreduce-samples,代碼行數:27,代碼來源:Multiplication.java

示例10: main

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; //導入依賴的package包/類
// Driver for the summing job: maps input lines and reduces them to per-key
// double totals.
// args[0] = input path; args[1] = output path.
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);

    job.setJarByClass(Sum.class);
    job.setMapperClass(SumMapper.class);
    job.setReducerClass(SumReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    TextInputFormat.setInputPaths(job, new Path(args[0]));
    TextOutputFormat.setOutputPath(job, new Path(args[1]));

    // Block until the job finishes; the result flag is not propagated.
    job.waitForCompletion(true);
}
 
開發者ID:yogykwan,項目名稱:mapreduce-samples,代碼行數:21,代碼來源:Sum.java

示例11: main

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; //導入依賴的package包/類
// Driver for the data-divider job: partitions rating records by user.
// args[0] = input path; args[1] = output path.
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	Job job = Job.getInstance(conf);

	job.setJarByClass(DataDividerByUser.class);
	job.setMapperClass(DataDividerMapper.class);
	job.setReducerClass(DataDividerReducer.class);

	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	// Output is keyed by user id (IntWritable) with the record as Text.
	job.setOutputKeyClass(IntWritable.class);
	job.setOutputValueClass(Text.class);

	TextInputFormat.setInputPaths(job, new Path(args[0]));
	TextOutputFormat.setOutputPath(job, new Path(args[1]));

	// Block until the job finishes; the result flag is not propagated.
	job.waitForCompletion(true);
}
 
開發者ID:yogykwan,項目名稱:mapreduce-samples,代碼行數:21,代碼來源:DataDividerByUser.java

示例12: main

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; //導入依賴的package包/類
// Driver for the normalization job.
// args[0] = input path; args[1] = output path.
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);

    job.setJarByClass(Normalize.class);
    job.setMapperClass(NormalizeMapper.class);
    job.setReducerClass(NormalizeReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    TextInputFormat.setInputPaths(job, new Path(args[0]));
    TextOutputFormat.setOutputPath(job, new Path(args[1]));

    // Block until the job finishes; the result flag is not propagated.
    job.waitForCompletion(true);
}
 
開發者ID:yogykwan,項目名稱:mapreduce-samples,代碼行數:21,代碼來源:Normalize.java

示例13: main

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; //導入依賴的package包/類
// Driver for the PageRank unit-multiplication step (transition matrix x
// rank vector).
// args[0] = transition matrix input; args[1] = PR vector input;
// args[2] = output directory.
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitMultiplication.class);

        // Chain: TransitionMapper output is piped into PRMapper in one map task.
        ChainMapper.addMapper(job, TransitionMapper.class, Object.class, Text.class, Text.class, Text.class, conf);
        ChainMapper.addMapper(job, PRMapper.class, Object.class, Text.class, Text.class, Text.class, conf);

        job.setReducerClass(MultiplicationReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // NOTE(review): MultipleInputs binds its own mapper to each path (via a
        // delegating mapper), which appears to supersede the ChainMapper setup
        // above — confirm which mapper configuration actually takes effect.
        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class);

        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        // Blocks until completion; success/failure is not propagated to the shell.
        job.waitForCompletion(true);
    }
 
開發者ID:yogykwan,項目名稱:mapreduce-samples,代碼行數:21,代碼來源:UnitMultiplication.java

示例14: main

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; //導入依賴的package包/類
// Driver for the PageRank unit-multiplication step with damping.
// args[0] = transition matrix input; args[1] = PR vector input;
// args[2] = output directory; args[3] = beta (damping factor).
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        // beta is the PageRank damping factor, read back by the mappers/reducer.
        conf.setFloat("beta", Float.parseFloat(args[3]));
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitMultiplication.class);

        // Chain: TransitionMapper output is piped into PRMapper in one map task.
        ChainMapper.addMapper(job, TransitionMapper.class, Object.class, Text.class, Text.class, Text.class, conf);
        ChainMapper.addMapper(job, PRMapper.class, Object.class, Text.class, Text.class, Text.class, conf);

        job.setReducerClass(MultiplicationReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // NOTE(review): MultipleInputs binds its own mapper to each path (via a
        // delegating mapper), which appears to supersede the ChainMapper setup
        // above — confirm which mapper configuration actually takes effect.
        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class);

        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        // Blocks until completion; success/failure is not propagated to the shell.
        job.waitForCompletion(true);
    }
 
開發者ID:yogykwan,項目名稱:mapreduce-samples,代碼行數:22,代碼來源:UnitMultiplication.java

示例15: runTestLazyOutput

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; //導入依賴的package包/類
/**
 * Runs a small MapReduce job to exercise (lazy) output-file creation.
 *
 * @param conf         base job configuration
 * @param output       output directory for the job
 * @param numReducers  number of reduce tasks to run
 * @param createLazily when true, use LazyOutputFormat so output files are
 *                     only created when records are actually written
 * @throws Exception if the job fails or cannot be submitted
 */
private static void runTestLazyOutput(Configuration conf, Path output,
    int numReducers, boolean createLazily) 
throws Exception {
  Job job = Job.getInstance(conf, "Test-Lazy-Output");

  job.setJarByClass(TestMapReduceLazyOutput.class);
  job.setMapperClass(TestMapper.class);
  job.setReducerClass(TestReducer.class);
  job.setNumReduceTasks(numReducers);

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(job, INPUT);
  FileOutputFormat.setOutputPath(job, output);

  if (createLazily) {
    // Lazy mode: wrap TextOutputFormat so files appear only on first write.
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
  } else {
    job.setOutputFormatClass(TextOutputFormat.class);
  }
  assertTrue(job.waitForCompletion(true));
}
 
開發者ID:aliyun-beta,項目名稱:aliyun-oss-hadoop-fs,代碼行數:25,代碼來源:TestMapReduceLazyOutput.java


注:本文中的org.apache.hadoop.mapreduce.lib.input.TextInputFormat類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。