

Java TextInputFormat.addInputPath Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.TextInputFormat.addInputPath. If you are unsure what TextInputFormat.addInputPath does or how to call it, the curated code examples below should help. You can also explore further usage examples of the containing class, org.apache.hadoop.mapreduce.lib.input.TextInputFormat.


The following presents 14 code examples of the TextInputFormat.addInputPath method, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Java code examples.
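Before the collected examples, here is a minimal, self-contained sketch of the pattern they all share: TextInputFormat.addInputPath is a static method (inherited from FileInputFormat) that registers an input file or directory on a Job, and it can be called repeatedly to add several inputs. The class name AddInputPathSketch, the EchoMapper mapper, and the use of args[0]/args[1] as input and output paths are illustrative assumptions for this sketch, not code taken from the projects cited below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

public class AddInputPathSketch {

    // A trivial map-only job: echo each input line with a count of 1.
    // EchoMapper is a placeholder name for illustration.
    public static class EchoMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(value, ONE);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "addInputPath sketch");
        job.setJarByClass(AddInputPathSketch.class);

        job.setMapperClass(EchoMapper.class);
        job.setNumReduceTasks(0);                  // map-only, no reducer
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // TextInputFormat presents the input as <byte offset, line> pairs.
        job.setInputFormatClass(TextInputFormat.class);

        // addInputPath is static (inherited from FileInputFormat) and may be
        // called more than once; each call appends another input path.
        TextInputFormat.addInputPath(job, new Path(args[0]));

        TextOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The real-world examples that follow show the same call inside fuller job setups, including jobs with several input paths (Examples 4 and 5) and a job that adds paths inside a loop (Example 14).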

Example 1: main

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class the method depends on
public static void main(String [] args) throws Exception
{
  Path outDir = new Path("output");
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "user name check"); 
	
	
  job.setJarByClass(UserNamePermission.class);
  job.setMapperClass(UserNamePermission.UserNameMapper.class);
  job.setCombinerClass(UserNamePermission.UserNameReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setReducerClass(UserNamePermission.UserNameReducer.class);
  job.setNumReduceTasks(1);
    
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path("input"));
  FileOutputFormat.setOutputPath(job, outDir);
    
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Developer ID: naver, Project: hadoop, Lines of code: 22, Source: UserNamePermission.java

Example 2: main

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
	if (args.length < 2) {
		System.err.println("Usage: WordCount <input path> <result path>");
		return;
	}
	
	final String inputPath = args[0];
	final String outputPath = args[1];
	
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	
	// Set up the Hadoop Input Format
	Job job = Job.getInstance();
	HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
	TextInputFormat.addInputPath(job, new Path(inputPath));
	
	// Create a Flink job with it
	DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
	
	// Tokenize the line and convert from Writable "Text" to String for better handling
	DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());
	
	// Sum up the words
	DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);
	
	// Convert String back to Writable "Text" for use with Hadoop Output Format
	DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());
	
	// Set up Hadoop Output Format
	HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
	hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
	hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set the value for both, since this test
	TextOutputFormat.setOutputPath(job, new Path(outputPath));
	
	// Output & Execute
	hadoopResult.output(hadoopOutputFormat);
	env.execute("Word Count");
}
 
Developer ID: axbaretto, Project: flink, Lines of code: 39, Source: WordCount.java

Example 3: main

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class the method depends on
public static void main(String [] args) throws Exception
{
  Path outDir = new Path("output");
  Configuration conf = new Configuration();
  Job job = new Job(conf, "user name check"); 
	
	
  job.setJarByClass(UserNamePermission.class);
  job.setMapperClass(UserNamePermission.UserNameMapper.class);
  job.setCombinerClass(UserNamePermission.UserNameReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setReducerClass(UserNamePermission.UserNameReducer.class);
  job.setNumReduceTasks(1);
    
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path("input"));
  FileOutputFormat.setOutputPath(job, outDir);
    
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Developer ID: Nextzero, Project: hadoop-2.6.0-cdh5.4.3, Lines of code: 22, Source: UserNamePermission.java

Example 4: run

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Job job = Job.getInstance(conf, "loadlogs mr");
    job.setJarByClass(LoadLogsMR.class);
 
    job.setInputFormatClass(TextInputFormat.class);
    
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initTableReducerJob(args[2], LoadLogsReducer.class, job);
    job.setNumReduceTasks(3);
   
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));
    TextInputFormat.addInputPath(job, new Path(args[1]));
    TextOutputFormat.setOutputPath(job, new Path(args[2]));
 
    return job.waitForCompletion(true) ? 0 : 1;
}
 
Developer ID: hanhanwu, Project: Hanhan-HBase-MapReduce-in-Java, Lines of code: 21, Source: LoadLogsMR.java

Example 5: run

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Job job = Job.getInstance(conf, "reddit average");
    job.setJarByClass(RedditAverage.class);
 
    job.setInputFormatClass(TextInputFormat.class);
 
    job.setMapperClass(RedditMapper.class);
    job.setCombinerClass(RedditCombiner.class);
    job.setReducerClass(RedditReducer.class);
 
    job.setMapOutputValueClass(LongPairWritable.class);
    
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));
    TextInputFormat.addInputPath(job, new Path(args[1]));
    TextOutputFormat.setOutputPath(job, new Path(args[2]));
 
    return job.waitForCompletion(true) ? 0 : 1;
}
 
Developer ID: hanhanwu, Project: Hanhan-Hadoop-MapReduce, Lines of code: 24, Source: RedditAverage.java

Example 6: run

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCountImproved.class);
 
    job.setInputFormatClass(TextInputFormat.class);
 
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
 
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));
    TextOutputFormat.setOutputPath(job, new Path(args[1]));
 
    return job.waitForCompletion(true) ? 0 : 1;
}
 
Developer ID: hanhanwu, Project: Hanhan-Hadoop-MapReduce, Lines of code: 21, Source: WordCountImproved.java

Example 7: createAndSubmitJob

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class the method depends on
public boolean createAndSubmitJob() throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(yarnUnit.getConfig());
    job.setJobName(this.getClass().getSimpleName() + "-job");

    job.setNumReduceTasks(1);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(CountMapReduce.CountMapper.class);
    job.setReducerClass(CountMapReduce.CountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    TextInputFormat.addInputPath(job, new Path(inputPath));
    TextOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setSpeculativeExecution(false);
    job.setMaxMapAttempts(1);
    return job.waitForCompletion(true);
}
 
Developer ID: intropro, Project: prairie, Lines of code: 23, Source: YarnBenchmarks.java

Example 8: makeJob

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class the method depends on
public static Job makeJob(Configuration conf, Path in, Path out, String matchPath, long scanSince, 
		String chlorineConfigFilePath, String queue, String maskPath) throws IOException {
	conf.setBoolean("mapred.output.compress", false);
	conf.setLong("scanSince", scanSince);
	conf.set("matchPath", matchPath);
	conf.set("maskPath", maskPath);
	conf.set("inputPath", in.toString());
	if (queue != null) {
		conf.set("mapred.job.queue.name", queue);
	}
	conf.set("fs.permissions.umask-mode", 
			"007");
	conf.setInt("input_path_depth", in.depth());
	Job job = Job.getInstance(conf, "Chlorine_HDFS_Scan");
	job.setJarByClass(HDFSScanMR.class);
	if (chlorineConfigFilePath != null) {
		try {
			job.addCacheFile(new URI(chlorineConfigFilePath));
			conf.set("finder_file", (new File(chlorineConfigFilePath)).getName());
		} catch (URISyntaxException e) {
			LOG.error(e);
		}
	}
	job.setMapperClass(DeepScanMapper.class);
	job.setNumReduceTasks(0);
	job.setInputFormatClass(TextInputFormat.class);
	TextInputFormat.addInputPath(job, in);
	TextInputFormat.setInputDirRecursive(job, true);
	TextInputFormat.setInputPathFilter(job, NewFilesFilter.class);
	FileOutputFormat.setOutputPath(job, out);
	LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); 
	return job;
}
 
Developer ID: dataApps, Project: chlorine-hadoop, Lines of code: 34, Source: HDFSScanMR.java

Example 9: run

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
  Opts opts = new Opts();
  opts.parseArgs(getClass().getName(), args);

  Job job = Job.getInstance(getConf());
  job.setJobName(getClass().getSimpleName());
  job.setJarByClass(getClass());

  opts.setAccumuloConfigs(job);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(AccumuloOutputFormat.class);

  job.setMapperClass(NGramMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Mutation.class);

  job.setNumReduceTasks(0);
  job.setSpeculativeExecution(false);

  if (!opts.getConnector().tableOperations().exists(opts.getTableName())) {
    log.info("Creating table " + opts.getTableName());
    opts.getConnector().tableOperations().create(opts.getTableName());
    SortedSet<Text> splits = new TreeSet<>();
    String numbers[] = "1 2 3 4 5 6 7 8 9".split("\\s");
    String lower[] = "a b c d e f g h i j k l m n o p q r s t u v w x y z".split("\\s");
    String upper[] = "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z".split("\\s");
    for (String[] array : new String[][] {numbers, lower, upper}) {
      for (String s : array) {
        splits.add(new Text(s));
      }
    }
    opts.getConnector().tableOperations().addSplits(opts.getTableName(), splits);
  }

  TextInputFormat.addInputPath(job, new Path(opts.inputDirectory));
  job.waitForCompletion(true);
  return job.isSuccessful() ? 0 : 1;
}
 
Developer ID: apache, Project: accumulo-examples, Lines of code: 40, Source: NGramIngest.java

Example 10: configs

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class the method depends on
@Parameters
public static Collection<Object[]> configs() throws IOException {
    Configuration conf = HdpBootstrap.hadoopConfig();
    HadoopCfgUtils.setGenericOptions(conf);

    Job job = new Job(conf);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(EsOutputFormat.class);
    job.setMapOutputValueClass(LinkedMapWritable.class);
    job.setMapperClass(TabMapper.class);
    job.setNumReduceTasks(0);


    Job standard = new Job(job.getConfiguration());
    File fl = new File(TestUtils.sampleArtistsDat());
    long splitSize = fl.length() / 3;
    TextInputFormat.setMaxInputSplitSize(standard, splitSize);
    TextInputFormat.setMinInputSplitSize(standard, 50);

    standard.setMapperClass(TabMapper.class);
    standard.setMapOutputValueClass(LinkedMapWritable.class);
    TextInputFormat.addInputPath(standard, new Path(TestUtils.sampleArtistsDat(conf)));

    Job json = new Job(job.getConfiguration());
    json.setMapperClass(Mapper.class);
    json.setMapOutputValueClass(Text.class);
    json.getConfiguration().set(ConfigurationOptions.ES_INPUT_JSON, "true");
    TextInputFormat.addInputPath(json, new Path(TestUtils.sampleArtistsJson(conf)));

    return Arrays.asList(new Object[][] {
            { standard, "" },
            { json, "json-" } });
}
 
Developer ID: xushjie1987, Project: es-hadoop-v2.2.0, Lines of code: 34, Source: AbstractMRNewApiSaveTest.java

Example 11: run

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class the method depends on
public int run(String args[]) throws Exception {
    IndexConfig config = new IndexConfig();
    config.fromArray(args);

    // job
    Job job = Job.getInstance(getConf());
    job.setJobName("index");
    job.setJarByClass(IndexDriver.class);

    Path inputPath = new Path(config.getInput());
    Path outputPath = new Path(config.getOutput());
    Path remoteIndexPath = new Path(config.getRemoteIndex());

    // set mapper
    job.getConfiguration().set(IndexMapper.PREVIOUS_SORT_OUTPUT_CONFIG_NAME, config.getPreviousSortOutput());
    job.setMapperClass(IndexMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, inputPath);

    // set the reducer
    job.getConfiguration().set(IndexReducer.LOCAL_INDEX_CONFIG_NAME, config.getLocalIndex());
    job.getConfiguration().set(IndexReducer.REMOTE_INDEX_CONFIG_NAME, remoteIndexPath.toString());
    job.setNumReduceTasks(NUM_REDUCER);
    job.setReducerClass(IndexReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    TextOutputFormat.setOutputPath(job, outputPath);


    // clean up the old output path
    outputPath.getFileSystem(job.getConfiguration()).delete(outputPath, true);
    // create the folder for remote index
    remoteIndexPath.getFileSystem(job.getConfiguration()).mkdirs(remoteIndexPath);

    // run the job and wait until it complete
    return job.waitForCompletion(true) ? 0 : 1;
}
 
Developer ID: at15, Project: tree-index, Lines of code: 41, Source: IndexDriver.java

Example 12: run

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Configuration reduceConf = new Configuration(false);
    Configuration mapConf = new Configuration(false);
    Job job = Job.getInstance(conf, "correlate logs");
    job.setJarByClass(CorrelateLogs.class);    
            
    Scan scan = new Scan();
    scan.setCaching(500);
    scan.setCacheBlocks(false);
    scan.addFamily(Bytes.toBytes("struct"));
    TableMapReduceUtil.initTableMapperJob(args[0], scan, HBaseMapper.class, Text.class, LongWritable.class, job);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);       
    
    job.setNumReduceTasks(1);
    
    ChainReducer.setReducer(job, HBaseReducer.class, Text.class, LongWritable.class,
    		Text.class, LongPairWritable.class, reduceConf);
    ChainReducer.addMapper(job, AggregateMapper.class, Text.class, LongPairWritable.class, Text.class, DoubleWritable.class, mapConf);
    
    
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));
    TextOutputFormat.setOutputPath(job, new Path(args[1]));
 
    return job.waitForCompletion(true) ? 0 : 1;
}
 
Developer ID: hanhanwu, Project: Hanhan-HBase-MapReduce-in-Java, Lines of code: 32, Source: CorrelateLogs.java

Example 13: run

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Job job = Job.getInstance(conf, "euler estimator");
    job.setJarByClass(EulerEstimator.class);
 
    job.setInputFormatClass(TextInputFormat.class);
 
    job.setMapperClass(EulerMapper.class);
 
    job.setOutputFormatClass(NullOutputFormat.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));
    
    return job.waitForCompletion(true) ? 0 : 1;
}
 
Developer ID: hanhanwu, Project: Hanhan-Hadoop-MapReduce, Lines of code: 16, Source: EulerEstimator.java

Example 14: initialiseInput

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class the method depends on
private void initialiseInput(final Job job, final MapReduce operation) throws IOException {
    job.setInputFormatClass(TextInputFormat.class);

    for (final Map.Entry<String, String> entry : operation.getInputMapperPairs().entrySet()) {
        if (entry.getValue().contains(job.getConfiguration().get(MAPPER_GENERATOR))) {
            TextInputFormat.addInputPath(job, new Path(entry.getKey()));
        }
    }
}
 
Developer ID: gchq, Project: Gaffer, Lines of code: 10, Source: TextJobInitialiser.java


Note: The org.apache.hadoop.mapreduce.lib.input.TextInputFormat.addInputPath examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors. Please consult each project's license before distributing or using the code. Do not reproduce this article without permission.