This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.TextInputFormat.addInputPath. If you are wondering what TextInputFormat.addInputPath does, how to call it, or what real-world usage looks like, the curated code examples below may help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.mapreduce.lib.input.TextInputFormat.
The following lists 15 code examples of the TextInputFormat.addInputPath method, sorted by popularity by default.
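Before diving into the project-specific examples, here is a minimal, self-contained driver sketch (not taken from any of the listed projects) that shows the method in isolation: TextInputFormat.addInputPath appends one input path per call, and everything else is a plain identity-mapper job. The class name and argument layout are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class AddInputPathSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "addInputPath sketch");
    job.setJarByClass(AddInputPathSketch.class);

    // Identity mapper: passes each (byte offset, line) pair straight through.
    job.setMapperClass(Mapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TextInputFormat.class);
    // Every argument except the last is registered as an input; addInputPath
    // appends, so inputs accumulate across the loop iterations.
    for (int i = 0; i < args.length - 1; i++) {
      TextInputFormat.addInputPath(job, new Path(args[i]));
    }

    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(args[args.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

Invoked as, for example, hadoop jar app.jar AddInputPathSketch in1 in2 out, it reads both input directories and writes the concatenated lines to out.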
Example 1: main
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class required by this method
public static void main(String[] args) throws Exception
{
  Path outDir = new Path("output");
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "user name check");

  job.setJarByClass(UserNamePermission.class);
  job.setMapperClass(UserNamePermission.UserNameMapper.class);
  job.setCombinerClass(UserNamePermission.UserNameReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setReducerClass(UserNamePermission.UserNameReducer.class);
  job.setNumReduceTasks(1);

  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path("input"));
  FileOutputFormat.setOutputPath(job, outDir);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
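A side note on Example 1: the relative paths "input" and "output" are qualified against the default FileSystem's working directory (normally the submitting user's HDFS home directory). The short sketch below, which is not part of the original example, prints where such a relative input actually resolves on a given cluster configuration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class ShowResolvedInput {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // makeQualified resolves a relative path against the FS URI and working directory,
    // which is the same qualification addInputPath performs internally.
    System.out.println(fs.makeQualified(new Path("input")));
  }
}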
Example 2: main
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class required by this method
public static void main(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println("Usage: WordCount <input path> <result path>");
    return;
  }
  final String inputPath = args[0];
  final String outputPath = args[1];
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

  // Set up the Hadoop input format
  Job job = Job.getInstance();
  HadoopInputFormat<LongWritable, Text> hadoopInputFormat =
      new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
  TextInputFormat.addInputPath(job, new Path(inputPath));

  // Create a Flink job with it
  DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

  // Tokenize the lines and convert from Writable "Text" to String for better handling
  DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

  // Sum up the words
  DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

  // Convert the String back to Writable "Text" for use with the Hadoop output format
  DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

  // Set up the Hadoop output format
  HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat =
      new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
  // Set both the new (mapreduce.*) and the old (mapred.*) separator key, so the space
  // separator applies regardless of which Hadoop API version reads the configuration.
  hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
  hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " ");
  TextOutputFormat.setOutputPath(job, new Path(outputPath));

  // Output & execute
  hadoopResult.output(hadoopOutputFormat);
  env.execute("Word Count");
}
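What addInputPath does for the Flink integration above is purely configuration-side: it records the path in the wrapped Job's configuration, which Flink's HadoopInputFormat then carries to the cluster. The standalone sketch below (assuming the Hadoop 2.x key name mapreduce.input.fileinputformat.inputdir and an arbitrary demo path) makes that visible.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public final class ShowInputDirKey {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance();
    TextInputFormat.addInputPath(job, new Path("/tmp/demo-input"));
    // addInputPath stores the (qualified) path under this configuration key.
    System.out.println(job.getConfiguration().get("mapreduce.input.fileinputformat.inputdir"));
  }
}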
Example 3: main
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class required by this method
public static void main(String[] args) throws Exception
{
  Path outDir = new Path("output");
  Configuration conf = new Configuration();
  // new Job(Configuration, String) is deprecated; Job.getInstance(conf, name), as in Example 1, is the current form.
  Job job = new Job(conf, "user name check");

  job.setJarByClass(UserNamePermission.class);
  job.setMapperClass(UserNamePermission.UserNameMapper.class);
  job.setCombinerClass(UserNamePermission.UserNameReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setReducerClass(UserNamePermission.UserNameReducer.class);
  job.setNumReduceTasks(1);

  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path("input"));
  FileOutputFormat.setOutputPath(job, outDir);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example 4: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class required by this method
@Override
public int run(String[] args) throws Exception {
  Configuration conf = this.getConf();
  Job job = Job.getInstance(conf, "loadlogs mr");
  job.setJarByClass(LoadLogsMR.class);
  job.setInputFormatClass(TextInputFormat.class);

  TableMapReduceUtil.addDependencyJars(job);
  // The reduce side writes into the HBase table named by args[2].
  TableMapReduceUtil.initTableReducerJob(args[2], LoadLogsReducer.class, job);
  job.setNumReduceTasks(3);

  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(Text.class);

  // Two input directories; each addInputPath call appends to the job's input list.
  TextInputFormat.addInputPath(job, new Path(args[0]));
  TextInputFormat.addInputPath(job, new Path(args[1]));
  TextOutputFormat.setOutputPath(job, new Path(args[2]));

  return job.waitForCompletion(true) ? 0 : 1;
}
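Example 4 only shows the driver; the reducer registered via initTableReducerJob must be a TableReducer that turns the mapper's (LongWritable, Text) pairs into HBase mutations. The class below is a hypothetical shape for such a reducer, assuming an HBase 1.x+ client API; the name LogsTableReducer, the "logs" column family, and the row-key choice are illustrative only, and the project's real LoadLogsReducer (not shown on this page) may differ.

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

public class LogsTableReducer extends TableReducer<LongWritable, Text, ImmutableBytesWritable> {
  @Override
  protected void reduce(LongWritable key, Iterable<Text> values, Context context)
      throws IOException, InterruptedException {
    for (Text line : values) {
      // Illustrative row key: the byte offset emitted by the mapper.
      Put put = new Put(Bytes.toBytes(key.get()));
      put.addColumn(Bytes.toBytes("logs"), Bytes.toBytes("line"), Bytes.toBytes(line.toString()));
      context.write(new ImmutableBytesWritable(put.getRow()), put);
    }
  }
}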
Example 5: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class required by this method
@Override
public int run(String[] args) throws Exception {
  Configuration conf = this.getConf();
  Job job = Job.getInstance(conf, "reddit average");
  job.setJarByClass(RedditAverage.class);

  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(RedditMapper.class);
  job.setCombinerClass(RedditCombiner.class);
  job.setReducerClass(RedditReducer.class);

  job.setMapOutputValueClass(LongPairWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  // Two input directories; each addInputPath call appends to the job's input list.
  TextInputFormat.addInputPath(job, new Path(args[0]));
  TextInputFormat.addInputPath(job, new Path(args[1]));
  TextOutputFormat.setOutputPath(job, new Path(args[2]));

  return job.waitForCompletion(true) ? 0 : 1;
}
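Examples 4 and 5 call addInputPath twice because each call appends to the job's input list. When all inputs are known up front, the inherited setInputPaths (which replaces the list) or addInputPaths (comma-separated string, appending) can do the same in one call. The helper below is a small sketch of that pattern, not code from either project.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

// Register several inputs on a job in one shot instead of repeated addInputPath calls.
// setInputPaths replaces the current list, while addInputPath/addInputPaths append to it.
public final class InputPathHelper {
  private InputPathHelper() {}

  public static void setInputs(Job job, String... dirs) throws IOException {
    Path[] paths = new Path[dirs.length];
    for (int i = 0; i < dirs.length; i++) {
      paths[i] = new Path(dirs[i]);
    }
    TextInputFormat.setInputPaths(job, paths);
  }
}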
Example 6: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class required by this method
@Override
public int run(String[] args) throws Exception {
  Configuration conf = this.getConf();
  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(WordCountImproved.class);

  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(LongSumReducer.class);
  job.setReducerClass(LongSumReducer.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  TextInputFormat.addInputPath(job, new Path(args[0]));
  TextOutputFormat.setOutputPath(job, new Path(args[1]));

  return job.waitForCompletion(true) ? 0 : 1;
}
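Example 6 references a project-specific TokenizerMapper. A hypothetical mapper consistent with that driver is sketched below: TextInputFormat hands the mapper (byte offset, line) pairs, and emitting (word, 1) lets the stock LongSumReducer act as both combiner and reducer. The project's real mapper may differ.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TokenizerMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
  private static final LongWritable ONE = new LongWritable(1);
  private final Text word = new Text();

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // Split the input line on whitespace and emit (word, 1) for each token.
    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken());
      context.write(word, ONE);
    }
  }
}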
Example 7: createAndSubmitJob
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class required by this method
public boolean createAndSubmitJob() throws IOException, ClassNotFoundException, InterruptedException {
  Job job = Job.getInstance(yarnUnit.getConfig());
  job.setJobName(this.getClass().getSimpleName() + "-job");
  job.setNumReduceTasks(1);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setMapperClass(CountMapReduce.CountMapper.class);
  job.setReducerClass(CountMapReduce.CountReducer.class);

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextInputFormat.addInputPath(job, new Path(inputPath));
  TextOutputFormat.setOutputPath(job, new Path(outputPath));

  // Keep the test deterministic: no speculative tasks and a single map attempt.
  job.setSpeculativeExecution(false);
  job.setMaxMapAttempts(1);

  return job.waitForCompletion(true);
}
Example 8: makeJob
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class required by this method
public static Job makeJob(Configuration conf, Path in, Path out, String matchPath, long scanSince,
    String chlorineConfigFilePath, String queue, String maskPath) throws IOException {
  conf.setBoolean("mapred.output.compress", false);
  conf.setLong("scanSince", scanSince);
  conf.set("matchPath", matchPath);
  conf.set("maskPath", maskPath);
  conf.set("inputPath", in.toString());
  if (queue != null) {
    conf.set("mapred.job.queue.name", queue);
  }
  conf.set("fs.permissions.umask-mode", "007");
  conf.setInt("input_path_depth", in.depth());

  Job job = Job.getInstance(conf, "Chlorine_HDFS_Scan");
  job.setJarByClass(HDFSScanMR.class);
  if (chlorineConfigFilePath != null) {
    try {
      job.addCacheFile(new URI(chlorineConfigFilePath));
      conf.set("finder_file", (new File(chlorineConfigFilePath)).getName());
    } catch (URISyntaxException e) {
      LOG.error(e);
    }
  }
  job.setMapperClass(DeepScanMapper.class);
  job.setNumReduceTasks(0);

  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, in);
  // Walk the input directory recursively and only pick up paths accepted by NewFilesFilter.
  TextInputFormat.setInputDirRecursive(job, true);
  TextInputFormat.setInputPathFilter(job, NewFilesFilter.class);

  FileOutputFormat.setOutputPath(job, out);
  LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
  return job;
}
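Example 8 combines addInputPath with setInputDirRecursive and a custom PathFilter (NewFilesFilter) so that only recently modified files are scanned. The filter itself is not shown on this page; the class below is a hypothetical stand-in that reads the "scanSince" value the driver stored in the configuration. The real Chlorine filter may be implemented differently.

import java.io.IOException;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

// Accept only files modified after the "scanSince" timestamp; implementing
// Configurable lets FileInputFormat inject the job configuration when it
// instantiates the filter class registered via setInputPathFilter.
public class RecentFilesFilter implements PathFilter, Configurable {
  private Configuration conf;
  private long scanSince;

  @Override
  public void setConf(Configuration conf) {
    this.conf = conf;
    this.scanSince = (conf == null) ? 0L : conf.getLong("scanSince", 0L);
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  @Override
  public boolean accept(Path path) {
    try {
      FileSystem fs = path.getFileSystem(conf);
      FileStatus status = fs.getFileStatus(path);
      // Always descend into directories; filter only regular files by modification time.
      if (status.isDirectory()) {
        return true;
      }
      return status.getModificationTime() >= scanSince;
    } catch (IOException e) {
      return false;
    }
  }
}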
Example 9: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class required by this method
@Override
public int run(String[] args) throws Exception {
  Opts opts = new Opts();
  opts.parseArgs(getClass().getName(), args);

  Job job = Job.getInstance(getConf());
  job.setJobName(getClass().getSimpleName());
  job.setJarByClass(getClass());
  opts.setAccumuloConfigs(job);

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(AccumuloOutputFormat.class);
  job.setMapperClass(NGramMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Mutation.class);
  job.setNumReduceTasks(0);
  job.setSpeculativeExecution(false);

  if (!opts.getConnector().tableOperations().exists(opts.getTableName())) {
    log.info("Creating table " + opts.getTableName());
    opts.getConnector().tableOperations().create(opts.getTableName());
    // Pre-split the new table on single digits and letters.
    SortedSet<Text> splits = new TreeSet<>();
    String[] numbers = "1 2 3 4 5 6 7 8 9".split("\\s");
    String[] lower = "a b c d e f g h i j k l m n o p q r s t u v w x y z".split("\\s");
    String[] upper = "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z".split("\\s");
    for (String[] array : new String[][] {numbers, lower, upper}) {
      for (String s : array) {
        splits.add(new Text(s));
      }
    }
    opts.getConnector().tableOperations().addSplits(opts.getTableName(), splits);
  }

  TextInputFormat.addInputPath(job, new Path(opts.inputDirectory));
  job.waitForCompletion(true);
  return job.isSuccessful() ? 0 : 1;
}
Example 10: configs
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class required by this method
@Parameters
public static Collection<Object[]> configs() throws IOException {
  Configuration conf = HdpBootstrap.hadoopConfig();
  HadoopCfgUtils.setGenericOptions(conf);

  Job job = new Job(conf);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(EsOutputFormat.class);
  job.setMapOutputValueClass(LinkedMapWritable.class);
  job.setMapperClass(TabMapper.class);
  job.setNumReduceTasks(0);

  Job standard = new Job(job.getConfiguration());
  File fl = new File(TestUtils.sampleArtistsDat());
  long splitSize = fl.length() / 3;
  // Bound the split size so the sample file is read as roughly three splits.
  TextInputFormat.setMaxInputSplitSize(standard, splitSize);
  TextInputFormat.setMinInputSplitSize(standard, 50);
  standard.setMapperClass(TabMapper.class);
  standard.setMapOutputValueClass(LinkedMapWritable.class);
  TextInputFormat.addInputPath(standard, new Path(TestUtils.sampleArtistsDat(conf)));

  Job json = new Job(job.getConfiguration());
  json.setMapperClass(Mapper.class);
  json.setMapOutputValueClass(Text.class);
  json.getConfiguration().set(ConfigurationOptions.ES_INPUT_JSON, "true");
  TextInputFormat.addInputPath(json, new Path(TestUtils.sampleArtistsJson(conf)));

  return Arrays.asList(new Object[][] {
      { standard, "" },
      { json, "json-" } });
}
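In Example 10 the split-size bounds are what force the single sample file into several map tasks: FileInputFormat computes each split as max(minSize, min(maxSize, blockSize)), so capping the maximum at a third of the file length yields roughly three splits. The sketch below isolates that tuning; the class and method names are hypothetical.

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public final class SplitTuning {
  private SplitTuning() {}

  public static void boundSplits(Job job, long fileLength) {
    TextInputFormat.setMaxInputSplitSize(job, fileLength / 3); // upper bound per split: aim for ~3 splits
    TextInputFormat.setMinInputSplitSize(job, 50);             // lower bound: avoid degenerate tiny splits
  }
}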
Example 11: main
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class required by this method
public static void main(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println("Usage: WordCount <input path> <result path>");
    return;
  }
  final String inputPath = args[0];
  final String outputPath = args[1];
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

  // Set up the Hadoop input format
  Job job = Job.getInstance();
  HadoopInputFormat<LongWritable, Text> hadoopInputFormat =
      new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
  TextInputFormat.addInputPath(job, new Path(inputPath));

  // Create a Flink job with it
  DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

  // Tokenize the lines and convert from Writable "Text" to String for better handling
  DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

  // Sum up the words
  DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

  // Convert the String back to Writable "Text" for use with the Hadoop output format
  DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

  // Set up the Hadoop output format
  HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat =
      new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
  // Set both the new (mapreduce.*) and the old (mapred.*) separator key, so the space
  // separator applies regardless of which Hadoop API version reads the configuration.
  hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
  hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " ");
  TextOutputFormat.setOutputPath(job, new Path(outputPath));

  // Output & execute
  hadoopResult.output(hadoopOutputFormat);
  env.execute("Word Count");
}
Example 12: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class required by this method
public int run(String[] args) throws Exception {
  IndexConfig config = new IndexConfig();
  config.fromArray(args);

  // job
  Job job = Job.getInstance(getConf());
  job.setJobName("index");
  job.setJarByClass(IndexDriver.class);

  Path inputPath = new Path(config.getInput());
  Path outputPath = new Path(config.getOutput());
  Path remoteIndexPath = new Path(config.getRemoteIndex());

  // set the mapper
  job.getConfiguration().set(IndexMapper.PREVIOUS_SORT_OUTPUT_CONFIG_NAME, config.getPreviousSortOutput());
  job.setMapperClass(IndexMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, inputPath);

  // set the reducer
  job.getConfiguration().set(IndexReducer.LOCAL_INDEX_CONFIG_NAME, config.getLocalIndex());
  job.getConfiguration().set(IndexReducer.REMOTE_INDEX_CONFIG_NAME, remoteIndexPath.toString());
  job.setNumReduceTasks(NUM_REDUCER);
  job.setReducerClass(IndexReducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(Text.class);
  TextOutputFormat.setOutputPath(job, outputPath);

  // clean up the old output path
  outputPath.getFileSystem(job.getConfiguration()).delete(outputPath, true);
  // create the folder for the remote index
  remoteIndexPath.getFileSystem(job.getConfiguration()).mkdirs(remoteIndexPath);

  // run the job and wait until it completes
  return job.waitForCompletion(true) ? 0 : 1;
}
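Example 12 deletes the previous output directory unconditionally before submitting. A slightly more defensive variant of that cleanup step is sketched below; the helper class, its name, and the exists() guard are assumptions, not code from the original driver.

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

public final class OutputCleanup {
  private OutputCleanup() {}

  public static void prepare(Job job, Path outputPath, Path remoteIndexPath) throws IOException {
    FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
    if (fs.exists(outputPath)) {
      fs.delete(outputPath, true); // recursive delete of the stale output directory
    }
    // Ensure the remote index directory exists before the job writes into it.
    remoteIndexPath.getFileSystem(job.getConfiguration()).mkdirs(remoteIndexPath);
  }
}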
Example 13: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class required by this method
@Override
public int run(String[] args) throws Exception {
  Configuration conf = this.getConf();
  Configuration reduceConf = new Configuration(false);
  Configuration mapConf = new Configuration(false);
  Job job = Job.getInstance(conf, "correlate logs");
  job.setJarByClass(CorrelateLogs.class);

  Scan scan = new Scan();
  scan.setCaching(500);
  scan.setCacheBlocks(false);
  scan.addFamily(Bytes.toBytes("struct"));
  TableMapReduceUtil.initTableMapperJob(args[0], scan, HBaseMapper.class, Text.class, LongWritable.class, job);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);

  // Chained reduce side: HBaseReducer aggregates, then AggregateMapper reshapes its output.
  ChainReducer.setReducer(job, HBaseReducer.class, Text.class, LongWritable.class,
      Text.class, LongPairWritable.class, reduceConf);
  ChainReducer.addMapper(job, AggregateMapper.class, Text.class, LongPairWritable.class,
      Text.class, DoubleWritable.class, mapConf);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  TextInputFormat.addInputPath(job, new Path(args[0]));
  TextOutputFormat.setOutputPath(job, new Path(args[1]));

  return job.waitForCompletion(true) ? 0 : 1;
}
Example 14: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class required by this method
@Override
public int run(String[] args) throws Exception {
  Configuration conf = this.getConf();
  Job job = Job.getInstance(conf, "euler estimator");
  job.setJarByClass(EulerEstimator.class);

  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(EulerMapper.class);
  // NullOutputFormat discards the job output, so no output path is needed.
  job.setOutputFormatClass(NullOutputFormat.class);

  TextInputFormat.addInputPath(job, new Path(args[0]));
  return job.waitForCompletion(true) ? 0 : 1;
}
Example 15: initialiseInput
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import the package/class required by this method
private void initialiseInput(final Job job, final MapReduce operation) throws IOException {
  job.setInputFormatClass(TextInputFormat.class);
  // Register every input path whose mapper-generator entry matches the configured MAPPER_GENERATOR.
  for (final Map.Entry<String, String> entry : operation.getInputMapperPairs().entrySet()) {
    if (entry.getValue().contains(job.getConfiguration().get(MAPPER_GENERATOR))) {
      TextInputFormat.addInputPath(job, new Path(entry.getKey()));
    }
  }
}
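Because Example 15 adds paths conditionally inside a loop, it can be useful to read the accumulated list back before the job is submitted. The sketch below uses the inherited getInputPaths for that; the helper class is illustrative and not part of the original project.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public final class InputPathAudit {
  private InputPathAudit() {}

  public static void logInputs(Job job) {
    // getInputPaths returns every path registered so far via addInputPath/setInputPaths.
    for (Path p : TextInputFormat.getInputPaths(job)) {
      System.out.println("input path registered: " + p);
    }
  }
}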