This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.TextInputFormat.setInputPaths. If you are unsure how TextInputFormat.setInputPaths works, how to call it, or what real-world uses of it look like, the curated code examples below should help. You can also explore further usage examples for the enclosing class, org.apache.hadoop.mapreduce.lib.input.TextInputFormat.
The following presents 15 code examples of TextInputFormat.setInputPaths, sorted by popularity by default.
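Before the examples, here is a minimal, self-contained sketch of the pattern they all share: declare TextInputFormat as the job's input format and point it at the input with TextInputFormat.setInputPaths, which takes one or more Path arguments or, as Examples 5 and 9 show, a comma-separated String of paths. The class names SetInputPathsSketch and PassThroughMapper, the job name, and the map-only output settings are placeholders chosen for illustration, not taken from any of the listed examples.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetInputPathsSketch {
  // Pass-through mapper used only to make the sketch runnable; it simply
  // re-emits each line keyed by its byte offset. Not taken from the examples below.
  public static class PassThroughMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      context.write(new Text(Long.toString(key.get())), value);
    }
  }

  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "setInputPaths-sketch");
    job.setJarByClass(SetInputPathsSketch.class);
    job.setMapperClass(PassThroughMapper.class);
    job.setNumReduceTasks(0); // map-only job, so no reducer is configured
    job.setInputFormatClass(TextInputFormat.class);
    // The method under discussion: point the job at the input directory or file.
    // It also accepts a varargs list of Paths or a comma-separated String of paths.
    TextInputFormat.setInputPaths(job, new Path(args[0]));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    TextOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
Assuming the class is packaged into an application jar (app.jar here is only a placeholder name), it could be launched with something like: hadoop jar app.jar SetInputPathsSketch /path/to/input /path/to/output.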
Example 1: main
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  job.setMapperClass(SumMapper.class);
  job.setReducerClass(SumReducer.class);
  job.setJarByClass(Sum.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);
  TextInputFormat.setInputPaths(job, new Path(args[0]));
  TextOutputFormat.setOutputPath(job, new Path(args[1]));
  job.waitForCompletion(true);
}
Example 2: main
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  job.setMapperClass(DataDividerMapper.class);
  job.setReducerClass(DataDividerReducer.class);
  job.setJarByClass(DataDividerByUser.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(Text.class);
  TextInputFormat.setInputPaths(job, new Path(args[0]));
  TextOutputFormat.setOutputPath(job, new Path(args[1]));
  job.waitForCompletion(true);
}
Example 3: main
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  job.setMapperClass(NormalizeMapper.class);
  job.setReducerClass(NormalizeReducer.class);
  job.setJarByClass(Normalize.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  TextInputFormat.setInputPaths(job, new Path(args[0]));
  TextOutputFormat.setOutputPath(job, new Path(args[1]));
  job.waitForCompletion(true);
}
Example 4: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
@Override
public int run(String[] args) throws Exception {
  Opts opts = new Opts();
  opts.parseArgs(WordCount.class.getName(), args);
  Job job = Job.getInstance(getConf());
  job.setJobName(WordCount.class.getName());
  job.setJarByClass(this.getClass());
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.setInputPaths(job, new Path(opts.inputDirectory));
  job.setMapperClass(MapClass.class);
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(AccumuloOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Mutation.class);
  opts.setAccumuloConfigs(job);
  job.waitForCompletion(true);
  return 0;
}
Example 5: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
@Override
public int run(String[] args) throws Exception {
  String instance = args[0];
  String zookeepers = args[1];
  String user = args[2];
  String tokenFile = args[3];
  String input = args[4];
  String tableName = args[5];
  Job job = Job.getInstance(getConf());
  job.setJobName(TokenFileWordCount.class.getName());
  job.setJarByClass(this.getClass());
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.setInputPaths(job, input);
  job.setMapperClass(MapClass.class);
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(AccumuloOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Mutation.class);
  // AccumuloInputFormat is not used here, but it uses the same functions.
  AccumuloOutputFormat.setZooKeeperInstance(job, ClientConfiguration.loadDefault().withInstance(instance).withZkHosts(zookeepers));
  AccumuloOutputFormat.setConnectorInfo(job, user, tokenFile);
  AccumuloOutputFormat.setCreateTables(job, true);
  AccumuloOutputFormat.setDefaultTableName(job, tableName);
  job.waitForCompletion(true);
  return 0;
}
Example 6: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public int run(String[] args) throws Exception {
  MetaConfig config = new MetaConfig();
  config.fromArray(args);
  // LOGGER.info(config.getInput());
  // LOGGER.info(config.getOutput());
  Job job = Job.getInstance(getConf());
  job.setJobName("meta");
  job.setJarByClass(MetaDriver.class);
  // define the paths
  Path mapInputPath = new Path(config.getInput());
  Path mapOutputPath = new Path(config.getOutput());
  // define the mapper
  job.setMapperClass(MetaMapper.class);
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.setInputPaths(job, mapInputPath);
  // define the reducer (identity reducer)
  job.setNumReduceTasks(NUM_REDUCER);
  // output
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  TextOutputFormat.setOutputPath(job, mapOutputPath);
  // clean up the old output
  mapOutputPath.getFileSystem(job.getConfiguration()).delete(mapOutputPath, true);
  // run the job and wait until it completes
  return job.waitForCompletion(true) ? 0 : 1;
}
Example 7: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public int run(String[] args) throws Exception {
  PreSortConfig config = new PreSortConfig();
  config.fromArray(args);
  Job job = Job.getInstance(getConf());
  job.setJobName("pre-sort");
  job.setJarByClass(PreSortDriver.class);
  Path mapInputPath = new Path(config.getInput());
  Path mapOutputPath = new Path(config.getOutput());
  LOGGER.info("use " + mapInputPath.toString() + " as pre-sort input ");
  LOGGER.info("use " + mapOutputPath.toString() + " as pre-sort output ");
  // define the mapper
  job.getConfiguration().set(PreSortMapper.COLUMN_INDEX_CONFIG_NAME, config.getKeyColumnAsString());
  job.setMapperClass(PreSortMapper.class);
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.setInputPaths(job, mapInputPath);
  // define the reducer
  job.setNumReduceTasks(NUM_REDUCER);
  // define the output; NOTE: no custom reducer class is set, so the identity reducer is used
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, mapOutputPath);
  // clean up the output folder
  mapOutputPath.getFileSystem(job.getConfiguration()).delete(mapOutputPath, true);
  // run the job and wait until it completes
  return job.waitForCompletion(true) ? 0 : 1;
}
Example 8: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
@Override
public int run(String[] args) throws Exception {
  if (args.length != 3) {
    System.err.println("Usage: Tester <input> <bloom filter> <output>");
    System.exit(1);
  }
  Path input = new Path(args[0]);
  URI bloom = new URI(args[1]);
  Path output = new Path(args[2]);
  // create the Job object and set the jar by class
  Job job = Job.getInstance(getConf(), "Bloom Filtering");
  job.setJarByClass(MRBloomFilter.class);
  // add the Bloom filter file URI to the distributed cache
  job.addCacheFile(bloom);
  // set the mapper class
  job.setMapperClass(BloomMapper.class);
  // map-only job: set the number of reduce tasks to 0
  job.setNumReduceTasks(0);
  // set the input paths
  TextInputFormat.setInputPaths(job, input);
  // set the output path
  TextOutputFormat.setOutputPath(job, output);
  // set the output key class to Text
  job.setOutputKeyClass(Text.class);
  // set the output value class to NullWritable
  job.setOutputValueClass(NullWritable.class);
  // execute the job and return 0 if successful
  return job.waitForCompletion(true) ? 0 : 1;
}
Example 9: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public int run(String[] args) throws Exception {
  String input = args[0];
  Configuration conf = getConf();
  Job job = Job.getInstance(conf, Mapper2HbaseDemo.class.getSimpleName());
  job.setJarByClass(Mapper2HbaseDemo.class);
  job.setMapperClass(Map.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.setInputPaths(job, input);
  job.setOutputFormatClass(NullOutputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
Example 10: main
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {
  // configure the n-gram MapReduce job
  Configuration conf1 = new Configuration();
  conf1.set("textinputformat.record.delimiter", "."); // read a complete sentence as one record
  conf1.set("GRAM_NUMBER", args[2]);
  Job job1 = Job.getInstance(conf1);
  job1.setNumReduceTasks(3);
  job1.setJobName("NGram");
  job1.setJarByClass(Dispatcher.class);
  job1.setMapperClass(NGramBuilder.NGramMapper.class);
  job1.setReducerClass(NGramBuilder.NGramReducer.class);
  job1.setOutputKeyClass(Text.class);
  job1.setOutputValueClass(IntWritable.class);
  job1.setInputFormatClass(TextInputFormat.class); // default format: reads lines of text files
  job1.setOutputFormatClass(TextOutputFormat.class); // default format: key \t value
  TextInputFormat.setInputPaths(job1, new Path(args[0]));
  TextOutputFormat.setOutputPath(job1, new Path(args[1]));
  job1.waitForCompletion(true); // the language model job must not start until the n-gram library is completely built
  // configure the language model MapReduce job
  Configuration conf2 = new Configuration();
  conf2.set("THRESHOLD", args[3]);
  conf2.set("TOP_K", args[4]);
  DBConfiguration.configureDB(conf2, "com.mysql.jdbc.Driver", "jdbc:mysql://127.0.0.1:3306/tp", "root", "123456"); // establish a connection to the MySQL database
  Job job2 = Job.getInstance(conf2);
  job2.setNumReduceTasks(3);
  job2.setJobName("LModel");
  job2.setJarByClass(Dispatcher.class);
  job2.addArchiveToClassPath(new Path("/mysql/mysql-connector-java-5.1.39-bin.jar")); // putting this jar file into jre/lib/ext is recommended
  job2.setMapperClass(LanguageModel.ModelMapper.class);
  job2.setReducerClass(LanguageModel.ModelReducer.class);
  job2.setMapOutputKeyClass(Text.class); // the mapper emits a different key type than the reducer
  job2.setMapOutputValueClass(Text.class); // the mapper emits a different value type than the reducer
  job2.setOutputKeyClass(DBOutputWritable.class);
  job2.setOutputValueClass(NullWritable.class);
  job2.setInputFormatClass(TextInputFormat.class);
  job2.setOutputFormatClass(DBOutputFormat.class);
  TextInputFormat.setInputPaths(job2, new Path(args[1]));
  DBOutputFormat.setOutput(job2, "LanguageModel", new String[] {"starter", "follower", "probability"});
  System.exit(job2.waitForCompletion(true) ? 0 : 1);
}
Example 11: testCombiner
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
@Test
public void testCombiner() throws Exception {
  if (!new File(TEST_ROOT_DIR).mkdirs()) {
    throw new RuntimeException("Could not create test dir: " + TEST_ROOT_DIR);
  }
  File in = new File(TEST_ROOT_DIR, "input");
  if (!in.mkdirs()) {
    throw new RuntimeException("Could not create test dir: " + in);
  }
  File out = new File(TEST_ROOT_DIR, "output");
  PrintWriter pw = new PrintWriter(new FileWriter(new File(in, "data.txt")));
  pw.println("A|a,1");
  pw.println("A|b,2");
  pw.println("B|a,3");
  pw.println("B|b,4");
  pw.println("B|c,5");
  pw.close();
  JobConf conf = new JobConf();
  conf.set("mapreduce.framework.name", "local");
  Job job = new Job(conf);
  TextInputFormat.setInputPaths(job, new Path(in.getPath()));
  TextOutputFormat.setOutputPath(job, new Path(out.getPath()));
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(LongWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setGroupingComparatorClass(GroupComparator.class);
  job.setCombinerKeyGroupingComparatorClass(GroupComparator.class);
  job.setCombinerClass(Combiner.class);
  job.getConfiguration().setInt("min.num.spills.for.combine", 0);
  job.submit();
  job.waitForCompletion(false);
  if (job.isSuccessful()) {
    Counters counters = job.getCounters();
    long combinerInputRecords = counters.findCounter(
        "org.apache.hadoop.mapreduce.TaskCounter",
        "COMBINE_INPUT_RECORDS").getValue();
    long combinerOutputRecords = counters.findCounter(
        "org.apache.hadoop.mapreduce.TaskCounter",
        "COMBINE_OUTPUT_RECORDS").getValue();
    Assert.assertTrue(combinerInputRecords > 0);
    Assert.assertTrue(combinerInputRecords > combinerOutputRecords);
    BufferedReader br = new BufferedReader(new FileReader(
        new File(out, "part-r-00000")));
    Set<String> output = new HashSet<String>();
    String line = br.readLine();
    Assert.assertNotNull(line);
    output.add(line.substring(0, 1) + line.substring(4, 5));
    line = br.readLine();
    Assert.assertNotNull(line);
    output.add(line.substring(0, 1) + line.substring(4, 5));
    line = br.readLine();
    Assert.assertNull(line);
    br.close();
    Set<String> expected = new HashSet<String>();
    expected.add("A2");
    expected.add("B5");
    Assert.assertEquals(expected, output);
  } else {
    Assert.fail("Job failed");
  }
}
Example 12: main
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  job.setMapperClass(MatrixGeneratorMapper.class);
  job.setReducerClass(MatrixGeneratorReducer.class);
  job.setJarByClass(CoOccurrenceMatrixGenerator.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  TextInputFormat.setInputPaths(job, new Path(args[0]));
  TextOutputFormat.setOutputPath(job, new Path(args[1]));
  job.waitForCompletion(true);
}
Example 13: main
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {
  Configuration conf1 = new Configuration();
  conf1.set("textinputformat.record.delimiter", ".");
  conf1.set("noGram", args[2]);
  // first job
  Job job1 = Job.getInstance(conf1);
  job1.setJobName("NGram");
  job1.setJarByClass(Driver.class);
  job1.setMapperClass(NGramLibraryBuilder.NGramMapper.class);
  job1.setReducerClass(NGramLibraryBuilder.NGramReducer.class);
  job1.setOutputKeyClass(Text.class);
  job1.setOutputValueClass(IntWritable.class);
  job1.setInputFormatClass(TextInputFormat.class);
  job1.setOutputFormatClass(TextOutputFormat.class);
  TextInputFormat.setInputPaths(job1, new Path(args[0]));
  TextOutputFormat.setOutputPath(job1, new Path(args[1]));
  job1.waitForCompletion(true);
  // second job
  Configuration conf2 = new Configuration();
  conf2.set("threashold", args[3]);
  conf2.set("n", args[4]);
  DBConfiguration.configureDB(conf2,
      "com.mysql.jdbc.Driver",                // driver class
      "jdbc:mysql://10.101.0.163:8889/test",  // database URL
      "root",                                 // user name
      "root");                                // password
  Job job2 = Job.getInstance(conf2);
  job2.setJobName("LanguageModel");
  job2.setJarByClass(Driver.class);
  job2.addArchiveToClassPath(new Path("/mysql/mysql-connector-java-5.1.39-bin.jar"));
  job2.setMapOutputKeyClass(Text.class);
  job2.setMapOutputValueClass(Text.class);
  job2.setOutputKeyClass(Text.class);
  job2.setOutputValueClass(NullWritable.class);
  job2.setMapperClass(LanguageModel.Map.class);
  job2.setReducerClass(LanguageModel.Reduce.class);
  job2.setInputFormatClass(TextInputFormat.class);
  job2.setOutputFormatClass(DBOutputFormat.class);
  DBOutputFormat.setOutput(
      job2,
      "output", // output table name
      new String[] { "starting_phrase", "following_word", "count" } // table columns
  );
  // the input path for this job must match the first job's output path
  TextInputFormat.setInputPaths(job2, new Path(args[1]));
  System.exit(job2.waitForCompletion(true) ? 0 : 1);
}
Example 14: run
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
@Override
public int run(String[] args) {
  Opts opts = new Opts();
  opts.parseArgs(BulkIngestExample.class.getName(), args);
  Configuration conf = getConf();
  PrintStream out = null;
  try {
    Job job = Job.getInstance(conf);
    job.setJobName("bulk ingest example");
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(MapClass.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(ReduceClass.class);
    job.setOutputFormatClass(AccumuloFileOutputFormat.class);
    opts.setAccumuloConfigs(job);
    Connector connector = opts.getConnector();
    TextInputFormat.setInputPaths(job, new Path(opts.inputDir));
    AccumuloFileOutputFormat.setOutputPath(job, new Path(opts.workDir + "/files"));
    FileSystem fs = FileSystem.get(conf);
    out = new PrintStream(new BufferedOutputStream(fs.create(new Path(opts.workDir + "/splits.txt"))));
    Collection<Text> splits = connector.tableOperations().listSplits(opts.getTableName(), 100);
    for (Text split : splits)
      out.println(Base64.getEncoder().encodeToString(TextUtil.getBytes(split)));
    job.setNumReduceTasks(splits.size() + 1);
    out.close();
    job.setPartitionerClass(RangePartitioner.class);
    RangePartitioner.setSplitFile(job, opts.workDir + "/splits.txt");
    job.waitForCompletion(true);
    Path failures = new Path(opts.workDir, "failures");
    fs.delete(failures, true);
    fs.mkdirs(new Path(opts.workDir, "failures"));
    // With HDFS permissions on, we need to make sure the Accumulo user can read/move the rfiles
    FsShell fsShell = new FsShell(conf);
    fsShell.run(new String[] {"-chmod", "-R", "777", opts.workDir});
    connector.tableOperations().importDirectory(opts.getTableName(), opts.workDir + "/files", opts.workDir + "/failures", false);
  } catch (Exception e) {
    throw new RuntimeException(e);
  } finally {
    if (out != null)
      out.close();
  }
  return 0;
}
Example 15: testCombiner
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; // import required by this example
public void testCombiner() throws Exception {
  if (!new File(TEST_ROOT_DIR).mkdirs()) {
    throw new RuntimeException("Could not create test dir: " + TEST_ROOT_DIR);
  }
  File in = new File(TEST_ROOT_DIR, "input");
  if (!in.mkdirs()) {
    throw new RuntimeException("Could not create test dir: " + in);
  }
  File out = new File(TEST_ROOT_DIR, "output");
  PrintWriter pw = new PrintWriter(new FileWriter(new File(in, "data.txt")));
  pw.println("A|a,1");
  pw.println("A|b,2");
  pw.println("B|a,3");
  pw.println("B|b,4");
  pw.println("B|c,5");
  pw.close();
  Job job = new Job();
  TextInputFormat.setInputPaths(job, new Path(in.getPath()));
  TextOutputFormat.setOutputPath(job, new Path(out.getPath()));
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(LongWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setGroupingComparatorClass(GroupComparator.class);
  job.setCombinerKeyGroupingComparatorClass(GroupComparator.class);
  job.setCombinerClass(Combiner.class);
  job.getConfiguration().setInt("min.num.spills.for.combine", 0);
  job.submit();
  job.waitForCompletion(false);
  if (job.isSuccessful()) {
    Counters counters = job.getCounters();
    long combinerInputRecords = counters.findCounter(
        "org.apache.hadoop.mapreduce.TaskCounter",
        "COMBINE_INPUT_RECORDS").getValue();
    long combinerOutputRecords = counters.findCounter(
        "org.apache.hadoop.mapreduce.TaskCounter",
        "COMBINE_OUTPUT_RECORDS").getValue();
    Assert.assertTrue(combinerInputRecords > 0);
    Assert.assertTrue(combinerInputRecords > combinerOutputRecords);
    BufferedReader br = new BufferedReader(new FileReader(
        new File(out, "part-r-00000")));
    Set<String> output = new HashSet<String>();
    String line = br.readLine();
    Assert.assertNotNull(line);
    output.add(line);
    line = br.readLine();
    Assert.assertNotNull(line);
    output.add(line);
    line = br.readLine();
    Assert.assertNull(line);
    br.close();
    Set<String> expected = new HashSet<String>();
    expected.add("A\t2");
    expected.add("B\t5");
    Assert.assertEquals(expected, output);
  } else {
    Assert.fail("Job failed");
  }
}