本文整理汇总了Java中org.apache.hadoop.mapreduce.lib.map.RegexMapper类的典型用法代码示例。如果您正苦于以下问题:Java RegexMapper类的具体用法?Java RegexMapper怎么用?Java RegexMapper使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
RegexMapper类属于org.apache.hadoop.mapreduce.lib.map包,在下文中一共展示了RegexMapper类的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: doAnalyze
import org.apache.hadoop.mapreduce.lib.map.RegexMapper; //导入依赖的package包/类
/**
* doAnalyze:
* @param inputFilesDirectory : Directory containing the files to be analyzed.
* @param outputDirectory : Directory to store analysis (output).
* @param grepPattern : Pattern to *grep* for.
* @param sortColumns : Sort specification for output.
* @param columnSeparator : Column separator.
* @throws IOException
*/
public void
doAnalyze(String inputFilesDirectory, String outputDirectory,
String grepPattern, String sortColumns, String columnSeparator)
throws IOException
{
Path grepInput = new Path(inputFilesDirectory);
Path analysisOutput = null;
if (outputDirectory.equals("")) {
analysisOutput = new Path(inputFilesDirectory, "logalyzer_" +
Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
} else {
analysisOutput = new Path(outputDirectory);
}
JobConf grepJob = new JobConf(fsConfig);
grepJob.setJobName("logalyzer-grep-sort");
FileInputFormat.setInputPaths(grepJob, grepInput);
grepJob.setInputFormat(TextInputFormat.class);
grepJob.setMapperClass(LogRegexMapper.class);
grepJob.set(RegexMapper.PATTERN, grepPattern);
grepJob.set(SORT_COLUMNS, sortColumns);
grepJob.set(COLUMN_SEPARATOR, columnSeparator);
grepJob.setCombinerClass(LongSumReducer.class);
grepJob.setReducerClass(LongSumReducer.class);
FileOutputFormat.setOutputPath(grepJob, analysisOutput);
grepJob.setOutputFormat(TextOutputFormat.class);
grepJob.setOutputKeyClass(Text.class);
grepJob.setOutputValueClass(LongWritable.class);
grepJob.setOutputKeyComparatorClass(LogComparator.class);
grepJob.setNumReduceTasks(1); // write a single file
JobClient.runJob(grepJob);
}
示例2: run
import org.apache.hadoop.mapreduce.lib.map.RegexMapper; //导入依赖的package包/类
public int run(String[] args) throws Exception {
if (args.length < 3) {
System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
ToolRunner.printGenericCommandUsage(System.out);
return 2;
}
Path tempDir =
new Path("grep-temp-"+
Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
Configuration conf = getConf();
conf.set(RegexMapper.PATTERN, args[2]);
if (args.length == 4)
conf.set(RegexMapper.GROUP, args[3]);
Job grepJob = Job.getInstance(conf);
try {
grepJob.setJobName("grep-search");
grepJob.setJarByClass(Grep.class);
FileInputFormat.setInputPaths(grepJob, args[0]);
grepJob.setMapperClass(RegexMapper.class);
grepJob.setCombinerClass(LongSumReducer.class);
grepJob.setReducerClass(LongSumReducer.class);
FileOutputFormat.setOutputPath(grepJob, tempDir);
grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
grepJob.setOutputKeyClass(Text.class);
grepJob.setOutputValueClass(LongWritable.class);
grepJob.waitForCompletion(true);
Job sortJob = Job.getInstance(conf);
sortJob.setJobName("grep-sort");
sortJob.setJarByClass(Grep.class);
FileInputFormat.setInputPaths(sortJob, tempDir);
sortJob.setInputFormatClass(SequenceFileInputFormat.class);
sortJob.setMapperClass(InverseMapper.class);
sortJob.setNumReduceTasks(1); // write a single file
FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
sortJob.setSortComparatorClass( // sort by decreasing freq
LongWritable.DecreasingComparator.class);
sortJob.waitForCompletion(true);
}
finally {
FileSystem.get(conf).delete(tempDir, true);
}
return 0;
}
示例3: configure
import org.apache.hadoop.mapreduce.lib.map.RegexMapper; //导入依赖的package包/类
public void configure(JobConf job) {
pattern = Pattern.compile(job.get(RegexMapper.PATTERN));
}
示例4: run
import org.apache.hadoop.mapreduce.lib.map.RegexMapper; //导入依赖的package包/类
public int run(String[] args) throws Exception {
if (args.length < 3) {
System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
ToolRunner.printGenericCommandUsage(System.out);
return 2;
}
Path tempDir =
new Path("grep-temp-"+
Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
Configuration conf = getConf();
conf.set(RegexMapper.PATTERN, args[2]);
if (args.length == 4)
conf.set(RegexMapper.GROUP, args[3]);
Job grepJob = new Job(conf);
try {
grepJob.setJobName("grep-search");
FileInputFormat.setInputPaths(grepJob, args[0]);
grepJob.setMapperClass(RegexMapper.class);
grepJob.setCombinerClass(LongSumReducer.class);
grepJob.setReducerClass(LongSumReducer.class);
FileOutputFormat.setOutputPath(grepJob, tempDir);
grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
grepJob.setOutputKeyClass(Text.class);
grepJob.setOutputValueClass(LongWritable.class);
grepJob.waitForCompletion(true);
Job sortJob = new Job(conf);
sortJob.setJobName("grep-sort");
FileInputFormat.setInputPaths(sortJob, tempDir);
sortJob.setInputFormatClass(SequenceFileInputFormat.class);
sortJob.setMapperClass(InverseMapper.class);
sortJob.setNumReduceTasks(1); // write a single file
FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
sortJob.setSortComparatorClass( // sort by decreasing freq
LongWritable.DecreasingComparator.class);
sortJob.waitForCompletion(true);
}
finally {
FileSystem.get(conf).delete(tempDir, true);
}
return 0;
}
示例5: run
import org.apache.hadoop.mapreduce.lib.map.RegexMapper; //导入依赖的package包/类
@SuppressWarnings("deprecation")
public int run(String[] args) throws Exception {
long random = new Random().nextLong();
log.info("random -> " + random);
// 第三个参数为抓取的单词目标
args = new String[] { String.format(ConfigUtils.HDFS.WORDCOUNT_IN, "word.txt"), String.format(ConfigUtils.HDFS.WORDCOUNT_OUT, random),"d" };
Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
Configuration conf = getConf();
conf.set(RegexMapper.PATTERN, args[2]);
if (args.length == 4)
conf.set(RegexMapper.GROUP, args[3]);
Job grepJob = new Job(conf);
try {
grepJob.setJobName("grep-search");
FileInputFormat.setInputPaths(grepJob, args[0]);
grepJob.setMapperClass(RegexMapper.class);
grepJob.setCombinerClass(LongSumReducer.class);
grepJob.setReducerClass(LongSumReducer.class);
FileOutputFormat.setOutputPath(grepJob, tempDir);
grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
grepJob.setOutputKeyClass(Text.class);
grepJob.setOutputValueClass(LongWritable.class);
grepJob.waitForCompletion(true);
Job sortJob = new Job(conf);
sortJob.setJobName("grep-sort");
FileInputFormat.setInputPaths(sortJob, tempDir);
sortJob.setInputFormatClass(SequenceFileInputFormat.class);
sortJob.setMapperClass(InverseMapper.class);
sortJob.setNumReduceTasks(1); // write a single file
FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
sortJob.setSortComparatorClass( // sort by decreasing freq
LongWritable.DecreasingComparator.class);
sortJob.waitForCompletion(true);
} finally {
FileSystem.get(conf).delete(tempDir, true);
}
return 0;
}
示例6: run
import org.apache.hadoop.mapreduce.lib.map.RegexMapper; //导入依赖的package包/类
public int run(String[] args) throws Exception {
if (args.length < 3) {
System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
ToolRunner.printGenericCommandUsage(System.out);
return 2;
}
Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
Configuration conf = getConf();
conf.set(RegexMapper.PATTERN, args[2]);
if (args.length == 4)
conf.set(RegexMapper.GROUP, args[3]);
Job grepJob = new Job(conf);
try {
grepJob.setJobName("grep-search");
FileInputFormat.setInputPaths(grepJob, args[0]);
grepJob.setMapperClass(RegexMapper.class);
grepJob.setCombinerClass(LongSumReducer.class);
grepJob.setReducerClass(LongSumReducer.class);
FileOutputFormat.setOutputPath(grepJob, tempDir);
grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
grepJob.setOutputKeyClass(Text.class);
grepJob.setOutputValueClass(LongWritable.class);
grepJob.waitForCompletion(true);
Job sortJob = new Job(conf);
sortJob.setJobName("grep-sort");
FileInputFormat.setInputPaths(sortJob, tempDir);
sortJob.setInputFormatClass(SequenceFileInputFormat.class);
sortJob.setMapperClass(InverseMapper.class);
sortJob.setNumReduceTasks(1); // write a single file
FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
sortJob.setSortComparatorClass( // sort by decreasing freq
LongWritable.DecreasingComparator.class);
sortJob.waitForCompletion(true);
} finally {
FileSystem.get(conf).delete(tempDir, true);
}
return 0;
}
示例7: run
import org.apache.hadoop.mapreduce.lib.map.RegexMapper; //导入依赖的package包/类
public int run(String[] args) throws Exception {
if (args.length < 3) {
System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
ToolRunner.printGenericCommandUsage(System.out);
org.apache.hadoop.util.Tool t;
return 2;
}
Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
Configuration conf = getConf();
conf.set(RegexMapper.PATTERN, args[2]);
if (args.length == 4)
conf.set(RegexMapper.GROUP, args[3]);
Job grepJob = Job.getInstance(conf);
try {
grepJob.setJobName("grep-search");
FileInputFormat.setInputPaths(grepJob, args[0]);
grepJob.setMapperClass(RegexMapper.class);
grepJob.setCombinerClass(LongSumReducer.class);
grepJob.setReducerClass(LongSumReducer.class);
FileOutputFormat.setOutputPath(grepJob, tempDir);
grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
grepJob.setOutputKeyClass(Text.class);
grepJob.setOutputValueClass(LongWritable.class);
grepJob.waitForCompletion(true);
Job sortJob = Job.getInstance(conf);
sortJob.setJobName("grep-sort");
FileInputFormat.setInputPaths(sortJob, tempDir);
sortJob.setInputFormatClass(SequenceFileInputFormat.class);
sortJob.setMapperClass(InverseMapper.class);
sortJob.setNumReduceTasks(1); // write a single file
FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
sortJob.setSortComparatorClass( // sort by decreasing freq
LongWritable.DecreasingComparator.class);
sortJob.waitForCompletion(true);
} finally {
FileSystem.get(conf).delete(tempDir, true);
}
return 0;
}