当前位置: 首页>>代码示例>>Java>>正文


Java RegexMapper类代码示例

本文整理汇总了Java中org.apache.hadoop.mapreduce.lib.map.RegexMapper的典型用法代码示例。如果您正苦于以下问题：Java RegexMapper类的具体用法？Java RegexMapper怎么用？Java RegexMapper使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


RegexMapper类属于org.apache.hadoop.mapreduce.lib.map包，在下文中一共展示了RegexMapper类的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: doAnalyze

import org.apache.hadoop.mapreduce.lib.map.RegexMapper; //导入依赖的package包/类
/**
 * Configures and runs one "logalyzer-grep-sort" job over a directory of files.
 *
 * <p>The job reads text input, maps with {@code LogRegexMapper}, uses
 * {@code LongSumReducer} as both combiner and reducer, orders output keys with
 * {@code LogComparator}, and writes text output through a single reduce task.
 *
 * <p>When {@code outputDirectory} is the empty string, the output goes to a
 * child of the input directory named {@code logalyzer_} followed by a random
 * non-negative integer.
 *
 * @param inputFilesDirectory directory containing the files to be analyzed
 * @param outputDirectory directory to store the analysis output, or {@code ""}
 *        to derive one under the input directory
 * @param grepPattern pattern to grep for
 * @param sortColumns sort specification for the output
 * @param columnSeparator column separator
 * @throws IOException declared by this method; presumably raised while
 *         submitting or running the job (confirm against JobClient)
 */
public void doAnalyze(String inputFilesDirectory, String outputDirectory,
                      String grepPattern, String sortColumns, String columnSeparator)
    throws IOException {
  final Path sourceDir = new Path(inputFilesDirectory);

  // An empty output directory means "pick a fresh one under the input directory".
  final Path resultDir = outputDirectory.isEmpty()
      ? new Path(inputFilesDirectory,
                 "logalyzer_" + new Random().nextInt(Integer.MAX_VALUE))
      : new Path(outputDirectory);

  final JobConf job = new JobConf(fsConfig);
  job.setJobName("logalyzer-grep-sort");

  // Input side.
  FileInputFormat.setInputPaths(job, sourceDir);
  job.setInputFormat(TextInputFormat.class);

  // Map stage and the settings handed to it through the configuration.
  job.setMapperClass(LogRegexMapper.class);
  job.set(RegexMapper.PATTERN, grepPattern);
  job.set(SORT_COLUMNS, sortColumns);
  job.set(COLUMN_SEPARATOR, columnSeparator);

  // The same summing reducer serves as combiner and reducer.
  job.setCombinerClass(LongSumReducer.class);
  job.setReducerClass(LongSumReducer.class);

  // Output side.
  FileOutputFormat.setOutputPath(job, resultDir);
  job.setOutputFormat(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  job.setOutputKeyComparatorClass(LogComparator.class);

  // One reducer so the result lands in a single file.
  job.setNumReduceTasks(1);

  JobClient.runJob(job);
}
 
开发者ID:yncxcw,项目名称:big-c,代码行数:49,代码来源:Logalyzer.java

示例2: run

import org.apache.hadoop.mapreduce.lib.map.RegexMapper; //导入依赖的package包/类
/**
 * Runs a "grep-search" job followed by a "grep-sort" job.
 *
 * <p>The search job maps with {@code RegexMapper} and uses
 * {@code LongSumReducer} as combiner and reducer, writing sequence files to a
 * randomly named temporary directory. The sort job reads that directory, maps
 * with {@code InverseMapper}, and writes a single file sorted by decreasing
 * frequency to the requested output directory. The temporary directory is
 * deleted whether or not the jobs succeed.
 *
 * @param args {@code <inDir> <outDir> <regex> [<group>]}
 * @return 2 if too few arguments were given, 1 if either job fails, 0 on success
 * @throws Exception if job setup, execution, or temporary-directory cleanup fails
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  Job grepJob = Job.getInstance(conf);

  try {

    grepJob.setJobName("grep-search");
    grepJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Do not sort the output of a failed search; report the failure instead.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");
    sortJob.setJarByClass(Grep.class);

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    // Propagate the sort job's outcome as the exit code.
    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    // Runs on every exit path, including the early returns above.
    FileSystem.get(conf).delete(tempDir, true);
  }
}
 
开发者ID:naver,项目名称:hadoop,代码行数:59,代码来源:Grep.java

示例3: configure

import org.apache.hadoop.mapreduce.lib.map.RegexMapper; //导入依赖的package包/类
/**
 * Reads the regular expression stored under {@code RegexMapper.PATTERN} in the
 * job configuration, compiles it, and stores the result in {@code pattern}.
 *
 * @param job job configuration to read the pattern string from
 */
public void configure(JobConf job) {
  final String regex = job.get(RegexMapper.PATTERN);
  pattern = Pattern.compile(regex);
}
 
开发者ID:naver,项目名称:hadoop,代码行数:4,代码来源:Logalyzer.java

示例4: run

import org.apache.hadoop.mapreduce.lib.map.RegexMapper; //导入依赖的package包/类
/**
 * Runs a "grep-search" job followed by a "grep-sort" job.
 *
 * <p>The search job maps with {@code RegexMapper} and uses
 * {@code LongSumReducer} as combiner and reducer, writing sequence files to a
 * randomly named temporary directory. The sort job reads that directory, maps
 * with {@code InverseMapper}, and writes a single file sorted by decreasing
 * frequency to the requested output directory. The temporary directory is
 * deleted whether or not the jobs succeed.
 *
 * @param args {@code <inDir> <outDir> <regex> [<group>]}
 * @return 2 if too few arguments were given, 1 if either job fails, 0 on success
 * @throws Exception if job setup, execution, or temporary-directory cleanup fails
 */
public int run(String[] args) throws Exception {
  if (args.length < 3) {
    System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  Path tempDir =
    new Path("grep-temp-"+
        Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  Configuration conf = getConf();
  conf.set(RegexMapper.PATTERN, args[2]);
  if (args.length == 4) {
    conf.set(RegexMapper.GROUP, args[3]);
  }

  // Factory method replaces the deprecated Job(Configuration) constructor.
  Job grepJob = Job.getInstance(conf);

  try {

    grepJob.setJobName("grep-search");

    FileInputFormat.setInputPaths(grepJob, args[0]);

    grepJob.setMapperClass(RegexMapper.class);

    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(grepJob, tempDir);
    grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(Text.class);
    grepJob.setOutputValueClass(LongWritable.class);

    // Do not sort the output of a failed search; report the failure instead.
    if (!grepJob.waitForCompletion(true)) {
      return 1;
    }

    Job sortJob = Job.getInstance(conf);
    sortJob.setJobName("grep-sort");

    FileInputFormat.setInputPaths(sortJob, tempDir);
    sortJob.setInputFormatClass(SequenceFileInputFormat.class);

    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
    sortJob.setSortComparatorClass(          // sort by decreasing freq
      LongWritable.DecreasingComparator.class);

    // Propagate the sort job's outcome as the exit code.
    return sortJob.waitForCompletion(true) ? 0 : 1;
  }
  finally {
    // Runs on every exit path, including the early returns above.
    FileSystem.get(conf).delete(tempDir, true);
  }
}
 
开发者ID:Nextzero,项目名称:hadoop-2.6.0-cdh5.4.3,代码行数:57,代码来源:Grep.java

示例5: run

import org.apache.hadoop.mapreduce.lib.map.RegexMapper; //导入依赖的package包/类
/**
 * Runs a "grep-search" job followed by a "grep-sort" job against fixed HDFS
 * locations.
 *
 * <p>The incoming {@code args} are discarded and replaced with an input path
 * built from {@code ConfigUtils.HDFS.WORDCOUNT_IN}, an output path built from
 * {@code ConfigUtils.HDFS.WORDCOUNT_OUT} and a random number, and the search
 * pattern {@code "d"}. The temporary directory holding the search output is
 * deleted whether or not the jobs succeed.
 *
 * @param args ignored; overwritten inside the method
 * @return 1 if either job fails, 0 on success
 * @throws Exception if job setup, execution, or temporary-directory cleanup fails
 */
@SuppressWarnings("deprecation")
public int run(String[] args) throws Exception {
    long random = new Random().nextLong();
    log.info("random -> " + random);
    // The third argument is the target word to grep for.
    args = new String[] { String.format(ConfigUtils.HDFS.WORDCOUNT_IN, "word.txt"), String.format(ConfigUtils.HDFS.WORDCOUNT_OUT, random),"d" };

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    // args always has exactly three elements here, so no capture group is
    // ever configured (RegexMapper.GROUP is left unset).
    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);

    // NOTE(review): Job(Configuration) is presumably what the deprecation
    // suppression above is for; kept because the Hadoop version is not visible.
    Job grepJob = new Job(conf);

    try {

        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        // Do not sort the output of a failed search; report the failure instead.
        if (!grepJob.waitForCompletion(true)) {
            return 1;
        }

        Job sortJob = new Job(conf);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setSortComparatorClass( // sort by decreasing freq
                LongWritable.DecreasingComparator.class);

        // Propagate the sort job's outcome as the exit code.
        return sortJob.waitForCompletion(true) ? 0 : 1;
    } finally {
        // Runs on every exit path, including the early returns above.
        FileSystem.get(conf).delete(tempDir, true);
    }
}
 
开发者ID:smartdengjie,项目名称:stats-hdfs,代码行数:54,代码来源:Grep.java

示例6: run

import org.apache.hadoop.mapreduce.lib.map.RegexMapper; //导入依赖的package包/类
/**
 * Runs a "grep-search" job followed by a "grep-sort" job.
 *
 * <p>The search job maps with {@code RegexMapper} and uses
 * {@code LongSumReducer} as combiner and reducer, writing sequence files to a
 * randomly named temporary directory. The sort job reads that directory, maps
 * with {@code InverseMapper}, and writes a single file sorted by decreasing
 * frequency to the requested output directory. The temporary directory is
 * deleted whether or not the jobs succeed.
 *
 * @param args {@code <inDir> <outDir> <regex> [<group>]}
 * @return 2 if too few arguments were given, 1 if either job fails, 0 on success
 * @throws Exception if job setup, execution, or temporary-directory cleanup fails
 */
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4) {
        conf.set(RegexMapper.GROUP, args[3]);
    }

    // Factory method replaces the deprecated Job(Configuration) constructor.
    Job grepJob = Job.getInstance(conf);

    try {

        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        // Do not sort the output of a failed search; report the failure instead.
        if (!grepJob.waitForCompletion(true)) {
            return 1;
        }

        Job sortJob = Job.getInstance(conf);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setSortComparatorClass( // sort by decreasing freq
                LongWritable.DecreasingComparator.class);

        // Propagate the sort job's outcome as the exit code.
        return sortJob.waitForCompletion(true) ? 0 : 1;
    } finally {
        // Runs on every exit path, including the early returns above.
        FileSystem.get(conf).delete(tempDir, true);
    }
}
 
开发者ID:xuzhikethinker,项目名称:t4f-data,代码行数:54,代码来源:WordCountTool.java

示例7: run

import org.apache.hadoop.mapreduce.lib.map.RegexMapper; //导入依赖的package包/类
/**
 * Runs a "grep-search" job followed by a "grep-sort" job.
 *
 * <p>The search job maps with {@code RegexMapper} and uses
 * {@code LongSumReducer} as combiner and reducer, writing sequence files to a
 * randomly named temporary directory. The sort job reads that directory, maps
 * with {@code InverseMapper}, and writes a single file sorted by decreasing
 * frequency to the requested output directory. The temporary directory is
 * deleted whether or not the jobs succeed.
 *
 * @param args {@code <inDir> <outDir> <regex> [<group>]}
 * @return 2 if too few arguments were given, 1 if either job fails, 0 on success
 * @throws Exception if job setup, execution, or temporary-directory cleanup fails
 */
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);

    if (args.length == 4) {
        conf.set(RegexMapper.GROUP, args[3]);
    }

    Job grepJob = Job.getInstance(conf);

    try {

        grepJob.setJobName("grep-search");

        FileInputFormat.setInputPaths(grepJob, args[0]);

        grepJob.setMapperClass(RegexMapper.class);

        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        // Do not sort the output of a failed search; report the failure instead.
        if (!grepJob.waitForCompletion(true)) {
            return 1;
        }

        Job sortJob = Job.getInstance(conf);
        sortJob.setJobName("grep-sort");

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setSortComparatorClass( // sort by decreasing freq
                LongWritable.DecreasingComparator.class);

        // Propagate the sort job's outcome as the exit code.
        return sortJob.waitForCompletion(true) ? 0 : 1;

    } finally {
        // Runs on every exit path, including the early returns above.
        FileSystem.get(conf).delete(tempDir, true);
    }
}
 
开发者ID:xuzhikethinker,项目名称:t4f-data,代码行数:58,代码来源:GrepTool.java


注:本文中的org.apache.hadoop.mapreduce.lib.map.RegexMapper类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。