本文整理汇总了Java中org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper.setMapperClass方法的典型用法代码示例。如果您正苦于以下问题：Java MultithreadedMapper.setMapperClass方法的具体用法？Java MultithreadedMapper.setMapperClass怎么用？Java MultithreadedMapper.setMapperClass使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper的用法示例。
在下文中一共展示了MultithreadedMapper.setMapperClass方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入方法依赖的package包/类
/**
 * Command-line entry point: configures and runs a job whose map phase is a
 * {@code MultithreadedMapper} wrapping {@code ZipContentMapper}.
 *
 * <p>Expected positional arguments (after Hadoop generic options have been
 * stripped by {@code GenericOptionsParser}):
 * <ol>
 *   <li>{@code configFile} - resource added to the job configuration</li>
 *   <li>{@code inputDir} - input path handed to {@code ZipContentInputFormat}</li>
 *   <li>{@code threadCount} - number of threads per map task</li>
 * </ol>
 *
 * <p>The process exits with status 2 on a usage error, 0 when the job
 * succeeds and 1 when it fails.
 *
 * @param args raw command-line arguments, possibly including generic Hadoop options
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Three positional arguments are consumed below, so all three must be present.
    if (otherArgs.length < 3) {
        System.err.println("Usage: MultithreadedZipContentLoader configFile inputDir threadCount");
        System.exit(2);
    }

    Job job = Job.getInstance(conf);
    job.setJarByClass(MultithreadedZipContentLoader.class);
    job.setInputFormatClass(ZipContentInputFormat.class);

    // MultithreadedMapper is the job's mapper; it delegates to ZipContentMapper.
    job.setMapperClass(MultithreadedMapper.class);
    MultithreadedMapper.setMapperClass(job, ZipContentMapper.class);
    // Read the thread count from the parsed arguments, not the raw ones: raw
    // args may still contain generic options, which shifts the indexes.
    MultithreadedMapper.setNumberOfThreads(job, Integer.parseInt(otherArgs[2]));

    job.setMapOutputKeyClass(DocumentURI.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(ContentOutputFormat.class);

    ZipContentInputFormat.setInputPaths(job, new Path(otherArgs[1]));

    // The resource is added to the job's own configuration object, which is
    // re-fetched from the job here.
    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
示例2: getOptions
import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入方法依赖的package包/类
/**
 * Builds the option holder for the multithreaded mapper mode.
 *
 * <p>The returned object exposes a {@code --threads} / {@code -j} option and,
 * when asked to prepare a job, installs {@code MultithreadedMapper} as the
 * job's mapper with the supplied mapper class as its delegate.
 *
 * @return a new {@code MapperModeOp} configured for multithreaded mapping
 */
@Override
public MapperModeOp getOptions() {
    final MapperModeOp multithreadedMode = new MapperModeOp() {
        // Initialised to the number of available processors.
        @Option(
            name = "--threads",
            aliases = "-j",
            required = false,
            usage = "Use NUMBER threads per mapper. defaults n processors.",
            metaVar = "NUMBER")
        private int concurrency = Runtime.getRuntime().availableProcessors();

        /**
         * Configures {@code job} to run {@code mapperClass} inside a
         * {@code MultithreadedMapper}.
         *
         * @param job the job being configured
         * @param mapperClass the mapper each worker thread runs
         */
        @Override
        public void prepareJobMapper(Job job, Class<LocalFeaturesMapper> mapperClass) {
            // A non-positive value is replaced by the processor count.
            final boolean threadCountInvalid = concurrency <= 0;
            if (threadCountInvalid) {
                concurrency = Runtime.getRuntime().availableProcessors();
            }

            job.setMapperClass(MultithreadedMapper.class);
            MultithreadedMapper.setNumberOfThreads(job, concurrency);
            MultithreadedMapper.setMapperClass(job, mapperClass);

            final String notice = "Using multithreaded mapper";
            System.out.println(notice);
        }
    };
    return multithreadedMode;
}
示例3: run
import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入方法依赖的package包/类
/**
 * Configures and runs the "Hadoop Downloader Utility" job.
 *
 * <p>When the parsed options request more than one thread, the job uses
 * {@code MultithreadedMapper} wrapping {@code DownloadMapper}; otherwise
 * {@code DownloadMapper} is used directly. Input is read with
 * {@code TextInputFormat}; output is written as a block-compressed sequence
 * file of {@code Text} keys and {@code BytesWritable} values.
 *
 * @param args raw command-line arguments; also stored in the job
 *             configuration under {@code ARGS_KEY}
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if option parsing, job setup or job execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final HadoopDownloaderOptions options = new HadoopDownloaderOptions(args);
    options.prepare(true);

    final Job job = new Job(getConf());
    job.setJarByClass(HadoopDownloader.class);
    job.setJobName("Hadoop Downloader Utility");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);

    final int threads = options.getNumberOfThreads();
    if (threads <= 1) {
        // Single-threaded: run the download mapper directly.
        job.setMapperClass(DownloadMapper.class);
    } else {
        // Multi-threaded: MultithreadedMapper delegates to DownloadMapper.
        job.setMapperClass(MultithreadedMapper.class);
        MultithreadedMapper.setMapperClass(job, DownloadMapper.class);
        MultithreadedMapper.setNumberOfThreads(job, threads);
    }

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(options.getNumberOfReducers());

    // Store the raw arguments in the job configuration under ARGS_KEY.
    job.getConfiguration().setStrings(ARGS_KEY, args);

    FileInputFormat.setInputPaths(job, options.getInputPaths());
    SequenceFileOutputFormat.setOutputPath(job, options.getOutputPath());
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    // Propagate the job outcome instead of always reporting success.
    return job.waitForCompletion(true) ? 0 : 1;
}
示例4: prepareJobMapper
import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入方法依赖的package包/类
/**
 * Configures {@code job} to run {@code mapperClass} inside a
 * {@code MultithreadedMapper}, then prints the configured thread count.
 *
 * <p>If the {@code concurrency} field is not positive it is reset to the
 * number of available processors before being applied.
 *
 * @param job the job being configured
 * @param mapperClass the mapper each worker thread runs
 */
@Override
public void prepareJobMapper(Job job, Class<SimpleTwitterPreprocessingMapper> mapperClass) {
    final boolean threadCountInvalid = concurrency <= 0;
    if (threadCountInvalid) {
        concurrency = Runtime.getRuntime().availableProcessors();
    }

    job.setMapperClass(MultithreadedMapper.class);
    MultithreadedMapper.setNumberOfThreads(job, concurrency);
    MultithreadedMapper.setMapperClass(job, mapperClass);

    // Report the value read back from the job rather than the local field.
    final int configuredThreads = MultithreadedMapper.getNumberOfThreads(job);
    System.out.println("NThreads = " + configuredThreads);
}
示例5: prepareJobMapper
import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入方法依赖的package包/类
/**
 * Configures {@code job} to run {@code mapperClass} inside a
 * {@code MultithreadedMapper}, then prints the configured thread count.
 *
 * <p>The thread count comes from {@code opts.getConcurrency()}; a
 * non-positive value is replaced by the number of available processors.
 *
 * @param job the job being configured
 * @param mapperClass the mapper each worker thread runs
 * @param opts options supplying the requested concurrency
 */
@Override
public void prepareJobMapper(Job job, Class<ClusterQuantiserMapper> mapperClass,
        AbstractClusterQuantiserOptions opts)
{
    // Read the option once so the validity check and the value used agree.
    final int requested = opts.getConcurrency();
    final int concurrency = requested > 0
            ? requested
            : Runtime.getRuntime().availableProcessors();

    job.setMapperClass(MultithreadedMapper.class);
    MultithreadedMapper.setNumberOfThreads(job, concurrency);
    MultithreadedMapper.setMapperClass(job, mapperClass);

    System.out.println("NThreads = " + MultithreadedMapper.getNumberOfThreads(job));
}
示例6: run
import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入方法依赖的package包/类
/**
 * Configures and runs the map-only "csv_test_job", which feeds CSV input
 * through {@code TestMapper} wrapped in a {@code MultithreadedMapper}.
 *
 * <p>The CSV reader settings (delimiter, separator, lines per map, zip flag)
 * are written into the configuration returned by {@code getConf()} before
 * the job is created. Output is discarded via {@code NullOutputFormat}.
 *
 * @param args command-line arguments (not used by this method)
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
public int run(String[] args) throws Exception {
    getConf().set(CSVLineRecordReader.FORMAT_DELIMITER, "\"");
    getConf().set(CSVLineRecordReader.FORMAT_SEPARATOR, ";");
    getConf().setInt(CSVNLineInputFormat.LINES_PER_MAP, 40000);
    getConf().setBoolean(CSVLineRecordReader.IS_ZIPFILE, false);

    Job csvJob = new Job(getConf(), "csv_test_job");
    csvJob.setJarByClass(CSVTestRunner.class);
    csvJob.setNumReduceTasks(0);

    // Multithreaded mapping: MultithreadedMapper delegates to TestMapper.
    // NOTE(review): this code previously set 8 threads and immediately
    // overwrote it with 1; the effective value (1) is kept here. Raise it if
    // real per-task parallelism is wanted.
    MultithreadedMapper.setMapperClass(csvJob, TestMapper.class);
    MultithreadedMapper.setNumberOfThreads(csvJob, 1);
    csvJob.setMapperClass(MultithreadedMapper.class);
    // To run without multithreading, replace the three statements above with
    // a single csvJob.setMapperClass(TestMapper.class) call.

    csvJob.setInputFormatClass(CSVNLineInputFormat.class);
    csvJob.setOutputFormatClass(NullOutputFormat.class);
    FileInputFormat.setInputPaths(csvJob, new Path(INPUT_PATH_PREFIX));

    logger.info("Process will begin");
    final boolean succeeded = csvJob.waitForCompletion(true);
    logger.info("Process ended");

    // Propagate the job outcome instead of always reporting success.
    return succeeded ? 0 : 1;
}