当前位置: 首页>>代码示例>>Java>>正文


Java MultithreadedMapper.setMapperClass方法代码示例

本文整理汇总了Java中org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper.setMapperClass方法的典型用法代码示例。如果您正苦于以下问题:Java MultithreadedMapper.setMapperClass方法的具体用法?Java MultithreadedMapper.setMapperClass怎么用?Java MultithreadedMapper.setMapperClass使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper的用法示例。


在下文中一共展示了MultithreadedMapper.setMapperClass方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: main

import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入方法依赖的package包/类
/**
 * Command-line entry point: configures and runs a job that uses
 * {@link MultithreadedMapper} to drive {@code ZipContentMapper}.
 *
 * <p>Positional arguments (those remaining after {@link GenericOptionsParser}
 * has processed {@code args}):
 * <ol>
 *   <li>{@code configFile} - resource added to the job configuration</li>
 *   <li>{@code inputDir} - input path for {@code ZipContentInputFormat}</li>
 *   <li>{@code threadCount} - number of threads for the multithreaded mapper</li>
 * </ol>
 *
 * <p>Exits with status 2 on a usage error, 0 when the job succeeds and 1 when it fails.
 *
 * @param args raw command-line arguments
 * @throws Exception if the job cannot be configured or run, or if
 *         {@code threadCount} is not a valid integer
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // All three positional arguments named in the usage message are read below,
    // so all three must be present.
    if (otherArgs.length < 3) {
        System.err.println("Usage: MultithreadedZipContentLoader configFile inputDir threadCount");
        System.exit(2);
    }

    Job job = Job.getInstance(conf);
    job.setJarByClass(MultithreadedZipContentLoader.class);
    job.setInputFormatClass(ZipContentInputFormat.class);
    job.setMapperClass(MultithreadedMapper.class);
    MultithreadedMapper.setMapperClass(job, ZipContentMapper.class);
    // Read the thread count from the parsed remainder, like the other positional
    // arguments, rather than from the raw args array.
    MultithreadedMapper.setNumberOfThreads(job, Integer.parseInt(otherArgs[2]));
    job.setMapOutputKeyClass(DocumentURI.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(ContentOutputFormat.class);

    ZipContentInputFormat.setInputPaths(job, new Path(otherArgs[1]));

    // Add the config resource to the job's own configuration object.
    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
开发者ID:marklogic,项目名称:marklogic-contentpump,代码行数:26,代码来源:MultithreadedZipContentLoader.java

示例2: getOptions

import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入方法依赖的package包/类
/**
 * Builds the options object for this mapper mode.
 *
 * <p>The returned {@code MapperModeOp} exposes a {@code --threads} / {@code -j}
 * option and, when asked to prepare a job, configures it to run the supplied
 * mapper class through {@link MultithreadedMapper}.
 *
 * @return a new options object for the multithreaded mapper mode
 */
@Override
public MapperModeOp getOptions() {
	return new MapperModeOp() {
		/** Threads per mapper; initialised to the number of available processors. */
		@Option(
				name = "--threads",
				aliases = "-j",
				required = false,
				usage = "Use NUMBER threads per mapper. defaults n processors.",
				metaVar = "NUMBER")
		private int concurrency = Runtime.getRuntime().availableProcessors();

		/**
		 * Configures {@code job} to run {@code mapperClass} inside a
		 * {@link MultithreadedMapper} with {@code concurrency} threads.
		 */
		@Override
		public void prepareJobMapper(Job job, Class<LocalFeaturesMapper> mapperClass) {
			// A non-positive request falls back to one thread per available processor.
			if (concurrency < 1) {
				concurrency = Runtime.getRuntime().availableProcessors();
			}

			job.setMapperClass(MultithreadedMapper.class);
			MultithreadedMapper.setMapperClass(job, mapperClass);
			MultithreadedMapper.setNumberOfThreads(job, concurrency);
			System.out.println("Using multithreaded mapper");
		}
	};
}
 
开发者ID:openimaj,项目名称:openimaj,代码行数:24,代码来源:HadoopLocalFeaturesToolOptions.java

示例3: run

import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入方法依赖的package包/类
/**
 * Configures and runs the "Hadoop Downloader Utility" job.
 *
 * <p>The mapper is {@code DownloadMapper}; when the options request more than
 * one thread it is wrapped in a {@link MultithreadedMapper}. Output is written
 * as a block-compressed sequence file of {@code Text} / {@code BytesWritable}
 * pairs. The raw arguments are stored in the job configuration under
 * {@code ARGS_KEY}.
 *
 * @param args command-line arguments, parsed by {@code HadoopDownloaderOptions}
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if option preparation, job set-up or job execution fails
 */
@Override
public int run(String[] args) throws Exception {
	final HadoopDownloaderOptions options = new HadoopDownloaderOptions(args);
	options.prepare(true);

	final Job job = new Job(getConf());

	job.setJarByClass(HadoopDownloader.class);
	job.setJobName("Hadoop Downloader Utility");

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(BytesWritable.class);

	// Only use the multithreaded wrapper when more than one thread is requested.
	if (options.getNumberOfThreads() <= 1) {
		job.setMapperClass(DownloadMapper.class);
	} else {
		job.setMapperClass(MultithreadedMapper.class);
		MultithreadedMapper.setMapperClass(job, DownloadMapper.class);
		MultithreadedMapper.setNumberOfThreads(job, options.getNumberOfThreads());
	}

	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(SequenceFileOutputFormat.class);

	job.setNumReduceTasks(options.getNumberOfReducers());

	job.getConfiguration().setStrings(ARGS_KEY, args);

	FileInputFormat.setInputPaths(job, options.getInputPaths());
	SequenceFileOutputFormat.setOutputPath(job, options.getOutputPath());
	SequenceFileOutputFormat.setCompressOutput(job, true);
	SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
	SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

	// Report the job outcome to the caller instead of always claiming success.
	return job.waitForCompletion(true) ? 0 : 1;
}
 
开发者ID:openimaj,项目名称:openimaj,代码行数:39,代码来源:HadoopDownloader.java

示例4: prepareJobMapper

import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入方法依赖的package包/类
/**
 * Configures {@code job} to run {@code mapperClass} inside a
 * {@link MultithreadedMapper}, then prints the thread count read back from the job.
 *
 * <p>If {@code concurrency} is not positive it is first reset to the number of
 * available processors.
 *
 * @param job         the job to configure
 * @param mapperClass the mapper each thread should run
 */
@Override
public void prepareJobMapper(Job job, Class<SimpleTwitterPreprocessingMapper> mapperClass) {
	// A non-positive setting falls back to one thread per available processor.
	if (concurrency < 1) {
		concurrency = Runtime.getRuntime().availableProcessors();
	}

	job.setMapperClass(MultithreadedMapper.class);
	MultithreadedMapper.setMapperClass(job, mapperClass);
	MultithreadedMapper.setNumberOfThreads(job, concurrency);

	final int configuredThreads = MultithreadedMapper.getNumberOfThreads(job);
	System.out.println("NThreads = " + configuredThreads);
}
 
开发者ID:openimaj,项目名称:openimaj,代码行数:11,代码来源:MapperMode.java

示例5: prepareJobMapper

import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入方法依赖的package包/类
/**
 * Configures {@code job} to run {@code mapperClass} inside a
 * {@link MultithreadedMapper}, then prints the thread count read back from the job.
 *
 * <p>The thread count comes from {@code opts.getConcurrency()}; a non-positive
 * value is replaced by the number of available processors.
 *
 * @param job         the job to configure
 * @param mapperClass the mapper each thread should run
 * @param opts        options supplying the requested concurrency
 */
@Override
public void prepareJobMapper(Job job, Class<ClusterQuantiserMapper> mapperClass,
		AbstractClusterQuantiserOptions opts)
{
	// Read the option once so the value that is validated is the value that is used.
	int concurrency = opts.getConcurrency();
	if (concurrency <= 0) {
		concurrency = Runtime.getRuntime().availableProcessors();
	}

	job.setMapperClass(MultithreadedMapper.class);
	MultithreadedMapper.setNumberOfThreads(job, concurrency);
	MultithreadedMapper.setMapperClass(job, mapperClass);
	System.out.println("NThreads = " + MultithreadedMapper.getNumberOfThreads(job));
}
 
开发者ID:openimaj,项目名称:openimaj,代码行数:14,代码来源:HadoopClusterQuantiserOptions.java

示例6: run

import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入方法依赖的package包/类
/**
 * Configures and runs the map-only "csv_test_job".
 *
 * <p>The CSV reader settings (delimiter {@code "}, separator {@code ;},
 * 40000 lines per map, not a zip file) are written to the configuration before
 * the job is created. {@code TestMapper} is run through a
 * {@link MultithreadedMapper} configured with a single thread, and output goes
 * to {@code NullOutputFormat}.
 *
 * @param args command-line arguments (not used by this method)
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if job set-up or execution fails
 */
public int run(String[] args) throws Exception {
	getConf().set(CSVLineRecordReader.FORMAT_DELIMITER, "\"");
	getConf().set(CSVLineRecordReader.FORMAT_SEPARATOR, ";");
	getConf().setInt(CSVNLineInputFormat.LINES_PER_MAP, 40000);
	getConf().setBoolean(CSVLineRecordReader.IS_ZIPFILE, false);

	Job csvJob = new Job(getConf(), "csv_test_job");
	csvJob.setJarByClass(CSVTestRunner.class);
	csvJob.setNumReduceTasks(0);

	// The mapper and thread count are set once. An earlier duplicate pair that
	// set 8 threads was immediately overwritten by this one, so 1 thread was
	// always the effective value.
	MultithreadedMapper.setMapperClass(csvJob, TestMapper.class);
	MultithreadedMapper.setNumberOfThreads(csvJob, 1);
	csvJob.setMapperClass(MultithreadedMapper.class);
	// To run without multithreading, use the following line instead of the 3
	// above
	// csvJob.setMapperClass(TestMapper.class);
	csvJob.setInputFormatClass(CSVNLineInputFormat.class);
	csvJob.setOutputFormatClass(NullOutputFormat.class);
	FileInputFormat.setInputPaths(csvJob, new Path(INPUT_PATH_PREFIX));
	logger.info("Process will begin");

	final boolean succeeded = csvJob.waitForCompletion(true);

	logger.info("Process ended");

	// Report the job outcome to the caller instead of always claiming success.
	return succeeded ? 0 : 1;
}
 
开发者ID:mvallebr,项目名称:CSVInputFormat,代码行数:32,代码来源:CSVTestRunner.java


注:本文中的org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper.setMapperClass方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。