当前位置: 首页>>代码示例>>Java>>正文


Java MultithreadedMapper类代码示例

本文整理汇总了Java中org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper的典型用法代码示例。如果您正苦于以下问题:Java MultithreadedMapper类的具体用法?Java MultithreadedMapper怎么用?Java MultithreadedMapper使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


MultithreadedMapper类属于org.apache.hadoop.mapreduce.lib.map包,在下文中一共展示了MultithreadedMapper类的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: configure

import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入依赖的package包/类
// Reads the configured thread count, instantiates the user's Mapper, and
// builds a fixed-size thread pool used to invoke map() calls in parallel.
@SuppressWarnings("unchecked")
public void configure(JobConf jobConf) {
  final int threadCount = jobConf.getInt(MultithreadedMapper.NUM_THREADS, 10);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Configuring jobConf " + jobConf.getJobName() +
              " to use " + threadCount + " threads");
  }

  this.job = jobConf;
  // Only count processed records when the skip-bad-records feature is enabled.
  this.incrProcCount =
      SkipBadRecords.getMapperMaxSkipRecords(job) > 0 &&
      SkipBadRecords.getAutoIncrMapperProcCount(job);
  this.mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

  // Fixed pool: core == max == threadCount, idle workers never time out.
  executorService = new ThreadPoolExecutor(
      threadCount, threadCount,
      0L, TimeUnit.MILLISECONDS,
      new BlockingArrayQueue(threadCount));
}
 
开发者ID:naver,项目名称:hadoop,代码行数:24,代码来源:MultithreadedMapRunner.java

示例2: main

import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入依赖的package包/类
/**
 * Entry point: configures and submits a multithreaded zip-content load job.
 * Expects three arguments (after generic Hadoop options are stripped):
 * configFile, inputDir, threadCount.
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // BUGFIX: the job needs three arguments, but the original only checked
    // for two, and it read the thread count from the raw args array (which
    // may still contain generic Hadoop options) instead of otherArgs.
    if (otherArgs.length < 3) {
        System.err.println("Usage: MultithreadedZipContentLoader configFile inputDir threadCount");
        System.exit(2);
    }
    
    Job job = Job.getInstance(conf);
    job.setJarByClass(MultithreadedZipContentLoader.class);
    job.setInputFormatClass(ZipContentInputFormat.class);
    job.setMapperClass(MultithreadedMapper.class);
    MultithreadedMapper.setMapperClass(job, ZipContentMapper.class);
    MultithreadedMapper.setNumberOfThreads(job, Integer.parseInt(otherArgs[2]));
    job.setMapOutputKeyClass(DocumentURI.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(ContentOutputFormat.class);
    
    ZipContentInputFormat.setInputPaths(job, new Path(otherArgs[1]));

    // Layer the connector config file on top of the job's configuration.
    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
     
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
开发者ID:marklogic,项目名称:marklogic-contentpump,代码行数:26,代码来源:MultithreadedZipContentLoader.java

示例3: getOptions

import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入依赖的package包/类
/**
 * Builds the mapper-mode options object. The --threads/-j flag sets how
 * many threads each mapper task runs; it defaults to the number of
 * available processors.
 */
@Override
public MapperModeOp getOptions() {
	return new MapperModeOp() {
		// args4j binds this field through the @Option annotation.
		@Option(
				name = "--threads",
				aliases = "-j",
				required = false,
				usage = "Use NUMBER threads per mapper. defaults n processors.",
				metaVar = "NUMBER")
		private int nThreads = Runtime.getRuntime().availableProcessors();

		@Override
		public void prepareJobMapper(Job job, Class<LocalFeaturesMapper> mapperClass) {
			// Guard against nonsensical values such as "-j 0".
			if (nThreads <= 0) {
				nThreads = Runtime.getRuntime().availableProcessors();
			}

			// Wrap the real mapper in Hadoop's multithreaded driver.
			job.setMapperClass(MultithreadedMapper.class);
			MultithreadedMapper.setMapperClass(job, mapperClass);
			MultithreadedMapper.setNumberOfThreads(job, nThreads);
			System.out.println("Using multithreaded mapper");
		}
	};
}
 
开发者ID:openimaj,项目名称:openimaj,代码行数:24,代码来源:HadoopLocalFeaturesToolOptions.java

示例4: run

import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入依赖的package包/类
/**
 * Configures and runs the Hadoop download job: reads URLs from text input
 * and writes the fetched bytes to a block-compressed sequence file. Uses a
 * multithreaded mapper when more than one download thread is requested.
 */
@Override
public int run(String[] args) throws Exception {
	final HadoopDownloaderOptions opts = new HadoopDownloaderOptions(args);
	opts.prepare(true);

	final Job job = new Job(getConf());
	job.setJarByClass(HadoopDownloader.class);
	job.setJobName("Hadoop Downloader Utility");

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(BytesWritable.class);

	// Single-threaded downloads use the mapper directly; otherwise wrap it
	// in Hadoop's multithreaded driver with the requested thread count.
	final int nThreads = opts.getNumberOfThreads();
	if (nThreads <= 1) {
		job.setMapperClass(DownloadMapper.class);
	} else {
		job.setMapperClass(MultithreadedMapper.class);
		MultithreadedMapper.setMapperClass(job, DownloadMapper.class);
		MultithreadedMapper.setNumberOfThreads(job, nThreads);
	}

	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(SequenceFileOutputFormat.class);
	job.setNumReduceTasks(opts.getNumberOfReducers());

	// Preserve the raw arguments so mappers can re-parse the options.
	job.getConfiguration().setStrings(ARGS_KEY, args);

	FileInputFormat.setInputPaths(job, opts.getInputPaths());
	SequenceFileOutputFormat.setOutputPath(job, opts.getOutputPath());
	SequenceFileOutputFormat.setCompressOutput(job, true);
	SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
	SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

	job.waitForCompletion(true);

	return 0;
}
 
开发者ID:openimaj,项目名称:openimaj,代码行数:39,代码来源:HadoopDownloader.java

示例5: prepareJobMapper

import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入依赖的package包/类
/**
 * Wires the given mapper into the job through Hadoop's multithreaded
 * driver, defaulting the thread count to the number of processors when
 * the configured value is not positive.
 */
@Override
public void prepareJobMapper(Job job, Class<SimpleTwitterPreprocessingMapper> mapperClass) {
	// Fall back to one thread per processor for non-positive settings.
	if (concurrency <= 0) {
		concurrency = Runtime.getRuntime().availableProcessors();
	}

	job.setMapperClass(MultithreadedMapper.class);
	MultithreadedMapper.setMapperClass(job, mapperClass);
	MultithreadedMapper.setNumberOfThreads(job, concurrency);
	System.out.println("NThreads = " + MultithreadedMapper.getNumberOfThreads(job));
}
 
开发者ID:openimaj,项目名称:openimaj,代码行数:11,代码来源:MapperMode.java

示例6: prepareJobMapper

import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入依赖的package包/类
/**
 * Wires the cluster-quantiser mapper into the job via Hadoop's
 * multithreaded driver, using the configured concurrency or, when that is
 * not positive, one thread per available processor.
 */
@Override
public void prepareJobMapper(Job job, Class<ClusterQuantiserMapper> mapperClass,
		AbstractClusterQuantiserOptions opts)
{
	// Non-positive concurrency means "use all processors".
	final int threads = opts.getConcurrency() > 0
			? opts.getConcurrency()
			: Runtime.getRuntime().availableProcessors();

	job.setMapperClass(MultithreadedMapper.class);
	MultithreadedMapper.setMapperClass(job, mapperClass);
	MultithreadedMapper.setNumberOfThreads(job, threads);
	System.out.println("NThreads = " + MultithreadedMapper.getNumberOfThreads(job));
}
 
开发者ID:openimaj,项目名称:openimaj,代码行数:14,代码来源:HadoopClusterQuantiserOptions.java

示例7: run

import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入依赖的package包/类
/**
 * Configures and runs a map-only test job that exercises
 * CSVNLineInputFormat, discarding all output via NullOutputFormat.
 *
 * @param args unused command-line arguments
 * @return 0 on completion
 * @throws Exception if job configuration or execution fails
 */
public int run(String[] args) throws Exception {
	getConf().set(CSVLineRecordReader.FORMAT_DELIMITER, "\"");
	getConf().set(CSVLineRecordReader.FORMAT_SEPARATOR, ";");
	getConf().setInt(CSVNLineInputFormat.LINES_PER_MAP, 40000);
	getConf().setBoolean(CSVLineRecordReader.IS_ZIPFILE, false);
	Job csvJob = new Job(getConf(), "csv_test_job");
	csvJob.setJarByClass(CSVTestRunner.class);
	csvJob.setNumReduceTasks(0);

	// BUGFIX: the original configured the multithreaded mapper twice —
	// first with 8 threads, then again with 1 — so the first pair of calls
	// was silently overridden. The redundant pair has been removed; the
	// effective configuration (TestMapper, 1 thread) is unchanged.
	MultithreadedMapper.setMapperClass(csvJob, TestMapper.class);
	MultithreadedMapper.setNumberOfThreads(csvJob, 1);
	csvJob.setMapperClass(MultithreadedMapper.class);
	// To run without multithread, use the following line instead of the 3
	// above
	// csvJob.setMapperClass(TestMapper.class);
	csvJob.setInputFormatClass(CSVNLineInputFormat.class);
	csvJob.setOutputFormatClass(NullOutputFormat.class);
	FileInputFormat.setInputPaths(csvJob, new Path(INPUT_PATH_PREFIX));
	logger.info("Process will begin");

	csvJob.waitForCompletion(true);

	logger.info("Process ended");

	return 0;
}
 
开发者ID:mvallebr,项目名称:CSVInputFormat,代码行数:32,代码来源:CSVTestRunner.java

示例8: run

import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper; //导入依赖的package包/类
/**
 * Runs a small job through MultithreadedMapRunner over a six-line input
 * file and asserts that the job fails exactly when an IOException or
 * RuntimeException was injected into the mapper.
 *
 * @param ioEx whether the mapper should throw an injected IOException
 * @param rtEx whether the mapper should throw an injected RuntimeException
 */
private void run(boolean ioEx, boolean rtEx) throws Exception {
  Path inDir = new Path("testing/mt/input");
  Path outDir = new Path("testing/mt/output");

  // Hack for local FS that does not have the concept of a 'mounting point'
  if (isLocalFS()) {
    String localPathRoot = System.getProperty("test.build.data", "/tmp")
            .replace(' ', '+');
    inDir = new Path(localPathRoot, inDir);
    outDir = new Path(localPathRoot, outDir);
  }

  JobConf conf = createJobConf();
  FileSystem fs = FileSystem.get(conf);

  fs.delete(outDir, true);
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // Write the fixture input: five records plus one empty line.
  {
    DataOutputStream file = fs.create(new Path(inDir, "part-0"));
    file.writeBytes("a\nb\n\nc\nd\ne");
    file.close();
  }

  conf.setJobName("mt");
  conf.setInputFormat(TextInputFormat.class);

  // FIX: the original set the job output key/value classes twice
  // (once before and once after the map-output classes); the redundant
  // second pair has been removed.
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  conf.setMapOutputKeyClass(LongWritable.class);
  conf.setMapOutputValueClass(Text.class);

  conf.setOutputFormat(TextOutputFormat.class);

  conf.setMapperClass(IDMap.class);
  conf.setReducerClass(IDReduce.class);

  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);

  // Drive the map phase through the multithreaded runner with two threads.
  conf.setMapRunnerClass(MultithreadedMapRunner.class);
  conf.setInt(MultithreadedMapper.NUM_THREADS, 2);

  // Injected-failure switches read by the test mapper.
  if (ioEx) {
    conf.setBoolean("multithreaded.ioException", true);
  }
  if (rtEx) {
    conf.setBoolean("multithreaded.runtimeException", true);
  }
  JobClient jc = new JobClient(conf);
  RunningJob job = jc.submitJob(conf);
  // Poll until the job completes.
  while (!job.isComplete()) {
    Thread.sleep(100);
  }

  // The job must fail if and only if an exception was injected.
  if (job.isSuccessful()) {
    assertFalse(ioEx || rtEx);
  } else {
    assertTrue(ioEx || rtEx);
  }
}
 
开发者ID:aliyun-beta,项目名称:aliyun-oss-hadoop-fs,代码行数:70,代码来源:TestMultithreadedMapRunner.java


注:本文中的org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。