

Java Job.setOutputFormatClass Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.Job.setOutputFormatClass. If you are wondering what Job.setOutputFormatClass does, how to call it, or what real-world uses look like, the curated examples below should help. You can also browse further usage examples of the enclosing class, org.apache.hadoop.mapreduce.Job.


A total of 15 code examples of Job.setOutputFormatClass are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
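Before the project-specific examples, here is a minimal, self-contained sketch (not drawn from any of the projects below; the class name OutputFormatSketch is a hypothetical placeholder) showing where setOutputFormatClass fits in a typical driver: it tells the job which OutputFormat implementation will write its results, and it is normally configured together with the input format, the output key/value classes, and the output path. The sketch uses the identity Mapper and zero reducers so it runs as-is on two path arguments.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class OutputFormatSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "output-format-sketch");
    job.setJarByClass(OutputFormatSketch.class);

    job.setMapperClass(Mapper.class); // identity mapper: passes (offset, line) pairs through unchanged
    job.setNumReduceTasks(0);         // map-only job, so map output goes straight to the OutputFormat

    // TextInputFormat produces LongWritable byte offsets as keys and Text lines as values.
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    // The method this page documents: choose the OutputFormat that writes the job's results.
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

Swapping TextOutputFormat for SequenceFileOutputFormat, NullOutputFormat, or a table-backed format is exactly the kind of variation the examples below demonstrate.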

Example 1: configureJob

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String [] args)
throws IOException {
  String tableName = args[0];
  String columnFamily = args[1];
  System.out.println("****" + tableName);
  conf.set(TableInputFormat.SCAN, TableMapReduceUtil.convertScanToString(new Scan()));
  conf.set(TableInputFormat.INPUT_TABLE, tableName);
  conf.set("index.tablename", tableName);
  conf.set("index.familyname", columnFamily);
  String[] fields = new String[args.length - 2];
  System.arraycopy(args, 2, fields, 0, fields.length);
  conf.setStrings("index.fields", fields);
  Job job = new Job(conf, tableName);
  job.setJarByClass(IndexBuilder.class);
  job.setMapperClass(Map.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(TableInputFormat.class);
  job.setOutputFormatClass(MultiTableOutputFormat.class);
  return job;
}
 
Developer: fengchen8086, Project: ditb, Lines of code: 24, Source: IndexBuilder.java

Example 2: runRandomInputGenerator

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMuplitplier) throws Exception {
  LOG.info("Running RandomInputGenerator with numMappers=" + numMappers
      + ", numNodes=" + numNodes);
  Job job = Job.getInstance(getConf());

  job.setJobName("Random Input Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());

  job.setInputFormatClass(GeneratorInputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(NullWritable.class);

  setJobConf(job, numMappers, numNodes, width, wrapMuplitplier);

  job.setMapperClass(Mapper.class); //identity mapper

  FileOutputFormat.setOutputPath(job, tmpOutput);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  boolean success = jobCompletion(job);

  return success ? 0 : 1;
}
 
Developer: fengchen8086, Project: ditb, Lines of code: 26, Source: IntegrationTestBigLinkedList.java

Example 3: runJob

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void runJob(Configuration conf, Path inputPath, Path output) throws IOException, ClassNotFoundException, InterruptedException {

    Job job = new Job(conf, "Input Drive running input:" + inputPath);
    log.info("start running InputDriver");
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(indexToWordWritable.class);
    job.setOutputKeyClass(twoDimensionIndexWritable.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(InputMapper.class);
    job.setReducerClass(InputReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setJarByClass(InputDriver.class);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, output);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
 
Developer: huyang1, Project: LDA, Lines of code: 25, Source: InputDriver.java

Example 4: configueAvroMergeJob

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
private void configueAvroMergeJob(Configuration conf, Job job, Path oldPath, Path newPath)
    throws IOException {
  LOG.info("Trying to merge avro files");
  final Schema oldPathSchema = AvroUtil.getAvroSchema(oldPath, conf);
  final Schema newPathSchema = AvroUtil.getAvroSchema(newPath, conf);
  if (oldPathSchema == null || newPathSchema == null || !oldPathSchema.equals(newPathSchema)) {
    throw new IOException("Invalid schema for input directories. Schema for old data: ["
        + oldPathSchema + "]. Schema for new data: [" + newPathSchema + "]");
  }
  LOG.debug("Avro Schema:" + oldPathSchema);
  job.setInputFormatClass(AvroInputFormat.class);
  job.setOutputFormatClass(AvroOutputFormat.class);
  job.setMapperClass(MergeAvroMapper.class);
  job.setReducerClass(MergeAvroReducer.class);
  AvroJob.setOutputSchema(job.getConfiguration(), oldPathSchema);
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 17, Source: MergeJob.java

Example 5: main

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	
	Job job =Job.getInstance(conf);
	job.setJobName("MaxThreeLabel");
	job.setJarByClass(MaxThreeLabel.class);
	
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(TextArrayWritable.class);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(Text.class);
	
	job.setMapperClass(MaxThreeLabelMap.class);
	job.setReducerClass(MaxThreeLabelReduce.class);
	
	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));
	boolean wait = job.waitForCompletion(true);
	System.exit(wait ? 0 : 1);
}
 
Developer: lzmhhh123, Project: Wikipedia-Index, Lines of code: 25, Source: MaxThreeLabel.java

Example 6: createJob

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public Job createJob() 
throws IOException {
  Configuration conf = getConf();
  conf.setInt(MRJobConfig.NUM_MAPS, 1);
  Job job = Job.getInstance(conf, "test");
  job.setNumReduceTasks(1);
  job.setJarByClass(CredentialsTestJob.class);
  job.setNumReduceTasks(1);
  job.setMapperClass(CredentialsTestJob.CredentialsTestMapper.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(CredentialsTestJob.CredentialsTestReducer.class);
  job.setInputFormatClass(SleepJob.SleepInputFormat.class);
  job.setPartitionerClass(SleepJob.SleepJobPartitioner.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("test job");
  FileInputFormat.addInputPath(job, new Path("ignored"));
  return job;
}
 
Developer: naver, Project: hadoop, Lines of code: 21, Source: CredentialsTestJob.java

Example 7: createJob

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static Job createJob(String name, String base) throws IOException {
	Configuration conf = new Configuration();
	conf.set(Total.QUERIED_NAME, name);
	Job job = Job.getInstance(new Cluster(conf), conf);
	job.setJarByClass(Cut.class);

	// in
	String in = base;
	if (!base.endsWith("/"))
		in = in.concat("/");
	in = in.concat("employees");
	SequenceFileInputFormat.addInputPath(job, new Path(in));
	job.setInputFormatClass(SequenceFileInputFormat.class);

	// map
	job.setMapperClass(CutMapper.class);
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(Employee.class);

	// out
	SequenceFileOutputFormat.setOutputPath(job, new Path(base + "/tmp"));
	job.setOutputFormatClass(SequenceFileOutputFormat.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(Employee.class);

	return job;
}
 
Developer: amritbhat786, Project: DocIT, Lines of code: 28, Source: Cut.java

Example 8: run

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
	Configuration conf = this.getConf();
	// initialize parameters
	this.processArgs(conf, args);

	// create the job
	Job job = Job.getInstance(conf, "active_user");

	// set job configuration parameters
	job.setJarByClass(ActiveUserRunner.class);
	// HBase input mapper parameters
	// 1. run locally
	TableMapReduceUtil.initTableMapperJob(this.initScans(job), ActiveUserMapper.class, StatsUserDimension.class,
			TimeOutputValue.class, job, false);
	// 2. run on a cluster
	// TableMapReduceUtil.initTableMapperJob(null, ActiveUserMapper.class,
	// StatsUserDimension.class, TimeOutputValue.class, job);

	// set reducer parameters
	job.setReducerClass(ActiveUserReducer.class);
	job.setOutputKeyClass(StatsUserDimension.class);
	job.setOutputValueClass(MapWritableValue.class);

	// set output-related parameters
	job.setOutputFormatClass(TransformerOutputFormat.class);
	// start timestamp in milliseconds
	long startTime = System.currentTimeMillis();
	try {
		return job.waitForCompletion(true) ? 0 : -1;
	} finally {
		// end timestamp in milliseconds
		long endTime = System.currentTimeMillis();
		logger.info("Job<" + job.getJobName() + ">是否执行成功:" + job.isSuccessful() + "; 开始时间:" + startTime + "; 结束时间:"
				+ endTime + "; 用时:" + (endTime - startTime) + "ms");
	}
}
 
Developer: liuhaozzu, Project: big_data, Lines of code: 38, Source: ActiveUserRunner.java

Example 9: createSubmittableJob

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
@Override
public Job createSubmittableJob(String[] args) throws IOException {
  Job job = super.createSubmittableJob(args);
  // Call my class instead.
  job.setJarByClass(WALMapperSearcher.class);
  job.setMapperClass(WALMapperSearcher.class);
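  // NullOutputFormat discards all job output (nothing is written to HDFS).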
  job.setOutputFormatClass(NullOutputFormat.class);
  return job;
}
 
Developer: fengchen8086, Project: ditb, Lines of code: 10, Source: IntegrationTestLoadAndVerify.java

Example 10: setOutput

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
private static DBConfiguration setOutput(Job job,
    String tableName) throws IOException {
  job.setOutputFormatClass(DBOutputFormat.class);
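  // Disable reduce-side speculative execution so duplicate task attempts do not write the same rows twice.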
  ConfigurationHelper.setJobReduceSpeculativeExecution(job, false);

  DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());

  dbConf.setOutputTableName(tableName);
  return dbConf;
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 11, Source: DBOutputFormat.java

Example 11: testKilledJob

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
private void testKilledJob(String fileName,
    Class<? extends OutputFormat> output, String[] exclude) throws Exception {
  Path outDir = getNewOutputDir();
  Job job = MapReduceTestUtil.createKillJob(conf, outDir, inDir);
  job.setOutputFormatClass(output);

  job.submit();

  // wait for the setup to be completed
  while (job.setupProgress() != 1.0f) {
    UtilsForTests.waitFor(100);
  }

  job.killJob(); // kill the job

  assertFalse("Job did not get kill", job.waitForCompletion(true));

  if (fileName != null) {
    Path testFile = new Path(outDir, fileName);
    assertTrue("File " + testFile + " missing for job " + job.getJobID(), fs
        .exists(testFile));
  }

  // check that files from the exclude set do not exist
  for (String ex : exclude) {
    Path file = new Path(outDir, ex);
    assertFalse("File " + file + " should not be present for killed job "
        + job.getJobID(), fs.exists(file));
  }
}
 
Developer: naver, Project: hadoop, Lines of code: 31, Source: TestJobOutputCommitter.java

Example 12: loadHCatTable

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public List<HCatRecord> loadHCatTable(String dbName,
  String tableName, Map<String, String> partKeyMap,
  HCatSchema tblSchema, List<HCatRecord> records)
  throws Exception {

  Job job = new Job(conf, "HCat load job");

  job.setJarByClass(this.getClass());
  job.setMapperClass(HCatWriterMapper.class);


  // Just write 10 lines to the file to drive the mapper
  Path path = new Path(fs.getWorkingDirectory(),
    "mapreduce/HCatTableIndexInput");

  job.getConfiguration()
    .setInt(ConfigurationConstants.PROP_MAPRED_MAP_TASKS, 1);
  int writeCount = records.size();
  recsToLoad.clear();
  recsToLoad.addAll(records);
  createInputFile(path, writeCount);
  // input/output settings
  HCatWriterMapper.setWrittenRecordCount(0);

  FileInputFormat.setInputPaths(job, path);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(HCatOutputFormat.class);
  OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName,
    partKeyMap);

  HCatOutputFormat.setOutput(job, outputJobInfo);
  HCatOutputFormat.setSchema(job, tblSchema);
  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(DefaultHCatRecord.class);

  job.setNumReduceTasks(0);
  SqoopHCatUtilities.addJars(job, new SqoopOptions());
  boolean success = job.waitForCompletion(true);

  if (!success) {
    throw new IOException("Loading HCatalog table with test records failed");
  }
  utils.invokeOutputCommitterForLocalMode(job);
  LOG.info("Loaded " + HCatWriterMapper.writtenRecordCount + " records");
  return recsToLoad;
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 47, Source: HCatalogTestUtils.java

Example 13: run

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public void run() throws IOException, ClassNotFoundException, InterruptedException {

    Job job = Job.getInstance(configuration, "com.romanysik.util.Transposer");

    job.setJarByClass(MRNMF.class);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setMapperClass(TMapper.class);
    job.setReducerClass(TReducer.class);

    job.waitForCompletion(true);
}
 
Developer: Romm17, Project: MRNMF, Lines of code: 21, Source: Transposer.java

Example 14: createSubmittableJob

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public Job createSubmittableJob(String[] args) throws IOException {
  FileSystem fs = sourceHashDir.getFileSystem(getConf());
  if (!fs.exists(sourceHashDir)) {
    throw new IOException("Source hash dir not found: " + sourceHashDir);
  }
  
  HashTable.TableHash tableHash = HashTable.TableHash.read(getConf(), sourceHashDir);
  LOG.info("Read source hash manifest: " + tableHash);
  LOG.info("Read " + tableHash.partitions.size() + " partition keys");
  if (!tableHash.tableName.equals(sourceTableName)) {
    LOG.warn("Table name mismatch - manifest indicates hash was taken from: "
        + tableHash.tableName + " but job is reading from: " + sourceTableName);
  }
  if (tableHash.numHashFiles != tableHash.partitions.size() + 1) {
    throw new RuntimeException("Hash data appears corrupt. The number of of hash files created"
        + " should be 1 more than the number of partition keys.  However, the manifest file "
        + " says numHashFiles=" + tableHash.numHashFiles + " but the number of partition keys"
        + " found in the partitions file is " + tableHash.partitions.size());
  }
  
  Path dataDir = new Path(sourceHashDir, HashTable.HASH_DATA_DIR);
  int dataSubdirCount = 0;
  for (FileStatus file : fs.listStatus(dataDir)) {
    if (file.getPath().getName().startsWith(HashTable.OUTPUT_DATA_FILE_PREFIX)) {
      dataSubdirCount++;
    }
  }
  
  if (dataSubdirCount != tableHash.numHashFiles) {
    throw new RuntimeException("Hash data appears corrupt. The number of of hash files created"
        + " should be 1 more than the number of partition keys.  However, the number of data dirs"
        + " found is " + dataSubdirCount + " but the number of partition keys"
        + " found in the partitions file is " + tableHash.partitions.size());
  }
  
  Job job = Job.getInstance(getConf(),getConf().get("mapreduce.job.name",
      "syncTable_" + sourceTableName + "-" + targetTableName));
  Configuration jobConf = job.getConfiguration();
  job.setJarByClass(HashTable.class);
  jobConf.set(SOURCE_HASH_DIR_CONF_KEY, sourceHashDir.toString());
  jobConf.set(SOURCE_TABLE_CONF_KEY, sourceTableName);
  jobConf.set(TARGET_TABLE_CONF_KEY, targetTableName);
  if (sourceZkCluster != null) {
    jobConf.set(SOURCE_ZK_CLUSTER_CONF_KEY, sourceZkCluster);
  }
  if (targetZkCluster != null) {
    jobConf.set(TARGET_ZK_CLUSTER_CONF_KEY, targetZkCluster);
  }
  jobConf.setBoolean(DRY_RUN_CONF_KEY, dryRun);
  
  TableMapReduceUtil.initTableMapperJob(targetTableName, tableHash.initScan(),
      SyncMapper.class, null, null, job);
  
  job.setNumReduceTasks(0);
   
  if (dryRun) {
    job.setOutputFormatClass(NullOutputFormat.class);
  } else {
    // No reducers.  Just write straight to table.  Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(targetTableName, null, job, null,
        targetZkCluster, null, null);
    
    // would be nice to add an option for bulk load instead
  }
  
  return job;
}
 
Developer: fengchen8086, Project: ditb, Lines of code: 69, Source: SyncTable.java

Example 15: run

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public void run() throws IOException, ClassNotFoundException, InterruptedException {
    
    Job job = Job.getInstance(configuration, "com.romanysik.matrixmultiplication.MM3");

    job.setJarByClass(MRNMF.class);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setMapperClass(MM3Mapper.class);
    job.setReducerClass(MM3Reducer.class);

    job.waitForCompletion(true);
}
 
Developer: Romm17, Project: MRNMF, Lines of code: 21, Source: MM3.java


Note: The org.apache.hadoop.mapreduce.Job.setOutputFormatClass examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by their authors, and the copyright of the source code belongs to the original authors; consult each project's License before redistributing or using the code. Do not repost without permission.