

Java LazyOutputFormat.setOutputFormatClass Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat.setOutputFormatClass. If you are wondering what LazyOutputFormat.setOutputFormatClass does, how to call it, or where to find real-world examples, the curated code samples below should help. You can also explore further usage examples of the containing class, org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat.


The sections below present 15 code examples of LazyOutputFormat.setOutputFormatClass, sorted by popularity by default.
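Before working through the examples, it helps to see the method's core behavior in miniature. LazyOutputFormat.setOutputFormatClass(job, realFormat) installs LazyOutputFormat as the job's output format and records the real format in the job configuration; the underlying RecordWriter is only created when a task writes its first record, so tasks that emit nothing leave no empty part files behind. A minimal, self-contained driver sketch (the class name and paths here are illustrative, not taken from any of the projects below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class LazyOutputDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "lazy-output-demo");
        job.setJarByClass(LazyOutputDemo.class);
        // Identity map/reduce; key/value types follow TextInputFormat.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Instead of job.setOutputFormatClass(TextOutputFormat.class):
        // part files are now created only once a first record is written.
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}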

Example 1: runTestLazyOutput

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; // import the package/class this method depends on
private static void runTestLazyOutput(Configuration conf, Path output,
    int numReducers, boolean createLazily) 
throws Exception {
  Job job = Job.getInstance(conf, "Test-Lazy-Output");

  FileInputFormat.setInputPaths(job, INPUT);
  FileOutputFormat.setOutputPath(job, output);

  job.setJarByClass(TestMapReduceLazyOutput.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(numReducers);

  job.setMapperClass(TestMapper.class);
  job.setReducerClass(TestReducer.class);

  if (createLazily) {
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
  } else {
    job.setOutputFormatClass(TextOutputFormat.class);
  }
  assertTrue(job.waitForCompletion(true));
}
 
Developer ID: aliyun-beta, Project: aliyun-oss-hadoop-fs, Lines: 25, Source: TestMapReduceLazyOutput.java
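The INPUT constant, TestMapper, and TestReducer above are defined elsewhere in TestMapReduceLazyOutput and are not reproduced on this page. A hypothetical sketch of the shape such classes could take (the real Hadoop test code may differ); the point is that some tasks emit no records, so with createLazily set their part files never appear:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical stand-ins for the classes referenced above.
public static class TestMapper
        extends Mapper<LongWritable, Text, LongWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Forward only non-empty lines; partitions that receive nothing
        // produce no part file when output is lazy.
        if (value.getLength() > 0) {
            context.write(key, value);
        }
    }
}

public static class TestReducer
        extends Reducer<LongWritable, Text, LongWritable, Text> {
    @Override
    protected void reduce(LongWritable key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        for (Text value : values) {
            context.write(key, value);
        }
    }
}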

Example 2: runTestLazyOutput

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; // import the package/class this method depends on
private static void runTestLazyOutput(Configuration conf, Path output,
    int numReducers, boolean createLazily) 
throws Exception {
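  // Note: this Job constructor is deprecated in Hadoop 2; Job.getInstance(conf, name), as in Example 1, is the preferred replacement.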
  Job job = new Job(conf, "Test-Lazy-Output");

  FileInputFormat.setInputPaths(job, INPUT);
  FileOutputFormat.setOutputPath(job, output);

  job.setJarByClass(TestMapReduceLazyOutput.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(numReducers);

  job.setMapperClass(TestMapper.class);
  job.setReducerClass(TestReducer.class);

  if (createLazily) {
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
  } else {
    job.setOutputFormatClass(TextOutputFormat.class);
  }
  assertTrue(job.waitForCompletion(true));
}
 
Developer ID: Nextzero, Project: hadoop-2.6.0-cdh5.4.3, Lines: 25, Source: TestMapReduceLazyOutput.java

Example 3: configureTextOutput

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; // import the package/class this method depends on
/**
 * Set up a MapReduce job to output human-readable text.
 */
protected void configureTextOutput(String destination) {
    Path outPath = MRReasoningUtils.getOutputPath(job.getConfiguration(), destination);
    TextOutputFormat.setOutputPath(job, outPath);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT,
        TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.TERMINAL_OUT,
        TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.SCHEMA_OUT,
        TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INCONSISTENT_OUT,
        TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
        TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
 
Developer ID: apache, Project: incubator-rya, Lines: 21, Source: AbstractReasoningTool.java

Example 4: ensureJobSet

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; // import the package/class this method depends on
private static void ensureJobSet(Job job) {
    if (job.getConfiguration().getBoolean(MULTIREDUCERS_HAVE_OUTPUT_FORMAT, false)) {
        // we need to use the TextOutputFormat, since otherwise the FileOutputCommitter won't run
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    } else {
        job.setOutputFormatClass(NullOutputFormat.class);
    }
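    // The call below overrides the format chosen in the if/else above;
    // MultiOutputFormat presumably wraps and delegates to that earlier choice at runtime.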
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setReducerClass(MultiReducer.class);
    job.setMapperClass(MultiMapper.class);
    job.setMapOutputKeyClass(PerMapperOutputKey.class);
    job.setMapOutputValueClass(PerMapperOutputValue.class);
    job.setSortComparatorClass(MultiComparator.class);
    job.setPartitionerClass(MultiPartitioner.class);
    List<Class<?>> serializations = Arrays.asList(
            job.getConfiguration().getClasses(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY));
    if (serializations.indexOf(MultiSerializer.class) == -1) {
        appendTo(job, CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, MultiSerializer.class);
    }
    for (Class<?> aClass : job.getConfiguration().getClasses(MultiCombiner.CONF_KEY)) {
        if (!aClass.equals(Reducer.class)) {
            job.setCombinerClass(MultiCombiner.class);
        }
    }
}
 
Developer ID: elazarl, Project: multireducers, Lines: 26, Source: MultiJob.java
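The appendTo helper called near the end of ensureJobSet is not shown here. A plausible sketch, assuming it merely appends a class name to a comma-separated, list-valued configuration key (the real implementation in the multireducers project may differ):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

// Hypothetical sketch of appendTo: add a class to a comma-separated config list.
private static void appendTo(Job job, String key, Class<?> clazz) {
    Configuration conf = job.getConfiguration();
    String current = conf.get(key);
    if (current == null || current.isEmpty()) {
        conf.set(key, clazz.getName());
    } else {
        conf.set(key, current + "," + clazz.getName());
    }
}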

Example 5: runAnnotatorSort

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; // import the package/class this method depends on
private int runAnnotatorSort() throws Exception {

        BioJob job = BioJob.getInstance(conf);

        job.setJobName("GaeaAnnotatorSortResult");
        job.setJarByClass(this.getClass());
        job.setMapperClass(AnnotationSortMapper.class);
        job.setReducerClass(AnnotationSortReducer.class);
        job.setNumReduceTasks(sampleNames.size());

        job.setMapOutputKeyClass(PairWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

        Path inputPath = new Path(options.getTmpPath());
        Path outputPath = new Path(options.getOutputPath());
        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        FileSystem fs = outputPath.getFileSystem(conf);
        if(job.waitForCompletion(true)){
            int loop = 0;
            for (String sampleName : sampleNames){
                Path outputPart = getSampleOutputPath(sampleName);
                while (outputPart == null && loop < 10){
                    TimeUnit.MILLISECONDS.sleep(6000);
                    outputPart = getSampleOutputPath(sampleName);
                    loop ++;
                }
                Path outputName = new Path(options.getOutputPath() + "/" + sampleName + ".tsv");
                fs.rename(outputPart, outputName);
            }
            return 0;
        }
        return 1;
    }
 
Developer ID: BGI-flexlab, Project: SOAPgaea, Lines: 41, Source: Annotator.java
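getSampleOutputPath is defined elsewhere in the Annotator class. One plausible reading, assuming each sample's reducer writes files whose names begin with the sample name and reusing the surrounding class's options and conf fields, is a glob over the output directory (hypothetical; the SOAPgaea implementation may differ):

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical sketch: find the sample's output file, or null if it has not appeared yet.
private Path getSampleOutputPath(String sampleName) throws IOException {
    Path outputPath = new Path(options.getOutputPath());
    FileSystem fs = outputPath.getFileSystem(conf);
    FileStatus[] matches = fs.globStatus(new Path(outputPath, sampleName + "*"));
    return (matches != null && matches.length > 0) ? matches[0].getPath() : null;
}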

Example 6: makeJob

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; // import the package/class this method depends on
public static Job makeJob(Configuration conf, Path in, Path out, String matchPath, long scanSince, 
		String chlorineConfigFilePath, String queue, String maskPath) throws IOException {
	conf.setBoolean("mapred.output.compress", false);
	conf.setLong("scanSince", scanSince);
	conf.set("matchPath", matchPath);
	conf.set("maskPath", maskPath);
	conf.set("inputPath", in.toString());
	if (queue != null) {
		conf.set("mapred.job.queue.name", queue);
	}
	conf.set("fs.permissions.umask-mode", 
			"007");
	conf.setInt("input_path_depth", in.depth());
	Job job = Job.getInstance(conf, "Chlorine_HDFS_Scan");
	job.setJarByClass(HDFSScanMR.class);
	if (chlorineConfigFilePath != null) {
		try {
			job.addCacheFile(new URI(chlorineConfigFilePath));
			conf.set("finder_file", (new File(chlorineConfigFilePath)).getName());
		} catch (URISyntaxException e) {
			LOG.error(e);
		}
	}
	job.setMapperClass(DeepScanMapper.class);
	job.setNumReduceTasks(0);
	job.setInputFormatClass(TextInputFormat.class);
	TextInputFormat.addInputPath(job, in);
	TextInputFormat.setInputDirRecursive(job, true);
	TextInputFormat.setInputPathFilter(job, NewFilesFilter.class);
	FileOutputFormat.setOutputPath(job, out);
	LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); 
	return job;
}
 
Developer ID: dataApps, Project: chlorine-hadoop, Lines: 34, Source: HDFSScanMR.java
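A hypothetical call site for makeJob, as it might appear inside a driver's run method; every argument value below is made up for illustration:

Configuration conf = new Configuration();
Job job = HDFSScanMR.makeJob(conf,
        new Path("/data/in"),                    // input directory
        new Path("/data/out"),                   // job output
        "/data/out/matches",                     // matchPath
        System.currentTimeMillis() - 86400000L,  // scanSince: files newer than 24h
        null,                                    // chlorineConfigFilePath (optional)
        null,                                    // queue (use default)
        "/data/out/masked");                     // maskPath
System.exit(job.waitForCompletion(true) ? 0 : 1);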

Example 7: configureJob

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; // import the package/class this method depends on
/**
 * Job configurator
 *
 * @param job                      job instance
 * @param jarByClass               class of the jar
 * @param mapperClass              mapper
 * @param reducerClass             reducer
 * @param commaSeparatedInputFiles input paths
 * @param outputPath               output
 * @throws IOException I/O exception
 */
public static void configureJob(Job job, Class<?> jarByClass,
        Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass,
        String commaSeparatedInputFiles, String outputPath)
        throws IOException
{
    job.setJarByClass(jarByClass);
    job.setJobName(jarByClass.getName());

    // mapper
    job.setMapperClass(mapperClass);

    // reducer
    job.setReducerClass(reducerClass);

    // input-output is warc
    job.setInputFormatClass(WARCInputFormat.class);
    // prevent producing empty files
    LazyOutputFormat.setOutputFormatClass(job, WARCOutputFormat.class);

    // intermediate data
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(WARCWritable.class);

    // output data
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(WARCWritable.class);

    // set output compression to GZip
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
}
 
Developer ID: dkpro, Project: dkpro-c4corpus, Lines: 46, Source: ConfigurationHelper.java
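A hypothetical call site for configureJob; MyDriver, MyWarcMapper, and MyWarcReducer are placeholder classes, not part of dkpro-c4corpus:

Job job = Job.getInstance(new Configuration());
ConfigurationHelper.configureJob(job, MyDriver.class,
        MyWarcMapper.class, MyWarcReducer.class,
        "s3://corpus/warc/part-0001.warc.gz,s3://corpus/warc/part-0002.warc.gz",
        "/output/warc-processed");
System.exit(job.waitForCompletion(true) ? 0 : 1);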

Example 8: run

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; // import the package/class this method depends on
@Override
public int run(String[] args)
        throws Exception
{
    Job job = Job.getInstance(getConf());

    job.setJarByClass(Phase3Step1ExtractNearDupInfo.class);
    job.setJobName(Phase3Step1ExtractNearDupInfo.class.getName());

    // mapper
    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DocumentInfo.class);

    // reducer
    job.setReducerClass(DeDuplicationTextOutputReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(List.class);

    job.setInputFormatClass(WARCInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, DocumentInfoOutputFormat.class);

    // paths
    String commaSeparatedInputFiles = args[0];
    String outputPath = args[1];

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;

}
 
Developer ID: dkpro, Project: dkpro-c4corpus, Lines: 33, Source: Phase3Step1ExtractNearDupInfo.java

Example 9: run

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; // import the package/class this method depends on
@Override
public int run(String[] args)
        throws Exception
{
    Job job = Job.getInstance(getConf());

    job.setJarByClass(Phase3Step3NearDupTuplesCreation.class);
    job.setJobName(Phase3Step3NearDupTuplesCreation.class.getName());

    // mapper
    job.setMapperClass(CreateTuplesMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TreeSet.class);

    job.setInputFormatClass(TextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    // paths
    String commaSeparatedInputFiles = args[0];
    String outputPath = args[1];

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setNumReduceTasks(0); // must be zero: this is a map-only job, so the mappers write the final output directly

    return job.waitForCompletion(true) ? 0 : 1;
}
 
Developer ID: dkpro, Project: dkpro-c4corpus, Lines: 29, Source: Phase3Step3NearDupTuplesCreation.java

Example 10: run

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; // import the package/class this method depends on
@Override
public int run(String[] args)
        throws Exception
{
    Job job = Job.getInstance(getConf());

    job.setJarByClass(Phase3Step4LocalDeDuplication.class);
    job.setJobName(Phase3Step4LocalDeDuplication.class.getName());

    // paths
    String inputPath = args[0];
    // text files of ids to be deleted
    String outputPath = args[1];

    // input: reading max N lines for each mapper
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, new Path(inputPath));
    job.getConfiguration().setInt("mapreduce.input.lineinputformat.linespermap", LINES);

    // mapper
    job.setMapperClass(LocalGreedyDeDuplicationMapper.class);

    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    // reducer
    job.setReducerClass(IDCollectorReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}
 
Developer ID: dkpro, Project: dkpro-c4corpus, Lines: 35, Source: Phase3Step4LocalDeDuplication.java

Example 11: run

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; // import the package/class this method depends on
@Override
public int run(String[] args)
        throws Exception
{

    Job job = Job.getInstance(getConf());
    job.setJarByClass(Phase3Step2DistinctDataJob.class);
    job.setJobName(Phase3Step2DistinctDataJob.class.getName());

    //mapper
    job.setMapperClass(RemoveRedundantDataMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    //reducer
    job.setReducerClass(RemoveRedundantDataReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    //paths
    String commaSeparatedInputFiles = args[0];
    String outputPath = args[1];

    job.setInputFormatClass(TextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    //i/o paths
    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}
 
Developer ID: dkpro, Project: dkpro-c4corpus, Lines: 33, Source: Phase3Step2DistinctDataJob.java

Example 12: configureSchemaOutput

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; // import the package/class this method depends on
/**
 * Set up the MapReduce job to output a schema (TBox).
 */
protected void configureSchemaOutput() {
    Path outPath = MRReasoningUtils.getSchemaPath(job.getConfiguration());
    SequenceFileOutputFormat.setOutputPath(job, outPath);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SchemaWritable.class);
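    // This supersedes the setOutputFormatClass call above: the job's output
    // format becomes LazyOutputFormat, wrapping SequenceFileOutputFormat.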
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, "schemaobj",
        SequenceFileOutputFormat.class, NullWritable.class, SchemaWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
        TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
 
Developer ID: apache, Project: incubator-rya, Lines: 17, Source: AbstractReasoningTool.java

Example 13: configureDerivationOutput

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; // import the package/class this method depends on
/**
 * Set up a MapReduce job to output newly derived triples.
 * @param   intermediate    True if this is intermediate data. Outputs
 *                          to [base]-[iteration]-[temp].
 */
protected void configureDerivationOutput(boolean intermediate) {
    Path outPath;
    Configuration conf = job.getConfiguration();
    int iteration = MRReasoningUtils.getCurrentIteration(conf);
    if (intermediate) {
        outPath = MRReasoningUtils.getOutputPath(conf,
            MRReasoningUtils.OUTPUT_BASE + iteration
            + MRReasoningUtils.TEMP_SUFFIX);
    }
    else {
        outPath = MRReasoningUtils.getOutputPath(conf,
            MRReasoningUtils.OUTPUT_BASE + iteration);
    }
    SequenceFileOutputFormat.setOutputPath(job, outPath);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT,
        SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.TERMINAL_OUT,
        SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.SCHEMA_OUT,
        SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INCONSISTENT_OUT,
        SequenceFileOutputFormat.class, Derivation.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);
    // Set up an output for diagnostic info, if needed
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
        TextOutputFormat.class, Text.class, Text.class);
}
 
Developer ID: apache, Project: incubator-rya, Lines: 34, Source: AbstractReasoningTool.java
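Examples 3, 12, and 13 all pair LazyOutputFormat with MultipleOutputs: records go to explicitly named outputs, and making the default output lazy keeps empty part-* files from appearing alongside them. A hypothetical reducer sketch of that pattern, reusing the Fact type and MRReasoningUtils constants from the example (the actual incubator-rya reducers differ):

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

// Hypothetical reducer illustrating the MultipleOutputs pattern above.
public static class DerivationReducer
        extends Reducer<Fact, NullWritable, Fact, NullWritable> {
    private MultipleOutputs<Fact, NullWritable> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<>(context);
    }

    @Override
    protected void reduce(Fact key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        // Write to a named output only; since the default output format is
        // lazy, no empty part file is created for the default path.
        mos.write(MRReasoningUtils.TERMINAL_OUT, key, NullWritable.get());
    }

    @Override
    protected void cleanup(Context context)
            throws IOException, InterruptedException {
        mos.close();
    }
}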

Example 14: setupReducer

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; // import the package/class this method depends on
private void setupReducer(Path output, CubeSegment cubeSeg)
        throws IOException {
    FactDistinctColumnsReducerMapping reducerMapping = new FactDistinctColumnsReducerMapping(cubeSeg.getCubeInstance());
    int numberOfReducers = reducerMapping.getTotalReducerNum();
    if (numberOfReducers > 250) {
        throw new IllegalArgumentException(
                "The max reducer number for FactDistinctColumnsJob is 250, but now it is "
                        + numberOfReducers
                        + ", decrease 'kylin.engine.mr.uhc-reducer-count'");
    }

    job.setReducerClass(FactDistinctColumnsReducer.class);
    job.setPartitionerClass(FactDistinctColumnPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);
    job.getConfiguration().setInt(BatchConstants.CFG_HLL_REDUCER_NUM, reducerMapping.getCuboidRowCounterReducerNum());

    // make each reducer output to respective dir
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN, SequenceFileOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class, NullWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS, SequenceFileOutputFormat.class, LongWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class, NullWritable.class, LongWritable.class);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // prevent the creation of a zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
 
Developer ID: apache, Project: kylin, Lines: 31, Source: FactDistinctColumnsJob.java

Example 15: setupReducer

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; // import the package/class this method depends on
private void setupReducer(Path output, int numberOfReducers) throws IOException {
    job.setReducerClass(UHCDictionaryReducer.class);
    job.setPartitionerClass(UHCDictionaryPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);

    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class, NullWritable.class, BytesWritable.class);
    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // prevent the creation of a zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
 
Developer ID: apache, Project: kylin, Lines: 15, Source: UHCDictionaryJob.java


Note: The examples of the org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat.setOutputFormatClass method in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright in the source code remains with the original authors. For distribution and use, please refer to the corresponding project's license. Do not reproduce without permission.