当前位置: 首页>>代码示例>>Java>>正文


Java LazyOutputFormat类代码示例

本文整理汇总了Java中org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat的典型用法代码示例。如果您正苦于以下问题:Java LazyOutputFormat类的具体用法?Java LazyOutputFormat怎么用?Java LazyOutputFormat使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


LazyOutputFormat类属于org.apache.hadoop.mapreduce.lib.output包,在下文中一共展示了LazyOutputFormat类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: runTestLazyOutput

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入依赖的package包/类
/**
 * Runs a small MapReduce job whose output is created either lazily (no part
 * files until the first record is written) or eagerly, and asserts that the
 * job completes successfully.
 *
 * @param conf         cluster configuration used to launch the job
 * @param output       job output directory
 * @param numReducers  number of reduce tasks
 * @param createLazily true to wrap TextOutputFormat in LazyOutputFormat
 * @throws Exception if job submission or execution fails
 */
private static void runTestLazyOutput(Configuration conf, Path output,
    int numReducers, boolean createLazily) 
throws Exception {
  Job job = Job.getInstance(conf, "Test-Lazy-Output");

  // Wire up input/output locations.
  FileInputFormat.setInputPaths(job, INPUT);
  FileOutputFormat.setOutputPath(job, output);

  job.setJarByClass(TestMapReduceLazyOutput.class);
  job.setMapperClass(TestMapper.class);
  job.setReducerClass(TestReducer.class);
  job.setNumReduceTasks(numReducers);

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);

  // LazyOutputFormat defers output-file creation until the first write;
  // the plain format creates part files eagerly.
  if (createLazily) {
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
  } else {
    job.setOutputFormatClass(TextOutputFormat.class);
  }
  assertTrue(job.waitForCompletion(true));
}
 
开发者ID:aliyun-beta,项目名称:aliyun-oss-hadoop-fs,代码行数:25,代码来源:TestMapReduceLazyOutput.java

示例2: runTestLazyOutput

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入依赖的package包/类
/**
 * Runs a small MapReduce job whose output is created either lazily (no part
 * files until the first record is written) or eagerly, and asserts that the
 * job completes successfully.
 *
 * @param conf         cluster configuration used to launch the job
 * @param output       job output directory
 * @param numReducers  number of reduce tasks
 * @param createLazily true to wrap TextOutputFormat in LazyOutputFormat
 * @throws Exception if job submission or execution fails
 */
private static void runTestLazyOutput(Configuration conf, Path output,
    int numReducers, boolean createLazily) 
throws Exception {
  // Job.getInstance replaces the deprecated new Job(conf, name) constructor.
  Job job = Job.getInstance(conf, "Test-Lazy-Output");

  FileInputFormat.setInputPaths(job, INPUT);
  FileOutputFormat.setOutputPath(job, output);

  job.setJarByClass(TestMapReduceLazyOutput.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(numReducers);

  job.setMapperClass(TestMapper.class);
  job.setReducerClass(TestReducer.class);

  // LazyOutputFormat defers output-file creation until the first write.
  if (createLazily) {
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
  } else {
    job.setOutputFormatClass(TextOutputFormat.class);
  }
  assertTrue(job.waitForCompletion(true));
}
 
开发者ID:Nextzero,项目名称:hadoop-2.6.0-cdh5.4.3,代码行数:25,代码来源:TestMapReduceLazyOutput.java

示例3: configureTextOutput

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入依赖的package包/类
/**
 * Set up a MapReduce job to output human-readable text.
 *
 * @param destination name used to derive the job's output directory
 */
protected void configureTextOutput(String destination) {
    Path outPath = MRReasoningUtils.getOutputPath(job.getConfiguration(), destination);
    TextOutputFormat.setOutputPath(job, outPath);
    // Lazy output prevents empty part files for categories with no records.
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    // The four record categories share the same text format with null keys.
    for (String category : new String[] {
            MRReasoningUtils.INTERMEDIATE_OUT, MRReasoningUtils.TERMINAL_OUT,
            MRReasoningUtils.SCHEMA_OUT, MRReasoningUtils.INCONSISTENT_OUT }) {
        MultipleOutputs.addNamedOutput(job, category,
            TextOutputFormat.class, NullWritable.class, Text.class);
    }
    // Debug output carries both a key and a value.
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
        TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
 
开发者ID:apache,项目名称:incubator-rya,代码行数:21,代码来源:AbstractReasoningTool.java

示例4: ensureJobSet

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入依赖的package包/类
/**
 * Applies the shared wiring every multi-reducer job needs: output format,
 * map/reduce classes, shuffle key/value/comparator/partitioner classes,
 * the MultiSerializer, and (when configured) the combiner.
 */
private static void ensureJobSet(Job job) {
    if (job.getConfiguration().getBoolean(MULTIREDUCERS_HAVE_OUTPUT_FORMAT, false)) {
        // we need to use the TextOutputFormat, since otherwise the FileOutputCommitter won't run
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    } else {
        job.setOutputFormatClass(NullOutputFormat.class);
    }
    // NOTE(review): this overwrites the output-format class chosen just above;
    // presumably MultiOutputFormat reads the earlier setting back from the
    // configuration and delegates to it — confirm against MultiOutputFormat.
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setReducerClass(MultiReducer.class);
    job.setMapperClass(MultiMapper.class);
    job.setMapOutputKeyClass(PerMapperOutputKey.class);
    job.setMapOutputValueClass(PerMapperOutputValue.class);
    job.setSortComparatorClass(MultiComparator.class);
    job.setPartitionerClass(MultiPartitioner.class);
    // Register MultiSerializer in io.serializations if not already present.
    List<Class<?>> serializations = Arrays.asList(
            job.getConfiguration().getClasses(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY));
    if (serializations.indexOf(MultiSerializer.class) == -1) {
        appendTo(job, CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, MultiSerializer.class);
    }
    // Enable the combiner only if at least one configured combiner class is
    // not the identity Reducer.
    for (Class<?> aClass : job.getConfiguration().getClasses(MultiCombiner.CONF_KEY)) {
        if (!aClass.equals(Reducer.class)) {
            job.setCombinerClass(MultiCombiner.class);
        }
    }
}
 
开发者ID:elazarl,项目名称:multireducers,代码行数:26,代码来源:MultiJob.java

示例5: setOutputPath

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入依赖的package包/类
/**
 * Records the output path for this output, after validating that the
 * configured output format is file-based: directly a (mapred or mapreduce)
 * FileOutputFormat, or a LazyOutputFormat wrapping one.
 *
 * @param outputPath directory the output should be written to
 * @return this builder, for chaining
 * @throws TezUncheckedException if the output format is not file-based
 */
private MROutputConfigBuilder setOutputPath(String outputPath) {
  // New-API lazy wrapper: accept when the wrapped format recorded under
  // LAZY_OUTPUTFORMAT_OUTPUTFORMAT is a mapreduce FileOutputFormat.
  // NOTE(review): conf.getClass(..., null) returns null when the wrapped
  // format was never recorded, which would make isAssignableFrom throw an
  // NPE instead of failing the check — confirm callers always set it.
  boolean passNewLazyOutputFormatCheck =
      (LazyOutputFormat.class.isAssignableFrom(outputFormat)) &&
      org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.class.
          isAssignableFrom(conf.getClass(
              MRJobConfig.LAZY_OUTPUTFORMAT_OUTPUTFORMAT, null));
  // Old-API (mapred) lazy wrapper: same idea against the mapred classes.
  boolean passOldLazyOutputFormatCheck =
      (org.apache.hadoop.mapred.lib.LazyOutputFormat.class.
          isAssignableFrom(outputFormat)) &&
      FileOutputFormat.class.isAssignableFrom(conf.getClass(
          MRJobConfig.LAZY_OUTPUTFORMAT_OUTPUTFORMAT, null));

  if (!(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.class.
      isAssignableFrom(outputFormat) ||
      FileOutputFormat.class.isAssignableFrom(outputFormat) ||
      passNewLazyOutputFormatCheck || passOldLazyOutputFormatCheck)) {
    throw new TezUncheckedException("When setting outputPath the outputFormat must " +
        "be assignable from either org.apache.hadoop.mapred.FileOutputFormat or " +
        "org.apache.hadoop.mapreduce.lib.output.FileOutputFormat. " +
        "Otherwise use the non-path config builder." +
        " Given: " + outputFormat.getName());
  }
  conf.set(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.OUTDIR, outputPath);
  this.outputPath = outputPath;
  return this;
}
 
开发者ID:apache,项目名称:tez,代码行数:27,代码来源:MROutput.java

示例6: runAnnotatorSort

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入依赖的package包/类
/**
 * Runs the annotation-sort MapReduce job (one reducer per sample), then
 * renames each sample's output part file to "&lt;sample&gt;.tsv" inside the
 * output directory.
 *
 * @return 0 on success, 1 if the job failed
 * @throws Exception if job execution or filesystem operations fail
 */
private int runAnnotatorSort() throws Exception {

        BioJob job = BioJob.getInstance(conf);

        job.setJobName("GaeaAnnotatorSortResult");
        job.setJarByClass(this.getClass());
        job.setMapperClass(AnnotationSortMapper.class);
        job.setReducerClass(AnnotationSortReducer.class);
        // One reducer per sample so each sample lands in its own part file.
        job.setNumReduceTasks(sampleNames.size());

        job.setMapOutputKeyClass(PairWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        // Lazy output: reducers that write nothing produce no part file.
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

        Path inputPath = new Path(options.getTmpPath());
        Path outputPath = new Path(options.getOutputPath());
        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        FileSystem fs = outputPath.getFileSystem(conf);
        if(job.waitForCompletion(true)){
            int loop = 0;
            for (String sampleName : sampleNames){
                Path outputPart = getSampleOutputPath(sampleName);
                // Poll for the sample's part file; it may appear with a delay.
                // NOTE(review): sleeps 6000 ms per retry, up to 10 retries, and
                // `loop` is shared across samples — confirm the cap is intended
                // to be global rather than per sample.
                while (outputPart == null && loop < 10){
                    TimeUnit.MILLISECONDS.sleep(6000);
                    outputPart = getSampleOutputPath(sampleName);
                    loop ++;
                }
                Path outputName = new Path(options.getOutputPath() + "/" + sampleName + ".tsv");
                // NOTE(review): rename is attempted even if outputPart is still
                // null after the retries — confirm this cannot happen in practice.
                fs.rename(outputPart, outputName);
            }
            return 0;
        }
        return 1;
    }
 
开发者ID:BGI-flexlab,项目名称:SOAPgaea,代码行数:41,代码来源:Annotator.java

示例7: makeJob

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入依赖的package包/类
/**
 * Builds the "Chlorine_HDFS_Scan" map-only job that recursively scans files
 * under {@code in} (restricted by {@link NewFilesFilter}) and writes results
 * to {@code out} lazily (no empty part files).
 *
 * @param conf                   base configuration; scan parameters are stored in it
 * @param in                     input directory to scan recursively
 * @param out                    output directory
 * @param matchPath              stored under "matchPath" for the mapper
 * @param scanSince              stored under "scanSince" for the mapper
 * @param chlorineConfigFilePath optional finder config, distributed via the cache
 * @param queue                  optional queue name; ignored when null
 * @param maskPath               stored under "maskPath" for the mapper
 * @return the fully configured, unsubmitted Job
 * @throws IOException if job construction fails
 */
public static Job makeJob(Configuration conf, Path in, Path out, String matchPath, long scanSince, 
		String chlorineConfigFilePath, String queue, String maskPath) throws IOException {
	// Scan parameters consumed by the mapper via the job configuration.
	conf.setBoolean("mapred.output.compress", false);
	conf.setLong("scanSince", scanSince);
	conf.set("matchPath", matchPath);
	conf.set("maskPath", maskPath);
	conf.set("inputPath", in.toString());
	conf.setInt("input_path_depth", in.depth());
	conf.set("fs.permissions.umask-mode", "007");
	if (queue != null) {
		conf.set("mapred.job.queue.name", queue);
	}

	Job job = Job.getInstance(conf, "Chlorine_HDFS_Scan");
	job.setJarByClass(HDFSScanMR.class);

	// Ship the optional finder configuration to the tasks.
	if (chlorineConfigFilePath != null) {
		try {
			job.addCacheFile(new URI(chlorineConfigFilePath));
			// NOTE(review): this sets "finder_file" on the original conf after
			// the Job copied it — confirm the mapper still sees this value.
			conf.set("finder_file", (new File(chlorineConfigFilePath)).getName());
		} catch (URISyntaxException e) {
			LOG.error(e);
		}
	}

	// Map-only deep scan over all text files under the input tree.
	job.setMapperClass(DeepScanMapper.class);
	job.setNumReduceTasks(0);
	job.setInputFormatClass(TextInputFormat.class);
	TextInputFormat.addInputPath(job, in);
	TextInputFormat.setInputDirRecursive(job, true);
	TextInputFormat.setInputPathFilter(job, NewFilesFilter.class);

	FileOutputFormat.setOutputPath(job, out);
	LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); 
	return job;
}
 
开发者ID:dataApps,项目名称:chlorine-hadoop,代码行数:34,代码来源:HDFSScanMR.java

示例8: configureJob

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入依赖的package包/类
/**
 * Job configurator: applies the standard WARC-in / lazy-WARC-out wiring with
 * GZip-compressed output and the supplied mapper/reducer.
 *
 * @param job                      job instance
 * @param jarByClass               class used to locate the job jar; also names the job
 * @param mapperClass              mapper
 * @param reducerClass             reducer
 * @param commaSeparatedInputFiles input paths
 * @param outputPath               output
 * @throws IOException I/O exception
 */
public static void configureJob(Job job, Class<?> jarByClass,
        Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass,
        String commaSeparatedInputFiles, String outputPath)
        throws IOException
{
    job.setJarByClass(jarByClass);
    job.setJobName(jarByClass.getName());

    job.setMapperClass(mapperClass);
    job.setReducerClass(reducerClass);

    // WARC in; lazy WARC out so empty part files are never created.
    job.setInputFormatClass(WARCInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, WARCOutputFormat.class);

    // Shuffle (intermediate) key/value types.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(WARCWritable.class);

    // Final output types.
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(WARCWritable.class);

    // GZip-compress the output.
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
}
 
开发者ID:dkpro,项目名称:dkpro-c4corpus,代码行数:46,代码来源:ConfigurationHelper.java

示例9: run

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入依赖的package包/类
/**
 * Configures and launches the near-duplicate info extraction job.
 * args[0] = comma-separated input paths, args[1] = output directory.
 */
@Override
public int run(String[] args)
        throws Exception
{
    Job job = Job.getInstance(getConf());
    job.setJarByClass(Phase3Step1ExtractNearDupInfo.class);
    job.setJobName(Phase3Step1ExtractNearDupInfo.class.getName());

    // WARC in; lazy custom output so empty part files are never created.
    job.setInputFormatClass(WARCInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, DocumentInfoOutputFormat.class);

    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DocumentInfo.class);

    job.setReducerClass(DeDuplicationTextOutputReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(List.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
开发者ID:dkpro,项目名称:dkpro-c4corpus,代码行数:33,代码来源:Phase3Step1ExtractNearDupInfo.java

示例10: run

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入依赖的package包/类
/**
 * Configures and launches the map-only near-duplicate tuple creation job.
 * args[0] = comma-separated input paths, args[1] = output directory.
 */
@Override
public int run(String[] args)
        throws Exception
{
    Job job = Job.getInstance(getConf());
    job.setJarByClass(Phase3Step3NearDupTuplesCreation.class);
    job.setJobName(Phase3Step3NearDupTuplesCreation.class.getName());

    // Text in; lazy text out so empty part files are never created.
    job.setInputFormatClass(TextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    job.setMapperClass(CreateTuplesMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TreeSet.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Map-only: without zero reduce tasks the mapper would not be called.
    job.setNumReduceTasks(0);

    return job.waitForCompletion(true) ? 0 : 1;
}
 
开发者ID:dkpro,项目名称:dkpro-c4corpus,代码行数:29,代码来源:Phase3Step3NearDupTuplesCreation.java

示例11: run

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入依赖的package包/类
/**
 * Configures and launches the local greedy de-duplication job.
 * args[0] = input path, args[1] = output directory (text files of ids to be deleted).
 */
@Override
public int run(String[] args)
        throws Exception
{
    Job job = Job.getInstance(getConf());
    job.setJarByClass(Phase3Step4LocalDeDuplication.class);
    job.setJobName(Phase3Step4LocalDeDuplication.class.getName());

    // Input: each mapper receives at most LINES lines.
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, new Path(args[0]));
    job.getConfiguration().setInt("mapreduce.input.lineinputformat.linespermap", LINES);

    job.setMapperClass(LocalGreedyDeDuplicationMapper.class);
    job.setReducerClass(IDCollectorReducer.class);

    // Lazy text output so empty part files are never created.
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
 
开发者ID:dkpro,项目名称:dkpro-c4corpus,代码行数:35,代码来源:Phase3Step4LocalDeDuplication.java

示例12: run

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入依赖的package包/类
/**
 * Configures and launches the distinct-data job.
 * args[0] = comma-separated input paths, args[1] = output directory.
 */
@Override
public int run(String[] args)
        throws Exception
{

    Job job = Job.getInstance(getConf());
    job.setJarByClass(Phase3Step2DistinctDataJob.class);
    job.setJobName(Phase3Step2DistinctDataJob.class.getName());

    // Text in; lazy text out so empty part files are never created.
    job.setInputFormatClass(TextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    job.setMapperClass(RemoveRedundantDataMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setReducerClass(RemoveRedundantDataReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
 
开发者ID:dkpro,项目名称:dkpro-c4corpus,代码行数:33,代码来源:Phase3Step2DistinctDataJob.java

示例13: getBaseOutputFormatClass

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入依赖的package包/类
/**
 * Resolves the job's effective output format: when the configured format is a
 * LazyOutputFormat wrapper, returns the wrapped format recorded in the job
 * configuration, falling back to the wrapper itself when none was recorded.
 * Returns null if the output format cannot be determined.
 */
public static Class getBaseOutputFormatClass(final Job job) {
    try {
        Class<?> configured = job.getOutputFormatClass();
        if (!LazyOutputFormat.class.isAssignableFrom(configured)) {
            return configured;
        }
        Class<OutputFormat> wrapped = (Class<OutputFormat>)
                DEFAULT_COMPAT.getJobContextConfiguration(job).getClass(LazyOutputFormat.OUTPUT_FORMAT, null);
        return (wrapped == null) ? configured : wrapped;
    } catch (Exception e) {
        // Deliberately best-effort: any lookup failure yields null.
        return null;
    }
}
 
开发者ID:graben1437,项目名称:titan1withtp3.1,代码行数:13,代码来源:FormatTools.java

示例14: configureSchemaOutput

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入依赖的package包/类
/**
 * Set up the MapReduce job to output a schema (TBox) as sequence files,
 * along with a text-based debug output.
 */
protected void configureSchemaOutput() {
    Path schemaPath = MRReasoningUtils.getSchemaPath(job.getConfiguration());
    SequenceFileOutputFormat.setOutputPath(job, schemaPath);

    // Main output: schema objects, written lazily to avoid empty files.
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SchemaWritable.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    // Named outputs: the schema object itself plus diagnostic text.
    MultipleOutputs.addNamedOutput(job, "schemaobj",
        SequenceFileOutputFormat.class, NullWritable.class, SchemaWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
        TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
 
开发者ID:apache,项目名称:incubator-rya,代码行数:17,代码来源:AbstractReasoningTool.java

示例15: configureDerivationOutput

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; //导入依赖的package包/类
/**
 * Set up a MapReduce job to output newly derived triples.
 * @param   intermediate    True if this is intermediate data. Outputs
 *                          to [base]-[iteration]-[temp].
 */
protected void configureDerivationOutput(boolean intermediate) {
    Configuration conf = job.getConfiguration();
    int iteration = MRReasoningUtils.getCurrentIteration(conf);
    // Intermediate data gets a temp suffix appended to the per-iteration base.
    String base = MRReasoningUtils.OUTPUT_BASE + iteration;
    Path outPath = MRReasoningUtils.getOutputPath(conf,
        intermediate ? base + MRReasoningUtils.TEMP_SUFFIX : base);

    SequenceFileOutputFormat.setOutputPath(job, outPath);
    // Lazy output prevents empty part files for categories with no records.
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    // Fact-valued categories share the same sequence-file layout.
    for (String category : new String[] { MRReasoningUtils.INTERMEDIATE_OUT,
            MRReasoningUtils.TERMINAL_OUT, MRReasoningUtils.SCHEMA_OUT }) {
        MultipleOutputs.addNamedOutput(job, category,
            SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    }
    // Inconsistencies are recorded as Derivation objects instead.
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INCONSISTENT_OUT,
        SequenceFileOutputFormat.class, Derivation.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);
    // Set up an output for diagnostic info, if needed
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
        TextOutputFormat.class, Text.class, Text.class);
}
 
开发者ID:apache,项目名称:incubator-rya,代码行数:34,代码来源:AbstractReasoningTool.java


注:本文中的org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。