

Java SequenceFileOutputFormat.setOutputCompressionType Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.setOutputCompressionType. If you are wondering how to use SequenceFileOutputFormat.setOutputCompressionType in Java, what it does, or where to find examples of it, the hand-picked code samples below should help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.


A total of 13 code examples of SequenceFileOutputFormat.setOutputCompressionType are presented below, ordered by popularity by default.
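
Before turning to the collected examples, here is a minimal, self-contained sketch of the typical call pattern. It is illustrative only and not taken from any of the projects below; the driver class name CompressedSeqFileDriver, the job name, and the choice of DefaultCodec are placeholder assumptions. setOutputCompressionType accepts one of the SequenceFile.CompressionType values NONE, RECORD, or BLOCK, and only takes effect once output compression is enabled.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class CompressedSeqFileDriver {
  public static void main(String[] args) throws Exception {
    // Hypothetical map-only job: the default identity mapper copies the
    // TextInputFormat records (LongWritable offset, Text line) straight
    // into a block-compressed SequenceFile.
    Job job = Job.getInstance(new Configuration(), "compressed-seqfile-demo");
    job.setJarByClass(CompressedSeqFileDriver.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    // Enable compression, pick a codec, then choose the granularity:
    // NONE, RECORD (each value compressed alone), or BLOCK (batches of
    // records compressed together, usually the best ratio for small records).
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

As the real-world examples below show, setOutputCompressionType is almost always paired with setCompressOutput and setOutputCompressorClass (Deflate, Snappy, or LZO codecs), and BLOCK is by far the most common compression type.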

Example 1: setStoreLocation

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
@Override
public void setStoreLocation(String location, Job job)
    throws IOException {
  job.setOutputKeyClass(keyClass);
  job.setOutputValueClass(valueClass);
  if (compressionType != null && compressionCodecClass != null) {
    Class<? extends CompressionCodec> codecClass =
        FileOutputFormat.getOutputCompressorClass(job,
            DefaultCodec.class);
    SequenceFileOutputFormat.
        setOutputCompressorClass(job, codecClass);
    SequenceFileOutputFormat.setOutputCompressionType(job,
        SequenceFile.CompressionType.valueOf(compressionType));
  }
  FileOutputFormat.setOutputPath(job, new Path(location));
}
 
Author: Hanmourang, Project: hiped2, Lines: 17, Source: SequenceFileStoreFunc.java

Example 2: setTasksClasses

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
/**
 * Sets task classes with related info if needed into configuration object.
 *
 * @param job Configuration to change.
 * @param setMapper Option to set mapper and input format classes.
 * @param setCombiner Option to set combiner class.
 * @param setReducer Option to set reducer and output format classes.
 * @param outputCompression Option to write block-compressed SequenceFile output.
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer,
        boolean outputCompression) {
    if (setMapper) {
        job.setMapperClass(HadoopWordCount2Mapper.class);
        job.setInputFormatClass(TextInputFormat.class);
    }

    if (setCombiner)
        job.setCombinerClass(HadoopWordCount2Combiner.class);

    if (setReducer) {
        job.setReducerClass(HadoopWordCount2Reducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
    }

    if (outputCompression) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

        SequenceFileOutputFormat.setCompressOutput(job, true);

        job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
    }
}
 
Author: apache, Project: ignite, Lines: 34, Source: HadoopWordCount2.java

Example 3: run

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {

  if (args.length != 2) {
    System.out.printf("Usage: CreateSequenceFile <input dir> <output dir>\n");
    return -1;
  }

  Job job = new Job(getConf());
  job.setJarByClass(CreateSequenceFile.class);
  job.setJobName("Create Sequence File");
  
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
  
  FileOutputFormat.setCompressOutput(job,true);
  FileOutputFormat.setOutputCompressorClass(job,SnappyCodec.class);
  
  SequenceFileOutputFormat.setOutputCompressionType(job,
      CompressionType.BLOCK);
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
 
Author: mellowonpsx, Project: cloudera-homework, Lines: 27, Source: CreateSequenceFile.java

Example 4: createJob

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
public static Job createJob(Path[] inputPaths, Path outputPath, Map<String, String> metadata, Configuration config)
		throws IOException
{
	final Job job = new Job(config);

	job.setInputFormatClass(SequenceFileInputFormat.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(BytesWritable.class);
	job.setOutputFormatClass(MetadataSequenceFileOutputFormat.class);

	SequenceFileInputFormat.setInputPaths(job, inputPaths);
	SequenceFileOutputFormat.setOutputPath(job, outputPath);
	SequenceFileOutputFormat.setCompressOutput(job, true);
	SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
	SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

	if (metadata != null)
		MetadataConfiguration.setMetadata(metadata, job.getConfiguration());

	return job;
}
 
Author: openimaj, Project: openimaj, Lines: 22, Source: TextBytesJobUtil.java

Example 5: runPartitionerJob

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
private int runPartitionerJob() throws Exception
{
  Job partitionerJob = new Job(getConf(), "Partition Wikipedia");
  Configuration partitionerConf = partitionerJob.getConfiguration();
  partitionerConf.set("mapred.map.tasks.speculative.execution", "false");

  configurePartitionerJob(partitionerJob);
  
  List<Path> inputPaths = new ArrayList<Path>();
  SortedSet<String> languages = new TreeSet<String>();
  FileSystem fs = FileSystem.get(partitionerConf);
  Path parent = new Path(partitionerConf.get("wikipedia.input"));
  listFiles(parent, fs, inputPaths, languages);
  
  System.out.println("Input files in " + parent + ":" + inputPaths.size());
  Path[] inputPathsArray = new Path[inputPaths.size()];
  inputPaths.toArray(inputPathsArray);
  
  System.out.println("Languages:" + languages.size());

  // setup input format
  
  WikipediaInputFormat.setInputPaths(partitionerJob, inputPathsArray);
  
  partitionerJob.setMapperClass(WikipediaPartitioner.class);
  partitionerJob.setNumReduceTasks(0);

  // setup output format
  partitionerJob.setMapOutputKeyClass(Text.class);
  partitionerJob.setMapOutputValueClass(Article.class);
  partitionerJob.setOutputKeyClass(Text.class);
  partitionerJob.setOutputValueClass(Article.class);
  partitionerJob.setOutputFormatClass(SequenceFileOutputFormat.class);
  Path outputDir = WikipediaConfiguration.getPartitionedArticlesPath(partitionerConf);
  SequenceFileOutputFormat.setOutputPath(partitionerJob, outputDir);
  SequenceFileOutputFormat.setCompressOutput(partitionerJob, true);
  SequenceFileOutputFormat.setOutputCompressionType(partitionerJob, CompressionType.RECORD);
  
  return partitionerJob.waitForCompletion(true) ? 0 : 1;
}
 
Author: apache, Project: accumulo-wikisearch, Lines: 41, Source: WikipediaPartitionedIngester.java
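
Note that this is the only example on this page that selects CompressionType.RECORD, which compresses each record value individually; the other examples use BLOCK, which compresses batches of records together and generally achieves better ratios when records are small.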

Example 6: run

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
/**
 * Write the sequence file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  Job job = new Job(conf);
  job.setJarByClass(SequenceFileProtobufMapReduce.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Stock.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  job.setMapperClass(PbMapper.class);
  job.setReducerClass(PbReducer.class);

  SequenceFileOutputFormat.setCompressOutput(job, true);
  SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
  SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);

  ProtobufSerialization.register(job.getConfiguration());

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  if (job.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
 
Author: Hanmourang, Project: hiped2, Lines: 46, Source: SequenceFileProtobufMapReduce.java

Example 7: run

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
/**
 * Write the sequence file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  Job job = new Job(conf);
  job.setJarByClass(SequenceFileStockMapReduce.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(StockPriceWritable.class);
  job.setInputFormatClass(
      SequenceFileInputFormat.class); //<co id="ch03_comment_seqfile_mr1"/>
  job.setOutputFormatClass(SequenceFileOutputFormat.class);  //<co id="ch03_comment_seqfile_mr2"/>
  SequenceFileOutputFormat.setCompressOutput(job, true);  //<co id="ch03_comment_seqfile_mr3"/>
  SequenceFileOutputFormat.setOutputCompressionType(job,  //<co id="ch03_comment_seqfile_mr4"/>
      SequenceFile.CompressionType.BLOCK);
  SequenceFileOutputFormat.setOutputCompressorClass(job,  //<co id="ch03_comment_seqfile_mr5"/>
      DefaultCodec.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  if (job.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
 
Author: Hanmourang, Project: hiped2, Lines: 43, Source: SequenceFileStockMapReduce.java

Example 8: setupOutput

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
protected void setupOutput(final Job job, final SampleDataForSplitPoints operation, final Store store) throws IOException {
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(operation.getOutputPath()));
    if (null != operation.getCompressionCodec()) {
        if (GzipCodec.class.isAssignableFrom(operation.getCompressionCodec()) && !NativeCodeLoader.isNativeCodeLoaded() && !ZlibFactory.isNativeZlibLoaded(job.getConfiguration())) {
            LOGGER.warn("SequenceFile doesn't work with GzipCodec without native-hadoop code!");
        } else {
            SequenceFileOutputFormat.setCompressOutput(job, true);
            SequenceFileOutputFormat.setOutputCompressorClass(job, operation.getCompressionCodec());
            SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        }
    }
}
 
Author: gchq, Project: Gaffer, Lines: 14, Source: AccumuloSampleDataForSplitPointsJobFactory.java

Example 9: run

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
	final HadoopDownloaderOptions options = new HadoopDownloaderOptions(args);
	options.prepare(true);

	final Job job = new Job(getConf());

	job.setJarByClass(HadoopDownloader.class);
	job.setJobName("Hadoop Downloader Utility");

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(BytesWritable.class);

	if (options.getNumberOfThreads() <= 1) {
		job.setMapperClass(DownloadMapper.class);
	} else {
		job.setMapperClass(MultithreadedMapper.class);
		MultithreadedMapper.setMapperClass(job, DownloadMapper.class);
		MultithreadedMapper.setNumberOfThreads(job, options.getNumberOfThreads());
	}

	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(SequenceFileOutputFormat.class);

	job.setNumReduceTasks(options.getNumberOfReducers());

	job.getConfiguration().setStrings(ARGS_KEY, args);

	FileInputFormat.setInputPaths(job, options.getInputPaths());
	SequenceFileOutputFormat.setOutputPath(job, options.getOutputPath());
	SequenceFileOutputFormat.setCompressOutput(job, true);
	SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
	SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

	job.waitForCompletion(true);

	return 0;
}
 
Author: openimaj, Project: openimaj, Lines: 39, Source: HadoopDownloader.java

Example 10: runDictionaryJobSampling

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
protected boolean runDictionaryJobSampling() throws IOException, ClassNotFoundException, InterruptedException {
	boolean jobOK;
	Job job = null;

	// if input path does not exist, fail
	if (!this.inputFS.exists(this.conf.getInputPath())) {
		System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
		System.exit(-1);
	}

	// if samples path exists...
	if (this.dictionaryFS.exists(this.conf.getDictionarySamplesPath())) {
		if (this.conf.getDeleteDictionarySamplesPath()) { // ... and option provided, delete recursively
			this.dictionaryFS.delete(this.conf.getDictionarySamplesPath(), true);
		} else { // ... and option not provided, fail
			System.out.println("Dictionary samples path does exist: " + this.conf.getDictionarySamplesPath());
			System.out.println("Select other path or use option -ds to overwrite");
			System.exit(-1);
		}
	}

	// Job to create a SequenceInputFormat with Roles
	job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName() + " phase 1");
	job.setJarByClass(HDTBuilderDriver.class);

	System.out.println("input = " + this.conf.getInputPath());
	System.out.println("samples = " + this.conf.getDictionarySamplesPath());

	FileInputFormat.addInputPath(job, this.conf.getInputPath());
	FileOutputFormat.setOutputPath(job, this.conf.getDictionarySamplesPath());

	job.setInputFormatClass(LzoTextInputFormat.class);
	LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

	job.setMapperClass(DictionarySamplerMapper.class);
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(Text.class);
	job.setCombinerClass(DictionarySamplerReducer.class);
	job.setReducerClass(DictionarySamplerReducer.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(Text.class);

	job.setNumReduceTasks(this.conf.getDictionarySampleReducers());

	SequenceFileOutputFormat.setCompressOutput(job, true);
	SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
	SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

	jobOK = job.waitForCompletion(true);

	return jobOK;
}
 
Author: rdfhdt, Project: hdt-mr, Lines: 53, Source: HDTBuilderDriver.java

Example 11: runDictionaryJob

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
protected boolean runDictionaryJob() throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
	boolean jobOK;
	Job job = null;
	BufferedWriter bufferedWriter;

	// if output path exists...
	if (this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
		if (this.conf.getDeleteDictionaryOutputPath()) { // ... and option provided, delete recursively
			this.dictionaryFS.delete(this.conf.getDictionaryOutputPath(), true);
		} else { // ... and option not provided, fail
			System.out.println("Dictionary output path does exist: " + this.conf.getDictionaryOutputPath());
			System.out.println("Select other path or use option -dd to overwrite");
			System.exit(-1);
		}
	}

	// Sample the SequenceInputFormat to do TotalSort and create final output
	job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName() + " phase 2");

	job.setJarByClass(HDTBuilderDriver.class);

	System.out.println("samples = " + this.conf.getDictionarySamplesPath());
	System.out.println("output = " + this.conf.getDictionaryOutputPath());

	FileInputFormat.addInputPath(job, this.conf.getDictionarySamplesPath());
	FileOutputFormat.setOutputPath(job, this.conf.getDictionaryOutputPath());

	job.setInputFormatClass(SequenceFileInputFormat.class);
	LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

	// Identity Mapper
	// job.setMapperClass(Mapper.class);
	job.setCombinerClass(DictionaryCombiner.class);
	job.setPartitionerClass(TotalOrderPartitioner.class);
	job.setReducerClass(DictionaryReducer.class);

	job.setNumReduceTasks(this.conf.getDictionaryReducers());

	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(Text.class);

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(NullWritable.class);

	System.out.println("Sampling started");
	InputSampler.writePartitionFile(job, new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
	String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
	URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
	DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
	DistributedCache.createSymlink(job.getConfiguration());
	System.out.println("Sampling finished");

	MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SHARED, SequenceFileOutputFormat.class, Text.class, NullWritable.class);
	MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SUBJECTS, SequenceFileOutputFormat.class, Text.class, NullWritable.class);
	MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.PREDICATES, SequenceFileOutputFormat.class, Text.class, NullWritable.class);
	MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.OBJECTS, SequenceFileOutputFormat.class, Text.class, NullWritable.class);

	SequenceFileOutputFormat.setCompressOutput(job, true);
	SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
	SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

	jobOK = job.waitForCompletion(true);

	this.numShared = job.getCounters().findCounter(Counters.Shared).getValue();
	this.numSubjects = job.getCounters().findCounter(Counters.Subjects).getValue();
	this.numPredicates = job.getCounters().findCounter(Counters.Predicates).getValue();
	this.numObjects = job.getCounters().findCounter(Counters.Objects).getValue();

	bufferedWriter = new BufferedWriter(new OutputStreamWriter(this.dictionaryFS.create(this.conf.getDictionaryCountersFile())));

	bufferedWriter.write(HDTBuilderConfiguration.SHARED + "=" + this.numShared + "\n");
	bufferedWriter.write(HDTBuilderConfiguration.SUBJECTS + "=" + this.numSubjects + "\n");
	bufferedWriter.write(HDTBuilderConfiguration.PREDICATES + "=" + this.numPredicates + "\n");
	bufferedWriter.write(HDTBuilderConfiguration.OBJECTS + "=" + this.numObjects + "\n");

	bufferedWriter.close();

	return jobOK;
}
 
Author: rdfhdt, Project: hdt-mr, Lines: 80, Source: HDTBuilderDriver.java

Example 12: runTriplesJobSampling

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
protected boolean runTriplesJobSampling() throws ClassNotFoundException, IOException, InterruptedException {
	Job job = null;
	boolean jobOK;
	BufferedWriter bufferedWriter;

	// if input path does not exist, fail
	if (!this.inputFS.exists(this.conf.getInputPath())) {
		System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
		System.exit(-1);
	}

	// if dictionary output path does not exist, fail
	if (!this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
		System.out.println("Dictionary output path does not exist: " + this.conf.getDictionaryOutputPath());
		System.exit(-1);
	}
	}

	// if samples path exists...
	if (this.dictionaryFS.exists(this.conf.getTriplesSamplesPath())) {
		if (this.conf.getDeleteTriplesSamplesPath()) { // ... and option provided, delete recursively
			this.dictionaryFS.delete(this.conf.getTriplesSamplesPath(), true);
		} else { // ... and option not provided, fail
			System.out.println("Triples samples path does exist: " + this.conf.getTriplesSamplesPath());
			System.out.println("Select other path or use option -dst to overwrite");
			System.exit(-1);
		}
	}

	this.conf.setProperty("mapred.child.java.opts", "-XX:ErrorFile=/home/hadoop/tmp/hs_err_pid%p.log -Xmx2500m");

	// Job to create a SequenceInputFormat
	job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 1");

	job.setJarByClass(HDTBuilderDriver.class);

	FileInputFormat.addInputPath(job, this.conf.getInputPath());
	FileOutputFormat.setOutputPath(job, this.conf.getTriplesSamplesPath());

	job.setInputFormatClass(LzoTextInputFormat.class);
	LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

	job.setMapperClass(TriplesSPOMapper.class);
	job.setSortComparatorClass(TripleSPOComparator.class);
	job.setGroupingComparatorClass(TripleSPOComparator.class);
	job.setMapOutputKeyClass(TripleSPOWritable.class);
	job.setMapOutputValueClass(NullWritable.class);
	job.setOutputKeyClass(TripleSPOWritable.class);
	job.setOutputValueClass(NullWritable.class);

	job.setNumReduceTasks(this.conf.getTriplesReducers());

	DistributedCache.addCacheFile(this.conf.getDictionaryFile().toUri(), job.getConfiguration());

	SequenceFileOutputFormat.setCompressOutput(job, true);
	SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
	SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

	jobOK = job.waitForCompletion(true);

	this.numTriples = job.getCounters().findCounter(Counters.Triples).getValue();
	bufferedWriter = new BufferedWriter(new OutputStreamWriter(this.triplesFS.create(this.conf.getTriplesCountersFile())));
	bufferedWriter.write(this.numTriples.toString() + "\n");
	bufferedWriter.close();

	return jobOK;
}
 
Author: rdfhdt, Project: hdt-mr, Lines: 69, Source: HDTBuilderDriver.java

Example 13: runTriplesJob

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
protected boolean runTriplesJob() throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
	Job job = null;
	boolean jobOK;

	// if triples output path exists...
	if (this.triplesFS.exists(this.conf.getTriplesOutputPath())) {
		if (this.conf.getDeleteTriplesOutputPath()) { // ... and option provided, delete recursively
			this.triplesFS.delete(this.conf.getTriplesOutputPath(), true);
		} else { // ... and option not provided, fail
			System.out.println("Triples output path does exist: " + this.conf.getTriplesOutputPath());
			System.out.println("Select other path or use option -dt to overwrite");
			System.exit(-1);
		}
	}

	job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 2");

	job.setJarByClass(HDTBuilderDriver.class);

	FileInputFormat.addInputPath(job, this.conf.getTriplesSamplesPath());
	FileOutputFormat.setOutputPath(job, this.conf.getTriplesOutputPath());

	job.setInputFormatClass(SequenceFileInputFormat.class);
	LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

	job.setSortComparatorClass(TripleSPOComparator.class);
	job.setGroupingComparatorClass(TripleSPOComparator.class);

	job.setPartitionerClass(TotalOrderPartitioner.class);

	job.setOutputKeyClass(TripleSPOWritable.class);
	job.setOutputValueClass(NullWritable.class);

	job.setNumReduceTasks(this.conf.getTriplesReducers());

	System.out.println("Sampling started");
	InputSampler.writePartitionFile(job, new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
	String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
	URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
	DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
	DistributedCache.createSymlink(job.getConfiguration());
	System.out.println("Sampling finished");

	SequenceFileOutputFormat.setCompressOutput(job, true);
	SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
	SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

	jobOK = job.waitForCompletion(true);

	return jobOK;
}
 
Author: rdfhdt, Project: hdt-mr, Lines: 52, Source: HDTBuilderDriver.java


Note: The org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.setOutputCompressionType method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are selected from open-source projects contributed by many developers, and copyright of the source code remains with the original authors. Before distributing or using the code, please consult the license of the corresponding project; do not reproduce this article without permission.