This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.setOutputCompressorClass. If you are wondering what SequenceFileOutputFormat.setOutputCompressorClass does, how to call it, or where to find real-world uses of it, the curated examples below should help. You can also browse the enclosing class org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat for further usage examples.
The following 10 code examples of SequenceFileOutputFormat.setOutputCompressorClass are shown, ordered by popularity by default.
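Before the collected examples, here is a minimal, self-contained sketch of the usual calling pattern. The class name, the map-only identity job, and the argument paths are placeholders for illustration: enable output compression, choose a compression type, then pick the codec with setOutputCompressorClass.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class CompressedSeqFileDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "compressed-seqfile-sketch");
        job.setJarByClass(CompressedSeqFileDriver.class);
        // Map-only identity job: the default TextInputFormat emits LongWritable/Text pairs,
        // which the default Mapper passes straight through to the output.
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
        // The three calls that usually appear together:
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}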
Example 1: setStoreLocation
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // the import the example depends on
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    job.setOutputKeyClass(keyClass);
    job.setOutputValueClass(valueClass);
    if (compressionType != null && compressionCodecClass != null) {
        Class<? extends CompressionCodec> codecClass =
                FileOutputFormat.getOutputCompressorClass(job, DefaultCodec.class);
        SequenceFileOutputFormat.setOutputCompressorClass(job, codecClass);
        SequenceFileOutputFormat.setOutputCompressionType(job,
                SequenceFile.CompressionType.valueOf(compressionType));
    }
    FileOutputFormat.setOutputPath(job, new Path(location));
}
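A note on the pattern above: FileOutputFormat.getOutputCompressorClass returns whatever codec is already configured on the job, or the supplied default (DefaultCodec here) when none is set, so reading the codec back and re-applying it preserves a choice the caller may have made earlier. A small sketch of that round trip (the class name is a placeholder):

import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class CodecRoundTrip {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance();
        // Nothing configured yet, so the supplied default is returned.
        Class<? extends CompressionCodec> codec =
                FileOutputFormat.getOutputCompressorClass(job, DefaultCodec.class);
        System.out.println(codec); // class org.apache.hadoop.io.compress.DefaultCodec
        // After an explicit choice, the configured codec is returned instead of the default.
        SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        System.out.println(FileOutputFormat.getOutputCompressorClass(job, DefaultCodec.class));
    }
}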
Example 2: createJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // the import the example depends on
public static Job createJob(Path[] inputPaths, Path outputPath, Map<String, String> metadata, Configuration config)
        throws IOException
{
    final Job job = new Job(config);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setOutputFormatClass(MetadataSequenceFileOutputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPaths);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    if (metadata != null)
        MetadataConfiguration.setMetadata(metadata, job.getConfiguration());
    return job;
}
Example 3: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // the import the example depends on
/**
 * Write the sequence file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        return result;
    }

    Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

    Configuration conf = super.getConf();
    Job job = new Job(conf);
    job.setJarByClass(SequenceFileProtobufMapReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Stock.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(PbMapper.class);
    job.setReducerClass(PbReducer.class);

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);

    ProtobufSerialization.register(job.getConfiguration());

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
Example 4: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // the import the example depends on
/**
 * Write the sequence file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        return result;
    }

    Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

    Configuration conf = super.getConf();
    Job job = new Job(conf);
    job.setJarByClass(SequenceFileStockMapReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(StockPriceWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);   // read the input as a SequenceFile
    job.setOutputFormatClass(SequenceFileOutputFormat.class); // write the output as a SequenceFile
    SequenceFileOutputFormat.setCompressOutput(job, true);    // enable output compression
    SequenceFileOutputFormat.setOutputCompressionType(job,    // compress whole blocks of records
            SequenceFile.CompressionType.BLOCK);
    SequenceFileOutputFormat.setOutputCompressorClass(job,    // use the DEFLATE-based default codec
            DefaultCodec.class);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
Example 5: setupOutput
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // the import the example depends on
protected void setupOutput(final Job job, final SampleDataForSplitPoints operation, final Store store) throws IOException {
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(operation.getOutputPath()));
    if (null != operation.getCompressionCodec()) {
        if (GzipCodec.class.isAssignableFrom(operation.getCompressionCodec())
                && !NativeCodeLoader.isNativeCodeLoaded()
                && !ZlibFactory.isNativeZlibLoaded(job.getConfiguration())) {
            LOGGER.warn("SequenceFile doesn't work with GzipCodec without native-hadoop code!");
        } else {
            SequenceFileOutputFormat.setCompressOutput(job, true);
            SequenceFileOutputFormat.setOutputCompressorClass(job, operation.getCompressionCodec());
            SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        }
    }
}
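Example 5 guards against a genuine pitfall: writing a SequenceFile with GzipCodec requires the native hadoop/zlib libraries to be loaded. The same defensive check can be factored into a helper; the sketch below is an illustration under stated assumptions (CodecChooser and chooseCodec are hypothetical names), falling back to the pure-Java DefaultCodec when native gzip support is missing.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.io.compress.zlib.ZlibFactory;
import org.apache.hadoop.util.NativeCodeLoader;

public class CodecChooser {
    /** Returns the requested codec, or DefaultCodec when GzipCodec lacks native support. */
    static Class<? extends CompressionCodec> chooseCodec(
            Class<? extends CompressionCodec> requested, Configuration conf) {
        boolean nativeGzipAvailable =
                NativeCodeLoader.isNativeCodeLoaded() || ZlibFactory.isNativeZlibLoaded(conf);
        if (GzipCodec.class.isAssignableFrom(requested) && !nativeGzipAvailable) {
            return DefaultCodec.class; // pure-Java DEFLATE, always available
        }
        return requested;
    }
}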
Example 6: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // the import the example depends on
@Override
public int run(String[] args) throws Exception {
    final HadoopDownloaderOptions options = new HadoopDownloaderOptions(args);
    options.prepare(true);

    final Job job = new Job(getConf());
    job.setJarByClass(HadoopDownloader.class);
    job.setJobName("Hadoop Downloader Utility");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);

    if (options.getNumberOfThreads() <= 1) {
        job.setMapperClass(DownloadMapper.class);
    } else {
        job.setMapperClass(MultithreadedMapper.class);
        MultithreadedMapper.setMapperClass(job, DownloadMapper.class);
        MultithreadedMapper.setNumberOfThreads(job, options.getNumberOfThreads());
    }

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(options.getNumberOfReducers());
    job.getConfiguration().setStrings(ARGS_KEY, args);

    FileInputFormat.setInputPaths(job, options.getInputPaths());
    SequenceFileOutputFormat.setOutputPath(job, options.getOutputPath());
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.waitForCompletion(true);
    return 0;
}
Example 7: runDictionaryJobSampling
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // the import the example depends on
protected boolean runDictionaryJobSampling() throws IOException, ClassNotFoundException, InterruptedException {
    boolean jobOK;
    Job job = null;

    // if the input path does not exist, fail
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if the samples path exists...
    if (this.dictionaryFS.exists(this.conf.getDictionarySamplesPath())) {
        if (this.conf.getDeleteDictionarySamplesPath()) { // ... and the option was provided, delete recursively
            this.dictionaryFS.delete(this.conf.getDictionarySamplesPath(), true);
        } else { // ... and the option was not provided, fail
            System.out.println("Dictionary samples path already exists: " + this.conf.getDictionarySamplesPath());
            System.out.println("Select another path or use option -ds to overwrite");
            System.exit(-1);
        }
    }

    // Job to create a SequenceInputFormat with Roles
    job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName() + " phase 1");
    job.setJarByClass(HDTBuilderDriver.class);

    System.out.println("input = " + this.conf.getInputPath());
    System.out.println("samples = " + this.conf.getDictionarySamplesPath());

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getDictionarySamplesPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(DictionarySamplerMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setCombinerClass(DictionarySamplerReducer.class);
    job.setReducerClass(DictionarySamplerReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(this.conf.getDictionarySampleReducers());

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);
    return jobOK;
}
Example 8: runDictionaryJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // the import the example depends on
protected boolean runDictionaryJob() throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
    boolean jobOK;
    Job job = null;
    BufferedWriter bufferedWriter;

    // if the output path exists...
    if (this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
        if (this.conf.getDeleteDictionaryOutputPath()) { // ... and the option was provided, delete recursively
            this.dictionaryFS.delete(this.conf.getDictionaryOutputPath(), true);
        } else { // ... and the option was not provided, fail
            System.out.println("Dictionary output path already exists: " + this.conf.getDictionaryOutputPath());
            System.out.println("Select another path or use option -dd to overwrite");
            System.exit(-1);
        }
    }

    // Sample the SequenceInputFormat to do TotalSort and create the final output
    job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName() + " phase 2");
    job.setJarByClass(HDTBuilderDriver.class);

    System.out.println("samples = " + this.conf.getDictionarySamplesPath());
    System.out.println("output = " + this.conf.getDictionaryOutputPath());

    FileInputFormat.addInputPath(job, this.conf.getDictionarySamplesPath());
    FileOutputFormat.setOutputPath(job, this.conf.getDictionaryOutputPath());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    // Identity Mapper
    // job.setMapperClass(Mapper.class);
    job.setCombinerClass(DictionaryCombiner.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    job.setReducerClass(DictionaryReducer.class);
    job.setNumReduceTasks(this.conf.getDictionaryReducers());

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    System.out.println("Sampling started");
    InputSampler.writePartitionFile(job, new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
    String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
    URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
    DistributedCache.createSymlink(job.getConfiguration());
    System.out.println("Sampling finished");

    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SHARED, SequenceFileOutputFormat.class, Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SUBJECTS, SequenceFileOutputFormat.class, Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.PREDICATES, SequenceFileOutputFormat.class, Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.OBJECTS, SequenceFileOutputFormat.class, Text.class, NullWritable.class);

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);

    this.numShared = job.getCounters().findCounter(Counters.Shared).getValue();
    this.numSubjects = job.getCounters().findCounter(Counters.Subjects).getValue();
    this.numPredicates = job.getCounters().findCounter(Counters.Predicates).getValue();
    this.numObjects = job.getCounters().findCounter(Counters.Objects).getValue();

    bufferedWriter = new BufferedWriter(new OutputStreamWriter(this.dictionaryFS.create(this.conf.getDictionaryCountersFile())));
    bufferedWriter.write(HDTBuilderConfiguration.SHARED + "=" + this.numShared + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.SUBJECTS + "=" + this.numSubjects + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.PREDICATES + "=" + this.numPredicates + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.OBJECTS + "=" + this.numObjects + "\n");
    bufferedWriter.close();

    return jobOK;
}
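Examples 7 through 10 pair LazyOutputFormat with MultipleOutputs: LazyOutputFormat only creates an output file once the first record is written, which avoids empty part-xxxxx files when every record is routed to a named output. A minimal sketch of writing to a named output inside a reducer (the class name, key routing, and the "subjects" output name are hypothetical stand-ins for Example 8's constants):

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class NamedOutputReducer extends Reducer<Text, Text, Text, NullWritable> {
    private MultipleOutputs<Text, NullWritable> outputs;

    @Override
    protected void setup(Context context) {
        outputs = new MultipleOutputs<Text, NullWritable>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Route each key to a named output previously registered with addNamedOutput(...).
        outputs.write("subjects", key, NullWritable.get());
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        outputs.close(); // flush and close all named outputs
    }
}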
Example 9: runTriplesJobSampling
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // the import the example depends on
protected boolean runTriplesJobSampling() throws ClassNotFoundException, IOException, InterruptedException {
    Job job = null;
    boolean jobOK;
    BufferedWriter bufferedWriter;

    // if the input path does not exist, fail
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if the dictionary output path does not exist, fail
    if (!this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
        System.out.println("Dictionary output path does not exist: " + this.conf.getDictionaryOutputPath());
        System.exit(-1);
    }

    // if the triples samples path exists...
    if (this.dictionaryFS.exists(this.conf.getTriplesSamplesPath())) {
        if (this.conf.getDeleteTriplesSamplesPath()) { // ... and the option was provided, delete recursively
            this.dictionaryFS.delete(this.conf.getTriplesSamplesPath(), true);
        } else { // ... and the option was not provided, fail
            System.out.println("Triples samples path already exists: " + this.conf.getTriplesSamplesPath());
            System.out.println("Select another path or use option -dst to overwrite");
            System.exit(-1);
        }
    }

    this.conf.setProperty("mapred.child.java.opts", "-XX:ErrorFile=/home/hadoop/tmp/hs_err_pid%p.log -Xmx2500m");

    // Job to create a SequenceInputFormat
    job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 1");
    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getTriplesSamplesPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(TriplesSPOMapper.class);
    job.setSortComparatorClass(TripleSPOComparator.class);
    job.setGroupingComparatorClass(TripleSPOComparator.class);
    job.setMapOutputKeyClass(TripleSPOWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(TripleSPOWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(this.conf.getTriplesReducers());

    DistributedCache.addCacheFile(this.conf.getDictionaryFile().toUri(), job.getConfiguration());

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);

    this.numTriples = job.getCounters().findCounter(Counters.Triples).getValue();
    bufferedWriter = new BufferedWriter(new OutputStreamWriter(this.triplesFS.create(this.conf.getTriplesCountersFile())));
    bufferedWriter.write(this.numTriples.toString() + "\n");
    bufferedWriter.close();

    return jobOK;
}
Example 10: runTriplesJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // the import the example depends on
protected boolean runTriplesJob() throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Job job = null;
    boolean jobOK;

    // if the triples output path exists...
    if (this.triplesFS.exists(this.conf.getTriplesOutputPath())) {
        if (this.conf.getDeleteTriplesOutputPath()) { // ... and the option was provided, delete recursively
            this.triplesFS.delete(this.conf.getTriplesOutputPath(), true);
        } else { // ... and the option was not provided, fail
            System.out.println("Triples output path already exists: " + this.conf.getTriplesOutputPath());
            System.out.println("Select another path or use option -dt to overwrite");
            System.exit(-1);
        }
    }

    job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 2");
    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getTriplesSamplesPath());
    FileOutputFormat.setOutputPath(job, this.conf.getTriplesOutputPath());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setSortComparatorClass(TripleSPOComparator.class);
    job.setGroupingComparatorClass(TripleSPOComparator.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);

    job.setOutputKeyClass(TripleSPOWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(this.conf.getTriplesReducers());

    System.out.println("Sampling started");
    InputSampler.writePartitionFile(job, new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
    String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
    URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
    DistributedCache.createSymlink(job.getConfiguration());
    System.out.println("Sampling finished");

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);
    return jobOK;
}
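Finally, to confirm what any of these jobs actually wrote, you can open one of the output part files with a SequenceFile.Reader and inspect the recorded compression type and codec. A minimal verification sketch (the part-file path is a placeholder passed on the command line):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;

public class VerifyCompression {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path part = new Path(args[0]); // e.g. output/part-r-00000
        try (SequenceFile.Reader reader =
                     new SequenceFile.Reader(conf, SequenceFile.Reader.file(part))) {
            System.out.println("compression type  = " + reader.getCompressionType());
            System.out.println("compression codec = " + reader.getCompressionCodec());
        }
    }
}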