This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.setOutputCompressionType. If you are wondering what SequenceFileOutputFormat.setOutputCompressionType does, how to call it, or where to find working examples, the curated code samples below should help. You can also read more about the enclosing class, org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.
The following presents 13 code examples of SequenceFileOutputFormat.setOutputCompressionType, sorted by popularity.
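Before the individual examples, here is a minimal, self-contained sketch of the usual call pattern on Hadoop 2.x. The job name, key/value classes, codec and output path are placeholders for illustration and are not taken from any of the examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

Job job = Job.getInstance(new Configuration(), "seqfile-compression-demo"); // hypothetical job
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setCompressOutput(job, true);                      // turn output compression on
SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class); // which codec to use
SequenceFileOutputFormat.setOutputCompressionType(job,
    SequenceFile.CompressionType.BLOCK);                                    // NONE, RECORD or BLOCK
FileOutputFormat.setOutputPath(job, new Path("/tmp/seqfile-demo-out"));     // hypothetical output path

All three calls only record settings in the job configuration; they take effect when the job runs and its record writers are created.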
Example 1: setStoreLocation
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    job.setOutputKeyClass(keyClass);
    job.setOutputValueClass(valueClass);
    // compressionType is expected to name a SequenceFile.CompressionType value: NONE, RECORD or BLOCK
    if (compressionType != null && compressionCodecClass != null) {
        Class<? extends CompressionCodec> codecClass =
            FileOutputFormat.getOutputCompressorClass(job, DefaultCodec.class);
        SequenceFileOutputFormat.setOutputCompressorClass(job, codecClass);
        SequenceFileOutputFormat.setOutputCompressionType(job,
            SequenceFile.CompressionType.valueOf(compressionType));
    }
    FileOutputFormat.setOutputPath(job, new Path(location));
}
Example 2: setTasksClasses
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
/**
 * Sets task classes with related info if needed into configuration object.
 *
 * @param job Configuration to change.
 * @param setMapper Option to set mapper and input format classes.
 * @param setCombiner Option to set combiner class.
 * @param setReducer Option to set reducer and output format classes.
 * @param outputCompression Option to write the output as a block-compressed SequenceFile using the Snappy codec.
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer,
boolean outputCompression) {
if (setMapper) {
job.setMapperClass(HadoopWordCount2Mapper.class);
job.setInputFormatClass(TextInputFormat.class);
}
if (setCombiner)
job.setCombinerClass(HadoopWordCount2Combiner.class);
if (setReducer) {
job.setReducerClass(HadoopWordCount2Reducer.class);
job.setOutputFormatClass(TextOutputFormat.class);
}
if (outputCompression) {
job.setOutputFormatClass(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
SequenceFileOutputFormat.setCompressOutput(job, true);
job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
}
}
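A hypothetical invocation of the helper above, assuming the HadoopWordCount2 classes are on the classpath; with the last flag set, the word counts are written as a Snappy, block-compressed SequenceFile (the job name and paths are placeholders):

Job job = Job.getInstance(new Configuration(), "wordcount-compressed"); // hypothetical job
setTasksClasses(job, true, true, true, true);                           // mapper + combiner + reducer + compressed output
FileInputFormat.setInputPaths(job, new Path("/data/books"));            // hypothetical input
FileOutputFormat.setOutputPath(job, new Path("/data/wordcount-out"));   // hypothetical output
job.waitForCompletion(true);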
Example 3: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.printf("Usage: CreateSequenceFile <input dir> <output dir>\n");
        return -1;
    }
    Job job = new Job(getConf());
    job.setJarByClass(CreateSequenceFile.class);
    job.setJobName("Create Sequence File");
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
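To check what a job like Example 3 produced, the header of any part file can be inspected with SequenceFile.Reader. A sketch for Hadoop 2.x follows; the path is a placeholder, and older releases use the new SequenceFile.Reader(fs, path, conf) constructor instead.

Configuration conf = new Configuration();
Path part = new Path("/user/demo/seqfile-out/part-m-00000");            // hypothetical part file
SequenceFile.Reader reader =
    new SequenceFile.Reader(conf, SequenceFile.Reader.file(part));
try {
    System.out.println("key class   = " + reader.getKeyClassName());
    System.out.println("value class = " + reader.getValueClassName());
    System.out.println("compression = " + reader.getCompressionType());               // expected: BLOCK
    System.out.println("codec       = " + reader.getCompressionCodec().getClass().getName());
} finally {
    reader.close();
}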
Example 4: createJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
public static Job createJob(Path[] inputPaths, Path outputPath, Map<String, String> metadata, Configuration config)
throws IOException
{
final Job job = new Job(config);
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(BytesWritable.class);
job.setOutputFormatClass(MetadataSequenceFileOutputFormat.class);
SequenceFileInputFormat.setInputPaths(job, inputPaths);
SequenceFileOutputFormat.setOutputPath(job, outputPath);
SequenceFileOutputFormat.setCompressOutput(job, true);
SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
if (metadata != null)
MetadataConfiguration.setMetadata(metadata, job.getConfiguration());
return job;
}
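MetadataSequenceFileOutputFormat and MetadataConfiguration are project-specific helpers, but the factory itself can be driven like any other Job. A hypothetical caller might look like this; the paths, metadata entry and identity-mapper setup are purely illustrative:

Map<String, String> metadata = new HashMap<String, String>();           // java.util.Map / java.util.HashMap
metadata.put("source", "example-run");                                   // hypothetical metadata entry
Job job = createJob(new Path[] { new Path("/data/in") },                 // hypothetical input
    new Path("/data/out"),                                               // hypothetical output
    metadata, new Configuration());
job.setMapperClass(Mapper.class);                                        // identity map-only copy, for illustration
job.setNumReduceTasks(0);
job.waitForCompletion(true);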
Example 5: runPartitionerJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
private int runPartitionerJob() throws Exception
{
Job partitionerJob = new Job(getConf(), "Partition Wikipedia");
Configuration partitionerConf = partitionerJob.getConfiguration();
partitionerConf.set("mapred.map.tasks.speculative.execution", "false");
configurePartitionerJob(partitionerJob);
List<Path> inputPaths = new ArrayList<Path>();
SortedSet<String> languages = new TreeSet<String>();
FileSystem fs = FileSystem.get(partitionerConf);
Path parent = new Path(partitionerConf.get("wikipedia.input"));
listFiles(parent, fs, inputPaths, languages);
System.out.println("Input files in " + parent + ":" + inputPaths.size());
Path[] inputPathsArray = new Path[inputPaths.size()];
inputPaths.toArray(inputPathsArray);
System.out.println("Languages:" + languages.size());
// setup input format
WikipediaInputFormat.setInputPaths(partitionerJob, inputPathsArray);
partitionerJob.setMapperClass(WikipediaPartitioner.class);
partitionerJob.setNumReduceTasks(0);
// setup output format
partitionerJob.setMapOutputKeyClass(Text.class);
partitionerJob.setMapOutputValueClass(Article.class);
partitionerJob.setOutputKeyClass(Text.class);
partitionerJob.setOutputValueClass(Article.class);
partitionerJob.setOutputFormatClass(SequenceFileOutputFormat.class);
Path outputDir = WikipediaConfiguration.getPartitionedArticlesPath(partitionerConf);
SequenceFileOutputFormat.setOutputPath(partitionerJob, outputDir);
SequenceFileOutputFormat.setCompressOutput(partitionerJob, true);
SequenceFileOutputFormat.setOutputCompressionType(partitionerJob, CompressionType.RECORD);
return partitionerJob.waitForCompletion(true) ? 0 : 1;
}
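Example 5 is the only one that picks CompressionType.RECORD, which compresses each value on its own; BLOCK batches many records together and usually compresses better at the cost of some read latency. Because setOutputCompressionType simply writes a configuration property, the same choice can also be made without touching the driver. A sketch using the Hadoop 2.x constants (older releases use the mapred.output.compression.* keys; the -D form assumes the driver runs through ToolRunner, and the jar and driver names are placeholders):

// Equivalent configuration-driven setup, e.g. from the command line:
//   hadoop jar wikipedia-ingest.jar PartitionerDriver \
//     -Dmapreduce.output.fileoutputformat.compress=true \
//     -Dmapreduce.output.fileoutputformat.compress.type=RECORD
Configuration conf = partitionerJob.getConfiguration();
conf.setBoolean(FileOutputFormat.COMPRESS, true);
conf.set(FileOutputFormat.COMPRESS_TYPE, SequenceFile.CompressionType.RECORD.toString());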
Example 6: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
/**
* Write the sequence file.
*
* @param args the command-line arguments
* @return the process exit code
* @throws Exception if something goes wrong
*/
public int run(final String[] args) throws Exception {
Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
int result = cli.runCmd();
if (result != 0) {
return result;
}
Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));
Configuration conf = super.getConf();
Job job = new Job(conf);
job.setJarByClass(SequenceFileProtobufMapReduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Stock.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setMapperClass(PbMapper.class);
job.setReducerClass(PbReducer.class);
SequenceFileOutputFormat.setCompressOutput(job, true);
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
ProtobufSerialization.register(job.getConfiguration());
FileInputFormat.setInputPaths(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);
if (job.waitForCompletion(true)) {
return 0;
}
return 1;
}
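setOutputCompressionType only affects the final job output. In a full map/reduce job like this one, the intermediate map output can be compressed independently, which often matters more for shuffle-heavy jobs. A sketch with the standard Hadoop 2.x property names; the Snappy choice is illustrative:

Configuration conf = job.getConfiguration();
conf.setBoolean("mapreduce.map.output.compress", true);                 // compress spilled/shuffled map output
conf.setClass("mapreduce.map.output.compress.codec",
    org.apache.hadoop.io.compress.SnappyCodec.class,
    org.apache.hadoop.io.compress.CompressionCodec.class);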
Example 7: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
/**
* Write the sequence file.
*
* @param args the command-line arguments
* @return the process exit code
* @throws Exception if something goes wrong
*/
public int run(final String[] args) throws Exception {
Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
int result = cli.runCmd();
if (result != 0) {
return result;
}
Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));
Configuration conf = super.getConf();
Job job = new Job(conf);
job.setJarByClass(SequenceFileStockMapReduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(StockPriceWritable.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setCompressOutput(job, true);
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
FileInputFormat.setInputPaths(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);
if (job.waitForCompletion(true)) {
return 0;
}
return 1;
}
Example 8: setupOutput
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
protected void setupOutput(final Job job, final SampleDataForSplitPoints operation, final Store store) throws IOException {
job.setOutputFormatClass(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setOutputPath(job, new Path(operation.getOutputPath()));
if (null != operation.getCompressionCodec()) {
if (GzipCodec.class.isAssignableFrom(operation.getCompressionCodec()) && !NativeCodeLoader.isNativeCodeLoaded() && !ZlibFactory.isNativeZlibLoaded(job.getConfiguration())) {
LOGGER.warn("SequenceFile doesn't work with GzipCodec without native-hadoop code!");
} else {
SequenceFileOutputFormat.setCompressOutput(job, true);
SequenceFileOutputFormat.setOutputCompressorClass(job, operation.getCompressionCodec());
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
}
}
}
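The guard above exists because SequenceFile refuses GzipCodec unless native zlib is loaded (SequenceFile.Writer fails with essentially the same message). A hedged variant that falls back to DefaultCodec, whose pure-Java zlib works everywhere, instead of silently skipping compression; the fallback policy is this sketch's own choice, not part of the example:

Class<? extends CompressionCodec> codec = operation.getCompressionCodec();   // requested codec
if (GzipCodec.class.isAssignableFrom(codec)
        && !NativeCodeLoader.isNativeCodeLoaded()
        && !ZlibFactory.isNativeZlibLoaded(job.getConfiguration())) {
    codec = DefaultCodec.class;                                               // safe pure-Java fallback
}
SequenceFileOutputFormat.setCompressOutput(job, true);
SequenceFileOutputFormat.setOutputCompressorClass(job, codec);
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);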
Example 9: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
final HadoopDownloaderOptions options = new HadoopDownloaderOptions(args);
options.prepare(true);
final Job job = new Job(getConf());
job.setJarByClass(HadoopDownloader.class);
job.setJobName("Hadoop Downloader Utility");
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(BytesWritable.class);
if (options.getNumberOfThreads() <= 1) {
job.setMapperClass(DownloadMapper.class);
} else {
job.setMapperClass(MultithreadedMapper.class);
MultithreadedMapper.setMapperClass(job, DownloadMapper.class);
MultithreadedMapper.setNumberOfThreads(job, options.getNumberOfThreads());
}
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setNumReduceTasks(options.getNumberOfReducers());
job.getConfiguration().setStrings(ARGS_KEY, args);
FileInputFormat.setInputPaths(job, options.getInputPaths());
SequenceFileOutputFormat.setOutputPath(job, options.getOutputPath());
SequenceFileOutputFormat.setCompressOutput(job, true);
SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
job.waitForCompletion(true);
return 0;
}
Example 10: runDictionaryJobSampling
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
protected boolean runDictionaryJobSampling() throws IOException, ClassNotFoundException, InterruptedException {
boolean jobOK;
Job job = null;
// if input path does not exist, fail
if (!this.inputFS.exists(this.conf.getInputPath())) {
System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
System.exit(-1);
}
// if samples path exists...
if (this.dictionaryFS.exists(this.conf.getDictionarySamplesPath())) {
if (this.conf.getDeleteDictionarySamplesPath()) { // ... and option provided, delete recursively
this.dictionaryFS.delete(this.conf.getDictionarySamplesPath(), true);
} else { // ... and option not provided, fail
System.out.println("Dictionary samples path does exist: " + this.conf.getDictionarySamplesPath());
System.out.println("Select other path or use option -ds to overwrite");
System.exit(-1);
}
}
// Job to create a SequenceInputFormat with Roles
job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName() + " phase 1");
job.setJarByClass(HDTBuilderDriver.class);
System.out.println("input = " + this.conf.getInputPath());
System.out.println("samples = " + this.conf.getDictionarySamplesPath());
FileInputFormat.addInputPath(job, this.conf.getInputPath());
FileOutputFormat.setOutputPath(job, this.conf.getDictionarySamplesPath());
job.setInputFormatClass(LzoTextInputFormat.class);
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
job.setMapperClass(DictionarySamplerMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setCombinerClass(DictionarySamplerReducer.class);
job.setReducerClass(DictionarySamplerReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(this.conf.getDictionarySampleReducers());
SequenceFileOutputFormat.setCompressOutput(job, true);
SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
jobOK = job.waitForCompletion(true);
return jobOK;
}
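Examples 10 to 13 all compress with com.hadoop.compression.lzo.LzoCodec, which lives in the separate hadoop-lzo package and needs its native library on every node. If a compile-time dependency on that package is unwanted, the codec can be named through configuration instead, the same way Example 2 sets FileOutputFormat.COMPRESS_CODEC; a sketch:

SequenceFileOutputFormat.setCompressOutput(job, true);
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
// Name the codec as a string so the driver compiles without hadoop-lzo on its classpath;
// the class (and its native library) must still be installed on the cluster at run time.
job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, "com.hadoop.compression.lzo.LzoCodec");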
Example 11: runDictionaryJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
protected boolean runDictionaryJob() throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
boolean jobOK;
Job job = null;
BufferedWriter bufferedWriter;
// if output path exists...
if (this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
if (this.conf.getDeleteDictionaryOutputPath()) { // ... and option provided, delete recursively
this.dictionaryFS.delete(this.conf.getDictionaryOutputPath(), true);
} else { // ... and option not provided, fail
System.out.println("Dictionary output path does exist: " + this.conf.getDictionaryOutputPath());
System.out.println("Select other path or use option -dd to overwrite");
System.exit(-1);
}
}
// Sample the SequenceInputFormat to do TotalSort and create final output
job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName() + " phase 2");
job.setJarByClass(HDTBuilderDriver.class);
System.out.println("samples = " + this.conf.getDictionarySamplesPath());
System.out.println("output = " + this.conf.getDictionaryOutputPath());
FileInputFormat.addInputPath(job, this.conf.getDictionarySamplesPath());
FileOutputFormat.setOutputPath(job, this.conf.getDictionaryOutputPath());
job.setInputFormatClass(SequenceFileInputFormat.class);
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
// Identity Mapper
// job.setMapperClass(Mapper.class);
job.setCombinerClass(DictionaryCombiner.class);
job.setPartitionerClass(TotalOrderPartitioner.class);
job.setReducerClass(DictionaryReducer.class);
job.setNumReduceTasks(this.conf.getDictionaryReducers());
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
System.out.println("Sampling started");
InputSampler.writePartitionFile(job, new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
DistributedCache.createSymlink(job.getConfiguration());
System.out.println("Sampling finished");
MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SHARED, SequenceFileOutputFormat.class, Text.class, NullWritable.class);
MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SUBJECTS, SequenceFileOutputFormat.class, Text.class, NullWritable.class);
MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.PREDICATES, SequenceFileOutputFormat.class, Text.class, NullWritable.class);
MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.OBJECTS, SequenceFileOutputFormat.class, Text.class, NullWritable.class);
SequenceFileOutputFormat.setCompressOutput(job, true);
SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
jobOK = job.waitForCompletion(true);
this.numShared = job.getCounters().findCounter(Counters.Shared).getValue();
this.numSubjects = job.getCounters().findCounter(Counters.Subjects).getValue();
this.numPredicates = job.getCounters().findCounter(Counters.Predicates).getValue();
this.numObjects = job.getCounters().findCounter(Counters.Objects).getValue();
bufferedWriter = new BufferedWriter(new OutputStreamWriter(this.dictionaryFS.create(this.conf.getDictionaryCountersFile())));
bufferedWriter.write(HDTBuilderConfiguration.SHARED + "=" + this.numShared + "\n");
bufferedWriter.write(HDTBuilderConfiguration.SUBJECTS + "=" + this.numSubjects + "\n");
bufferedWriter.write(HDTBuilderConfiguration.PREDICATES + "=" + this.numPredicates + "\n");
bufferedWriter.write(HDTBuilderConfiguration.OBJECTS + "=" + this.numObjects + "\n");
bufferedWriter.close();
return jobOK;
}
Example 12: runTriplesJobSampling
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
protected boolean runTriplesJobSampling() throws ClassNotFoundException, IOException, InterruptedException {
Job job = null;
boolean jobOK;
BufferedWriter bufferedWriter;
// if input path does not exist, fail
if (!this.inputFS.exists(this.conf.getInputPath())) {
System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
System.exit(-1);
}
// if dictionary output path does not exist, fail
if (!this.dictionaryFS.exists(this.conf.getInputPath())) {
System.out.println("Dictionary output path does not exist: " + this.conf.getInputPath());
System.exit(-1);
}
// if samples path exists...
if (this.dictionaryFS.exists(this.conf.getTriplesSamplesPath())) {
if (this.conf.getDeleteTriplesSamplesPath()) { // ... and option provided, delete recursively
this.dictionaryFS.delete(this.conf.getTriplesSamplesPath(), true);
} else { // ... and option not provided, fail
System.out.println("Triples samples path does exist: " + this.conf.getTriplesSamplesPath());
System.out.println("Select other path or use option -dst to overwrite");
System.exit(-1);
}
}
this.conf.setProperty("mapred.child.java.opts", "-XX:ErrorFile=/home/hadoop/tmp/hs_err_pid%p.log -Xmx2500m");
// Job to create a SequenceInputFormat
job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 1");
job.setJarByClass(HDTBuilderDriver.class);
FileInputFormat.addInputPath(job, this.conf.getInputPath());
FileOutputFormat.setOutputPath(job, this.conf.getTriplesSamplesPath());
job.setInputFormatClass(LzoTextInputFormat.class);
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
job.setMapperClass(TriplesSPOMapper.class);
job.setSortComparatorClass(TripleSPOComparator.class);
job.setGroupingComparatorClass(TripleSPOComparator.class);
job.setMapOutputKeyClass(TripleSPOWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputKeyClass(TripleSPOWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setNumReduceTasks(this.conf.getTriplesReducers());
DistributedCache.addCacheFile(this.conf.getDictionaryFile().toUri(), job.getConfiguration());
SequenceFileOutputFormat.setCompressOutput(job, true);
SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
jobOK = job.waitForCompletion(true);
this.numTriples = job.getCounters().findCounter(Counters.Triples).getValue();
bufferedWriter = new BufferedWriter(new OutputStreamWriter(this.triplesFS.create(this.conf.getTriplesCountersFile())));
bufferedWriter.write(this.numTriples.toString() + "\n");
bufferedWriter.close();
return jobOK;
}
Example 13: runTriplesJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class this method depends on
protected boolean runTriplesJob() throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
Job job = null;
boolean jobOK;
// if triples output path exists...
if (this.triplesFS.exists(this.conf.getTriplesOutputPath())) {
if (this.conf.getDeleteTriplesOutputPath()) { // ... and option provided, delete recursively
this.triplesFS.delete(this.conf.getTriplesOutputPath(), true);
} else { // ... and option not provided, fail
System.out.println("Triples output path does exist: " + this.conf.getTriplesOutputPath());
System.out.println("Select other path or use option -dt to overwrite");
System.exit(-1);
}
}
job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 2");
job.setJarByClass(HDTBuilderDriver.class);
FileInputFormat.addInputPath(job, this.conf.getTriplesSamplesPath());
FileOutputFormat.setOutputPath(job, this.conf.getTriplesOutputPath());
job.setInputFormatClass(SequenceFileInputFormat.class);
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
job.setSortComparatorClass(TripleSPOComparator.class);
job.setGroupingComparatorClass(TripleSPOComparator.class);
job.setPartitionerClass(TotalOrderPartitioner.class);
job.setOutputKeyClass(TripleSPOWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setNumReduceTasks(this.conf.getTriplesReducers());
System.out.println("Sampling started");
InputSampler.writePartitionFile(job, new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
DistributedCache.createSymlink(job.getConfiguration());
System.out.println("Sampling finished");
SequenceFileOutputFormat.setCompressOutput(job, true);
SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
jobOK = job.waitForCompletion(true);
return jobOK;
}