This page collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.setCompressOutput. If you are wondering what SequenceFileOutputFormat.setCompressOutput does, how to call it, or what real-world code using it looks like, the curated examples below should help. You can also explore the enclosing class, org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat, for further usage examples.
Fifteen code examples of SequenceFileOutputFormat.setCompressOutput are shown below, sorted by popularity by default.
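As the examples illustrate, setCompressOutput is usually combined with setOutputCompressionType and setOutputCompressorClass in a job driver. The following minimal sketch shows that common pattern for enabling block-compressed SequenceFile output; the class name, job name, and input/output paths are hypothetical, and the identity mapper/reducer defaults are assumed to be acceptable:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class CompressedSeqFileJob {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "compressed-seqfile");
        job.setJarByClass(CompressedSeqFileJob.class);

        // Hypothetical key/value types; substitute your own mapper/reducer and types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        // Enable compression on the SequenceFile output and choose codec and compression type.
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

        SequenceFileInputFormat.setInputPaths(job, new Path(args[0]));
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}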
Example 1: setTasksClasses
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
/**
 * Sets task classes with related info if needed into configuration object.
 *
 * @param job Configuration to change.
 * @param setMapper Option to set mapper and input format classes.
 * @param setCombiner Option to set combiner class.
 * @param setReducer Option to set reducer and output format classes.
 * @param outputCompression Option to enable block-compressed SequenceFile output.
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer,
        boolean outputCompression) {
    if (setMapper) {
        job.setMapperClass(HadoopWordCount2Mapper.class);
        job.setInputFormatClass(TextInputFormat.class);
    }
    if (setCombiner)
        job.setCombinerClass(HadoopWordCount2Combiner.class);
    if (setReducer) {
        job.setReducerClass(HadoopWordCount2Reducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
    }
    if (outputCompression) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        SequenceFileOutputFormat.setCompressOutput(job, true);
        job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
    }
}
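A driver might invoke this helper as sketched below (the job name and paths are hypothetical); passing true as the last argument switches the job to block-compressed SequenceFile output using SnappyCodec, which requires the native Snappy library to be available at runtime:

// Hypothetical driver snippet for the helper above.
Job job = Job.getInstance(new Configuration(), "word-count");
setTasksClasses(job, true, true, true, true); // mapper, combiner, reducer, compressed output
FileInputFormat.setInputPaths(job, new Path("/in"));
FileOutputFormat.setOutputPath(job, new Path("/out"));
job.waitForCompletion(true);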
Example 2: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
    // String clusterFileString = options.getInputString();
    Path[] paths = new Path[] { new Path(args[0]) };
    TextBytesSequenceFileUtility util = new TextBytesSequenceFileUtility(paths[0].toUri(), true);
    HadoopToolsUtil.removeFile(args[1]);
    Job job = TextBytesJobUtil.createJob(paths, new Path(args[1]), new HashMap<String, String>(), this.getConf());
    job.setJarByClass(this.getClass());
    job.setMapperClass(CounterMapper.class);
    job.setReducerClass(CounterReducer.class);
    SequenceFileOutputFormat.setCompressOutput(job, false);
    long start, end;
    start = System.currentTimeMillis();
    job.waitForCompletion(true);
    end = System.currentTimeMillis();
    System.out.println("Took: " + (end - start) + "ms");
    return 0;
}
Example 3: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
    HadoopEXIFOptions options = new HadoopEXIFOptions(args, true);
    options.prepare();
    // String clusterFileString = options.getInputString();
    Path[] paths = options.getInputPaths();
    TextBytesSequenceFileUtility util = new TextBytesSequenceFileUtility(paths[0].toUri(), true);
    Map<String, String> metadata = new HashMap<String, String>();
    if (util.getUUID() != null)
        metadata.put(MetadataConfiguration.UUID_KEY, util.getUUID());
    metadata.put(MetadataConfiguration.CONTENT_TYPE_KEY, "application/imageexif");
    Job job = TextBytesJobUtil.createJob(paths, options.getOutputPath(), metadata, this.getConf());
    // job.setOutputValueClass(Text.class);
    job.setJarByClass(this.getClass());
    job.setMapperClass(HadoopEXIF.HadoopEXIFMapper.class);
    job.getConfiguration().setStrings(ARGS_KEY, args);
    job.setNumReduceTasks(0);
    SequenceFileOutputFormat.setCompressOutput(job, false);
    long start, end;
    start = System.currentTimeMillis();
    job.waitForCompletion(true);
    end = System.currentTimeMillis();
    System.out.println("Took: " + (end - start) + "ms");
    return 0;
}
Example 4: createJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
public static Job createJob(Path[] inputPaths, Path outputPath, Map<String, String> metadata, Configuration config)
        throws IOException
{
    final Job job = new Job(config);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setOutputFormatClass(MetadataSequenceFileOutputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPaths);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    if (metadata != null)
        MetadataConfiguration.setMetadata(metadata, job.getConfiguration());
    return job;
}
Example 5: runPartitionerJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
private int runPartitionerJob() throws Exception
{
    Job partitionerJob = new Job(getConf(), "Partition Wikipedia");
    Configuration partitionerConf = partitionerJob.getConfiguration();
    partitionerConf.set("mapred.map.tasks.speculative.execution", "false");
    configurePartitionerJob(partitionerJob);

    List<Path> inputPaths = new ArrayList<Path>();
    SortedSet<String> languages = new TreeSet<String>();
    FileSystem fs = FileSystem.get(partitionerConf);
    Path parent = new Path(partitionerConf.get("wikipedia.input"));
    listFiles(parent, fs, inputPaths, languages);

    System.out.println("Input files in " + parent + ":" + inputPaths.size());
    Path[] inputPathsArray = new Path[inputPaths.size()];
    inputPaths.toArray(inputPathsArray);
    System.out.println("Languages:" + languages.size());

    // setup input format
    WikipediaInputFormat.setInputPaths(partitionerJob, inputPathsArray);
    partitionerJob.setMapperClass(WikipediaPartitioner.class);
    partitionerJob.setNumReduceTasks(0);

    // setup output format
    partitionerJob.setMapOutputKeyClass(Text.class);
    partitionerJob.setMapOutputValueClass(Article.class);
    partitionerJob.setOutputKeyClass(Text.class);
    partitionerJob.setOutputValueClass(Article.class);
    partitionerJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    Path outputDir = WikipediaConfiguration.getPartitionedArticlesPath(partitionerConf);
    SequenceFileOutputFormat.setOutputPath(partitionerJob, outputDir);
    SequenceFileOutputFormat.setCompressOutput(partitionerJob, true);
    SequenceFileOutputFormat.setOutputCompressionType(partitionerJob, CompressionType.RECORD);

    return partitionerJob.waitForCompletion(true) ? 0 : 1;
}
Example 6: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
/**
 * Write the sequence file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        return result;
    }

    Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

    Configuration conf = super.getConf();
    Job job = new Job(conf);
    job.setJarByClass(SequenceFileProtobufMapReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Stock.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(PbMapper.class);
    job.setReducerClass(PbReducer.class);

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);

    ProtobufSerialization.register(job.getConfiguration());

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
Example 7: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
/**
 * Write the sequence file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        return result;
    }

    Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

    Configuration conf = super.getConf();
    Job job = new Job(conf);
    job.setJarByClass(SequenceFileStockMapReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(StockPriceWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class); //<co id="ch03_comment_seqfile_mr1"/>
    job.setOutputFormatClass(SequenceFileOutputFormat.class); //<co id="ch03_comment_seqfile_mr2"/>
    SequenceFileOutputFormat.setCompressOutput(job, true); //<co id="ch03_comment_seqfile_mr3"/>
    SequenceFileOutputFormat.setOutputCompressionType(job, //<co id="ch03_comment_seqfile_mr4"/>
            SequenceFile.CompressionType.BLOCK);
    SequenceFileOutputFormat.setOutputCompressorClass(job, //<co id="ch03_comment_seqfile_mr5"/>
            DefaultCodec.class);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
Example 8: setupOutput
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
protected void setupOutput(final Job job, final SampleDataForSplitPoints operation, final Store store) throws IOException {
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(operation.getOutputPath()));
    if (null != operation.getCompressionCodec()) {
        if (GzipCodec.class.isAssignableFrom(operation.getCompressionCodec())
                && !NativeCodeLoader.isNativeCodeLoaded()
                && !ZlibFactory.isNativeZlibLoaded(job.getConfiguration())) {
            LOGGER.warn("SequenceFile doesn't work with GzipCodec without native-hadoop code!");
        } else {
            SequenceFileOutputFormat.setCompressOutput(job, true);
            SequenceFileOutputFormat.setOutputCompressorClass(job, operation.getCompressionCodec());
            SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        }
    }
}
Example 9: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
    final HadoopDownloaderOptions options = new HadoopDownloaderOptions(args);
    options.prepare(true);

    final Job job = new Job(getConf());
    job.setJarByClass(HadoopDownloader.class);
    job.setJobName("Hadoop Downloader Utility");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);

    if (options.getNumberOfThreads() <= 1) {
        job.setMapperClass(DownloadMapper.class);
    } else {
        job.setMapperClass(MultithreadedMapper.class);
        MultithreadedMapper.setMapperClass(job, DownloadMapper.class);
        MultithreadedMapper.setNumberOfThreads(job, options.getNumberOfThreads());
    }

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(options.getNumberOfReducers());
    job.getConfiguration().setStrings(ARGS_KEY, args);

    FileInputFormat.setInputPaths(job, options.getInputPaths());
    SequenceFileOutputFormat.setOutputPath(job, options.getOutputPath());
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.waitForCompletion(true);
    return 0;
}
Example 10: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
    final CmdLineParser parser = new CmdLineParser(this);
    try {
        parser.parseArgument(args);
    } catch (final CmdLineException e) {
        System.err.println(e.getMessage());
        System.err.println("Usage: hadoop jar HadoopImageIndexer.jar [options]");
        parser.printUsage(System.err);
        return -1;
    }

    final Path[] paths = SequenceFileUtility.getFilePaths(input, "part");
    final Path outputPath = new Path(output);
    if (outputPath.getFileSystem(this.getConf()).exists(outputPath) && replace)
        outputPath.getFileSystem(this.getConf()).delete(outputPath, true);

    final Job job = TextBytesJobUtil.createJob(paths, outputPath, null, this.getConf());
    job.setMapOutputKeyClass(MapperOut.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setJarByClass(this.getClass());
    job.setMapperClass(LSHMapper.class);
    job.setReducerClass(LSHReducer.class);
    job.setNumReduceTasks(4);
    job.setPartitionerClass(LSHPartitioner.class);
    job.setGroupingComparatorClass(LSHGroupingComparator.class);

    SequenceFileOutputFormat.setCompressOutput(job, !dontcompress);

    job.waitForCompletion(true);
    return 0;
}
Example 11: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
    final CmdLineParser parser = new CmdLineParser(this);
    try {
        parser.parseArgument(args);
    } catch (final CmdLineException e) {
        System.err.println(e.getMessage());
        System.err.println("Usage: hadoop jar HadoopImageIndexer.jar [options]");
        parser.printUsage(System.err);
        return -1;
    }

    final Path[] paths = SequenceFileUtility.getFilePaths(input, "part");
    final Path outputPath = new Path(output);
    if (outputPath.getFileSystem(this.getConf()).exists(outputPath) && replace)
        outputPath.getFileSystem(this.getConf()).delete(outputPath, true);

    final Job job = TextBytesJobUtil.createJob(paths, outputPath, null, this.getConf());
    job.setJarByClass(this.getClass());
    job.setMapperClass(PqPcaVladMapper.class);
    job.setNumReduceTasks(0);

    MultipleOutputs.addNamedOutput(job, "pcavlad", SequenceFileOutputFormat.class, Text.class, BytesWritable.class);

    DistributedCache.createSymlink(job.getConfiguration());
    DistributedCache.addCacheFile(new URI(indexerData + "#vlad-data.bin"), job.getConfiguration());

    SequenceFileOutputFormat.setCompressOutput(job, !dontcompress);

    job.waitForCompletion(true);
    return 0;
}
Example 12: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
    final CmdLineParser parser = new CmdLineParser(this);
    try {
        parser.parseArgument(args);
    } catch (final CmdLineException e) {
        System.err.println(e.getMessage());
        System.err.println("Usage: hadoop jar HadoopImageIndexer.jar [options]");
        parser.printUsage(System.err);
        return -1;
    }

    final Path[] paths = SequenceFileUtility.getFilePaths(input, "part");
    final Path outputPath = new Path(output);
    if (outputPath.getFileSystem(this.getConf()).exists(outputPath) && replace)
        outputPath.getFileSystem(this.getConf()).delete(outputPath, true);

    final Job job = TextBytesJobUtil.createJob(paths, outputPath, null, this.getConf());
    job.setJarByClass(this.getClass());
    job.setMapperClass(PcaVladMapper.class);
    job.setNumReduceTasks(0);

    DistributedCache.addFileToClassPath(new Path(indexerData), job.getConfiguration());
    job.getConfiguration().set(VLAD_INDEXER_DATA_PATH_KEY, new Path(indexerData).getName());

    SequenceFileOutputFormat.setCompressOutput(job, !dontcompress);

    job.waitForCompletion(true);
    return 0;
}
Example 13: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
    // Create the output path
    Path outpath = new Path(SequenceFileUtility.convertToURI(this.outputFilePath).toString());
    System.out.println("It is all going to: " + outpath);

    Path[] sequenceFiles = SequenceFileUtility.getFilePaths(inputFilePath, "part");

    Job job = new Job(this.getConf(), "featureselect");
    job.setNumReduceTasks(1);
    job.setJarByClass(SequenceFileByteImageFeatureSelector.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setMapperClass(FeatureSelect.Map.class);
    job.setReducerClass(FeatureSelect.Reduce.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.getConfiguration().setStrings(FeatureSelect.FILETYPE_KEY, new String[] { options.fileType });
    job.getConfiguration().setStrings(FeatureSelect.NFEATURE_KEY, new String[] { "" + this.nRandomRows });
    ((JobConf) job.getConfiguration()).setNumTasksToExecutePerJvm(-1);

    SequenceFileInputFormat.setInputPaths(job, sequenceFiles);
    SequenceFileOutputFormat.setOutputPath(job, outpath);
    SequenceFileOutputFormat.setCompressOutput(job, false);

    job.waitForCompletion(true);
    return 0;
}
Example 14: getRandomRows
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
public Path getRandomRows(int k) throws IOException, InterruptedException, ClassNotFoundException {
    // Create the output path
    File tmpFile = File.createTempFile("feature", ".select");
    tmpFile.delete();
    Path outpath = new Path(SequenceFileUtility.convertToURI(tmpFile.getAbsolutePath()).toString());
    System.out.println("It is all going to: " + outpath);

    Path[] sequenceFiles = SequenceFileUtility.getFilePaths(sequenceFilePath, "part");
    Configuration conf = new Configuration();

    Job job = new Job(conf, "featureselect");
    job.setNumReduceTasks(1);
    job.setJarByClass(SequenceFileByteDataSelector.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setMapperClass(FeatureSelect.Map.class);
    // job.setCombinerClass(FeatureSelect.Reduce.class);
    job.setReducerClass(FeatureSelect.Reduce.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    // job.setOutputFormatClass(TextOutputFormat.class);

    job.getConfiguration().setStrings(FeatureSelect.FILETYPE_KEY, new String[] { fileType });
    job.getConfiguration().setStrings(FeatureSelect.NFEATURE_KEY, new String[] { "" + k });

    SequenceFileInputFormat.setInputPaths(job, sequenceFiles);
    SequenceFileOutputFormat.setOutputPath(job, outpath);
    SequenceFileOutputFormat.setCompressOutput(job, false);
    // FileOutputFormat.setOutputPath(job, outpath);

    job.waitForCompletion(true);
    return outpath;
}
Example 15: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
    final HadoopLocalFeaturesToolOptions options = new HadoopLocalFeaturesToolOptions(args, true);
    options.prepare();

    final Path[] paths = options.getInputPaths();
    final TextBytesSequenceFileUtility util = new TextBytesSequenceFileUtility(paths[0].toUri(), true);
    final Map<String, String> metadata = new HashMap<String, String>();
    if (util.getUUID() != null)
        metadata.put(MetadataConfiguration.UUID_KEY, util.getUUID());
    metadata.put(MetadataConfiguration.CONTENT_TYPE_KEY, "application/localfeatures-" + options.getMode().name()
            + "-" + (options.isAsciiMode() ? "ascii" : "bin"));

    final Job job = TextBytesJobUtil.createJob(paths, options.getOutputPath(), metadata, this.getConf());
    job.setJarByClass(this.getClass());
    options.mapperModeOp.prepareJobMapper(job, LocalFeaturesMapper.class);
    job.getConfiguration().setStrings(ARGS_KEY, args);
    job.setNumReduceTasks(0);

    SequenceFileOutputFormat.setCompressOutput(job, !options.dontcompress);

    long start, end;
    start = System.currentTimeMillis();
    job.waitForCompletion(true);
    end = System.currentTimeMillis();
    System.out.println("Took: " + (end - start) + "ms");

    options.serialiseExtractor();
    return 0;
}