This article collects typical usage examples of the setOutputPath method of the Java class org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat. If you are wondering what SequenceFileOutputFormat.setOutputPath does and how to use it, the curated examples below should help; for further detail you can also look at the enclosing class, org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.
Below are 15 code examples of SequenceFileOutputFormat.setOutputPath, sorted by popularity by default.
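Before the examples, here is a minimal, self-contained driver sketch showing the method in context: it records the output directory (which must not yet exist) that SequenceFileOutputFormat writes its part files to. The class name MinimalSeqFileDriver and the argument layout are placeholders, not taken from any example below; since no mapper class is set, the identity mapper writes TextInputFormat's LongWritable/Text pairs straight into a SequenceFile.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class MinimalSeqFileDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "minimal-seqfile");
        job.setJarByClass(MinimalSeqFileDriver.class);
        job.setNumReduceTasks(0);                                // map-only job
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        // The method this page is about: stores the output directory in the
        // job configuration (static method inherited from FileOutputFormat).
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}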
Example 1: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import of the package/class the method depends on
@Override
public int run(final String[] args) throws Exception {
    final Configuration conf = getConf();
    final Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setInputFormatClass(ElementInputFormat.class);
    job.setMapperClass(AMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
Example 2: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import of the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.printf("Usage: CreateSequenceFile <input dir> <output dir>\n");
        return -1;
    }
    Job job = new Job(getConf());
    job.setJarByClass(CreateSequenceFile.class);
    job.setJobName("Create Sequence File");
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
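To sanity-check what a job like Example 2 produced, a small read-back sketch can help. This is an assumed usage, not part of the original example; the part-file name part-m-00000 is the typical name of a map-only job's first output file.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class SeqFileDump {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path(args[0]);   // e.g. <output dir>/part-m-00000
        try (SequenceFile.Reader reader =
                new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))) {
            // Key and value classes are recorded in the SequenceFile header,
            // so they can be instantiated reflectively.
            Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
            while (reader.next(key, value)) {
                System.out.println(key + "\t" + value);
            }
        }
    }
}

The reader decompresses transparently, so the same code works whether or not Example 2's Snappy block compression was enabled.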
Example 3: createJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import of the package/class the method depends on
public static Job createJob(Path[] inputPaths, Path outputPath, Map<String, String> metadata, Configuration config)
        throws IOException
{
    final Job job = new Job(config);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setOutputFormatClass(MetadataSequenceFileOutputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPaths);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    if (metadata != null) {
        MetadataConfiguration.setMetadata(metadata, job.getConfiguration());
    }
    return job;
}
Example 4: total
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import of the package/class the method depends on
public static void total(String name, String in, String out)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    conf.set(QUERIED_NAME, name);
    Job job = Job.getInstance(new Cluster(conf), conf);
    job.setJarByClass(Total.class);
    // in
    if (!in.endsWith("/"))
        in = in.concat("/");
    in = in.concat("employees");
    SequenceFileInputFormat.addInputPath(job, new Path(in));
    job.setInputFormatClass(SequenceFileInputFormat.class);
    // map
    job.setMapperClass(TotalMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    // reduce
    job.setCombinerClass(TotalReducer.class);
    job.setReducerClass(TotalReducer.class);
    // out
    SequenceFileOutputFormat.setOutputPath(job, new Path(out));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.waitForCompletion(true);
}
Example 5: createJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import of the package/class the method depends on
public static Job createJob(String name, String base) throws IOException {
    Configuration conf = new Configuration();
    conf.set(Total.QUERIED_NAME, name);
    Job job = Job.getInstance(new Cluster(conf), conf);
    job.setJarByClass(Cut.class);
    // in
    String in = base;
    if (!base.endsWith("/"))
        in = in.concat("/");
    in = in.concat("employees");
    SequenceFileInputFormat.addInputPath(job, new Path(in));
    job.setInputFormatClass(SequenceFileInputFormat.class);
    // map
    job.setMapperClass(CutMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Employee.class);
    // out
    SequenceFileOutputFormat.setOutputPath(job, new Path(base + "/tmp"));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Employee.class);
    return job;
}
Example 6: runPartitionerJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import of the package/class the method depends on
private int runPartitionerJob() throws Exception
{
    Job partitionerJob = new Job(getConf(), "Partition Wikipedia");
    Configuration partitionerConf = partitionerJob.getConfiguration();
    partitionerConf.set("mapred.map.tasks.speculative.execution", "false");
    configurePartitionerJob(partitionerJob);
    List<Path> inputPaths = new ArrayList<Path>();
    SortedSet<String> languages = new TreeSet<String>();
    FileSystem fs = FileSystem.get(partitionerConf);
    Path parent = new Path(partitionerConf.get("wikipedia.input"));
    listFiles(parent, fs, inputPaths, languages);
    System.out.println("Input files in " + parent + ":" + inputPaths.size());
    Path[] inputPathsArray = new Path[inputPaths.size()];
    inputPaths.toArray(inputPathsArray);
    System.out.println("Languages:" + languages.size());
    // set up the input format
    WikipediaInputFormat.setInputPaths(partitionerJob, inputPathsArray);
    partitionerJob.setMapperClass(WikipediaPartitioner.class);
    partitionerJob.setNumReduceTasks(0);
    // set up the output format
    partitionerJob.setMapOutputKeyClass(Text.class);
    partitionerJob.setMapOutputValueClass(Article.class);
    partitionerJob.setOutputKeyClass(Text.class);
    partitionerJob.setOutputValueClass(Article.class);
    partitionerJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    Path outputDir = WikipediaConfiguration.getPartitionedArticlesPath(partitionerConf);
    SequenceFileOutputFormat.setOutputPath(partitionerJob, outputDir);
    SequenceFileOutputFormat.setCompressOutput(partitionerJob, true);
    SequenceFileOutputFormat.setOutputCompressionType(partitionerJob, CompressionType.RECORD);
    return partitionerJob.waitForCompletion(true) ? 0 : 1;
}
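Unlike Examples 2 and 3, this job uses CompressionType.RECORD, which compresses each record's value individually; CompressionType.BLOCK buffers many records and compresses keys and values together, which usually yields a better compression ratio at the cost of coarser-grained reads.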
Example 7: startJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import of the package/class the method depends on
/**
 * Creates and submits a job; updates the file index and the job index.
 */
private Job startJob(String jobName, Set<String> lostFiles, Priority priority,
        long detectTime)
        throws IOException, InterruptedException, ClassNotFoundException {
    Path inDir = new Path(JOB_NAME_PREFIX + "/in/" + jobName);
    Path outDir = new Path(JOB_NAME_PREFIX + "/out/" + jobName);
    List<String> filesInJob = createInputFile(jobName, inDir, lostFiles);
    if (filesInJob.isEmpty()) {
        return null;
    }
    Configuration jobConf = new Configuration(getConf());
    DistBlockIntegrityMonitor.updateBlockFixerMapreduceConfigs(jobConf, BLOCKFIXER);
    RaidUtils.parseAndSetOptions(jobConf, priority.configOption);
    Job job = new Job(jobConf, jobName);
    job.getConfiguration().set(CORRUPT_FILE_DETECT_TIME, Long.toString(detectTime));
    configureJob(job, this.RECONSTRUCTOR_CLASS);
    job.setJarByClass(getClass());
    job.setMapperClass(ReconstructionMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(ReconstructionInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    ReconstructionInputFormat.setInputPaths(job, inDir);
    SequenceFileOutputFormat.setOutputPath(job, outDir);
    submitJob(job, filesInJob, priority);
    List<LostFileInfo> fileInfos = updateFileIndex(jobName, filesInJob, priority);
    // The implementation of submitJob() need not update jobIndex,
    // so check that the job exists in jobIndex before updating jobInfos.
    if (jobIndex.containsKey(job)) {
        jobIndex.put(job, fileInfos);
    }
    numJobsRunning.incrementAndGet();
    return job;
}
Example 8: initTabToSeqFileJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import of the package/class the method depends on
public static void initTabToSeqFileJob(Job job, String intable, String outpath, String auths) throws AccumuloSecurityException {
    Configuration conf = job.getConfiguration();
    String username = conf.get(USERNAME);
    String password = conf.get(PASSWORD);
    String instance = conf.get(INSTANCE);
    String zookeepers = conf.get(ZOOKEEPERS);
    System.out.println("Zookeepers are " + zookeepers);
    if (zookeepers != null) {
        AccumuloInputFormat.setZooKeeperInstance(job, instance, zookeepers);
    } else {
        throw new IllegalArgumentException("Must specify either mock or zookeepers");
    }
    AccumuloInputFormat.setConnectorInfo(job, username, new PasswordToken(password));
    AccumuloInputFormat.setScanAuthorizations(job, new Authorizations(auths));
    AccumuloInputFormat.setInputTableName(job, intable);
    job.setInputFormatClass(AccumuloInputFormat.class);
    job.setMapOutputKeyClass(CompositeType.class);
    job.setMapOutputValueClass(TripleCard.class);
    // OUTPUT
    SequenceFileOutputFormat.setOutputPath(job, new Path(outpath));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(CompositeType.class);
    job.setOutputValueClass(TripleCard.class);
}
Example 9: initJoinMRJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import of the package/class the method depends on
public static void initJoinMRJob(Job job, String prospectsPath, String spoPath,
        Class<? extends Mapper<CompositeType, TripleCard, ?, ?>> mapperClass,
        String outPath, String auths) throws AccumuloSecurityException {
    MultipleInputs.addInputPath(job, new Path(prospectsPath), SequenceFileInputFormat.class, mapperClass);
    MultipleInputs.addInputPath(job, new Path(spoPath), SequenceFileInputFormat.class, mapperClass);
    job.setMapOutputKeyClass(CompositeType.class);
    job.setMapOutputValueClass(TripleCard.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outPath));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(TripleEntry.class);
    job.setOutputValueClass(CardList.class);
}
Example 10: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import of the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String outpath = conf.get(OUTPUTPATH);
    Job job = new Job(conf, this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());
    // set on the job's own configuration: the Job copies conf at construction
    job.getConfiguration().setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);
    MultipleInputs.addInputPath(job, new Path(PROSPECTSOUT.getAbsolutePath()),
            SequenceFileInputFormat.class, JoinSelectAggregateMapper.class);
    MultipleInputs.addInputPath(job, new Path(SPOOUT.getAbsolutePath()),
            SequenceFileInputFormat.class, JoinSelectAggregateMapper.class);
    job.setMapOutputKeyClass(CompositeType.class);
    job.setMapOutputValueClass(TripleCard.class);
    tempDir = new File(File.createTempFile(outpath, "txt").getParentFile(), System.currentTimeMillis() + "");
    SequenceFileOutputFormat.setOutputPath(job, new Path(tempDir.getAbsolutePath()));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(TripleEntry.class);
    job.setOutputValueClass(CardList.class);
    job.setSortComparatorClass(JoinSelectSortComparator.class);
    job.setGroupingComparatorClass(JoinSelectGroupComparator.class);
    job.setPartitionerClass(JoinSelectPartitioner.class);
    job.setReducerClass(JoinReducer.class);
    job.setNumReduceTasks(32);
    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
Example 11: initTabToSeqFileJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import of the package/class the method depends on
public static void initTabToSeqFileJob(Job job, String intable, String outpath) throws AccumuloSecurityException, IOException {
    Configuration conf = job.getConfiguration();
    String username = conf.get(USERNAME);
    System.out.println("Username is " + username);
    String password = conf.get(PASSWORD);
    String instance = conf.get(INSTANCE);
    System.out.println("Instance is " + instance);
    AccumuloInputFormat.setMockInstance(job, instance);
    AccumuloInputFormat.setConnectorInfo(job, username, new PasswordToken(password));
    AccumuloInputFormat.setInputTableName(job, intable);
    job.setInputFormatClass(AccumuloInputFormat.class);
    job.setMapOutputKeyClass(CompositeType.class);
    job.setMapOutputValueClass(TripleCard.class);
    System.out.println("Outpath is " + outpath);
    // OUTPUT
    if (outpath.equals("spo")) {
        SPOOUT = new File(File.createTempFile(outpath, "txt").getParentFile(), System.currentTimeMillis() + "spo");
        SequenceFileOutputFormat.setOutputPath(job, new Path(SPOOUT.getAbsolutePath()));
    } else {
        PROSPECTSOUT = new File(File.createTempFile(outpath, "txt").getParentFile(), System.currentTimeMillis() + "prospects");
        SequenceFileOutputFormat.setOutputPath(job, new Path(PROSPECTSOUT.getAbsolutePath()));
    }
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(CompositeType.class);
    job.setOutputValueClass(TripleCard.class);
}
Example 12: configureSchemaOutput
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import of the package/class the method depends on
/**
 * Set up the MapReduce job to output a schema (TBox).
 */
protected void configureSchemaOutput() {
    Path outPath = MRReasoningUtils.getSchemaPath(job.getConfiguration());
    SequenceFileOutputFormat.setOutputPath(job, outPath);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SchemaWritable.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, "schemaobj",
            SequenceFileOutputFormat.class, NullWritable.class, SchemaWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
            TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
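For context, here is a sketch of how a task could write to the named outputs declared above via MultipleOutputs. The reducer class and its key/value types are hypothetical (SchemaWritable and MRReasoningUtils are the example's own types), so treat this as an illustration of the MultipleOutputs API rather than the original project's code:

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class SchemaReducer extends Reducer<Text, SchemaWritable, NullWritable, SchemaWritable> {
    private MultipleOutputs<NullWritable, SchemaWritable> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<SchemaWritable> values, Context context)
            throws IOException, InterruptedException {
        for (SchemaWritable schema : values) {
            // Routed to the "schemaobj" SequenceFile named output
            mos.write("schemaobj", NullWritable.get(), schema);
        }
        // Diagnostic line routed to the TextOutputFormat named output
        mos.write(MRReasoningUtils.DEBUG_OUT, key, new Text("processed"));
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mos.close();   // flush and close all named outputs
    }
}

Because LazyOutputFormat wraps the base SequenceFileOutputFormat above, part files for the default output are only created if something is actually written to them.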
Example 13: configureDerivationOutput
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import of the package/class the method depends on
/**
 * Set up a MapReduce job to output newly derived triples.
 * @param intermediate True if this is intermediate data. Outputs
 *                     to [base]-[iteration]-[temp].
 */
protected void configureDerivationOutput(boolean intermediate) {
    Path outPath;
    Configuration conf = job.getConfiguration();
    int iteration = MRReasoningUtils.getCurrentIteration(conf);
    if (intermediate) {
        outPath = MRReasoningUtils.getOutputPath(conf,
                MRReasoningUtils.OUTPUT_BASE + iteration
                + MRReasoningUtils.TEMP_SUFFIX);
    } else {
        outPath = MRReasoningUtils.getOutputPath(conf,
                MRReasoningUtils.OUTPUT_BASE + iteration);
    }
    SequenceFileOutputFormat.setOutputPath(job, outPath);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT,
            SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.TERMINAL_OUT,
            SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.SCHEMA_OUT,
            SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INCONSISTENT_OUT,
            SequenceFileOutputFormat.class, Derivation.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);
    // Set up an output for diagnostic info, if needed
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
            TextOutputFormat.class, Text.class, Text.class);
}
Example 14: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import of the package/class the method depends on
public int run(String[] args) throws Exception {
    PreSortConfig config = new PreSortConfig();
    config.fromArray(args);
    Job job = Job.getInstance(getConf());
    job.setJobName("pre-sort");
    job.setJarByClass(PreSortDriver.class);
    Path mapInputPath = new Path(config.getInput());
    Path mapOutputPath = new Path(config.getOutput());
    LOGGER.info("use " + mapInputPath.toString() + " as pre-sort input");
    LOGGER.info("use " + mapOutputPath.toString() + " as pre-sort output");
    // define the mapper
    job.getConfiguration().set(PreSortMapper.COLUMN_INDEX_CONFIG_NAME, config.getKeyColumnAsString());
    job.setMapperClass(PreSortMapper.class);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, mapInputPath);
    // define the reducer
    job.setNumReduceTasks(NUM_REDUCER);
    // define the output; NOTE: no custom reducer is set, so the identity reducer sorts by key
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, mapOutputPath);
    // clean up the output folder before the job is submitted
    mapOutputPath.getFileSystem(job.getConfiguration()).delete(mapOutputPath, true);
    // run the job and wait until it completes
    return job.waitForCompletion(true) ? 0 : 1;
}
Example 15: setupOutput
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import of the package/class the method depends on
protected void setupOutput(final Job job, final SampleDataForSplitPoints operation, final Store store) throws IOException {
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(operation.getOutputPath()));
    if (null != operation.getCompressionCodec()) {
        if (GzipCodec.class.isAssignableFrom(operation.getCompressionCodec())
                && !NativeCodeLoader.isNativeCodeLoaded()
                && !ZlibFactory.isNativeZlibLoaded(job.getConfiguration())) {
            LOGGER.warn("SequenceFile doesn't work with GzipCodec without native-hadoop code!");
        } else {
            SequenceFileOutputFormat.setCompressOutput(job, true);
            SequenceFileOutputFormat.setOutputCompressorClass(job, operation.getCompressionCodec());
            SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        }
    }
}