This article collects typical usage examples of the Java class org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat. If you are wondering what exactly the SequenceFileOutputFormat class does, how to use it, or what real-world usage looks like, the curated code samples here may help.
The SequenceFileOutputFormat class belongs to the org.apache.hadoop.mapreduce.lib.output package. Fifteen code examples of the class are shown below, sorted by popularity by default.
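Before the examples, here is a minimal, self-contained driver sketch showing the pattern they all share: declare Writable output key/value classes and set SequenceFileOutputFormat as the job's output format so the results are written as binary key/value SequenceFiles. The mapper and reducer names (WordCountMapper, SumReducer) are placeholders for illustration and are not taken from any of the examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class SequenceFileDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "write a SequenceFile");
    job.setJarByClass(SequenceFileDriver.class);
    // WordCountMapper and SumReducer are hypothetical classes standing in for your own.
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(SumReducer.class);
    // Output key/value types must be Writable so they can be serialized into the SequenceFile.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Write binary key/value records instead of plain text.
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}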
Example 1: run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the dependent package/class
public static void run(Configuration conf, Path inputPath, Path output, double params) throws IOException, ClassNotFoundException, InterruptedException {
  String jobName = "calculating parameter";
  conf.set("params", String.valueOf(params));
  Job job = new Job(conf, jobName);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(indexToCountWritable.class);
  job.setOutputKeyClass(twoDimensionIndexWritable.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setMapperClass(CalParamsMapper.class);
  job.setReducerClass(CalParamsReducer.class);
  FileInputFormat.addInputPath(job, inputPath);
  FileOutputFormat.setOutputPath(job, output);
  job.setJarByClass(LDADriver.class);
  if (!job.waitForCompletion(true)) {
    throw new InterruptedException("calculating parameter failed");
  }
}
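Note that the new Job(conf, jobName) constructor used in this example is deprecated in Hadoop 2.x. A present-day version of the same setup would typically obtain the job through the factory method instead, for example:

// Equivalent, non-deprecated job creation (assumes the same conf and jobName variables as above).
Job job = Job.getInstance(conf, jobName);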
Example 2: runRandomInputGenerator
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the dependent package/class
public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMuplitplier) throws Exception {
  LOG.info("Running RandomInputGenerator with numMappers=" + numMappers
      + ", numNodes=" + numNodes);
  Job job = Job.getInstance(getConf());
  job.setJobName("Random Input Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());
  job.setInputFormatClass(GeneratorInputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(NullWritable.class);
  setJobConf(job, numMappers, numNodes, width, wrapMuplitplier);
  job.setMapperClass(Mapper.class); // identity mapper
  FileOutputFormat.setOutputPath(job, tmpOutput);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  boolean success = jobCompletion(job);
  return success ? 0 : 1;
}
Example 3: getOutputFormatClass
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the dependent package/class
@Override
protected Class<? extends OutputFormat> getOutputFormatClass()
    throws ClassNotFoundException {
  if (isHCatJob) {
    LOG.debug("Returning HCatOutputFormat for output format");
    return SqoopHCatUtilities.getOutputFormatClass();
  }
  if (options.getFileLayout() == SqoopOptions.FileLayout.TextFile) {
    return RawKeyTextOutputFormat.class;
  } else if (options.getFileLayout()
      == SqoopOptions.FileLayout.SequenceFile) {
    return SequenceFileOutputFormat.class;
  } else if (options.getFileLayout()
      == SqoopOptions.FileLayout.AvroDataFile) {
    return AvroOutputFormat.class;
  } else if (options.getFileLayout()
      == SqoopOptions.FileLayout.ParquetFile) {
    return DatasetKeyOutputFormat.class;
  }
  return null;
}
Example 4: joinAs
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the dependent package/class
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerMapBase<?>> map,
    Class<? extends SimpleCheckerReduceBase> reduce) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  Path base = cluster.getFileSystem().makeQualified(new Path("/" + jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  conf.set(CompositeInputFormat.JOIN_EXPR, CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  conf.setInt("testdatamerge.sources", srcs);
  Job job = Job.getInstance(conf);
  job.setInputFormatClass(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));
  job.setMapperClass(map);
  job.setReducerClass(reduce);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  job.waitForCompletion(true);
  assertTrue("Job failed", job.isSuccessful());
  if ("outer".equals(jointype)) {
    checkOuterConsistency(job, src);
  }
  base.getFileSystem(conf).delete(base, true);
}
Example 5: createJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the dependent package/class
public Job createJob(Configuration conf) throws IOException {
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 10 * 1024);
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES, numBytesToWritePerMap);
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
  Job job = Job.getInstance(conf);
  job.setJarByClass(RandomTextWriterJob.class);
  job.setJobName("random-text-writer");
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomTextMapper.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  // FileOutputFormat.setOutputPath(job, new Path("random-output"));
  job.setNumReduceTasks(0);
  return job;
}
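This createJob method deliberately leaves the output path unset (the FileOutputFormat call is commented out), so a caller must supply one before submitting the job. A rough sketch of such a caller, assuming RandomTextWriterJob can be instantiated directly and with an output path chosen purely for illustration:

// Hypothetical caller; the output path is an example value, not part of the original code.
Job job = new RandomTextWriterJob().createJob(conf);
FileOutputFormat.setOutputPath(job, new Path("/tmp/random-text-output"));
boolean ok = job.waitForCompletion(true);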
Example 6: runJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the dependent package/class
public static void runJob(Configuration conf, Path inputPath, Path output) throws IOException, ClassNotFoundException, InterruptedException {
  Job job = new Job(conf, "Input Drive running input:" + inputPath);
  log.info("start running InputDriver");
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(indexToWordWritable.class);
  job.setOutputKeyClass(twoDimensionIndexWritable.class);
  job.setOutputValueClass(Text.class);
  job.setMapperClass(InputMapper.class);
  job.setReducerClass(InputReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setJarByClass(InputDriver.class);
  FileInputFormat.addInputPath(job, inputPath);
  FileOutputFormat.setOutputPath(job, output);
  boolean succeeded = job.waitForCompletion(true);
  if (!succeeded) {
    throw new IllegalStateException("Job failed!");
  }
}
Example 7: call
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the dependent package/class
@Override
public void call(JavaPairRDD<K,M> rdd, Time time) throws IOException {
  if (rdd.isEmpty()) {
    log.info("RDD was empty, not saving to HDFS");
  } else {
    String file = prefix + "-" + time.milliseconds() + "." + suffix;
    Path path = new Path(file);
    FileSystem fs = FileSystem.get(path.toUri(), hadoopConf);
    if (fs.exists(path)) {
      log.warn("Saved data already existed, possibly from a failed job. Deleting {}", path);
      fs.delete(path, true);
    }
    log.info("Saving RDD to HDFS at {}", file);
    rdd.mapToPair(
        new ValueToWritableFunction<>(keyClass, messageClass, keyWritableClass, messageWritableClass)
    ).saveAsNewAPIHadoopFile(
        file,
        keyWritableClass,
        messageWritableClass,
        SequenceFileOutputFormat.class,
        hadoopConf);
  }
}
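The SequenceFile written by saveAsNewAPIHadoopFile above can later be read back into a pair RDD with the matching input format. A minimal sketch, assuming a JavaSparkContext named jsc and using Text/IntWritable as stand-ins for whatever writable classes were actually written:

// Read the SequenceFile back; the key/value classes must match what was written.
JavaPairRDD<Text, IntWritable> restored = jsc.newAPIHadoopFile(
    file,
    SequenceFileInputFormat.class,
    Text.class,
    IntWritable.class,
    hadoopConf);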
Example 8: createSubmittableJob
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the dependent package/class
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
    throws IOException {
  String tableName = args[0];
  Path outputDir = new Path(args[1]);
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJobName(NAME + "_" + tableName);
  job.setJarByClass(Export.class);
  // Set optional scan parameters
  Scan s = getConfiguredScanForJob(conf, args);
  IdentityTableMapper.initJob(tableName, s, IdentityTableMapper.class, job);
  // No reducers. Just write straight to output files.
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Result.class);
  FileOutputFormat.setOutputPath(job, outputDir); // job conf doesn't contain the conf so doesn't have a default fs.
  return job;
}
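SequenceFileOutputFormat also exposes static helpers for compressing its output, which is often worthwhile for large table exports like this one. A sketch of how a caller could enable block compression on the job returned above; the GzipCodec choice is only an example:

// Enable compressed SequenceFile output (requires org.apache.hadoop.io.SequenceFile
// and org.apache.hadoop.io.compress.GzipCodec).
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);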
Example 9: Run
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the dependent package/class
public static void Run(String output, String outputFormat, int reducerNum, Configuration conf)
    throws IOException, ClassNotFoundException, InterruptedException {
  Job job = Job.getInstance(conf);
  // job.setJobName(Es2Json.class.getName());
  job.setJarByClass(Es2Json.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setMapperClass(MapTask.class);
  job.setReducerClass(ReduceTask.class);
  job.setInputFormatClass(EsInputFormat.class);
  if (outputFormat.equals("sequencefile")) {
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
  }
  job.setNumReduceTasks(reducerNum);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(job, new Path(output));
  job.setSpeculativeExecution(false);
  job.waitForCompletion(true);
}
Example 10: writeMemoryRDD
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the dependent package/class
@Override
public <K, V> Iterator<KeyValue<K, V>> writeMemoryRDD(final Configuration configuration, final String memoryKey, JavaPairRDD<K, V> memoryRDD) {
  final org.apache.hadoop.conf.Configuration hadoopConfiguration = ConfUtil.makeHadoopConfiguration(configuration);
  final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
  if (null != outputLocation) {
    // map back to a Hadoop stream for output
    memoryRDD.mapToPair(keyValue -> new Tuple2<>(new ObjectWritable<>(keyValue._1()), new ObjectWritable<>(keyValue._2())))
        .saveAsNewAPIHadoopFile(Constants.getMemoryLocation(outputLocation, memoryKey),
            ObjectWritable.class,
            ObjectWritable.class,
            SequenceFileOutputFormat.class, hadoopConfiguration);
    try {
      return (Iterator) new ObjectWritableIterator(hadoopConfiguration, new Path(Constants.getMemoryLocation(outputLocation, memoryKey)));
    } catch (final IOException e) {
      throw new IllegalStateException(e.getMessage(), e);
    }
  }
  return Collections.emptyIterator();
}
Example 11: bigItemCount
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the dependent package/class
private boolean bigItemCount(String output) throws IOException, ClassNotFoundException, InterruptedException {
  Job job = Job.getInstance(this.getConf(), "Counting items from " + this.input);
  job.setJarByClass(TopPIoverHadoop.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(this.input));
  FileOutputFormat.setOutputPath(job, new Path(output));
  job.setMapperClass(ItemBigCountingMapper.class);
  job.setReducerClass(ItemBigCountingReducer.class);
  boolean success = job.waitForCompletion(true);
  if (success) {
    Counter rebasingMaxID = job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS);
    this.getConf().setInt(KEY_REBASING_MAX_ID, (int) rebasingMaxID.getValue());
  }
  return success;
}
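Because the counts are written as (IntWritable, IntWritable) pairs in a SequenceFile, they can be inspected outside MapReduce with a plain SequenceFile.Reader. A small sketch, assuming the same conf and output variables and a part file name that is only an example:

// Read one reducer output file; "part-r-00000" is an assumed file name.
try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
    SequenceFile.Reader.file(new Path(output, "part-r-00000")))) {
  IntWritable item = new IntWritable();
  IntWritable count = new IntWritable();
  while (reader.next(item, count)) {
    System.out.println(item.get() + "\t" + count.get());
  }
}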
Example 12: genBigItemMap
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the dependent package/class
private boolean genBigItemMap(String input, String output) throws IOException, ClassNotFoundException,
    InterruptedException {
  Job job = Job.getInstance(this.getConf(), "Computing items remapping for " + this.input);
  job.setJarByClass(TopPIoverHadoop.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(input));
  FileOutputFormat.setOutputPath(job, new Path(output));
  job.setMapperClass(InverseMapper.class);
  job.setReducerClass(ItemBigRebasingReducer.class);
  job.setNumReduceTasks(1);
  return job.waitForCompletion(true);
}
Example 13: filterInput
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the dependent package/class
private boolean filterInput(String output, String rebasingMapPath) throws IOException, ClassNotFoundException,
    InterruptedException {
  Job job = Job.getInstance(this.getConf(), "Computing items remapping for " + this.input);
  job.setJarByClass(TopPIoverHadoop.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(ConcatenatedTransactionsWritable.class);
  DistCache.copyToCache(job, rebasingMapPath);
  FileInputFormat.addInputPath(job, new Path(input));
  FileOutputFormat.setOutputPath(job, new Path(output));
  job.setMapperClass(FilteringMapper.class);
  job.setNumReduceTasks(0);
  return job.waitForCompletion(true);
}
Example 14: handleRelations
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the dependent package/class
private static JavaRDD<Tuple3<String, String, Float>> handleRelations(JavaRDD<DocumentToSoftwareUrlWithMeta> documentToSoftwareUrl, String actionSetId,
    Configuration jobConfig, String outputAvroPath) {
  JavaRDD<Tuple3<String, String, Float>> distinctRelationTriples = documentToSoftwareUrl
      .map(e -> new Tuple3<>(e.getDocumentId().toString(), generateSoftwareEntityId(pickUrl(e)), e.getConfidenceLevel()))
      .distinct();
  JavaPairRDD<String, Tuple3<String, String, Float>> relationTriplesByIdPair = distinctRelationTriples
      .mapToPair(e -> new Tuple2<String, Tuple3<String, String, Float>>(
          joinDocumentAndSoftwareIds(e._1(), e._2()), e));
  JavaRDD<Tuple3<String, String, Float>> dedupedRelationTriples = relationTriplesByIdPair
      .reduceByKey((x, y) -> pickBestConfidence(x, y)).values();
  // to be used by both entity exporter and reporter consumers
  dedupedRelationTriples.cache();
  JavaPairRDD<Text, Text> relationResult = dedupedRelationTriples.flatMapToPair(x -> (Iterable<Tuple2<Text, Text>>)
      buildRelationActions(x._1(), x._2(), x._3(), actionSetId).stream()
          .map(action -> new Tuple2<Text, Text>(new Text(action.getRowKey()),
              new Text(action.toString())))::iterator);
  relationResult.coalesce(numberOfOutputFiles).saveAsNewAPIHadoopFile(outputAvroPath, Text.class, Text.class, SequenceFileOutputFormat.class, jobConfig);
  return dedupedRelationTriples;
}
Example 15: runRandomInputGenerator
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the dependent package/class
public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMuplitplier) throws Exception {
  LOG.info("Running RandomInputGenerator with numMappers=" + numMappers
      + ", numNodes=" + numNodes);
  Job job = new Job(getConf());
  job.setJobName("Random Input Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());
  job.setInputFormatClass(GeneratorInputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(NullWritable.class);
  setJobConf(job, numMappers, numNodes, width, wrapMuplitplier);
  job.setMapperClass(Mapper.class); // identity mapper
  FileOutputFormat.setOutputPath(job, tmpOutput);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}