This article collects and summarizes typical usages of the Java method org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath. If you have been wondering what exactly SequenceFileOutputFormat.setOutputPath does, how to use it, or where to find usage examples, the curated code samples below may help. You can also read further about the enclosing class, org.apache.hadoop.mapred.SequenceFileOutputFormat.
Eleven code examples of SequenceFileOutputFormat.setOutputPath are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java code examples.
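Before the individual examples, here is a minimal, self-contained sketch of the basic pattern. setOutputPath is a static method inherited from FileOutputFormat: it records the job's output directory in the JobConf, and SequenceFileOutputFormat then writes its part files there as Hadoop SequenceFiles. The driver class name, the identity mapper/reducer, and the command-line paths below are illustrative assumptions, not taken from any of the projects quoted later.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SetOutputPathSketch {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(SetOutputPathSketch.class);
        job.setJobName("sequence-file-output-sketch");
        // Plain text in, SequenceFile out.
        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(SequenceFileOutputFormat.class);
        // Identity map/reduce simply copies the (offset, line) records through.
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        // args[0]: input directory, args[1]: output directory (must not exist yet).
        TextInputFormat.addInputPath(job, new Path(args[0]));
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
        JobClient.runJob(job);
    }
}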
Example 1: run
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the package/class this method depends on
@Override
public void run(String[] args) throws Exception {
    Flags flags = new Flags();
    flags.addWithDefaultValue(
        "tag_subject_data", "/media/work/datasets(secret)/douban/raw/tag_subject.dat", "");
    flags.addWithDefaultValue(
        "subject_data", "/media/work/datasets(secret)/douban/raw/subject.dat", "");
    flags.add("output");
    flags.parseAndCheck(args);

    JobConf job = new JobConf(this.getClass());
    job.setJobName("convert-douban-raw-to-posts");
    MapReduceHelper.setAllOutputTypes(job, Text.class);
    MapReduceHelper.setMR(
        job, DoubanRawMapper.class, DoubanToPostReducer.class);
    job.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(
        job, new Path(flags.getString("tag_subject_data")));
    TextInputFormat.addInputPath(
        job, new Path(flags.getString("subject_data")));
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(
        job, new Path(flags.getString("output")));
    JobClient.runJob(job);
}
Example 2: IDMappingJob
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the package/class this method depends on
public static void IDMappingJob(String[] args) throws IOException {
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJarByClass(HybridDriver.class);
    job.setJobName("Converting binary similarity scores to text");
    job.setMapperClass(IDMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    Path inputPath = new Path(OUTPUT_DIR);
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPath);
    Path outputPath = new Path("SimilarityScores");
    job.setOutputFormat(TextOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);
    HashPagesDriver.prepareDistribCache(job, HashPagesDriver.IDS_FILE2); //remove not sure
    JobSubmitter.run(job, "BINARY TO TEXT", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}
Example 3: writeSequence
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the package/class this method depends on
/**
 * Runs a map-only MR job to convert an input directory of numeric-valued
 * records to Hadoop sequence format. The input is assumed to be text of
 * the form [id feature weight ...].
 */
public static void writeSequence() throws IOException {
    JobConf job = new JobConf();
    job.setJobName("Convert text vectors to hadoop sequence");
    job.setJarByClass(SeqWriter.class);
    job.setMapperClass(SeqMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(FeatureWeightArrayWritable.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);

    job.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(INPUT_DIR));
    FileSystem.get(job).delete(new Path(HashPagesDriver.IDS_FILE2), true);
    Path outputPath = new Path(OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    JobSubmitter.run(job, "PREPROCESS", -1);
}
Example 4: SetSeqFileInputOutput
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the package/class this method depends on
public static void SetSeqFileInputOutput(JobConf job, String inputPaths, Path output) throws IOException {
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, output);

    // Expand each comma-separated input pattern.
    FileSystem fs = FileSystem.get(job);
    String[] paths = inputPaths.split(",");
    for (String p : paths) {
        int lastslash = p.lastIndexOf("/");
        if (lastslash < 0) {
            p = "./" + p;
            lastslash = 1;
        }
        String parent = p.substring(0, lastslash);
        p = p.substring(lastslash + 1);
        // Each file name is treated as a pattern: escape regex metacharacters,
        // then map the glob wildcards * and ? to their regex equivalents.
        p = p.replace("\\", "\\\\");
        p = p.replace(".", "\\.");
        p = p.replace("*", ".*");
        p = p.replace("?", ".");
        LOG.info("Use pattern:" + p);
        Pattern re = Pattern.compile(p);
        // List all files in the parent directory and add the ones that match.
        FileStatus[] files = fs.listStatus(new Path(parent));
        for (FileStatus f : files) {
            if (re.matcher(f.getPath().getName()).matches()) {
                SequenceFileInputFormat.addInputPath(job, f.getPath());
                LOG.info("Adding input:" + f.getPath());
            }
        }
    }
}
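A hedged usage sketch for the helper above: each comma-separated entry is expanded as a glob-like pattern against the files in its parent directory. The driver class, paths, and patterns here are illustrative assumptions, and the call is assumed to sit in the same class that defines SetSeqFileInputOutput; the mapper, reducer, and key/value classes still have to be configured separately.

JobConf job = new JobConf(MyDriver.class);  // MyDriver is a placeholder driver class
// Add every file matching part-* under /data/vectors plus every file matching
// 2019-??.seq under /data/extra, and write SequenceFiles to /data/joined.
SetSeqFileInputOutput(job, "/data/vectors/part-*,/data/extra/2019-??.seq",
    new Path("/data/joined"));
// ... set mapper/reducer and output key/value classes here ...
JobClient.runJob(job);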
Example 5: main
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the package/class this method depends on
@SuppressWarnings("unused")
public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(EdgelistPartitioner.class);
    if (conf == null) {
        return;
    }
    String dir1 = "/user/miyuru/merged";
    String dir2 = "/user/miyuru/merged-out";
    // We first delete the temporary directories if they exist on the HDFS.
    FileSystem fs1 = FileSystem.get(new JobConf());
    // Only delete dir2, because dir1 is uploaded externally.
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    conf.setInputFormat(WholeFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    WholeFileInputFormat.setInputPaths(conf, new Path(dir1));
    SequenceFileOutputFormat.setOutputPath(conf, new Path(dir2));
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(SequenceFileMapper.class);
    conf.setReducerClass(MultipleOutputsInvertedReducer.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setJobName("EdgelistPartitioner");

    MultipleOutputs.addMultiNamedOutput(conf, "partition",
        TextOutputFormat.class, NullWritable.class, Text.class);
    JobClient.runJob(conf);
}
Example 6: setSeqFileInputOutput
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the package/class this method depends on
public static void setSeqFileInputOutput(JobConf job, Path input, Path output) {
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileInputFormat.addInputPath(job, input);
    SequenceFileOutputFormat.setOutputPath(job, output);
}
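Such a helper is handy when chaining jobs, because the SequenceFile output of one stage can be fed directly to the next. A minimal sketch under that assumption (the driver class, paths, and the two mapper classes are placeholders, not from the original project):

Path raw = new Path("/data/stage1-input");    // hypothetical SequenceFile input
Path mid = new Path("/tmp/stage1-output");    // intermediate SequenceFile directory
Path done = new Path("/data/final-output");   // hypothetical final output

JobConf first = new JobConf(MyDriver.class);  // MyDriver is a placeholder
setSeqFileInputOutput(first, raw, mid);
first.setMapperClass(FirstStageMapper.class); // placeholder mapper
// ... set output key/value classes for stage 1 ...
JobClient.runJob(first);

JobConf second = new JobConf(MyDriver.class);
setSeqFileInputOutput(second, mid, done);       // stage 1 output becomes stage 2 input
second.setMapperClass(SecondStageMapper.class); // placeholder mapper
// ... set output key/value classes for stage 2 ...
JobClient.runJob(second);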
Example 7: run
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the package/class this method depends on
public int run(String[] args) throws Exception {
    // Get the current configuration.
    Configuration conf = getConf();
    // Parse command line arguments.
    String inputPath = args[0];
    String outputPath = args[1];
    String maxArcFiles = "";
    if (args.length == 3)
        maxArcFiles = args[2];
    // Set the maximum number of arc files to process.
    conf.set(MAX_FILES_KEY, maxArcFiles);

    JobConf job = new JobConf(conf);

    // Set input path.
    if (inputPath.length() > 0) {
        LOG.info("Setting input path to " + inputPath);
        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileInputFormat.setInputPathFilter(job, FileCountFilter.class);
    } else {
        System.err.println("No input path found.");
        return 1;
    }

    // Set output path.
    if (outputPath.length() > 0) {
        LOG.info("Setting output path to " + outputPath);
        SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
        // Compress output to boost performance.
        SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        SequenceFileOutputFormat.setCompressOutput(job, true);
    } else {
        System.err.println("No output path found.");
        return 1;
    }

    // Load other classes from the same jar as this class.
    job.setJarByClass(WikiReverse.class);
    // Input is in WARC file format.
    job.setInputFormat(WarcFileInputFormat.class);
    // Output is in Hadoop sequence file format.
    job.setOutputFormat(SequenceFileOutputFormat.class);
    // Set the output data types.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LinkArrayWritable.class);
    // Use the custom map runner class.
    job.setMapRunnerClass(WikiReverseMapper.class);
    // Use the custom reducer class.
    job.setReducerClass(LinkArrayReducer.class);
    // Allow 5 percent of map tasks to fail.
    job.setMaxMapTaskFailuresPercent(MAX_MAP_TASK_FAILURES_PERCENT);

    if (JobClient.runJob(job).isSuccessful())
        return 0;
    else
        return 1;
}
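Because the job above compresses its SequenceFile output with GzipCodec, one way to spot-check the result is to read a part file back with SequenceFile.Reader, which decompresses records transparently and reports the key/value classes stored in the file header. This is only a sketch; the path argument is a placeholder for one of the part files written by the job.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class DumpSequenceFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // args[0]: a part file produced by the job above, e.g. <output dir>/part-00000.
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(args[0]), conf);
        try {
            // The key and value classes are recorded in the file header,
            // so they can be instantiated reflectively.
            Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
            while (reader.next(key, value)) {
                System.out.println(key + "\t" + value);
            }
        } finally {
            reader.close();
        }
    }
}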
Example 8: run
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the package/class this method depends on
public int run(String[] args) throws Exception {
    // Get the current configuration.
    Configuration conf = getConf();
    // Parse command line arguments.
    String inputPaths = args[0];
    String outputPath = args[1];

    JobConf job = new JobConf(conf);

    // Set input paths.
    if (inputPaths.length() > 0) {
        List<String> segmentPaths = Lists.newArrayList(Splitter.on(",")
            .split(inputPaths));
        for (String segmentPath : segmentPaths) {
            LOG.info("Adding input path " + segmentPath);
            FileInputFormat.addInputPath(job, new Path(segmentPath));
        }
    } else {
        System.err.println("No input path found.");
        return 1;
    }

    // Set output path.
    if (outputPath.length() > 0) {
        LOG.info("Setting output path to " + outputPath);
        SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
        // Compress output to boost performance.
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    } else {
        System.err.println("No output path found.");
        return 1;
    }

    // Load other classes from the same jar as this class.
    job.setJarByClass(SegmentCombiner.class);
    // Input is in Hadoop sequence file format.
    job.setInputFormat(SequenceFileInputFormat.class);
    // Output is in Hadoop sequence file format.
    job.setOutputFormat(SequenceFileOutputFormat.class);
    // Set the output data types.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LinkArrayWritable.class);
    // Use the custom mapper class.
    job.setMapperClass(SegmentCombinerMapper.class);
    // Use the custom reducer class.
    job.setReducerClass(LinkArrayReducer.class);

    if (JobClient.runJob(job).isSuccessful())
        return 0;
    else
        return 1;
}
Example 9: main
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the package/class this method depends on
public static void main(String args[]) throws ParseException, IOException {
    // job.set("mapred.job.tracker", "local");
    // job.set("fs.default.name", "file:///");
    JobConf job = new JobConf();
    job.setJarByClass(HybridDriver.class);
    new GenericOptionsParser(job, args);
    setMapperAndRunner(job);
    job.setMapOutputKeyClass(DocDocWritable.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(DocDocWritable.class);
    job.setOutputValueClass(FloatWritable.class);

    Path inputPath = new Path(INPUT_DIR);
    CustomSequenceFileInputFormat.addInputPath(job, inputPath);
    Path outputPath = new Path(OUTPUT_DIR);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);
    job.setBoolean("fs.hdfs.impl.disable.cache", true); // xun: not sure if needed

    if (job.getBoolean(Config.SPLITABLE_PROPERTY, Config.SPLITABLE_VALUE)) {
        job.setInputFormat(CustomSequenceFileInputFormat.class);
        Long splitMB = job.getLong(Config.SPLIT_MB_PROPERTY, Config.SPLIT_MB_VALUE) * 1024 * 1024;
        job.setLong("mapred.min.split.size", splitMB);
        job.setLong("mapred.max.split.size", splitMB);
        job.setLong("dfs.block.size", splitMB);
    } else {
        // For the www experiments, comment out the following splitter call: it assumes no
        // splitting of partitions for load balancing, which should be fixed.
        Splitter.configure(job, inputPath); // remove comment unless for www
        job.setInputFormat(NonSplitableSequenceInputFormat.class); // remove comment
    }

    // SIGIR'14 two-stage balancing (not yet fully incorporated)
    if (job.getInt(Config.LOAD_BALANCE_PROPERTY, Config.LOAD_BALANCE_VALUE) != 0) {
        TwoStageLoadbalancing.main(job.getInt(Config.LOAD_BALANCE_PROPERTY, Config.LOAD_BALANCE_VALUE),
            new Path(PartDriver.OUTPUT_DIR), job);
    }

    JobSubmitter.run(job, "SIMILARITY", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
    if (job.getBoolean(Config.CONVERT_TEXT_PROPERTY, Config.CONVERT_TEXT_VALUE))
        IDMappingJob(args);
}
Example 10: main
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    job.setJobName("InvertedIndexDriver-BuildII");
    job.setJarByClass(InvertedIndexDriver.class);
    GenericOptionsParser gop = new GenericOptionsParser(job, args);
    args = gop.getRemainingArgs();
    if (args.length != 2)
        printUsage();

    //
    // Job 1: build the inverted index.
    //
    job.setMapperClass(InvertedMapper.class);
    job.setReducerClass(InvertedReducer.class);
    job.setNumReduceTasks(4);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(DocWeight.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(DocWeightArrayWritable.class);
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(args[0]));
    job.setOutputFormat(SequenceFileOutputFormat.class);
    Path interPath = new Path("inverted");
    FileSystem.get(job).delete(interPath, true);
    SequenceFileOutputFormat.setOutputPath(job, interPath);
    HybridDriver.run(job);

    //
    // Collect statistics
    //

    //
    // Job 2: compute similarities from the inverted index.
    //
    job = new JobConf(new Configuration());
    job.setJarByClass(InvertedIndexDriver.class);
    job.setJobName("InvertedIndexDriver-Similarity (SII)");
    job.setMapperClass(InvertedSimMapper.class);
    job.setReducerClass(InvertedSimReducer.class);
    job.setNumReduceTasks(5);
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path("inverted"));
    job.setOutputFormat(SequenceFileOutputFormat.class);
    Path outputPath = new Path(args[1]);
    FileSystem.get(job).delete(outputPath, true);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputKeyClass(DocDocWritable.class);
    job.setOutputValueClass(FloatWritable.class);

    long t = System.currentTimeMillis();
    HybridDriver.run(job);
    System.out.println("Job took " + (System.currentTimeMillis() - t) + " millisec.");
}
Example 11: run
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the package/class this method depends on
public int run(String[] args) throws Exception {
    // Get the current configuration.
    Configuration conf = getConf();
    // Parse command line arguments.
    String inputPath = args[0];
    String outputPath = args[1];
    // Set the maximum number of files if specified; otherwise parse the entire segment.
    String maxFiles = "";
    if (args.length == 3)
        maxFiles = args[2];
    conf.set(MAX_FILES_KEY, maxFiles);

    JobConf job = new JobConf(conf);

    // Set input path.
    if (inputPath.length() > 0) {
        LOG.info("Setting input path to " + inputPath);
        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileInputFormat.setInputPathFilter(job, FileCountFilter.class);
    } else {
        System.err.println("No input path found.");
        return 1;
    }

    // Set output path.
    if (outputPath.length() > 0) {
        LOG.info("Setting output path to " + outputPath);
        SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
        // Compress output to boost performance.
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    } else {
        System.err.println("No output path found.");
        return 1;
    }

    // Load other classes from the same jar as this class.
    job.setJarByClass(WordCount.class);
    // Input is in WARC file format.
    job.setInputFormat(WarcFileInputFormat.class);
    // Output to Hadoop sequence file format.
    job.setOutputFormat(SequenceFileOutputFormat.class);
    // Set the output data types.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // Use the custom mapper class.
    job.setMapperClass(WordCountMapper.class);
    // Use the standard reducer class.
    job.setReducerClass(LongSumReducer.class);
    // Allow 5 percent of map tasks to fail.
    job.setMaxMapTaskFailuresPercent(MAX_MAP_TASK_FAILURES_PERCENT);

    if (JobClient.runJob(job).isSuccessful())
        return 0;
    else
        return 1;
}