This article collects typical usage examples of the Java class org.apache.hadoop.mapred.SequenceFileOutputFormat. If you are wondering what SequenceFileOutputFormat is for, how to use it, or where to find working examples of it, the curated class code examples below should help.
SequenceFileOutputFormat belongs to the org.apache.hadoop.mapred package. Fifteen code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
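Before the individual examples, here is a minimal sketch of the pattern most of them share: build a JobConf, select SequenceFileOutputFormat as the output format, and set the output path through it. The sketch is illustrative only; the demo class name, the identity map/reduce classes, and the use of command-line arguments for the paths are assumptions, not taken from any example below.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SequenceFileOutputDemo {
  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(SequenceFileOutputDemo.class);
    job.setJobName("write-sequence-file-demo");

    // Plain text in: TextInputFormat yields (LongWritable offset, Text line) pairs.
    job.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));

    // Binary SequenceFile out: the output path is set through the output format class.
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Identity map/reduce keeps the sketch self-contained; real jobs plug in their own classes.
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(IdentityReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    JobClient.runJob(job);
  }
}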
Example 1: getSeqRecords
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
private List<Writable> getSeqRecords(Path dir, Text key) throws Exception {
  SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(
      getConf(), dir);
  ArrayList<Writable> res = new ArrayList<Writable>();
  Class<?> keyClass = readers[0].getKeyClass();
  Class<?> valueClass = readers[0].getValueClass();
  if (!keyClass.getName().equals("org.apache.hadoop.io.Text"))
    throw new IOException("Incompatible key (" + keyClass.getName() + ")");
  Writable aKey = (Writable) keyClass.newInstance();
  Writable value = (Writable) valueClass.newInstance();
  for (int i = 0; i < readers.length; i++) {
    while (readers[i].next(aKey, value)) {
      if (aKey.equals(key)) {
        res.add(value);
        value = (Writable) valueClass.newInstance();
      }
    }
    readers[i].close();
  }
  return res;
}
Example 2: run
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
@Override
public void run(String[] args) throws Exception {
  Flags flags = new Flags();
  flags.addWithDefaultValue(
      "tag_subject_data", "/media/work/datasets(secret)/douban/raw/tag_subject.dat", "");
  flags.addWithDefaultValue(
      "subject_data", "/media/work/datasets(secret)/douban/raw/subject.dat", "");
  flags.add("output");
  flags.parseAndCheck(args);
  JobConf job = new JobConf(this.getClass());
  job.setJobName("convert-douban-raw-to-posts");
  MapReduceHelper.setAllOutputTypes(job, Text.class);
  MapReduceHelper.setMR(
      job, DoubanRawMapper.class, DoubanToPostReducer.class);
  job.setInputFormat(TextInputFormat.class);
  TextInputFormat.addInputPath(
      job, new Path(flags.getString("tag_subject_data")));
  TextInputFormat.addInputPath(
      job, new Path(flags.getString("subject_data")));
  job.setOutputFormat(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(
      job, new Path(flags.getString("output")));
  JobClient.runJob(job);
}
Example 3: createJobConf
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
private JobConf createJobConf() {
  JobConf jobConf = new JobConf(getConf());
  String jobName = NAME + " " + dateForm.format(new Date(System.currentTimeMillis()));
  jobConf.setJobName(jobName);
  jobConf.setMapSpeculativeExecution(false);
  jobConf.setJarByClass(DataFsck.class);
  jobConf.setInputFormat(DataFsckInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(Text.class);
  jobConf.setMapperClass(DataFsckMapper.class);
  jobConf.setNumReduceTasks(0);
  return jobConf;
}
Example 4: createJobConf
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
private JobConf createJobConf(Configuration conf) {
  JobConf jobConf = new JobConf(conf);
  String jobName = NAME + "_" + dateForm.format(new Date(System.currentTimeMillis()));
  jobConf.setJobName(jobName);
  jobConf.setMapSpeculativeExecution(false);
  jobConf.setJarByClass(FastFileCheck.class);
  jobConf.setInputFormat(FileCheckInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(Text.class);
  jobConf.setMapperClass(FileCheckMapper.class);
  jobConf.setNumReduceTasks(0);
  jobConf.setBoolean(SOURCE_ONLY_CONF, sourceOnly);
  return jobConf;
}
Example 5: getSeqRecords
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
private List<Writable> getSeqRecords(Path dir, Text key) throws Exception {
  SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(getConf(), dir);
  ArrayList<Writable> res = new ArrayList<Writable>();
  Class keyClass = readers[0].getKeyClass();
  Class valueClass = readers[0].getValueClass();
  if (!keyClass.getName().equals("org.apache.hadoop.io.Text"))
    throw new IOException("Incompatible key (" + keyClass.getName() + ")");
  Writable aKey = (Writable) keyClass.newInstance();
  Writable value = (Writable) valueClass.newInstance();
  for (int i = 0; i < readers.length; i++) {
    while (readers[i].next(aKey, value)) {
      if (aKey.equals(key)) {
        res.add(value);
        value = (Writable) valueClass.newInstance();
      }
    }
    readers[i].close();
  }
  return res;
}
Example 6: IDMappingJob
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
public static void IDMappingJob(String[] args) throws IOException {
  JobConf job = new JobConf();
  new GenericOptionsParser(job, args);
  job.setJarByClass(HybridDriver.class);
  job.setJobName("Converting binary similarity scores to text");
  job.setMapperClass(IDMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  Path inputPath = new Path(OUTPUT_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  SequenceFileInputFormat.setInputPaths(job, inputPath);
  Path outputPath = new Path("SimilarityScores");
  job.setOutputFormat(TextOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, outputPath);
  FileSystem.get(job).delete(outputPath, true);
  HashPagesDriver.prepareDistribCache(job, HashPagesDriver.IDS_FILE2); // remove not sure
  JobSubmitter.run(job, "BINARY TO TEXT", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}
Example 7: writeSequence
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
/**
 * Runs a map-only MR job to convert an input directory of numeric-valued
 * records to Hadoop sequence format. It assumes the input is text where
 * each record has the form [id feature weight ..].
 */
public static void writeSequence() throws IOException {
  JobConf job = new JobConf();
  job.setJobName("Convert text vectors to hadoop seqeunce ");
  job.setJarByClass(SeqWriter.class);
  job.setMapperClass(SeqMapper.class);
  job.setNumReduceTasks(0);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(FeatureWeightArrayWritable.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(FeatureWeightArrayWritable.class);
  job.setInputFormat(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path(INPUT_DIR));
  FileSystem.get(job).delete(new Path(HashPagesDriver.IDS_FILE2), true);
  Path outputPath = new Path(OUTPUT_DIR);
  FileSystem.get(job).delete(outputPath, true);
  job.setOutputFormat(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, outputPath);
  JobSubmitter.run(job, "PREPROCESS", -1);
}
Example 8: main
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    throw new Exception("Usage BasicSaveSequenceFile [sparkMaster] [output]");
  }
  String master = args[0];
  String fileName = args[1];
  JavaSparkContext sc = new JavaSparkContext(
      master, "basicloadsequencefile", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  List<Tuple2<String, Integer>> input = new ArrayList();
  input.add(new Tuple2("coffee", 1));
  input.add(new Tuple2("coffee", 2));
  input.add(new Tuple2("pandas", 3));
  JavaPairRDD<String, Integer> rdd = sc.parallelizePairs(input);
  JavaPairRDD<Text, IntWritable> result = rdd.mapToPair(new ConvertToWritableTypes());
  result.saveAsHadoopFile(fileName, Text.class, IntWritable.class, SequenceFileOutputFormat.class);
}
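To round out this example, the following is a hedged sketch, not part of the original snippet, of how the same file could be read back at the end of the same main() method, reusing sc and fileName from above and assuming Java 8 lambdas are available. Converting the Writables back to plain Java types before collecting is an assumption about how the data would be consumed.

// Sketch only: read the SequenceFile written above back into a pair RDD.
JavaPairRDD<Text, IntWritable> raw =
    sc.sequenceFile(fileName, Text.class, IntWritable.class);
// Hadoop reuses Writable instances during the read, so copy the values out immediately.
JavaPairRDD<String, Integer> restored =
    raw.mapToPair(t -> new Tuple2<>(t._1().toString(), t._2().get()));
System.out.println(restored.collect());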
Example 9: produceSamples
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
public long produceSamples(Path samplePath) throws Exception {
  Path input = new Path(samplePath.toString() + "-seeds");
  this.numSamples = writeSeeds(input);
  LOG.info("Generating " + this.numSamples + " of samples");
  JobConf jobConf = getJobConf();
  jobConf.set("genkmeansdataset.dimensions", Integer.toString(dimension));
  FileInputFormat.setInputPaths(jobConf, input);
  FileOutputFormat.setOutputPath(jobConf, samplePath);
  jobConf.setMapperClass(MapClass.class);
  jobConf.setInputFormat(SequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  jobConf.setOutputKeyClass(LongWritable.class);
  jobConf.setOutputValueClass(VectorWritable.class);
  jobConf.setNumReduceTasks(0);
  JobClient.runJob(jobConf);
  return this.numSamples;
}
Example 10: createJobConf
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
/**
 * Create a job configuration
 */
@SuppressWarnings("rawtypes")
public static JobConf createJobConf(String name, String topic, Props props, Class classobj)
    throws Exception {
  JobConf conf = getJobConf(name, props, classobj);
  conf.set("topic", topic);
  // input format
  conf.setInputFormat(KafkaETLInputFormat.class);
  // turn off mapper speculative execution
  conf.setMapSpeculativeExecution(false);
  // setup multiple outputs
  MultipleOutputs.addMultiNamedOutput(conf, "offsets", SequenceFileOutputFormat.class,
      KafkaETLKey.class, BytesWritable.class);
  return conf;
}
Example 11: runCreateJob
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
int runCreateJob(String inputPathString, String outputPathString, String jobName) throws IOException {
  JobConf jobConf = new JobConf(this.conf);
  jobConf.setJobName(jobName);
  jobConf.setMapSpeculativeExecution(false);

  FileInputFormat.addInputPath(jobConf, new Path(inputPathString));
  FileOutputFormat.setOutputPath(jobConf, new Path(outputPathString));

  jobConf.setInputFormat(SequenceFileInputFormat.class);
  jobConf.setOutputKeyClass(LongWritable.class);
  jobConf.setOutputValueClass(CreateFileInfo.class);
  jobConf.setMapperClass(CreateFileMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);

  RunningJob result = JobClient.runJob(jobConf);
  return result.isSuccessful() ? 0 : -1;
}
Example 12: getBaseRecordWriter
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
@Override
protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs,
    JobConf job,
    String name,
    Progressable arg3)
    throws IOException {
  if (theSequenceFileOutputFormat == null) {
    theSequenceFileOutputFormat = new SequenceFileOutputFormat<K, V>();
  }
  return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
}
Example 13: createBayesData
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
private void createBayesData() throws IOException, URISyntaxException {
  log.info("creating bayes text data ... ");
  JobConf job = new JobConf();
  Path fout = options.getResultPath();
  Utils.checkHdfsPath(fout);
  String jobname = "Create bayes data";
  job.setJobName(jobname);
  Utils.shareDict(options, job);
  setBayesOptions(job);
  FileInputFormat.setInputPaths(job, dummy.getPath());
  job.setInputFormat(NLineInputFormat.class);
  job.setJarByClass(CreateBayesPages.class);
  job.setMapperClass(CreateBayesPages.class);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, fout);
  job.setOutputFormat(SequenceFileOutputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  log.info("Running Job: " + jobname);
  log.info("Pages file " + dummy.getPath() + " as input");
  log.info("Rankings file " + fout + " as output");
  JobClient.runJob(job);
  log.info("Finished Running Job: " + jobname);
}
Example 14: runInverter
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
/**
 * Runs the inverter job. The inverter job flips outlinks to inlinks to be
 * passed into the analysis job.
 *
 * @param nodeDb
 *          The node database to use.
 * @param outlinkDb
 *          The outlink database to use.
 * @param output
 *          The output directory.
 *
 * @throws IOException
 *           If an error occurs while running the inverter job.
 */
private void runInverter(Path nodeDb, Path outlinkDb, Path output)
    throws IOException {
  // configure the inverter
  JobConf inverter = new NutchJob(getConf());
  inverter.setJobName("LinkAnalysis Inverter");
  FileInputFormat.addInputPath(inverter, nodeDb);
  FileInputFormat.addInputPath(inverter, outlinkDb);
  FileOutputFormat.setOutputPath(inverter, output);
  inverter.setInputFormat(SequenceFileInputFormat.class);
  inverter.setMapperClass(Inverter.class);
  inverter.setReducerClass(Inverter.class);
  inverter.setMapOutputKeyClass(Text.class);
  inverter.setMapOutputValueClass(ObjectWritable.class);
  inverter.setOutputKeyClass(Text.class);
  inverter.setOutputValueClass(LinkDatum.class);
  inverter.setOutputFormat(SequenceFileOutputFormat.class);
  inverter.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs",
      false);

  // run the inverter job
  LOG.info("Starting inverter job");
  try {
    JobClient.runJob(inverter);
  } catch (IOException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  }
  LOG.info("Finished inverter job.");
}
Example 15: task0
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
/**
 * Extracts redirects and the target for each.
 *
 * @param inputPath
 * @param outputPath
 * @throws IOException
 */
private void task0(String inputPath, String outputPath) throws IOException {
  LOG.info("Extracting redirects (phase 0)...");
  LOG.info(" - input: " + inputPath);
  LOG.info(" - output: " + outputPath);
  JobConf conf = new JobConf(getConf(), ExtractWikipediaAnchorText.class);
  conf.setJobName(String.format("ExtractWikipediaAnchorText:phase0[input: %s, output: %s]", inputPath, outputPath));
  conf.setNumReduceTasks(1);
  FileInputFormat.addInputPath(conf, new Path(inputPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));
  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(Text.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setMapperClass(MyMapper0.class);
  conf.setReducerClass(IdentityReducer.class);
  JobClient.runJob(conf);
}