This article collects typical usage examples of the Java class org.apache.hadoop.mapred.SequenceFileOutputFormat. If you are wondering what SequenceFileOutputFormat is for, how to use it, or where to find working examples of it, the curated class code examples below should help.
SequenceFileOutputFormat belongs to the org.apache.hadoop.mapred package. Fifteen code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
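Before the individual examples, here is a minimal sketch of the pattern most of them share: build a JobConf, select SequenceFileOutputFormat as the output format, and set the output path through it. The sketch is illustrative only; the demo class name, the identity map/reduce classes, and the use of command-line arguments for the paths are assumptions, not taken from any example below.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SequenceFileOutputDemo {
  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(SequenceFileOutputDemo.class);
    job.setJobName("write-sequence-file-demo");

    // Plain text in: TextInputFormat yields (LongWritable offset, Text line) pairs.
    job.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));

    // Binary SequenceFile out: the output path is set through the output format class.
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Identity map/reduce keeps the sketch self-contained; real jobs plug in their own classes.
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(IdentityReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    JobClient.runJob(job);
  }
}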
Example 1: getSeqRecords
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
private List<Writable> getSeqRecords(Path dir, Text key) throws Exception {
  SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(
      getConf(), dir);
  ArrayList<Writable> res = new ArrayList<Writable>();
  Class<?> keyClass = readers[0].getKeyClass();
  Class<?> valueClass = readers[0].getValueClass();
  if (!keyClass.getName().equals("org.apache.hadoop.io.Text"))
    throw new IOException("Incompatible key (" + keyClass.getName() + ")");
  Writable aKey = (Writable) keyClass.newInstance();
  Writable value = (Writable) valueClass.newInstance();
  for (int i = 0; i < readers.length; i++) {
    while (readers[i].next(aKey, value)) {
      if (aKey.equals(key)) {
        res.add(value);
        value = (Writable) valueClass.newInstance();
      }
    }
    readers[i].close();
  }
  return res;
}
Example 2: run
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
@Override
public void run(String[] args) throws Exception {
  Flags flags = new Flags();
  flags.addWithDefaultValue(
      "tag_subject_data", "/media/work/datasets(secret)/douban/raw/tag_subject.dat", "");
  flags.addWithDefaultValue(
      "subject_data", "/media/work/datasets(secret)/douban/raw/subject.dat", "");
  flags.add("output");
  flags.parseAndCheck(args);
  JobConf job = new JobConf(this.getClass());
  job.setJobName("convert-douban-raw-to-posts");
  MapReduceHelper.setAllOutputTypes(job, Text.class);
  MapReduceHelper.setMR(
      job, DoubanRawMapper.class, DoubanToPostReducer.class);
  job.setInputFormat(TextInputFormat.class);
  TextInputFormat.addInputPath(
      job, new Path(flags.getString("tag_subject_data")));
  TextInputFormat.addInputPath(
      job, new Path(flags.getString("subject_data")));
  job.setOutputFormat(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(
      job, new Path(flags.getString("output")));
  JobClient.runJob(job);
}
Example 3: createJobConf
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
private JobConf createJobConf() {
  JobConf jobConf = new JobConf(getConf());
  String jobName = NAME + " " + dateForm.format(new Date(System.currentTimeMillis()));
  jobConf.setJobName(jobName);
  jobConf.setMapSpeculativeExecution(false);
  jobConf.setJarByClass(DataFsck.class);
  jobConf.setInputFormat(DataFsckInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(Text.class);
  jobConf.setMapperClass(DataFsckMapper.class);
  jobConf.setNumReduceTasks(0);
  return jobConf;
}
Example 4: createJobConf
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
private JobConf createJobConf(Configuration conf) {
  JobConf jobConf = new JobConf(conf);
  String jobName = NAME + "_" + dateForm.format(new Date(System.currentTimeMillis()));
  jobConf.setJobName(jobName);
  jobConf.setMapSpeculativeExecution(false);
  jobConf.setJarByClass(FastFileCheck.class);
  jobConf.setInputFormat(FileCheckInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(Text.class);
  jobConf.setMapperClass(FileCheckMapper.class);
  jobConf.setNumReduceTasks(0);
  jobConf.setBoolean(SOURCE_ONLY_CONF, sourceOnly);
  return jobConf;
}
Example 5: getSeqRecords
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
private List<Writable> getSeqRecords(Path dir, Text key) throws Exception {
  SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(getConf(), dir);
  ArrayList<Writable> res = new ArrayList<Writable>();
  Class keyClass = readers[0].getKeyClass();
  Class valueClass = readers[0].getValueClass();
  if (!keyClass.getName().equals("org.apache.hadoop.io.Text"))
    throw new IOException("Incompatible key (" + keyClass.getName() + ")");
  Writable aKey = (Writable) keyClass.newInstance();
  Writable value = (Writable) valueClass.newInstance();
  for (int i = 0; i < readers.length; i++) {
    while (readers[i].next(aKey, value)) {
      if (aKey.equals(key)) {
        res.add(value);
        value = (Writable) valueClass.newInstance();
      }
    }
    readers[i].close();
  }
  return res;
}
Example 6: IDMappingJob
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
public static void IDMappingJob(String[] args) throws IOException {
  JobConf job = new JobConf();
  new GenericOptionsParser(job, args);
  job.setJarByClass(HybridDriver.class);
  job.setJobName("Converting binary similarity scores to text");
  job.setMapperClass(IDMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  Path inputPath = new Path(OUTPUT_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  SequenceFileInputFormat.setInputPaths(job, inputPath);
  Path outputPath = new Path("SimilarityScores");
  job.setOutputFormat(TextOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, outputPath);
  FileSystem.get(job).delete(outputPath, true);
  HashPagesDriver.prepareDistribCache(job, HashPagesDriver.IDS_FILE2); // remove not sure
  JobSubmitter.run(job, "BINARY TO TEXT", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}
Example 7: writeSequence
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
/**
 * Runs a map-only MR job to convert an input directory of numeric-valued
 * records to Hadoop sequence format. It assumes the input is text where
 * each record has the form [id feature weight ..].
 */
public static void writeSequence() throws IOException {
  JobConf job = new JobConf();
  job.setJobName("Convert text vectors to hadoop seqeunce ");
  job.setJarByClass(SeqWriter.class);
  job.setMapperClass(SeqMapper.class);
  job.setNumReduceTasks(0);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(FeatureWeightArrayWritable.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(FeatureWeightArrayWritable.class);
  job.setInputFormat(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path(INPUT_DIR));
  FileSystem.get(job).delete(new Path(HashPagesDriver.IDS_FILE2), true);
  Path outputPath = new Path(OUTPUT_DIR);
  FileSystem.get(job).delete(outputPath, true);
  job.setOutputFormat(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, outputPath);
  JobSubmitter.run(job, "PREPROCESS", -1);
}
Example 8: main
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    throw new Exception("Usage BasicSaveSequenceFile [sparkMaster] [output]");
  }
  String master = args[0];
  String fileName = args[1];
  JavaSparkContext sc = new JavaSparkContext(
      master, "basicloadsequencefile", System.getenv("SPARK_HOME"), System.getenv("JARS"));
  List<Tuple2<String, Integer>> input = new ArrayList();
  input.add(new Tuple2("coffee", 1));
  input.add(new Tuple2("coffee", 2));
  input.add(new Tuple2("pandas", 3));
  JavaPairRDD<String, Integer> rdd = sc.parallelizePairs(input);
  JavaPairRDD<Text, IntWritable> result = rdd.mapToPair(new ConvertToWritableTypes());
  result.saveAsHadoopFile(fileName, Text.class, IntWritable.class, SequenceFileOutputFormat.class);
}
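To round out this example, the following is a hedged sketch, not part of the original snippet, of how the same file could be read back at the end of the same main() method, reusing sc and fileName from above and assuming Java 8 lambdas are available. Converting the Writables back to plain Java types before collecting is an assumption about how the data would be consumed.

// Sketch only: read the SequenceFile written above back into a pair RDD.
JavaPairRDD<Text, IntWritable> raw =
    sc.sequenceFile(fileName, Text.class, IntWritable.class);
// Hadoop reuses Writable instances during the read, so copy the values out immediately.
JavaPairRDD<String, Integer> restored =
    raw.mapToPair(t -> new Tuple2<>(t._1().toString(), t._2().get()));
System.out.println(restored.collect());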
Example 9: produceSamples
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
public long produceSamples(Path samplePath) throws Exception {
  Path input = new Path(samplePath.toString() + "-seeds");
  this.numSamples = writeSeeds(input);
  LOG.info("Generating " + this.numSamples + " of samples");
  JobConf jobConf = getJobConf();
  jobConf.set("genkmeansdataset.dimensions", Integer.toString(dimension));
  FileInputFormat.setInputPaths(jobConf, input);
  FileOutputFormat.setOutputPath(jobConf, samplePath);
  jobConf.setMapperClass(MapClass.class);
  jobConf.setInputFormat(SequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  jobConf.setOutputKeyClass(LongWritable.class);
  jobConf.setOutputValueClass(VectorWritable.class);
  jobConf.setNumReduceTasks(0);
  JobClient.runJob(jobConf);
  return this.numSamples;
}
Example 10: createJobConf
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
/**
 * Create a job configuration
 */
@SuppressWarnings("rawtypes")
public static JobConf createJobConf(String name, String topic, Props props, Class classobj)
    throws Exception {
  JobConf conf = getJobConf(name, props, classobj);
  conf.set("topic", topic);
  // input format
  conf.setInputFormat(KafkaETLInputFormat.class);
  // turn off mapper speculative execution
  conf.setMapSpeculativeExecution(false);
  // setup multiple outputs
  MultipleOutputs.addMultiNamedOutput(conf, "offsets", SequenceFileOutputFormat.class,
      KafkaETLKey.class, BytesWritable.class);
  return conf;
}
Example 11: runCreateJob
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
int runCreateJob(String inputPathString, String outputPathString, String jobName) throws IOException {
  JobConf jobConf = new JobConf(this.conf);
  jobConf.setJobName(jobName);
  jobConf.setMapSpeculativeExecution(false);

  FileInputFormat.addInputPath(jobConf, new Path(inputPathString));
  FileOutputFormat.setOutputPath(jobConf, new Path(outputPathString));

  jobConf.setInputFormat(SequenceFileInputFormat.class);
  jobConf.setOutputKeyClass(LongWritable.class);
  jobConf.setOutputValueClass(CreateFileInfo.class);
  jobConf.setMapperClass(CreateFileMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);

  RunningJob result = JobClient.runJob(jobConf);
  return result.isSuccessful() ? 0 : -1;
}
Example 12: getBaseRecordWriter
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
@Override
protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs,
    JobConf job,
    String name,
    Progressable arg3)
    throws IOException {
  if (theSequenceFileOutputFormat == null) {
    theSequenceFileOutputFormat = new SequenceFileOutputFormat<K, V>();
  }
  return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
}
Example 13: createBayesData
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
private void createBayesData() throws IOException, URISyntaxException {
  log.info("creating bayes text data ... ");
  JobConf job = new JobConf();
  Path fout = options.getResultPath();
  Utils.checkHdfsPath(fout);
  String jobname = "Create bayes data";
  job.setJobName(jobname);
  Utils.shareDict(options, job);
  setBayesOptions(job);
  FileInputFormat.setInputPaths(job, dummy.getPath());
  job.setInputFormat(NLineInputFormat.class);
  job.setJarByClass(CreateBayesPages.class);
  job.setMapperClass(CreateBayesPages.class);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, fout);
  job.setOutputFormat(SequenceFileOutputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  log.info("Running Job: " + jobname);
  log.info("Pages file " + dummy.getPath() + " as input");
  log.info("Rankings file " + fout + " as output");
  JobClient.runJob(job);
  log.info("Finished Running Job: " + jobname);
}
Example 14: runInverter
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
/**
 * Runs the inverter job. The inverter job flips outlinks to inlinks to be
 * passed into the analysis job.
 *
 * @param nodeDb
 *          The node database to use.
 * @param outlinkDb
 *          The outlink database to use.
 * @param output
 *          The output directory.
 *
 * @throws IOException
 *           If an error occurs while running the inverter job.
 */
private void runInverter(Path nodeDb, Path outlinkDb, Path output)
    throws IOException {
  // configure the inverter
  JobConf inverter = new NutchJob(getConf());
  inverter.setJobName("LinkAnalysis Inverter");
  FileInputFormat.addInputPath(inverter, nodeDb);
  FileInputFormat.addInputPath(inverter, outlinkDb);
  FileOutputFormat.setOutputPath(inverter, output);
  inverter.setInputFormat(SequenceFileInputFormat.class);
  inverter.setMapperClass(Inverter.class);
  inverter.setReducerClass(Inverter.class);
  inverter.setMapOutputKeyClass(Text.class);
  inverter.setMapOutputValueClass(ObjectWritable.class);
  inverter.setOutputKeyClass(Text.class);
  inverter.setOutputValueClass(LinkDatum.class);
  inverter.setOutputFormat(SequenceFileOutputFormat.class);
  inverter.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs",
      false);

  // run the inverter job
  LOG.info("Starting inverter job");
  try {
    JobClient.runJob(inverter);
  } catch (IOException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  }
  LOG.info("Finished inverter job.");
}
Example 15: task0
import org.apache.hadoop.mapred.SequenceFileOutputFormat; // import the required package/class
/**
 * Extracts redirects and the target for each.
 *
 * @param inputPath
 * @param outputPath
 * @throws IOException
 */
private void task0(String inputPath, String outputPath) throws IOException {
  LOG.info("Extracting redirects (phase 0)...");
  LOG.info(" - input: " + inputPath);
  LOG.info(" - output: " + outputPath);
  JobConf conf = new JobConf(getConf(), ExtractWikipediaAnchorText.class);
  conf.setJobName(String.format("ExtractWikipediaAnchorText:phase0[input: %s, output: %s]", inputPath, outputPath));
  conf.setNumReduceTasks(1);
  FileInputFormat.addInputPath(conf, new Path(inputPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));
  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(Text.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setMapperClass(MyMapper0.class);
  conf.setReducerClass(IdentityReducer.class);
  JobClient.runJob(conf);
}