

Java SequenceFileOutputFormat Class Code Examples

This article collects and summarizes typical usage examples of the Java class org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat. If you are wondering what exactly the SequenceFileOutputFormat class does, how to use it, or what real-world code that uses it looks like, the curated class code examples below should help.


The SequenceFileOutputFormat class belongs to the org.apache.hadoop.mapreduce.lib.output package. A total of 15 code examples of the SequenceFileOutputFormat class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
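
Before the project-specific examples, here is a minimal, self-contained sketch of the typical way SequenceFileOutputFormat is wired into a MapReduce job: a map-only job that copies plain text input into a SequenceFile, with optional block compression. The class name TextToSequenceFile, the identity Mapper, and the use of args[0]/args[1] as input and output paths are illustrative assumptions, not code taken from any of the projects below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class TextToSequenceFile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "text-to-sequencefile");
        job.setJarByClass(TextToSequenceFile.class);

        // Map-only job: the identity Mapper forwards (LongWritable offset, Text line) pairs unchanged.
        job.setMapperClass(Mapper.class);
        job.setNumReduceTasks(0);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Optional: compress the SequenceFile records in blocks.
        FileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

        // Assumption: args[0] is the input path, args[1] the output path.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The two SequenceFileOutputFormat-specific calls are setOutputFormatClass(SequenceFileOutputFormat.class) and the optional SequenceFileOutputFormat.setOutputCompressionType(...); everything else is standard Job setup, as the 15 examples below also show.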

Example 1: run

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the required package/class
public static void run(Configuration conf, Path inputPath, Path output, double params) throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = "calculating parameter";
    conf.set("params",String.valueOf(params));

    Job job = new Job(conf, jobName);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(indexToCountWritable.class);
    job.setOutputKeyClass(twoDimensionIndexWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(CalParamsMapper.class);
    job.setReducerClass(CalParamsReducer.class);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job,output);

    job.setJarByClass(LDADriver.class);
    if (!job.waitForCompletion(true)) {
        throw new InterruptedException("calculating parameter failed");
    }
}
 
Developer ID: huyang1, Project: LDA, Lines of code: 25, Source file: CalParamDriver.java

Example 2: runRandomInputGenerator

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the required package/class
public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMuplitplier) throws Exception {
  LOG.info("Running RandomInputGenerator with numMappers=" + numMappers
      + ", numNodes=" + numNodes);
  Job job = Job.getInstance(getConf());

  job.setJobName("Random Input Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());

  job.setInputFormatClass(GeneratorInputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(NullWritable.class);

  setJobConf(job, numMappers, numNodes, width, wrapMuplitplier);

  job.setMapperClass(Mapper.class); //identity mapper

  FileOutputFormat.setOutputPath(job, tmpOutput);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  boolean success = jobCompletion(job);

  return success ? 0 : 1;
}
 
Developer ID: fengchen8086, Project: ditb, Lines of code: 26, Source file: IntegrationTestBigLinkedList.java

Example 3: getOutputFormatClass

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the required package/class
@Override
protected Class<? extends OutputFormat> getOutputFormatClass()
    throws ClassNotFoundException {
  if (isHCatJob) {
    LOG.debug("Returning HCatOutputFormat for output format");
    return SqoopHCatUtilities.getOutputFormatClass();
  }
  if (options.getFileLayout() == SqoopOptions.FileLayout.TextFile) {
    return RawKeyTextOutputFormat.class;
  } else if (options.getFileLayout()
      == SqoopOptions.FileLayout.SequenceFile) {
    return SequenceFileOutputFormat.class;
  } else if (options.getFileLayout()
      == SqoopOptions.FileLayout.AvroDataFile) {
    return AvroOutputFormat.class;
  } else if (options.getFileLayout()
      == SqoopOptions.FileLayout.ParquetFile) {
    return DatasetKeyOutputFormat.class;
  }

  return null;
}
 
Developer ID: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 23, Source file: DataDrivenImportJob.java

Example 4: joinAs

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the required package/class
private static void joinAs(String jointype, 
    Class<? extends SimpleCheckerMapBase<?>> map, 
    Class<? extends SimpleCheckerReduceBase> reduce) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  conf.set(CompositeInputFormat.JOIN_EXPR, CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  conf.setInt("testdatamerge.sources", srcs);
  Job job = Job.getInstance(conf);
  job.setInputFormatClass(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(map);
  job.setReducerClass(reduce);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  job.waitForCompletion(true);
  assertTrue("Job failed", job.isSuccessful());
  if ("outer".equals(jointype)) {
    checkOuterConsistency(job, src);
  }
  base.getFileSystem(conf).delete(base, true);
}
 
Developer ID: naver, Project: hadoop, Lines of code: 27, Source file: TestJoinDatamerge.java

Example 5: createJob

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the required package/class
public Job createJob(Configuration conf) throws IOException {
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 10 * 1024);
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES, numBytesToWritePerMap);
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

  Job job = Job.getInstance(conf);

  job.setJarByClass(RandomTextWriterJob.class);
  job.setJobName("random-text-writer");

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomTextMapper.class);

  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  //FileOutputFormat.setOutputPath(job, new Path("random-output"));
  job.setNumReduceTasks(0);
  return job;
}
 
Developer ID: naver, Project: hadoop, Lines of code: 27, Source file: RandomTextWriterJob.java

Example 6: runJob

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the required package/class
public static void runJob(Configuration conf, Path inputPath, Path output) throws IOException, ClassNotFoundException, InterruptedException {

        Job job = new Job(conf, "Input Drive running input:"+inputPath);
        log.info("start running InputDriver");
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(indexToWordWritable.class);
        job.setOutputKeyClass(twoDimensionIndexWritable.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(InputMapper.class);
        job.setReducerClass(InputReducer.class);
        job.setNumReduceTasks(1);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setJarByClass(InputDriver.class);

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, output);

        boolean succeeded = job.waitForCompletion(true);
        if (!succeeded) {
            throw new IllegalStateException("Job failed!");
        }

    }
 
Developer ID: huyang1, Project: LDA, Lines of code: 25, Source file: InputDriver.java

Example 7: call

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the required package/class
@Override
public void call(JavaPairRDD<K,M> rdd, Time time) throws IOException {
  if (rdd.isEmpty()) {
    log.info("RDD was empty, not saving to HDFS");
  } else {
    String file = prefix + "-" + time.milliseconds() + "." + suffix;
    Path path = new Path(file);
    FileSystem fs = FileSystem.get(path.toUri(), hadoopConf);
    if (fs.exists(path)) {
      log.warn("Saved data already existed, possibly from a failed job. Deleting {}", path);
      fs.delete(path, true);
    }
    log.info("Saving RDD to HDFS at {}", file);
    rdd.mapToPair(
        new ValueToWritableFunction<>(keyClass, messageClass, keyWritableClass, messageWritableClass)
    ).saveAsNewAPIHadoopFile(
        file,
        keyWritableClass,
        messageWritableClass,
        SequenceFileOutputFormat.class,
        hadoopConf);
  }
}
 
Developer ID: oncewang, Project: oryx2, Lines of code: 24, Source file: SaveToHDFSFunction.java

Example 8: createSubmittableJob

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the required package/class
/**
 * Sets up the actual job.
 *
 * @param conf  The current configuration.
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
throws IOException {
  String tableName = args[0];
  Path outputDir = new Path(args[1]);
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJobName(NAME + "_" + tableName);
  job.setJarByClass(Export.class);
  // Set optional scan parameters
  Scan s = getConfiguredScanForJob(conf, args);
  IdentityTableMapper.initJob(tableName, s, IdentityTableMapper.class, job);
  // No reducers.  Just write straight to output files.
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Result.class);
  FileOutputFormat.setOutputPath(job, outputDir); // job conf doesn't contain the conf so doesn't have a default fs.
  return job;
}
 
Developer ID: fengchen8086, Project: ditb, Lines of code: 27, Source file: Export.java

Example 9: Run

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the required package/class
public static void Run(String output, String outputFormat, int reducerNum, Configuration conf) 
            throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance(conf);
//        job.setJobName(Es2Json.class.getName());
        job.setJarByClass(Es2Json.class);
        
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        
        job.setMapperClass(MapTask.class);
        job.setReducerClass(ReduceTask.class);
        job.setInputFormatClass(EsInputFormat.class);
        
        if (outputFormat.equals("sequencefile")) {
            job.setOutputFormatClass(SequenceFileOutputFormat.class);
        }
        
        job.setNumReduceTasks(reducerNum);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        
        FileOutputFormat.setOutputPath(job, new Path(output));
        
        job.setSpeculativeExecution(false);
        job.waitForCompletion(true);
    }
 
Developer ID: chaopengio, Project: elasticsearch-mapreduce, Lines of code: 27, Source file: Es2Json.java

Example 10: writeMemoryRDD

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the required package/class
@Override
public <K, V> Iterator<KeyValue<K, V>> writeMemoryRDD(final Configuration configuration, final String memoryKey, JavaPairRDD<K, V> memoryRDD) {
    final org.apache.hadoop.conf.Configuration hadoopConfiguration = ConfUtil.makeHadoopConfiguration(configuration);
    final String outputLocation = hadoopConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
    if (null != outputLocation) {
        // map back to a Hadoop stream for output
        memoryRDD.mapToPair(keyValue -> new Tuple2<>(new ObjectWritable<>(keyValue._1()), new ObjectWritable<>(keyValue._2())))
                .saveAsNewAPIHadoopFile(Constants.getMemoryLocation(outputLocation, memoryKey),
                        ObjectWritable.class,
                        ObjectWritable.class,
                        SequenceFileOutputFormat.class, hadoopConfiguration);
        try {
            return (Iterator) new ObjectWritableIterator(hadoopConfiguration, new Path(Constants.getMemoryLocation(outputLocation, memoryKey)));
        } catch (final IOException e) {
            throw new IllegalStateException(e.getMessage(), e);
        }
    }
    return Collections.emptyIterator();
}
 
Developer ID: PKUSilvester, Project: LiteGraph, Lines of code: 20, Source file: OutputFormatRDD.java

Example 11: bigItemCount

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the required package/class
private boolean bigItemCount(String output) throws IOException, ClassNotFoundException, InterruptedException {
	Job job = Job.getInstance(this.getConf(), "Counting items from " + this.input);
	job.setJarByClass(TopPIoverHadoop.class);

	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(SequenceFileOutputFormat.class);
	job.setOutputKeyClass(IntWritable.class);
	job.setOutputValueClass(IntWritable.class);

	FileInputFormat.addInputPath(job, new Path(this.input));
	FileOutputFormat.setOutputPath(job, new Path(output));

	job.setMapperClass(ItemBigCountingMapper.class);
	job.setReducerClass(ItemBigCountingReducer.class);

	boolean success = job.waitForCompletion(true);

	if (success) {
		Counter rebasingMaxID = job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS);
		this.getConf().setInt(KEY_REBASING_MAX_ID, (int) rebasingMaxID.getValue());
	}

	return success;
}
 
Developer ID: slide-lig, Project: TopPI, Lines of code: 25, Source file: TopPIoverHadoop.java

Example 12: genBigItemMap

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the required package/class
private boolean genBigItemMap(String input, String output) throws IOException, ClassNotFoundException,
		InterruptedException {
	Job job = Job.getInstance(this.getConf(), "Computing items remapping for " + this.input);
	job.setJarByClass(TopPIoverHadoop.class);

	job.setInputFormatClass(SequenceFileInputFormat.class);
	job.setOutputFormatClass(SequenceFileOutputFormat.class);
	job.setOutputKeyClass(IntWritable.class);
	job.setOutputValueClass(IntWritable.class);

	FileInputFormat.addInputPath(job, new Path(input));
	FileOutputFormat.setOutputPath(job, new Path(output));

	job.setMapperClass(InverseMapper.class);
	job.setReducerClass(ItemBigRebasingReducer.class);
	job.setNumReduceTasks(1);

	return job.waitForCompletion(true);
}
 
Developer ID: slide-lig, Project: TopPI, Lines of code: 20, Source file: TopPIoverHadoop.java

Example 13: filterInput

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the required package/class
private boolean filterInput(String output, String rebasingMapPath) throws IOException, ClassNotFoundException,
		InterruptedException {
	Job job = Job.getInstance(this.getConf(), "Computing items remapping for " + this.input);
	job.setJarByClass(TopPIoverHadoop.class);

	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(SequenceFileOutputFormat.class);
	job.setOutputKeyClass(NullWritable.class);
	job.setOutputValueClass(ConcatenatedTransactionsWritable.class);
	DistCache.copyToCache(job, rebasingMapPath);
	FileInputFormat.addInputPath(job, new Path(input));
	FileOutputFormat.setOutputPath(job, new Path(output));

	job.setMapperClass(FilteringMapper.class);
	job.setNumReduceTasks(0);

	return job.waitForCompletion(true);
}
 
Developer ID: slide-lig, Project: TopPI, Lines of code: 19, Source file: TopPIoverHadoop.java

Example 14: handleRelations

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the required package/class
private static JavaRDD<Tuple3<String, String, Float>> handleRelations(JavaRDD<DocumentToSoftwareUrlWithMeta> documentToSoftwareUrl, String actionSetId, 
        Configuration jobConfig, String outputAvroPath) {
    JavaRDD<Tuple3<String, String, Float>> distinctRelationTriples = documentToSoftwareUrl
            .map(e -> new Tuple3<>(e.getDocumentId().toString(), generateSoftwareEntityId(pickUrl(e)), e.getConfidenceLevel()))
            .distinct();
    
    JavaPairRDD<String, Tuple3<String, String, Float>> relationTriplesByIdPair = distinctRelationTriples
            .mapToPair(e -> new Tuple2<String, Tuple3<String, String, Float>>(
                    joinDocumentAndSoftwareIds(e._1(), e._2()), e));
    
    JavaRDD<Tuple3<String, String, Float>> dedupedRelationTriples = relationTriplesByIdPair
            .reduceByKey((x, y) -> pickBestConfidence(x, y)).values();
    // to be used by both entity exporter and reporter consumers
    dedupedRelationTriples.cache();
    
    JavaPairRDD<Text, Text> relationResult = dedupedRelationTriples.flatMapToPair(x -> (Iterable<Tuple2<Text, Text>>) 
            buildRelationActions(x._1(), x._2(), x._3(), actionSetId).stream()
            .map(action -> new Tuple2<Text, Text>(new Text(action.getRowKey()),
                    new Text(action.toString())))::iterator);
    relationResult.coalesce(numberOfOutputFiles).saveAsNewAPIHadoopFile(outputAvroPath, Text.class, Text.class, SequenceFileOutputFormat.class, jobConfig);
    
    return dedupedRelationTriples;
}
 
Developer ID: openaire, Project: iis, Lines of code: 24, Source file: SoftwareExporterJob.java

Example 15: runRandomInputGenerator

import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; // import the required package/class
public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMuplitplier) throws Exception {
  LOG.info("Running RandomInputGenerator with numMappers=" + numMappers
      + ", numNodes=" + numNodes);
  Job job = new Job(getConf());

  job.setJobName("Random Input Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());

  job.setInputFormatClass(GeneratorInputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(NullWritable.class);

  setJobConf(job, numMappers, numNodes, width, wrapMuplitplier);

  job.setMapperClass(Mapper.class); //identity mapper

  FileOutputFormat.setOutputPath(job, tmpOutput);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  boolean success = job.waitForCompletion(true);

  return success ? 0 : 1;
}
 
Developer ID: fengchen8086, Project: LCIndex-HBase-0.94.16, Lines of code: 26, Source file: IntegrationTestBigLinkedList.java


Note: The org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers, and the copyright of the source code remains with the original authors. Please consult each project's license before redistributing or reusing the code; do not republish without permission.