

Java Pipeline.done Method Code Examples

This article collects typical usage examples of the Java method org.apache.crunch.Pipeline.done. If you are wondering what Pipeline.done does, how to call it, or what real-world uses look like, the curated examples below should help. You can also browse further usage examples for the enclosing class, org.apache.crunch.Pipeline.


The following presents 4 code examples of the Pipeline.done method, sorted by popularity by default.
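Before the full examples, here is a minimal sketch of the lifecycle they all share: reads, transforms, and writes are declared lazily, and Pipeline.done() is what actually runs the pending MapReduce jobs and cleans up. The class name and file paths below are hypothetical placeholders, not taken from any of the projects shown later.

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.PipelineResult;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.hadoop.conf.Configuration;

public class DoneSketch {
  public static void main(String[] args) {
    // The class argument tells Crunch which jar to ship to the cluster.
    Pipeline pipeline = new MRPipeline(DoneSketch.class, new Configuration());

    // Reads and writes are only planned here; nothing executes yet.
    PCollection<String> lines = pipeline.readTextFile("/tmp/in");  // hypothetical path
    pipeline.writeTextFile(lines, "/tmp/out");                     // hypothetical path

    // done() runs every pending job and releases the pipeline's resources.
    PipelineResult result = pipeline.done();
    System.exit(result.succeeded() ? 0 : 1);
  }
}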

Example 1: run

import org.apache.crunch.Pipeline; // import the package/class the method depends on
public int run(String[] args) throws Exception {

    String fooInputPath = args[0];
    String barInputPath = args[1];
    String outputPath = args[2];
    int fooValMax = Integer.parseInt(args[3]);
    int joinValMax = Integer.parseInt(args[4]);
    int numberOfReducers = Integer.parseInt(args[5]);

    Pipeline pipeline = new MRPipeline(JoinFilterExampleCrunch.class, getConf()); //<1>
    
    PCollection<String> fooLines = pipeline.readTextFile(fooInputPath);  //<2>
    PCollection<String> barLines = pipeline.readTextFile(barInputPath);

    PTable<Long, Pair<Long, Integer>> fooTable = fooLines.parallelDo(  //<3>
        new FooIndicatorFn(),
        Avros.tableOf(Avros.longs(),
        Avros.pairs(Avros.longs(), Avros.ints())));

    fooTable = fooTable.filter(new FooFilter(fooValMax));  //<4>

    PTable<Long, Integer> barTable = barLines.parallelDo(new BarIndicatorFn(),
        Avros.tableOf(Avros.longs(), Avros.ints()));

    DefaultJoinStrategy<Long, Pair<Long, Integer>, Integer> joinStrategy =   //<5>
        new DefaultJoinStrategy<Long, Pair<Long, Integer>, Integer>(numberOfReducers);

    PTable<Long, Pair<Pair<Long, Integer>, Integer>> joinedTable = joinStrategy //<6>
        .join(fooTable, barTable, JoinType.INNER_JOIN);

    PTable<Long, Pair<Pair<Long, Integer>, Integer>> filteredTable = joinedTable.filter(new JoinFilter(joinValMax));

    filteredTable.write(At.textFile(outputPath), WriteMode.OVERWRITE); //<7>

    PipelineResult result = pipeline.done();

    return result.succeeded() ? 0 : 1;
  }
 
Developer: amitchmca, Project: hadooparchitecturebook, Lines of code: 41, Source file: JoinFilterExampleCrunch.java
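The DoFn implementations referenced above (FooIndicatorFn, FooFilter, BarIndicatorFn, JoinFilter) are not shown on this page. As a rough sketch only, a FooIndicatorFn matching the Avros.tableOf(longs, pairs(longs, ints)) type in the example might look like the following; the pipe delimiter and column order are assumptions, not taken from the hadooparchitecturebook source.

import org.apache.crunch.DoFn;
import org.apache.crunch.Emitter;
import org.apache.crunch.Pair;

// Hypothetical sketch; the real FooIndicatorFn may parse its input differently.
// In the original project this would be a static nested class of JoinFilterExampleCrunch.
public static class FooIndicatorFn extends DoFn<String, Pair<Long, Pair<Long, Integer>>> {
  @Override
  public void process(String input, Emitter<Pair<Long, Pair<Long, Integer>>> emitter) {
    String[] fields = input.split("\\|");      // assumed field delimiter
    long joinKey = Long.parseLong(fields[0]);  // assumed join-key column
    long fooId = Long.parseLong(fields[1]);
    int fooVal = Integer.parseInt(fields[2]);
    emitter.emit(Pair.of(joinKey, Pair.of(fooId, fooVal)));
  }
}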

Example 2: run

import org.apache.crunch.Pipeline; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {
  JCommander jc = new JCommander(this);
  try {
    jc.parse(args);
  } catch (ParameterException e) {
    jc.usage();
    return 1;
  }

  if (paths == null || paths.size() != 2) {
    jc.usage();
    return 1;
  }

  String inputPathString = paths.get(0);
  String outputPathString = paths.get(1);

  Configuration conf = getConf();
  Path inputPath = new Path(inputPathString);
  Path outputPath = new Path(outputPathString);
  outputPath = outputPath.getFileSystem(conf).makeQualified(outputPath);

  Pipeline pipeline = new MRPipeline(getClass(), conf);

  VariantsLoader variantsLoader;
  if (dataModel.equals("GA4GH")) {
    variantsLoader = new GA4GHVariantsLoader();
  } else if (dataModel.equals("ADAM")) {
    variantsLoader = new ADAMVariantsLoader();
  } else {
    jc.usage();
    return 1;
  }

  Set<String> sampleSet = samples == null ? null :
      Sets.newLinkedHashSet(Splitter.on(',').split(samples));

  PTable<String, SpecificRecord> partitionKeyedRecords =
      variantsLoader.loadPartitionedVariants(inputFormat, inputPath, conf, pipeline,
          variantsOnly, flatten, sampleGroup, sampleSet, redistribute, segmentSize,
          numReducers);

  if (FileUtils.sampleGroupExists(outputPath, conf, sampleGroup)) {
    if (overwrite) {
      FileUtils.deleteSampleGroup(outputPath, conf, sampleGroup);
    } else {
      LOG.error("Sample group already exists: " + sampleGroup);
      return 1;
    }
  }

  pipeline.write(partitionKeyedRecords, new AvroParquetPathPerKeyTarget(outputPath),
      Target.WriteMode.APPEND);

  PipelineResult result = pipeline.done();
  return result.succeeded() ? 0 : 1;
}
 
Developer: cloudera, Project: quince, Lines of code: 59, Source file: LoadVariantsTool.java

Example 3: run

import org.apache.crunch.Pipeline; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {

  new JCommander(this, args);

  URI outputUri = URI.create(output);

  // Our crunch job is a MapReduce job
  Pipeline pipeline = new MRPipeline(LegacyHdfs2Cass.class, getConf());

  // Parse & fetch info about target Cassandra cluster
  CassandraParams params = CassandraParams.parse(outputUri);

  // Read records from Avro files in inputFolder
  PCollection<ByteBuffer> records =
      pipeline.read(From.avroFile(inputList(input), Avros.records(ByteBuffer.class)));

  // Transform the input
  String protocol = outputUri.getScheme();
  if (protocol.equalsIgnoreCase("thrift")) {
    records
        // First convert ByteBuffers to ThriftRecords
        .parallelDo(new LegacyHdfsToThrift(), ThriftRecord.PTYPE)
        // Then group the ThriftRecords in preparation for writing them
        .parallelDo(new ThriftRecord.AsPair(), ThriftRecord.AsPair.PTYPE)
        .groupByKey(params.createGroupingOptions())
        // Finally write the ThriftRecords to Cassandra
        .write(new ThriftTarget(outputUri, params));
  }
  else if (protocol.equalsIgnoreCase("cql")) {
    records
        // In case of CQL, convert ByteBuffers to CQLRecords
        .parallelDo(new LegacyHdfsToCQL(), CQLRecord.PTYPE)
        .by(params.getKeyFn(), Avros.bytes())
        .groupByKey(params.createGroupingOptions())
        .write(new CQLTarget(outputUri, params));
  }

  // Execute the pipeline
  PipelineResult result = pipeline.done();
  return result.succeeded() ? 0 : 1;
}
 
Developer: spotify, Project: hdfs2cass, Lines of code: 43, Source file: LegacyHdfs2Cass.java

Example 4: run

import org.apache.crunch.Pipeline; // import the package/class the method depends on
@Override
public int run(String[] args) throws Exception {

  new JCommander(this, args);

  URI outputUri = URI.create(output);

  // Our crunch job is a MapReduce job
  Configuration conf = getConf();
  conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, Boolean.FALSE);
  conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, Boolean.FALSE);
  Pipeline pipeline = new MRPipeline(Hdfs2Cass.class, conf);

  // Parse & fetch info about target Cassandra cluster
  CassandraParams params = CassandraParams.parse(outputUri);

  PCollection<GenericRecord> records =
      ((PCollection<GenericRecord>)(PCollection) pipeline.read(From.avroFile(inputList(input))));

  String protocol = outputUri.getScheme();
  if (protocol.equalsIgnoreCase("thrift")) {
    records
        // First convert ByteBuffers to ThriftRecords
        .parallelDo(new AvroToThrift(rowkey, timestamp, ttl, ignore), ThriftRecord.PTYPE)
        // Then group the ThriftRecords in preparation for writing them
        .parallelDo(new ThriftRecord.AsPair(), ThriftRecord.AsPair.PTYPE)
        .groupByKey(params.createGroupingOptions())
        // Finally write the ThriftRecords to Cassandra
        .write(new ThriftTarget(outputUri, params));
  }
  else if (protocol.equalsIgnoreCase("cql")) {
    records
        // In case of CQL, convert ByteBuffers to CQLRecords
        .parallelDo(new AvroToCQL(rowkey, timestamp, ttl, ignore), CQLRecord.PTYPE)
        .by(params.getKeyFn(), Avros.bytes())
        .groupByKey(params.createGroupingOptions())
        .write(new CQLTarget(outputUri, params));
  }

  // Execute the pipeline
  PipelineResult result = pipeline.done();
  return result.succeeded() ? 0 : 1;
}
 
Developer: spotify, Project: hdfs2cass, Lines of code: 44, Source file: Hdfs2Cass.java


Note: The org.apache.crunch.Pipeline.done examples on this page were compiled by 纯净天空 from open-source code and documentation hosted on platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by the community, and copyright remains with the original authors. See each project's License for terms of use and distribution; do not republish without permission.