This article collects typical usage examples of the Java method org.apache.crunch.Pipeline.done. If you are unsure what Pipeline.done does, how to call it, or what working usage looks like, the curated code samples below should help. You can also explore the enclosing class, org.apache.crunch.Pipeline, for more context.
Four code examples of Pipeline.done are shown below, ordered by popularity.
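Before the full examples, here is a minimal sketch of the pattern they all share: create an MRPipeline, declare reads, transforms, and writes (all of which are planned lazily), then call done() to execute the remaining jobs, clean up intermediate data, and obtain a PipelineResult. The class name MyTool and the argument handling are placeholders for illustration, not taken from the examples below.

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.PipelineResult;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;

// Minimal sketch (hypothetical class and paths) showing where done() fits.
public class MyTool extends Configured implements Tool {
  @Override
  public int run(String[] args) throws Exception {
    Pipeline pipeline = new MRPipeline(MyTool.class, getConf());
    PCollection<String> lines = pipeline.readTextFile(args[0]); // lazy: nothing executes yet
    pipeline.writeTextFile(lines, args[1]);                     // still lazy
    PipelineResult result = pipeline.done(); // plans and runs the job(s), then cleans up temp data
    return result.succeeded() ? 0 : 1;
  }
}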
Example 1: run
import org.apache.crunch.Pipeline; // import the package/class the method depends on

public int run(String[] args) throws Exception {
  String fooInputPath = args[0];
  String barInputPath = args[1];
  String outputPath = args[2];
  int fooValMax = Integer.parseInt(args[3]);
  int joinValMax = Integer.parseInt(args[4]);
  int numberOfReducers = Integer.parseInt(args[5]);

  Pipeline pipeline = new MRPipeline(JoinFilterExampleCrunch.class, getConf()); //<1>

  PCollection<String> fooLines = pipeline.readTextFile(fooInputPath); //<2>
  PCollection<String> barLines = pipeline.readTextFile(barInputPath);

  PTable<Long, Pair<Long, Integer>> fooTable = fooLines.parallelDo( //<3>
      new FooIndicatorFn(),
      Avros.tableOf(Avros.longs(), Avros.pairs(Avros.longs(), Avros.ints())));
  fooTable = fooTable.filter(new FooFilter(fooValMax)); //<4>

  PTable<Long, Integer> barTable = barLines.parallelDo(new BarIndicatorFn(),
      Avros.tableOf(Avros.longs(), Avros.ints()));

  DefaultJoinStrategy<Long, Pair<Long, Integer>, Integer> joinStrategy = //<5>
      new DefaultJoinStrategy<Long, Pair<Long, Integer>, Integer>(numberOfReducers);

  PTable<Long, Pair<Pair<Long, Integer>, Integer>> joinedTable = //<6>
      joinStrategy.join(fooTable, barTable, JoinType.INNER_JOIN);

  PTable<Long, Pair<Pair<Long, Integer>, Integer>> filteredTable =
      joinedTable.filter(new JoinFilter(joinValMax));

  filteredTable.write(At.textFile(outputPath), WriteMode.OVERWRITE); //<7>

  // done() triggers the actual MapReduce execution and reports overall success
  PipelineResult result = pipeline.done();
  return result.succeeded() ? 0 : 1;
}
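The DoFn implementations referenced above (FooIndicatorFn, BarIndicatorFn, FooFilter, JoinFilter) are not shown on this page. As a rough illustration only, a parsing function like FooIndicatorFn might look like the following; the delimiter and field layout are assumptions, not part of the example above.

import org.apache.crunch.MapFn;
import org.apache.crunch.Pair;

// Hypothetical sketch of FooIndicatorFn -- the real implementation is not shown here.
// Assumes each input line is "<key>|<joinKey>|<value>" (delimiter and layout are guesses).
public static class FooIndicatorFn extends MapFn<String, Pair<Long, Pair<Long, Integer>>> {
  @Override
  public Pair<Long, Pair<Long, Integer>> map(String line) {
    String[] fields = line.split("\\|");
    return Pair.of(Long.parseLong(fields[0]),
        Pair.of(Long.parseLong(fields[1]), Integer.parseInt(fields[2])));
  }
}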
Example 2: run
import org.apache.crunch.Pipeline; // import the package/class the method depends on

@Override
public int run(String[] args) throws Exception {
  JCommander jc = new JCommander(this);
  try {
    jc.parse(args);
  } catch (ParameterException e) {
    jc.usage();
    return 1;
  }
  if (paths == null || paths.size() != 2) {
    jc.usage();
    return 1;
  }
  String inputPathString = paths.get(0);
  String outputPathString = paths.get(1);
  Configuration conf = getConf();
  Path inputPath = new Path(inputPathString);
  Path outputPath = new Path(outputPathString);
  outputPath = outputPath.getFileSystem(conf).makeQualified(outputPath);

  Pipeline pipeline = new MRPipeline(getClass(), conf);

  VariantsLoader variantsLoader;
  if (dataModel.equals("GA4GH")) {
    variantsLoader = new GA4GHVariantsLoader();
  } else if (dataModel.equals("ADAM")) {
    variantsLoader = new ADAMVariantsLoader();
  } else {
    jc.usage();
    return 1;
  }

  Set<String> sampleSet = samples == null ? null
      : Sets.newLinkedHashSet(Splitter.on(',').split(samples));

  PTable<String, SpecificRecord> partitionKeyedRecords =
      variantsLoader.loadPartitionedVariants(inputFormat, inputPath, conf, pipeline,
          variantsOnly, flatten, sampleGroup, sampleSet, redistribute, segmentSize,
          numReducers);

  if (FileUtils.sampleGroupExists(outputPath, conf, sampleGroup)) {
    if (overwrite) {
      FileUtils.deleteSampleGroup(outputPath, conf, sampleGroup);
    } else {
      LOG.error("Sample group already exists: " + sampleGroup);
      return 1;
    }
  }

  // Write one output path per partition key, appending to any existing data
  pipeline.write(partitionKeyedRecords, new AvroParquetPathPerKeyTarget(outputPath),
      Target.WriteMode.APPEND);

  PipelineResult result = pipeline.done();
  return result.succeeded() ? 0 : 1;
}
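All four examples only check result.succeeded(). If you need more detail after done() returns, PipelineResult also exposes per-stage information. A small sketch, assuming Crunch's PipelineResult.StageResult API; the log message itself is illustrative:

// After pipeline.done(), inspect per-stage results (e.g. to log which stages ran).
PipelineResult result = pipeline.done();
for (PipelineResult.StageResult stage : result.getStageResults()) {
  LOG.info("Completed stage: " + stage.getStageName());
}
return result.succeeded() ? 0 : 1;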
Example 3: run
import org.apache.crunch.Pipeline; // import the package/class the method depends on

@Override
public int run(String[] args) throws Exception {
  new JCommander(this, args);

  URI outputUri = URI.create(output);

  // Our crunch job is a MapReduce job
  Pipeline pipeline = new MRPipeline(LegacyHdfs2Cass.class, getConf());

  // Parse & fetch info about the target Cassandra cluster
  CassandraParams params = CassandraParams.parse(outputUri);

  // Read records from Avro files in the input folder
  PCollection<ByteBuffer> records =
      pipeline.read(From.avroFile(inputList(input), Avros.records(ByteBuffer.class)));

  // Transform the input according to the output URI's scheme
  String protocol = outputUri.getScheme();
  if (protocol.equalsIgnoreCase("thrift")) {
    records
        // First convert ByteBuffers to ThriftRecords
        .parallelDo(new LegacyHdfsToThrift(), ThriftRecord.PTYPE)
        // Then group the ThriftRecords in preparation for writing them
        .parallelDo(new ThriftRecord.AsPair(), ThriftRecord.AsPair.PTYPE)
        .groupByKey(params.createGroupingOptions())
        // Finally write the ThriftRecords to Cassandra
        .write(new ThriftTarget(outputUri, params));
  } else if (protocol.equalsIgnoreCase("cql")) {
    records
        // In the CQL case, convert ByteBuffers to CQLRecords instead
        .parallelDo(new LegacyHdfsToCQL(), CQLRecord.PTYPE)
        .by(params.getKeyFn(), Avros.bytes())
        .groupByKey(params.createGroupingOptions())
        .write(new CQLTarget(outputUri, params));
  }

  // Execute the pipeline
  PipelineResult result = pipeline.done();
  return result.succeeded() ? 0 : 1;
}
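A note on the "Execute the pipeline" comment above: Crunch plans lazily, so none of the reads, parallelDo calls, or writes run until done() (or run()) is invoked. The difference, sketched below, is that done() also cleans up intermediate files and marks the pipeline as finished, while run() leaves the pipeline open for more work:

PipelineResult partial = pipeline.run();  // executes whatever work is pending; pipeline stays open
// ...further reads/writes could be declared and run here...
PipelineResult last = pipeline.done();    // executes anything outstanding, then cleans up temp data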
Example 4: run
import org.apache.crunch.Pipeline; // import the package/class the method depends on

@Override
public int run(String[] args) throws Exception {
  new JCommander(this, args);

  URI outputUri = URI.create(output);

  // Our crunch job is a MapReduce job. Speculative execution is disabled so that
  // duplicate task attempts do not write the same records to Cassandra twice.
  Configuration conf = getConf();
  conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, Boolean.FALSE);
  conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, Boolean.FALSE);
  Pipeline pipeline = new MRPipeline(Hdfs2Cass.class, conf);

  // Parse & fetch info about the target Cassandra cluster
  CassandraParams params = CassandraParams.parse(outputUri);

  // Unchecked cast: reading Avro files without an explicit record class yields generic records
  PCollection<GenericRecord> records =
      ((PCollection<GenericRecord>) (PCollection) pipeline.read(From.avroFile(inputList(input))));

  // Transform the input according to the output URI's scheme
  String protocol = outputUri.getScheme();
  if (protocol.equalsIgnoreCase("thrift")) {
    records
        // First convert the records to ThriftRecords
        .parallelDo(new AvroToThrift(rowkey, timestamp, ttl, ignore), ThriftRecord.PTYPE)
        // Then group the ThriftRecords in preparation for writing them
        .parallelDo(new ThriftRecord.AsPair(), ThriftRecord.AsPair.PTYPE)
        .groupByKey(params.createGroupingOptions())
        // Finally write the ThriftRecords to Cassandra
        .write(new ThriftTarget(outputUri, params));
  } else if (protocol.equalsIgnoreCase("cql")) {
    records
        // In the CQL case, convert the records to CQLRecords instead
        .parallelDo(new AvroToCQL(rowkey, timestamp, ttl, ignore), CQLRecord.PTYPE)
        .by(params.getKeyFn(), Avros.bytes())
        .groupByKey(params.createGroupingOptions())
        .write(new CQLTarget(outputUri, params));
  }

  // Execute the pipeline
  PipelineResult result = pipeline.done();
  return result.succeeded() ? 0 : 1;
}
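All four run(String[]) methods implement Hadoop's Tool interface, so jobs like these are normally launched through ToolRunner. A minimal, assumed entry point for Example 4 (not part of the original source):

import org.apache.hadoop.util.ToolRunner;

// Hypothetical main() -- assumes Hdfs2Cass extends Configured and implements Tool.
public static void main(String[] args) throws Exception {
  System.exit(ToolRunner.run(new Hdfs2Cass(), args));
}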