本文整理汇总了Java中com.google.cloud.dataflow.sdk.transforms.join.KeyedPCollectionTuple类的典型用法代码示例。如果您正苦于以下问题：Java KeyedPCollectionTuple类的具体用法？Java KeyedPCollectionTuple怎么用？Java KeyedPCollectionTuple使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
KeyedPCollectionTuple类属于com.google.cloud.dataflow.sdk.transforms.join包，在下文中一共展示了KeyedPCollectionTuple类的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: group
import com.google.cloud.dataflow.sdk.transforms.join.KeyedPCollectionTuple; //导入依赖的package包/类
/**
 * Co-groups two keyed collections of {@code MusicBrainzDataObject} by their {@code Long} key.
 *
 * @param name      name given to the applied {@code CoGroupByKey} transform
 * @param first     first keyed input collection
 * @param second    second keyed input collection
 * @param firstTag  tag under which values of {@code first} appear in each {@code CoGbkResult}
 * @param secondTag tag under which values of {@code second} appear in each {@code CoGbkResult}
 * @return the co-grouped collection, or {@code null} if building or applying the join threw
 *         (the exception is logged rather than propagated)
 */
private static PCollection<KV<Long, CoGbkResult>> group(String name,
        PCollection<KV<Long, MusicBrainzDataObject>> first,
        PCollection<KV<Long, MusicBrainzDataObject>> second,
        TupleTag<MusicBrainzDataObject> firstTag,
        TupleTag<MusicBrainzDataObject> secondTag) {
    // Created outside the try block so that a failure here still propagates to the caller.
    final CoGroupByKey<Long> coGroupByKey = CoGroupByKey.create();

    try {
        final KeyedPCollectionTuple<Long> taggedInputs =
                KeyedPCollectionTuple.of(firstTag, first).and(secondTag, second);
        return taggedInputs.apply(name, coGroupByKey);
    } catch (Exception e) {
        logger.error("exception grouping.", e);
        return null;
    }
}
示例2: addBackReads
import com.google.cloud.dataflow.sdk.transforms.join.KeyedPCollectionTuple; //导入依赖的package包/类
/**
 * Joins the reads back onto the per-UUID groups of matched variants.
 *
 * <p>The keys of {@code readVariants} (the reads) are re-keyed with {@code KeyReadsByUUID} and
 * co-grouped with {@code matchedVariants}. For every variant group found under a key, one pair
 * of (first read for that key, variant group) is emitted.
 *
 * @param readVariants    read/variant pairs; only the reads (the keys) are used here
 * @param matchedVariants variant groups keyed by UUID
 * @return pairs of a read and an associated variant group
 * @throws GATKException at pipeline execution time, from the DoFn, if a key has no read
 */
public static PCollection<KV<GATKRead, Iterable<Variant>>> addBackReads(PCollection<KV<GATKRead, Variant>> readVariants, PCollection<KV<UUID, Iterable<Variant>>> matchedVariants) {
    // And now, we do the same song and dance to get the Reads back in.
    final TupleTag<GATKRead> justReadTag = new TupleTag<>();
    final TupleTag<Iterable<Variant>> iterableVariant = new TupleTag<>();

    PCollection<KV<UUID, GATKRead>> kReads = readVariants.apply(Keys.<GATKRead>create()).apply(new KeyReadsByUUID());
    PCollection<KV<UUID, CoGbkResult>> coGbkLast = KeyedPCollectionTuple
            .of(justReadTag, kReads)
            .and(iterableVariant, matchedVariants).apply(CoGroupByKey.<UUID>create());

    return coGbkLast.apply(ParDo.of(new DoFn<KV<UUID, CoGbkResult>, KV<GATKRead, Iterable<Variant>>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void processElement(ProcessContext c) throws Exception {
            Iterable<GATKRead> iReads = c.element().getValue().getAll(justReadTag);
            Iterable<Iterable<Variant>> variants = c.element().getValue().getAll(iterableVariant);

            // We only care about the first read (the rest are the same), so take it straight
            // from the iterator instead of copying every read of the group into a list.
            final java.util.Iterator<GATKRead> readIterator = iReads.iterator();
            if (!readIterator.hasNext()) {
                throw new GATKException("no reads found");
            }
            final GATKRead firstRead = readIterator.next();

            for (Iterable<Variant> v : variants) {
                c.output(KV.of(firstRead, v));
            }
        }
    })).setName("RemoveDuplicatePairedReadVariants_addBackReads");
}
示例3: pair
import com.google.cloud.dataflow.sdk.transforms.join.KeyedPCollectionTuple; //导入依赖的package包/类
/**
 * Pairs every read with each variant it overlaps.
 *
 * <p>Reads and variants are both keyed by {@code VariantShard} (via
 * {@code KeyReadsByOverlappingVariantShard} and {@code KeyVariantByOverlappingVariantShard}),
 * co-grouped per shard, and then each read in a shard is tested against each variant in the
 * same shard with {@code SimpleInterval.overlaps}.
 *
 * @param pRead    the reads
 * @param pVariant the variants
 * @return one (read, variant) pair for every overlapping combination found within a shard
 */
public static PCollection<KV<GATKRead, Variant>> pair(PCollection<GATKRead> pRead, PCollection<Variant> pVariant) {
    final PCollection<KV<VariantShard, GATKRead>> readsByShard =
            pRead.apply(new KeyReadsByOverlappingVariantShard());
    final PCollection<KV<VariantShard, Variant>> variantsByShard =
            pVariant.apply(new KeyVariantByOverlappingVariantShard());

    // Co-group both inputs by VariantShard.
    final TupleTag<Variant> variantTag = new TupleTag<>();
    final TupleTag<GATKRead> readTag = new TupleTag<>();
    final PCollection<KV<VariantShard, CoGbkResult>> groupedByShard = KeyedPCollectionTuple
            .of(variantTag, variantsByShard)
            .and(readTag, readsByShard)
            .apply(CoGroupByKey.<VariantShard>create());

    // Within each shard, emit the (read, variant) pairs that actually overlap.
    final DoFn<KV<VariantShard, CoGbkResult>, KV<GATKRead, Variant>> emitOverlaps =
            new DoFn<KV<VariantShard, CoGbkResult>, KV<GATKRead, Variant>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void processElement(ProcessContext c) throws Exception {
                    final CoGbkResult shardGroup = c.element().getValue();
                    final Iterable<Variant> shardVariants = shardGroup.getAll(variantTag);
                    final Iterable<GATKRead> shardReads = shardGroup.getAll(readTag);

                    for (GATKRead read : shardReads) {
                        final SimpleInterval readSpan = new SimpleInterval(read);
                        for (Variant candidate : shardVariants) {
                            if (!readSpan.overlaps(candidate)) {
                                continue;
                            }
                            c.output(KV.of(read, candidate));
                        }
                    }
                }
            };

    return groupedByShard.apply(ParDo.of(emitOverlaps)).setName("PairReadsAndVariants_GroupByRead");
}