本文整理匯總了Java中com.google.cloud.dataflow.sdk.transforms.join.CoGbkResult類的典型用法代碼示例。如果您正苦於以下問題:Java CoGbkResult類的具體用法?Java CoGbkResult怎麼用?Java CoGbkResult使用的例子?那麼, 這裡精選的類代碼示例或許可以為您提供幫助。
CoGbkResult類屬於com.google.cloud.dataflow.sdk.transforms.join包,在下文中一共展示了CoGbkResult類的3個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: group
import com.google.cloud.dataflow.sdk.transforms.join.CoGbkResult; //導入依賴的package包/類
/**
 * Co-groups two keyed collections of {@link MusicBrainzDataObject} on their {@code Long} key.
 *
 * @param name name passed to {@code apply} for the co-group transform
 * @param first collection registered under {@code firstTag}
 * @param second collection registered under {@code secondTag}
 * @param firstTag tag associated with {@code first} in the resulting {@link CoGbkResult}
 * @param secondTag tag associated with {@code second} in the resulting {@link CoGbkResult}
 * @return the co-grouped collection, or {@code null} when building or applying the
 *     keyed tuple throws an exception (the exception is logged)
 */
private static PCollection<KV<Long, CoGbkResult>> group(String name,
    PCollection<KV<Long, MusicBrainzDataObject>> first,
    PCollection<KV<Long, MusicBrainzDataObject>> second,
    TupleTag<MusicBrainzDataObject> firstTag,
    TupleTag<MusicBrainzDataObject> secondTag
) {
  // Created outside the try block, so a failure here is not caught below.
  final CoGroupByKey<Long> coGroup = CoGroupByKey.create();
  try {
    final KeyedPCollectionTuple<Long> taggedInputs =
        KeyedPCollectionTuple.of(firstTag, first).and(secondTag, second);
    return taggedInputs.apply(name, coGroup);
  } catch (Exception e) {
    // Failures are logged and surfaced to the caller as a null result.
    logger.error("exception grouping.", e);
    return null;
  }
}
示例2: addBackReads
import com.google.cloud.dataflow.sdk.transforms.join.CoGbkResult; //導入依賴的package包/類
/**
 * Joins reads back onto their matched variants.
 *
 * <p>The reads are taken from the keys of {@code readVariants}, re-keyed by {@code UUID} through
 * {@code KeyReadsByUUID}, and co-grouped with {@code matchedVariants}. For every variants
 * iterable found under a UUID, one {@code KV} of (first read for that UUID, variants) is emitted.
 *
 * @param readVariants read/variant pairs; only the keys (the reads) are used here
 * @param matchedVariants variants grouped per UUID
 * @return a collection pairing a read with each variants iterable sharing its UUID
 * @throws GATKException at processing time if a UUID has no reads in the co-group result
 */
public static PCollection<KV<GATKRead, Iterable<Variant>>> addBackReads(PCollection<KV<GATKRead, Variant>> readVariants, PCollection<KV<UUID, Iterable<Variant>>> matchedVariants) {
  // And now, we do the same song and dance to get the Reads back in.
  final TupleTag<GATKRead> justReadTag = new TupleTag<>();
  final TupleTag<Iterable<Variant>> iterableVariant = new TupleTag<>();
  PCollection<KV<UUID, GATKRead>> kReads = readVariants.apply(Keys.<GATKRead>create()).apply(new KeyReadsByUUID());
  PCollection<KV<UUID, CoGbkResult>> coGbkLast = KeyedPCollectionTuple
      .of(justReadTag, kReads)
      .and(iterableVariant, matchedVariants).apply(CoGroupByKey.<UUID>create());
  return coGbkLast.apply(ParDo.of(new DoFn<KV<UUID, CoGbkResult>, KV<GATKRead, Iterable<Variant>>>() {
    private static final long serialVersionUID = 1L;
    @Override
    public void processElement(ProcessContext c) throws Exception {
      final CoGbkResult grouped = c.element().getValue();
      Iterable<GATKRead> iReads = grouped.getAll(justReadTag);
      Iterable<Iterable<Variant>> variants = grouped.getAll(iterableVariant);
      // We only care about the first read (per the original author, the rest are the same),
      // so take it straight from the iterator instead of copying every read into a list.
      final java.util.Iterator<GATKRead> readIterator = iReads.iterator();
      if (!readIterator.hasNext()) {
        throw new GATKException("no reads found");
      }
      final GATKRead firstRead = readIterator.next();
      for (Iterable<Variant> v : variants) {
        c.output(KV.of(firstRead, v));
      }
    }
  })).setName("RemoveDuplicatePairedReadVariants_addBackReads");
}
示例3: pair
import com.google.cloud.dataflow.sdk.transforms.join.CoGbkResult; //導入依賴的package包/類
/**
 * Pairs each read with every variant it overlaps.
 *
 * <p>Reads and variants are each keyed by {@code VariantShard}, co-grouped on that key, and then,
 * within each shard, every (read, variant) combination for which
 * {@code new SimpleInterval(read).overlaps(variant)} holds is emitted.
 *
 * @param pRead the reads to pair
 * @param pVariant the variants to pair
 * @return a collection of (read, variant) pairs that overlap within a shard
 */
public static PCollection<KV<GATKRead, Variant>> pair(PCollection<GATKRead> pRead, PCollection<Variant> pVariant) {
  final PCollection<KV<VariantShard, GATKRead>> readsByShard =
      pRead.apply(new KeyReadsByOverlappingVariantShard());
  final PCollection<KV<VariantShard, Variant>> variantsByShard =
      pVariant.apply(new KeyVariantByOverlappingVariantShard());

  // Co-group reads and variants on their shared VariantShard key.
  final TupleTag<Variant> variantTag = new TupleTag<>();
  final TupleTag<GATKRead> readTag = new TupleTag<>();
  final PCollection<KV<VariantShard, CoGbkResult>> groupedByShard = KeyedPCollectionTuple
      .of(variantTag, variantsByShard)
      .and(readTag, readsByShard)
      .apply(CoGroupByKey.<VariantShard>create());

  // Per shard, emit every (read, variant) pair whose intervals overlap.
  final DoFn<KV<VariantShard, CoGbkResult>, KV<GATKRead, Variant>> emitOverlaps =
      new DoFn<KV<VariantShard, CoGbkResult>, KV<GATKRead, Variant>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public void processElement(ProcessContext c) throws Exception {
          final CoGbkResult shardContents = c.element().getValue();
          final Iterable<Variant> shardVariants = shardContents.getAll(variantTag);
          final Iterable<GATKRead> shardReads = shardContents.getAll(readTag);
          for (GATKRead read : shardReads) {
            // Built once per read and reused for every variant in the shard.
            final SimpleInterval readSpan = new SimpleInterval(read);
            for (Variant variant : shardVariants) {
              if (!readSpan.overlaps(variant)) {
                continue;
              }
              c.output(KV.of(read, variant));
            }
          }
        }
      };

  return groupedByShard.apply(ParDo.of(emitOverlaps)).setName("PairReadsAndVariants_GroupByRead");
}