This article collects typical usage examples of the Java method org.apache.spark.broadcast.Broadcast.getValue. If you are wondering what exactly Broadcast.getValue does, how to call it, or what it looks like in real code, the curated method examples here should help. You can also explore further usage examples of the enclosing class, org.apache.spark.broadcast.Broadcast.
A total of 15 code examples of the Broadcast.getValue method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code samples.
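Before the examples, here is a minimal self-contained sketch (local-mode Spark, with hypothetical class and variable names that do not come from the examples below) of the basic pattern behind Broadcast.getValue: the driver broadcasts a read-only value once, and each task reads the locally cached copy back with getValue() instead of capturing the value in its closure.

import java.util.Arrays;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;

public class BroadcastGetValueSketch {
    public static void main(String[] args) {
        // Local Spark context, for illustration only.
        SparkConf conf = new SparkConf().setAppName("broadcast-sketch").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // The driver ships this list to the executors once; each executor caches its own copy.
        Broadcast<List<String>> stopWords = sc.broadcast(Arrays.asList("a", "an", "the"));

        JavaRDD<String> words = sc.parallelize(Arrays.asList("the", "spark", "broadcast", "an", "api"));

        // Inside the task, getValue() returns the locally cached broadcast value.
        long kept = words.filter(w -> !stopWords.getValue().contains(w)).count();

        System.out.println("non-stop words: " + kept); // prints 3
        sc.stop();
    }
}

Note that some of the examples below call value() rather than getValue(); for a live (non-destroyed) broadcast both return the same cached object. Several examples also guard against a null Broadcast reference when the corresponding input is optional.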
Example 1: getReadsFunction
import org.apache.spark.broadcast.Broadcast; // import the package/class this method depends on
private static FlatMapFunction<Shard<GATKRead>, ReadWalkerContext> getReadsFunction(
Broadcast<ReferenceMultiSource> bReferenceSource, Broadcast<FeatureManager> bFeatureManager,
SAMSequenceDictionary sequenceDictionary, int readShardPadding) {
return (FlatMapFunction<Shard<GATKRead>, ReadWalkerContext>) shard -> {
// get reference bases for this shard (padded)
SimpleInterval paddedInterval = shard.getInterval().expandWithinContig(readShardPadding, sequenceDictionary);
ReferenceDataSource reference = bReferenceSource == null ? null :
new ReferenceMemorySource(bReferenceSource.getValue().getReferenceBases(paddedInterval), sequenceDictionary);
FeatureManager features = bFeatureManager == null ? null : bFeatureManager.getValue();
return StreamSupport.stream(shard.spliterator(), false)
.map(r -> {
final SimpleInterval readInterval = getReadInterval(r);
return new ReadWalkerContext(r, new ReferenceContext(reference, readInterval), new FeatureContext(features, readInterval));
}).iterator();
};
}
Example 2: getVariantsFunction
import org.apache.spark.broadcast.Broadcast; // import the package/class this method depends on
private static FlatMapFunction<Shard<VariantContext>, VariantWalkerContext> getVariantsFunction(
final Broadcast<ReferenceMultiSource> bReferenceSource,
final Broadcast<FeatureManager> bFeatureManager,
final SAMSequenceDictionary sequenceDictionary, final int variantShardPadding) {
return (FlatMapFunction<Shard<VariantContext>, VariantWalkerContext>) shard -> {
// get reference bases for this shard (padded)
SimpleInterval paddedInterval = shard.getInterval().expandWithinContig(variantShardPadding, sequenceDictionary);
ReferenceDataSource reference = bReferenceSource == null ? null :
new ReferenceMemorySource(bReferenceSource.getValue().getReferenceBases(paddedInterval), sequenceDictionary);
FeatureManager features = bFeatureManager == null ? null : bFeatureManager.getValue();
return StreamSupport.stream(shard.spliterator(), false)
.filter(v -> v.getStart() >= shard.getStart() && v.getStart() <= shard.getEnd()) // only include variants that start in the shard
.map(v -> {
final SimpleInterval variantInterval = new SimpleInterval(v);
return new VariantWalkerContext(v,
new ReadsContext(), // empty
new ReferenceContext(reference, variantInterval),
new FeatureContext(features, variantInterval));
}).iterator();
};
}
Example 3: shardsToAssemblyRegions
import org.apache.spark.broadcast.Broadcast; // import the package/class this method depends on
/**
* @return an RDD of {@link Tuple2<AssemblyRegion, SimpleInterval>} which pairs each AssemblyRegion with the
* interval it was generated in
*/
private static FlatMapFunction<Iterator<Shard<GATKRead>>, Tuple2<AssemblyRegion, SimpleInterval>> shardsToAssemblyRegions(
final Broadcast<ReferenceMultiSource> reference,
final Broadcast<HaplotypeCallerArgumentCollection> hcArgsBroadcast,
final ShardingArgumentCollection assemblyArgs,
final SAMFileHeader header,
final Broadcast<VariantAnnotatorEngine> annotatorEngineBroadcast) {
return shards -> {
final ReferenceMultiSource referenceMultiSource = reference.value();
final ReferenceMultiSourceAdapter referenceSource = new ReferenceMultiSourceAdapter(referenceMultiSource);
final HaplotypeCallerEngine hcEngine = new HaplotypeCallerEngine(hcArgsBroadcast.value(), false, false, header, referenceSource, annotatorEngineBroadcast.getValue());
final ReadsDownsampler readsDownsampler = assemblyArgs.maxReadsPerAlignmentStart > 0 ?
new PositionalDownsampler(assemblyArgs.maxReadsPerAlignmentStart, header) : null;
return Utils.stream(shards)
//TODO we've hacked multi interval shards here with a shim, but we should investigate as smarter approach https://github.com/broadinstitute/gatk/issues/4299
.map(shard -> new ShardToMultiIntervalShardAdapter<>(
new DownsampleableSparkReadShard(new ShardBoundary(shard.getInterval(), shard.getPaddedInterval()), shard, readsDownsampler)))
.flatMap(shardToRegion(assemblyArgs, header, referenceSource, hcEngine)).iterator();
};
}
Example 4: getBroadcast
import org.apache.spark.broadcast.Broadcast; // import the package/class this method depends on
@Override
@SuppressWarnings("unchecked")
public <T> Collection<T> getBroadcast(String name) {
final Broadcast<?> broadcast = this.broadcasts.get(name);
if (broadcast == null) {
throw new RheemException("No such broadcast found: " + name);
}
return (Collection<T>) broadcast.getValue();
}
Example 5: callVariantsFromAssemblyRegions
import org.apache.spark.broadcast.Broadcast; // import the package/class this method depends on
/**
* Call variants from tuples of AssemblyRegion and SimpleInterval.
* The interval should be the non-padded shard boundary for the shard in which the corresponding AssemblyRegion was
* created; it is used to eliminate redundant variant calls at the edges of shard boundaries.
*/
private static FlatMapFunction<Iterator<Tuple2<AssemblyRegion, SimpleInterval>>, VariantContext> callVariantsFromAssemblyRegions(
final AuthHolder authHolder,
final SAMFileHeader header,
final Broadcast<ReferenceMultiSource> referenceBroadcast,
final Broadcast<HaplotypeCallerArgumentCollection> hcArgsBroadcast) {
return regionAndIntervals -> {
//HaplotypeCallerEngine isn't serializable but is expensive to instantiate, so construct and reuse one for every partition
final ReferenceMultiSourceAdapter referenceReader = new ReferenceMultiSourceAdapter(referenceBroadcast.getValue(), authHolder);
final HaplotypeCallerEngine hcEngine = new HaplotypeCallerEngine(hcArgsBroadcast.value(), header, referenceReader);
return iteratorToStream(regionAndIntervals).flatMap(regionToVariants(hcEngine)).iterator();
};
}
Example 6: getAlignmentsFunction
import org.apache.spark.broadcast.Broadcast; // import the package/class this method depends on
/**
* Return a function that maps a {@link Shard} of reads into a tuple of alignments and their corresponding reference and features.
* @param bReferenceSource the reference source broadcast
* @param bFeatureManager the feature manager broadcast
* @param sequenceDictionary the sequence dictionary for the reads
* @param header the reads header
* @param downsamplingInfo the downsampling method for the reads
* @return a function that maps a {@link Shard} of reads into a tuple of alignments and their corresponding reference and features.
*/
private static FlatMapFunction<Shard<GATKRead>, LocusWalkerContext> getAlignmentsFunction(
Broadcast<ReferenceMultiSource> bReferenceSource, Broadcast<FeatureManager> bFeatureManager,
SAMSequenceDictionary sequenceDictionary, SAMFileHeader header, LIBSDownsamplingInfo downsamplingInfo, boolean isEmitEmptyLoci) {
return (FlatMapFunction<Shard<GATKRead>, LocusWalkerContext>) shardedRead -> {
SimpleInterval interval = shardedRead.getInterval();
SimpleInterval paddedInterval = shardedRead.getPaddedInterval();
Iterator<GATKRead> readIterator = shardedRead.iterator();
ReferenceDataSource reference = bReferenceSource == null ? null :
new ReferenceMemorySource(bReferenceSource.getValue().getReferenceBases(paddedInterval), sequenceDictionary);
FeatureManager fm = bFeatureManager == null ? null : bFeatureManager.getValue();
final SAMSequenceDictionary referenceDictionary = reference == null? null : reference.getSequenceDictionary();
final AlignmentContextIteratorBuilder alignmentContextIteratorBuilder = new AlignmentContextIteratorBuilder();
alignmentContextIteratorBuilder.setDownsamplingInfo(downsamplingInfo);
alignmentContextIteratorBuilder.setEmitEmptyLoci(isEmitEmptyLoci);
alignmentContextIteratorBuilder.setIncludeDeletions(true);
alignmentContextIteratorBuilder.setKeepUniqueReadListInLibs(false);
alignmentContextIteratorBuilder.setIncludeNs(false);
final Iterator<AlignmentContext> alignmentContextIterator = alignmentContextIteratorBuilder.build(
readIterator, header, Collections.singletonList(interval), sequenceDictionary,
reference != null);
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(alignmentContextIterator, 0), false).map(alignmentContext -> {
final SimpleInterval alignmentInterval = new SimpleInterval(alignmentContext);
return new LocusWalkerContext(alignmentContext, new ReferenceContext(reference, alignmentInterval), new FeatureContext(fm, alignmentInterval));
}).iterator();
};
}
Example 7: getAssemblyRegionsFunction
import org.apache.spark.broadcast.Broadcast; // import the package/class this method depends on
private static FlatMapFunction<Shard<GATKRead>, AssemblyRegionWalkerContext> getAssemblyRegionsFunction(
final Broadcast<ReferenceMultiSource> bReferenceSource,
final Broadcast<FeatureManager> bFeatureManager,
final SAMSequenceDictionary sequenceDictionary,
final SAMFileHeader header,
final AssemblyRegionEvaluator evaluator,
final int minAssemblyRegionSize,
final int maxAssemblyRegionSize,
final int assemblyRegionPadding,
final double activeProbThreshold,
final int maxProbPropagationDistance,
final boolean includeReadsWithDeletionsInIsActivePileups) {
return (FlatMapFunction<Shard<GATKRead>, AssemblyRegionWalkerContext>) shardedRead -> {
final SimpleInterval paddedInterval = shardedRead.getPaddedInterval();
final SimpleInterval assemblyRegionPaddedInterval = paddedInterval.expandWithinContig(assemblyRegionPadding, sequenceDictionary);
final ReferenceDataSource reference = bReferenceSource == null ? null :
new ReferenceMemorySource(bReferenceSource.getValue().getReferenceBases(assemblyRegionPaddedInterval), sequenceDictionary);
final FeatureManager features = bFeatureManager == null ? null : bFeatureManager.getValue();
final Iterator<AssemblyRegion> assemblyRegionIter = new AssemblyRegionIterator(
new ShardToMultiIntervalShardAdapter<>(shardedRead),
header, reference, features, evaluator,
minAssemblyRegionSize, maxAssemblyRegionSize, assemblyRegionPadding, activeProbThreshold,
maxProbPropagationDistance, includeReadsWithDeletionsInIsActivePileups);
final Iterable<AssemblyRegion> assemblyRegions = () -> assemblyRegionIter;
return Utils.stream(assemblyRegions).map(assemblyRegion ->
new AssemblyRegionWalkerContext(assemblyRegion,
new ReferenceContext(reference, assemblyRegion.getExtendedSpan()),
new FeatureContext(features, assemblyRegion.getExtendedSpan()))).iterator();
};
}
Example 8: getExternalCNVCallAnnotation
import org.apache.spark.broadcast.Broadcast; // import the package/class this method depends on
private static String getExternalCNVCallAnnotation(final SimpleInterval refLoc,
final int end,
final VariantContextBuilder vcBuilder,
final Broadcast<SAMSequenceDictionary> broadcastSequenceDictionary,
final Broadcast<SVIntervalTree<VariantContext>> broadcastCNVCalls,
final String sampleId) {
final SVInterval variantInterval = new SVInterval(broadcastSequenceDictionary.getValue().getSequenceIndex(refLoc.getContig()), refLoc.getStart(), end);
final SVIntervalTree<VariantContext> cnvCallTree = broadcastCNVCalls.getValue();
final String cnvCallAnnotation =
Utils.stream(cnvCallTree.overlappers(variantInterval))
.map(overlapper -> formatExternalCNVCallAnnotation(overlapper.getValue(), sampleId))
.collect(Collectors.joining(","));
return cnvCallAnnotation;
}
Example 9: experimentalInterpretation
import org.apache.spark.broadcast.Broadcast; // import the package/class this method depends on
private void experimentalInterpretation(final JavaSparkContext ctx,
final FindBreakpointEvidenceSpark.AssembledEvidenceResults assembledEvidenceResults,
final SvDiscoveryInputData svDiscoveryInputData,
final String nonCanonicalChromosomeNamesFile) {
if ( expVariantsOutDir == null )
return;
final Broadcast<SAMSequenceDictionary> referenceSequenceDictionaryBroadcast = svDiscoveryInputData.referenceSequenceDictionaryBroadcast;
final Broadcast<SAMFileHeader> headerBroadcast = svDiscoveryInputData.headerBroadcast;
final SAMFileHeader headerForReads = headerBroadcast.getValue();
final SAMReadGroupRecord contigAlignmentsReadGroup = new SAMReadGroupRecord(SVUtils.GATKSV_CONTIG_ALIGNMENTS_READ_GROUP_ID);
final List<String> refNames = SequenceDictionaryUtils.getContigNamesList(referenceSequenceDictionaryBroadcast.getValue());
List<GATKRead> readsList =
assembledEvidenceResults
.getAlignedAssemblyOrExcuseList().stream()
.filter(AlignedAssemblyOrExcuse::isNotFailure)
.flatMap(aa -> aa.toSAMStreamForAlignmentsOfThisAssembly(headerForReads, refNames, contigAlignmentsReadGroup))
.map(SAMRecordToGATKReadAdapter::new)
.collect(Collectors.toList());
JavaRDD<GATKRead> reads = ctx.parallelize(readsList);
final String sampleId = svDiscoveryInputData.sampleId;
final Broadcast<ReferenceMultiSource> referenceBroadcast = svDiscoveryInputData.referenceBroadcast;
final Broadcast<SVIntervalTree<VariantContext>> cnvCallsBroadcast = svDiscoveryInputData.cnvCallsBroadcast;
final SvDiscoveryInputData updatedSvDiscoveryInputData =
new SvDiscoveryInputData(sampleId, svDiscoveryInputData.discoverStageArgs, expVariantsOutDir,
svDiscoveryInputData.metadata, svDiscoveryInputData.assembledIntervals,
svDiscoveryInputData.evidenceTargetLinks, reads, svDiscoveryInputData.toolLogger,
referenceBroadcast, referenceSequenceDictionaryBroadcast, headerBroadcast, cnvCallsBroadcast);
EnumMap<AssemblyContigAlignmentSignatureClassifier.RawTypes, JavaRDD<AssemblyContigWithFineTunedAlignments>>
contigsByPossibleRawTypes =
SvDiscoverFromLocalAssemblyContigAlignmentsSpark.preprocess(updatedSvDiscoveryInputData, nonCanonicalChromosomeNamesFile, true);
SvDiscoverFromLocalAssemblyContigAlignmentsSpark.dispatchJobs(contigsByPossibleRawTypes, updatedSvDiscoveryInputData);
}
Example 10: callVariantsFromAssemblyRegions
import org.apache.spark.broadcast.Broadcast; // import the package/class this method depends on
/**
* Call variants from tuples of AssemblyRegion and SimpleInterval.
* The interval should be the non-padded shard boundary for the shard in which the corresponding AssemblyRegion was
* created; it is used to eliminate redundant variant calls at the edges of shard boundaries.
*/
private static FlatMapFunction<Iterator<Tuple2<AssemblyRegion, SimpleInterval>>, VariantContext> callVariantsFromAssemblyRegions(
final SAMFileHeader header,
final Broadcast<ReferenceMultiSource> referenceBroadcast,
final Broadcast<HaplotypeCallerArgumentCollection> hcArgsBroadcast,
final Broadcast<VariantAnnotatorEngine> annotatorEngineBroadcast) {
return regionAndIntervals -> {
//HaplotypeCallerEngine isn't serializable but is expensive to instantiate, so construct and reuse one for every partition
final ReferenceMultiSource referenceMultiSource = referenceBroadcast.value();
final ReferenceMultiSourceAdapter referenceSource = new ReferenceMultiSourceAdapter(referenceMultiSource);
final HaplotypeCallerEngine hcEngine = new HaplotypeCallerEngine(hcArgsBroadcast.value(), false, false, header, referenceSource, annotatorEngineBroadcast.getValue());
return Utils.stream(regionAndIntervals).flatMap(regionToVariants(hcEngine)).iterator();
};
}
Example 11: SecondIterationFunctionAdapter
import org.apache.spark.broadcast.Broadcast; // import the package/class this method depends on
public SecondIterationFunctionAdapter(Broadcast<Map<String, Object>> word2vecVarMapBroadcast,
Broadcast<double[]> expTableBroadcast, Broadcast<VocabCache<VocabWord>> vocabCacheBroadcast) {
Map<String, Object> word2vecVarMap = word2vecVarMapBroadcast.getValue();
this.expTable = expTableBroadcast.getValue();
this.vectorLength = (int) word2vecVarMap.get("vectorLength");
this.useAdaGrad = (boolean) word2vecVarMap.get("useAdaGrad");
this.negative = (double) word2vecVarMap.get("negative");
this.window = (int) word2vecVarMap.get("window");
this.alpha = (double) word2vecVarMap.get("alpha");
this.minAlpha = (double) word2vecVarMap.get("minAlpha");
this.totalWordCount = (long) word2vecVarMap.get("totalWordCount");
this.seed = (long) word2vecVarMap.get("seed");
this.maxExp = (int) word2vecVarMap.get("maxExp");
this.iterations = (int) word2vecVarMap.get("iterations");
this.batchSize = (int) word2vecVarMap.get("batchSize");
// this.indexSyn0VecMap = new HashMap<>();
// this.pointSyn1VecMap = new HashMap<>();
this.vocab = vocabCacheBroadcast.getValue();
if (this.vocab == null)
throw new RuntimeException("VocabCache is null");
}
Example 12: FirstIterationFunctionAdapter
import org.apache.spark.broadcast.Broadcast; // import the package/class this method depends on
public FirstIterationFunctionAdapter(Broadcast<Map<String, Object>> word2vecVarMapBroadcast,
Broadcast<double[]> expTableBroadcast, Broadcast<VocabCache<VocabWord>> vocabCacheBroadcast) {
Map<String, Object> word2vecVarMap = word2vecVarMapBroadcast.getValue();
this.expTable = expTableBroadcast.getValue();
this.vectorLength = (int) word2vecVarMap.get("vectorLength");
this.useAdaGrad = (boolean) word2vecVarMap.get("useAdaGrad");
this.negative = (double) word2vecVarMap.get("negative");
this.window = (int) word2vecVarMap.get("window");
this.alpha = (double) word2vecVarMap.get("alpha");
this.minAlpha = (double) word2vecVarMap.get("minAlpha");
this.totalWordCount = (long) word2vecVarMap.get("totalWordCount");
this.seed = (long) word2vecVarMap.get("seed");
this.maxExp = (int) word2vecVarMap.get("maxExp");
this.iterations = (int) word2vecVarMap.get("iterations");
this.batchSize = (int) word2vecVarMap.get("batchSize");
this.indexSyn0VecMap = new HashMap<>();
this.pointSyn1VecMap = new HashMap<>();
this.vocab = vocabCacheBroadcast.getValue();
if (this.vocab == null)
throw new RuntimeException("VocabCache is null");
if (negative > 0) {
negativeHolder = NegativeHolder.getInstance();
negativeHolder.initHolder(vocab, expTable, this.vectorLength);
}
}
Example 13: setRDDVarMap
import org.apache.spark.broadcast.Broadcast; // import the package/class this method depends on
public void setRDDVarMap(JavaRDD<String> corpusRDD, Broadcast<Map<String, Object>> broadcasTokenizerVarMap) {
Map<String, Object> tokenizerVarMap = broadcasTokenizerVarMap.getValue();
this.corpusRDD = corpusRDD;
this.numWords = (int) tokenizerVarMap.get("numWords");
// TokenizerFunction Settings
this.nGrams = (int) tokenizerVarMap.get("nGrams");
this.tokenizer = (String) tokenizerVarMap.get("tokenizer");
this.tokenizerPreprocessor = (String) tokenizerVarMap.get("tokenPreprocessor");
this.useUnk = (boolean) tokenizerVarMap.get("useUnk");
this.configuration = (VectorsConfiguration) tokenizerVarMap.get("vectorsConfiguration");
// Remove Stop words
// if ((boolean) tokenizerVarMap.get("removeStop")) {
stopWords = (List<String>) tokenizerVarMap.get("stopWords");
// }
}
Example 14: testBroadcastHcArgs
import org.apache.spark.broadcast.Broadcast; // import the package/class this method depends on
@Test
public void testBroadcastHcArgs() {
Broadcast<HaplotypeCallerArgumentCollection> broadcast = SparkContextFactory.getTestSparkContext().broadcast(new HaplotypeCallerArgumentCollection());
broadcast.getValue();
}
Example 15: ChunkProcessingTask
import org.apache.spark.broadcast.Broadcast; // import the package/class this method depends on
public ChunkProcessingTask(Broadcast<String> transform,
Broadcast<String> header, Broadcast<Properties> exporterProperties) {
fHeader = header.getValue();
fTansform = transform.getValue();
fProperites = exporterProperties.getValue();
}