当前位置: 首页>>代码示例>>Java>>正文


Java Broadcast.getValue方法代码示例

本文整理汇总了 Java 中 org.apache.spark.broadcast.Broadcast.getValue 方法的典型用法代码示例。如果您正苦于以下问题:Java Broadcast.getValue 方法的具体用法是什么?Java Broadcast.getValue 怎么用?有哪些 Java Broadcast.getValue 的使用示例?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.apache.spark.broadcast.Broadcast 的用法示例。


下文一共展示了 Broadcast.getValue 方法的 15 个代码示例,这些示例默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞,您的评价将有助于系统推荐出更好的 Java 代码示例。

示例1: getReadsFunction

import org.apache.spark.broadcast.Broadcast; //导入方法依赖的package包/类
/**
 * Builds the per-shard function that wraps every read of a shard in a {@link ReadWalkerContext}.
 *
 * @param bReferenceSource broadcast of the reference source; when {@code null} the contexts carry a null reference
 * @param bFeatureManager broadcast of the feature manager; when {@code null} the contexts carry a null feature manager
 * @param sequenceDictionary dictionary passed to {@code expandWithinContig} and to the in-memory reference
 * @param readShardPadding padding passed to {@code expandWithinContig} when widening the shard interval
 * @return a function yielding one {@link ReadWalkerContext} per read in the shard
 */
private static FlatMapFunction<Shard<GATKRead>, ReadWalkerContext> getReadsFunction(
        Broadcast<ReferenceMultiSource> bReferenceSource, Broadcast<FeatureManager> bFeatureManager,
        SAMSequenceDictionary sequenceDictionary, int readShardPadding) {
    return (FlatMapFunction<Shard<GATKRead>, ReadWalkerContext>) readShard -> {
        // Reference bases are fetched for the shard interval widened by the requested padding.
        final SimpleInterval expandedInterval =
                readShard.getInterval().expandWithinContig(readShardPadding, sequenceDictionary);

        final ReferenceDataSource refSource;
        if (bReferenceSource == null) {
            refSource = null;
        } else {
            refSource = new ReferenceMemorySource(
                    bReferenceSource.getValue().getReferenceBases(expandedInterval), sequenceDictionary);
        }

        final FeatureManager featureManager;
        if (bFeatureManager == null) {
            featureManager = null;
        } else {
            featureManager = bFeatureManager.getValue();
        }

        return StreamSupport.stream(readShard.spliterator(), false)
                .map(read -> {
                    final SimpleInterval span = getReadInterval(read);
                    final ReferenceContext refContext = new ReferenceContext(refSource, span);
                    final FeatureContext featureContext = new FeatureContext(featureManager, span);
                    return new ReadWalkerContext(read, refContext, featureContext);
                })
                .iterator();
    };
}
 
开发者ID:broadinstitute,项目名称:gatk,代码行数:18,代码来源:ReadWalkerSpark.java

示例2: getVariantsFunction

import org.apache.spark.broadcast.Broadcast; //导入方法依赖的package包/类
/**
 * Builds the per-shard function that wraps each variant starting inside a shard in a {@link VariantWalkerContext}.
 *
 * @param bReferenceSource broadcast of the reference source; when {@code null} the contexts carry a null reference
 * @param bFeatureManager broadcast of the feature manager; when {@code null} the contexts carry a null feature manager
 * @param sequenceDictionary dictionary passed to {@code expandWithinContig} and to the in-memory reference
 * @param variantShardPadding padding passed to {@code expandWithinContig} when widening the shard interval
 * @return a function yielding one {@link VariantWalkerContext} per retained variant
 */
private static FlatMapFunction<Shard<VariantContext>, VariantWalkerContext> getVariantsFunction(
        final Broadcast<ReferenceMultiSource> bReferenceSource,
        final Broadcast<FeatureManager> bFeatureManager,
        final SAMSequenceDictionary sequenceDictionary, final int variantShardPadding) {
    return (FlatMapFunction<Shard<VariantContext>, VariantWalkerContext>) variantShard -> {
        // Reference bases are fetched for the shard interval widened by the requested padding.
        final SimpleInterval expandedInterval =
                variantShard.getInterval().expandWithinContig(variantShardPadding, sequenceDictionary);

        final ReferenceDataSource refSource;
        if (bReferenceSource == null) {
            refSource = null;
        } else {
            refSource = new ReferenceMemorySource(
                    bReferenceSource.getValue().getReferenceBases(expandedInterval), sequenceDictionary);
        }

        final FeatureManager featureManager;
        if (bFeatureManager == null) {
            featureManager = null;
        } else {
            featureManager = bFeatureManager.getValue();
        }

        return StreamSupport.stream(variantShard.spliterator(), false)
                // Keep only variants whose start position lies within the shard's [start, end] range.
                .filter(variant -> variant.getStart() >= variantShard.getStart() && variant.getStart() <= variantShard.getEnd())
                .map(variant -> {
                    final SimpleInterval span = new SimpleInterval(variant);
                    // The reads context is deliberately empty.
                    return new VariantWalkerContext(variant,
                            new ReadsContext(),
                            new ReferenceContext(refSource, span),
                            new FeatureContext(featureManager, span));
                })
                .iterator();
    };
}
 
开发者ID:broadinstitute,项目名称:gatk,代码行数:23,代码来源:VariantWalkerSpark.java

示例3: shardsToAssemblyRegions

import org.apache.spark.broadcast.Broadcast; //导入方法依赖的package包/类
/**
 * Builds a function over an iterator of read shards that produces assembly regions.
 * A single {@link HaplotypeCallerEngine} is constructed per invocation of the returned function and is shared by
 * every shard of that iterator.
 *
 * @param reference broadcast of the reference source
 * @param hcArgsBroadcast broadcast of the HaplotypeCaller arguments used to construct the engine
 * @param assemblyArgs sharding arguments; a positive {@code maxReadsPerAlignmentStart} enables a positional downsampler
 * @param header header passed to the engine, the downsampler and {@code shardToRegion}
 * @param annotatorEngineBroadcast broadcast of the annotator engine handed to the HaplotypeCaller engine
 * @return a {@link FlatMapFunction} whose output pairs each AssemblyRegion with the interval it was generated in
 */
private static FlatMapFunction<Iterator<Shard<GATKRead>>, Tuple2<AssemblyRegion, SimpleInterval>> shardsToAssemblyRegions(
        final Broadcast<ReferenceMultiSource> reference,
        final Broadcast<HaplotypeCallerArgumentCollection> hcArgsBroadcast,
        final ShardingArgumentCollection assemblyArgs,
        final SAMFileHeader header,
        final Broadcast<VariantAnnotatorEngine> annotatorEngineBroadcast) {
    return shardIterator -> {
        final ReferenceMultiSourceAdapter referenceSource = new ReferenceMultiSourceAdapter(reference.value());
        final HaplotypeCallerEngine hcEngine = new HaplotypeCallerEngine(
                hcArgsBroadcast.value(), false, false, header, referenceSource, annotatorEngineBroadcast.getValue());

        // Downsampling is applied only when a positive per-alignment-start limit is configured.
        final ReadsDownsampler readsDownsampler;
        if (assemblyArgs.maxReadsPerAlignmentStart > 0) {
            readsDownsampler = new PositionalDownsampler(assemblyArgs.maxReadsPerAlignmentStart, header);
        } else {
            readsDownsampler = null;
        }

        //TODO we've hacked multi interval shards here with a shim, but we should investigate a smarter approach https://github.com/broadinstitute/gatk/issues/4299
        return Utils.stream(shardIterator)
            .map(shard -> {
                final ShardBoundary boundary = new ShardBoundary(shard.getInterval(), shard.getPaddedInterval());
                return new ShardToMultiIntervalShardAdapter<>(
                        new DownsampleableSparkReadShard(boundary, shard, readsDownsampler));
            })
            .flatMap(shardToRegion(assemblyArgs, header, referenceSource, hcEngine))
            .iterator();
    };
}
 
开发者ID:broadinstitute,项目名称:gatk,代码行数:25,代码来源:HaplotypeCallerSpark.java

示例4: getBroadcast

import org.apache.spark.broadcast.Broadcast; //导入方法依赖的package包/类
/**
 * Looks up a broadcast by name in {@code broadcasts} and returns its value as a collection.
 *
 * @param name the name the broadcast is looked up under
 * @param <T> element type expected by the caller; the cast is unchecked
 * @return the broadcast's value cast to {@code Collection<T>}
 * @throws RheemException if the lookup yields {@code null}
 */
@Override
@SuppressWarnings("unchecked")
public <T> Collection<T> getBroadcast(String name) {
    final Broadcast<?> found = this.broadcasts.get(name);
    if (found != null) {
        return (Collection<T>) found.getValue();
    }
    throw new RheemException("No such broadcast found: " + name);
}
 
开发者ID:daqcri,项目名称:rheem,代码行数:11,代码来源:SparkExecutionContext.java

示例5: callVariantsFromAssemblyRegions

import org.apache.spark.broadcast.Broadcast; //导入方法依赖的package包/类
/**
 * Builds a function that calls variants from (AssemblyRegion, SimpleInterval) tuples.
 * The interval should be the non-padded boundary of the shard in which the paired AssemblyRegion was created;
 * it is used to eliminate redundant variant calls at the edges of shard boundaries.
 *
 * @param authHolder credentials handed to the reference adapter
 * @param header header used to construct the HaplotypeCaller engine
 * @param referenceBroadcast broadcast of the reference source
 * @param hcArgsBroadcast broadcast of the HaplotypeCaller arguments
 * @return a function turning an iterator of region/interval tuples into an iterator of variants
 */
private static FlatMapFunction<Iterator<Tuple2<AssemblyRegion, SimpleInterval>>, VariantContext> callVariantsFromAssemblyRegions(
        final AuthHolder authHolder,
        final SAMFileHeader header,
        final Broadcast<ReferenceMultiSource> referenceBroadcast,
        final Broadcast<HaplotypeCallerArgumentCollection> hcArgsBroadcast) {
    return tuples -> {
        // HaplotypeCallerEngine is not serializable and is expensive to build, so one instance is created
        // per partition and reused for every tuple in it.
        final ReferenceMultiSourceAdapter referenceReader =
                new ReferenceMultiSourceAdapter(referenceBroadcast.getValue(), authHolder);
        final HaplotypeCallerArgumentCollection hcArgs = hcArgsBroadcast.value();
        final HaplotypeCallerEngine hcEngine = new HaplotypeCallerEngine(hcArgs, header, referenceReader);

        return iteratorToStream(tuples)
                .flatMap(regionToVariants(hcEngine))
                .iterator();
    };
}
 
开发者ID:broadinstitute,项目名称:gatk-protected,代码行数:18,代码来源:HaplotypeCallerSpark.java

示例6: getAlignmentsFunction

import org.apache.spark.broadcast.Broadcast; //导入方法依赖的package包/类
/**
 * Return a function that maps a {@link Shard} of reads into a tuple of alignments and their corresponding reference and features.
 * @param bReferenceSource the reference source broadcast; when {@code null} no reference is attached to the contexts
 * @param bFeatureManager the feature manager broadcast; when {@code null} no feature manager is attached to the contexts
 * @param sequenceDictionary the sequence dictionary for the reads
 * @param header the reads header
 * @param downsamplingInfo the downsampling method for the reads
 * @param isEmitEmptyLoci value forwarded to {@code AlignmentContextIteratorBuilder.setEmitEmptyLoci}
 * @return a function that maps a {@link Shard} of reads into a tuple of alignments and their corresponding reference and features.
 */
private static FlatMapFunction<Shard<GATKRead>, LocusWalkerContext> getAlignmentsFunction(
        Broadcast<ReferenceMultiSource> bReferenceSource, Broadcast<FeatureManager> bFeatureManager,
        SAMSequenceDictionary sequenceDictionary, SAMFileHeader header, LIBSDownsamplingInfo downsamplingInfo, boolean isEmitEmptyLoci) {
    return (FlatMapFunction<Shard<GATKRead>, LocusWalkerContext>) shardedRead -> {
        SimpleInterval interval = shardedRead.getInterval();
        SimpleInterval paddedInterval = shardedRead.getPaddedInterval();
        Iterator<GATKRead> readIterator = shardedRead.iterator();
        // Reference bases are loaded for the padded interval, while loci are iterated over the unpadded one.
        ReferenceDataSource reference = bReferenceSource == null ? null :
                new ReferenceMemorySource(bReferenceSource.getValue().getReferenceBases(paddedInterval), sequenceDictionary);
        FeatureManager fm = bFeatureManager == null ? null : bFeatureManager.getValue();

        final AlignmentContextIteratorBuilder alignmentContextIteratorBuilder = new AlignmentContextIteratorBuilder();
        alignmentContextIteratorBuilder.setDownsamplingInfo(downsamplingInfo);
        alignmentContextIteratorBuilder.setEmitEmptyLoci(isEmitEmptyLoci);
        alignmentContextIteratorBuilder.setIncludeDeletions(true);
        alignmentContextIteratorBuilder.setKeepUniqueReadListInLibs(false);
        alignmentContextIteratorBuilder.setIncludeNs(false);

        // The last argument is true only when a reference source was supplied.
        final Iterator<AlignmentContext> alignmentContextIterator = alignmentContextIteratorBuilder.build(
                readIterator, header, Collections.singletonList(interval), sequenceDictionary,
                reference != null);

        return StreamSupport.stream(Spliterators.spliteratorUnknownSize(alignmentContextIterator, 0), false).map(alignmentContext -> {
            final SimpleInterval alignmentInterval = new SimpleInterval(alignmentContext);
            return new LocusWalkerContext(alignmentContext, new ReferenceContext(reference, alignmentInterval), new FeatureContext(fm, alignmentInterval));
        }).iterator();
    };
}
 
开发者ID:broadinstitute,项目名称:gatk,代码行数:40,代码来源:LocusWalkerSpark.java

示例7: getAssemblyRegionsFunction

import org.apache.spark.broadcast.Broadcast; //导入方法依赖的package包/类
/**
 * Builds the per-shard function that iterates the assembly regions of a read shard and wraps each one in an
 * {@link AssemblyRegionWalkerContext}.
 *
 * @param bReferenceSource broadcast of the reference source; when {@code null} a null reference is used
 * @param bFeatureManager broadcast of the feature manager; when {@code null} a null feature manager is used
 * @param sequenceDictionary dictionary passed to {@code expandWithinContig} and to the in-memory reference
 * @param header header passed to the {@link AssemblyRegionIterator}
 * @param evaluator evaluator passed to the {@link AssemblyRegionIterator}
 * @param minAssemblyRegionSize passed through to the {@link AssemblyRegionIterator}
 * @param maxAssemblyRegionSize passed through to the {@link AssemblyRegionIterator}
 * @param assemblyRegionPadding used to widen the shard's padded interval and passed through to the iterator
 * @param activeProbThreshold passed through to the {@link AssemblyRegionIterator}
 * @param maxProbPropagationDistance passed through to the {@link AssemblyRegionIterator}
 * @param includeReadsWithDeletionsInIsActivePileups passed through to the {@link AssemblyRegionIterator}
 * @return a function yielding one {@link AssemblyRegionWalkerContext} per assembly region of the shard
 */
private static FlatMapFunction<Shard<GATKRead>, AssemblyRegionWalkerContext> getAssemblyRegionsFunction(
        final Broadcast<ReferenceMultiSource> bReferenceSource,
        final Broadcast<FeatureManager> bFeatureManager,
        final SAMSequenceDictionary sequenceDictionary,
        final SAMFileHeader header,
        final AssemblyRegionEvaluator evaluator,
        final int minAssemblyRegionSize,
        final int maxAssemblyRegionSize,
        final int assemblyRegionPadding,
        final double activeProbThreshold,
        final int maxProbPropagationDistance,
        final boolean includeReadsWithDeletionsInIsActivePileups) {
    return (FlatMapFunction<Shard<GATKRead>, AssemblyRegionWalkerContext>) readShard -> {
        // The shard's padded interval is widened once more by the assembly-region padding
        // before reference bases are fetched.
        final SimpleInterval shardPadded = readShard.getPaddedInterval();
        final SimpleInterval referenceWindow = shardPadded.expandWithinContig(assemblyRegionPadding, sequenceDictionary);

        final ReferenceDataSource refSource;
        if (bReferenceSource == null) {
            refSource = null;
        } else {
            refSource = new ReferenceMemorySource(
                    bReferenceSource.getValue().getReferenceBases(referenceWindow), sequenceDictionary);
        }

        final FeatureManager featureManager;
        if (bFeatureManager == null) {
            featureManager = null;
        } else {
            featureManager = bFeatureManager.getValue();
        }

        final Iterator<AssemblyRegion> regionIterator = new AssemblyRegionIterator(
                new ShardToMultiIntervalShardAdapter<>(readShard),
                header, refSource, featureManager, evaluator,
                minAssemblyRegionSize, maxAssemblyRegionSize, assemblyRegionPadding, activeProbThreshold,
                maxProbPropagationDistance, includeReadsWithDeletionsInIsActivePileups);

        // Expose the iterator through an Iterable so it can be handed to Utils.stream.
        final Iterable<AssemblyRegion> regions = () -> regionIterator;
        return Utils.stream(regions)
                .map(region -> {
                    final ReferenceContext refContext = new ReferenceContext(refSource, region.getExtendedSpan());
                    final FeatureContext featureContext = new FeatureContext(featureManager, region.getExtendedSpan());
                    return new AssemblyRegionWalkerContext(region, refContext, featureContext);
                })
                .iterator();
    };
}
 
开发者ID:broadinstitute,项目名称:gatk,代码行数:33,代码来源:AssemblyRegionWalkerSpark.java

示例8: getExternalCNVCallAnnotation

import org.apache.spark.broadcast.Broadcast; //导入方法依赖的package包/类
/**
 * Formats every external CNV call overlapping the given variant span into one comma-separated string.
 *
 * @param refLoc supplies the contig and the start position of the queried interval
 * @param end end position of the queried interval
 * @param vcBuilder not used by this method
 * @param broadcastSequenceDictionary broadcast dictionary used to translate the contig name into a sequence index
 * @param broadcastCNVCalls broadcast interval tree of external CNV calls
 * @param sampleId sample id passed to {@code formatExternalCNVCallAnnotation}
 * @return the formatted overlapping calls joined by {@code ","}
 */
private static String getExternalCNVCallAnnotation(final SimpleInterval refLoc,
                                                   final int end,
                                                   final VariantContextBuilder vcBuilder,
                                                   final Broadcast<SAMSequenceDictionary> broadcastSequenceDictionary,
                                                   final Broadcast<SVIntervalTree<VariantContext>> broadcastCNVCalls,
                                                   final String sampleId) {
    final int contigIndex = broadcastSequenceDictionary.getValue().getSequenceIndex(refLoc.getContig());
    final SVInterval queryInterval = new SVInterval(contigIndex, refLoc.getStart(), end);
    final SVIntervalTree<VariantContext> externalCalls = broadcastCNVCalls.getValue();

    return Utils.stream(externalCalls.overlappers(queryInterval))
            .map(entry -> formatExternalCNVCallAnnotation(entry.getValue(), sampleId))
            .collect(Collectors.joining(","));
}
 
开发者ID:broadinstitute,项目名称:gatk,代码行数:15,代码来源:AnnotatedVariantProducer.java

示例9: experimentalInterpretation

import org.apache.spark.broadcast.Broadcast; //导入方法依赖的package包/类
/**
 * Runs the experimental interpretation path: converts the successfully assembled contig alignments into reads,
 * builds a new {@link SvDiscoveryInputData} around them that writes to {@code expVariantsOutDir}, and hands it to
 * {@link SvDiscoverFromLocalAssemblyContigAlignmentsSpark}. Does nothing when {@code expVariantsOutDir} is null.
 *
 * @param ctx Spark context used to parallelize the converted reads
 * @param assembledEvidenceResults source of the aligned assemblies (failed ones are filtered out)
 * @param svDiscoveryInputData original input data whose fields are copied into the updated input
 * @param nonCanonicalChromosomeNamesFile passed to {@code preprocess}
 */
private void experimentalInterpretation(final JavaSparkContext ctx,
                                        final FindBreakpointEvidenceSpark.AssembledEvidenceResults assembledEvidenceResults,
                                        final SvDiscoveryInputData svDiscoveryInputData,
                                        final String nonCanonicalChromosomeNamesFile) {

    if (expVariantsOutDir == null) {
        return;
    }

    final Broadcast<SAMSequenceDictionary> dictionaryBroadcast = svDiscoveryInputData.referenceSequenceDictionaryBroadcast;
    final Broadcast<SAMFileHeader> headerBroadcast = svDiscoveryInputData.headerBroadcast;

    // Inputs needed to turn each assembly's alignments into SAM records.
    final SAMFileHeader readsHeader = headerBroadcast.getValue();
    final SAMReadGroupRecord alignmentsReadGroup = new SAMReadGroupRecord(SVUtils.GATKSV_CONTIG_ALIGNMENTS_READ_GROUP_ID);
    final List<String> contigNames = SequenceDictionaryUtils.getContigNamesList(dictionaryBroadcast.getValue());

    final List<GATKRead> assemblyAlignments = assembledEvidenceResults.getAlignedAssemblyOrExcuseList().stream()
            .filter(AlignedAssemblyOrExcuse::isNotFailure)
            .flatMap(assembly -> assembly.toSAMStreamForAlignmentsOfThisAssembly(readsHeader, contigNames, alignmentsReadGroup))
            .map(SAMRecordToGATKReadAdapter::new)
            .collect(Collectors.toList());
    final JavaRDD<GATKRead> alignmentsRdd = ctx.parallelize(assemblyAlignments);

    final String sample = svDiscoveryInputData.sampleId;
    final Broadcast<ReferenceMultiSource> refBroadcast = svDiscoveryInputData.referenceBroadcast;
    final Broadcast<SVIntervalTree<VariantContext>> externalCnvCalls = svDiscoveryInputData.cnvCallsBroadcast;

    // Same as the incoming input data, except for the output directory and the reads.
    final SvDiscoveryInputData updatedInput = new SvDiscoveryInputData(
            sample, svDiscoveryInputData.discoverStageArgs, expVariantsOutDir,
            svDiscoveryInputData.metadata, svDiscoveryInputData.assembledIntervals,
            svDiscoveryInputData.evidenceTargetLinks, alignmentsRdd, svDiscoveryInputData.toolLogger,
            refBroadcast, dictionaryBroadcast, headerBroadcast, externalCnvCalls);

    final EnumMap<AssemblyContigAlignmentSignatureClassifier.RawTypes, JavaRDD<AssemblyContigWithFineTunedAlignments>> contigsByRawType =
            SvDiscoverFromLocalAssemblyContigAlignmentsSpark.preprocess(updatedInput, nonCanonicalChromosomeNamesFile, true);

    SvDiscoverFromLocalAssemblyContigAlignmentsSpark.dispatchJobs(contigsByRawType, updatedInput);
}
 
开发者ID:broadinstitute,项目名称:gatk,代码行数:40,代码来源:StructuralVariationDiscoveryPipelineSpark.java

示例10: callVariantsFromAssemblyRegions

import org.apache.spark.broadcast.Broadcast; //导入方法依赖的package包/类
/**
 * Builds a function that calls variants from (AssemblyRegion, SimpleInterval) tuples.
 * The interval should be the non-padded boundary of the shard in which the paired AssemblyRegion was created;
 * it is used to eliminate redundant variant calls at the edges of shard boundaries.
 *
 * @param header header used to construct the HaplotypeCaller engine
 * @param referenceBroadcast broadcast of the reference source
 * @param hcArgsBroadcast broadcast of the HaplotypeCaller arguments
 * @param annotatorEngineBroadcast broadcast of the annotator engine handed to the HaplotypeCaller engine
 * @return a function turning an iterator of region/interval tuples into an iterator of variants
 */
private static FlatMapFunction<Iterator<Tuple2<AssemblyRegion, SimpleInterval>>, VariantContext> callVariantsFromAssemblyRegions(
        final SAMFileHeader header,
        final Broadcast<ReferenceMultiSource> referenceBroadcast,
        final Broadcast<HaplotypeCallerArgumentCollection> hcArgsBroadcast,
        final Broadcast<VariantAnnotatorEngine> annotatorEngineBroadcast) {
    return tuples -> {
        // HaplotypeCallerEngine is not serializable and is expensive to build, so one instance is created
        // per partition and reused for every tuple in it.
        final ReferenceMultiSourceAdapter referenceSource = new ReferenceMultiSourceAdapter(referenceBroadcast.value());
        final HaplotypeCallerEngine hcEngine = new HaplotypeCallerEngine(
                hcArgsBroadcast.value(), false, false, header, referenceSource, annotatorEngineBroadcast.getValue());

        return Utils.stream(tuples)
                .flatMap(regionToVariants(hcEngine))
                .iterator();
    };
}
 
开发者ID:broadinstitute,项目名称:gatk,代码行数:19,代码来源:HaplotypeCallerSpark.java

示例11: SecondIterationFunctionAdapter

import org.apache.spark.broadcast.Broadcast; //导入方法依赖的package包/类
public SecondIterationFunctionAdapter(Broadcast<Map<String, Object>> word2vecVarMapBroadcast,
                Broadcast<double[]> expTableBroadcast, Broadcast<VocabCache<VocabWord>> vocabCacheBroadcast) {

    Map<String, Object> word2vecVarMap = word2vecVarMapBroadcast.getValue();
    this.expTable = expTableBroadcast.getValue();
    this.vectorLength = (int) word2vecVarMap.get("vectorLength");
    this.useAdaGrad = (boolean) word2vecVarMap.get("useAdaGrad");
    this.negative = (double) word2vecVarMap.get("negative");
    this.window = (int) word2vecVarMap.get("window");
    this.alpha = (double) word2vecVarMap.get("alpha");
    this.minAlpha = (double) word2vecVarMap.get("minAlpha");
    this.totalWordCount = (long) word2vecVarMap.get("totalWordCount");
    this.seed = (long) word2vecVarMap.get("seed");
    this.maxExp = (int) word2vecVarMap.get("maxExp");
    this.iterations = (int) word2vecVarMap.get("iterations");
    this.batchSize = (int) word2vecVarMap.get("batchSize");

    // this.indexSyn0VecMap = new HashMap<>();
    // this.pointSyn1VecMap = new HashMap<>();

    this.vocab = vocabCacheBroadcast.getValue();


    if (this.vocab == null)
        throw new RuntimeException("VocabCache is null");


}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:29,代码来源:SecondIterationFunction.java

示例12: FirstIterationFunctionAdapter

import org.apache.spark.broadcast.Broadcast; //导入方法依赖的package包/类
public FirstIterationFunctionAdapter(Broadcast<Map<String, Object>> word2vecVarMapBroadcast,
                Broadcast<double[]> expTableBroadcast, Broadcast<VocabCache<VocabWord>> vocabCacheBroadcast) {

    Map<String, Object> word2vecVarMap = word2vecVarMapBroadcast.getValue();
    this.expTable = expTableBroadcast.getValue();
    this.vectorLength = (int) word2vecVarMap.get("vectorLength");
    this.useAdaGrad = (boolean) word2vecVarMap.get("useAdaGrad");
    this.negative = (double) word2vecVarMap.get("negative");
    this.window = (int) word2vecVarMap.get("window");
    this.alpha = (double) word2vecVarMap.get("alpha");
    this.minAlpha = (double) word2vecVarMap.get("minAlpha");
    this.totalWordCount = (long) word2vecVarMap.get("totalWordCount");
    this.seed = (long) word2vecVarMap.get("seed");
    this.maxExp = (int) word2vecVarMap.get("maxExp");
    this.iterations = (int) word2vecVarMap.get("iterations");
    this.batchSize = (int) word2vecVarMap.get("batchSize");
    this.indexSyn0VecMap = new HashMap<>();
    this.pointSyn1VecMap = new HashMap<>();
    this.vocab = vocabCacheBroadcast.getValue();

    if (this.vocab == null)
        throw new RuntimeException("VocabCache is null");

    if (negative > 0) {
        negativeHolder = NegativeHolder.getInstance();
        negativeHolder.initHolder(vocab, expTable, this.vectorLength);
    }
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:29,代码来源:FirstIterationFunctionAdapter.java

示例13: setRDDVarMap

import org.apache.spark.broadcast.Broadcast; //导入方法依赖的package包/类
/**
 * Stores the corpus RDD and copies the tokenizer settings out of the broadcast variable map.
 * Every map entry is cast to its expected type without any prior check.
 *
 * @param corpusRDD the corpus to store on this instance
 * @param broadcasTokenizerVarMap broadcast map holding the tokenizer settings under the keys read below
 */
public void setRDDVarMap(JavaRDD<String> corpusRDD, Broadcast<Map<String, Object>> broadcasTokenizerVarMap) {
    final Map<String, Object> settings = broadcasTokenizerVarMap.getValue();

    this.corpusRDD = corpusRDD;
    this.numWords = (int) settings.get("numWords");

    // Tokenizer function settings
    this.nGrams = (int) settings.get("nGrams");
    this.tokenizer = (String) settings.get("tokenizer");
    this.tokenizerPreprocessor = (String) settings.get("tokenPreprocessor");
    this.useUnk = (boolean) settings.get("useUnk");
    this.configuration = (VectorsConfiguration) settings.get("vectorsConfiguration");

    // Stop words are read unconditionally; the "removeStop" entry is not consulted.
    this.stopWords = (List<String>) settings.get("stopWords");
}
 
开发者ID:deeplearning4j,项目名称:deeplearning4j,代码行数:16,代码来源:TextPipeline.java

示例14: testBroadcastHcArgs

import org.apache.spark.broadcast.Broadcast; //导入方法依赖的package包/类
/**
 * Smoke test: broadcasts a default {@link HaplotypeCallerArgumentCollection} through the test Spark context
 * and reads the value back. There are no assertions; the test passes as long as neither step throws.
 */
@Test
public void testBroadcastHcArgs() {
    final Broadcast<HaplotypeCallerArgumentCollection> hcArgsBroadcast =
            SparkContextFactory.getTestSparkContext().broadcast(new HaplotypeCallerArgumentCollection());
    hcArgsBroadcast.getValue();
}
 
开发者ID:broadinstitute,项目名称:gatk-protected,代码行数:6,代码来源:HaplotypeCallerSparkIntegrationTest.java

示例15: ChunkProcessingTask

import org.apache.spark.broadcast.Broadcast; //导入方法依赖的package包/类
/**
 * Reads the values of the three broadcasts at construction time and stores them in fields.
 *
 * @param transform broadcast whose value is stored in {@code fTansform}
 * @param header broadcast whose value is stored in {@code fHeader}
 * @param exporterProperties broadcast whose value is stored in {@code fProperites}
 */
public ChunkProcessingTask(Broadcast<String> transform,
                           Broadcast<String> header, Broadcast<Properties> exporterProperties) {
    this.fHeader = header.getValue();
    this.fTansform = transform.getValue();
    this.fProperites = exporterProperties.getValue();
}
 
开发者ID:fusepoolP3,项目名称:p3-batchrefine,代码行数:7,代码来源:ChunkProcessingTask.java


注:本文中的org.apache.spark.broadcast.Broadcast.getValue方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。