当前位置: 首页>>代码示例>>Java>>正文


Java SAMFileHeader.getSequenceDictionary方法代码示例

本文整理汇总了Java中htsjdk.samtools.SAMFileHeader.getSequenceDictionary方法的典型用法代码示例。如果您正苦于以下问题:Java SAMFileHeader.getSequenceDictionary方法的具体用法?Java SAMFileHeader.getSequenceDictionary怎么用?Java SAMFileHeader.getSequenceDictionary使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在htsjdk.samtools.SAMFileHeader的用法示例。


在下文中一共展示了SAMFileHeader.getSequenceDictionary方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: VariantCallingEngine

import htsjdk.samtools.SAMFileHeader; //导入方法依赖的package包/类
/**
 *  constructor
 * @param options
 * @param samFileHeader
 */
public VariantCallingEngine(GenotyperOptions options, SAMFileHeader samFileHeader) {
    this.options = options;
    genomeLocationParser = new GenomeLocationParser(samFileHeader.getSequenceDictionary());
    samples = new HashSet<>();
    for(SAMReadGroupRecord rg : samFileHeader.getReadGroups()) {
        samples.add(rg.getSample());
    }

    GenotypeLikelihoodCalculator.getGenotypeLikelihoodsCalculatorObject(options);
    GenotypeLikelihoodCalculator.getCalculators(options);

    this.N = samples.size() * options.getSamplePloidy();
    log10AlleleFrequencyPriorsSNPs = new double[N+1];
    log10AlleleFrequencyPriorsIndels = new double[N+1];
    computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, options.getHeterozygosity());
    computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, options.getIndelHeterozygosity());
    filter.add(LOW_QUAL_FILTER_NAME);
    tracker = new VariantDataTracker();

    annotationEngine = new VariantAnnotatorEngine(options.getAnnotationGroups(), options.getAnnotations(), null);
}
 
开发者ID:BGI-flexlab,项目名称:SOAPgaea,代码行数:27,代码来源:VariantCallingEngine.java

示例2: setupPipeline

import htsjdk.samtools.SAMFileHeader; //导入方法依赖的package包/类
/**
 * Assembles the duplicate-marking pipeline.
 *
 * <p>Reads are loaded from {@code bam}, limited to the user-specified intervals when any were
 * given and to all intervals of the reference otherwise, passed through {@code MarkDuplicates}
 * and written to {@code outputFile}. When {@code metricsFile} is set, duplication metrics are
 * generated from the marked reads and written as well.
 *
 * @param pipeline the pipeline to which the transforms are added
 */
@Override
protected void setupPipeline(final Pipeline pipeline) {
    final ReadsDataflowSource source = new ReadsDataflowSource(bam, pipeline);
    final SAMFileHeader header = source.getHeader();
    final SAMSequenceDictionary dictionary = header.getSequenceDictionary();

    final List<SimpleInterval> intervals;
    if (intervalArgumentCollection.intervalsSpecified()) {
        intervals = intervalArgumentCollection.getIntervals(dictionary);
    } else {
        intervals = IntervalUtils.getAllIntervalsForReference(dictionary);
    }

    final PCollectionView<SAMFileHeader> headerView =
            pipeline.apply(Create.of(header)).apply(View.<SAMFileHeader>asSingleton());

    final PCollection<GATKRead> inputReads = source.getReadPCollection(intervals);

    // No finder is built when no read-name regex is configured.
    final OpticalDuplicateFinder finder;
    if (opticalDuplicatesArgumentCollection.READ_NAME_REGEX == null) {
        finder = null;
    } else {
        finder = new OpticalDuplicateFinder(
                opticalDuplicatesArgumentCollection.READ_NAME_REGEX,
                opticalDuplicatesArgumentCollection.OPTICAL_DUPLICATE_PIXEL_DISTANCE,
                null);
    }
    final PCollectionView<OpticalDuplicateFinder> finderView =
            pipeline.apply(Create.of(finder)).apply(View.<OpticalDuplicateFinder>asSingleton());

    final PCollection<GATKRead> markedReads = inputReads.apply(new MarkDuplicates(headerView, finderView));

    // TODO: support writing large output files (need a sharded BAM writer)
    SmallBamWriter.writeToFile(pipeline, markedReads, header, outputFile);

    if (metricsFile == null) {
        return;
    }
    final PCollection<KV<String,DuplicationMetrics>> duplicationMetrics =
            markedReads.apply(new MarkDuplicatesDataflowUtils.GenerateMetricsTransform(headerView));
    MarkDuplicatesDataflowUtils.writeMetricsToFile(pipeline, duplicationMetrics, header, metricsFile);
}
 
开发者ID:broadinstitute,项目名称:gatk-dataflow,代码行数:27,代码来源:MarkDuplicatesDataflow.java

示例3: setupPipeline

import htsjdk.samtools.SAMFileHeader; //导入方法依赖的package包/类
/**
 * Assembles the pipeline that applies a recalibration report to the reads of one input file.
 *
 * <p>Only a single read file is supported; more than one is rejected with a
 * {@code UserException}. Reads are loaded for the specified intervals (or for all intervals of
 * the reference when none are specified), transformed by {@code ApplyBQSRTransform} and written
 * to {@code intermediateRemoteBam}, which is either {@code OUTPUT} or a random path under the
 * staging location when an intermediate copy is needed.
 *
 * @param pipeline the pipeline to which the transforms are added
 */
@Override
protected void setupPipeline(Pipeline pipeline) {
    if (readArguments.getReadFilesNames().size() > 1) {
        throw new UserException("Sorry, we only support a single input file for now.");
    }
    final String inputName = readArguments.getReadFilesNames().get(0);

    final ReadsDataflowSource source = new ReadsDataflowSource(inputName, pipeline);
    final SAMFileHeader header = source.getHeader();
    final PCollectionView<SAMFileHeader> headerView = pipeline.apply(Create.of(header)).apply(View.asSingleton());
    final SAMSequenceDictionary dictionary = header.getSequenceDictionary();

    final List<SimpleInterval> intervals;
    if (intervalArgumentCollection.intervalsSpecified()) {
        intervals = intervalArgumentCollection.getIntervals(dictionary);
    } else {
        intervals = IntervalUtils.getAllIntervalsForReference(dictionary);
    }

    final PCollectionView<BaseRecalOutput> recalView =
            BaseRecalOutputSource.loadFileOrRemote(pipeline, BQSR_RECAL_FILE_NAME).apply(View.asSingleton());

    final PCollection<GATKRead> loadedReads = source.getReadPCollection(intervals, ValidationStringency.SILENT, false);
    final PCollection<GATKRead> recalibratedReads =
            loadedReads.apply(new ApplyBQSRTransform(headerView, recalView, bqsrOpts));

    intermediateRemoteBam = OUTPUT;
    if (needsIntermediateCopy()) {
        // Remote execution with a local output name: results go to remote storage first,
        // which may require additional permissions.
        intermediateRemoteBam = BucketUtils.randomRemotePath(stagingLocation, "temp-applyBqsr-output-", ".bam");
        logger.info("Staging results at " + intermediateRemoteBam);
    }
    SmallBamWriter.writeToFile(pipeline, recalibratedReads, header, intermediateRemoteBam);
}
 
开发者ID:broadinstitute,项目名称:gatk-dataflow,代码行数:25,代码来源:ApplyBQSRDataflow.java

示例4: IndelRealigner

import htsjdk.samtools.SAMFileHeader; //导入方法依赖的package包/类
/**
 * Creates an indel realigner for one window.
 *
 * <p>Stores the supplied collaborators, builds a genome location parser from the header's
 * sequence dictionary and then runs {@code initialization()}.
 *
 * @param mHeader header whose sequence dictionary backs the location parser
 * @param variants variant contexts kept by the realigner
 * @param win window kept by the realigner
 * @param chrInfo chromosome information kept by the realigner
 * @param option realigner options
 */
public IndelRealigner(SAMFileHeader mHeader, ArrayList<VariantContext> variants, Window win,
		ChromosomeInformationShare chrInfo, RealignerOptions option) {
	this.option = option;
	this.win = win;
	this.chrInfo = chrInfo;
	this.variants = variants;
	this.parser = new GenomeLocationParser(mHeader.getSequenceDictionary());

	// Runs last so that every field assigned above is already set.
	initialization();
}
 
开发者ID:BGI-flexlab,项目名称:SOAPgaea,代码行数:10,代码来源:IndelRealigner.java

示例5: IdentifyRegionsCreator

import htsjdk.samtools.SAMFileHeader; //导入方法依赖的package包/类
/**
 * Creates a region identifier over a set of records and known indels.
 *
 * <p>Builds a genome location parser from the header's sequence dictionary, starts with an
 * empty interval list and takes the maximum interval size from the options.
 *
 * @param option realigner options; also the source of the maximum interval size
 * @param records records kept by this creator
 * @param mHeader header whose sequence dictionary backs the location parser
 * @param chr chromosome information kept by this creator
 * @param knowIndels known indel variant contexts kept by this creator
 */
public IdentifyRegionsCreator(RealignerOptions option, ArrayList<GaeaSamRecord> records, SAMFileHeader mHeader,
		ChromosomeInformationShare chr, ArrayList<VariantContext> knowIndels) {
	this.option = option;
	this.chr = chr;
	this.records = records;
	this.knowIndels = knowIndels;

	this.parser = new GenomeLocationParser(mHeader.getSequenceDictionary());
	this.intervals = new ArrayList<GenomeLocation>();
	maxIntervalSize = option.getMaxInterval();
}
 
开发者ID:BGI-flexlab,项目名称:SOAPgaea,代码行数:11,代码来源:IdentifyRegionsCreator.java

示例6: setupPipeline

import htsjdk.samtools.SAMFileHeader; //导入方法依赖的package包/类
/**
 * Assembles a reads pipeline whose result is written out as text.
 *
 * <p>Reads are loaded from {@code bam} for the specified intervals (or for all intervals of the
 * reference when none are specified), handed to {@code applyTransformsToPipeline}, converted to
 * strings and written to {@code outputFile}.
 *
 * @param pipeline the pipeline to which the transforms are added
 */
@Override
protected final void setupPipeline(Pipeline pipeline) {
    final ReadsDataflowSource source = new ReadsDataflowSource(bam, pipeline);
    final SAMFileHeader header = source.getHeader();
    final SAMSequenceDictionary dictionary = header.getSequenceDictionary();

    final List<SimpleInterval> intervals;
    if (intervalArgumentCollection.intervalsSpecified()) {
        intervals = intervalArgumentCollection.getIntervals(dictionary);
    } else {
        intervals = getAllIntervalsForReference(dictionary);
    }

    final PCollection<GATKRead> inputReads = source.getReadPCollection(intervals, ValidationStringency.SILENT, false);
    final PCollection<?> transformed = applyTransformsToPipeline(header, inputReads);

    final PCollection<String> lines = transformed.apply(DataflowUtils.convertToString());
    lines.apply(TextIO.Write.to(outputFile));
}
 
开发者ID:broadinstitute,项目名称:gatk-dataflow,代码行数:16,代码来源:DataflowReadsPipeline.java

示例7: testLocalFile

import htsjdk.samtools.SAMFileHeader; //导入方法依赖的package包/类
/**
 * Counts the reads loaded from a local BAM file over all intervals of its reference and checks
 * the total. The test is currently disabled.
 */
@Test(enabled = false)
public void testLocalFile() {
    final String localBam = "src/test/resources/org/broadinstitute/hellbender/tools/BQSR/HiSeq.1mb.1RG.2k_lines.alternate.bam";
    Pipeline pipeline = GATKTestPipeline.create();
    DataflowUtils.registerGATKCoders(pipeline);

    ReadsDataflowSource source = new ReadsDataflowSource(localBam, pipeline);
    SAMFileHeader header = source.getHeader();
    final SAMSequenceDictionary dictionary = header.getSequenceDictionary();
    final List<SimpleInterval> wholeReference = IntervalUtils.getAllIntervalsForReference(dictionary);

    PCollection<GATKRead> loadedReads = source.getReadPCollection(wholeReference, ValidationStringency.SILENT, true);
    PCollection<Long> readCount = loadedReads.apply(Count.globally());

    // NOTE(review): the previous comment here said only 1649 reads are returned because unmapped
    // reads are removed, but the assertion expects 1674 -- confirm which figure is current.
    DataflowAssert.thatSingleton(readCount).isEqualTo(1674L);
    pipeline.run();
}
 
开发者ID:broadinstitute,项目名称:gatk-dataflow,代码行数:16,代码来源:ReadsDataflowSourceTest.java

示例8: testGetInvalidPCollectionLocal

import htsjdk.samtools.SAMFileHeader; //导入方法依赖的package包/类
/**
 * Loads every read of {@code hiSeqBam} with silent validation and checks the total count. The
 * test is currently disabled.
 */
@Test( enabled = false)
public void testGetInvalidPCollectionLocal() {
    // With ValidationStringency.SILENT no read error is expected, even though the input
    // contains what looks like invalid reads.
    Pipeline pipeline = GATKTestPipeline.create();
    ReadsDataflowSource readsSource = new ReadsDataflowSource(hiSeqBam, pipeline);
    SAMFileHeader header = readsSource.getHeader();
    final SAMSequenceDictionary dictionary = header.getSequenceDictionary();
    DataflowUtils.registerGATKCoders(pipeline);

    final List<SimpleInterval> wholeReference = IntervalUtils.getAllIntervalsForReference(dictionary);
    PCollection<GATKRead> loadedReads = readsSource.getReadPCollection(wholeReference, ValidationStringency.SILENT, false);
    PCollection<Long> readCount = loadedReads.apply(Count.globally());

    // The file holds 1677 reads in total.
    DataflowAssert.thatSingleton(readCount).isEqualTo(1677L);
    pipeline.run();
}
 
开发者ID:broadinstitute,项目名称:gatk-dataflow,代码行数:15,代码来源:ReadsDataflowSourceTest.java

示例9: makeAnnotatedCall

import htsjdk.samtools.SAMFileHeader; //导入方法依赖的package包/类
/**
 * Annotates a call using an in-memory reference built from the supplied bases.
 *
 * <p>The reference context spans the locus of {@code mergedVC} inside the window {@code refLoc}.
 * If the call has the same number of alleles as {@code mergedVC} the annotated result is returned
 * directly; otherwise it is first passed through
 * {@code GATKVariantContextUtils.reverseTrimAlleles}.
 *
 * @param ref reference bases for {@code refLoc}
 * @param refLoc interval covered by {@code ref}
 * @param tracker feature context handed to the annotation engine
 * @param header header supplying the sequence dictionary for the in-memory reference
 * @param mergedVC variant context whose contig, start and end define the locus
 * @param readAlleleLikelihoods likelihoods handed to the annotation engine
 * @param call the call to annotate
 * @return the annotated call, reverse-trimmed when the allele counts differ
 */
protected VariantContext makeAnnotatedCall(byte[] ref, SimpleInterval refLoc, FeatureContext tracker, SAMFileHeader header, VariantContext mergedVC, ReadLikelihoods<Allele> readAlleleLikelihoods, VariantContext call) {
    final SimpleInterval callLocus = new SimpleInterval(mergedVC.getContig(), mergedVC.getStart(), mergedVC.getEnd());
    final SimpleInterval referenceWindow = new SimpleInterval(refLoc);

    final ReferenceBases windowBases = new ReferenceBases(ref, referenceWindow);
    final ReferenceDataSource inMemoryReference = new ReferenceMemorySource(windowBases, header.getSequenceDictionary());
    final ReferenceContext context = new ReferenceContext(inMemoryReference, callLocus, referenceWindow);

    // Every allele is accepted by the predicate passed to the annotation engine.
    final VariantContext annotated = annotationEngine.annotateContext(call, tracker, context, readAlleleLikelihoods, allele -> true);

    final boolean sameAlleleCount = call.getAlleles().size() == mergedVC.getAlleles().size();
    if (sameAlleleCount) {
        return annotated;
    }
    return GATKVariantContextUtils.reverseTrimAlleles(annotated);
}
 
开发者ID:broadinstitute,项目名称:gatk-protected,代码行数:11,代码来源:HaplotypeCallerGenotypingEngine.java

示例10: importIntoHdfs

import htsjdk.samtools.SAMFileHeader; //导入方法依赖的package包/类
/**
 * Copies the reads of one contig of a remote, indexed BAM file into a BAM file on the given
 * file system.
 *
 * <p>The BAM is opened from {@code weburl} with its index expected at {@code weburl + ".bai"}.
 * The contig is chosen by matching the sequence length against {@code REFERENCE_LENGTH}; if
 * several sequences match, the last one wins. Reads that cannot be decoded are counted as bad
 * and skipped instead of being written.
 *
 * @param weburl URL of the BAM file; its last path segment becomes the target file name
 * @param fileSystem file system on which the target file is created
 * @param path target directory, combined with the file name through {@code HdfsUtil.path}
 * @return {@code true} when the reads were written; {@code false} when no sequence of the
 *         expected length exists, in which case {@code error} holds the message
 * @throws IOException if opening the source, creating the target or closing the reader fails
 */
public boolean importIntoHdfs(String weburl, FileSystem fileSystem,
		String path) throws IOException {

	// check if bai is available else download whole file

	// try-with-resources guarantees the reader is released on every exit path.
	try (SamReader reader = SamReaderFactory.makeDefault()
			.validationStringency(htsjdk.samtools.ValidationStringency.SILENT)
			.open(SamInputResource.of(new URL(weburl)).index(new URL(weburl + ".bai")))) {

		// path in hdfs
		String[] tiles = weburl.split("/");
		String name = tiles[tiles.length - 1];
		String target = HdfsUtil.path(path, name);

		SAMFileHeader header = reader.getFileHeader();
		SAMSequenceDictionary seqDictionary = header.getSequenceDictionary();

		String referenceName = null;
		for (SAMSequenceRecord record : seqDictionary.getSequences()) {
			if (record.getSequenceLength() == REFERENCE_LENGTH) {
				referenceName = record.getSequenceName();
			}
		}

		if (referenceName == null) {
			error = "No mitochondrial contig found in " + weburl + ".";
			return false;
		}

		int good = 0;
		int bad = 0;
		int written = 0;

		// Iterator and writer are closed (in that order) even if copying fails midway.
		try (SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(
				header, true, fileSystem.create(new Path(target)));
				SAMRecordIterator reads = reader.query(referenceName, 0, 0, false)) {
			while (reads.hasNext()) {
				SAMRecord read;
				try {
					read = reads.next();
				} catch (RuntimeException e) {
					// Best effort: an undecodable read is counted and skipped. Previously the
					// prior record (or null) was written again in its place.
					bad++;
					continue;
				}
				good++;
				writer.addAlignment(read);
				written++;
			}
		}

		System.out.println("Bad reads: " + bad);
		System.out.println("Good reads: " + good);
		System.out.println("Written reads: " + written);

		return true;
	}
}
 
开发者ID:seppinho,项目名称:mutation-server,代码行数:64,代码来源:ImporterBamHttp.java

示例11: bamSpanForIntervals

import htsjdk.samtools.SAMFileHeader; //导入方法依赖的package包/类
/**
 * Computes the BAM file span for the given query intervals from an on-disk BAM index.
 *
 * <p>The index is closed as soon as the span has been computed, so it is not left open after
 * this method returns or throws.
 *
 * @param index the BAM index file
 * @param header header whose sequence dictionary is used to open the index
 * @param intervals the intervals to look up
 * @return the file span reported by {@code BAMFileReader.getFileSpan} for the intervals
 */
@NotNull
private static BAMFileSpan bamSpanForIntervals(@NotNull final File index, @NotNull final SAMFileHeader header,
        @NotNull final QueryInterval[] intervals) {
    final BAMIndex bamIndex = new DiskBasedBAMFileIndex(index, header.getSequenceDictionary(), false);
    try {
        return BAMFileReader.getFileSpan(intervals, bamIndex);
    } finally {
        // Explicit close rather than try-with-resources, so this only relies on BAMIndex.close().
        bamIndex.close();
    }
}
 
开发者ID:hartwigmedical,项目名称:hmftools,代码行数:7,代码来源:BamSlicerApplication.java

示例12: setupPipeline

import htsjdk.samtools.SAMFileHeader; //导入方法依赖的package包/类
/**
 * Assembles the reads preprocessing pipeline: duplicate marking, base recalibration and
 * application of the recalibration to the marked reads.
 *
 * <p>Reads come from {@code bam}, limited to the specified intervals or to all intervals of the
 * reference. The marked reads are joined with reference and known-variant context, fed to the
 * base recalibrator, and the resulting report is applied to the marked reads, which are then
 * written to {@code output}.
 *
 * @param pipeline the pipeline to which the transforms are added
 */
@Override
protected void setupPipeline( Pipeline pipeline ) {
    // Reads and their header.
    final ReadsDataflowSource readsSource = new ReadsDataflowSource(bam, pipeline);
    final SAMFileHeader readsHeader = readsSource.getHeader();

    final List<SimpleInterval> intervals;
    if (intervalArgumentCollection.intervalsSpecified()) {
        intervals = intervalArgumentCollection.getIntervals(readsHeader.getSequenceDictionary());
    } else {
        intervals = IntervalUtils.getAllIntervalsForReference(readsHeader.getSequenceDictionary());
    }

    final PCollectionView<SAMFileHeader> headerView = ReadsDataflowSource.getHeaderView(pipeline, readsHeader);
    final PCollection<GATKRead> loadedReads = readsSource.getReadPCollection(intervals);

    // No finder is built when no read-name regex is configured.
    final OpticalDuplicateFinder finder;
    if (opticalDuplicatesArgumentCollection.READ_NAME_REGEX == null) {
        finder = null;
    } else {
        finder = new OpticalDuplicateFinder(
                opticalDuplicatesArgumentCollection.READ_NAME_REGEX,
                opticalDuplicatesArgumentCollection.OPTICAL_DUPLICATE_PIXEL_DISTANCE,
                null);
    }
    final PCollectionView<OpticalDuplicateFinder> finderView =
            pipeline.apply(Create.of(finder)).apply(View.<OpticalDuplicateFinder>asSingleton());

    // Duplicate marking.
    final PCollection<GATKRead> markedReads = loadedReads.apply(new MarkDuplicates(headerView, finderView));

    // Known variants and reference, joined to the marked reads.
    final VariantsDataflowSource knownVariantsSource = new VariantsDataflowSource(baseRecalibrationKnownVariants, pipeline);

    // BQSR_REFERENCE_WINDOW_FUNCTION makes sure the reference bases BQSR needs for each read are fetched.
    final ReferenceMultiSource referenceSource =
            new ReferenceMultiSource(pipeline.getOptions(), referenceURL, BaseRecalibrationEngine.BQSR_REFERENCE_WINDOW_FUNCTION);

    final PCollection<KV<GATKRead, ReadContextData>> readsWithContext =
            AddContextDataToRead.add(markedReads, referenceSource, knownVariantsSource);

    // Base recalibration; the default arguments are best practice.
    RecalibrationArgumentCollection recalArgs = new RecalibrationArgumentCollection();
    final SAMSequenceDictionary readsDictionary = readsHeader.getSequenceDictionary();
    final SAMSequenceDictionary refDictionary = referenceSource.getReferenceSequenceDictionary(readsDictionary);
    checkSequenceDictionaries(refDictionary, readsDictionary);
    PCollectionView<SAMSequenceDictionary> refDictionaryView =
            pipeline.apply(Create.of(refDictionary)).setName("refDictionary").apply(View.asSingleton());
    BaseRecalibratorTransform recalibrator = new BaseRecalibratorTransform(headerView, refDictionaryView, recalArgs);
    final PCollection<BaseRecalOutput> reports =
            readsWithContext.apply(recalibrator).apply(recalibrator.toBaseRecalOutput());
    final PCollectionView<BaseRecalOutput> mergedReport = reports.apply(View.<BaseRecalOutput>asSingleton());

    // Apply the merged report to the marked reads and write the result.
    final ApplyBQSRArgumentCollection applyArgs = new ApplyBQSRArgumentCollection();
    final PCollection<GATKRead> recalibratedReads =
            markedReads.apply(new ApplyBQSRTransform(headerView, mergedReport, applyArgs));
    SmallBamWriter.writeToFile(pipeline, recalibratedReads, readsHeader, output);
}
 
开发者ID:broadinstitute,项目名称:gatk-dataflow,代码行数:42,代码来源:ReadsPreprocessingPipeline.java


注:本文中的htsjdk.samtools.SAMFileHeader.getSequenceDictionary方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。