当前位置: 首页>>代码示例>>Java>>正文


Java ReferenceSequence.getBases方法代码示例

本文整理汇总了Java中htsjdk.samtools.reference.ReferenceSequence.getBases方法的典型用法代码示例。如果您正苦于以下问题:Java ReferenceSequence.getBases方法的具体用法?Java ReferenceSequence.getBases怎么用?Java ReferenceSequence.getBases使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在htsjdk.samtools.reference.ReferenceSequence的用法示例。


在下文中一共展示了ReferenceSequence.getBases方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getReferenceBase

import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
private char getReferenceBase(long pos)
		throws ArrayIndexOutOfBoundsException, IOException {
	int refID = (int) (pos >> 32);
	if (refID == cachedRefID) {
		return (char) cachedRefSequence[(int) pos];
	}
	SAMSequenceRecord seq = secramHeader.getSamFileHeader().getSequence(
			refID);
	ReferenceSequence rs = mRsf.getSequence(seq.getSequenceName());

	if (rs == null || rs.length() != seq.getSequenceLength()) {
		System.err.println("Could not find the reference sequence "
				+ seq.getSequenceName() + " in the file");
		throw new IOException("No such sequence in file");
	}

	cachedRefID = refID;
	cachedRefSequence = rs.getBases();

	return (char) cachedRefSequence[(int) pos];
}
 
开发者ID:acs6610987,项目名称:secram,代码行数:22,代码来源:SECRAMIterator.java

示例2: getReferenceBase

import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
/**
 * Get the reference base of the position.
 * @param pos
 *            The absolute position. Chromosome ID is in the higher 32 bits,
 *            and chromosome position is in the lower 32 bits.
 * @throws ArrayIndexOutOfBoundsException
 * @throws IOException
 */
private char getReferenceBase(long pos)
		throws ArrayIndexOutOfBoundsException, IOException {
	int refID = (int) (pos >> 32);
	if (refID == cachedRefID) {
		return (char) cachedRefSequence[(int) pos];
	}
	SAMSequenceRecord seq = mSAMFileHeader.getSequence(refID);
	ReferenceSequence rs = mRsf.getSequence(seq.getSequenceName());

	if (rs == null || rs.length() != seq.getSequenceLength()) {
		System.err.println("Could not find the reference sequence "
				+ seq.getSequenceName() + " in the file");
		throw new IOException("No such sequence in file");
	}

	cachedRefID = refID;
	cachedRefSequence = rs.getBases();

	return (char) cachedRefSequence[(int) pos];
}
 
开发者ID:acs6610987,项目名称:secram,代码行数:29,代码来源:Bam2Secram.java

示例3: checkOutputGCContent

import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
private void checkOutputGCContent(final ReferenceFileSource reference, final SimpleInterval interval, final double observed) {
    final ReferenceSequence sequence = reference.queryAndPrefetch(interval);
    int total = 0;
    int gc = 0;
    for (final byte base : sequence.getBases()) {
        switch (Character.toLowerCase(base)) {
            case 'g':
            case 'c':
                gc++; total++; break;
            case 'a':
            case 't':
            case 'u':
                total++; break;
            default:
        }
    }
    final double expectedGCContent = total == 0 ? Double.NaN : ((double) gc / (double) total);
    if (Double.isNaN(expectedGCContent)) {
        Assert.assertTrue(Double.isNaN(observed));
    } else {
        Assert.assertEquals(observed, expectedGCContent, 0.0001);
    }
}
 
开发者ID:broadinstitute,项目名称:gatk-protected,代码行数:24,代码来源:AnnotateTargetsIntegrationTest.java

示例4: makeSequenceRecord

import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
/**
 * Create one SAMSequenceRecord from a single fasta sequence
 */
private SAMSequenceRecord makeSequenceRecord(final ReferenceSequence refSeq) {
    final SAMSequenceRecord ret = new SAMSequenceRecord(refSeq.getName(), refSeq.length());

    // Compute MD5 of upcased bases
    final byte[] bases = refSeq.getBases();
    for (int i = 0; i < bases.length; ++i) {
            bases[i] = StringUtil.toUpperCase(bases[i]);
        }

    ret.setAttribute(SAMSequenceRecord.MD5_TAG, md5Hash(bases));
    if (GENOME_ASSEMBLY != null) {
        ret.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, GENOME_ASSEMBLY);
    }
    ret.setAttribute(SAMSequenceRecord.URI_TAG, URI);
    if (SPECIES != null) {
            ret.setAttribute(SAMSequenceRecord.SPECIES_TAG, SPECIES);
        }
    return ret;
}
 
开发者ID:broadinstitute,项目名称:picard,代码行数:23,代码来源:CreateSequenceDictionary.java

示例5: calculateRefWindowsByGc

import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
public static int[] calculateRefWindowsByGc(final int windows, final File referenceSequence, final int windowSize) {
    final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceSequence);
    ReferenceSequence ref;

    final int [] windowsByGc = new int [windows];

    while ((ref = refFile.nextSequence()) != null) {
        final byte[] refBases = ref.getBases();
        StringUtil.toUpperCase(refBases);
        final int refLength = refBases.length;
        final int lastWindowStart = refLength - windowSize;

        final CalculateGcState state = new GcBiasUtils().new CalculateGcState();

        for (int i = 1; i < lastWindowStart; ++i) {
            final int windowEnd = i + windowSize;
            final int gcBin = calculateGc(refBases, i, windowEnd, state);
            if (gcBin != -1) windowsByGc[gcBin]++;
        }
    }

    return windowsByGc;
}
 
开发者ID:broadinstitute,项目名称:picard,代码行数:24,代码来源:GcBiasUtils.java

示例6: addReference

import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
/**
 * This loads a FASTA file into the memory, can load multi-FASTA files, can call multiple times
 *
 * @param filename FASTA file with fai index
 */
public void addReference(String filename) {
    File f = new File(filename);
    FastaSequenceFile fa = new FastaSequenceFile(f, true);
    ReferenceSequence seq = fa.nextSequence();
    while (seq != null) {
        ChrString name = new ChrString(seq.getName());
        byte[] seq_bytes = seq.getBases();

        if (seq_bytes == null) {
            log.error("Contig error: " + seq.getName());
        } else {
            log.info("Read ref: " + seq.getName() + ":" + name);
            if (!data.containsKey(name)) {
                Sequence contig = new Sequence(seq.getName(),
                        seq_bytes, seq_bytes.length);
                data.put(name, contig);
            } else {
                log.warn("Duplicate Key!");
            }
        }
        seq = fa.nextSequence();
    }
}
 
开发者ID:bioinform,项目名称:varsim,代码行数:29,代码来源:SimpleReference.java

示例7: tryToAddVariant

import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
/**
 *  utility function to attempt to add a variant. Checks that the reference allele still matches the reference (which may have changed)
 *
 * @param vc new {@link VariantContext}
 * @param refSeq {@link ReferenceSequence} of new reference
 * @param source the original {@link VariantContext} to use for putting the original location information into vc
 * @return true if successful, false if failed due to mismatching reference allele.
 */
private void tryToAddVariant(final VariantContext vc, final ReferenceSequence refSeq, final VariantContext source) {
    if (!refSeq.getName().equals(vc.getContig())) {
        throw new IllegalStateException("The contig of the VariantContext, " + vc.getContig() + ", doesnt match the ReferenceSequence: " + refSeq.getName());
    }

    // Check that the reference allele still agrees with the reference sequence
    boolean mismatchesReference = false;
    for (final Allele allele : vc.getAlleles()) {
        if (allele.isReference()) {
            final byte[] ref = refSeq.getBases();
            final String refString = StringUtil.bytesToString(ref, vc.getStart() - 1, vc.getEnd() - vc.getStart() + 1);

            if (!refString.equalsIgnoreCase(allele.getBaseString())) {
                // consider that the ref and the alt may have been swapped in a simple biallelic SNP
                if (vc.isBiallelic() && vc.isSNP() &&
                        refString.equalsIgnoreCase(vc.getAlternateAllele(0).getBaseString())) {
                    sorter.add(LiftoverUtils.swapRefAlt(vc, TAGS_TO_REVERSE, TAGS_TO_DROP));
                    return;
                }
                mismatchesReference = true;
            }
            break;
        }
    }

    if (mismatchesReference) {
        rejects.add(new VariantContextBuilder(source)
                .filter(FILTER_MISMATCHING_REF_ALLELE)
                .attribute(ATTEMPTED_LOCUS, String.format("%s:%d-%d", vc.getContig(), vc.getStart(), vc.getEnd()))
                .make());
        failedAlleleCheck++;
        trackLiftedVariantContig(rejectsByContig, source.getContig());
    } else {
        trackLiftedVariantContig(liftedBySourceContig, source.getContig());
        trackLiftedVariantContig(liftedByDestContig, vc.getContig());
        sorter.add(vc);
    }
}
 
开发者ID:broadinstitute,项目名称:picard,代码行数:47,代码来源:LiftoverVcf.java

示例8: acceptRecord

import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
@Override
public void acceptRecord(final GcBiasCollectorArgs args) {
    final SAMRecord rec = args.getRec();
    if (logCounter < 100 && rec.getReadBases().length == 0) {
        log.warn("Omitting read " + rec.getReadName() + " with '*' in SEQ field.");
        if (++logCounter == 100) {
            log.warn("There are more than 100 reads with '*' in SEQ field in file.");
        }
        return;
    }
    if (!rec.getReadUnmappedFlag()) {
        if (referenceIndex != rec.getReferenceIndex() || gc == null) {
            final ReferenceSequence ref = args.getRef();
            refBases = ref.getBases();
            StringUtil.toUpperCase(refBases);
            final int refLength = refBases.length;
            final int lastWindowStart = refLength - scanWindowSize;
            gc = GcBiasUtils.calculateAllGcs(refBases, lastWindowStart, scanWindowSize);
            referenceIndex = rec.getReferenceIndex();
        }

        addReadToGcData(rec, this.gcData);
        if (ignoreDuplicates && !rec.getDuplicateReadFlag()) {
            addReadToGcData(rec, this.gcDataNonDups);
        }
    } else {
        updateTotalClusters(rec, this.gcData);
        if (ignoreDuplicates && !rec.getDuplicateReadFlag()) {
            updateTotalClusters(rec, this.gcDataNonDups);
        }
    }
}
 
开发者ID:broadinstitute,项目名称:picard,代码行数:33,代码来源:GcBiasMetricsCollector.java

示例9: doWork

import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INTERVAL_LIST);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    IOUtil.assertFileIsWritable(OUTPUT);

    final IntervalList intervals = IntervalList.fromFile(INTERVAL_LIST);
    final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
    SequenceUtil.assertSequenceDictionariesEqual(intervals.getHeader().getSequenceDictionary(), ref.getSequenceDictionary());

    final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);

    for (final Interval interval : intervals) {
        final ReferenceSequence seq = ref.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
        final byte[] bases = seq.getBases();
        if (interval.isNegativeStrand()) SequenceUtil.reverseComplement(bases);

        try {
            out.write(">");
            out.write(interval.getName());
            out.write("\n");

            for (int i=0; i<bases.length; ++i) {
                if (i > 0 && i % LINE_LENGTH == 0) out.write("\n");
                out.write(bases[i]);
            }

            out.write("\n");
        }
        catch (IOException ioe) {
            throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);

        }
    }

    CloserUtil.close(out);

    return 0;
}
 
开发者ID:broadinstitute,项目名称:picard,代码行数:40,代码来源:ExtractSequences.java

示例10: getReferenceBases

import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
@Override
public ReferenceBases getReferenceBases(final SimpleInterval interval) throws IOException {
    try ( ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(getReferencePath()) ) {
        ReferenceSequence sequence = referenceSequenceFile.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
        return new ReferenceBases(sequence.getBases(), interval);
    }
}
 
开发者ID:broadinstitute,项目名称:gatk,代码行数:8,代码来源:ReferenceFileSource.java

示例11: main

import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
public static void main(String[] args) {
	ReferenceSequenceFile rsf = ReferenceSequenceFileFactory
			.getReferenceSequenceFile(new File("data/hs37d5.fa"));
	SAMSequenceDictionary dict = rsf.getSequenceDictionary();
	ReferenceSequence rs1 = rsf.nextSequence(), rs2 = rsf.nextSequence();
	ReferenceSequence rs = rsf.getSequence("1"); // get reference sequence
													// by sequence name
	System.out.println(rs.getName());
	System.out.println(rs.getContigIndex());
	System.out.println(rs.length());
	System.out.println(rs.getBases().length);
	System.out.println("===============");
	byte[] bases = rs.getBases();
	int count[] = new int[256];
	for (int i = 0; i < bases.length; i++) {
		count[bases[i]]++;
	}
	for (int i = 0; i < count.length; i++) {
		System.out.println((char) i + " " + count[i]);
	}

	rs = rsf.getSequence("22");
	System.out.println(rs.getName());
	System.out.println(rs.getContigIndex());
	System.out.println(rs.length());
	System.out.println(rs.getBases().length);
	System.out.println("===============");

	rs = rsf.getSequence("X");
	System.out.println(rs.getName());
	System.out.println(rs.getContigIndex());
	System.out.println(rs.length());
	System.out.println(rs.getBases().length);
	System.out.println(rs.getBases()[rs.length() - 1]);

	rs = rsf.getSubsequenceAt("X", 0, 2);
	System.out.println(rs.getName());
	System.out.println(rs.getContigIndex());
	System.out.println(rs.length());
	System.out.println(rs.getBases().length);
	System.out.println(String.format("%02x", rs.getBases()[0]));

	// for (int i=0;i<bases.length;++i) {
	// int unsigned = bases[i]&0xff;
	//
	// System.out.println("\""+(char)(unsigned)+"\" ("+unsigned+")");
	// }
}
 
开发者ID:acs6610987,项目名称:secram,代码行数:49,代码来源:ExampleFastaRefUsage.java

示例12: acceptRecord

import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
public void acceptRecord(final SAMRecordAndReference args) {
	mappedRecordCount++;

	final SAMRecord samRecord = args.getSamRecord();
	final ReferenceSequence referenceSequence = args.getReferenceSequence();

	final byte[] readBases = samRecord.getReadBases();
	final byte[] readQualities = samRecord.getBaseQualities();
	final byte[] refBases = referenceSequence.getBases();

	if (samRecord.getReadLength() < minReadLength) {
		smallReadCount++;
		return;
	} else if (SequenceUtil.countMismatches(samRecord, refBases, true) > Math.round(samRecord.getReadLength() * maxMismatchRate)) {
		mismatchCount++;
		return;
	}

	// We only record non-CpG C sites if there was at least one CpG in the read, keep track of
	// the values for this record and then apply to the global totals if valid
	int recordCpgs = 0;

	for (final AlignmentBlock alignmentBlock : samRecord.getAlignmentBlocks()) {
		final int blockLength = alignmentBlock.getLength();
		final int refFragmentStart = alignmentBlock.getReferenceStart() - 1;
		final int readFragmentStart = alignmentBlock.getReadStart() - 1;

		final byte[] refFragment = getFragment(refBases, refFragmentStart, blockLength);
		final byte[] readFragment = getFragment(readBases, readFragmentStart, blockLength);
		final byte[] readQualityFragment = getFragment(readQualities, readFragmentStart, blockLength);

		if (samRecord.getReadNegativeStrandFlag()) {
			// In the case of a negative strand, reverse (and complement for base arrays) the reference,
			// reads & qualities so that it can be treated as a positive strand for the rest of the process
			SequenceUtil.reverseComplement(refFragment);
			SequenceUtil.reverseComplement(readFragment);
			SequenceUtil.reverseQualities(readQualityFragment);
		}

		for (int i=0; i < blockLength-1; i++) {
			final int curRefIndex = getCurRefIndex(refFragmentStart, blockLength, i, samRecord.getReadNegativeStrandFlag());

			// Look at a 2-base window to see if we're on a CpG site, and if so check for conversion
			// (CG -> TG). We do not consider ourselves to be on a CpG site if we're on the last base of a read
			if ((SequenceUtil.basesEqual(refFragment[i], SequenceUtil.C)) &&
					(SequenceUtil.basesEqual(refFragment[i+1], SequenceUtil.G))) {
				// We want to catch the case where there's a CpG in the reference, even if it is not valid
				// to prevent the C showing up as a non-CpG C down below. Otherwise this could have been all
				// in one if statement
				if (isValidCpg(refFragment, readFragment, readQualityFragment, i)) {
					recordCpgs++;
					final CpgLocation curLocation = new CpgLocation(samRecord.getReferenceName(), curRefIndex);
					cpgTotal.increment(curLocation);
					if (SequenceUtil.isBisulfiteConverted(readFragment[i], refFragment[i])) {
						cpgConverted.increment(curLocation);
					}
				}
				i++;
			} else if (isC(refFragment[i], readFragment[i]) && isAboveCytoQcThreshold(readQualities, i) &&
					SequenceUtil.bisulfiteBasesEqual(false, readFragment[i+1], refFragment[i+1])) {
				// C base in the reference that's not associated with a CpG
				nCytoTotal++;
				if (SequenceUtil.isBisulfiteConverted(readFragment[i], refFragment[i])) {
					nCytoConverted++;
				}
			}
		}
	}

	if (recordCpgs == 0) {
		noCpgCount++;
	}
}
 
开发者ID:broadinstitute,项目名称:picard,代码行数:74,代码来源:RrbsMetricsCollector.java

示例13: doWork

import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    if (INPUT.getAbsoluteFile().equals(OUTPUT.getAbsoluteFile())) {
        throw new IllegalArgumentException("Input and output cannot be the same file.");
    }

    final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(INPUT, TRUNCATE_SEQUENCE_NAMES_AT_WHITESPACE);
    final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);

    ReferenceSequence seq = null;
    while ((seq = ref.nextSequence()) != null) {
        final String name  = seq.getName();
        final byte[] bases = seq.getBases();

        try {
            out.write(">");
            out.write(name);
            out.newLine();

            if (bases.length == 0) {
                log.warn("Sequence " + name + " contains 0 bases.");
            }
            else {
                for (int i=0; i<bases.length; ++i) {
                    if (i > 0 && i % LINE_LENGTH == 0) out.write("\n");
                    out.write(bases[i]);
                }

                out.write("\n");
            }
        }
        catch (IOException ioe) {
            throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);

        }
    }
    try {
        out.close();
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
    return 0;
}
 
开发者ID:broadinstitute,项目名称:picard,代码行数:47,代码来源:NormalizeFasta.java

示例14: doWork

import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    // set up the reference and a mask so that we only count the positions requested by the user
    final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(INPUT);
    final ReferenceSequenceMask referenceSequenceMask;
    if (INTERVALS != null) {
        IOUtil.assertFileIsReadable(INTERVALS);
        final IntervalList intervalList = IntervalList.fromFile(INTERVALS);
        referenceSequenceMask = new IntervalListReferenceSequenceMask(intervalList);
    } else {
        final SAMFileHeader header = new SAMFileHeader();
        header.setSequenceDictionary(ref.getSequenceDictionary());
        referenceSequenceMask = new WholeGenomeReferenceSequenceMask(header);
    }

    long nonNbases = 0L;

    for (final SAMSequenceRecord rec : ref.getSequenceDictionary().getSequences()) {
        // pull out the contig and set up the bases
        final ReferenceSequence sequence = ref.getSequence(rec.getSequenceName());
        final byte[] bases = sequence.getBases();
        StringUtil.toUpperCase(bases);

        for (int i = 0; i < bases.length; i++) {
            // only investigate this position if it's within our mask
            if (referenceSequenceMask.get(sequence.getContigIndex(), i+1)) {
                nonNbases += bases[i] == SequenceUtil.N ? 0 : 1;
            }
        }
    }

    try {
        final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);
        out.write(nonNbases + "\n");
        out.close();
    }
    catch (IOException ioe) {
        throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);
    }

    return 0;
}
 
开发者ID:broadinstitute,项目名称:picard,代码行数:46,代码来源:NonNFastaSize.java

示例15: getReferenceBases

import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
@Override
public ReferenceBases getReferenceBases(final SimpleInterval interval) {
    ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(IOUtils.getPath(referencePath));
    ReferenceSequence sequence = referenceSequenceFile.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
    return new ReferenceBases(sequence.getBases(), interval);
}
 
开发者ID:broadinstitute,项目名称:gatk,代码行数:7,代码来源:ReferenceHadoopSource.java


注:本文中的htsjdk.samtools.reference.ReferenceSequence.getBases方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。