本文整理汇总了Java中htsjdk.samtools.reference.ReferenceSequence.getBases方法的典型用法代码示例。如果您正苦于以下问题:Java ReferenceSequence.getBases方法的具体用法?Java ReferenceSequence.getBases怎么用?Java ReferenceSequence.getBases使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类htsjdk.samtools.reference.ReferenceSequence
的用法示例。
在下文中一共展示了ReferenceSequence.getBases方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getReferenceBase
import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
private char getReferenceBase(long pos)
throws ArrayIndexOutOfBoundsException, IOException {
int refID = (int) (pos >> 32);
if (refID == cachedRefID) {
return (char) cachedRefSequence[(int) pos];
}
SAMSequenceRecord seq = secramHeader.getSamFileHeader().getSequence(
refID);
ReferenceSequence rs = mRsf.getSequence(seq.getSequenceName());
if (rs == null || rs.length() != seq.getSequenceLength()) {
System.err.println("Could not find the reference sequence "
+ seq.getSequenceName() + " in the file");
throw new IOException("No such sequence in file");
}
cachedRefID = refID;
cachedRefSequence = rs.getBases();
return (char) cachedRefSequence[(int) pos];
}
示例2: getReferenceBase
import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
/**
* Get the reference base of the position.
* @param pos
* The absolute position. Chromosome ID is in the higher 32 bits,
* and chromosome position is in the lower 32 bits.
* @throws ArrayIndexOutOfBoundsException
* @throws IOException
*/
private char getReferenceBase(long pos)
throws ArrayIndexOutOfBoundsException, IOException {
int refID = (int) (pos >> 32);
if (refID == cachedRefID) {
return (char) cachedRefSequence[(int) pos];
}
SAMSequenceRecord seq = mSAMFileHeader.getSequence(refID);
ReferenceSequence rs = mRsf.getSequence(seq.getSequenceName());
if (rs == null || rs.length() != seq.getSequenceLength()) {
System.err.println("Could not find the reference sequence "
+ seq.getSequenceName() + " in the file");
throw new IOException("No such sequence in file");
}
cachedRefID = refID;
cachedRefSequence = rs.getBases();
return (char) cachedRefSequence[(int) pos];
}
示例3: checkOutputGCContent
import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
private void checkOutputGCContent(final ReferenceFileSource reference, final SimpleInterval interval, final double observed) {
final ReferenceSequence sequence = reference.queryAndPrefetch(interval);
int total = 0;
int gc = 0;
for (final byte base : sequence.getBases()) {
switch (Character.toLowerCase(base)) {
case 'g':
case 'c':
gc++; total++; break;
case 'a':
case 't':
case 'u':
total++; break;
default:
}
}
final double expectedGCContent = total == 0 ? Double.NaN : ((double) gc / (double) total);
if (Double.isNaN(expectedGCContent)) {
Assert.assertTrue(Double.isNaN(observed));
} else {
Assert.assertEquals(observed, expectedGCContent, 0.0001);
}
}
示例4: makeSequenceRecord
import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
/**
* Create one SAMSequenceRecord from a single fasta sequence
*/
private SAMSequenceRecord makeSequenceRecord(final ReferenceSequence refSeq) {
final SAMSequenceRecord ret = new SAMSequenceRecord(refSeq.getName(), refSeq.length());
// Compute MD5 of upcased bases
final byte[] bases = refSeq.getBases();
for (int i = 0; i < bases.length; ++i) {
bases[i] = StringUtil.toUpperCase(bases[i]);
}
ret.setAttribute(SAMSequenceRecord.MD5_TAG, md5Hash(bases));
if (GENOME_ASSEMBLY != null) {
ret.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, GENOME_ASSEMBLY);
}
ret.setAttribute(SAMSequenceRecord.URI_TAG, URI);
if (SPECIES != null) {
ret.setAttribute(SAMSequenceRecord.SPECIES_TAG, SPECIES);
}
return ret;
}
示例5: calculateRefWindowsByGc
import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
public static int[] calculateRefWindowsByGc(final int windows, final File referenceSequence, final int windowSize) {
final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceSequence);
ReferenceSequence ref;
final int [] windowsByGc = new int [windows];
while ((ref = refFile.nextSequence()) != null) {
final byte[] refBases = ref.getBases();
StringUtil.toUpperCase(refBases);
final int refLength = refBases.length;
final int lastWindowStart = refLength - windowSize;
final CalculateGcState state = new GcBiasUtils().new CalculateGcState();
for (int i = 1; i < lastWindowStart; ++i) {
final int windowEnd = i + windowSize;
final int gcBin = calculateGc(refBases, i, windowEnd, state);
if (gcBin != -1) windowsByGc[gcBin]++;
}
}
return windowsByGc;
}
示例6: addReference
import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
/**
* This loads a FASTA file into the memory, can load multi-FASTA files, can call multiple times
*
* @param filename FASTA file with fai index
*/
public void addReference(String filename) {
File f = new File(filename);
FastaSequenceFile fa = new FastaSequenceFile(f, true);
ReferenceSequence seq = fa.nextSequence();
while (seq != null) {
ChrString name = new ChrString(seq.getName());
byte[] seq_bytes = seq.getBases();
if (seq_bytes == null) {
log.error("Contig error: " + seq.getName());
} else {
log.info("Read ref: " + seq.getName() + ":" + name);
if (!data.containsKey(name)) {
Sequence contig = new Sequence(seq.getName(),
seq_bytes, seq_bytes.length);
data.put(name, contig);
} else {
log.warn("Duplicate Key!");
}
}
seq = fa.nextSequence();
}
}
示例7: tryToAddVariant
import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
/**
* utility function to attempt to add a variant. Checks that the reference allele still matches the reference (which may have changed)
*
* @param vc new {@link VariantContext}
* @param refSeq {@link ReferenceSequence} of new reference
* @param source the original {@link VariantContext} to use for putting the original location information into vc
* @return true if successful, false if failed due to mismatching reference allele.
*/
private void tryToAddVariant(final VariantContext vc, final ReferenceSequence refSeq, final VariantContext source) {
if (!refSeq.getName().equals(vc.getContig())) {
throw new IllegalStateException("The contig of the VariantContext, " + vc.getContig() + ", doesnt match the ReferenceSequence: " + refSeq.getName());
}
// Check that the reference allele still agrees with the reference sequence
boolean mismatchesReference = false;
for (final Allele allele : vc.getAlleles()) {
if (allele.isReference()) {
final byte[] ref = refSeq.getBases();
final String refString = StringUtil.bytesToString(ref, vc.getStart() - 1, vc.getEnd() - vc.getStart() + 1);
if (!refString.equalsIgnoreCase(allele.getBaseString())) {
// consider that the ref and the alt may have been swapped in a simple biallelic SNP
if (vc.isBiallelic() && vc.isSNP() &&
refString.equalsIgnoreCase(vc.getAlternateAllele(0).getBaseString())) {
sorter.add(LiftoverUtils.swapRefAlt(vc, TAGS_TO_REVERSE, TAGS_TO_DROP));
return;
}
mismatchesReference = true;
}
break;
}
}
if (mismatchesReference) {
rejects.add(new VariantContextBuilder(source)
.filter(FILTER_MISMATCHING_REF_ALLELE)
.attribute(ATTEMPTED_LOCUS, String.format("%s:%d-%d", vc.getContig(), vc.getStart(), vc.getEnd()))
.make());
failedAlleleCheck++;
trackLiftedVariantContig(rejectsByContig, source.getContig());
} else {
trackLiftedVariantContig(liftedBySourceContig, source.getContig());
trackLiftedVariantContig(liftedByDestContig, vc.getContig());
sorter.add(vc);
}
}
示例8: acceptRecord
import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
@Override
public void acceptRecord(final GcBiasCollectorArgs args) {
final SAMRecord rec = args.getRec();
if (logCounter < 100 && rec.getReadBases().length == 0) {
log.warn("Omitting read " + rec.getReadName() + " with '*' in SEQ field.");
if (++logCounter == 100) {
log.warn("There are more than 100 reads with '*' in SEQ field in file.");
}
return;
}
if (!rec.getReadUnmappedFlag()) {
if (referenceIndex != rec.getReferenceIndex() || gc == null) {
final ReferenceSequence ref = args.getRef();
refBases = ref.getBases();
StringUtil.toUpperCase(refBases);
final int refLength = refBases.length;
final int lastWindowStart = refLength - scanWindowSize;
gc = GcBiasUtils.calculateAllGcs(refBases, lastWindowStart, scanWindowSize);
referenceIndex = rec.getReferenceIndex();
}
addReadToGcData(rec, this.gcData);
if (ignoreDuplicates && !rec.getDuplicateReadFlag()) {
addReadToGcData(rec, this.gcDataNonDups);
}
} else {
updateTotalClusters(rec, this.gcData);
if (ignoreDuplicates && !rec.getDuplicateReadFlag()) {
updateTotalClusters(rec, this.gcDataNonDups);
}
}
}
示例9: doWork
import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
@Override
protected int doWork() {
IOUtil.assertFileIsReadable(INTERVAL_LIST);
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
IOUtil.assertFileIsWritable(OUTPUT);
final IntervalList intervals = IntervalList.fromFile(INTERVAL_LIST);
final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
SequenceUtil.assertSequenceDictionariesEqual(intervals.getHeader().getSequenceDictionary(), ref.getSequenceDictionary());
final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);
for (final Interval interval : intervals) {
final ReferenceSequence seq = ref.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
final byte[] bases = seq.getBases();
if (interval.isNegativeStrand()) SequenceUtil.reverseComplement(bases);
try {
out.write(">");
out.write(interval.getName());
out.write("\n");
for (int i=0; i<bases.length; ++i) {
if (i > 0 && i % LINE_LENGTH == 0) out.write("\n");
out.write(bases[i]);
}
out.write("\n");
}
catch (IOException ioe) {
throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);
}
}
CloserUtil.close(out);
return 0;
}
示例10: getReferenceBases
import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
@Override
public ReferenceBases getReferenceBases(final SimpleInterval interval) throws IOException {
try ( ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(getReferencePath()) ) {
ReferenceSequence sequence = referenceSequenceFile.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
return new ReferenceBases(sequence.getBases(), interval);
}
}
示例11: main
import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
public static void main(String[] args) {
ReferenceSequenceFile rsf = ReferenceSequenceFileFactory
.getReferenceSequenceFile(new File("data/hs37d5.fa"));
SAMSequenceDictionary dict = rsf.getSequenceDictionary();
ReferenceSequence rs1 = rsf.nextSequence(), rs2 = rsf.nextSequence();
ReferenceSequence rs = rsf.getSequence("1"); // get reference sequence
// by sequence name
System.out.println(rs.getName());
System.out.println(rs.getContigIndex());
System.out.println(rs.length());
System.out.println(rs.getBases().length);
System.out.println("===============");
byte[] bases = rs.getBases();
int count[] = new int[256];
for (int i = 0; i < bases.length; i++) {
count[bases[i]]++;
}
for (int i = 0; i < count.length; i++) {
System.out.println((char) i + " " + count[i]);
}
rs = rsf.getSequence("22");
System.out.println(rs.getName());
System.out.println(rs.getContigIndex());
System.out.println(rs.length());
System.out.println(rs.getBases().length);
System.out.println("===============");
rs = rsf.getSequence("X");
System.out.println(rs.getName());
System.out.println(rs.getContigIndex());
System.out.println(rs.length());
System.out.println(rs.getBases().length);
System.out.println(rs.getBases()[rs.length() - 1]);
rs = rsf.getSubsequenceAt("X", 0, 2);
System.out.println(rs.getName());
System.out.println(rs.getContigIndex());
System.out.println(rs.length());
System.out.println(rs.getBases().length);
System.out.println(String.format("%02x", rs.getBases()[0]));
// for (int i=0;i<bases.length;++i) {
// int unsigned = bases[i]&0xff;
//
// System.out.println("\""+(char)(unsigned)+"\" ("+unsigned+")");
// }
}
示例12: acceptRecord
import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
public void acceptRecord(final SAMRecordAndReference args) {
mappedRecordCount++;
final SAMRecord samRecord = args.getSamRecord();
final ReferenceSequence referenceSequence = args.getReferenceSequence();
final byte[] readBases = samRecord.getReadBases();
final byte[] readQualities = samRecord.getBaseQualities();
final byte[] refBases = referenceSequence.getBases();
if (samRecord.getReadLength() < minReadLength) {
smallReadCount++;
return;
} else if (SequenceUtil.countMismatches(samRecord, refBases, true) > Math.round(samRecord.getReadLength() * maxMismatchRate)) {
mismatchCount++;
return;
}
// We only record non-CpG C sites if there was at least one CpG in the read, keep track of
// the values for this record and then apply to the global totals if valid
int recordCpgs = 0;
for (final AlignmentBlock alignmentBlock : samRecord.getAlignmentBlocks()) {
final int blockLength = alignmentBlock.getLength();
final int refFragmentStart = alignmentBlock.getReferenceStart() - 1;
final int readFragmentStart = alignmentBlock.getReadStart() - 1;
final byte[] refFragment = getFragment(refBases, refFragmentStart, blockLength);
final byte[] readFragment = getFragment(readBases, readFragmentStart, blockLength);
final byte[] readQualityFragment = getFragment(readQualities, readFragmentStart, blockLength);
if (samRecord.getReadNegativeStrandFlag()) {
// In the case of a negative strand, reverse (and complement for base arrays) the reference,
// reads & qualities so that it can be treated as a positive strand for the rest of the process
SequenceUtil.reverseComplement(refFragment);
SequenceUtil.reverseComplement(readFragment);
SequenceUtil.reverseQualities(readQualityFragment);
}
for (int i=0; i < blockLength-1; i++) {
final int curRefIndex = getCurRefIndex(refFragmentStart, blockLength, i, samRecord.getReadNegativeStrandFlag());
// Look at a 2-base window to see if we're on a CpG site, and if so check for conversion
// (CG -> TG). We do not consider ourselves to be on a CpG site if we're on the last base of a read
if ((SequenceUtil.basesEqual(refFragment[i], SequenceUtil.C)) &&
(SequenceUtil.basesEqual(refFragment[i+1], SequenceUtil.G))) {
// We want to catch the case where there's a CpG in the reference, even if it is not valid
// to prevent the C showing up as a non-CpG C down below. Otherwise this could have been all
// in one if statement
if (isValidCpg(refFragment, readFragment, readQualityFragment, i)) {
recordCpgs++;
final CpgLocation curLocation = new CpgLocation(samRecord.getReferenceName(), curRefIndex);
cpgTotal.increment(curLocation);
if (SequenceUtil.isBisulfiteConverted(readFragment[i], refFragment[i])) {
cpgConverted.increment(curLocation);
}
}
i++;
} else if (isC(refFragment[i], readFragment[i]) && isAboveCytoQcThreshold(readQualities, i) &&
SequenceUtil.bisulfiteBasesEqual(false, readFragment[i+1], refFragment[i+1])) {
// C base in the reference that's not associated with a CpG
nCytoTotal++;
if (SequenceUtil.isBisulfiteConverted(readFragment[i], refFragment[i])) {
nCytoConverted++;
}
}
}
}
if (recordCpgs == 0) {
noCpgCount++;
}
}
示例13: doWork
import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
@Override
protected int doWork() {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(OUTPUT);
if (INPUT.getAbsoluteFile().equals(OUTPUT.getAbsoluteFile())) {
throw new IllegalArgumentException("Input and output cannot be the same file.");
}
final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(INPUT, TRUNCATE_SEQUENCE_NAMES_AT_WHITESPACE);
final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);
ReferenceSequence seq = null;
while ((seq = ref.nextSequence()) != null) {
final String name = seq.getName();
final byte[] bases = seq.getBases();
try {
out.write(">");
out.write(name);
out.newLine();
if (bases.length == 0) {
log.warn("Sequence " + name + " contains 0 bases.");
}
else {
for (int i=0; i<bases.length; ++i) {
if (i > 0 && i % LINE_LENGTH == 0) out.write("\n");
out.write(bases[i]);
}
out.write("\n");
}
}
catch (IOException ioe) {
throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);
}
}
try {
out.close();
} catch (IOException e) {
throw new RuntimeIOException(e);
}
return 0;
}
示例14: doWork
import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
@Override
protected int doWork() {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(OUTPUT);
// set up the reference and a mask so that we only count the positions requested by the user
final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(INPUT);
final ReferenceSequenceMask referenceSequenceMask;
if (INTERVALS != null) {
IOUtil.assertFileIsReadable(INTERVALS);
final IntervalList intervalList = IntervalList.fromFile(INTERVALS);
referenceSequenceMask = new IntervalListReferenceSequenceMask(intervalList);
} else {
final SAMFileHeader header = new SAMFileHeader();
header.setSequenceDictionary(ref.getSequenceDictionary());
referenceSequenceMask = new WholeGenomeReferenceSequenceMask(header);
}
long nonNbases = 0L;
for (final SAMSequenceRecord rec : ref.getSequenceDictionary().getSequences()) {
// pull out the contig and set up the bases
final ReferenceSequence sequence = ref.getSequence(rec.getSequenceName());
final byte[] bases = sequence.getBases();
StringUtil.toUpperCase(bases);
for (int i = 0; i < bases.length; i++) {
// only investigate this position if it's within our mask
if (referenceSequenceMask.get(sequence.getContigIndex(), i+1)) {
nonNbases += bases[i] == SequenceUtil.N ? 0 : 1;
}
}
}
try {
final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);
out.write(nonNbases + "\n");
out.close();
}
catch (IOException ioe) {
throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);
}
return 0;
}
示例15: getReferenceBases
import htsjdk.samtools.reference.ReferenceSequence; //导入方法依赖的package包/类
@Override
public ReferenceBases getReferenceBases(final SimpleInterval interval) {
ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(IOUtils.getPath(referencePath));
ReferenceSequence sequence = referenceSequenceFile.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
return new ReferenceBases(sequence.getBases(), interval);
}