本文整理汇总了Java中htsjdk.samtools.reference.IndexedFastaSequenceFile类的典型用法代码示例。如果您正苦于以下问题:Java IndexedFastaSequenceFile类的具体用法?Java IndexedFastaSequenceFile怎么用?Java IndexedFastaSequenceFile使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
IndexedFastaSequenceFile类属于htsjdk.samtools.reference包,在下文中一共展示了IndexedFastaSequenceFile类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: setVariantReadInInterval
import htsjdk.samtools.reference.IndexedFastaSequenceFile; //导入依赖的package包/类
/**Set filter to extract reads containing variant at the given interval.
* from, to: 1-based coordinates (first base of chr1 is `chr1:1-1`).
* @throws SQLException
* @throws InvalidRecordException
* @throws InvalidGenomicCoordsException
* @throws IOException
* @throws ClassNotFoundException
* @throws MalformedURLException
* */
public void setVariantReadInInterval(String chrom, int from, int to, boolean variantOnly) throws MalformedURLException, ClassNotFoundException, IOException, InvalidGenomicCoordsException, InvalidRecordException, SQLException{
this.getFeatureFilter().setVariantOnly(variantOnly);
if(chrom.equals(Filter.DEFAULT_VARIANT_CHROM.getValue())){
this.getFeatureFilter().setVariantReadInInterval(chrom, from, to, null);
this.update();
return;
}
if(from > to){
System.err.println("Invalid coordinates for filter from > to: " + from + ", " + to);
throw new InvalidGenomicCoordsException();
}
IndexedFastaSequenceFile faSeqFile = new IndexedFastaSequenceFile(new File(this.getGc().getFastaFile()));
byte[] faSeq= faSeqFile.getSubsequenceAt(chrom, from, to).getBases();
faSeqFile.close();
this.getFeatureFilter().setVariantReadInInterval(chrom, from, to, faSeq);
this.update();
}
示例2: canHandleEmptySequence
import htsjdk.samtools.reference.IndexedFastaSequenceFile; //导入依赖的package包/类
@Test
public void canHandleEmptySequence() throws IOException, UnindexableFastaFileException{
File fasta= new File("test_data/faidx/empty.fa");
File fai= new File(fasta.getAbsoluteFile() + ".fai");
fai.deleteOnExit();
new Faidx(fasta);
List<String> observed= Arrays.asList(FileUtils.readFileToString(fai).split("\n"));
for(String x : observed){
if(x.startsWith("empty")){ // Empty sequences just check sequence length is 0
assertEquals("0", x.split("\t")[1]);
}
}
// Can retrieve seqs
IndexedFastaSequenceFile ref= new IndexedFastaSequenceFile(new File("test_data/faidx/empty.fa"));
assertEquals("ACTGNNNNNNNNNNNNN", new String(ref.getSubsequenceAt("seq", 1, 17).getBases()));
assertEquals("AACCGGTTNN", new String(ref.getSubsequenceAt("seq2", 1, 10).getBases()));
assertEquals("GGGAAATTTNNNCCC", new String(ref.getSubsequenceAt("seq3", 1, 15).getBases()));
ref.close();
}
示例3: OverhangFixingManager
import htsjdk.samtools.reference.IndexedFastaSequenceFile; //导入依赖的package包/类
/**
*
* @param header header for the reads
* @param writer actual writer
* @param genomeLocParser the GenomeLocParser object
* @param referenceReader the reference reader
* @param maxRecordsInMemory max records to keep in memory
* @param maxMismatchesInOverhangs max number of mismatches permitted in the overhangs before requiring clipping
* @param maxBasesInOverhangs max number of bases permitted in the overhangs before deciding not to clip
* @param doNotFixOverhangs if true, don't clip overhangs at all
* @param processSecondaryReads if true, allow secondary reads to be overhang clipped. If false, secondary reads
* will still be written out to the writer but will not be edited by the clipper.
*/
public OverhangFixingManager(final SAMFileHeader header,
final GATKReadWriter writer,
final GenomeLocParser genomeLocParser,
final IndexedFastaSequenceFile referenceReader,
final int maxRecordsInMemory,
final int maxMismatchesInOverhangs,
final int maxBasesInOverhangs,
final boolean doNotFixOverhangs,
final boolean processSecondaryReads) {
this.header = header;
this.writer = writer;
this.genomeLocParser = genomeLocParser;
this.referenceReader = referenceReader;
this.maxRecordsInMemory = maxRecordsInMemory;
this.maxMismatchesInOverhang = maxMismatchesInOverhangs;
this.maxBasesInOverhang = maxBasesInOverhangs;
this.doNotFixOverhangs = doNotFixOverhangs;
this.waitingReadGroups = new PriorityQueue<List<SplitRead>>(INITIAL_CAPACITY, new SplitReadComparator());
this.outputToFile = false;
this.mateChangedReads = new HashMap<>();
this.processSecondaryReads = processSecondaryReads;
}
示例4: testPerfectUnpairedMapping
import htsjdk.samtools.reference.IndexedFastaSequenceFile; //导入依赖的package包/类
@Test
public void testPerfectUnpairedMapping() throws Exception {
final Random rdn = new Random(13);
final IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(fastaFile, fastaIndex);
final List<SAMRecord> inputReads = ReadTestUtils.randomErrorFreeUnpairedReads(rdn, TEST_DICTIONARY, fasta,
"read-", 0, 1000, 50, 200);
final BwaMemAligner aligner = new BwaMemAligner(index);
final List<List<BwaMemAlignment>> alignments = aligner.alignSeqs(inputReads, SAMRecord::getReadBases);
Assert.assertEquals(alignments.size(), inputReads.size());
for (int i = 0; i < alignments.size(); i++) {
final List<BwaMemAlignment> blocks = alignments.get(i);
final SAMRecord inputRead = inputReads.get(i);
Assert.assertNotNull(blocks);
Assert.assertEquals(blocks.size(), 1);
assertPerfectAlignmentMatch(inputRead, blocks.get(0));
}
}
示例5: testBestSequenceDictionary_fromReadsAndReference
import htsjdk.samtools.reference.IndexedFastaSequenceFile; //导入依赖的package包/类
@Test
public void testBestSequenceDictionary_fromReadsAndReference() throws Exception {
final GATKTool tool = new TestGATKToolWithReads();
final CommandLineParser clp = new CommandLineArgumentParser(tool);
final File bamFile = new File(publicTestDir + "org/broadinstitute/hellbender/engine/reads_data_source_test1.bam");
final String fastaFile = hg19MiniReference;
final String[] args = {"-I", bamFile.getCanonicalPath(),
"-R", fastaFile};
clp.parseArguments(System.out, args);
tool.onStartup();
//read the dict back in and compare to reference dict
final SAMSequenceDictionary toolDict = tool.getBestAvailableSequenceDictionary();
final SAMSequenceDictionary fastaDict = new IndexedFastaSequenceFile(new File(fastaFile)).getSequenceDictionary();
toolDict.assertSameDictionary(fastaDict);
fastaDict.assertSameDictionary(toolDict);
Assert.assertEquals(toolDict, fastaDict);
}
示例6: testMixedCasesInExample
import htsjdk.samtools.reference.IndexedFastaSequenceFile; //导入依赖的package包/类
@Test
public void testMixedCasesInExample() throws FileNotFoundException, InterruptedException {
final IndexedFastaSequenceFile original = new IndexedFastaSequenceFile(new File(exampleFASTA));
final CachingIndexedFastaSequenceFile casePreserving = new CachingIndexedFastaSequenceFile(
IOUtils.getPath(exampleFASTA), true);
final CachingIndexedFastaSequenceFile allUpper = new CachingIndexedFastaSequenceFile(IOUtils.getPath(exampleFASTA));
int nMixedCase = 0;
for ( SAMSequenceRecord contig : original.getSequenceDictionary().getSequences() ) {
nMixedCase += testCases(original, casePreserving, allUpper, contig.getSequenceName(), -1, -1);
final int step = 100;
for ( int lastPos = step; lastPos < contig.getSequenceLength(); lastPos += step ) {
testCases(original, casePreserving, allUpper, contig.getSequenceName(), lastPos - step, lastPos);
}
}
Assert.assertTrue(nMixedCase > 0, "No mixed cases sequences found in file. Unexpected test state");
}
示例7: testCases
import htsjdk.samtools.reference.IndexedFastaSequenceFile; //导入依赖的package包/类
private int testCases(final IndexedFastaSequenceFile original,
final IndexedFastaSequenceFile casePreserving,
final IndexedFastaSequenceFile allUpper,
final String contig, final int start, final int stop ) {
final String orig = fetchBaseString(original, contig, start, stop);
final String keptCase = fetchBaseString(casePreserving, contig, start, stop);
final String upperCase = fetchBaseString(allUpper, contig, start, stop).toUpperCase();
final String origToUpper = orig.toUpperCase();
if ( ! orig.equals(origToUpper) ) {
Assert.assertEquals(keptCase, orig, "Case preserving operation not equal to the original case for contig " + contig);
Assert.assertEquals(upperCase, origToUpper, "All upper case reader not equal to the uppercase of original case for contig " + contig);
return 1;
} else {
return 0;
}
}
示例8: assertFastaIndexContent
import htsjdk.samtools.reference.IndexedFastaSequenceFile; //导入依赖的package包/类
private void assertFastaIndexContent(final Path path, final Path indexPath, final SAMSequenceDictionary dictionary,
final Map<String, byte[]> bases)
throws IOException {
final FastaSequenceIndex index = new FastaSequenceIndex(indexPath);
final IndexedFastaSequenceFile indexedFasta = new IndexedFastaSequenceFile(path, index);
for (final SAMSequenceRecord sequence : dictionary.getSequences()) {
final String name = sequence.getSequenceName();
final int length = sequence.getSequenceLength();
final ReferenceSequence start = indexedFasta.getSubsequenceAt(name, 1, Math.min(length, 30));
final ReferenceSequence end = indexedFasta.getSubsequenceAt(name, Math.max(1, length - 29), length);
final int middlePos = Math.max(1, Math.min(length, length / 2));
final ReferenceSequence middle = indexedFasta.getSubsequenceAt(name, middlePos, Math.min(middlePos + 29, length));
Assert.assertEquals(start.getBases(), Arrays.copyOfRange(bases.get(name), 0, start.length()));
Assert.assertEquals(end.getBases(), Arrays.copyOfRange(bases.get(name), Math.max(0, length - 30), length));
Assert.assertEquals(middle.getBases(), Arrays.copyOfRange(bases.get(name), middlePos - 1, middlePos - 1 + middle.length()));
}
}
示例9: doWork
import htsjdk.samtools.reference.IndexedFastaSequenceFile; //导入依赖的package包/类
@Override
public int doWork(List<String> args) {
if(faidx==null)
{
LOG.error("Indexed fasta file missing.");
return -1;
}
try
{
this.indexedFastaSequenceFile=new IndexedFastaSequenceFile(faidx);
return doVcfToVcf(args,this.outputFile);
}
catch(Exception err)
{
LOG.error(err);
return -1;
}
finally
{
CloserUtil.close(this.indexedFastaSequenceFile);
this.indexedFastaSequenceFile =null;
}
}
示例10: getConsensusSequence
import htsjdk.samtools.reference.IndexedFastaSequenceFile; //导入依赖的package包/类
private char[] getConsensusSequence() throws IOException {
// We could get the refseq from genomicCoords but maybe safer to extract it again from scratch.
byte[] refSeq= null;
if(this.getGc().getFastaFile() != null){
IndexedFastaSequenceFile faSeqFile = new IndexedFastaSequenceFile(new File(this.getGc().getFastaFile()));
refSeq= faSeqFile.getSubsequenceAt(this.getGc().getChrom(), this.getGc().getFrom(), this.getGc().getTo()).getBases();
faSeqFile.close();
}
char[] consensusSequence= new char[this.getGc().getTo() - this.getGc().getFrom() + 1];
int i= 0;
for(int pos= this.getGc().getFrom(); pos <= this.getGc().getTo(); pos++){
char consensus= ' '; // Empty char assuming there is no coverage.
if(this.loci.get(this.getGc().getChrom()).containsKey(pos)){ // If containsKey then the position has coverage.
Locus loc= this.loci.get(this.getGc().getChrom()).get(pos);
consensus= loc.getConsensus();
if(refSeq != null){
char ref= Character.toUpperCase((char) refSeq[loc.pos - this.getGc().getFrom()]);
if(ref == Character.toUpperCase(consensus)){
consensus= '=';
}
}
}
consensusSequence[i]= consensus;
i++;
}
return consensusSequence;
}
示例11: writeAsVcf
import htsjdk.samtools.reference.IndexedFastaSequenceFile; //导入依赖的package包/类
public void writeAsVcf(final File output, final File refFile) throws FileNotFoundException {
ReferenceSequenceFile ref = new IndexedFastaSequenceFile(refFile);
try (VariantContextWriter writer = new VariantContextWriterBuilder()
.setOutputFile(output)
.setReferenceDictionary(ref.getSequenceDictionary())
.build()) {
final VCFHeader vcfHeader = new VCFHeader(
VCFUtils.withUpdatedContigsAsLines(Collections.emptySet(), refFile, header.getSequenceDictionary(), false),
Collections.singleton(HET_GENOTYPE_FOR_PHASING));
VCFUtils.withUpdatedContigsAsLines(Collections.emptySet(), refFile, header.getSequenceDictionary(), false);
vcfHeader.addMetaDataLine(new VCFHeaderLine(VCFHeaderVersion.VCF4_2.getFormatString(), VCFHeaderVersion.VCF4_2.getVersionString()));
vcfHeader.addMetaDataLine(new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"));
vcfHeader.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
vcfHeader.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.PHASE_SET_KEY, 1, VCFHeaderLineType.String, "Phase-set identifier for phased genotypes."));
vcfHeader.addMetaDataLine(new VCFHeaderLine(VCFHeader.SOURCE_KEY,"HaplotypeMap::writeAsVcf"));
vcfHeader.addMetaDataLine(new VCFHeaderLine("reference","HaplotypeMap::writeAsVcf"));
// vcfHeader.addMetaDataLine(new VCFHeaderLine());
writer.writeHeader(vcfHeader);
final LinkedList<VariantContext> variants = new LinkedList<>(this.asVcf(ref));
variants.sort(vcfHeader.getVCFRecordComparator());
variants.forEach(writer::add);
}
}
示例12: testSequential
import htsjdk.samtools.reference.IndexedFastaSequenceFile; //导入依赖的package包/类
private void testSequential(final CachingIndexedFastaSequenceFile caching, final Path fasta, final int querySize) throws FileNotFoundException {
Assert.assertTrue(caching.isPreservingCase(), "testSequential only works for case preserving CachingIndexedFastaSequenceFile readers");
final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
for ( int i = 0; i < contig.getSequenceLength(); i += STEP_SIZE ) {
int start = i;
int stop = start + querySize;
if ( stop <= contig.getSequenceLength() ) {
ReferenceSequence cachedVal = caching.getSubsequenceAt(contig.getSequenceName(), start, stop);
ReferenceSequence uncachedVal = uncached.getSubsequenceAt(contig.getSequenceName(), start, stop);
Assert.assertEquals(cachedVal.getName(), uncachedVal.getName());
Assert.assertEquals(cachedVal.getContigIndex(), uncachedVal.getContigIndex());
Assert.assertEquals(cachedVal.getBases(), uncachedVal.getBases());
}
}
// asserts for efficiency. We are going to make contig.length / STEP_SIZE queries
// at each of range: start -> start + querySize against a cache with size of X.
// we expect to hit the cache each time range falls within X. We expect a hit
// on the cache if range is within X. Which should happen at least (X - query_size * 2) / STEP_SIZE
// times.
final int minExpectedHits = (int)Math.floor((Math.min(caching.getCacheSize(), contig.getSequenceLength()) - querySize * 2.0) / STEP_SIZE);
caching.printEfficiency(Level.WARN);
Assert.assertTrue(caching.getCacheHits() >= minExpectedHits, "Expected at least " + minExpectedHits + " cache hits but only got " + caching.getCacheHits());
}
示例13: testCachingIndexedFastaReaderTwoStage
import htsjdk.samtools.reference.IndexedFastaSequenceFile; //导入依赖的package包/类
@Test(dataProvider = "fastas", enabled = ! DEBUG)
public void testCachingIndexedFastaReaderTwoStage(Path fasta, int cacheSize, int querySize) throws FileNotFoundException {
final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true, false);
SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
int middleStart = (contig.getSequenceLength() - querySize) / 2;
int middleStop = middleStart + querySize;
logger.warn(String.format("Checking contig %s length %d with cache size %d and query size %d with intermediate query",
contig.getSequenceName(), contig.getSequenceLength(), cacheSize, querySize));
for ( int i = 0; i < contig.getSequenceLength(); i += 10 ) {
int start = i;
int stop = start + querySize;
if ( stop <= contig.getSequenceLength() ) {
ReferenceSequence grabMiddle = caching.getSubsequenceAt(contig.getSequenceName(), middleStart, middleStop);
ReferenceSequence cachedVal = caching.getSubsequenceAt(contig.getSequenceName(), start, stop);
ReferenceSequence uncachedVal = uncached.getSubsequenceAt(contig.getSequenceName(), start, stop);
Assert.assertEquals(cachedVal.getName(), uncachedVal.getName());
Assert.assertEquals(cachedVal.getContigIndex(), uncachedVal.getContigIndex());
Assert.assertEquals(cachedVal.getBases(), uncachedVal.getBases());
}
}
}
示例14: invalidIntervalDataProvider
import htsjdk.samtools.reference.IndexedFastaSequenceFile; //导入依赖的package包/类
@DataProvider(name="invalidIntervalTestData")
public Object[][] invalidIntervalDataProvider() throws Exception {
File fastaFile = new File(publicTestDir + "exampleFASTA.fasta");
GenomeLocParser genomeLocParser = new GenomeLocParser(new IndexedFastaSequenceFile(fastaFile));
return new Object[][] {
new Object[] {genomeLocParser, "1", 10000000, 20000000},
new Object[] {genomeLocParser, "2", 1, 2},
new Object[] {genomeLocParser, "1", -1, 50}
};
}
示例15: negativeOneLengthIntervalDataProvider
import htsjdk.samtools.reference.IndexedFastaSequenceFile; //导入依赖的package包/类
@DataProvider(name="negativeOneLengthIntervalTestData")
public Object[][] negativeOneLengthIntervalDataProvider() throws Exception {
File fastaFile = new File(publicTestDir + "exampleFASTA.fasta");
GenomeLocParser genomeLocParser = new GenomeLocParser(new IndexedFastaSequenceFile(fastaFile));
return new Object[][] {
new Object[] {genomeLocParser, "1", 2, 1},
new Object[] {genomeLocParser, "1", 500, 499},
new Object[] {genomeLocParser, "1", 1000, 999}
};
}