This article collects typical usage examples of the pysam.tabix_index function in Python. If you have been wondering what exactly tabix_index does, how to call it, and what it looks like in real code, the hand-picked examples below should help.
The following shows 15 code examples of tabix_index, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
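For orientation, here is a minimal sketch of the two calling patterns that recur throughout the examples below (the file names are hypothetical, and the inputs are assumed to be coordinate-sorted). When given an uncompressed file, pysam.tabix_index bgzf-compresses it in place (appending ".gz") and then writes a ".tbi" index beside it; the indexed columns can be chosen via a preset or given explicitly by column number.

import pysam

# Index a sorted VCF: "variants.vcf" becomes "variants.vcf.gz"
# plus "variants.vcf.gz.tbi" (file names here are hypothetical).
pysam.tabix_index("variants.vcf", preset="vcf", force=True)

# Or compress explicitly first, then describe the columns by hand
# for a BED-like file (0-based sequence/start/end column indices).
pysam.tabix_compress("regions.bed", "regions.bed.gz", force=True)
pysam.tabix_index("regions.bed.gz", seq_col=0, start_col=1, end_col=2,
                  zerobased=True, force=True)

Both patterns appear repeatedly in the examples that follow.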
Example 1: batchTestHelper
def batchTestHelper(self, modFile, pool, refLens):
    tmpName = tempfile.mkstemp('.tsv')[1]
    tmpfp = open(tmpName, 'wb')
    for line in modFile:
        tmpfp.write(line)
    tmpfp.close()
    pysam.tabix_index(tmpName, force=True, seq_col=1, start_col=2, end_col=2,
                      meta_char='#', zerobased=True)
    tmpName += '.gz'
    modFile.close()

    self.chromoID = '1'
    self.modobj = mod.Mod(tmpName)
    self.modobj.load(self.chromoID)
    # The list comprehension already iterates over pool; an enclosing
    # "for tup in pool:" loop would shadow tup and redo the same work.
    bamIter = [Read(tup[0], tup[1] + 1, tup[2]) for tup in pool]
    a = annot.Annotator(self.chromoID, refLens[self.chromoID],
                        self.modobj, bamIter)
    results = a.execute()
    for i, res in enumerate(results):
        self.assertEqual(polish(res[0]), pool[i][3])
        self.assertEqual(res[1], pool[i][4])
        self.assertEqual(res[2], pool[i][5])
        self.assertEqual(res[3], pool[i][6])
        self.assertEqual(res[4], pool[i][7])

    os.remove(tmpName)
    os.remove(tmpName + '.tbi')
Example 2: annotate_vcf
def annotate_vcf(self, inVcf, genome, outVcf, JVMmemory=None):
    """
    Annotate variants in VCF file with translation consequences using snpEff.
    """
    if outVcf.endswith('.vcf.gz'):
        tmpVcf = util.file.mkstempfname(prefix='vcf_snpEff-', suffix='.vcf')
    elif outVcf.endswith('.vcf'):
        tmpVcf = outVcf
    else:
        raise Exception("invalid input")

    args = [
        '-treatAllAsProteinCoding', 'false',
        '-t',
        '-noLog',
        '-ud', '0',
        '-noStats',
        '-noShiftHgvs',
        genome,
        inVcf
    ]

    with open(tmpVcf, 'wt') as outf:
        self.execute('ann', args, JVMmemory=JVMmemory, stdout=outf)

    if outVcf.endswith('.vcf.gz'):
        pysam.tabix_compress(tmpVcf, outVcf, force=True)
        pysam.tabix_index(outVcf, force=True, preset='vcf')
        os.unlink(tmpVcf)
Example 3: addVariantSet
def addVariantSet(
        self, variantFileName, dataset, referenceSet,
        ontology, biosamples):
    inputVcf = os.path.join(
        self.inputDirectory, variantFileName)
    outputVcf = os.path.join(
        self.outputDirectory, variantFileName)
    shutil.copy(inputVcf, outputVcf)
    pysam.tabix_index(outputVcf, preset="vcf")
    variantSet = variants.HtslibVariantSet(
        dataset, variantFileName.split('_')[1])
    variantSet.setReferenceSet(referenceSet)
    variantSet.populateFromFile(
        [os.path.abspath(outputVcf + ".gz")],
        [os.path.abspath(outputVcf + ".gz.tbi")])
    variantSet.checkConsistency()
    for callSet in variantSet.getCallSets():
        for biosample in biosamples:
            if biosample.getLocalId() == callSet.getLocalId():
                callSet.setBiosampleId(biosample.getId())
    self.repo.insertVariantSet(variantSet)
    for annotationSet in variantSet.getVariantAnnotationSets():
        annotationSet.setOntology(ontology)
        self.repo.insertVariantAnnotationSet(annotationSet)
Example 4: testIndexPresetCompressed
def testIndexPresetCompressed(self):
    '''test indexing via preset.'''
    pysam.tabix_compress(self.tmpfilename, self.tmpfilename + ".gz")
    pysam.tabix_index(self.tmpfilename + ".gz", preset=self.preset)
    checkBinaryEqual(self.tmpfilename + ".gz", self.filename)
    checkBinaryEqual(self.tmpfilename + ".gz.tbi", self.filename_idx)
Example 5: eff_vcf
def eff_vcf(self, inVcf, outVcf, genome, java_flags='-Xmx2g',
            in_format='vcf', out_format='vcf', eff_options=''):
    """
    TODO: docstring here
    """
    if outVcf.endswith('.vcf.gz'):
        tmpVcf = util.file.mkstempfname(prefix='vcf_snpEff-', suffix='.vcf')
    else:
        tmpVcf = outVcf

    args = ' '.join([
        'eff',
        '-c', '{}/snpEff.config'.format(self.executable_path()),
        '-i', in_format,
        '-o', out_format,
        genome,
        '-treatAllAsProteinCoding false',
        '-noLog',
        '-ud 0',
        '-noStats',
        eff_options
    ])

    if inVcf.endswith('.gz'):
        pre_pipe = "zcat {} | ".format(inVcf)
    else:
        pre_pipe = "cat {} | ".format(inVcf)
    post_pipe = " > {}".format(tmpVcf)
    self.execute(args, java_flags=java_flags, pre_pipe=pre_pipe,
                 post_pipe=post_pipe)

    if outVcf.endswith('.vcf.gz'):
        pysam.tabix_compress(tmpVcf, outVcf, force=True)
        pysam.tabix_index(outVcf, force=True, preset='vcf')
        os.unlink(tmpVcf)
Example 6: make_bias_track
def make_bias_track(args, bases=500000, splitsize=1000):
    """function to compute bias track
    """
    if args.out is None:
        if args.bed is not None:
            args.out = '.'.join(os.path.basename(args.bed).split('.')[0:-1])
        else:
            args.out = '.'.join(os.path.basename(args.fasta).split('.')[0:-1])
    params = _BiasParams(args.fasta, args.pwm)
    if args.bed is None:
        chunks = ChunkList.convertChromSizes(params.chrs, splitsize=splitsize)
        sets = chunks.split(items=bases/splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sets = chunks.split(bases=bases)
    maxQueueSize = max(2, int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    pool = mp.Pool(processes=max(1, args.cores - 1))
    out_handle = open(args.out + '.Scores.bedgraph', 'w')
    out_handle.close()
    write_queue = mp.JoinableQueue(maxsize=maxQueueSize)
    write_process = mp.Process(target=_writeBias, args=(write_queue, args.out))
    write_process.start()
    for j in sets:
        tmp = pool.map(_biasHelper, zip(j, itertools.repeat(params)))
        for track in tmp:
            write_queue.put(track)
    pool.close()
    pool.join()
    write_queue.put('STOP')
    write_process.join()
    pysam.tabix_compress(args.out + '.Scores.bedgraph', args.out + '.Scores.bedgraph.gz', force=True)
    shell_command('rm ' + args.out + '.Scores.bedgraph')
    pysam.tabix_index(args.out + '.Scores.bedgraph.gz', preset="bed", force=True)
Example 7: ensureIndexed
def ensureIndexed(bedPath, preset="bed", trySorting=True):
    if not bedPath.endswith(".gz"):
        if not os.path.exists(bedPath + ".gz"):
            logging.info("bgzf compressing {}".format(bedPath))
            pysam.tabix_compress(bedPath, bedPath + ".gz")
            if not os.path.exists(bedPath + ".gz"):
                raise Exception(
                    "Failed to create compressed {preset} file for {file}; make sure the {preset} file is "
                    "sorted and the directory is writeable".format(preset=preset, file=bedPath)
                )
        bedPath += ".gz"
    if not os.path.exists(bedPath + ".tbi"):
        logging.info("creating tabix index for {}".format(bedPath))
        pysam.tabix_index(bedPath, preset=preset)
        if not os.path.exists(bedPath + ".tbi"):
            raise Exception(
                "Failed to create tabix index file for {file}; make sure the {preset} file is "
                "sorted and the directory is writeable".format(preset=preset, file=bedPath)
            )

    line = pysam.Tabixfile(bedPath).fetch().next()
    if len(line.strip().split("\t")) < 6 and preset == "bed":
        raise AnnotationError(
            "BED files need to have at least 6 (tab-delimited) fields (including "
            "chrom, start, end, name, score, strand; score is unused)"
        )
    if len(line.strip().split("\t")) < 9 and preset == "gff":
        raise AnnotationError("GFF/GTF files need to have at least 9 tab-delimited fields")
    return bedPath
Example 8: get_cov
def get_cov(args, bases=50000, splitsize=1000):
    """function to get coverages
    """
    if not args.out:
        if args.bed is None:
            args.out = '.'.join(os.path.basename(args.bam).split('.')[0:-1])
        else:
            args.out = '.'.join(os.path.basename(args.bed).split('.')[0:-1])
    if args.bed is None:
        chrs = read_chrom_sizes_from_bam(args.bam)
        chunks = ChunkList.convertChromSizes(chrs, splitsize=splitsize)
        sets = chunks.split(items=bases/splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sets = chunks.split(bases=bases)
    maxQueueSize = max(2, int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    pool1 = mp.Pool(processes=max(1, args.cores - 1))
    out_handle = open(args.out + '.cov.bedgraph', 'w')
    out_handle.close()
    write_queue = mp.JoinableQueue(maxsize=maxQueueSize)
    write_process = mp.Process(target=_writeCov, args=(write_queue, args.out))
    write_process.start()
    for j in sets:
        tmp = pool1.map(_covHelper, zip(j, itertools.repeat(args)))
        for track in tmp:
            write_queue.put(track)
    pool1.close()
    pool1.join()
    write_queue.put('STOP')
    write_process.join()
    pysam.tabix_compress(args.out + '.cov.bedgraph', args.out + '.cov.bedgraph.gz', force=True)
    shell_command('rm ' + args.out + '.cov.bedgraph')
    pysam.tabix_index(args.out + '.cov.bedgraph.gz', preset="bed", force=True)
Example 9: annotate_vcf
def annotate_vcf(self, inVcf, genomes, outVcf, emailAddress, JVMmemory=None):
    """
    Annotate variants in VCF file with translation consequences using snpEff.
    """
    if outVcf.endswith('.vcf.gz'):
        tmpVcf = util.file.mkstempfname(prefix='vcf_snpEff-', suffix='.vcf')
    elif outVcf.endswith('.vcf'):
        tmpVcf = outVcf
    else:
        raise Exception("invalid input")

    sortedAccessionString = ", ".join(sorted(genomes))
    databaseId = hashlib.sha256(sortedAccessionString.encode('utf-8')).hexdigest()[:55]

    genomeToUse = ""
    # if we don't have the genome, by name (snpEff official) or by hash (custom)
    if not self.has_genome(databaseId):
        if not self.has_genome(genomes[0]):
            _log.info("Checking for snpEff database online...")
            # check to see if it is available for download, and if so install it
            for row in self.available_databases():
                if (genomes[0].lower() in row['Genome'].lower()) or (
                        genomes[0].lower() in row['Bundle'].lower()) or (
                        genomes[0].lower() in row['Organism'].lower()):
                    self.download_db(row['Genome'])

    # backward compatibility for where a single genome name is provided
    if self.has_genome(genomes[0]):
        genomeToUse = genomes[0]
    else:
        # if the hash of the accessions passed in is not present in the genomes db
        if not self.has_genome(databaseId):
            self.create_db(genomes, emailAddress, JVMmemory)
        if self.has_genome(databaseId):
            genomeToUse = databaseId

    if not genomeToUse:
        raise Exception()
    args = [
        '-treatAllAsProteinCoding', 'false', '-t', '-noLog', '-ud', '0', '-noStats', '-noShiftHgvs', genomeToUse,
        os.path.realpath(inVcf)
    ]

    command_ps = self.execute('ann', args, JVMmemory=JVMmemory)
    if command_ps.returncode == 0:
        with open(tmpVcf, 'wt') as outf:
            outf.write(command_ps.stdout.decode("utf-8"))

        if outVcf.endswith('.vcf.gz'):
            pysam.tabix_compress(tmpVcf, outVcf, force=True)
            pysam.tabix_index(outVcf, force=True, preset='vcf')
            os.unlink(tmpVcf)
    else:
        raise subprocess.CalledProcessError(cmd=command_ps.args, returncode=command_ps.returncode, output=command_ps.stdout)
Example 10: test_indexing_to_custom_location_works
def test_indexing_to_custom_location_works(self):
    '''test indexing a file with a non-default location.'''
    index_path = get_temp_filename(suffix='custom.tbi')
    pysam.tabix_index(self.tmpfilename, preset="gff",
                      index=index_path, force=True)
    self.assertTrue(checkBinaryEqual(index_path, self.filename_idx))
    os.unlink(index_path)
Example 11: testIndexPresetUncompressed
def testIndexPresetUncompressed(self):
    '''test indexing via preset.'''
    pysam.tabix_index(self.tmpfilename, preset=self.preset)
    # check if uncompressed file has been removed
    self.assertEqual(os.path.exists(self.tmpfilename), False)
    checkBinaryEqual(self.tmpfilename + ".gz", self.filename)
    checkBinaryEqual(self.tmpfilename + ".gz.tbi", self.filename_idx)
Example 12: setUp
def setUp(self):
    self.tmpfilename = "tmp_%s.vcf" % id(self)
    shutil.copyfile(self.filename, self.tmpfilename)
    pysam.tabix_index(self.tmpfilename, preset="vcf")
    self.tabix = pysam.Tabixfile(self.tmpfilename + ".gz")
    self.compare = [x[:-1].split("\t")
                    for x in open(self.filename, "r")
                    if not x.startswith("#")]
Example 13: test_vcf_with_tbi_index
def test_vcf_with_tbi_index(self):
    with get_temp_context("tmp_fn.vcf") as fn:
        shutil.copyfile(self.vcf_filename, fn)
        pysam.tabix_index(fn, preset="vcf", force=True)
        self.assertTrue(os.path.exists(fn + ".gz" + ".tbi"))
        self.assertFalse(os.path.exists(fn + ".gz" + ".csi"))

        with pysam.VariantFile(fn + ".gz") as inf:
            self.assertEqual(len(list(inf.fetch("20"))), 3)
Example 14: run_nfr
def run_nfr(args):
    """run nfr calling
    """
    if args.bam is None and args.ins_track is None:
        raise Exception("Must supply either bam file or insertion track")
    if not args.out:
        args.out = '.'.join(os.path.basename(args.calls).split('.')[0:-3])
    if args.fasta is not None:
        chrs_fasta = read_chrom_sizes_from_fasta(args.fasta)
        pwm = PWM.open(args.pwm)
        chunks = ChunkList.read(args.bed, chromDict=chrs_fasta,
                                min_offset=max(pwm.up, pwm.down))
    else:
        chunks = ChunkList.read(args.bed)
    if args.bam is not None:
        chrs_bam = read_chrom_sizes_from_bam(args.bam)
        chunks.checkChroms(chrs_bam, chrom_source="BAM file")
    chunks.merge()
    maxQueueSize = args.cores * 10
    params = NFRParameters(args.occ_track, args.calls, args.ins_track, args.bam,
                           max_occ=args.max_occ, max_occ_upper=args.max_occ_upper,
                           fasta=args.fasta, pwm=args.pwm)
    sets = chunks.split(items=args.cores * 5)
    pool1 = mp.Pool(processes=max(1, args.cores - 1))
    nfr_handle = open(args.out + '.nfrpos.bed', 'w')
    nfr_handle.close()
    nfr_queue = mp.JoinableQueue()
    nfr_process = mp.Process(target=_writeNFR, args=(nfr_queue, args.out))
    nfr_process.start()
    if params.ins_track is None:
        ins_handle = open(args.out + '.ins.bedgraph', 'w')
        ins_handle.close()
        ins_queue = mp.JoinableQueue()
        ins_process = mp.Process(target=_writeIns, args=(ins_queue, args.out))
        ins_process.start()
    for j in sets:
        tmp = pool1.map(_nfrHelper, zip(j, itertools.repeat(params)))
        for result in tmp:
            if params.ins_track is None:
                nfr_queue.put(result[0])
                ins_queue.put(result[1])
            else:
                nfr_queue.put(result)
    pool1.close()
    pool1.join()
    nfr_queue.put('STOP')
    nfr_process.join()
    if params.ins_track is None:
        ins_queue.put('STOP')
        ins_process.join()
    pysam.tabix_compress(args.out + '.nfrpos.bed', args.out + '.nfrpos.bed.gz', force=True)
    shell_command('rm ' + args.out + '.nfrpos.bed')
    pysam.tabix_index(args.out + '.nfrpos.bed.gz', preset="bed", force=True)
    if params.ins_track is None:
        pysam.tabix_compress(args.out + '.ins.bedgraph', args.out + '.ins.bedgraph.gz', force=True)
        shell_command('rm ' + args.out + '.ins.bedgraph')
        pysam.tabix_index(args.out + '.ins.bedgraph.gz', preset="bed", force=True)
Example 15: test_indexing_with_lineskipping_works
def test_indexing_with_lineskipping_works(self):
    '''test indexing via preset and lineskip.'''
    pysam.tabix_index(self.tmpfilename,
                      seq_col=0,
                      start_col=3,
                      end_col=4,
                      line_skip=1,
                      zerobased=False)
    self.assertFalse(checkBinaryEqual(
        self.tmpfilename + ".tbi", self.filename_idx))