This article collects typical usage examples of the pysam.tabix_compress method in Python. If you are wondering what pysam.tabix_compress does, how to call it, or what real-world uses look like, the curated examples below should help. You can also explore further usage examples from the pysam module.
The following shows 11 code examples of the pysam.tabix_compress method, sorted by popularity by default.
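Before the examples, a minimal sketch of the pattern they all share may help (the file names here are hypothetical): pysam.tabix_compress bgzip-compresses a sorted, tab-delimited text file, and pysam.tabix_index then builds the tabix (.tbi) index next to it.

import pysam

# Compress a position-sorted, tab-delimited file with bgzip (file names are hypothetical).
pysam.tabix_compress('scores.bedgraph', 'scores.bedgraph.gz', force=True)

# Build the .tbi index; either a preset ("bed", "gff", "vcf") or explicit
# 0-based column numbers (seq_col/start_col/end_col) can be given.
pysam.tabix_index('scores.bedgraph.gz', preset='bed', force=True)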
Example 1: _make_vcf_and_read_depths_files
# Required import: import pysam [as alias]
# Or: from pysam import tabix_compress [as alias]
def _make_vcf_and_read_depths_files(self):
    if not os.path.exists(self.ref_fa + '.fai'):
        pysam.faidx(self.ref_fa)
    tmp_vcf = self.vcf_file + '.tmp'
    with open(tmp_vcf, 'w') as f:
        print(pysam.mpileup(
            '-t', 'INFO/AD,INFO/ADF,INFO/ADR',
            '-L', '99999999',
            '-A',
            '-f', self.ref_fa,
            '-u',
            '-v',
            self.bam,
        ), end='', file=f)
    got = vcfcall_ariba.vcfcall_ariba(tmp_vcf, self.outprefix, self.min_var_read_depth,
                                      self.min_second_var_read_depth, self.max_allele_freq)
    if got != 0:
        raise Error('Error parsing vcf file. Cannot continue')
    pysam.tabix_compress(self.outprefix + '.read_depths', self.read_depths_file)
    pysam.tabix_index(self.read_depths_file, seq_col=0, start_col=1, end_col=1)
    os.unlink(self.outprefix + '.read_depths')
    os.unlink(tmp_vcf)
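As a follow-up to Example 1 (not part of the original snippet), the compressed and indexed read-depths file could then be queried region by region with pysam.TabixFile; the file name and coordinates below are hypothetical.

# Hypothetical query of the indexed read-depths file produced above.
tb = pysam.TabixFile('out.read_depths.gz')
for row in tb.fetch('contig1', 99, 200):
    print(row)  # one tab-delimited record per overlapping position
tb.close()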
Example 2: ensureIndexed
# Required import: import pysam [as alias]
# Or: from pysam import tabix_compress [as alias]
def ensureIndexed(bedPath, preset="bed", trySorting=True):
    if not bedPath.endswith(".gz"):
        if not os.path.exists(bedPath + ".gz"):
            logging.info("bgzf compressing {}".format(bedPath))
            pysam.tabix_compress(bedPath, bedPath + ".gz")
        if not os.path.exists(bedPath + ".gz"):
            raise Exception("Failed to create compressed {preset} file for {file}; make sure the {preset} file is "
                            "sorted and the directory is writeable".format(preset=preset, file=bedPath))
        bedPath += ".gz"
    if not os.path.exists(bedPath + ".tbi"):
        logging.info("creating tabix index for {}".format(bedPath))
        pysam.tabix_index(bedPath, preset=preset)
    if not os.path.exists(bedPath + ".tbi"):
        raise Exception("Failed to create tabix index file for {file}; make sure the {preset} file is "
                        "sorted and the directory is writeable".format(preset=preset, file=bedPath))
    line = next(pysam.Tabixfile(bedPath).fetch())
    if len(line.strip().split("\t")) < 6 and preset == "bed":
        raise AnnotationError("BED files need to have at least 6 (tab-delimited) fields (including "
                              "chrom, start, end, name, score, strand; score is unused)")
    if len(line.strip().split("\t")) < 9 and preset == "gff":
        raise AnnotationError("GFF/GTF files need to have at least 9 tab-delimited fields")
    return bedPath
# def sortFile(uncompressedPath, preset):
# if preset == "bed":
# fields = {"chrom":0, "start":1, "end":2}
# elif preset == "gff":
# fields = {"chrom":0, "start":3, "end":4}
# sortCommand = "sort -k{chrom}V -k{start}n -k{end}n".format(**fields)
# tabixCommand = "{sort} {path} | bgzip > {path}.gz".format(sort=sortCommand, path=uncompressedPath)
# logging.info("Trying to sort input annotation file with command:")
# logging.info(" {}".format(tabixCommand))
# subprocess.check_call(tabixCommand, shell=True)
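A hypothetical call site for ensureIndexed, assuming "peaks.bed" is sorted and has at least six tab-delimited columns:

# Hypothetical usage; "peaks.bed" is assumed sorted with >= 6 columns.
indexedPath = ensureIndexed("peaks.bed", preset="bed")  # returns "peaks.bed.gz", now indexed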
Example 3: get_ins
# Required import: import pysam [as alias]
# Or: from pysam import tabix_compress [as alias]
def get_ins(args, bases=50000, splitsize=1000):
    """function to get insertions"""
    if not args.out:
        if args.bed is None:
            args.out = '.'.join(os.path.basename(args.bam).split('.')[0:-1])
        else:
            args.out = '.'.join(os.path.basename(args.bed).split('.')[0:-1])
    if args.bed is None:
        chrs = read_chrom_sizes_from_bam(args.bam)
        chunks = ChunkList.convertChromSizes(chrs, splitsize=splitsize)
        sets = chunks.split(items=bases / splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sets = chunks.split(bases=bases)
    maxQueueSize = max(2, int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    pool1 = mp.Pool(processes=max(1, args.cores - 1))
    out_handle = open(args.out + '.ins.bedgraph', 'w')
    out_handle.close()
    write_queue = mp.JoinableQueue(maxsize=maxQueueSize)
    write_process = mp.Process(target=_writeIns, args=(write_queue, args.out))
    write_process.start()
    for j in sets:
        if args.smooth:
            tmp = pool1.map(_insHelperSmooth, zip(j, itertools.repeat(args)))
        else:
            tmp = pool1.map(_insHelper, zip(j, itertools.repeat(args)))
        for track in tmp:
            write_queue.put(track)
    pool1.close()
    pool1.join()
    write_queue.put('STOP')
    write_process.join()
    pysam.tabix_compress(args.out + '.ins.bedgraph', args.out + '.ins.bedgraph.gz', force=True)
    shell_command('rm ' + args.out + '.ins.bedgraph')
    pysam.tabix_index(args.out + '.ins.bedgraph.gz', preset="bed", force=True)
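The compress / delete / index sequence that closes get_ins recurs in several of these examples. A hedged alternative, assuming a reasonably recent pysam, is to let tabix_index compress in place; verify the keep_original keyword against your installed version.

# Assumes recent pysam: tabix_index bgzip-compresses an uncompressed input
# itself and, with keep_original=False, removes the plain-text file afterwards.
pysam.tabix_index(args.out + '.ins.bedgraph', preset='bed', force=True,
                  keep_original=False)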
Example 4: get_cov
# Required import: import pysam [as alias]
# Or: from pysam import tabix_compress [as alias]
def get_cov(args, bases=50000, splitsize=1000):
    """function to get coverages"""
    if not args.out:
        if args.bed is None:
            args.out = '.'.join(os.path.basename(args.bam).split('.')[0:-1])
        else:
            args.out = '.'.join(os.path.basename(args.bed).split('.')[0:-1])
    if args.bed is None:
        chrs = read_chrom_sizes_from_bam(args.bam)
        chunks = ChunkList.convertChromSizes(chrs, splitsize=splitsize)
        sets = chunks.split(items=bases / splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sets = chunks.split(bases=bases)
    maxQueueSize = max(2, int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    pool1 = mp.Pool(processes=max(1, args.cores - 1))
    out_handle = open(args.out + '.cov.bedgraph', 'w')
    out_handle.close()
    write_queue = mp.JoinableQueue(maxsize=maxQueueSize)
    write_process = mp.Process(target=_writeCov, args=(write_queue, args.out))
    write_process.start()
    for j in sets:
        tmp = pool1.map(_covHelper, zip(j, itertools.repeat(args)))
        for track in tmp:
            write_queue.put(track)
    pool1.close()
    pool1.join()
    write_queue.put('STOP')
    write_process.join()
    pysam.tabix_compress(args.out + '.cov.bedgraph', args.out + '.cov.bedgraph.gz', force=True)
    shell_command('rm ' + args.out + '.cov.bedgraph')
    pysam.tabix_index(args.out + '.cov.bedgraph.gz', preset="bed", force=True)
Example 5: tabix_bedgraph
# Required import: import pysam [as alias]
# Or: from pysam import tabix_compress [as alias]
def tabix_bedgraph(bedgraph):
    pysam.tabix_compress(bedgraph, bedgraph + '.gz')
    pysam.tabix_index(bedgraph + '.gz', seq_col=0, start_col=1, end_col=2, zerobased=True)
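bedGraph intervals are zero-based and half-open, hence zerobased=True when indexing. A hypothetical query against the result (the names are illustrative, not from the original snippet):

# Hypothetical region query over the indexed bedgraph.
tb = pysam.TabixFile('signal.bedgraph.gz')
for row in tb.fetch('chr1', 0, 1000, parser=pysam.asTuple()):
    chrom, start, end = row[0], int(row[1]), int(row[2])
    value = float(row[3])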
Example 6: make_bias_track
# Required import: import pysam [as alias]
# Or: from pysam import tabix_compress [as alias]
def make_bias_track(args, bases=500000, splitsize=1000):
    """function to compute bias track"""
    if args.out is None:
        if args.bed is not None:
            args.out = '.'.join(os.path.basename(args.bed).split('.')[0:-1])
        else:
            args.out = '.'.join(os.path.basename(args.fasta).split('.')[0:-1])
    params = _BiasParams(args.fasta, args.pwm)
    if args.bed is None:
        chunks = ChunkList.convertChromSizes(params.chrs, splitsize=splitsize)
        sets = chunks.split(items=bases / splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.checkChroms(params.chrs.keys())
        chunks.merge()
        sets = chunks.split(bases=bases)
    maxQueueSize = max(2, int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    pool = mp.Pool(processes=max(1, args.cores - 1))
    out_handle = open(args.out + '.Scores.bedgraph', 'w')
    out_handle.close()
    write_queue = mp.JoinableQueue(maxsize=maxQueueSize)
    write_process = mp.Process(target=_writeBias, args=(write_queue, args.out))
    write_process.start()
    for j in sets:
        tmp = pool.map(_biasHelper, zip(j, itertools.repeat(params)))
        for track in tmp:
            write_queue.put(track)
    pool.close()
    pool.join()
    write_queue.put('STOP')
    write_process.join()
    pysam.tabix_compress(args.out + '.Scores.bedgraph', args.out + '.Scores.bedgraph.gz', force=True)
    shell_command('rm ' + args.out + '.Scores.bedgraph')
    pysam.tabix_index(args.out + '.Scores.bedgraph.gz', preset="bed", force=True)
Example 7: run_diff
# Required import: import pysam [as alias]
# Or: from pysam import tabix_compress [as alias]
def run_diff(args, bases=500000):
    """run differential occupancy calling"""
    chrs = read_chrom_sizes_from_bam(args.bam)
    pwm = PWM.open(args.pwm)
    chunks = ChunkList.read(args.bed, chromDict=chrs, min_offset=args.flank + args.upper / 2 + max(pwm.up, pwm.down))
    chunks.merge()
    maxQueueSize = max(2, int(100 * bases / np.mean([chunk.length() for chunk in chunks])))
    # get fragment sizes
    fragment_dist1 = FragmentMixDistribution(0, upper=args.upper)
    fragment_dist1.fragmentsizes = FragmentSizes(0, args.upper, vals=FragmentSizes.open(args.sizes1).get(0, args.upper))
    fragment_dist1.modelNFR()
    fragment_dist2 = FragmentMixDistribution(0, upper=args.upper)
    fragment_dist2.fragmentsizes = FragmentSizes(0, args.upper, vals=FragmentSizes.open(args.sizes2).get(0, args.upper))
    fragment_dist2.modelNFR()
    # note: this excerpt passes fragment_dist, although only fragment_dist1 and
    # fragment_dist2 are defined above; kept as in the source
    params = OccupancyParameters(fragment_dist, args.upper, args.fasta, args.pwm, sep=args.nuc_sep, min_occ=args.min_occ,
                                 flank=args.flank, bam=args.bam, ci=args.confidence_interval)
    sets = chunks.split(bases=bases)
    pool1 = mp.Pool(processes=max(1, args.cores - 1))
    diff_handle = open(args.out + '.occdiff.bed', 'w')
    diff_handle.close()
    diff_queue = mp.JoinableQueue()
    diff_process = mp.Process(target=_writeDiff, args=(diff_queue, args.out))
    diff_process.start()
    nuc_dist = np.zeros(args.upper)
    for j in sets:
        tmp = pool1.map(_occHelper, zip(j, itertools.repeat(params)))
        for result in tmp:
            diff_queue.put(result[1])
    pool1.close()
    pool1.join()
    diff_queue.put('STOP')
    diff_process.join()
    pysam.tabix_compress(args.out + '.occdiff.bed', args.out + '.occdiff.bed.gz', force=True)
    shell_command('rm ' + args.out + '.occdiff.bed')
    pysam.tabix_index(args.out + '.occdiff.bed.gz', preset="bed", force=True)
Example 8: convert_VariantFile_to_IndexedVariantFile
# Required import: import pysam [as alias]
# Or: from pysam import tabix_compress [as alias]
def convert_VariantFile_to_IndexedVariantFile(vf_path, ivf_path):
    make_basedir(ivf_path)
    tmp_path = get_tmp_path(ivf_path)
    pysam.tabix_compress(vf_path, tmp_path, force=True)
    os.rename(tmp_path, ivf_path)
    pysam.tabix_index(
        filename=ivf_path, force=True,
        seq_col=0, start_col=1, end_col=1,  # note: pysam.tabix_index calls the first column 0, but command-line tabix calls it 1
        line_skip=1,  # skip header
    )
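To spell out the off-by-one note in Example 8 (an illustrative equivalence, not from the original source), the same index expressed both ways, with a hypothetical file name:

# pysam counts columns from 0; the tabix command line counts from 1,
# so this call corresponds to: tabix -f -S 1 -s 1 -b 2 -e 2 variants.tsv.gz
pysam.tabix_index('variants.tsv.gz', force=True,
                  seq_col=0, start_col=1, end_col=1, line_skip=1)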
Example 9: _compress_and_index_file
# Required import: import pysam [as alias]
# Or: from pysam import tabix_compress [as alias]
def _compress_and_index_file(infile, log_fh=None):
    if log_fh is not None:
        print('Compressing file', infile, file=log_fh, flush=True)
    pysam.tabix_compress(infile, infile + '.gz')
    pysam.tabix_index(infile + '.gz', seq_col=0, start_col=1, end_col=1)
Example 10: run_nuc
# Required import: import pysam [as alias]
# Or: from pysam import tabix_compress [as alias]
def run_nuc(args):
    """run occupancy calling"""
    vmat = VMat.open(args.vmat)
    if args.fasta:
        chrs = read_chrom_sizes_from_fasta(args.fasta)
    else:
        chrs = read_chrom_sizes_from_bam(args.bam)
    pwm = PWM.open(args.pwm)
    chunks = ChunkList.read(args.bed, chromDict=chrs,
                            min_offset=vmat.mat.shape[1] + vmat.upper / 2 + max(pwm.up, pwm.down) + args.nuc_sep / 2,
                            min_length=args.nuc_sep * 2)
    chunks.slop(chrs, up=args.nuc_sep / 2, down=args.nuc_sep / 2)
    chunks.merge()
    maxQueueSize = args.cores * 10
    if args.sizes is not None:
        fragment_dist = FragmentSizes.open(args.sizes)
    else:
        fragment_dist = FragmentSizes(0, upper=vmat.upper)
        fragment_dist.calculateSizes(args.bam, chunks)
    params = NucParameters(vmat=vmat, fragmentsizes=fragment_dist, bam=args.bam, fasta=args.fasta, pwm=args.pwm,
                           occ_track=args.occ_track,
                           sd=args.sd, nonredundant_sep=args.nuc_sep, redundant_sep=args.redundant_sep,
                           min_z=args.min_z, min_lr=args.min_lr, atac=args.atac)
    sets = chunks.split(items=args.cores * 5)
    pool1 = mp.Pool(processes=max(1, args.cores - 1))
    if args.write_all:
        outputs = ['nucpos', 'nucpos.redundant', 'nucleoatac_signal', 'nucleoatac_signal.smooth',
                   'nucleoatac_background', 'nucleoatac_raw']
    else:
        outputs = ['nucpos', 'nucpos.redundant', 'nucleoatac_signal', 'nucleoatac_signal.smooth']
    handles = {}
    write_queues = {}
    write_processes = {}
    for i in outputs:
        if i not in ['nucpos', 'nucpos.redundant', 'nfrpos']:
            handles[i] = open(args.out + '.' + i + '.bedgraph', 'w')
        else:
            handles[i] = open(args.out + '.' + i + '.bed', 'w')
        handles[i].close()
        write_queues[i] = mp.JoinableQueue(maxsize=maxQueueSize)
        write_processes[i] = mp.Process(target=_writeFuncs[i], args=(write_queues[i], args.out))
        write_processes[i].start()
    for j in sets:
        tmp = pool1.map(_nucHelper, zip(j, itertools.repeat(params)))
        for result in tmp:
            for i in outputs:
                write_queues[i].put(result[i])
    pool1.close()
    pool1.join()
    for i in outputs:
        write_queues[i].put('STOP')
    for i in outputs:
        write_processes[i].join()
        if i not in ['nucpos', 'nucpos.redundant']:
            pysam.tabix_compress(args.out + '.' + i + '.bedgraph', args.out + '.' + i + '.bedgraph.gz', force=True)
            shell_command('rm ' + args.out + '.' + i + '.bedgraph')
            pysam.tabix_index(args.out + '.' + i + '.bedgraph.gz', preset="bed", force=True)
        else:
            pysam.tabix_compress(args.out + '.' + i + '.bed', args.out + '.' + i + '.bed.gz', force=True)
            shell_command('rm ' + args.out + '.' + i + '.bed')
            pysam.tabix_index(args.out + '.' + i + '.bed.gz', preset="bed", force=True)
Example 11: run_nfr
# Required import: import pysam [as alias]
# Or: from pysam import tabix_compress [as alias]
def run_nfr(args):
    """run nfr calling"""
    if args.bam is None and args.ins_track is None:
        raise Exception("Must supply either bam file or insertion track")
    if not args.out:
        args.out = '.'.join(os.path.basename(args.calls).split('.')[0:-3])
    if args.fasta is not None:
        chrs_fasta = read_chrom_sizes_from_fasta(args.fasta)
        pwm = PWM.open(args.pwm)
        chunks = ChunkList.read(args.bed, chromDict=chrs_fasta, min_offset=max(pwm.up, pwm.down))
    else:
        chunks = ChunkList.read(args.bed)
    if args.bam is not None:
        chrs_bam = read_chrom_sizes_from_bam(args.bam)
        chunks.checkChroms(chrs_bam, chrom_source="BAM file")
    chunks.merge()
    maxQueueSize = args.cores * 10
    params = NFRParameters(args.occ_track, args.calls, args.ins_track, args.bam, max_occ=args.max_occ,
                           max_occ_upper=args.max_occ_upper, fasta=args.fasta, pwm=args.pwm)
    sets = chunks.split(items=args.cores * 5)
    pool1 = mp.Pool(processes=max(1, args.cores - 1))
    nfr_handle = open(args.out + '.nfrpos.bed', 'w')
    nfr_handle.close()
    nfr_queue = mp.JoinableQueue()
    nfr_process = mp.Process(target=_writeNFR, args=(nfr_queue, args.out))
    nfr_process.start()
    if params.ins_track is None:
        ins_handle = open(args.out + '.ins.bedgraph', 'w')
        ins_handle.close()
        ins_queue = mp.JoinableQueue()
        ins_process = mp.Process(target=_writeIns, args=(ins_queue, args.out))
        ins_process.start()
    for j in sets:
        tmp = pool1.map(_nfrHelper, zip(j, itertools.repeat(params)))
        for result in tmp:
            if params.ins_track is None:
                nfr_queue.put(result[0])
                ins_queue.put(result[1])
            else:
                nfr_queue.put(result)
    pool1.close()
    pool1.join()
    nfr_queue.put('STOP')
    nfr_process.join()
    if params.ins_track is None:
        ins_queue.put('STOP')
        ins_process.join()
    pysam.tabix_compress(args.out + '.nfrpos.bed', args.out + '.nfrpos.bed.gz', force=True)
    shell_command('rm ' + args.out + '.nfrpos.bed')
    pysam.tabix_index(args.out + '.nfrpos.bed.gz', preset="bed", force=True)
    if params.ins_track is None:
        pysam.tabix_compress(args.out + '.ins.bedgraph', args.out + '.ins.bedgraph.gz', force=True)
        shell_command('rm ' + args.out + '.ins.bedgraph')
        pysam.tabix_index(args.out + '.ins.bedgraph.gz', preset="bed", force=True)