This article collects typical usage examples of the pysam.tabix_compress function in Python. If you have been wondering what exactly tabix_compress does, how to call it, or what real-world uses look like, the hand-picked examples below should help.
A total of 15 tabix_compress code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
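Before the project-specific examples, here is a minimal sketch of the pattern nearly all of them share: pysam.tabix_compress writes a bgzip-compressed copy of a plain-text, coordinate-sorted file, and pysam.tabix_index then builds the accompanying .tbi index so the file can be queried by genomic region. The file names below are placeholders, not taken from any of the examples.

import pysam

# bgzip-compress a coordinate-sorted BED file (file names are placeholders)
pysam.tabix_compress("regions.bed", "regions.bed.gz", force=True)
# build the .tbi index next to the compressed file
pysam.tabix_index("regions.bed.gz", preset="bed", force=True)
# the indexed file can now be queried by genomic interval
with pysam.TabixFile("regions.bed.gz") as tbx:
    for row in tbx.fetch("chr1", 10000, 20000):
        print(row)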
Example 1: main
def main():
# Read options, args.
parser = optparse.OptionParser()
(options, args) = parser.parse_args()
input_fname, output_fname = args
pysam.tabix_compress(input_fname, output_fname, force=True)
Example 2: make_bias_track
def make_bias_track(args, bases = 500000, splitsize = 1000):
"""function to compute bias track
"""
if args.out is None:
if args.bed is not None:
args.out = '.'.join(os.path.basename(args.bed).split('.')[0:-1])
else:
args.out = '.'.join(os.path.basename(args.fasta).split('.')[0:-1])
params = _BiasParams(args.fasta, args.pwm)
if args.bed is None:
chunks = ChunkList.convertChromSizes(params.chrs, splitsize = splitsize)
        sets = chunks.split(items = bases // splitsize)  # integer division keeps the item count an int under Python 3
else:
chunks = ChunkList.read(args.bed)
chunks.merge()
sets = chunks.split(bases = bases)
maxQueueSize = max(2,int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
pool = mp.Pool(processes = max(1,args.cores-1))
out_handle = open(args.out + '.Scores.bedgraph','w')
out_handle.close()
write_queue = mp.JoinableQueue(maxsize = maxQueueSize)
write_process = mp.Process(target = _writeBias, args=(write_queue, args.out))
write_process.start()
for j in sets:
tmp = pool.map(_biasHelper, zip(j,itertools.repeat(params)))
for track in tmp:
write_queue.put(track)
pool.close()
pool.join()
write_queue.put('STOP')
write_process.join()
pysam.tabix_compress(args.out + '.Scores.bedgraph', args.out + '.Scores.bedgraph.gz', force = True)
shell_command('rm ' + args.out + '.Scores.bedgraph')
pysam.tabix_index(args.out + '.Scores.bedgraph.gz', preset = "bed", force = True)
Example 3: get_cov
def get_cov(args, bases = 50000, splitsize = 1000):
"""function to get coverages
"""
if not args.out:
if args.bed is None:
args.out = '.'.join(os.path.basename(args.bam).split('.')[0:-1])
else:
args.out = '.'.join(os.path.basename(args.bed).split('.')[0:-1])
if args.bed is None:
chrs = read_chrom_sizes_from_bam(args.bam)
chunks = ChunkList.convertChromSizes(chrs, splitsize = splitsize)
        sets = chunks.split(items = bases // splitsize)  # integer division keeps the item count an int under Python 3
else:
chunks = ChunkList.read(args.bed)
chunks.merge()
sets = chunks.split(bases = bases)
maxQueueSize = max(2,int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
pool1 = mp.Pool(processes = max(1,args.cores-1))
out_handle = open(args.out + '.cov.bedgraph','w')
out_handle.close()
write_queue = mp.JoinableQueue(maxsize = maxQueueSize)
write_process = mp.Process(target = _writeCov, args=(write_queue, args.out))
write_process.start()
for j in sets:
tmp = pool1.map(_covHelper, zip(j,itertools.repeat(args)))
for track in tmp:
write_queue.put(track)
pool1.close()
pool1.join()
write_queue.put('STOP')
write_process.join()
pysam.tabix_compress(args.out + '.cov.bedgraph', args.out + '.cov.bedgraph.gz', force = True)
shell_command('rm ' + args.out + '.cov.bedgraph')
pysam.tabix_index(args.out + '.cov.bedgraph.gz', preset = "bed", force = True)
Example 4: main
def main():
# Read options, args.
parser = optparse.OptionParser()
parser.add_option('-c', '--chr-col', type='int', dest='chrom_col')
parser.add_option('-s', '--start-col', type='int', dest='start_col')
parser.add_option('-e', '--end-col', type='int', dest='end_col')
parser.add_option('-P', '--preset', dest='preset')
(options, args) = parser.parse_args()
input_fname, output_fname = args
tmpfile = tempfile.NamedTemporaryFile()
sort_params = None
if options.chrom_col and options.start_col and options.end_col:
sort_params = [
"sort",
"-k%(i)s,%(i)s" % {'i': options.chrom_col},
"-k%(i)i,%(i)in" % {'i': options.start_col},
"-k%(i)i,%(i)in" % {'i': options.end_col}
]
elif options.preset == "bed":
sort_params = ["sort", "-k1,1", "-k2,2n", "-k3,3n"]
elif options.preset == "vcf":
sort_params = ["sort", "-k1,1", "-k2,2n"]
elif options.preset == "gff":
sort_params = ["sort", "-s", "-k1,1", "-k4,4n"] # stable sort on start column
    # Skip comment lines (starting with "#") and track definition lines before sorting.
    # Note: the arguments go straight to grep (no shell), so the pattern must be ^# without extra quotes.
    grepped = subprocess.Popen(["grep", "-e", "^#", "-e", "^track", "-v", input_fname], stderr=subprocess.PIPE, stdout=subprocess.PIPE)
after_sort = subprocess.Popen(sort_params, stdin=grepped.stdout, stderr=subprocess.PIPE, stdout=tmpfile)
grepped.stdout.close()
output, err = after_sort.communicate()
pysam.tabix_compress(tmpfile.name, output_fname, force=True)
Example 5: _create_tabix
def _create_tabix(fname, ftype):
logger = logging.getLogger("pita")
tabix_file = ""
logger.info("Creating tabix index for %s", os.path.basename(fname))
logger.debug("Preparing %s for tabix", fname)
tmp = NamedTemporaryFile(prefix="pita", delete=False)
preset = "gff"
if ftype == "bed":
cmd = "sort -k1,1 -k2g,2 {0} | grep -v track | grep -v \"^#\" > {1}"
preset = "bed"
    elif ftype in ["gff", "gff3", "gtf"]:
        cmd = "sort -k1,1 -k4g,4 {0} | grep -v \"^#\" > {1}"
    else:
        raise ValueError("unsupported file type: {0}".format(ftype))
# Sort the input file
logger.debug(cmd.format(fname, tmp.name))
sp.call(cmd.format(fname, tmp.name), shell=True)
# Compress using bgzip
logger.debug("compressing %s", tmp.name)
tabix_file = tmp.name + ".gz"
pysam.tabix_compress(tmp.name, tabix_file)
tmp.close()
    # Index using the tabix command-line tool, as pysam.tabix_index resulted in a segmentation fault
logger.debug("indexing %s", tabix_file)
sp.call("tabix {0} -p {1}".format(tabix_file, preset), shell=True)
return tabix_file
Example 6: ensureIndexed
def ensureIndexed(bedPath, preset="bed", trySorting=True):
if not bedPath.endswith(".gz"):
if not os.path.exists(bedPath + ".gz"):
logging.info("bgzf compressing {}".format(bedPath))
pysam.tabix_compress(bedPath, bedPath + ".gz")
if not os.path.exists(bedPath + ".gz"):
raise Exception(
"Failed to create compress {preset} file for {file}; make sure the {preset} file is "
"sorted and the directory is writeable".format(preset=preset, file=bedPath)
)
bedPath += ".gz"
if not os.path.exists(bedPath + ".tbi"):
logging.info("creating tabix index for {}".format(bedPath))
pysam.tabix_index(bedPath, preset=preset)
if not os.path.exists(bedPath + ".tbi"):
raise Exception(
"Failed to create tabix index file for {file}; make sure the {preset} file is "
"sorted and the directory is writeable".format(preset=preset, file=bedPath)
)
    line = next(pysam.TabixFile(bedPath).fetch())  # Python 3: use next() on the iterator
if len(line.strip().split("\t")) < 6 and preset == "bed":
raise AnnotationError(
"BED files need to have at least 6 (tab-delimited) fields (including "
"chrom, start, end, name, score, strand; score is unused)"
)
if len(line.strip().split("\t")) < 9 and preset == "gff":
raise AnnotationError("GFF/GTF files need to have at least 9 tab-delimited fields")
return bedPath
Example 7: eff_vcf
def eff_vcf(self, inVcf, outVcf, genome, java_flags='-Xmx2g',
in_format='vcf', out_format='vcf', eff_options=''):
"""
TODO: docstring here
"""
if outVcf.endswith('.vcf.gz'):
tmpVcf = util.file.mkstempfname(prefix='vcf_snpEff-', suffix='.vcf')
else:
tmpVcf = outVcf
args = ' '.join([
'eff',
'-c', '{}/snpEff.config'.format(self.executable_path()),
'-i', in_format,
'-o', out_format,
genome,
'-treatAllAsProteinCoding false',
'-noLog',
'-ud 0',
'-noStats',
eff_options
])
if inVcf.endswith('.gz'):
pre_pipe = "zcat {} | ".format(inVcf)
else:
pre_pipe = "cat {} | ".format(inVcf)
post_pipe = " > {}".format(tmpVcf)
self.execute(args, java_flags=java_flags, pre_pipe=pre_pipe,
post_pipe=post_pipe)
if outVcf.endswith('.vcf.gz'):
pysam.tabix_compress(tmpVcf, outVcf, force=True)
pysam.tabix_index(outVcf, force=True, preset='vcf')
os.unlink(tmpVcf)
Example 8: testIndexPresetCompressed
def testIndexPresetCompressed(self):
'''test indexing via preset.'''
pysam.tabix_compress(self.tmpfilename, self.tmpfilename + ".gz")
pysam.tabix_index(self.tmpfilename + ".gz", preset=self.preset)
checkBinaryEqual(self.tmpfilename + ".gz", self.filename)
checkBinaryEqual(self.tmpfilename + ".gz.tbi", self.filename_idx)
Example 9: annotate_vcf
def annotate_vcf(self, inVcf, genome, outVcf, JVMmemory=None):
"""
Annotate variants in VCF file with translation consequences using snpEff.
"""
if outVcf.endswith('.vcf.gz'):
tmpVcf = util.file.mkstempfname(prefix='vcf_snpEff-', suffix='.vcf')
elif outVcf.endswith('.vcf'):
tmpVcf = outVcf
else:
raise Exception("invalid input")
args = [
'-treatAllAsProteinCoding', 'false',
'-t',
'-noLog',
'-ud', '0',
'-noStats',
'-noShiftHgvs',
genome,
inVcf
]
with open(tmpVcf, 'wt') as outf:
self.execute('ann', args, JVMmemory=JVMmemory, stdout=outf)
if outVcf.endswith('.vcf.gz'):
pysam.tabix_compress(tmpVcf, outVcf, force=True)
pysam.tabix_index(outVcf, force=True, preset='vcf')
os.unlink(tmpVcf)
Example 10: annotate_vcf
def annotate_vcf(self, inVcf, genomes, outVcf, emailAddress, JVMmemory=None):
"""
Annotate variants in VCF file with translation consequences using snpEff.
"""
if outVcf.endswith('.vcf.gz'):
tmpVcf = util.file.mkstempfname(prefix='vcf_snpEff-', suffix='.vcf')
elif outVcf.endswith('.vcf'):
tmpVcf = outVcf
else:
raise Exception("invalid input")
sortedAccessionString = ", ".join(sorted(genomes))
databaseId = hashlib.sha256(sortedAccessionString.encode('utf-8')).hexdigest()[:55]
genomeToUse = ""
# if we don't have the genome, by name (snpEff official) or by hash (custom)
if (not self.has_genome(databaseId)):
if (not self.has_genome(genomes[0])):
_log.info("Checking for snpEff database online...")
# check to see if it is available for download, and if so install it
for row in self.available_databases():
if (genomes[0].lower() in row['Genome'].lower()) or (
genomes[0].lower() in row['Bundle'].lower()
) or (
genomes[0].lower() in row['Organism'].lower()
):
self.download_db(row['Genome'])
    # backward compatibility for when a single genome name is provided
if self.has_genome(genomes[0]):
genomeToUse = genomes[0]
else:
# if the hash of the accessions passed in is not present in the genomes db
if not self.has_genome(databaseId):
self.create_db(genomes, emailAddress, JVMmemory)
if self.has_genome(databaseId):
genomeToUse = databaseId
if not genomeToUse:
        raise Exception("no snpEff database found or created for genome(s): %s" % sortedAccessionString)
args = [
'-treatAllAsProteinCoding', 'false', '-t', '-noLog', '-ud', '0', '-noStats', '-noShiftHgvs', genomeToUse,
os.path.realpath(inVcf)
]
command_ps = self.execute('ann', args, JVMmemory=JVMmemory)
if command_ps.returncode == 0:
with open(tmpVcf, 'wt') as outf:
outf.write(command_ps.stdout.decode("utf-8"))
if outVcf.endswith('.vcf.gz'):
pysam.tabix_compress(tmpVcf, outVcf, force=True)
pysam.tabix_index(outVcf, force=True, preset='vcf')
os.unlink(tmpVcf)
else:
raise subprocess.CalledProcessError(cmd=command_ps.args, returncode=command_ps.returncode, output=command_ps.stdout)
Example 11: run_nfr
def run_nfr(args):
"""run nfr calling
"""
if args.bam is None and args.ins_track is None:
raise Exception("Must supply either bam file or insertion track")
if not args.out:
args.out = '.'.join(os.path.basename(args.calls).split('.')[0:-3])
if args.fasta is not None:
chrs_fasta = read_chrom_sizes_from_fasta(args.fasta)
pwm = PWM.open(args.pwm)
chunks = ChunkList.read(args.bed, chromDict = chrs_fasta, min_offset = max(pwm.up, pwm.down))
else:
chunks = ChunkList.read(args.bed)
if args.bam is not None:
chrs_bam = read_chrom_sizes_from_bam(args.bam)
chunks.checkChroms(chrs_bam, chrom_source = "BAM file")
chunks.merge()
maxQueueSize = args.cores * 10
params = NFRParameters(args.occ_track, args.calls, args.ins_track, args.bam, max_occ = args.max_occ, max_occ_upper = args.max_occ_upper,
fasta = args.fasta, pwm = args.pwm)
sets = chunks.split(items = args.cores * 5)
pool1 = mp.Pool(processes = max(1,args.cores-1))
nfr_handle = open(args.out + '.nfrpos.bed','w')
nfr_handle.close()
nfr_queue = mp.JoinableQueue()
nfr_process = mp.Process(target = _writeNFR, args=(nfr_queue, args.out))
nfr_process.start()
if params.ins_track is None:
ins_handle = open(args.out + '.ins.bedgraph','w')
ins_handle.close()
ins_queue = mp.JoinableQueue()
ins_process = mp.Process(target = _writeIns, args=(ins_queue, args.out))
ins_process.start()
for j in sets:
tmp = pool1.map(_nfrHelper, zip(j,itertools.repeat(params)))
for result in tmp:
if params.ins_track is None:
nfr_queue.put(result[0])
ins_queue.put(result[1])
else:
nfr_queue.put(result)
pool1.close()
pool1.join()
nfr_queue.put('STOP')
nfr_process.join()
if params.ins_track is None:
ins_queue.put('STOP')
ins_process.join()
pysam.tabix_compress(args.out + '.nfrpos.bed', args.out + '.nfrpos.bed.gz',force = True)
shell_command('rm ' + args.out + '.nfrpos.bed')
pysam.tabix_index(args.out + '.nfrpos.bed.gz', preset = "bed", force = True)
if params.ins_track is None:
pysam.tabix_compress(args.out + '.ins.bedgraph', args.out + '.ins.bedgraph.gz', force = True)
shell_command('rm ' + args.out + '.ins.bedgraph')
pysam.tabix_index(args.out + '.ins.bedgraph.gz', preset = "bed", force = True)
Example 12: testEmptyFileVCFGZWithoutIndex
def testEmptyFileVCFGZWithoutIndex(self):
with get_temp_context("tmp_testEmptyFileWithoutIndex.vcf") as fn:
with open(fn, "w"):
pass
pysam.tabix_compress(fn,
fn + ".gz",
force=True)
self.assertRaises(ValueError, pysam.VariantFile, fn + ".gz")
Example 13: convert_VariantFile_to_IndexedVariantFile
def convert_VariantFile_to_IndexedVariantFile(vf_path, ivf_path):
make_basedir(ivf_path)
tmp_path = get_tmp_path(ivf_path)
pysam.tabix_compress(vf_path, tmp_path, force=True)
os.rename(tmp_path, ivf_path)
pysam.tabix_index(
filename=ivf_path, force=True,
seq_col=0, start_col=1, end_col=1, # note: `pysam.tabix_index` calls the first column `0`, but cmdline `tabix` calls it `1`.
line_skip=1, # skip header
)
Example 14: run_merge
def run_merge(args):
if not args.out:
args.out = '.'.join(os.path.basename(args.nucpos).split('.')[0:-3])
occ = NucList.read(args.occpeaks, "occ", args.min_occ)
nuc = NucList.read(args.nucpos, "nuc", args.min_occ)
new = merge(occ, nuc, args.sep)
out = open(args.out + '.nucmap_combined.bed','w')
out.write(new.asBed())
out.close()
pysam.tabix_compress(args.out + '.nucmap_combined.bed', args.out + '.nucmap_combined.bed.gz',force = True)
shell_command('rm ' + args.out + '.nucmap_combined.bed')
pysam.tabix_index(args.out + '.nucmap_combined.bed.gz', preset = "bed", force = True)
Example 15: testEmptyFileVCFGZ
def testEmptyFileVCFGZ(self):
with open("tmp_testEmptyFile.vcf", "w"):
pass
pysam.tabix_compress("tmp_testEmptyFile.vcf",
"tmp_testEmptyFile.vcf.gz")
self.assertRaises(ValueError, pysam.VariantFile,
"tmp_testEmptyFile.vcf.gz")
os.unlink("tmp_testEmptyFile.vcf")
os.unlink("tmp_testEmptyFile.vcf.gz")