当前位置: 首页>>代码示例>>Python>>正文


Python pysam.tabix_compress函数代码示例

本文整理汇总了Python中pysam.tabix_compress函数的典型用法代码示例。如果您正苦于以下问题:Python tabix_compress函数的具体用法?Python tabix_compress怎么用?Python tabix_compress使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了tabix_compress函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

def main():
    # Read options, args.
    parser = optparse.OptionParser()
    (options, args) = parser.parse_args()
    input_fname, output_fname = args

    pysam.tabix_compress(input_fname, output_fname, force=True)
开发者ID:ImmPortDB,项目名称:immport-galaxy,代码行数:7,代码来源:vcf_to_vcf_bgzip.py

示例2: make_bias_track

def make_bias_track(args, bases = 500000, splitsize = 1000):
    """function to compute bias track

    """
    if args.out is None:
        if args.bed is not None:
            args.out = '.'.join(os.path.basename(args.bed).split('.')[0:-1])
        else:
            args.out = '.'.join(os.path.basename(args.fasta).split('.')[0:-1])
    params = _BiasParams(args.fasta, args.pwm)
    if args.bed is None:
        chunks = ChunkList.convertChromSizes(params.chrs, splitsize = splitsize)
        sets = chunks.split(items = bases/splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sets = chunks.split(bases = bases)
    maxQueueSize = max(2,int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    pool = mp.Pool(processes = max(1,args.cores-1))
    out_handle = open(args.out + '.Scores.bedgraph','w')
    out_handle.close()
    write_queue = mp.JoinableQueue(maxsize = maxQueueSize)
    write_process = mp.Process(target = _writeBias, args=(write_queue, args.out))
    write_process.start()
    for j in sets:
        tmp = pool.map(_biasHelper, zip(j,itertools.repeat(params)))
        for track in tmp:
            write_queue.put(track)
    pool.close()
    pool.join()
    write_queue.put('STOP')
    write_process.join()
    pysam.tabix_compress(args.out + '.Scores.bedgraph', args.out + '.Scores.bedgraph.gz', force = True)
    shell_command('rm ' + args.out + '.Scores.bedgraph')
    pysam.tabix_index(args.out + '.Scores.bedgraph.gz', preset = "bed", force = True)
开发者ID:kesteph,项目名称:NucleoATAC,代码行数:35,代码来源:make_bias_track.py

示例3: get_cov

def get_cov(args, bases = 50000, splitsize = 1000):
    """function to get coverages

    """
    if not args.out:
        if args.bed is None:
            args.out = '.'.join(os.path.basename(args.bam).split('.')[0:-1])
        else:
            args.out = '.'.join(os.path.basename(args.bed).split('.')[0:-1])
    if args.bed is None:
        chrs = read_chrom_sizes_from_bam(args.bam)
        chunks = ChunkList.convertChromSizes(chrs, splitsize = splitsize)
        sets = chunks.split(items = bases/splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sets = chunks.split(bases = bases)
    maxQueueSize = max(2,int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    pool1 = mp.Pool(processes = max(1,args.cores-1))
    out_handle = open(args.out + '.cov.bedgraph','w')
    out_handle.close()
    write_queue = mp.JoinableQueue(maxsize = maxQueueSize)
    write_process = mp.Process(target = _writeCov, args=(write_queue, args.out))
    write_process.start()
    for j in sets:
        tmp = pool1.map(_covHelper, zip(j,itertools.repeat(args)))
        for track in tmp:
            write_queue.put(track)
    pool1.close()
    pool1.join()
    write_queue.put('STOP')
    write_process.join()
    pysam.tabix_compress(args.out + '.cov.bedgraph', args.out + '.cov.bedgraph.gz', force = True)
    shell_command('rm ' + args.out + '.cov.bedgraph')
    pysam.tabix_index(args.out + '.cov.bedgraph.gz', preset = "bed", force = True)
开发者ID:JordiAlbert,项目名称:NucleoATAC,代码行数:35,代码来源:get_cov.py

示例4: main

def main():
    # Read options, args.
    parser = optparse.OptionParser()
    parser.add_option('-c', '--chr-col', type='int', dest='chrom_col')
    parser.add_option('-s', '--start-col', type='int', dest='start_col')
    parser.add_option('-e', '--end-col', type='int', dest='end_col')
    parser.add_option('-P', '--preset', dest='preset')
    (options, args) = parser.parse_args()
    input_fname, output_fname = args

    tmpfile = tempfile.NamedTemporaryFile()
    sort_params = None

    if options.chrom_col and options.start_col and options.end_col:
        sort_params = [
            "sort",
            "-k%(i)s,%(i)s" % {'i': options.chrom_col},
            "-k%(i)i,%(i)in" % {'i': options.start_col},
            "-k%(i)i,%(i)in" % {'i': options.end_col}
        ]
    elif options.preset == "bed":
        sort_params = ["sort", "-k1,1", "-k2,2n", "-k3,3n"]
    elif options.preset == "vcf":
        sort_params = ["sort", "-k1,1", "-k2,2n"]
    elif options.preset == "gff":
        sort_params = ["sort", "-s", "-k1,1", "-k4,4n"]  # stable sort on start column
    # Skip any lines starting with "#" and "track"
    grepped = subprocess.Popen(["grep", "-e", "^\"#\"", "-e", "^track", "-v", input_fname], stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    after_sort = subprocess.Popen(sort_params, stdin=grepped.stdout, stderr=subprocess.PIPE, stdout=tmpfile)
    grepped.stdout.close()
    output, err = after_sort.communicate()

    pysam.tabix_compress(tmpfile.name, output_fname, force=True)
开发者ID:ImmPortDB,项目名称:immport-galaxy,代码行数:33,代码来源:bgzip.py

示例5: _create_tabix

def _create_tabix(fname, ftype):
    logger = logging.getLogger("pita")
    tabix_file = ""
    logger.info("Creating tabix index for %s", os.path.basename(fname))
    logger.debug("Preparing %s for tabix", fname)
    tmp = NamedTemporaryFile(prefix="pita", delete=False)
    preset = "gff"
    if ftype == "bed":
        cmd = "sort -k1,1 -k2g,2 {0} | grep -v track | grep -v \"^#\" > {1}"
        preset = "bed"
    elif ftype in ["gff", "gff3", "gtf"]:
        cmd = "sort -k1,1 -k4g,4 {0} | grep -v \"^#\" > {1}"

    # Sort the input file
    logger.debug(cmd.format(fname, tmp.name))
    sp.call(cmd.format(fname, tmp.name), shell=True)
    # Compress using bgzip
    logger.debug("compressing %s", tmp.name)
    tabix_file = tmp.name + ".gz"
    pysam.tabix_compress(tmp.name, tabix_file)
    tmp.close()
    # Index (using tabix command line, as pysam.index results in a Segmentation fault
    logger.debug("indexing %s", tabix_file)
    sp.call("tabix {0} -p {1}".format(tabix_file, preset), shell=True)
    return tabix_file
开发者ID:Jorisvansteenbrugge,项目名称:pita,代码行数:25,代码来源:io.py

示例6: ensureIndexed

def ensureIndexed(bedPath, preset="bed", trySorting=True):
    if not bedPath.endswith(".gz"):
        if not os.path.exists(bedPath + ".gz"):
            logging.info("bgzf compressing {}".format(bedPath))
            pysam.tabix_compress(bedPath, bedPath + ".gz")
            if not os.path.exists(bedPath + ".gz"):
                raise Exception(
                    "Failed to create compress {preset} file for {file}; make sure the {preset} file is "
                    "sorted and the directory is writeable".format(preset=preset, file=bedPath)
                )
        bedPath += ".gz"
    if not os.path.exists(bedPath + ".tbi"):
        logging.info("creating tabix index for {}".format(bedPath))
        pysam.tabix_index(bedPath, preset=preset)
        if not os.path.exists(bedPath + ".tbi"):
            raise Exception(
                "Failed to create tabix index file for {file}; make sure the {preset} file is "
                "sorted and the directory is writeable".format(preset=preset, file=bedPath)
            )

    line = pysam.Tabixfile(bedPath).fetch().next()
    if len(line.strip().split("\t")) < 6 and preset == "bed":
        raise AnnotationError(
            "BED files need to have at least 6 (tab-delimited) fields (including "
            "chrom, start, end, name, score, strand; score is unused)"
        )
    if len(line.strip().split("\t")) < 9 and preset == "gff":
        raise AnnotationError("GFF/GTF files need to have at least 9 tab-delimited fields")

    return bedPath
开发者ID:apregier,项目名称:svviz,代码行数:30,代码来源:tabix.py

示例7: eff_vcf

    def eff_vcf(self, inVcf, outVcf, genome, java_flags='-Xmx2g',
            in_format='vcf', out_format='vcf', eff_options=''):
        """
        TODO: docstring here
        """
        if outVcf.endswith('.vcf.gz'):
            tmpVcf = util.file.mkstempfname(prefix='vcf_snpEff-', suffix='.vcf')
        else:
            tmpVcf = outVcf

        args = ' '.join([
                'eff',
                    '-c', '{}/snpEff.config'.format(self.executable_path()),
                    '-i', in_format,
                    '-o', out_format,
                    genome,
                    '-treatAllAsProteinCoding false',
                    '-noLog',
                    '-ud 0',
                    '-noStats',
                    eff_options
                ])

        if inVcf.endswith('.gz'):
            pre_pipe = "zcat {} | ".format(inVcf)
        else:
            pre_pipe = "cat {} | ".format(inVcf)
        post_pipe = " > {}".format(tmpVcf)
        self.execute(args, java_flags=java_flags, pre_pipe=pre_pipe,
                post_pipe=post_pipe)
        
        if outVcf.endswith('.vcf.gz'):
            pysam.tabix_compress(tmpVcf, outVcf, force=True)
            pysam.tabix_index(outVcf, force=True, preset='vcf')
            os.unlink(tmpVcf)
开发者ID:mlin,项目名称:viral-ngs,代码行数:35,代码来源:snpeff.py

示例8: testIndexPresetCompressed

    def testIndexPresetCompressed(self):
        '''test indexing via preset.'''

        pysam.tabix_compress(self.tmpfilename, self.tmpfilename + ".gz")
        pysam.tabix_index(self.tmpfilename + ".gz", preset=self.preset)
        checkBinaryEqual(self.tmpfilename + ".gz", self.filename)
        checkBinaryEqual(self.tmpfilename + ".gz.tbi", self.filename_idx)
开发者ID:humburg,项目名称:pysam,代码行数:7,代码来源:tabix_test.py

示例9: annotate_vcf

    def annotate_vcf(self, inVcf, genome, outVcf, JVMmemory=None):
        """
        Annotate variants in VCF file with translation consequences using snpEff.
        """
        if outVcf.endswith('.vcf.gz'):
            tmpVcf = util.file.mkstempfname(prefix='vcf_snpEff-', suffix='.vcf')
        elif outVcf.endswith('.vcf'):
            tmpVcf = outVcf
        else:
            raise Exception("invalid input")

        args = [
            '-treatAllAsProteinCoding', 'false',
            '-t',
            '-noLog',
            '-ud', '0',
            '-noStats',
            '-noShiftHgvs',
            genome,
            inVcf
            ]
        with open(tmpVcf, 'wt') as outf:
            self.execute('ann', args, JVMmemory=JVMmemory, stdout=outf)
        
        if outVcf.endswith('.vcf.gz'):
            pysam.tabix_compress(tmpVcf, outVcf, force=True)
            pysam.tabix_index(outVcf, force=True, preset='vcf')
            os.unlink(tmpVcf)
开发者ID:ACEGID-Senegal,项目名称:viral-ngs,代码行数:28,代码来源:snpeff.py

示例10: annotate_vcf

    def annotate_vcf(self, inVcf, genomes, outVcf, emailAddress, JVMmemory=None):
        """
        Annotate variants in VCF file with translation consequences using snpEff.
        """
        if outVcf.endswith('.vcf.gz'):
            tmpVcf = util.file.mkstempfname(prefix='vcf_snpEff-', suffix='.vcf')
        elif outVcf.endswith('.vcf'):
            tmpVcf = outVcf
        else:
            raise Exception("invalid input")

        sortedAccessionString = ", ".join(sorted(genomes))
        databaseId = hashlib.sha256(sortedAccessionString.encode('utf-8')).hexdigest()[:55]

        genomeToUse = ""

        # if we don't have the genome, by name (snpEff official) or by hash (custom)
        if (not self.has_genome(databaseId)):
            if (not self.has_genome(genomes[0])):
                _log.info("Checking for snpEff database online...")
                # check to see if it is available for download, and if so install it
                for row in self.available_databases():
                    if (genomes[0].lower() in row['Genome'].lower()) or (
                        genomes[0].lower() in row['Bundle'].lower()
                    ) or (
                        genomes[0].lower() in row['Organism'].lower()
                    ):
                        self.download_db(row['Genome'])

        # backward compatability for where a single genome name is provided
        if self.has_genome(genomes[0]):
            genomeToUse = genomes[0]
        else:
            # if the hash of the accessions passed in is not present in the genomes db
            if not self.has_genome(databaseId):
                self.create_db(genomes, emailAddress, JVMmemory)

            if self.has_genome(databaseId):
                genomeToUse = databaseId

        if not genomeToUse:
            raise Exception()

        args = [
            '-treatAllAsProteinCoding', 'false', '-t', '-noLog', '-ud', '0', '-noStats', '-noShiftHgvs', genomeToUse,
            os.path.realpath(inVcf)
        ]

        command_ps = self.execute('ann', args, JVMmemory=JVMmemory)
        if command_ps.returncode == 0:
            with open(tmpVcf, 'wt') as outf:
               outf.write(command_ps.stdout.decode("utf-8"))

            if outVcf.endswith('.vcf.gz'):
                pysam.tabix_compress(tmpVcf, outVcf, force=True)
                pysam.tabix_index(outVcf, force=True, preset='vcf')
                os.unlink(tmpVcf)
        else:
            raise subprocess.CalledProcessError(cmd=command_ps.args, returncode=command_ps.returncode, output=command_ps.stdout)
开发者ID:mypandos,项目名称:viral-ngs,代码行数:59,代码来源:snpeff.py

示例11: run_nfr

def run_nfr(args):
    """run nfr calling

    """
    if args.bam is None and args.ins_track is None:
        raise Exception("Must supply either bam file or insertion track")
    if not args.out:
        args.out = '.'.join(os.path.basename(args.calls).split('.')[0:-3])
    if args.fasta is not None:
        chrs_fasta = read_chrom_sizes_from_fasta(args.fasta)
        pwm = PWM.open(args.pwm)
        chunks = ChunkList.read(args.bed, chromDict = chrs_fasta, min_offset = max(pwm.up, pwm.down))
    else:
        chunks = ChunkList.read(args.bed)
    if args.bam is not None:
        chrs_bam = read_chrom_sizes_from_bam(args.bam)
        chunks.checkChroms(chrs_bam, chrom_source = "BAM file") 
    chunks.merge()
    maxQueueSize = args.cores * 10 
    params = NFRParameters(args.occ_track, args.calls, args.ins_track, args.bam, max_occ = args.max_occ, max_occ_upper = args.max_occ_upper,
                            fasta = args.fasta, pwm = args.pwm)
    sets = chunks.split(items = args.cores * 5)
    pool1 = mp.Pool(processes = max(1,args.cores-1))
    nfr_handle = open(args.out + '.nfrpos.bed','w')
    nfr_handle.close()
    nfr_queue = mp.JoinableQueue()
    nfr_process = mp.Process(target = _writeNFR, args=(nfr_queue, args.out))
    nfr_process.start()
    if params.ins_track is None:
        ins_handle = open(args.out + '.ins.bedgraph','w')
        ins_handle.close()
        ins_queue = mp.JoinableQueue()
        ins_process = mp.Process(target = _writeIns, args=(ins_queue, args.out))
        ins_process.start()
    for j in sets:
        tmp = pool1.map(_nfrHelper, zip(j,itertools.repeat(params)))
        for result in tmp:
            if params.ins_track is None:
                nfr_queue.put(result[0])
                ins_queue.put(result[1])
            else:
                nfr_queue.put(result)
    pool1.close()
    pool1.join()
    nfr_queue.put('STOP')
    nfr_process.join()
    if params.ins_track is None:
        ins_queue.put('STOP')
        ins_process.join()
    pysam.tabix_compress(args.out + '.nfrpos.bed', args.out + '.nfrpos.bed.gz',force = True)
    shell_command('rm ' + args.out + '.nfrpos.bed')
    pysam.tabix_index(args.out + '.nfrpos.bed.gz', preset = "bed", force = True)
    if params.ins_track is None:
        pysam.tabix_compress(args.out + '.ins.bedgraph', args.out + '.ins.bedgraph.gz', force = True)
        shell_command('rm ' + args.out + '.ins.bedgraph')
        pysam.tabix_index(args.out + '.ins.bedgraph.gz', preset = "bed", force = True)
开发者ID:JordiAlbert,项目名称:NucleoATAC,代码行数:56,代码来源:run_nfr.py

示例12: testEmptyFileVCFGZWithoutIndex

    def testEmptyFileVCFGZWithoutIndex(self):
        with get_temp_context("tmp_testEmptyFileWithoutIndex.vcf") as fn:
            with open(fn, "w"):
                pass

            pysam.tabix_compress(fn,
                                 fn + ".gz",
                                 force=True)

            self.assertRaises(ValueError, pysam.VariantFile, fn + ".gz")
开发者ID:msto,项目名称:pysam,代码行数:10,代码来源:VariantFile_test.py

示例13: convert_VariantFile_to_IndexedVariantFile

def convert_VariantFile_to_IndexedVariantFile(vf_path, ivf_path):
    make_basedir(ivf_path)
    tmp_path = get_tmp_path(ivf_path)
    pysam.tabix_compress(vf_path, tmp_path, force=True)
    os.rename(tmp_path, ivf_path)

    pysam.tabix_index(
        filename=ivf_path, force=True,
        seq_col=0, start_col=1, end_col=1, # note: `pysam.tabix_index` calls the first column `0`, but cmdline `tabix` calls it `1`.
        line_skip=1, # skip header
    )
开发者ID:statgen,项目名称:pheweb,代码行数:11,代码来源:file_utils.py

示例14: run_merge

def run_merge(args):
    if not args.out:
        args.out = '.'.join(os.path.basename(args.nucpos).split('.')[0:-3])
    occ = NucList.read(args.occpeaks, "occ", args.min_occ)
    nuc = NucList.read(args.nucpos, "nuc", args.min_occ)
    new = merge(occ, nuc, args.sep)
    out = open(args.out + '.nucmap_combined.bed','w')
    out.write(new.asBed())
    out.close()
    pysam.tabix_compress(args.out + '.nucmap_combined.bed', args.out + '.nucmap_combined.bed.gz',force = True)
    shell_command('rm ' + args.out + '.nucmap_combined.bed')
    pysam.tabix_index(args.out + '.nucmap_combined.bed.gz', preset = "bed", force = True)
开发者ID:JordiAlbert,项目名称:NucleoATAC,代码行数:12,代码来源:merge.py

示例15: testEmptyFileVCFGZ

    def testEmptyFileVCFGZ(self):
        with open("tmp_testEmptyFile.vcf", "w"):
            pass

        pysam.tabix_compress("tmp_testEmptyFile.vcf",
                             "tmp_testEmptyFile.vcf.gz")

        self.assertRaises(ValueError, pysam.VariantFile,
                          "tmp_testEmptyFile.vcf.gz")

        os.unlink("tmp_testEmptyFile.vcf")
        os.unlink("tmp_testEmptyFile.vcf.gz")
开发者ID:nickwork,项目名称:pysam,代码行数:12,代码来源:VariantFile_test.py


注:本文中的pysam.tabix_compress函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。