当前位置: 首页>>代码示例>>Python>>正文


Python pysam.faidx函数代码示例

本文整理汇总了Python中pysam.faidx函数的典型用法代码示例。如果您正苦于以下问题:Python faidx函数的具体用法?Python faidx怎么用?Python faidx使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了faidx函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: gatk_realigner

def gatk_realigner(align_bam, ref_file, config, dbsnp=None, region=None,
                   out_file=None, deep_coverage=False):
    """Realign a BAM file around indels using GATK, returning sorted BAM.
    """
    runner = broad.runner_from_config(config)
    runner.run_fn("picard_index", align_bam)
    runner.run_fn("picard_index_ref", ref_file)
    if not os.path.exists("%s.fai" % ref_file):
        pysam.faidx(ref_file)
    if region:
        align_bam = subset_bam_by_region(align_bam, region, out_file)
        runner.run_fn("picard_index", align_bam)
    if has_aligned_reads(align_bam, region):
        variant_regions = config["algorithm"].get("variant_regions", None)
        realign_target_file = gatk_realigner_targets(runner, align_bam,
                                                     ref_file, dbsnp, region,
                                                     out_file, deep_coverage,
                                                     variant_regions)
        realign_bam = gatk_indel_realignment(runner, align_bam, ref_file,
                                             realign_target_file, region,
                                             out_file, deep_coverage)
        # No longer required in recent GATK (> Feb 2011) -- now done on the fly
        # realign_sort_bam = runner.run_fn("picard_fixmate", realign_bam)
        return realign_bam
    elif out_file:
        shutil.copy(align_bam, out_file)
        return out_file
    else:
        return align_bam
开发者ID:brentp,项目名称:bcbio-nextgen,代码行数:29,代码来源:realign.py

示例2: __init__

    def __init__(self, filename):
        if not os.path.exists(filename + '.fai'):
            import pysam
            pysam.faidx(filename)

        self.fasta = open(filename)
        self.index = self.load_index(filename + '.fai')
开发者ID:hyeshik,项目名称:tailseeker,代码行数:7,代码来源:sequtils.py

示例3: extracAllels

def extracAllels(chrom, vcf_coord, var_descr, genref):
    '''compute alternative allel from description in bed file.
    must be one of sub(C->T), ins(CCCT), del(5)
    '''
    ref_allel = pysam.faidx(genref, chrom+':'+vcf_coord+'-'+vcf_coord)[1].strip()
    if 'sub' in var_descr:
        yy = var_descr.split('->')
        ref = yy[0][-1]
        if ref == ref_allel:
            return '\t'.join([ref, yy[1][0]])
        else:
            print 'ref allels do not match, exiting...'
            print chrom, vcf_coord, var_descr
            sys.exit(1)
    elif 'ins' in var_descr:
        yy = var_descr.split('(')[1]
        return '\t'.join([ref_allel, ref_allel + yy[:-1]])
    elif 'del' in var_descr:
        yy = var_descr.split('(')[1]
        del_len = int(yy[:-1])
        vcf_coord_end = str(int(vcf_coord) + int(del_len))
        ref = pysam.faidx(genref, chrom+':'+vcf_coord+'-'+vcf_coord_end)[1].strip()
        if ref[0] == ref_allel:
            return '\t'.join([ref, ref_allel])
        else:
            print 'ref allels do not match in del, exiting...'
            print chrom, vcf_coord, var_descr
            sys.exit(1)
    else:
        print 'format not found, exiting...'
        sys.exit(1)
开发者ID:asalomatov,项目名称:misc_scripts,代码行数:31,代码来源:ios2table.py

示例4: bed_tofasta

def bed_tofasta(bed, ref_fasta, min_size=50, stranded=True, include_name=False, out=sys.stdout):
    if not os.path.exists('%s.fai' % ref_fasta):
        pysam.faidx(ref_fasta)

    fasta = pysam.Fastafile(ref_fasta)

    refs = set()
    with open('%s.fai' % ref_fasta) as f:
        for line in f:
            refs.add(line.split('\t')[0].strip())

    name = ''
    for region in bed:
        if include_name:
            name = '%s|' % (region.name.strip())

        if region.end - region.start >= min_size and region.chrom in refs:
            seq = fasta.fetch(region.chrom, region.start, region.end)
            if stranded and region.strand:
                if region.strand == '-':
                    seq = revcomp(seq)
                out.write('>%s%s:%d-%d[%s]\n%s\n' % (name, region.chrom, region.start, region.end, region.strand, seq))
            else:
                out.write('>%s%s:%d-%d%s\n%s\n' % (name, region.chrom, region.start, region.end, seq))

    fasta.close()
开发者ID:dickgroenenberg,项目名称:ngsutils,代码行数:26,代码来源:tofasta.py

示例5: resolved_tool_contract_runner

def resolved_tool_contract_runner(resolved_contract):
    rc = resolved_contract
    alignment_path = rc.task.input_files[0]
    reference_path = rc.task.input_files[1]
    gff_path = rc.task.output_files[0]
    vcf_path = rc.task.output_files[1]
    dataset_path = rc.task.output_files[2]
    fasta_path = re.sub(".contigset.xml", ".fasta", dataset_path)
    fastq_path = rc.task.output_files[3]
    args = [
        alignment_path,
        "--verbose",
        "--reference", reference_path,
        "--outputFilename", gff_path,
        "--outputFilename", fasta_path,
        "--outputFilename", fastq_path,
        "--outputFilename", vcf_path,
        "--numWorkers", str(rc.task.nproc),
        "--minCoverage", str(rc.task.options[Constants.MIN_COVERAGE_ID]),
        "--minConfidence", str(rc.task.options[Constants.MIN_CONFIDENCE_ID]),
        "--maskRadius", str(Constants.DEFAULT_MASK_RADIUS) if \
                        bool(rc.task.options[Constants.MASKING_ID]) else "0",
        "--algorithm", rc.task.options[Constants.ALGORITHM_ID],
        "--alignmentSetRefWindows",
    ]
    args_ = get_parser().arg_parser.parser.parse_args(args)
    rc = args_runner(args_)
    if rc == 0:
        pysam.faidx(fasta_path)
        ds = ContigSet(fasta_path, strict=True)
        ds.write(dataset_path)
    return rc
开发者ID:PacificBiosciences,项目名称:GenomicConsensus,代码行数:32,代码来源:main.py

示例6: generate_data_files

def generate_data_files(dir_name=None):
    import logging
    logging.basicConfig(level=logging.INFO)
    if dir_name is not None:
        os.chdir(dir_name)
    with open("tst1.fasta", "w") as f:
        f.write(">ecoliK12_pbi_March2013_2955000_to_2980000\n")
        f.write("AAAGAGAGAG" * 2500)
    pysam.faidx("tst1.fasta")
    for i in range(len(sam_strings)):
        sam_file = "tst_%d_subreads.sam" % (i + 1)
        bam_file = "tst_%d_subreads.bam" % (i + 1)
        with open(sam_file, "w") as sam_out:
            sam_out.write(sam_strings[i])
        logging.info("Converting {s} to BAM".format(s=sam_file))
        # FIXME pysam is way broken - can't handle unmapped input?
        # convert to bam using pysam
        # with pysam.AlignmentFile(sam_file, "r", check_sq=False) as sam_in:
        #    with pysam.AlignmentFile(bam_file, "wb",
        #                             template=sam_in) as bam_out:
        #        for s in sam_in:
        #            bam_out.write(s)
        args = ["samtools", "view", "-b", "-o", bam_file, sam_file]
        assert subprocess.call(args) == 0, args
        os.remove(sam_file)
        # XXX don't create .pbi for this file, we want it to be absent
        if bam_file != "tst_2_subreads.bam":
            logging.info("Indexing {b}".format(b=bam_file))
            subprocess.call(["pbindex", bam_file])
开发者ID:mpkocher,项目名称:pbcoretools,代码行数:29,代码来源:test_pbvalidate_bam.py

示例7: run

    def run(self):
        AbstractAnalysis.run(self) #Call base method to do some logging
        localBamFile = os.path.join(self.getLocalTempDir(), "mapping.bam")
        localSortedBamFile = os.path.join(self.getLocalTempDir(), "mapping.sorted")

        samToBamFile(self.samFile, localBamFile)
        pysam.sort(localBamFile, localSortedBamFile)
        pysam.index(localSortedBamFile + ".bam")
        pysam.faidx(self.referenceFastaFile)
        
        file_header = self.readFastqFile.split(".fastq")[0].split("/")[-1] +  "_" + self.referenceFastaFile.split(".fa")[0].split("/")[-1]
        consensus_vcf = os.path.join(self.outputDir, file_header + "_Consensus.vcf")
        consensus_fastq = os.path.join(self.outputDir, file_header + "_Consensus.fastq")

        system("samtools mpileup -Q 0 -uf %s %s | bcftools view -cg - > %s" \
                % (self.referenceFastaFile, localSortedBamFile + ".bam", consensus_vcf))
        system("vcfutils.pl vcf2fq %s > %s" % (consensus_vcf, consensus_fastq))
        system("rm -rf %s" % (self.referenceFastaFile + ".fai"))
        
        formatted_consensus_fastq = os.path.join(self.getLocalTempDir(), "Consensus.fastq")
        
        formatConsensusFastq(consensus_fastq, formatted_consensus_fastq)
        system("mv %s %s" % (formatted_consensus_fastq, consensus_fastq))
        
        self.finish()
开发者ID:mitenjain,项目名称:nanopore,代码行数:25,代码来源:consensus.py

示例8: merge_mut2

def merge_mut2(mutation_file_list, output_file, reference):


    mut2sample = {}
    sample_ind = 0
    for mut_file in mutation_file_list:
        sample_ind = sample_ind + 1
        is_vcf = True if mut_file.endswith(".vcf") or mut_file.endswith(".vcf.gz") else False
        hin2 = gzip.open(mut_file, 'r') if mut_file.endswith(".gz") else open(mut_file, 'r')

        for line2 in hin2:
            F2 = line2.rstrip('\n').split('\t')
            if F2[0].startswith('#'): continue
            if F2[0] == "Chr": continue

            if is_vcf == False:
                pos, ref, alt = F2[1], F2[3], F2[4]
        
                # insertion
                if F2[3] == "-":
                    # get the sequence for the reference base
                    seq = ""    
                    for item in pysam.faidx(reference, F2[0] + ":" + str(F2[1]) + "-" + str(F2[1])):
                        seq = seq + item.rstrip('\n')
                    seq = seq.replace('>', '')
                    seq = seq.replace(F2[0] + ":" + str(F2[1]) + "-" + str(F2[1]), '')
                    ref, alt = seq, seq + F2[4]

                # deletion
                if F2[4] == "-":
                    # get the sequence for the reference base
                    seq = ""    
                    for item in pysam.faidx(reference, F2[0] + ":" + str(int(F2[1]) - 1) + "-" + str(int(F2[1]) - 1)):
                        seq = seq + item.rstrip('\n')
                    seq = seq.replace('>', '')
                    seq = seq.replace(F2[0] + ":" + str(int(F2[1]) - 1) + "-" + str(int(F2[1]) - 1), '')
                    pos, ref, alt = str(int(F2[1]) - 1), seq + F2[3], seq

                QUAL = 60
                INFO = "SOMATIC"

                key = '\t'.join([F2[0], pos, '.', ref, alt, str(QUAL), "PASS", INFO])

            else:

                key = '\t'.join(F2[0:8])

            if key not in mut2sample:
                mut2sample[key] = []

            mut2sample[key].append(str(sample_ind))

    sample_num = sample_ind

    hout = open(output_file, 'w')
    for mut in sorted(mut2sample):
        if len(mut2sample[mut]) == sample_num: continue
        print >> hout, mut + '\t' + ','.join(mut2sample[mut])

    hout.close()
开发者ID:Genomon-Project,项目名称:GenomonSplicingMutation,代码行数:60,代码来源:preprocess.py

示例9: resolved_tool_contract_runner

def resolved_tool_contract_runner(resolved_contract):
    rc = resolved_contract
    alignment_path = rc.task.input_files[0]
    reference_path = rc.task.input_files[1]
    gff_path = rc.task.output_files[0]
    dataset_path = rc.task.output_files[1]
    fasta_path = re.sub(".contigset.xml", ".fasta", dataset_path)
    fastq_path = rc.task.output_files[2]
    args = [
        alignment_path,
        "--verbose",
        "--reference", reference_path,
        "--outputFilename", gff_path,
        "--outputFilename", fasta_path,
        "--outputFilename", fastq_path,
        "--numWorkers", str(rc.task.nproc),
        "--minCoverage", str(rc.task.options[Constants.MIN_COVERAGE_ID]),
        "--minConfidence", str(rc.task.options[Constants.MIN_CONFIDENCE_ID]),
        "--algorithm", rc.task.options[Constants.ALGORITHM_ID],
        "--alignmentSetRefWindows",
    ]
    if rc.task.options[Constants.DIPLOID_MODE_ID]:
        args.append("--diploid")
    args_ = get_parser().arg_parser.parser.parse_args(args)
    rc = args_runner(args_)
    if rc == 0:
        pysam.faidx(fasta_path)
        ds = ContigSet(fasta_path, strict=True)
        ds.write(dataset_path)
    return rc
开发者ID:lpp1985,项目名称:lpp_Script,代码行数:30,代码来源:main.py

示例10: run

def run(referenceset, fastq, gff, fasta, contigset, alignmentset, options, log_level):
    #'--log-file foo.log',
    #'--verbose',
    #'--debug', # requires 'ipdb'
    #'-j NWORKERS',
    #'--algorithm quiver',
    #'--diploid', # binary
    #'--minConfidence 40',
    #'--minCoverage 5',
    #'--alignmentSetRefWindows',
    cmd = "variantCaller --log-level {log_level} {options} --referenceFilename {referenceset} -o {fastq} -o {gff} -o {fasta} {alignmentset}"
    system(cmd.format(**locals()))
    try:
        say('Converting fasta {!r} to contigset {!r}'.format(fasta, contigset))
        # Convert to contigset.xml

        import pysam
        pysam.faidx(fasta) # pylint: disable=no-member
        # I do not know why pylint does not see this defined.

        ds = ContigSet(fasta, strict=True)
        ds.write(contigset, relPaths=True)
        say('Successfully wrapped fasta {!r} in contigset {!r}'.format(fasta, contigset))
    except Exception:
        say(traceback.format_exc())
        say('Skipping conversion to contigset.')
开发者ID:PacificBiosciences,项目名称:FALCON-polish,代码行数:26,代码来源:run_variantCaller.py

示例11: fetch_file

def fetch_file(options):
    if len(options) != 4:
        sys.exit('fetch_ucsc.py hg19/hg38/mm10 ref/kg/ens/fa out')
    if options[1] in {'hg19', 'hg38', 'mm10'}:
        path = 'http://hgdownload.soe.ucsc.edu/goldenPath/%s/' % options[1]
    else:
        sys.exit('Only support human or mouse!')
    s = string.maketrans(' ', '_')
    if options[2] == 'ref':  # RefSeq gene annotations
        urllib.urlretrieve(path + 'database/refFlat.txt.gz', 'refFlat.txt.gz')
        with open(options[3], 'w') as outf:
            outf.write(gzip.open('refFlat.txt.gz', 'rb').read())
    elif options[2] == 'kg':  # KnownGenes gene annotations
        urllib.urlretrieve(path + 'database/knownGene.txt.gz',
                           'knownGene.txt.gz')
        urllib.urlretrieve(path + 'database/kgXref.txt.gz', 'kgXref.txt.gz')
        kg_iso = {}
        with gzip.open('kgXref.txt.gz', 'rb') as kg_id_f:
            for line in kg_id_f:
                iso = line.split('\t')[0]
                gene = line.split('\t')[4].translate(s)
                kg_iso[iso] = gene
        with gzip.open('knownGene.txt.gz', 'rb') as kg_f:
            with open(options[3], 'w') as outf:
                for line in kg_f:
                    entry = line.split('\t')
                    iso = entry[0]
                    outf.write('\t'.join([kg_iso[iso]] + entry[:10]) + '\n')
    elif options[2] == 'ens':  # Ensembl gene annotations
        if options[1] == 'hg38':
            sys.exit('No Ensembl gene annotations for hg38!')
        urllib.urlretrieve(path + 'database/ensGene.txt.gz', 'ensGene.txt.gz')
        urllib.urlretrieve(path + 'database/ensemblToGeneName.txt.gz',
                           'ensemblToGeneName.txt.gz')
        ens_iso = {}
        with gzip.open('ensemblToGeneName.txt.gz', 'rb') as ens_id_f:
            for line in ens_id_f:
                iso, gene = line.split()
                ens_iso[iso] = gene
        with gzip.open('ensGene.txt.gz', 'rb') as ens_f:
            with open(options[3], 'w') as outf:
                for line in ens_f:
                    entry = line.split()
                    iso = entry[1]
                    outf.write('\t'.join([ens_iso[iso]] + entry[1:11]) + '\n')
    elif options[2] == 'fa':  # Genome sequences
        if options[1] == 'hg38':
            fa_path = 'bigZips/hg38.chromFa.tar.gz'
        else:
            fa_path = 'bigZips/chromFa.tar.gz'
        urllib.urlretrieve(path + fa_path, 'chromFa.tar.gz')
        with tarfile.open('chromFa.tar.gz', 'r:gz') as fa:
            with open(options[3], 'w') as outf:
                for f in fa:
                    if f.isfile():
                        outf.write(fa.extractfile(f).read())
        pysam.faidx(options[3])
    else:
        sys.exit('Only support ref/kg/ens/fa!')
开发者ID:BioXiao,项目名称:CIRCexplorer2,代码行数:59,代码来源:fetch_ucsc.py

示例12: write_fa_subset

def write_fa_subset(seq_names, infile, outfile):
    if not os.path.exists(infile + '.fai'):
        pysam.faidx(infile)

    f = pyfastaq.utils.open_file_write(outfile)
    for name in seq_names:
        print(pysam.faidx(infile, name), end='', file=f)
    pyfastaq.utils.close(f)
开发者ID:andrewjpage,项目名称:ariba,代码行数:8,代码来源:faidx.py

示例13: check_fasta

def check_fasta(fa_f, pysam_flag=True):
    if not os.path.isfile(fa_f + '.fai'):
        pysam.faidx(fa_f)
    if pysam_flag:  # return pysam FastaFile object
        fa = pysam.FastaFile(fa_f)
        return fa
    else:  # return fasta file path
        return fa_f
开发者ID:YangLab,项目名称:CIRCexplorer2,代码行数:8,代码来源:parser.py

示例14: _generate_chunk_output_file

 def _generate_chunk_output_file(self, i=None):
     fn = tempfile.NamedTemporaryFile(suffix=".fasta").name
     suffix = "|arrow"
     with open(fn, "w") as f:
         header, seq = self.CHUNK_CONTIGS[i]
         f.write(">{h}{s}\n{q}".format(h=header, s=suffix, q=seq))
     pysam.faidx(fn)
     return self._make_dataset_file(fn)
开发者ID:mpkocher,项目名称:pbcoretools,代码行数:8,代码来源:test_tasks_scatter_gather.py

示例15: __init__

    def __init__(self, num, refname):
        self.num = int(num)
        self.refname = refname

        if not os.path.exists('%s.fai' % refname):
            pysam.faidx(refname)

        self.ref = pysam.Fastafile(refname)
开发者ID:pjbriggs,项目名称:tools-iuc,代码行数:8,代码来源:filter.py


注:本文中的pysam.faidx函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。