当前位置: 首页>>代码示例>>Python>>正文


Python pysam.index函数代码示例

本文整理汇总了Python中pysam.index函数的典型用法代码示例。如果您正苦于以下问题:Python index函数的具体用法?Python index怎么用?Python index使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了index函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: map_paired_reads

def map_paired_reads(pe1_path, pe2_path, genome_path, output_path, args):
    work_dir = tempfile.mkdtemp( )
    genome_db = os.path.join( work_dir, "genome" )
    pe1_output = os.path.join( work_dir, "pe1.sai" )
    pe2_output = os.path.join( work_dir, "pe2.sai" )
    bwa_output = os.path.join( work_dir, "output.sam" )
    
    null = open( "/dev/null" ) #open("/tmp/bwa_out")#
    subprocess.check_call( [ "bwa", "index", "-p", genome_db, genome_path ], stderr = null )
    with open( pe1_output, "w" ) as pe1_file:
        subprocess.check_call( [ "bwa", "aln", genome_db, pe1_path ], stdout = pe1_file, stderr = null )
    
    with open( pe2_output, "w" ) as pe2_file:
        subprocess.check_call( [ "bwa", "aln", genome_db, pe2_path ], stdout = pe2_file, stderr = null )
    
    with open( bwa_output, "w" ) as bwa_file:
        subprocess.check_call( [ "bwa", "sampe",
                                "-r", "@RG\tID:ILLUMINA\tSM:48_2\tPL:ILLUMINA\tLB:LIB1",
                                genome_db,
                                pe1_output, pe2_output,
                                pe1_path, pe2_path ], stdout = bwa_file, stderr = null )
 


    if args.sam:
        shutil.move(bwa_output ,output_path+'.sam')
        #os.rename(bwa_output ,output_path+'.sam')
    else:
        sam_to_bam( bwa_output, bwa_output + ".bam" )
        if args.sort:
            # coordinate sort the file
            pysam.sort( bwa_output + ".bam", output_path )
            pysam.index(output_path+'.bam')
        else:
            shutil.move(bwa_output +".bam",output_path+'.bam')
开发者ID:ksahlin,项目名称:generate_assembly,代码行数:35,代码来源:align.py

示例2: bwa_mem

def bwa_mem(pe1_path, pe2_path, genome_path, threads, output_path):
    print 'Aligning with bwa mem'
    start = time()
    work_dir = tempfile.mkdtemp()
    genome_db = os.path.join(work_dir, "genome")
    pe1_output = os.path.join(work_dir, "pe1.sai")
    pe2_output = os.path.join(work_dir, "pe2.sai")
    bwa_output = os.path.join(work_dir, "output.sam")
    stderr_file = open(output_path+'.bwa.1','w')

    #null = open("/dev/null")
    subprocess.check_call([ "bwa", "index", "-p", genome_db, genome_path ], stderr=stderr_file)
    with open(bwa_output, "w") as bwa_file:
        subprocess.check_call([ "bwa", "mem", "-t", threads,
                                genome_db, pe1_path, pe2_path ],
                              stdout=bwa_file,
                              stderr=stderr_file)

    elapsed = time() - start
    print 'Time elapsed for bwa mem: ', elapsed
    sam_to_bam(bwa_output, bwa_output + ".bam")
    pysam.sort(bwa_output + ".bam", output_path)
    pysam.index(output_path + '.bam')
   
    shutil.rmtree(work_dir)
开发者ID:sidorov-si,项目名称:BESST,代码行数:25,代码来源:reads_to_ctg_map.py

示例3: _generate_empty_bam_file

 def _generate_empty_bam_file(self, sam_path, bam_path_prefix):
     samfile = pysam.Samfile(sam_path, "r")
     bamfile = pysam.Samfile(
         "%s.bam" % bam_path_prefix, "wb", header=samfile.header)
     bamfile.close()
     samfile.close()
     pysam.index("%s.bam" % bam_path_prefix)
开发者ID:fengwangjiang,项目名称:READemption,代码行数:7,代码来源:sambamconverter.py

示例4: addReadGroupSet

    def addReadGroupSet(self, datasetName, filePath, moveMode):
        """
        Add a read group set to the repo
        """
        # move the bam file
        self._check()
        self._checkDataset(datasetName)
        self._checkFile(filePath, self.bamExtension)
        fileName = os.path.basename(filePath)
        readGroupSetName = filenameWithoutExtension(
            fileName, self.bamExtension)
        destPath = os.path.join(
            self._repoPath, self.datasetsDirName, datasetName,
            self.readsDirName, fileName)
        self._assertPathEmpty(destPath, inRepo=True)
        self._moveFile(filePath, destPath, moveMode)

        # move the index file if it exists, otherwise do indexing
        indexPath = os.path.join(
            os.path.split(filePath)[0],
            readGroupSetName + self.bamIndexExtension)
        indexMessage = ""
        if os.path.exists(indexPath):
            dstDir = os.path.split(destPath)[0]
            self._moveFile(
                indexPath,
                os.path.join(dstDir, os.path.basename(indexPath)),
                moveMode)
        else:
            pysam.index(destPath.encode('utf-8'))
            indexMessage = " (and indexed)"

        # finish
        self._repoEmit("ReadGroupSet '{}' added to dataset '{}'{}".format(
            fileName, datasetName, indexMessage))
开发者ID:bosz,项目名称:server,代码行数:35,代码来源:repo_manager.py

示例5: run_cufflinks

def run_cufflinks(org_db, num_cpus=4):
    """
    run cufflinks program on mapped reads 
    """

    try:
        subprocess.call(["cufflinks"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except:
        exit("Please make sure that the `Cufflinks` binary is in your $PATH")
    
    org_name = org_db['short_name'] 
    print("preparing for cufflinks run for organism %s" % org_name)

    min_intron_length = 20
    min_isoform_frac = 0.25
    max_intron_length = org_db['max_intron_len']
    result_dir = org_db['read_assembly_dir']

    bam_file = "%s/%s_Aligned_mmr_sortbyCoord.bam" % (org_db['read_map_dir'], org_name)
    if not os.path.isfile(bam_file):
        sys.stdout.write("failed to fetch sorted mmr BAM file for organism: %s, trying to get the mmr file...\n" % org_name)
        bam_file = "%s/%s_Aligned_mmr.bam" % (org_db['read_map_dir'], org_name)
        if not os.path.isfile(bam_file):
            exit("error: failed to fetch mmr BAM file for organism %s" % org_name)
        
        ## sorting, indexing the bam file 
        file_prefix, ext = os.path.splitext(bam_file)
        sorted_bam = "%s_sortbyCoord" % file_prefix

        sys.stdout.write("trying to sort based by the coordinates with output prefix as: %s\n" % sorted_bam)
        if not os.path.isfile("%s.bam" % sorted_bam):
            pysam.sort(bam_file, sorted_bam)
            
        bam_file = "%s.bam" % sorted_bam

    print('using bam file from %s' % bam_file)
    if not os.path.exists(bam_file + ".bai"):
        pysam.index(bam_file) 

    ## always use quiet mode to avoid problems with storing log output.
    cli_cuff = "cufflinks -q --no-update-check \
        -F %.2f \
        -I %d \
        --min-intron-length %d \
        --library-type fr-unstranded \
        -p %d \
        -o %s \
        %s" % (min_isoform_frac, max_intron_length, min_intron_length, num_cpus, result_dir, bam_file)
  
    sys.stdout.write('\trun cufflinks as: %s \n' % cli_cuff)
    try:
        os.chdir(result_dir)
        process = subprocess.Popen(cli_cuff, shell=True) 
        returncode = process.wait()

        if returncode !=0:
            raise Exception, "Exit status return code = %i" % returncode

    except Exception, e:
        print 'Error running cufflinks.\n%s' %  str( e )
开发者ID:vipints,项目名称:genomeutils,代码行数:60,代码来源:transcript_assembly.py

示例6: bwa_sampe

def bwa_sampe(pe1_path, pe2_path, genome_path, output_path):
    print 'Aligning with bwa aln/sampe'
    start = time()
    work_dir = tempfile.mkdtemp()
    genome_db = os.path.join(work_dir, "genome")
    pe1_output = os.path.join(work_dir, "pe1.sai")
    pe2_output = os.path.join(work_dir, "pe2.sai")
    bwa_output = os.path.join(work_dir, "output.sam")

    null = open("/dev/null")
    subprocess.check_call([ "bwa", "index", "-p", genome_db, genome_path ], stderr=null)
    with open(pe1_output, "w") as pe1_file:
        subprocess.check_call([ "bwa", "aln", genome_db, pe1_path ], stdout=pe1_file, stderr=null)

    with open(pe2_output, "w") as pe2_file:
        subprocess.check_call([ "bwa", "aln", genome_db, pe2_path ], stdout=pe2_file, stderr=null)

    with open(bwa_output, "w") as bwa_file:
        subprocess.check_call([ "bwa", "sampe",
                                genome_db,
                                pe1_output, pe2_output,
                                pe1_path, pe2_path ], stdout=bwa_file, stderr=null)

    elapsed = time() - start
    print 'Time elapsed for bwa aln/sampe: ', elapsed

    sam_to_bam(bwa_output, bwa_output + ".bam")
    pysam.sort(bwa_output + ".bam", output_path)
    pysam.index(output_path + '.bam')
开发者ID:jvhaarst,项目名称:BESST,代码行数:29,代码来源:reads_to_ctg_map.py

示例7: populate

 def populate(self, sam_file_name, minimum_alignment_score):
     if self.contig == "":
         RuntimeError("contig must be set before reading a bam file")
     if self.contig[0]==">":
         current_contig_to_analyse = self.contig.lstrip('>') #Necessary because there is no ">" in the bam file...
     else:
         current_contig_to_analyse = self.contig
     sys.stderr.write("Loading file %s\n" %sam_file_name)
     samfile = pysam.Samfile(sam_file_name, 'rb')
     if not samfile._hasIndex(): #if no index, we must build it
         samfile.close()
         sys.stderr.write("Building index for %s\n" % sam_file_name)
         pysam.index(sam_file_name)
         samfile = pysam.Samfile(sam_file_name, 'rb')
     if self.position-3 < 0:
         sys.stderr.write("%s position %s. I have problem computing this position\n" % (self.contig, self.position))
     for pileup_data in samfile.pileup(current_contig_to_analyse, max([0,self.position-3]), self.position+1):
         #print(str(self.position-3)+" "+str(pileup_data.pos)+" "+str(self.position+1))
         if self.position-3 <= pileup_data.pos <= self.position+1:
             #print('in')
             for pileup_read in pileup_data.pileups:
                 if not pileup_read.alignment.qname in self.reads:
                     self.reads[pileup_read.alignment.qname] = {}
                 if ord(pileup_read.alignment.qual[pileup_read.qpos])-33 > minimum_alignment_score:
                         self.reads[pileup_read.alignment.qname][int(pileup_data.pos+1)] = \
                             pileup_read.alignment.seq[pileup_read.qpos] #using biological position, not python.
     samfile.close()
开发者ID:plpla,项目名称:furry-bear,代码行数:27,代码来源:Genomic_environment.py

示例8: tophat_map

def tophat_map(gtf, out_dir, prefix, fastq, thread, bw=False, scale=False,
               gtf_flag=1):
    '''
    1. Map reads with TopHat2
    2. Extract unmapped reads
    3. Create BigWig file if needed
    '''
    # tophat2 mapping
    print('Map reads with TopHat2...')
    tophat_cmd = 'tophat2 -g 1 --microexon-search -m 2 '
    if gtf_flag:
        tophat_cmd += '-G %s ' % gtf
    tophat_cmd += '-p %s -o %s ' % (thread, out_dir + '/tophat')
    tophat_cmd += '%s/bowtie2_index/%s ' % (out_dir, prefix) + ','.join(fastq)
    tophat_cmd += ' 2> %s/tophat.log' % out_dir
    print('TopHat2 mapping command:')
    print(tophat_cmd)
    return_code = os.system(tophat_cmd) >> 8
    if return_code:
        sys.exit('Error: cannot map reads with TopHat2!')
    # extract unmapped reads
    print('Extract unmapped reads...')
    unmapped_bam = pybedtools.BedTool('%s/tophat/unmapped.bam' % out_dir)
    unmapped_bam.bam_to_fastq(fq='%s/tophat/unmapped.fastq' % out_dir)
    # create Bigwig file if needed
    if bw and which('bedGraphToBigWig') is not None:
        print('Create BigWig file...')
        map_bam_fname = '%s/tophat/accepted_hits.bam' % out_dir
        # index bam if not exist
        if not os.path.isfile(map_bam_fname + '.bai'):
            pysam.index(map_bam_fname)
        map_bam = pysam.AlignmentFile(map_bam_fname, 'rb')
        # extract chrom size file
        chrom_size_fname = '%s/tophat/chrom.size' % out_dir
        with open(chrom_size_fname, 'w') as chrom_size_f:
            for seq in map_bam.header['SQ']:
                chrom_size_f.write('%s\t%s\n' % (seq['SN'], seq['LN']))
        if scale:  # scale to HPB
            mapped_reads = map_bam.mapped
            for read in map_bam:
                read_length = read.query_length
                break
            s = 1000000000.0 / mapped_reads / read_length
        else:
            s = 1
        map_bam = pybedtools.BedTool(map_bam_fname)
        bedgraph_fname = '%s/tophat/accepted_hits.bg' % out_dir
        with open(bedgraph_fname, 'w') as bedgraph_f:
            for line in map_bam.genome_coverage(bg=True, g=chrom_size_fname,
                                                scale=s, split=True):
                value = str(int(float(line[3]) + 0.5))
                bedgraph_f.write('\t'.join(line[:3]) + '\t%s\n' % value)
        bigwig_fname = '%s/tophat/accepted_hits.bw' % out_dir
        return_code = os.system('bedGraphToBigWig %s %s %s' %
                                (bedgraph_fname, chrom_size_fname,
                                 bigwig_fname)) >> 8
        if return_code:
            sys.exit('Error: cannot convert bedGraph to BigWig!')
    else:
        print('Could not find bedGraphToBigWig, so skip this step!')
开发者ID:BioXiao,项目名称:CIRCexplorer2,代码行数:60,代码来源:align.py

示例9: splitByStrand

def splitByStrand(bamfile, pe):
    
    bam_prefix = bamfile.split(".bam")[0]
    
    if pe:
        flags = [('-f 0x40 -F 0x10', 'plus'), ('-f 0x40 -F 0x20', 'minus')]
        cmd_args = [['samtools', 'view',
                 '-b', flag[0], bamfile,
                 bam_prefix + "_" + flag[1] + ".bam"]for flag in flags]
    else:
        flags = [('-F 0x10', 'plus'), ('-f 0x10', 'minus')]
        cmd_args = [['samtools', 'view',
                 '-b', flag[0], bamfile,
                 bam_prefix + "_" + flag[1] + ".bam"]for flag in flags]
    
    
    
    for cmd_arg in cmd_args:
        print cmd_arg
        if os.path.exists(cmd_arg[5]): 
            continue
        outfile = open(cmd_arg[5], 'w')
        p = Popen(cmd_arg[:5], stdout=outfile)
        p.wait()
        pysam.index(cmd_arg[5])
    
    # Return split BAM names
    return([cmd_arg[5] for cmd_arg in cmd_args])
开发者ID:hjanime,项目名称:seqAnalysis,代码行数:28,代码来源:bamStrand.py

示例10: indexed_bam

def indexed_bam(bam_filename):
    import pysam
    if not os.path.exists(bam_filename + ".bai"):
        pysam.index(bam_filename)
    sam_reader = pysam.Samfile(bam_filename, "rb")
    yield sam_reader
    sam_reader.close()
开发者ID:NAL-i5K,项目名称:bam_to_bigwig,代码行数:7,代码来源:bam_to_bigwig.py

示例11: convert_sam_to_bam

def convert_sam_to_bam():
    """
    This method should take a newly create .sam file from alignment and
        - convert it to .bam
        - sort .bam
        - index .bam
    """
    ids = generate_ids()
    for id in ids:
        start_time = time()
        print 'converting: %s'%id
        base_path = os.path.join(SAMPLE_DIR, id)
        sam_path = os.path.join(base_path, id+'-bwape.sam')
        bam_path = os.path.join(base_path, id+'-bwape.bam')

        bam_content = pysam.view('-bS', sam_path)
        bam_file = open(bam_path, 'w+')
        bam_file.writelines(bam_content)
        bam_file.close()

        pysam.sort(bam_path, bam_path+'_sorted')
        pysam.index(bam_path+'_sorted.bam')

        # indexing creates file.bam.bam. Move it to file.bam
        bam_call = "mv {0} {1}".format(bam_path+'_sorted.bam', bam_path)
        index_call = "mv {0} {1}".format(bam_path+'_sorted.bam.bai',
                                         bam_path+'.bam.bai')
        subprocess.call(bam_call, shell=True)
        subprocess.call(index_call, shell=True)
        end_time = time()
        print 'completed: %.3fs'%(end_time-start_time)
开发者ID:wflynny,项目名称:miseq-analysis,代码行数:31,代码来源:bam_utilities.py

示例12: sort_bam

def sort_bam(in_bam, sort_fn, to_include=None):
    out_file = "%s-ksort%s" % os.path.splitext(in_bam)
    index_file = "%s.bai" % in_bam
    if not os.path.exists(index_file):
        pysam.index(in_bam)

    orig = pysam.Samfile(in_bam, "rb")
    chroms = [(c["SN"], c) for c in orig.header["SQ"]]
    new_chroms = chroms[:]
    if to_include:
        new_chroms = [(c, x) for (c, x) in new_chroms if c in to_include]
    new_chroms.sort(sort_fn)
    remapper = _id_remapper(chroms, new_chroms)
    new_header = orig.header
    new_header["SQ"] = [h for (_, h) in new_chroms]

    new = pysam.Samfile(out_file, "wb", header=new_header)
    for (chrom, _) in new_chroms:
        for read in orig.fetch(chrom):
            write = True
            read.rname = remapper[read.rname]
            try:
                read.mrnm = remapper[read.mrnm]
            # read pair is on a chromosome we are not using
            except KeyError:
                assert to_include is not None
                write = False
            if write:
                new.write(read)
开发者ID:Galithil,项目名称:bcbio-nextgen,代码行数:29,代码来源:resort_bam_karyotype.py

示例13: extractRegion

def extractRegion(bamfile,start,stop,output,exact):
    pysam.index(bamfile)                # must create a .bai index for any bam file to be read or fetch won't work
    bam = pysam.Samfile(bamfile,'rb')   # and must be done before bamfile is opened
    ref = bam.references[0]             # Get name of reference reads aligned to in bam
    outfile = open(bamfile+".extracted."+output,'w')

    # Get the reads in region of interest
    read_pool = bam.fetch(bam.references[0], start,stop)
    
    # Process reads
    for read in read_pool:
        if exact != '':
            if read.pos <= start and read.aend >= stop:
                if output == 'fastq':
                    outfile.write(writeFastQ(read))
                elif output =='fasta':
                    output.write(writeFastA(read))
        else:
            if output == 'fastq':
                outfile.write(writeFastQ(read))
            elif output == 'fasta':
                outfile.write(writeFastA(read))
        

    outfile.close()
    return
开发者ID:talonsensei,项目名称:Bfx_scripts,代码行数:26,代码来源:bamextract2.py

示例14: extractRegion

def extractRegion(bamfile,start,stop,output):
    pysam.index(bamfile)                # must create a .bai index for any bam file to be read or fetch won't work
    bam = pysam.Samfile(bamfile,'rb')   # and must be done before bamfile is opened
    ref = bam.references[0]             # Get name of reference reads aligned to in bam
    outfile = open(bamfile+".extracted."+output,'w')

    # Get the reads in region of interest
    read_pool = bam.fetch(bam.references[0], start,stop)
    
    # Process reads
    for read in read_pool:
        if read.is_reverse == True:                     # all reverse reads in a bam file have been reverse 
            seq = Seq(read.query)                       # complemented already so they need to be reverse 
            rc = seq.reverse_complement().tostring()    # complemented again, along with the quality scores
            rq = reverseString(read.qqual)              # to write correctly to the fastq
            if output == 'fastq':
                outfile.write("@"+read.qname+"\n"+rc+"\n+\n"+rq+"\n")
            elif output == 'fasta':
                outfile.write('>'+read.qname+'\n'+rc+'\n')
        else:
            if output == 'fastq':
                outfile.write("@"+read.qname+"\n"+read.query+"\n+\n"+read.qqual+"\n")
            elif output == 'fasta':
                outfile.write('>'+read.qname+'\n'+read.query+'\n')

    outfile.close()
    return
开发者ID:talonsensei,项目名称:Bfx_scripts,代码行数:27,代码来源:bamextract.py

示例15: read_directions_count

def read_directions_count(bam_file):
    """
    get the reads directions count from a bam file 

    @args bam_file: binary file formt for storing sequencing reads information
    @type bam_file: str 
    """

    ## indexing the in bam file 
    if not os.path.exists(bam_file + ".bai"):
        pysam.index(bam_file) 

    reverse_cnt = 0 
    forward_cnt = 0

    bam_fh = pysam.Samfile(bam_file, "rb") 

    for read in bam_fh.fetch():
         if read.is_proper_pair and read.is_read1:
            if read.is_reverse:
                reverse_cnt += 1
            else:
                forward_cnt += 1 

    bam_fh.close() 
    return {'forward_reads_count': forward_cnt, 'reverse_reads_count': reverse_cnt} 
开发者ID:kuod,项目名称:genomeutils,代码行数:26,代码来源:star_align_rna.py


注:本文中的pysam.index函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。