当前位置: 首页>>代码示例>>Python>>正文


Python pysam.sort函数代码示例

本文整理汇总了Python中pysam.sort函数的典型用法代码示例。如果您正苦于以下问题:Python sort函数的具体用法?Python sort怎么用?Python sort使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了sort函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: map_paired_reads

def map_paired_reads(pe1_path, pe2_path, genome_path, output_path):
    work_dir = tempfile.mkdtemp()
    genome_db = os.path.join(work_dir, "genome")
    pe1_output = os.path.join(work_dir, "pe1.sai")
    pe2_output = os.path.join(work_dir, "pe2.sai")
    bwa_output = os.path.join(work_dir, "output.sam")

    null = open("/dev/null")
    subprocess.check_call([ "bwa", "index", "-p", genome_db, genome_path ], stderr=null)
    with open(pe1_output, "w") as pe1_file:
        subprocess.check_call([ "bwa", "aln", genome_db, pe1_path ], stdout=pe1_file, stderr=null)

    with open(pe2_output, "w") as pe2_file:
        subprocess.check_call([ "bwa", "aln", genome_db, pe2_path ], stdout=pe2_file, stderr=null)

    with open(bwa_output, "w") as bwa_file:
        subprocess.check_call([ "bwa", "sampe",
                                "-r", "@RG\tID:ILLUMINA\tSM:48_2\tPL:ILLUMINA\tLB:LIB1",
                                genome_db,
                                pe1_output, pe2_output,
                                pe1_path, pe2_path ], stdout=bwa_file, stderr=null)

    sam_to_bam(bwa_output, bwa_output + ".bam")
    pysam.sort(bwa_output + ".bam", output_path)
    pysam.index(output_path + '.bam')
开发者ID:tnoorasyikin,项目名称:BESST,代码行数:25,代码来源:reads_to_ctg_map.py

示例2: convertSortAlign

def convertSortAlign(output_filename):
    # Pregenerate file names for all the intermediate steps (output_filename is the output of the Bowtie2 alignment)
    # Note that the file extension is not always given depending on the input conventions of the tool being called
    sam_filename=output_filename+'.sam'
    bam_filename=output_filename+'.bam'
    sorted_filename_input=output_filename+'_sorted'
    sorted_filename_output=output_filename+'_sorted.bam'
    
    # convert sam to bam
    print 'Converting {0} to {1} . . .'.format(sam_filename,bam_filename)
    try:
        SamtoBam(sam_filename,bam_filename)
    except Exception as ex:
        print "Error converting sam to bam ({0}): {1}".format(ex.errno, ex.strerror)
        return False   
    
    # sort
    print 'Sorting {0} -> {1}'.format(bam_filename,sorted_filename_output)
    try:
        pysam.sort(bam_filename,sorted_filename_input)
    except Exception as ex:
        print "Error sorting bam file ({0}): {1}".format(ex.errno, ex.strerror)
        return False   
    
    # index
    print 'Indexing {0} . . .'.format(sorted_filename_output)
    try:
        pysam.index(sorted_filename_output)
    except Exception as ex:
        print "Error indexing bam file ({0}): {1}".format(ex.errno, ex.strerror)
        return False   
    
    print
    print 'Done'
    return True
开发者ID:phageghost,项目名称:align-sanger,代码行数:35,代码来源:alignsanger.py

示例3: map_paired_reads

def map_paired_reads(pe1_path, pe2_path, genome_path, output_path, args):
    work_dir = tempfile.mkdtemp( )
    genome_db = os.path.join( work_dir, "genome" )
    pe1_output = os.path.join( work_dir, "pe1.sai" )
    pe2_output = os.path.join( work_dir, "pe2.sai" )
    bwa_output = os.path.join( work_dir, "output.sam" )
    
    null = open( "/dev/null" ) #open("/tmp/bwa_out")#
    subprocess.check_call( [ "bwa", "index", "-p", genome_db, genome_path ], stderr = null )
    with open( pe1_output, "w" ) as pe1_file:
        subprocess.check_call( [ "bwa", "aln", genome_db, pe1_path ], stdout = pe1_file, stderr = null )
    
    with open( pe2_output, "w" ) as pe2_file:
        subprocess.check_call( [ "bwa", "aln", genome_db, pe2_path ], stdout = pe2_file, stderr = null )
    
    with open( bwa_output, "w" ) as bwa_file:
        subprocess.check_call( [ "bwa", "sampe",
                                "-r", "@RG\tID:ILLUMINA\tSM:48_2\tPL:ILLUMINA\tLB:LIB1",
                                genome_db,
                                pe1_output, pe2_output,
                                pe1_path, pe2_path ], stdout = bwa_file, stderr = null )
 


    if args.sam:
        shutil.move(bwa_output ,output_path+'.sam')
        #os.rename(bwa_output ,output_path+'.sam')
    else:
        sam_to_bam( bwa_output, bwa_output + ".bam" )
        if args.sort:
            # coordinate sort the file
            pysam.sort( bwa_output + ".bam", output_path )
            pysam.index(output_path+'.bam')
        else:
            shutil.move(bwa_output +".bam",output_path+'.bam')
开发者ID:ksahlin,项目名称:generate_assembly,代码行数:35,代码来源:align.py

示例4: sort_by_position

def sort_by_position(bam_file, dir):

    ## get the file prefix
    prefix = ""
    prefix_match = re.match(r"(.*).bam", bam_file)

    try:
        prefix = prefix_match.group(1)
    except:
        print "Existing: Invalid bam file -i %s" %(bam_file)
        sys.exit(2)
        

    # sort the bam file
    bam_input = dir + bam_file
    sort_bam = dir +  prefix + "_sorted"
    pysam.sort(bam_input, sort_bam)
    sort_bam = sort_bam + ".bam"
    
    # index the sort bam file
    pysam.index(sort_bam)

    print ""
    print "Writing Sorted Bam File : %s" %(sort_bam)
    print "Writing Index Sorted Bam File : %s.bai" %(sort_bam)
    
    return sort_bam
开发者ID:chelseaju,项目名称:Pseudogene,代码行数:27,代码来源:bamfile_sorter.py

示例5: run

    def run(self):
        AbstractAnalysis.run(self) #Call base method to do some logging
        localBamFile = os.path.join(self.getLocalTempDir(), "mapping.bam")
        localSortedBamFile = os.path.join(self.getLocalTempDir(), "mapping.sorted")

        samToBamFile(self.samFile, localBamFile)
        pysam.sort(localBamFile, localSortedBamFile)
        pysam.index(localSortedBamFile + ".bam")
        pysam.faidx(self.referenceFastaFile)
        
        file_header = self.readFastqFile.split(".fastq")[0].split("/")[-1] +  "_" + self.referenceFastaFile.split(".fa")[0].split("/")[-1]
        consensus_vcf = os.path.join(self.outputDir, file_header + "_Consensus.vcf")
        consensus_fastq = os.path.join(self.outputDir, file_header + "_Consensus.fastq")

        system("samtools mpileup -Q 0 -uf %s %s | bcftools view -cg - > %s" \
                % (self.referenceFastaFile, localSortedBamFile + ".bam", consensus_vcf))
        system("vcfutils.pl vcf2fq %s > %s" % (consensus_vcf, consensus_fastq))
        system("rm -rf %s" % (self.referenceFastaFile + ".fai"))
        
        formatted_consensus_fastq = os.path.join(self.getLocalTempDir(), "Consensus.fastq")
        
        formatConsensusFastq(consensus_fastq, formatted_consensus_fastq)
        system("mv %s %s" % (formatted_consensus_fastq, consensus_fastq))
        
        self.finish()
开发者ID:mitenjain,项目名称:nanopore,代码行数:25,代码来源:consensus.py

示例6: check_bam

def check_bam(bam, p, make_new_index=False):
    """
    Sort and index bam file
    returns dictionary of chromosome names and lengths
    """
    # check if sorted
    test_head = pysam.AlignmentFile(bam, 'rb')
    chrom_sizes = {}
    p = str(p)
    for i in test_head.header['SQ']:
        chrom_sizes[i['SN']] = int(i['LN'])
    try:
        test_head.header['HD']['SO']
    except KeyError:
        print '  sorting bam file'
        pysam.sort('[email protected]', p, bam, 'sorted.temp')
        os.remove(bam)
        os.rename('sorted.temp.bam', bam)
    else:
        if test_head.header['HD']['SO'] == 'coordinate':
            pass
        else:
            print '  sorting bam file'
            pysam.sort('[email protected]', p, bam, 'sorted.temp')
            os.remove(bam)
            os.rename('sorted.temp.bam', bam)
    test_head.close()
    # check if indexed
    if '{}.bai'.format(bam) in os.listdir('.') and make_new_index is False:
        pass
    else:
        print '  indexing bam file'
        pysam.index(bam)
    return chrom_sizes
开发者ID:timoast,项目名称:TEpy,代码行数:34,代码来源:tepy.py

示例7: main

def main(infile, snp_dir, max_window=MAX_WINDOW_DEFAULT,
         is_paired_end=False, is_sorted=False):
    name_split = infile.split(".")
    
    if len(name_split) > 1:
        pref = ".".join(name_split[:-1])
    else:
        pref = name_split[0]
    
    if not is_sorted:
        pysam.sort(infile, pref + ".sort")
        infile = pref + ".sort"
        sort_file_name = pref + ".sort.bam"
    else:
        sort_file_name = infile

    keep_file_name = pref + ".keep.bam"
    remap_name = pref + ".to.remap.bam"
    remap_num_name = pref + ".to.remap.num.gz"

    if is_paired_end:
        fastq_names = [pref + ".remap.fq1.gz",
                       pref + ".remap.fq2.gz"]
    else:
        fastq_names = [pref + ".remap.fq.gz"]

    bam_data = BamScanner(is_paired_end, max_window, 
                          sort_file_name, keep_file_name, remap_name, 
                          remap_num_name, fastq_names, snp_dir)
    bam_data.run()
开发者ID:hobrien,项目名称:WASP,代码行数:30,代码来源:find_intersecting_snps.py

示例8: disciple

def disciple(bam_fname, bam_hdr, rg_id, long_qname_table, cigar_v2, in_queue):
  """Create a BAM file from the FASTQ lines fed to it via in_queue

  :param bam_fname:
  :param bam_hdr:
  :param rg_id:
  :param long_qname_table:
  :param cigar_v2:
  :param in_queue:
  :return:
  """
  logger.debug('Writing to {} ...'.format(bam_fname))
  t0 = time.time()
  fp = pysam.AlignmentFile(bam_fname, 'wb', header=bam_hdr)
  ref_dict = {k['SN']: n for n, k in enumerate(bam_hdr['SQ'])}
  cnt = 0
  for cnt, (qname, read_data) in enumerate(iter(in_queue.get, __process_stop_code__)):
    write_perfect_reads(qname, rg_id, long_qname_table, ref_dict, read_data, cigar_v2, fp)
  fp.close()
  t1 = time.time()
  logger.debug('... {}: {} reads in {:0.2f}s ({:0.2f} t/s)'.format(bam_fname, cnt, t1 - t0, cnt/(t1 - t0)))

  logger.debug('Sorting {} -> {}'.format(bam_fname, bam_fname + '.sorted'))
  t0 = time.time()
  pysam.sort('-m', '1G', '-o', bam_fname + '.sorted', bam_fname)
  os.remove(bam_fname)
  t1 = time.time()
  logger.debug('... {:0.2f}s'.format(t1 - t0))

  logger.debug('Shutting down thread for {}'.format(bam_fname))
开发者ID:sbg,项目名称:Mitty,代码行数:30,代码来源:god_aligner.py

示例9: miraligner

def miraligner(args):
    """
    Realign BAM hits to miRBAse to get better accuracy and annotation
    """
    hairpin, mirna = _download_mirbase(args)
    precursors = _read_precursor(args.hairpin, args.sps)
    matures = _read_mature(args.mirna, args.sps)
    gtf = _read_gtf(args.gtf)
    out_dts = []
    for bam_fn in args.files:
        sample = op.splitext(op.basename(bam_fn))[0]
        if bam_fn.endswith("bam") or bam_fn.endswith("sam"):
            logger.info("Reading %s" % bam_fn)
            bam_fn = _sam_to_bam(bam_fn)
            bam_sort_by_n = op.splitext(bam_fn)[0] + "_sort"
            pysam.sort("-n", bam_fn, bam_sort_by_n)
            reads = _read_bam(bam_sort_by_n + ".bam", precursors)
        elif bam_fn.endswith("fasta") or bam_fn.endswith("fa") or bam_fn.endswith("fastq"):
            out_file = op.join(args.out, sample + ".premirna")
            bam_fn = _filter_seqs(bam_fn)
            if args.miraligner:
                _cmd_miraligner(bam_fn, out_file, args.sps, args.hairpin)
                reads = _read_miraligner(out_file)
            else:
                if bam_fn.endswith("fastq"):
                    bam_fn = _convert_to_fasta(bam_fn)
                logger.info("Aligning %s" % bam_fn)
                if not file_exists(out_file):
                    pyMatch.Miraligner(hairpin, bam_fn, out_file, 1, 4)
                reads = _read_pyMatch(out_file, precursors)
        else:
            raise ValueError("Format not recognized.")

        if not args.miraligner:
            reads = _annotate(reads, matures, precursors)
        out_file = op.join(args.out, sample + ".mirna")
        out_file, dt, dt_pre= _tab_output(reads, out_file, sample)
        try:
            vcf_file = op.join(args.out, sample + ".vcf")
            if not file_exists(vcf_file):
            # if True:
                create_vcf(dt_pre, matures, gtf, vcf_file)
            try:
                import vcf
                vcf.Reader(filename=vcf_file)
            except Exception as e:
                logger.warning(e.__doc__)
                logger.warning(e.message)
        except Exception as e:
            # traceback.print_exc()
            logger.warning(e.__doc__)
            logger.warning(e.message)
        if isinstance(dt, pd.DataFrame):
            out_dts.append(dt)

    if out_dts:
        _create_counts(out_dts, args.out)
        # _summarize(out_dts)
    else:
        print "No files analyzed!"
开发者ID:JackieMium,项目名称:seqcluster,代码行数:60,代码来源:__init__.py

示例10: main

def main():
    # Read options, args.
    parser = optparse.OptionParser()
    (options, args) = parser.parse_args()
    input_fname, output_fname = args
    slots = os.getenv('GALAXY_SLOTS', 1)
    pysam.sort("[email protected]%s" % slots, '-o', output_fname, '-O', 'bam', '-T', '.', input_fname)
开发者ID:ImmPortDB,项目名称:immport-galaxy,代码行数:7,代码来源:cram_to_bam.py

示例11: run_cufflinks

def run_cufflinks(org_db, num_cpus=4):
    """
    run cufflinks program on mapped reads 
    """

    try:
        subprocess.call(["cufflinks"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except:
        exit("Please make sure that the `Cufflinks` binary is in your $PATH")
    
    org_name = org_db['short_name'] 
    print("preparing for cufflinks run for organism %s" % org_name)

    min_intron_length = 20
    min_isoform_frac = 0.25
    max_intron_length = org_db['max_intron_len']
    result_dir = org_db['read_assembly_dir']

    bam_file = "%s/%s_Aligned_mmr_sortbyCoord.bam" % (org_db['read_map_dir'], org_name)
    if not os.path.isfile(bam_file):
        sys.stdout.write("failed to fetch sorted mmr BAM file for organism: %s, trying to get the mmr file...\n" % org_name)
        bam_file = "%s/%s_Aligned_mmr.bam" % (org_db['read_map_dir'], org_name)
        if not os.path.isfile(bam_file):
            exit("error: failed to fetch mmr BAM file for organism %s" % org_name)
        
        ## sorting, indexing the bam file 
        file_prefix, ext = os.path.splitext(bam_file)
        sorted_bam = "%s_sortbyCoord" % file_prefix

        sys.stdout.write("trying to sort based by the coordinates with output prefix as: %s\n" % sorted_bam)
        if not os.path.isfile("%s.bam" % sorted_bam):
            pysam.sort(bam_file, sorted_bam)
            
        bam_file = "%s.bam" % sorted_bam

    print('using bam file from %s' % bam_file)
    if not os.path.exists(bam_file + ".bai"):
        pysam.index(bam_file) 

    ## always use quiet mode to avoid problems with storing log output.
    cli_cuff = "cufflinks -q --no-update-check \
        -F %.2f \
        -I %d \
        --min-intron-length %d \
        --library-type fr-unstranded \
        -p %d \
        -o %s \
        %s" % (min_isoform_frac, max_intron_length, min_intron_length, num_cpus, result_dir, bam_file)
  
    sys.stdout.write('\trun cufflinks as: %s \n' % cli_cuff)
    try:
        os.chdir(result_dir)
        process = subprocess.Popen(cli_cuff, shell=True) 
        returncode = process.wait()

        if returncode !=0:
            raise Exception, "Exit status return code = %i" % returncode

    except Exception, e:
        print 'Error running cufflinks.\n%s' %  str( e )
开发者ID:vipints,项目名称:genomeutils,代码行数:60,代码来源:transcript_assembly.py

示例12: align_to_bam_file

    def align_to_bam_file(self, reference_fasta_path, query_fasta_path, output_bam_path, multiple=False, assert_record=None):

        logging.debug('LastzRunner: running on reference %s and query %s' %
                     (reference_fasta_path, query_fasta_path))
        output_sam_path = os.path.abspath(
            os.path.expandvars(output_bam_path.replace('.bam', '.sam')))
        output_bam_unsorted_path = os.path.abspath(
            os.path.expandvars(output_bam_path + '.unsorted'))

        logging.debug(
            'LastzRunner: aligning with output in temporary sam file %s' %
            output_sam_path)
        with open(output_sam_path, 'w') as output_sam_handler:
            for line in self._align(reference_fasta_path, query_fasta_path, multiple):
                output_sam_handler.write(line)

        logging.debug(
            'LastzRunner: transforming sam into unsorted bam file %s' %
            output_bam_unsorted_path)
        input_sam_handler = pysam.Samfile(output_sam_path, "r")
        output_bam_file = pysam.Samfile(
            output_bam_unsorted_path, "wb", template=input_sam_handler)

        logging.debug(
            'LastzRunner: copying from sam file to bam file')
        for s in input_sam_handler:
            output_bam_file.write(s)
        output_bam_file.close()

        logging.debug('LastzRunner: sorting and indexing bam file %s' %
                      output_bam_path)
        pysam.sort(output_bam_unsorted_path,
                   output_bam_path.replace('.bam', ''))

        pysam.index(output_bam_path)
开发者ID:achenge07,项目名称:virana,代码行数:35,代码来源:vhom.py

示例13: makeAggregate

def makeAggregate(cells, directory, suffix, output):
    """
    Create aggregate sample.

    Make an aggregate bam file from a list of cells, sorts and indexes
    the file for easy use in IGV. Suffix is required to prevent non
    0-padded numbers matching the wrong files. Return final file name.

    Parameters
    ----------
    cells : list
        List of cell names to create aggregate from.
    directory : string
        Directory path with the bam files from each cell.
    suffix : string
        String to match the end of the bam file, use to add file extension
        and to anchor the extension after file numbers - this will prevent
        cell_4 matching cell_4*.
    output : string
        String containing output file location.
    """
    from glob import glob
    cells = set(cells)
    fileList = []
    for cell in cells:
        fileList.append(glob(os.path.join(directory, "*" + cell + suffix))[0])
    pysam.cat("-o", output + ".bam", *fileList, catch_stdout=False)
    pysam.sort(output + ".bam", output + ".sorted", catch_stdout=False)
    pysam.index(output + ".sorted.bam", catch_stdout=False)

    return output + ".sorted.bam"
开发者ID:mfiers,项目名称:rat,代码行数:31,代码来源:scatac.py

示例14: saveReads

def saveReads(dataHub, nameExtra=None):
    if dataHub.args.save_reads:
        logging.info("* Saving relevant reads *")
        for i, sample in enumerate(dataHub):
            outbam_path = dataHub.args.save_reads
            if not outbam_path.endswith(".bam"):
                outbam_path += ".bam"

            if len(dataHub.samples) > 1:
                logging.debug("Using i = {}".format(i))
                outbam_path = outbam_path.replace(".bam", ".{}.bam".format(i))

            if nameExtra is not None:
                outbam_path = outbam_path.replace(".bam", ".{}.bam".format(nameExtra))

            logging.info("  Outpath: {}".format(outbam_path))

            # print out just the reads we're interested for use later
            bam_small = pysam.Samfile(outbam_path, "wb", template=sample.bam)
            for read in sample.reads:
                bam_small.write(read)

            for read in sample.readStatistics.reads:
                bam_small.write(read)

            bam_small.close()
            sorted_path = outbam_path.replace(".bam", ".sorted")
            pysam.sort(outbam_path, sorted_path)
            pysam.index(sorted_path+".bam")
开发者ID:MMesbahU,项目名称:svviz,代码行数:29,代码来源:app.py

示例15: bwa_sampe

def bwa_sampe(pe1_path, pe2_path, genome_path, output_path):
    print 'Aligning with bwa aln/sampe'
    start = time()
    work_dir = tempfile.mkdtemp()
    genome_db = os.path.join(work_dir, "genome")
    pe1_output = os.path.join(work_dir, "pe1.sai")
    pe2_output = os.path.join(work_dir, "pe2.sai")
    bwa_output = os.path.join(work_dir, "output.sam")

    null = open("/dev/null")
    subprocess.check_call([ "bwa", "index", "-p", genome_db, genome_path ], stderr=null)
    with open(pe1_output, "w") as pe1_file:
        subprocess.check_call([ "bwa", "aln", genome_db, pe1_path ], stdout=pe1_file, stderr=null)

    with open(pe2_output, "w") as pe2_file:
        subprocess.check_call([ "bwa", "aln", genome_db, pe2_path ], stdout=pe2_file, stderr=null)

    with open(bwa_output, "w") as bwa_file:
        subprocess.check_call([ "bwa", "sampe",
                                genome_db,
                                pe1_output, pe2_output,
                                pe1_path, pe2_path ], stdout=bwa_file, stderr=null)

    elapsed = time() - start
    print 'Time elapsed for bwa aln/sampe: ', elapsed

    sam_to_bam(bwa_output, bwa_output + ".bam")
    pysam.sort(bwa_output + ".bam", output_path)
    pysam.index(output_path + '.bam')
开发者ID:jvhaarst,项目名称:BESST,代码行数:29,代码来源:reads_to_ctg_map.py


注:本文中的pysam.sort函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。