當前位置: 首頁>>代碼示例>>Python>>正文


Python pysam.sort方法代碼示例

本文整理匯總了Python中pysam.sort方法的典型用法代碼示例。如果您正苦於以下問題:Python pysam.sort方法的具體用法?Python pysam.sort怎麽用?Python pysam.sort使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在pysam的用法示例。


在下文中一共展示了pysam.sort方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: check_name_sorted

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import sort [as 別名]
def check_name_sorted(bam, p, prefix):
    """
    Sort bam file by name if position sorted
    """
    test_head = pysam.AlignmentFile(bam, 'rb')
    p = str(p)
    try:
        test_head.header['HD']['SO']
    except KeyError:
        print '  sorting bam file'
        pysam.sort('-@', p, '-n', bam, prefix + 'sorted.temp')
        os.remove(bam)
        os.rename(prefix + 'sorted.temp.bam', bam)
    else:
        if test_head.header['HD']['SO'] == 'queryname':
            pass
        else:
            print '  sorting bam file'
            pysam.sort('-@', p, '-n', bam, prefix + 'sorted.temp')
            os.remove(bam)
            os.rename(prefix + 'sorted.temp.bam', bam)
    test_head.close() 
開發者ID:ListerLab,項目名稱:TEPID,代碼行數:24,代碼來源:tepid.py

示例2: sam_to_bam

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import sort [as 別名]
def sam_to_bam(self, sam_path, bam_path_prefix):
        if self._sam_file_is_empty(sam_path) is True:
            # pysam will generate an error if an emtpy SAM file will
            # be converted. Due to this an empty bam file with the
            # same header information will be generated from scratch
            self._generate_empty_bam_file(sam_path, bam_path_prefix)
            # Remove SAM file
            os.remove(sam_path)
            return
        temp_unsorted_bam_path = self._temp_unsorted_bam_path(
            bam_path_prefix)
        # Generate unsorted BAM file
        pysam.samtools.view(
            "-b", "-o{}".format(temp_unsorted_bam_path), sam_path,
            catch_stdout=False)
        # Generate sorted BAM file
        pysam.sort(temp_unsorted_bam_path, "-o", bam_path_prefix + ".bam")
        # Generate index for BAM file
        pysam.index("%s.bam" % bam_path_prefix)
        # Remove unsorted BAM file
        os.remove(temp_unsorted_bam_path)
        # Remove SAM file
        os.remove(sam_path) 
開發者ID:foerstner-lab,項目名稱:READemption,代碼行數:25,代碼來源:sambamconverter.py

示例3: bamSort

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import sort [as 別名]
def bamSort(outputBAM, log, newHeader, verbose):

    tmp = outputBAM + "_tmp"
    if(newHeader != None):
        pyOutputBAM = pysam.AlignmentFile(outputBAM, "rb")
        pyTmp = pysam.AlignmentFile(tmp, "wb", header=newHeader)
        for read in pyOutputBAM:
            pyTmp.write(read)
        pyOutputBAM.close()
        pyTmp.close()
    else:
        os.rename(outputBAM, tmp)

    #run(" ".join(["samtools", "sort", "-@", str(threads) , tmp, replaceExtension(outFile, "")]), log, verbose=verbose, dry=dry)
    run(" ".join(["samtools sort", "-o", outputBAM, tmp]), log, verbose=verbose, dry=False)
    #pysam.sort(tmp, outputBAM)  # @UndefinedVariable
    removeFile(tmp) 
開發者ID:t-neumann,項目名稱:slamdunk,代碼行數:19,代碼來源:filter.py

示例4: sam_to_bam

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import sort [as 別名]
def sam_to_bam(filename):
    """Convert sam to bam"""

    import pysam
    infile = pysam.AlignmentFile(filename, "r")
    name = os.path.splitext(filename)[0]+'.bam'
    bamfile = pysam.AlignmentFile(name, "wb", template=infile)
    for s in infile:
        bamfile.write(s)
    pysam.sort("-o", name, name)
    pysam.index(name)
    bamfile = pysam.AlignmentFile(name, "rb")
    return 
開發者ID:dmnfarrell,項目名稱:smallrnaseq,代碼行數:15,代碼來源:utils.py

示例5: saveReads

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import sort [as 別名]
def saveReads(dataHub, nameExtra=None):
    if dataHub.args.save_reads:
        logging.info("* Saving relevant reads *")
        for i, sample in enumerate(dataHub):
            outbam_path = dataHub.args.save_reads
            if not outbam_path.endswith(".bam"):
                outbam_path += ".bam"

            if len(dataHub.samples) > 1:
                logging.debug("Using i = {}".format(i))
                outbam_path = outbam_path.replace(".bam", ".{}.bam".format(i))

            if nameExtra is not None:
                outbam_path = outbam_path.replace(".bam", ".{}.bam".format(nameExtra))

            logging.info("  Outpath: {}".format(outbam_path))

            # print out just the reads we're interested for use later
            bam_small = pysam.Samfile(outbam_path, "wb", template=sample.bam)
            for read in sample.reads:
                bam_small.write(read)

            for read in sample.readStatistics.reads:
                bam_small.write(read)

            bam_small.close()
            sorted_path = outbam_path.replace(".bam", ".sorted")
            pysam.sort(outbam_path, sorted_path)
            pysam.index(sorted_path+".bam") 
開發者ID:svviz,項目名稱:svviz,代碼行數:31,代碼來源:app.py

示例6: print_read_stack

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import sort [as 別名]
def print_read_stack(reads, refseq=None, cutoff=0, by=None, label=''):
    """Print local read alignments from a sam file against the mapped sequence.
       Args:
        reads: dataframe of read counts with position info
        refseq: sequence segment or gene that reads are mapped to
        cutoff: don't display read with <=cutoff counts
        by: column to sort reads by, e.g. 'reads', 'start' or 'end'
       Returns:
        string of printed read stacks to display or write to a file
    """

    if refseq != None:
        seqlen = len(refseq)
    else:
        seqlen = reads.end.max()
    f = None
    reads = reads[reads.reads>=cutoff]
    if by is not None:
        reads = reads.sort_values(by, ascending=False)

    l = len(reads)
    if l==0: return
    s = ''
    s += '%s unique reads\n' %l
    s += '-------------------------------------\n'
    s += refseq+'\n'
    for idx,r in reads.iterrows():
        seq = r.seq
        count = r.reads
        pos = find_subseq(refseq, seq)
        if pos == -1: continue
        i = len(seq)+pos
        s += "{:>{w}} ({c})\n".format(seq,w=i,c=count)
    s += '\n'
    return s 
開發者ID:dmnfarrell,項目名稱:smallrnaseq,代碼行數:37,代碼來源:utils.py

示例7: featurecounts

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import sort [as 別名]
def featurecounts(samfile, gtffile):
    """Count aligned features with the featureCounts program.
       Returns: a dataframe of counts"""

    params = '-T 5 -t exon -g transcript_id'
    cmd = 'featureCounts %s -a %s -o counts.txt %s' %(params, gtffile, samfile)
    print (cmd)
    result = subprocess.check_output(cmd, shell=True, executable='/bin/bash')
    counts =  pd.read_csv('counts.txt', sep='\t', comment='#')
    counts = counts.rename(columns={samfile:'reads'})
    counts = counts.sort('reads', ascending=False)
    return counts 
開發者ID:dmnfarrell,項目名稱:smallrnaseq,代碼行數:14,代碼來源:utils.py

示例8: preprocess_sam

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import sort [as 別名]
def preprocess_sam(sam_files, datasets, tmp_dir = "talon_tmp/", n_threads = 0):
    """ Copy and rename the provided SAM/BAM file(s), merge them, and index.
        This is necessary in order to use Pybedtools commands on the reads.
        The renaming is necessary in order to label the reads according to
        their dataset."""

    # Create the tmp dir
    os.system("mkdir -p %s " % (tmp_dir))

    # Copy and rename SAM files with dataset names to ensure correct RG tags
    renamed_sams = []
    for sam, dataset in zip(sam_files, datasets):
        suffix = "." + sam.split(".")[-1]
        if suffix == ".sam":
            bam_copy = tmp_dir + dataset + "_unsorted.bam"
            convert_to_bam(sam, bam_copy)
            sam = bam_copy
        sorted_bam = tmp_dir + dataset + ".bam"
        pysam.sort("-@", str(n_threads), "-o", sorted_bam, sam)
        renamed_sams.append(sorted_bam)

    merged_bam = tmp_dir + "merged.bam"
    merge_args = [merged_bam] + renamed_sams + ["-f", "-r", "-@", str(n_threads)]
    # index_args = [merged_bam, "-@", str(n_threads)]

    # Merge datasets and use -r option to include a read group tag
    try:
        pysam.merge(*merge_args)
        pysam.index(merged_bam)
        ts = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
        print("[ %s ] Merged input SAM/BAM files" % (ts))
    except:
        raise RuntimeError(("Problem merging and indexing SAM/BAM files. "
                            "Check your file paths and make sure that all "
                            "files have headers."))
    return merged_bam 
開發者ID:mortazavilab,項目名稱:TALON,代碼行數:38,代碼來源:process_sams.py

示例9: partition_reads

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import sort [as 別名]
def partition_reads(sam_files, datasets, tmp_dir = "talon_tmp/", n_threads = 0):
    """ Use bedtools merge to create non-overlapping intervals from all of the
        transcripts in a series of SAM/BAM files. Then, iterate over the intervals
        to extract all reads inside of them from the pysam object.
       
        Returns:
            - List of lists: sublists contain pysam reads from a given interval
            - List of tuple intervals
            - filename of merged bam file (to keep track of the header)
           """

    merged_bam = preprocess_sam(sam_files, datasets, tmp_dir = tmp_dir, 
                                n_threads = n_threads)

    try:
        all_reads = pybedtools.BedTool(merged_bam).bam_to_bed()
    except Exception as e:
        print(e)
        raise RuntimeError("Problem opening sam file %s" % (merged_bam))

    # Must sort the Bedtool object
    sorted_reads = all_reads.sort()
    intervals = sorted_reads.merge(d = 100000000)

    # Now open each sam file using pysam and extract the reads
    coords = []
    read_groups = []
    with pysam.AlignmentFile(merged_bam) as bam:  # type: pysam.AlignmentFile
        for interval in intervals:
            reads = get_reads_in_interval(bam, interval.chrom,
                                          interval.start, interval.end)
            read_groups.append(reads)
            coords.append((interval.chrom, interval.start + 1, interval.end))

    return read_groups, coords, merged_bam 
開發者ID:mortazavilab,項目名稱:TALON,代碼行數:37,代碼來源:process_sams.py

示例10: create_simple_bam

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import sort [as 別名]
def create_simple_bam(fname, calls):
    """Create a small bam file with RLE encoding coded in the qscores."""
    ref_len = len(simple_data['ref'])

    header = {'HD': {'VN': '1.0'},
              'SQ': [{'LN': ref_len, 'SN': 'ref'}]}

    tmp_file = '{}.tmp'.format(fname)
    with pysam.AlignmentFile(
            tmp_file, 'wb', reference_names=['ref', ],
            reference_lengths=[ref_len, ], header=header) as output:
        for index, basecall in enumerate(calls):
            a =common.initialise_alignment(
                query_name=basecall['query_name'],
                reference_id=0,
                reference_start=0,
                query_sequence=basecall['seq'],
                cigarstring=basecall['cigarstring'],
                flag=basecall['flag'],
                query_qualities=basecall['quality'],
                tags=basecall['tags'])

            output.write(a)

    pysam.sort("-o", fname, tmp_file)
    os.remove(tmp_file)
    pysam.index(fname) 
開發者ID:nanoporetech,項目名稱:medaka,代碼行數:29,代碼來源:mock_data.py

示例11: check_te_overlaps_dels

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import sort [as 別名]
def check_te_overlaps_dels(te, bamfile, te_list, prefix):
    pybedtools.BedTool(bamfile).bam_to_bed().saveas(prefix + 'split.temp')
    convert_split_pairbed(prefix + 'split.temp', prefix + 'split_bedpe.temp')
    split = pybedtools.BedTool(prefix + 'split_bedpe.temp').each(append_origin, word='split').saveas()
    create_deletion_coords(split, prefix + 'second_pass_del_coords.temp')
    dels = pybedtools.BedTool(prefix + 'second_pass_del_coords.temp').sort()
    intersections = dels.intersect(te, wb=True, nonamecheck=True, sorted=True)
    os.remove(prefix + 'second_pass_del_coords.temp')
    reads = []
    for r in intersections:
        if r[-3] in te_list:
            reads.append(r[3])
        else:
            pass
    return reads 
開發者ID:ListerLab,項目名稱:TEPID,代碼行數:17,代碼來源:tepid.py

示例12: refine

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import sort [as 別名]
def refine(options):
    """
    Refine TE insertion and deletion calls within a group of related samples
    Use indel calls from other samples in the group, inspect areas of the genome in samples where
    indel was not called, and look for evidence of the same indel with much lower
    read count threshold
    """
    te = pybedtools.BedTool(options.te).sort()
    names = readNames(options.all_samples)
    if options.insertions is not False:
        insertions = getOtherLines(names, options.insertions)
    if options.deletions is not False:
        deletions = getOtherLines(names, options.deletions)  # format ([data], [inverse_accessions])
    print "Processing "+options.name
    chrom_sizes = check_bam(options.conc, options.proc, options.prefix)
    check_bam(options.split, options.proc, options.prefix, make_new_index=True)
    cov = calc_cov(options.conc, 100000, 120000)
    concordant = pysam.AlignmentFile(options.conc, 'rb')
    split_alignments = pysam.AlignmentFile(options.split, 'rb')
    name_indexed = pysam.IndexedReads(split_alignments)
    name_indexed.build()
    if options.deletions is not False:
        print "  checking deletions"
        process_missed(deletions, "deletion", concordant, split_alignments, name_indexed, options.name, te, cov/5, chrom_sizes, options.prefix)
    else:
        pass
    if options.insertions is not False:
        print "  checking insertions"
        process_missed(insertions, "insertion", concordant, split_alignments, name_indexed, options.name, te, cov/10, chrom_sizes, options.prefix)
    else:
        pass 
開發者ID:ListerLab,項目名稱:TEPID,代碼行數:33,代碼來源:tepid.py

示例13: check_bam

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import sort [as 別名]
def check_bam(bam, p, prefix, make_new_index=False):
    """
    Sort and index bam file
    returns dictionary of chromosome names and lengths
    """
    # check if sorted
    test_head = pysam.AlignmentFile(bam, 'rb')
    chrom_sizes = {}
    p = str(p)
    for i in test_head.header['SQ']:
        chrom_sizes[i['SN']] = int(i['LN'])
    try:
        test_head.header['HD']['SO']
    except KeyError:
        print '  sorting bam file'
        pysam.sort('-@', p, bam, prefix + 'sorted.temp')
        os.remove(bam)
        os.rename(prefix + 'sorted.temp.bam', bam)
    else:
        if test_head.header['HD']['SO'] == 'coordinate':
            pass
        else:
            print '  sorting bam file'
            pysam.sort('-@', p, bam, prefix + 'sorted.temp')
            os.remove(bam)
            os.rename(prefix + 'sorted.temp.bam', bam)
    test_head.close()
    # check if indexed
    if '{}.bai'.format(bam) in os.listdir('.') and make_new_index is False:
        pass
    else:
        print '  indexing bam file'
        pysam.index(bam)
    return chrom_sizes 
開發者ID:ListerLab,項目名稱:TEPID,代碼行數:36,代碼來源:tepid.py

示例14: prepareBED

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import sort [as 別名]
def prepareBED(bed, slamSimBed, minLength):
    utrs = []
    for utr in BedIterator(bed):
        utrs.append(utr)

    utrs.sort(key=lambda x: (x.name, -x.getLength()))

    outBed = open(slamSimBed, "w")

    partList = []
    lastUtr = None
    for utr in utrs:
        if utr.hasStrand() and utr.hasNonEmptyName():
            currentUtr = utr.name
            if currentUtr == lastUtr:
                partList.append(utr)
            else:
                if(not lastUtr is None):
                    printUTR(partList[0], outBed, minLength)
                partList = [utr]
            lastUtr = currentUtr
        else:
            print("Warning: Invalid BED entry found: " + str(utr))

    if(not lastUtr is None):
        printUTR(partList[0], outBed, minLength)

    outBed.close() 
開發者ID:t-neumann,項目名稱:slamdunk,代碼行數:30,代碼來源:simulator.py

示例15: createGenomeWindows

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import sort [as 別名]
def createGenomeWindows(genome, outfile, windows):

    statement = '''
        cgat windows2gff
            --genome=%(genome)s
            --output-format=bed
            --fixed-width-windows=%(windows)s
            --log=%(outfile)s.log |
        bedtools sort -i stdin |
        gzip > %(outfile)s
        '''
    P.run() 
開發者ID:CGATOxford,項目名稱:CGATPipelines,代碼行數:14,代碼來源:PipelineIntervals.py


注:本文中的pysam.sort方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。