当前位置: 首页>>代码示例>>Python>>正文


Python pysam.sort方法代码示例

本文整理汇总了Python中pysam.sort方法的典型用法代码示例。如果您正苦于以下问题:Python pysam.sort方法的具体用法?Python pysam.sort怎么用?Python pysam.sort使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pysam的用法示例。


在下文中一共展示了pysam.sort方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: check_name_sorted

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import sort [as 别名]
def check_name_sorted(bam, p, prefix):
    """
    Sort bam file by name if position sorted
    """
    test_head = pysam.AlignmentFile(bam, 'rb')
    p = str(p)
    try:
        test_head.header['HD']['SO']
    except KeyError:
        print '  sorting bam file'
        pysam.sort('-@', p, '-n', bam, prefix + 'sorted.temp')
        os.remove(bam)
        os.rename(prefix + 'sorted.temp.bam', bam)
    else:
        if test_head.header['HD']['SO'] == 'queryname':
            pass
        else:
            print '  sorting bam file'
            pysam.sort('-@', p, '-n', bam, prefix + 'sorted.temp')
            os.remove(bam)
            os.rename(prefix + 'sorted.temp.bam', bam)
    test_head.close() 
开发者ID:ListerLab,项目名称:TEPID,代码行数:24,代码来源:tepid.py

示例2: sam_to_bam

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import sort [as 别名]
def sam_to_bam(self, sam_path, bam_path_prefix):
        if self._sam_file_is_empty(sam_path) is True:
            # pysam will generate an error if an emtpy SAM file will
            # be converted. Due to this an empty bam file with the
            # same header information will be generated from scratch
            self._generate_empty_bam_file(sam_path, bam_path_prefix)
            # Remove SAM file
            os.remove(sam_path)
            return
        temp_unsorted_bam_path = self._temp_unsorted_bam_path(
            bam_path_prefix)
        # Generate unsorted BAM file
        pysam.samtools.view(
            "-b", "-o{}".format(temp_unsorted_bam_path), sam_path,
            catch_stdout=False)
        # Generate sorted BAM file
        pysam.sort(temp_unsorted_bam_path, "-o", bam_path_prefix + ".bam")
        # Generate index for BAM file
        pysam.index("%s.bam" % bam_path_prefix)
        # Remove unsorted BAM file
        os.remove(temp_unsorted_bam_path)
        # Remove SAM file
        os.remove(sam_path) 
开发者ID:foerstner-lab,项目名称:READemption,代码行数:25,代码来源:sambamconverter.py

示例3: bamSort

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import sort [as 别名]
def bamSort(outputBAM, log, newHeader, verbose):

    tmp = outputBAM + "_tmp"
    if(newHeader != None):
        pyOutputBAM = pysam.AlignmentFile(outputBAM, "rb")
        pyTmp = pysam.AlignmentFile(tmp, "wb", header=newHeader)
        for read in pyOutputBAM:
            pyTmp.write(read)
        pyOutputBAM.close()
        pyTmp.close()
    else:
        os.rename(outputBAM, tmp)

    #run(" ".join(["samtools", "sort", "-@", str(threads) , tmp, replaceExtension(outFile, "")]), log, verbose=verbose, dry=dry)
    run(" ".join(["samtools sort", "-o", outputBAM, tmp]), log, verbose=verbose, dry=False)
    #pysam.sort(tmp, outputBAM)  # @UndefinedVariable
    removeFile(tmp) 
开发者ID:t-neumann,项目名称:slamdunk,代码行数:19,代码来源:filter.py

示例4: sam_to_bam

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import sort [as 别名]
def sam_to_bam(filename):
    """Convert sam to bam"""

    import pysam
    infile = pysam.AlignmentFile(filename, "r")
    name = os.path.splitext(filename)[0]+'.bam'
    bamfile = pysam.AlignmentFile(name, "wb", template=infile)
    for s in infile:
        bamfile.write(s)
    pysam.sort("-o", name, name)
    pysam.index(name)
    bamfile = pysam.AlignmentFile(name, "rb")
    return 
开发者ID:dmnfarrell,项目名称:smallrnaseq,代码行数:15,代码来源:utils.py

示例5: saveReads

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import sort [as 别名]
def saveReads(dataHub, nameExtra=None):
    if dataHub.args.save_reads:
        logging.info("* Saving relevant reads *")
        for i, sample in enumerate(dataHub):
            outbam_path = dataHub.args.save_reads
            if not outbam_path.endswith(".bam"):
                outbam_path += ".bam"

            if len(dataHub.samples) > 1:
                logging.debug("Using i = {}".format(i))
                outbam_path = outbam_path.replace(".bam", ".{}.bam".format(i))

            if nameExtra is not None:
                outbam_path = outbam_path.replace(".bam", ".{}.bam".format(nameExtra))

            logging.info("  Outpath: {}".format(outbam_path))

            # print out just the reads we're interested for use later
            bam_small = pysam.Samfile(outbam_path, "wb", template=sample.bam)
            for read in sample.reads:
                bam_small.write(read)

            for read in sample.readStatistics.reads:
                bam_small.write(read)

            bam_small.close()
            sorted_path = outbam_path.replace(".bam", ".sorted")
            pysam.sort(outbam_path, sorted_path)
            pysam.index(sorted_path+".bam") 
开发者ID:svviz,项目名称:svviz,代码行数:31,代码来源:app.py

示例6: print_read_stack

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import sort [as 别名]
def print_read_stack(reads, refseq=None, cutoff=0, by=None, label=''):
    """Print local read alignments from a sam file against the mapped sequence.
       Args:
        reads: dataframe of read counts with position info
        refseq: sequence segment or gene that reads are mapped to
        cutoff: don't display read with <=cutoff counts
        by: column to sort reads by, e.g. 'reads', 'start' or 'end'
       Returns:
        string of printed read stacks to display or write to a file
    """

    if refseq != None:
        seqlen = len(refseq)
    else:
        seqlen = reads.end.max()
    f = None
    reads = reads[reads.reads>=cutoff]
    if by is not None:
        reads = reads.sort_values(by, ascending=False)

    l = len(reads)
    if l==0: return
    s = ''
    s += '%s unique reads\n' %l
    s += '-------------------------------------\n'
    s += refseq+'\n'
    for idx,r in reads.iterrows():
        seq = r.seq
        count = r.reads
        pos = find_subseq(refseq, seq)
        if pos == -1: continue
        i = len(seq)+pos
        s += "{:>{w}} ({c})\n".format(seq,w=i,c=count)
    s += '\n'
    return s 
开发者ID:dmnfarrell,项目名称:smallrnaseq,代码行数:37,代码来源:utils.py

示例7: featurecounts

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import sort [as 别名]
def featurecounts(samfile, gtffile):
    """Count aligned features with the featureCounts program.
       Returns: a dataframe of counts"""

    params = '-T 5 -t exon -g transcript_id'
    cmd = 'featureCounts %s -a %s -o counts.txt %s' %(params, gtffile, samfile)
    print (cmd)
    result = subprocess.check_output(cmd, shell=True, executable='/bin/bash')
    counts =  pd.read_csv('counts.txt', sep='\t', comment='#')
    counts = counts.rename(columns={samfile:'reads'})
    counts = counts.sort('reads', ascending=False)
    return counts 
开发者ID:dmnfarrell,项目名称:smallrnaseq,代码行数:14,代码来源:utils.py

示例8: preprocess_sam

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import sort [as 别名]
def preprocess_sam(sam_files, datasets, tmp_dir = "talon_tmp/", n_threads = 0):
    """ Copy and rename the provided SAM/BAM file(s), merge them, and index.
        This is necessary in order to use Pybedtools commands on the reads.
        The renaming is necessary in order to label the reads according to
        their dataset."""

    # Create the tmp dir
    os.system("mkdir -p %s " % (tmp_dir))

    # Copy and rename SAM files with dataset names to ensure correct RG tags
    renamed_sams = []
    for sam, dataset in zip(sam_files, datasets):
        suffix = "." + sam.split(".")[-1]
        if suffix == ".sam":
            bam_copy = tmp_dir + dataset + "_unsorted.bam"
            convert_to_bam(sam, bam_copy)
            sam = bam_copy
        sorted_bam = tmp_dir + dataset + ".bam"
        pysam.sort("-@", str(n_threads), "-o", sorted_bam, sam)
        renamed_sams.append(sorted_bam)

    merged_bam = tmp_dir + "merged.bam"
    merge_args = [merged_bam] + renamed_sams + ["-f", "-r", "-@", str(n_threads)]
    # index_args = [merged_bam, "-@", str(n_threads)]

    # Merge datasets and use -r option to include a read group tag
    try:
        pysam.merge(*merge_args)
        pysam.index(merged_bam)
        ts = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
        print("[ %s ] Merged input SAM/BAM files" % (ts))
    except:
        raise RuntimeError(("Problem merging and indexing SAM/BAM files. "
                            "Check your file paths and make sure that all "
                            "files have headers."))
    return merged_bam 
开发者ID:mortazavilab,项目名称:TALON,代码行数:38,代码来源:process_sams.py

示例9: partition_reads

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import sort [as 别名]
def partition_reads(sam_files, datasets, tmp_dir = "talon_tmp/", n_threads = 0):
    """ Use bedtools merge to create non-overlapping intervals from all of the
        transcripts in a series of SAM/BAM files. Then, iterate over the intervals
        to extract all reads inside of them from the pysam object.
       
        Returns:
            - List of lists: sublists contain pysam reads from a given interval
            - List of tuple intervals
            - filename of merged bam file (to keep track of the header)
           """

    merged_bam = preprocess_sam(sam_files, datasets, tmp_dir = tmp_dir, 
                                n_threads = n_threads)

    try:
        all_reads = pybedtools.BedTool(merged_bam).bam_to_bed()
    except Exception as e:
        print(e)
        raise RuntimeError("Problem opening sam file %s" % (merged_bam))

    # Must sort the Bedtool object
    sorted_reads = all_reads.sort()
    intervals = sorted_reads.merge(d = 100000000)

    # Now open each sam file using pysam and extract the reads
    coords = []
    read_groups = []
    with pysam.AlignmentFile(merged_bam) as bam:  # type: pysam.AlignmentFile
        for interval in intervals:
            reads = get_reads_in_interval(bam, interval.chrom,
                                          interval.start, interval.end)
            read_groups.append(reads)
            coords.append((interval.chrom, interval.start + 1, interval.end))

    return read_groups, coords, merged_bam 
开发者ID:mortazavilab,项目名称:TALON,代码行数:37,代码来源:process_sams.py

示例10: create_simple_bam

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import sort [as 别名]
def create_simple_bam(fname, calls):
    """Create a small bam file with RLE encoding coded in the qscores."""
    ref_len = len(simple_data['ref'])

    header = {'HD': {'VN': '1.0'},
              'SQ': [{'LN': ref_len, 'SN': 'ref'}]}

    tmp_file = '{}.tmp'.format(fname)
    with pysam.AlignmentFile(
            tmp_file, 'wb', reference_names=['ref', ],
            reference_lengths=[ref_len, ], header=header) as output:
        for index, basecall in enumerate(calls):
            a =common.initialise_alignment(
                query_name=basecall['query_name'],
                reference_id=0,
                reference_start=0,
                query_sequence=basecall['seq'],
                cigarstring=basecall['cigarstring'],
                flag=basecall['flag'],
                query_qualities=basecall['quality'],
                tags=basecall['tags'])

            output.write(a)

    pysam.sort("-o", fname, tmp_file)
    os.remove(tmp_file)
    pysam.index(fname) 
开发者ID:nanoporetech,项目名称:medaka,代码行数:29,代码来源:mock_data.py

示例11: check_te_overlaps_dels

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import sort [as 别名]
def check_te_overlaps_dels(te, bamfile, te_list, prefix):
    pybedtools.BedTool(bamfile).bam_to_bed().saveas(prefix + 'split.temp')
    convert_split_pairbed(prefix + 'split.temp', prefix + 'split_bedpe.temp')
    split = pybedtools.BedTool(prefix + 'split_bedpe.temp').each(append_origin, word='split').saveas()
    create_deletion_coords(split, prefix + 'second_pass_del_coords.temp')
    dels = pybedtools.BedTool(prefix + 'second_pass_del_coords.temp').sort()
    intersections = dels.intersect(te, wb=True, nonamecheck=True, sorted=True)
    os.remove(prefix + 'second_pass_del_coords.temp')
    reads = []
    for r in intersections:
        if r[-3] in te_list:
            reads.append(r[3])
        else:
            pass
    return reads 
开发者ID:ListerLab,项目名称:TEPID,代码行数:17,代码来源:tepid.py

示例12: refine

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import sort [as 别名]
def refine(options):
    """
    Refine TE insertion and deletion calls within a group of related samples
    Use indel calls from other samples in the group, inspect areas of the genome in samples where
    indel was not called, and look for evidence of the same indel with much lower
    read count threshold
    """
    te = pybedtools.BedTool(options.te).sort()
    names = readNames(options.all_samples)
    if options.insertions is not False:
        insertions = getOtherLines(names, options.insertions)
    if options.deletions is not False:
        deletions = getOtherLines(names, options.deletions)  # format ([data], [inverse_accessions])
    print "Processing "+options.name
    chrom_sizes = check_bam(options.conc, options.proc, options.prefix)
    check_bam(options.split, options.proc, options.prefix, make_new_index=True)
    cov = calc_cov(options.conc, 100000, 120000)
    concordant = pysam.AlignmentFile(options.conc, 'rb')
    split_alignments = pysam.AlignmentFile(options.split, 'rb')
    name_indexed = pysam.IndexedReads(split_alignments)
    name_indexed.build()
    if options.deletions is not False:
        print "  checking deletions"
        process_missed(deletions, "deletion", concordant, split_alignments, name_indexed, options.name, te, cov/5, chrom_sizes, options.prefix)
    else:
        pass
    if options.insertions is not False:
        print "  checking insertions"
        process_missed(insertions, "insertion", concordant, split_alignments, name_indexed, options.name, te, cov/10, chrom_sizes, options.prefix)
    else:
        pass 
开发者ID:ListerLab,项目名称:TEPID,代码行数:33,代码来源:tepid.py

示例13: check_bam

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import sort [as 别名]
def check_bam(bam, p, prefix, make_new_index=False):
    """
    Sort and index bam file
    returns dictionary of chromosome names and lengths
    """
    # check if sorted
    test_head = pysam.AlignmentFile(bam, 'rb')
    chrom_sizes = {}
    p = str(p)
    for i in test_head.header['SQ']:
        chrom_sizes[i['SN']] = int(i['LN'])
    try:
        test_head.header['HD']['SO']
    except KeyError:
        print '  sorting bam file'
        pysam.sort('-@', p, bam, prefix + 'sorted.temp')
        os.remove(bam)
        os.rename(prefix + 'sorted.temp.bam', bam)
    else:
        if test_head.header['HD']['SO'] == 'coordinate':
            pass
        else:
            print '  sorting bam file'
            pysam.sort('-@', p, bam, prefix + 'sorted.temp')
            os.remove(bam)
            os.rename(prefix + 'sorted.temp.bam', bam)
    test_head.close()
    # check if indexed
    if '{}.bai'.format(bam) in os.listdir('.') and make_new_index is False:
        pass
    else:
        print '  indexing bam file'
        pysam.index(bam)
    return chrom_sizes 
开发者ID:ListerLab,项目名称:TEPID,代码行数:36,代码来源:tepid.py

示例14: prepareBED

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import sort [as 别名]
def prepareBED(bed, slamSimBed, minLength):
    utrs = []
    for utr in BedIterator(bed):
        utrs.append(utr)

    utrs.sort(key=lambda x: (x.name, -x.getLength()))

    outBed = open(slamSimBed, "w")

    partList = []
    lastUtr = None
    for utr in utrs:
        if utr.hasStrand() and utr.hasNonEmptyName():
            currentUtr = utr.name
            if currentUtr == lastUtr:
                partList.append(utr)
            else:
                if(not lastUtr is None):
                    printUTR(partList[0], outBed, minLength)
                partList = [utr]
            lastUtr = currentUtr
        else:
            print("Warning: Invalid BED entry found: " + str(utr))

    if(not lastUtr is None):
        printUTR(partList[0], outBed, minLength)

    outBed.close() 
开发者ID:t-neumann,项目名称:slamdunk,代码行数:30,代码来源:simulator.py

示例15: createGenomeWindows

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import sort [as 别名]
def createGenomeWindows(genome, outfile, windows):

    statement = '''
        cgat windows2gff
            --genome=%(genome)s
            --output-format=bed
            --fixed-width-windows=%(windows)s
            --log=%(outfile)s.log |
        bedtools sort -i stdin |
        gzip > %(outfile)s
        '''
    P.run() 
开发者ID:CGATOxford,项目名称:CGATPipelines,代码行数:14,代码来源:PipelineIntervals.py


注:本文中的pysam.sort方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。