當前位置: 首頁>>代碼示例>>Python>>正文


Python BamTools.getNumReads方法代碼示例

本文整理匯總了Python中CGAT.BamTools.getNumReads方法的典型用法代碼示例。如果您正苦於以下問題:Python BamTools.getNumReads方法的具體用法?Python BamTools.getNumReads怎麽用?Python BamTools.getNumReads使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在CGAT.BamTools的用法示例。


在下文中一共展示了BamTools.getNumReads方法的8個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: buildPicardGCStats

# 需要導入模塊: from CGAT import BamTools [as 別名]
# 或者: from CGAT.BamTools import getNumReads [as 別名]
def buildPicardGCStats(infile, outfile, genome_file):
    """picard:CollectGCBiasMetrics
    Collect GC bias metrics.
    Arguments
    ---------
    infile : string
        Input filename in :term:`BAM` format.
    outfile : string
        Output filename with picard output.
    genome_file : string
        Filename with genomic sequence.
    """

    job_memory = PICARD_MEMORY
    picard_opts = '-Xmx%(job_memory)s -XX:+UseParNewGC -XX:+UseConcMarkSweepGC' % locals()
    job_threads = 3

    if BamTools.getNumReads(infile) == 0:
        E.warn("no reads in %s - no metrics" % infile)
        P.touch(outfile)
        return

    statement = '''picard %(picard_opts)s CollectGcBiasMetrics
    INPUT=%(infile)s
    REFERENCE_SEQUENCE=%(genome_file)s
    OUTPUT=%(outfile)s
    VALIDATION_STRINGENCY=SILENT
    CHART_OUTPUT=%(outfile)s.pdf
    SUMMARY_OUTPUT=%(outfile)s.summary
    >& %(outfile)s'''

    P.run()
開發者ID:CGATOxford,項目名稱:CGATPipelines,代碼行數:34,代碼來源:PipelineBamStats.py

示例2: buildPicardDuplicateStats

# 需要導入模塊: from CGAT import BamTools [as 別名]
# 或者: from CGAT.BamTools import getNumReads [as 別名]
def buildPicardDuplicateStats(infile, outfile):
    '''run picard:MarkDuplicates
    Record duplicate metrics using Picard and keep the dedupped .bam
    file.
    Pair duplication is properly handled, including inter-chromosomal
    cases. SE data is also handled.  These stats also contain a
    histogram that estimates the return from additional sequecing.  No
    marked bam files are retained (/dev/null...)  Note that picards
    counts reads but they are in fact alignments.
    Arguments
    ---------
    infile : string
        Input filename in :term:`BAM` format.
    outfile : string
        Output filename with picard output.
    '''
    job_memory = PICARD_MEMORY
    picard_opts = '-Xmx%(job_memory)s -XX:+UseParNewGC -XX:+UseConcMarkSweepGC' % locals()
    job_threads = 3

    if BamTools.getNumReads(infile) == 0:
        E.warn("no reads in %s - no metrics" % infile)
        P.touch(outfile)
        return

    statement = '''picard %(picard_opts)s MarkDuplicates
    INPUT=%(infile)s
    ASSUME_SORTED=true
    METRICS_FILE=%(outfile)s.duplicate_metrics
    OUTPUT=%(outfile)s
    VALIDATION_STRINGENCY=SILENT;
    '''
    statement += '''samtools index %(outfile)s ;'''
    P.run()
開發者ID:CGATOxford,項目名稱:CGATPipelines,代碼行數:36,代碼來源:PipelineBamStats.py

示例3: buildPicardCoverageStats

# 需要導入模塊: from CGAT import BamTools [as 別名]
# 或者: from CGAT.BamTools import getNumReads [as 別名]
def buildPicardCoverageStats(infile, outfile, baits, regions):
    '''run picard:CollectHsMetrics
    Generate coverage statistics for regions of interest from a bed
    file using Picard.
    Arguments
    ---------
    infile : string
        Input filename in :term:`BAM` format.
    outfile : string
        Output filename with picard output.
    baits : :term:`bed` formatted file of bait regions
    regions : :term:`bed` formatted file of target regions
    '''

    job_memory = PICARD_MEMORY
    picard_opts = '-Xmx%(job_memory)s -XX:+UseParNewGC -XX:+UseConcMarkSweepGC' % locals()
    job_threads = 3

    if BamTools.getNumReads(infile) == 0:
        E.warn("no reads in %s - no metrics" % infile)
        P.touch(outfile)
        return

    statement = '''picard %(picard_opts)s CollectHsMetrics
    BAIT_INTERVALS=%(baits)s
    TARGET_INTERVALS=%(regions)s
    INPUT=%(infile)s
    OUTPUT=%(outfile)s
    VALIDATION_STRINGENCY=LENIENT''' % locals()
    P.run()
開發者ID:CGATOxford,項目名稱:CGATPipelines,代碼行數:32,代碼來源:PipelineBamStats.py

示例4: buildPicardAlignmentStats

# 需要導入模塊: from CGAT import BamTools [as 別名]
# 或者: from CGAT.BamTools import getNumReads [as 別名]
def buildPicardAlignmentStats(infile, outfile, genome_file):
    '''run picard:CollectMultipleMetrics
    Arguments
    ---------
    infile : string
        Input filename in :term:`BAM` format.
    outfile : string
        Output filename with picard output.
    genome_file : string
        Filename with genomic sequence.
    '''

    job_memory = PICARD_MEMORY
    picard_opts = '-Xmx%(job_memory)s -XX:+UseParNewGC -XX:+UseConcMarkSweepGC' % locals()
    job_threads = 3

    if BamTools.getNumReads(infile) == 0:
        E.warn("no reads in %s - no metrics" % infile)
        P.touch(outfile)
        return

    statement = '''picard %(picard_opts)s CollectMultipleMetrics
    INPUT=%(infile)s
    REFERENCE_SEQUENCE=%(genome_file)s
    ASSUME_SORTED=true
    OUTPUT=%(outfile)s
    VALIDATION_STRINGENCY=SILENT
    >& %(outfile)s'''

    P.run()
開發者ID:CGATOxford,項目名稱:CGATPipelines,代碼行數:32,代碼來源:PipelineBamStats.py

示例5: buildPicardRnaSeqMetrics

# 需要導入模塊: from CGAT import BamTools [as 別名]
# 或者: from CGAT.BamTools import getNumReads [as 別名]
def buildPicardRnaSeqMetrics(infiles, strand, outfile):
    '''run picard:RNASeqMetrics



    Arguments
    ---------
    infiles : string
        Input filename in :term:`BAM` format.
        Genome file in refflat format
            (http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat)
    outfile : string
        Output filename with picard output.

    '''
    job_memory = PICARD_MEMORY
    picard_opts = '-Xmx%(job_memory)s -XX:+UseParNewGC -XX:+UseConcMarkSweepGC' % locals()
    job_threads = 3
    infile, genome = infiles

    if BamTools.getNumReads(infile) == 0:
        E.warn("no reads in %s - no metrics" % infile)
        P.touch(outfile)
        return

    statement = '''picard %(picard_opts)s CollectRnaSeqMetrics
    REF_FLAT=%(genome)s
    INPUT=%(infile)s
    ASSUME_SORTED=true
    OUTPUT=%(outfile)s
    STRAND=%(strand)s
    VALIDATION_STRINGENCY=SILENT
    '''
    P.run()
開發者ID:CGATOxford,項目名稱:CGATPipelines,代碼行數:36,代碼來源:PipelineBamStats.py

示例6: buildPicardInsertSizeStats

# 需要導入模塊: from CGAT import BamTools [as 別名]
# 或者: from CGAT.BamTools import getNumReads [as 別名]
def buildPicardInsertSizeStats(infile, outfile, genome_file):
    '''run Picard:CollectInsertSizeMetrics

    Collect insert size statistics.

    Arguments
    ---------
    infile : string
        Input filename in :term:`BAM` format.
    outfile : string
        Output filename with picard output.
    genome_file : string
        Filename with genomic sequence.
    '''

    job_memory = PICARD_MEMORY
    job_threads = 3

    if BamTools.getNumReads(infile) == 0:
        E.warn("no reads in %s - no metrics" % infile)
        P.touch(outfile)
        return

    statement = '''CollectInsertSizeMetrics
    INPUT=%(infile)s
    REFERENCE_SEQUENCE=%(genome_file)s
    ASSUME_SORTED=true
    OUTPUT=%(outfile)s
    VALIDATION_STRINGENCY=SILENT
    >& %(outfile)s'''

    P.run()
開發者ID:sudlab,項目名稱:CGATPipelines,代碼行數:34,代碼來源:PipelineMappingQC.py

示例7: buildPicardDuplicationStats

# 需要導入模塊: from CGAT import BamTools [as 別名]
# 或者: from CGAT.BamTools import getNumReads [as 別名]
def buildPicardDuplicationStats(infile, outfile):
    '''run picard:MarkDuplicates

    Record duplicate metrics using Picard, the marked records
    are discarded.

    Arguments
    ---------
    infile : string
        Input filename in :term:`BAM` format.
    outfile : string
        Output filename with picard output.
    '''

    job_memory = PICARD_MEMORY
    job_threads = 3

    if BamTools.getNumReads(infile) == 0:
        E.warn("no reads in %s - no metrics" % infile)
        P.touch(outfile)
        return

    # currently, MarkDuplicates cannot handle split alignments from gsnap
    # these can be identified by the custom XT tag.
    if ".gsnap.bam" in infile:
        tmpf = P.getTempFile(".")
        tmpfile_name = tmpf.name
        statement = '''samtools view -h %(infile)s
        | awk "!/\\tXT:/"
        | samtools view /dev/stdin -S -b > %(tmpfile_name)s;
        ''' % locals()
        data_source = tmpfile_name
    else:
        statement = ""
        data_source = infile

    os.environ["CGAT_JAVA_OPTS"] = "-Xmx%s -XX:+UseParNewGC\
                                    -XX:+UseConcMarkSweepGC" % (PICARD_MEMORY)

    statement += '''MarkDuplicates
    INPUT=%(data_source)s
    ASSUME_SORTED=true
    METRICS_FILE=%(outfile)s
    OUTPUT=/dev/null
    VALIDATION_STRINGENCY=SILENT
    '''
    P.run()

    os.unsetenv("CGAT_JAVA_OPTS")

    if ".gsnap.bam" in infile:
        os.unlink(tmpfile_name)
開發者ID:sudlab,項目名稱:CGATPipelines,代碼行數:54,代碼來源:PipelineMappingQC.py

示例8: buildPicardAlignmentStats

# 需要導入模塊: from CGAT import BamTools [as 別名]
# 或者: from CGAT.BamTools import getNumReads [as 別名]
def buildPicardAlignmentStats(infile, outfile, genome_file):
    '''run picard:CollectMultipleMetrics

    Arguments
    ---------
    infile : string
        Input filename in :term:`BAM` format.
    outfile : string
        Output filename with picard output.
    genome_file : string
        Filename with genomic sequence.
    '''

    job_memory = PICARD_MEMORY
    picard_opts = '-Xmx%(job_memory)s -XX:+UseParNewGC -XX:+UseConcMarkSweepGC' % locals()
    job_threads = 3

    if BamTools.getNumReads(infile) == 0:
        E.warn("no reads in %s - no metrics" % infile)
        P.touch(outfile)
        return

    # Picard seems to have problem if quality information is missing
    # or there is no sequence/quality information within the bam file.
    # Thus, add it explicitly.
    statement = '''cat %(infile)s
    | cgat bam2bam -v 0
    --method=set-sequence --output-sam
    | picard %(picard_opts)s CollectMultipleMetrics
    INPUT=/dev/stdin
    REFERENCE_SEQUENCE=%(genome_file)s
    ASSUME_SORTED=true
    OUTPUT=%(outfile)s
    VALIDATION_STRINGENCY=SILENT
    >& %(outfile)s'''

    P.run()
開發者ID:CGATOxford,項目名稱:CGATPipelines,代碼行數:39,代碼來源:PipelineMappingQC.py


注:本文中的CGAT.BamTools.getNumReads方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。