本文整理匯總了Python中CGAT.BamTools.getNumReads方法的典型用法代碼示例。如果您正苦於以下問題:Python BamTools.getNumReads方法的具體用法?Python BamTools.getNumReads怎麼用?Python BamTools.getNumReads使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類CGAT.BamTools的用法示例。
在下文中一共展示了BamTools.getNumReads方法的8個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: buildPicardGCStats
# 需要導入模塊: from CGAT import BamTools [as 別名]
# 或者: from CGAT.BamTools import getNumReads [as 別名]
def buildPicardGCStats(infile, outfile, genome_file):
    """Collect GC bias metrics with picard CollectGcBiasMetrics.

    When :func:`BamTools.getNumReads` reports zero reads for `infile`,
    a warning is emitted, ``P.touch(outfile)`` is called and picard is
    not run.

    Arguments
    ---------
    infile : string
        Input filename in :term:`BAM` format.
    outfile : string
        Output filename with picard output. The chart is written to
        ``<outfile>.pdf`` and the summary to ``<outfile>.summary``.
    genome_file : string
        Filename with genomic sequence.
    """
    # NOTE(review): P.run() is called without arguments, so it presumably
    # reads `statement`, `job_memory`, `job_threads` and the values for the
    # %(...)s placeholders from this function's locals - keep these names.
    job_threads = 3
    job_memory = PICARD_MEMORY
    picard_opts = ('-Xmx%(job_memory)s -XX:+UseParNewGC '
                   '-XX:+UseConcMarkSweepGC' % {"job_memory": job_memory})

    # Nothing to measure on an empty BAM file.
    if BamTools.getNumReads(infile) == 0:
        E.warn("no reads in %s - no metrics" % infile)
        P.touch(outfile)
        return

    statement = "\n".join((
        "picard %(picard_opts)s CollectGcBiasMetrics",
        "INPUT=%(infile)s",
        "REFERENCE_SEQUENCE=%(genome_file)s",
        "OUTPUT=%(outfile)s",
        "VALIDATION_STRINGENCY=SILENT",
        "CHART_OUTPUT=%(outfile)s.pdf",
        "SUMMARY_OUTPUT=%(outfile)s.summary",
        ">& %(outfile)s",
    ))
    P.run()
示例2: buildPicardDuplicateStats
# 需要導入模塊: from CGAT import BamTools [as 別名]
# 或者: from CGAT.BamTools import getNumReads [as 別名]
def buildPicardDuplicateStats(infile, outfile):
    """Run picard MarkDuplicates and index the resulting BAM file.

    Duplicate metrics are written to ``<outfile>.duplicate_metrics``;
    the BAM file produced by MarkDuplicates is written to `outfile`
    and then indexed with ``samtools index``.

    Pair duplication is properly handled, including inter-chromosomal
    cases. SE data is also handled. These stats also contain a
    histogram that estimates the return from additional sequencing.
    Note that picard counts reads but they are in fact alignments.

    When :func:`BamTools.getNumReads` reports zero reads for `infile`,
    a warning is emitted, ``P.touch(outfile)`` is called and picard is
    not run.

    Arguments
    ---------
    infile : string
        Input filename in :term:`BAM` format.
    outfile : string
        Output filename with picard output.
    """
    # NOTE(review): P.run() is called without arguments, so it presumably
    # reads `statement`, `job_memory`, `job_threads` and the values for the
    # %(...)s placeholders from this function's locals - keep these names.
    job_threads = 3
    job_memory = PICARD_MEMORY
    picard_opts = ('-Xmx%(job_memory)s -XX:+UseParNewGC '
                   '-XX:+UseConcMarkSweepGC' % {"job_memory": job_memory})

    # Nothing to measure on an empty BAM file.
    if BamTools.getNumReads(infile) == 0:
        E.warn("no reads in %s - no metrics" % infile)
        P.touch(outfile)
        return

    # Two shell commands: MarkDuplicates (terminated by ';') followed by
    # indexing of the BAM file it wrote.
    statement = "\n".join((
        "picard %(picard_opts)s MarkDuplicates",
        "INPUT=%(infile)s",
        "ASSUME_SORTED=true",
        "METRICS_FILE=%(outfile)s.duplicate_metrics",
        "OUTPUT=%(outfile)s",
        "VALIDATION_STRINGENCY=SILENT;",
        "samtools index %(outfile)s ;",
    ))
    P.run()
示例3: buildPicardCoverageStats
# 需要導入模塊: from CGAT import BamTools [as 別名]
# 或者: from CGAT.BamTools import getNumReads [as 別名]
def buildPicardCoverageStats(infile, outfile, baits, regions):
    """Run picard CollectHsMetrics.

    Generate coverage statistics for regions of interest using Picard.

    When :func:`BamTools.getNumReads` reports zero reads for `infile`,
    a warning is emitted, ``P.touch(outfile)`` is called and picard is
    not run.

    Arguments
    ---------
    infile : string
        Input filename in :term:`BAM` format.
    outfile : string
        Output filename with picard output.
    baits : string
        :term:`bed` formatted file of bait regions, passed as
        ``BAIT_INTERVALS``.
    regions : string
        :term:`bed` formatted file of target regions, passed as
        ``TARGET_INTERVALS``.
    """
    # NOTE(review): P.run() is called without arguments, so it presumably
    # reads `statement`, `job_memory` and `job_threads` from this
    # function's locals - keep these names.
    job_threads = 3
    job_memory = PICARD_MEMORY
    picard_opts = ('-Xmx%(job_memory)s -XX:+UseParNewGC '
                   '-XX:+UseConcMarkSweepGC' % {"job_memory": job_memory})

    # Nothing to measure on an empty BAM file.
    if BamTools.getNumReads(infile) == 0:
        E.warn("no reads in %s - no metrics" % infile)
        P.touch(outfile)
        return

    # Here the placeholders are filled in immediately from the local
    # variables rather than being left for P.run().
    template = "\n".join((
        "picard %(picard_opts)s CollectHsMetrics",
        "BAIT_INTERVALS=%(baits)s",
        "TARGET_INTERVALS=%(regions)s",
        "INPUT=%(infile)s",
        "OUTPUT=%(outfile)s",
        "VALIDATION_STRINGENCY=LENIENT",
    ))
    statement = template % locals()
    P.run()
示例4: buildPicardAlignmentStats
# 需要導入模塊: from CGAT import BamTools [as 別名]
# 或者: from CGAT.BamTools import getNumReads [as 別名]
def buildPicardAlignmentStats(infile, outfile, genome_file):
    """Run picard CollectMultipleMetrics.

    When :func:`BamTools.getNumReads` reports zero reads for `infile`,
    a warning is emitted, ``P.touch(outfile)`` is called and picard is
    not run.

    Arguments
    ---------
    infile : string
        Input filename in :term:`BAM` format.
    outfile : string
        Output filename with picard output. The command's stdout and
        stderr are redirected to this file as well.
    genome_file : string
        Filename with genomic sequence.
    """
    # NOTE(review): P.run() is called without arguments, so it presumably
    # reads `statement`, `job_memory`, `job_threads` and the values for the
    # %(...)s placeholders from this function's locals - keep these names.
    job_threads = 3
    job_memory = PICARD_MEMORY
    picard_opts = ('-Xmx%(job_memory)s -XX:+UseParNewGC '
                   '-XX:+UseConcMarkSweepGC' % {"job_memory": job_memory})

    # Nothing to measure on an empty BAM file.
    if BamTools.getNumReads(infile) == 0:
        E.warn("no reads in %s - no metrics" % infile)
        P.touch(outfile)
        return

    statement = "\n".join((
        "picard %(picard_opts)s CollectMultipleMetrics",
        "INPUT=%(infile)s",
        "REFERENCE_SEQUENCE=%(genome_file)s",
        "ASSUME_SORTED=true",
        "OUTPUT=%(outfile)s",
        "VALIDATION_STRINGENCY=SILENT",
        ">& %(outfile)s",
    ))
    P.run()
示例5: buildPicardRnaSeqMetrics
# 需要導入模塊: from CGAT import BamTools [as 別名]
# 或者: from CGAT.BamTools import getNumReads [as 別名]
def buildPicardRnaSeqMetrics(infiles, strand, outfile):
    """Run picard CollectRnaSeqMetrics.

    When :func:`BamTools.getNumReads` reports zero reads for the BAM
    file, a warning is emitted, ``P.touch(outfile)`` is called and
    picard is not run.

    Arguments
    ---------
    infiles : sequence of two strings
        ``(infile, genome)``: the input filename in :term:`BAM` format
        and the genome file in refflat format
        (http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat).
    strand : string
        Value passed to picard as ``STRAND``.
    outfile : string
        Output filename with picard output.
    """
    # NOTE(review): P.run() is called without arguments, so it presumably
    # reads `statement`, `job_memory`, `job_threads` and the values for the
    # %(...)s placeholders from this function's locals - keep these names.
    job_threads = 3
    job_memory = PICARD_MEMORY
    picard_opts = ('-Xmx%(job_memory)s -XX:+UseParNewGC '
                   '-XX:+UseConcMarkSweepGC' % {"job_memory": job_memory})

    infile, genome = infiles

    # Nothing to measure on an empty BAM file.
    if BamTools.getNumReads(infile) == 0:
        E.warn("no reads in %s - no metrics" % infile)
        P.touch(outfile)
        return

    # The trailing empty entry keeps the final newline of the command.
    statement = "\n".join((
        "picard %(picard_opts)s CollectRnaSeqMetrics",
        "REF_FLAT=%(genome)s",
        "INPUT=%(infile)s",
        "ASSUME_SORTED=true",
        "OUTPUT=%(outfile)s",
        "STRAND=%(strand)s",
        "VALIDATION_STRINGENCY=SILENT",
        "",
    ))
    P.run()
示例6: buildPicardInsertSizeStats
# 需要導入模塊: from CGAT import BamTools [as 別名]
# 或者: from CGAT.BamTools import getNumReads [as 別名]
def buildPicardInsertSizeStats(infile, outfile, genome_file):
    """Run Picard CollectInsertSizeMetrics.

    Collect insert size statistics.

    When :func:`BamTools.getNumReads` reports zero reads for `infile`,
    a warning is emitted, ``P.touch(outfile)`` is called and the
    command is not run.

    Arguments
    ---------
    infile : string
        Input filename in :term:`BAM` format.
    outfile : string
        Output filename with picard output. The command's stdout and
        stderr are redirected to this file as well.
    genome_file : string
        Filename with genomic sequence.
    """
    # NOTE(review): P.run() is called without arguments, so it presumably
    # reads `statement`, `job_memory`, `job_threads` and the values for the
    # %(...)s placeholders from this function's locals - keep these names.
    job_threads = 3
    job_memory = PICARD_MEMORY

    # Nothing to measure on an empty BAM file.
    if BamTools.getNumReads(infile) == 0:
        E.warn("no reads in %s - no metrics" % infile)
        P.touch(outfile)
        return

    # NOTE(review): the command is invoked as a bare
    # `CollectInsertSizeMetrics` executable, with no `picard` launcher and
    # no JVM options on the command line - confirm that such a wrapper is
    # expected to be on PATH.
    statement = "\n".join((
        "CollectInsertSizeMetrics",
        "INPUT=%(infile)s",
        "REFERENCE_SEQUENCE=%(genome_file)s",
        "ASSUME_SORTED=true",
        "OUTPUT=%(outfile)s",
        "VALIDATION_STRINGENCY=SILENT",
        ">& %(outfile)s",
    ))
    P.run()
示例7: buildPicardDuplicationStats
# 需要導入模塊: from CGAT import BamTools [as 別名]
# 或者: from CGAT.BamTools import getNumReads [as 別名]
def buildPicardDuplicationStats(infile, outfile):
    """Run picard MarkDuplicates and keep only the duplication metrics.

    The metrics are written to `outfile`; the marked records are
    discarded (``OUTPUT=/dev/null``).

    When :func:`BamTools.getNumReads` reports zero reads for `infile`,
    a warning is emitted, ``P.touch(outfile)`` is called and the
    command is not run.

    For input files whose name contains ``.gsnap.bam``, alignments
    carrying an ``XT:`` tag are first filtered out into a temporary
    BAM file, which is removed again afterwards.

    The JVM options are handed over through the ``CGAT_JAVA_OPTS``
    environment variable, which is set for the duration of the run
    and removed afterwards, also when the run fails.

    Arguments
    ---------
    infile : string
        Input filename in :term:`BAM` format.
    outfile : string
        Output filename with picard output.
    """
    # NOTE(review): P.run() is called without arguments, so it presumably
    # reads `statement`, `job_memory`, `job_threads` and the values for the
    # %(...)s placeholders from this function's locals - keep these names.
    job_memory = PICARD_MEMORY
    job_threads = 3

    # Nothing to measure on an empty BAM file.
    if BamTools.getNumReads(infile) == 0:
        E.warn("no reads in %s - no metrics" % infile)
        P.touch(outfile)
        return

    # currently, MarkDuplicates cannot handle split alignments from gsnap
    # these can be identified by the custom XT tag.
    tmpfile_name = None
    if ".gsnap.bam" in infile:
        tmpf = P.getTempFile(".")
        tmpfile_name = tmpf.name
        # This part is interpolated right away; the MarkDuplicates part
        # appended below is left for P.run().
        statement = "\n".join((
            "samtools view -h %(infile)s",
            '| awk "!/\\tXT:/"',
            "| samtools view /dev/stdin -S -b > %(tmpfile_name)s;",
            "",
        )) % locals()
        data_source = tmpfile_name
    else:
        statement = ""
        data_source = infile

    statement += "\n".join((
        "MarkDuplicates",
        "INPUT=%(data_source)s",
        "ASSUME_SORTED=true",
        "METRICS_FILE=%(outfile)s",
        "OUTPUT=/dev/null",
        "VALIDATION_STRINGENCY=SILENT",
        "",
    ))

    # The two JVM flags must be separated by whitespace; a backslash
    # continuation inside the literal can silently fuse them together.
    os.environ["CGAT_JAVA_OPTS"] = (
        "-Xmx%s -XX:+UseParNewGC -XX:+UseConcMarkSweepGC" % PICARD_MEMORY)
    try:
        P.run()
    finally:
        # os.unsetenv() does not update the os.environ mapping; removing
        # the key from os.environ clears both the mapping and the process
        # environment.
        os.environ.pop("CGAT_JAVA_OPTS", None)
        # Remove the temporary BAM file even if the run failed.
        if tmpfile_name is not None and os.path.exists(tmpfile_name):
            os.unlink(tmpfile_name)
示例8: buildPicardAlignmentStats
# 需要導入模塊: from CGAT import BamTools [as 別名]
# 或者: from CGAT.BamTools import getNumReads [as 別名]
def buildPicardAlignmentStats(infile, outfile, genome_file):
    """Run picard CollectMultipleMetrics on a sequence-completed stream.

    The BAM file is piped through ``cgat bam2bam --method=set-sequence``
    before being read by picard from ``/dev/stdin``.

    When :func:`BamTools.getNumReads` reports zero reads for `infile`,
    a warning is emitted, ``P.touch(outfile)`` is called and picard is
    not run.

    Arguments
    ---------
    infile : string
        Input filename in :term:`BAM` format.
    outfile : string
        Output filename with picard output. The command's stdout and
        stderr are redirected to this file as well.
    genome_file : string
        Filename with genomic sequence.
    """
    # NOTE(review): P.run() is called without arguments, so it presumably
    # reads `statement`, `job_memory`, `job_threads` and the values for the
    # %(...)s placeholders from this function's locals - keep these names.
    job_threads = 3
    job_memory = PICARD_MEMORY
    picard_opts = ('-Xmx%(job_memory)s -XX:+UseParNewGC '
                   '-XX:+UseConcMarkSweepGC' % {"job_memory": job_memory})

    # Nothing to measure on an empty BAM file.
    if BamTools.getNumReads(infile) == 0:
        E.warn("no reads in %s - no metrics" % infile)
        P.touch(outfile)
        return

    # Picard seems to have problems if quality information is missing
    # or there is no sequence/quality information within the bam file.
    # Thus, add it explicitly before handing the stream to picard.
    statement = "\n".join((
        "cat %(infile)s",
        "| cgat bam2bam -v 0",
        "--method=set-sequence --output-sam",
        "| picard %(picard_opts)s CollectMultipleMetrics",
        "INPUT=/dev/stdin",
        "REFERENCE_SEQUENCE=%(genome_file)s",
        "ASSUME_SORTED=true",
        "OUTPUT=%(outfile)s",
        "VALIDATION_STRINGENCY=SILENT",
        ">& %(outfile)s",
    ))
    P.run()