This article collects typical usage examples of the pysam.Samfile class in Python. If you are wondering what the Samfile class does, how to use it, or want working examples, the hand-picked snippets below should help.
The following 15 code examples of the Samfile class are shown, ordered by popularity.
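All of the examples share the same basic pattern: open a BAM/SAM file with Samfile, iterate or query alignments, then close the handle. A minimal sketch of that pattern (the path "example.bam" is a placeholder; in recent pysam releases Samfile is an alias for AlignmentFile):

from pysam import Samfile

bam = Samfile("example.bam", "rb")        # "rb" reads BAM; "r" would read SAM
for read in bam.fetch(until_eof=True):    # until_eof avoids requiring an index
    if not read.is_unmapped:
        name, pos = read.qname, read.pos  # per-read fields used throughout the examples
bam.close()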
Example 1: test_process_bam_mismatches
def test_process_bam_mismatches():
    tbam = os.path.join(DATA, "tmp.bam")
    bam = os.path.join(DATA, "ordered_umi.bam")
    if os.path.exists(tbam):
        os.remove(tbam)
    with captured_output() as (out, err):
        process_bam(bam, tbam, mismatches=1)
    assert os.path.exists(tbam)
    it = iter(out.getvalue().split("\n"))
    assert it.next().strip() == "1\t9\t10\t4\t2"
    assert it.next().strip() == "1\t11\t12\t2\t1"
    assert it.next().strip() == "1\t29\t30\t2\t1"

    bam_reader = Samfile(tbam)
    it = iter(bam_reader)
    r = it.next()
    assert r.pos == 4
    assert r.qname == "read8:UMI_ATTCAGGG"
    r = it.next()
    assert r.pos == 9
    assert r.qname == "read1:UMI_AAAAAGGG"
    r = it.next()
    assert r.pos == 9
    assert r.qname == "read4:UMI_AAAGGGGG"
    r = it.next()
    assert r.pos == 11
    assert r.qname == "read5:UMI_ATTTAGGG"
    bam_reader.close()
    os.remove(tbam)
Example 2: callbase
def callbase(bamfile, snpsites, out):
    BF = Samfile(bamfile, 'rb')  # open your bam file
    SF = open(snpsites, 'r')     # the file containing SNP site info
    RF = open(out, 'w')          # result file
    RF.write('ref_name\tpos\tRbase\tAbase\tA\tT\tC\tG\tN\tothers\n')
    for i in SF:
        if i.startswith('#'):
            continue
        else:
            line = ParseSNPsitesLine(i)
            vcf_pos = line.pos - 1  # change 1-based to 0-based
            vcf_refname = line.chrom
            print 'processing: %s %s...' % (vcf_refname, str(vcf_pos))
            At, Tt, Ct, Gt, Nt, othert = 0, 0, 0, 0, 0, 0
            for i in BF.pileup(vcf_refname, vcf_pos, vcf_pos + 1):
                if i.pos == vcf_pos:
                    vcf_Rbase = line.Rbase
                    vcf_Abase = line.Abase
                    for j in i.pileups:
                        yourbase = j.alignment.seq[j.qpos]
                        if yourbase == 'A': At += 1
                        elif yourbase == 'T': Tt += 1
                        elif yourbase == 'C': Ct += 1
                        elif yourbase == 'G': Gt += 1
                        elif yourbase == 'N': Nt += 1
                        else: othert += 1
                    RF.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (vcf_refname,
                        str(vcf_pos + 1), vcf_Rbase, vcf_Abase, str(At), str(Tt), str(Ct), str(Gt),
                        str(Nt), str(othert)))
    BF.close()
Example 3: parse_bam_differential
def parse_bam_differential(afn, bfn, regs, step):
    """(internal) Parses two bam files in differential mode. Proceeds by counting reads mapping
    onto a segment (chr, start, end). No normalization is done at this step.
    """
    abam = Samfile(str(afn), "rb")
    bbam = Samfile(str(bfn), "rb")
    acount = []
    bcount = []
    oldchr = "chr1"
    for reg in regs:
        chr, start, end = reg[:3]
        if chr != oldchr:
            log("files: %s - %s : %s counted" % (afn, bfn, oldchr))
            oldchr = chr
        # this could be improved
        for s in xrange(start, end, step):
            e = s + step
            an = abam.count(chr, s, e)
            bn = bbam.count(chr, s, e)
            acount.append(an)
            bcount.append(bn)
        acount.append(-1)  # -1 marks the boundary between segments
        bcount.append(-1)
    log("files: %s - %s : %s counted (finished)" % (afn, bfn, oldchr))
    return acount, bcount
Example 4: bam_read_id
def bam_read_id(url):
    '''Read first read id out of a remote bam file.
    Note: requires a patched version of pysam
    '''
    stream = Samfile(url, 'rb')
    read = stream.next()
    return read.qname
Example 5: parse_bam_absolute
def parse_bam_absolute(fn, regs):
    """(internal) Parses bam file in absolute mode. Proceeds by counting reads mapping
    onto a segment (chr, start, end) and normalizes the count by the segment's length.
    """
    bam = Samfile(str(fn), "rb")
    count = []
    for reg in regs:
        chr, start, end = reg[:3]
        n = bam.count(chr, start, end)
        count.append(float(n) / (end - start))
    return count
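A hypothetical usage sketch for parse_bam_absolute (and the differential variant in Example 3), assuming a coordinate-sorted, indexed BAM file; the file names and region coordinates below are made up:

# Hypothetical inputs: the BAM paths and coordinates are placeholders.
regions = [("chr1", 0, 1000), ("chr1", 5000, 6000)]

# Reads per base for each segment (raw count normalized by segment length).
densities = parse_bam_absolute("sample.bam", regions)

# Raw per-step counts for two files, with -1 separating segments.
a_counts, b_counts = parse_bam_differential("sample.bam", "control.bam", regions, step=100)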
Example 6: main
def main():
    bam = Samfile("bedtools/tests/data/NA18152.bam", "rb")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
    for al in bam:
        chrom = bam.getrname(al.rname)
        start = al.pos
        end = al.aend
        name = al.qname
        for hit in rmsk.search(chrom, start, end):
            print chrom, start, end, name,
            print hit.chrom, hit.start, hit.end, hit.name
Example 7: main
def main(args=None):
    if args is None:
        args = sys.argv[1:]
    f = Samfile(args[0])
    header = f.header
    f.close()
    reflen = header['SQ'][0]['LN']
    BamIO.write(clip(BamIO.parse(args[0]), reflen), args[1], header=header)
    return 0
Example 8: main
def main(args):
    option = "r" if args.samformat else "rb"
    samfile = Samfile(args.bamfile, option)
    # Iterates over each read instead of each contig
    outputs = defaultdict(list)
    for aln in samfile.fetch(until_eof=True):
        ref = samfile.getrname(aln.tid)
        outputs[ref].append(aln)
    for ref, alns in outputs.iteritems():
        print_reads(alns, ref, samfile.header)
Example 9: main
def main():
    bam = Samfile("bedtools/tests/data/NA18152.bam", "rb")
    rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")

    # Example 1:
    #   Method: IntervalFile.all_hits()
    #   Report _all_ of the rmsk features that overlap with the BAM alignment
    for al in bam:
        strand = "+"
        if al.is_reverse:
            strand = "-"
        i = Interval(bam.getrname(al.rname), al.pos, al.aend, strand)
        for hit in rmsk.all_hits(i, same_strand=True, ovlp_pct=0.75):
            print "\t".join(str(x) for x in [i, hit])
Example 10: __init__
def __init__(self, file_name):
    """
    Initializes GenomicSignal.
    """
    self.file_name = file_name
    self.sg_coefs = None
    self.bam = Samfile(file_name, "rb")
Example 11: get_raw_tracks
def get_raw_tracks(args):
    # Initializing Error Handler
    err = ErrorHandler()

    if len(args.input_files) != 2:
        err.throw_error("ME_FEW_ARG", add_msg="You must specify reads and regions file.")

    output_fname = os.path.join(args.output_location, "{}.wig".format(args.output_prefix))

    bam = Samfile(args.input_files[0], "rb")
    regions = GenomicRegionSet("Interested regions")
    regions.read(args.input_files[1])
    regions.merge()
    reads_file = GenomicSignal()

    with open(output_fname, "a") as output_f:
        for region in regions:
            # Raw counts
            signal = [0.0] * (region.final - region.initial)
            for read in bam.fetch(region.chrom, region.initial, region.final):
                if not read.is_reverse:
                    cut_site = read.pos + args.forward_shift
                    if region.initial <= cut_site < region.final:
                        signal[cut_site - region.initial] += 1.0
                else:
                    cut_site = read.aend + args.reverse_shift - 1
                    if region.initial <= cut_site < region.final:
                        signal[cut_site - region.initial] += 1.0

            if args.norm:
                signal = reads_file.boyle_norm(signal)
                perc = scoreatpercentile(signal, 98)
                std = np.std(signal)
                signal = reads_file.hon_norm_atac(signal, perc, std)

            output_f.write("fixedStep chrom=" + region.chrom + " start=" + str(region.initial + 1) + " step=1\n" +
                           "\n".join([str(e) for e in np.nan_to_num(signal)]) + "\n")
    output_f.close()

    if args.bigWig:
        genome_data = GenomeData(args.organism)
        chrom_sizes_file = genome_data.get_chromosome_sizes()
        bw_filename = os.path.join(args.output_location, "{}.bw".format(args.output_prefix))
        os.system(" ".join(["wigToBigWig", output_fname, chrom_sizes_file, bw_filename, "-verbose=0"]))
        os.remove(output_fname)
Example 12: subsample
def subsample(fn, ns=None):
    if ns is None:
        fn, ns = fn
    sample = []
    count = 0
    outdir_base = path.join(path.dirname(fn), "subset")
    sf = Samfile(fn)
    try:
        # Estimate a sampling weight from the index; fall back to counting reads by hand.
        i_weight = float(sf.mapped) / max(ns)
        print "Read out ", i_weight
    except ValueError:
        i_weight = 0.0
        for read in sf:
            i_weight += 1
        print "Counted ", i_weight
        i_weight /= float(max(ns))
        sf = Samfile(fn)
    print fn, count, i_weight
    # Heap-based reservoir: keep the max(ns) reads with the largest random keys.
    for i, read in enumerate(sf):
        key = random() ** i_weight
        if len(sample) < max(ns):
            heappush(sample, (key, read, i + count))
        else:
            heappushpop(sample, (key, read, i + count))
    count += i
    for n in ns:
        if n == min(ns):
            outdir = outdir_base + "_min"
        else:
            outdir = outdir_base + "{:04.1f}M".format(n / 1e6)
        try:
            makedirs(outdir)
        except OSError:
            pass
        sampN = sorted(sample, reverse=True)[:int(n)]
        print "Kept {: >12,} of {: >12,} reads".format(len(sampN), count)
        print fn, "->", outdir
        stdout.flush()
        of = Samfile(path.join(outdir, "accepted_hits.bam"), mode="wb", template=sf)
        sample.sort(key=lambda (key, read, pos): (read.tid, read.pos))
        for key, read, pos in sampN:
            of.write(read)
        of.close()
    sf.close()
    return [count for key, read, count in sample]
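The heart of subsample is a heap that keeps only the reads with the largest random keys, i.e. a reservoir sample. A stripped-down, unweighted sketch of that idea (the helper name reservoir is hypothetical), assuming an already opened Samfile:

from heapq import heappush, heappushpop
from random import random

def reservoir(sf, n):
    # Keep the n alignments with the largest random keys: a uniform sample of size n.
    heap = []
    for i, read in enumerate(sf):
        entry = (random(), i, read)   # the index breaks ties so reads are never compared
        if len(heap) < n:
            heappush(heap, entry)
        else:
            heappushpop(heap, entry)  # drop the smallest key once the heap is full
    return [read for _, _, read in heap]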
Example 13: split_reads
def split_reads(reads, ref_reads, alt_reads):
    read_results = Counter()
    ref_bam = Samfile(ref_reads, 'wb', template=reads)
    alt_bam = Samfile(alt_reads, 'wb', template=reads)
    prev_phase = None
    prev_read = None
    prev_qname = None
    test = 0
    for read in reads.fetch(until_eof=True):
        test += 1
        chrom = read.reference_name
        snps_on_chrom = snp_dict[chrom]
        phase = get_phase(read, snps_on_chrom)
        read_qname = read.qname
        if read_qname == prev_qname:
            read_results["tot_read"] += 1
            phase_set = set([phase, prev_phase])
            phase_set.discard(None)
            if len(phase_set) == 1:
                read_phase = phase_set.pop()
                if read_phase == -1:
                    ref_bam.write(read)
                    ref_bam.write(prev_read)
                    read_results["ref_read"] += 1
                elif read_phase == 1:
                    alt_bam.write(read)
                    alt_bam.write(prev_read)
                    read_results["alt_read"] += 1
                elif read_phase == 0:
                    read_results['misphased_read'] += 1
            elif len(phase_set) == 0:
                read_results["no_snps_read"] += 1
            else:
                read_results['misphased_read'] += 1
        prev_read = read
        prev_phase = phase
        prev_qname = read_qname
    return read_results
Example 14: _bowtie2_filter
def _bowtie2_filter(fnam, fastq_path, unmap_out, map_out):
    """
    Divides reads in a map file into two categories: uniquely mapped, and not.
    Writes them to two files.
    """
    try:
        fhandler = Samfile(fnam)
    except IOError:
        raise Exception('ERROR: file "%s" not found' % fnam)

    # map reference ids to chromosome names with getrname
    i = 0
    crm_dict = {}
    while True:
        try:
            crm_dict[i] = fhandler.getrname(i)
            i += 1
        except ValueError:
            break

    # iteration over reads
    unmap_out = open(unmap_out, 'w')
    map_out = open(map_out, 'w')
    fastq_in = open(fastq_path, 'r')
    for line in fhandler:
        line_in = fastq_in.readline()
        if line.is_unmapped or line.mapq < 4:
            read = '%s\t%s\t%s\t%s\t%s\n' % (
                line_in.split('\t', 1)[0].rstrip('\n')[1:],
                line.seq, line.qual, '-', '-')
            unmap_out.write(read)
        else:
            read = '%s\t%s\t%s\t%s\t%s:%s:%d:%d\n' % (
                line.qname, line.seq, line.qual, '1',
                crm_dict[line.tid],
                '-' if line.is_reverse else '+', line.pos + 1, len(line.seq))
            map_out.write(read)
        # skip the remaining three lines of the FASTQ record
        for _ in range(3):
            fastq_in.readline()
    unmap_out.close()
    map_out.close()
    fastq_in.close()
Example 15: main
def main(args):
    option = "r" if args.samformat else "rb"
    samfile = Samfile(args.bamfile, option)
    ref_ids = [samfile.gettid(r) for r in samfile.references]
    # Iterates over each read instead of each contig
    reads_to_print = []
    for aln in samfile.fetch(until_eof=True):
        if pair_is_aligned(aln, ref_ids):
            if args.read_pair == 1 and aln.is_read1:
                reads_to_print.append(aln)
            elif args.read_pair == 2 and aln.is_read2:
                reads_to_print.append(aln)
            elif args.read_pair == 0:
                reads_to_print.append(aln)
        if len(reads_to_print) >= 10000:
            # Flush the reads collected so far
            print_reads(reads_to_print)
            reads_to_print = []
    print_reads(reads_to_print)