本文整理汇总了Python中pysam.Samfile.getrname方法的典型用法代码示例。如果您正苦于以下问题:Python Samfile.getrname方法的具体用法?Python Samfile.getrname怎么用?Python Samfile.getrname使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pysam.Samfile
的用法示例。
在下文中一共展示了Samfile.getrname方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from pysam import Samfile [as 别名]
# 或者: from pysam.Samfile import getrname [as 别名]
def main(args):
option = "r" if args.samformat else "rb"
samfile = Samfile(args.bamfile, "rb")
#Iterates over each read instead of each contig
outputs = defaultdict(list)
#import ipdb; ipdb.set_trace()
for aln in samfile.fetch(until_eof = True):
ref = samfile.getrname(aln.tid)
outputs[ref].append(aln)
for ref, alns in outputs.iteritems():
print_reads(alns, ref, samfile.header)
示例2: main
# 需要导入模块: from pysam import Samfile [as 别名]
# 或者: from pysam.Samfile import getrname [as 别名]
def main():
bam = Samfile("bedtools/tests/data/NA18152.bam", "rb")
rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
for al in bam:
chrom = bam.getrname(al.rname)
start = al.pos
end = al.aend
name = al.qname
for hit in rmsk.search(chrom, start, end):
print chrom, start, end, name,
print hit.chrom, hit.start, hit.end, hit.name
示例3: main
# 需要导入模块: from pysam import Samfile [as 别名]
# 或者: from pysam.Samfile import getrname [as 别名]
def main():
bam = Samfile("bedtools/tests/data/NA18152.bam", "rb")
rmsk = IntervalFile("bedtools/tests/data/rmsk.hg18.chr21.bed")
# Example 1:
# Method: IntervalFile.all_hits()
# Report _all_ of the rmsk features that overlap with the BAM alignment
for al in bam:
strand = "+"
if al.is_reverse: strand = "-"
i = Interval(bam.getrname(al.rname), al.pos, al.aend, strand)
for hit in rmsk.all_hits(i, same_strand=True, ovlp_pct=0.75):
print "\t".join(str(x) for x in [i,hit])
示例4: _bowtie2_filter
# 需要导入模块: from pysam import Samfile [as 别名]
# 或者: from pysam.Samfile import getrname [as 别名]
def _bowtie2_filter(fnam, fastq_path, unmap_out, map_out):
"""
Divides reads in a map file in two categories: uniquely mapped, and not.
Writes them in two files
"""
try:
fhandler = Samfile(fnam)
except IOError:
raise Exception('ERROR: file "%s" not found' % fnam)
# getrname chromosome names
i = 0
crm_dict = {}
while True:
try:
crm_dict[i] = fhandler.getrname(i)
i += 1
except ValueError:
break
# iteration over reads
unmap_out = open(unmap_out, 'w')
map_out = open(map_out, 'w')
fastq_in = open(fastq_path , 'r')
for line in fhandler:
line_in = fastq_in.readline()
if line.is_unmapped or line.mapq < 4:
read = '%s\t%s\t%s\t%s\t%s\n' % (
line_in.split('\t', 1)[0].rstrip('\n')[1:],
line.seq, line.qual, '-', '-'
)
unmap_out.write(read)
else:
read = '%s\t%s\t%s\t%s\t%s:%s:%d:%d\n' % (
line.qname, line.seq, line.qual, '1',
crm_dict[line.tid],
'-' if line.is_reverse else '+', line.pos + 1, len(line.seq))
map_out.write(read)
for _ in range(3):
fastq_in.readline()
unmap_out.close()
map_out.close()
fastq_in.close()
示例5: parse_sam
# 需要导入模块: from pysam import Samfile [as 别名]
# 或者: from pysam.Samfile import getrname [as 别名]
#.........这里部分代码省略.........
reads = []
for fnam in fnames[read]:
try:
fhandler = Samfile(fnam)
except IOError:
print 'WARNING: file "%s" not found' % fnam
continue
except ValueError:
raise Exception('ERROR: not a SAM/BAM file\n%s' % fnam)
# get the iteration number of the iterative mapping
try:
num = int(fnam.split('.')[-1].split(':')[0])
except:
num += 1
# set read counter
windows[read].setdefault(num, 0)
# guess mapper used
if not mapper:
mapper = fhandler.header['PG'][0]['ID']
if mapper.lower()=='gem':
condition = lambda x: x[1][0][0] != 'N'
elif mapper.lower() in ['bowtie', 'bowtie2']:
condition = lambda x: 'XS' in dict(x)
else:
warn('WARNING: unrecognized mapper used to generate file\n')
condition = lambda x: x[1][1] != 1
if verbose:
print 'loading SAM file from %s: %s' % (mapper, fnam)
# getrname chromosome names
i = 0
crm_dict = {}
while True:
try:
crm_dict[i] = fhandler.getrname(i)
i += 1
except ValueError:
break
# iteration over reads
sub_count = 0 # to empty read buffer
for r in fhandler:
if r.is_unmapped:
continue
if condition(r.tags):
continue
positive = not r.is_reverse
crm = crm_dict[r.tid]
len_seq = len(r.seq)
if positive:
pos = r.pos + 1
else:
pos = r.pos + len_seq
try:
frag_piece = frags[crm][pos / frag_chunk]
except KeyError:
# Chromosome not in hash
continue
idx = bisect(frag_piece, pos)
try:
next_re = frag_piece[idx]
except IndexError:
# case where part of the read is mapped outside chromosome
count = 0
while idx >= len(frag_piece) and count < len_seq:
pos -= 1
count += 1
frag_piece = frags[crm][pos / frag_chunk]
示例6: parse_sam
# 需要导入模块: from pysam import Samfile [as 别名]
# 或者: from pysam.Samfile import getrname [as 别名]
def parse_sam(f_names1, f_names2, frags, out_file1, out_file2, genome_seq,
re_name, verbose=False, **kwargs):
"""
Parse sam/bam file using pysam tools.
Keep a summary of the results into 2 tab-separated files that will contain 6
columns: read ID, Chromosome, position, strand (either 0 or 1), mapped
sequence lebgth, position of the closest upstream RE site, position of
the closest downstream RE site
:param f_names1: a list of path to sam/bam files corresponding to the
mapping of read1, can also be just one file
:param f_names1: a list of path to sam/bam files corresponding to the
mapping of read2, can also be just one file
:param frags: a dictionary generated by :func:`pyatdbit.mapping.restriction_enzymes.map_re_sites`.
"""
frags = map_re_sites(re_name, genome_seq, verbose=True)
frag_chunk = kwargs.get('frag_chunk', 100000)
fnames = f_names1, f_names2
outfiles = out_file1, out_file2
for read in range(2):
if verbose:
print 'Loading read' + str(read + 1)
reads = []
for fnam in fnames[read]:
if verbose:
print 'loading file:', fnam
try:
fhandler = Samfile(fnam)
except IOError:
continue
i = 0
crm_dict = {}
while True:
try:
crm_dict[i] = fhandler.getrname(i).replace('chr', '')
i += 1
except ValueError:
break
for r in fhandler:
if r.is_unmapped:
continue
if r.tags[1][1] != 1:
continue
positive = not r.is_reverse
crm = crm_dict[r.tid]
len_seq = len(r.seq)
pos = r.pos + (0 if positive else len_seq)
try:
frag_piece = frags[crm][pos / frag_chunk]
except KeyError:
# Chromosome not in hash
continue
idx = bisect(frag_piece, pos)
prev_re = frag_piece[idx - 1]
next_re = frag_piece[idx]
name = r.qname
reads.append('%s\t%s\t%d\t%d\t%d\t%d\t%d\n' % (
name, crm, pos, positive, len_seq, prev_re, next_re))
reads_fh = open(outfiles[read], 'w')
reads_fh.write(''.join(sorted(reads)))
reads_fh.close()
del(reads)
示例7: parse_sam
# 需要导入模块: from pysam import Samfile [as 别名]
# 或者: from pysam.Samfile import getrname [as 别名]
def parse_sam(f_names1, f_names2=None, out_file1=None, out_file2=None,
genome_seq=None, re_name=None, verbose=False, mapper=None,
**kwargs):
"""
Parse sam/bam file using pysam tools.
Keep a summary of the results into 2 tab-separated files that will contain 6
columns: read ID, Chromosome, position, strand (either 0 or 1), mapped
sequence lebgth, position of the closest upstream RE site, position of
the closest downstream RE site
:param f_names1: a list of path to sam/bam files corresponding to the
mapping of read1, can also be just one file
:param f_names1: a list of path to sam/bam files corresponding to the
mapping of read2, can also be just one file
:param out_file1: path to outfile tab separated format containing mapped
read1 information
:param out_file1: path to outfile tab separated format containing mapped
read2 information
:param genome_seq: a dictionary generated by :func:`pyatdbit.parser.genome_parser.parse_fasta`.
containing the genomic sequence
:param re_name: name of the restriction enzyme used
:param None mapper: software used to map (supported are GEM and BOWTIE2).
Guessed from file by default.
"""
# not nice, dirty fix in order to allow this function to only parse
# one SAM file
if not out_file1:
raise Exception('ERROR: out_file1 should be given\n')
if not re_name:
raise Exception('ERROR: re_name should be given\n')
if not genome_seq:
raise Exception('ERROR: genome_seq should be given\n')
if (f_names2 and not out_file2) or (not f_names2 and out_file2):
raise Exception('ERROR: out_file2 AND f_names2 needed\n')
frag_chunk = kwargs.get('frag_chunk', 100000)
if verbose:
print 'Searching and mapping RE sites to the reference genome'
frags = map_re_sites(re_name, genome_seq, frag_chunk=frag_chunk,
verbose=verbose)
if isinstance(f_names1, str):
f_names1 = [f_names1]
if isinstance(f_names2, str):
f_names2 = [f_names2]
if f_names2:
fnames = f_names1, f_names2
outfiles = out_file1, out_file2
else:
fnames = (f_names1,)
outfiles = (out_file1, )
for read in range(len(fnames)):
if verbose:
print 'Loading read' + str(read + 1)
reads = []
for fnam in fnames[read]:
if verbose:
print 'loading file:', fnam
try:
fhandler = Samfile(fnam)
except IOError:
continue
# guess mapper used
if not mapper:
mapper = fhandler.header['PG'][0]['ID']
if mapper.lower()=='gem':
condition = lambda x: x[1][1] != 1
elif mapper.lower() in ['bowtie', 'bowtie2']:
condition = lambda x: 'XS' in dict(x)
else:
warn('WARNING: unrecognized mapper used to generate file\n')
condition = lambda x: x[1][1] != 1
if verbose:
print 'MAPPER:', mapper
# iteration over reads
i = 0
crm_dict = {}
while True:
try:
crm_dict[i] = fhandler.getrname(i)
i += 1
except ValueError:
break
for r in fhandler:
if r.is_unmapped:
continue
if condition(r.tags):
continue
positive = not r.is_reverse
crm = crm_dict[r.tid]
len_seq = len(r.seq)
pos = r.pos + (0 if positive else len_seq)
try:
frag_piece = frags[crm][pos / frag_chunk]
except KeyError:
# Chromosome not in hash
continue
idx = bisect(frag_piece, pos)
#.........这里部分代码省略.........
示例8: defaultdict
# 需要导入模块: from pysam import Samfile [as 别名]
# 或者: from pysam.Samfile import getrname [as 别名]
errors2segments = defaultdict(lambda: defaultdict(list));
samfile = Samfile(args.path)
for segment in samfile.fetch(until_eof=True):
num = segment.query_name.split("|")[0]
for etype, eset in errors.iteritems():
if(num in eset):
errors2segments[etype][num].append(segment);
break;
additional = defaultdict(list);
for fname in args.additional:
tsamfile = Samfile(fname);
for segment in tsamfile.fetch(until_eof=True):
num = segment.query_name.split("|")[0]
additional[num].append(ArWrapper(segment, tsamfile.getrname(segment.tid)))
tsamfile.close();
for etype, d in errors2segments.iteritems():
with open(os.path.join(args.outdir, "%s_%s_error.txt" % etype), 'w') as f:
for num, segments in d.iteritems():
if(segments[0].is_reverse):
seq = reverse_complement(segments[0].seq);
else:
seq = segments[0].seq
f.write("%s\nnumber of read:\t%s\n\nSequence:\t%s\n\nSegments:\n\n" % ("_"*140, num, seq))
for segment in segments:
示例9: _read_one_sam
# 需要导入模块: from pysam import Samfile [as 别名]
# 或者: from pysam.Samfile import getrname [as 别名]
def _read_one_sam(fnam, mapper, verbose, frags, frag_chunk, num):
out = open(fnam + '.tsv', 'w')
lwindows = {}
try:
fhandler = Samfile(fnam)
except IOError:
print 'WARNING: file "%s" not found' % fnam
return {}, []
except ValueError:
raise Exception('ERROR: not a SAM/BAM file\n%s' % fnam)
# get the iteration number of the iterative mapping
num = int(fnam.split('.')[-1].split(':')[0])
lwindows.setdefault(num, 0)
# guess mapper used
if not mapper:
mapper = fhandler.header['PG'][0]['ID']
if mapper.lower()=='gem':
condition = lambda x: x[1][1] != 1
elif mapper.lower() in ['bowtie', 'bowtie2']:
condition = lambda x: 'XS' in dict(x)
else:
warn('WARNING: unrecognized mapper used to generate file\n')
condition = lambda x: x[1][1] != 1
if verbose:
print 'loading %s file: %s\n' % (mapper, fnam),
# iteration over lreads
i = 0
crm_dict = {}
while True:
try:
crm_dict[i] = fhandler.getrname(i)
i += 1
except ValueError:
break
for r in fhandler:
if r.is_unmapped:
continue
if condition(r.tags):
continue
positive = not r.is_reverse
crm = crm_dict[r.tid]
len_seq = len(r.seq)
if positive:
pos = r.pos + 1
else:
pos = r.pos + len_seq + 1
try:
frag_piece = frags[crm][pos / frag_chunk]
except KeyError:
# Chromosome not in hash
continue
idx = bisect(frag_piece, pos)
try:
next_re = frag_piece[idx]
except IndexError:
# case where part of the read is mapped outside chromosome
count = 0
while idx >= len(frag_piece) and count < len_seq:
pos -= 1
count += 1
frag_piece = frags[crm][pos / frag_chunk]
idx = bisect(frag_piece, pos)
if count >= len_seq:
raise Exception('Read mapped mostly outside ' +
'chromosome\n')
next_re = frag_piece[idx]
prev_re = frag_piece[idx - 1 if idx else 0]
name = r.qname
out.write('%s\t%s\t%d\t%d\t%d\t%d\t%d\n' % (
name, crm, pos, positive, len_seq, prev_re, next_re))
lwindows[num] += 1
out.close()
return lwindows