本文整理匯總了Python中HTSeq類的典型用法代碼示例。如果您正苦於以下問題:Python HTSeq類的具體用法?Python HTSeq怎麽用?Python HTSeq使用的例子?那麽, 這裏精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了HTSeq類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: count_reads_paired
def count_reads_paired(read_seq, counter, order, quiet, minaqual):
if order == "name":
read_seq = HTSeq.pair_SAM_alignments(read_seq)
elif order == "pos":
read_seq = HTSeq.pair_SAM_alignments_with_buffer(read_seq)
else:
raise ValueError("Illegal order specified.")
i = 0
for r in read_seq:
if i > 0 and i % 100000 == 0 and not quiet:
msg = "%d SAM alignment record pairs processed.\n" % (i)
sys.stderr.write(msg)
i += 1
if r[0] is not None and r[0].aligned:
forward_iv_seq = (co.ref_iv for co in r[0].cigar
if co.type == "M" and co.size > 0)
reverse_iv_seq = (invert_strand(co.ref_iv) for co in r[0].cigar
if co.type == "M" and co.size > 0)
else:
forward_iv_seq = tuple()
reverse_iv_seq = tuple()
if r[1] is not None and r[1].aligned:
rest = (invert_strand(co.ref_iv) for co in r[1].cigar
if co.type == "M" and co.size > 0)
forward_iv_seq = itertools.chain(forward_iv_seq, rest)
rest = (co.ref_iv for co in r[1].cigar
if co.type == "M" and co.size > 0)
reverse_iv_seq = itertools.chain(reverse_iv_seq, rest)
else:
if (r[0] is None) or not (r[0].aligned):
counter.not_aligned(r)
continue
try:
if (r[0] is not None and r[0].optional_field("NH") > 1) or \
(r[1] is not None and r[1].optional_field("NH") > 1):
counter.non_unique(r)
continue
except KeyError:
pass
if (r[0] and r[0].aQual < minaqual) or \
(r[1] and r[1].aQual < minaqual):
counter.too_low_quality(r)
continue
counter.forward_count(forward_iv_seq, r)
counter.reverse_count(reverse_iv_seq, r)
if not quiet:
sys.stderr.write("%d SAM alignment pairs processed.\n" % (i))
示例2: count_reads_paired
def count_reads_paired(read_seq, counter, order, stranded,
quiet, minaqual, write_to_samout ):
if order == "name":
read_seq = HTSeq.pair_SAM_alignments( read_seq )
elif order == "pos":
read_seq = HTSeq.pair_SAM_alignments_with_buffer( read_seq )
else:
raise ValueError, "Illegal order specified."
i = 0
for r in read_seq:
if i > 0 and i % 100000 == 0 and not quiet:
sys.stderr.write( "%d SAM alignment record%s processed.\n" % ( i, "s" if not pe_mode else " pairs" ) )
i += 1
if r[0] is not None and r[0].aligned:
if stranded != "reverse":
iv_seq = ( co.ref_iv for co in r[0].cigar if co.type == "M" and co.size > 0 )
else:
iv_seq = ( invert_strand( co.ref_iv ) for co in r[0].cigar if co.type == "M" and co.size > 0 )
else:
iv_seq = tuple()
if r[1] is not None and r[1].aligned:
if stranded != "reverse":
iv_seq = itertools.chain(iv_seq,
( invert_strand( co.ref_iv ) for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
else:
iv_seq = itertools.chain( iv_seq,
( co.ref_iv for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
else:
if ( r[0] is None ) or not ( r[0].aligned ):
write_to_samout( r, "__not_aligned" )
counter.notaligned += 1
continue
try:
if ( r[0] is not None and r[0].optional_field( "NH" ) > 1 ) or \
( r[1] is not None and r[1].optional_field( "NH" ) > 1 ):
counter.nonunique += 1
write_to_samout( r, "__alignment_not_unique" )
continue
except KeyError:
pass
if ( r[0] and r[0].aQual < minaqual ) or ( r[1] and r[1].aQual < minaqual ):
lowqual += 1
write_to_samout( r, "__too_low_aQual" )
continue
counter.count(iv_seq, r)
if not quiet:
sys.stderr.write( "%d SAM %s processed.\n" % ( i, "alignments " if not pe_mode else "alignment pairs" ) )
示例3: parse_fastx_sam_parallel
def parse_fastx_sam_parallel(fastx_infile, sam_infile):
""" Parse fastx and resulting sam file in parallel - generator yielding (name, seq, alignment_list) tuples.
The sam file may contain multiple alignments per read. Program checks that the readnames match.
"""
fastx_generator = basic_seq_utilities.name_seq_generator_from_fasta_fastq(fastx_infile)
sam_generator = iter(HTSeq.bundle_multiple_alignments(HTSeq.SAM_Reader(sam_infile)))
if_finished_fastx, if_finished_sam = False, False
while True:
try: name, seq = fastx_generator.next()
except StopIteration: if_finished_fastx = True
try: alns = sam_generator.next()
except StopIteration: if_finished_sam = True
# if both finished, good, we're doine
if if_finished_fastx and if_finished_sam:
raise StopIteration
# if one file was finished but the other wasn't, error!
elif if_finished_fastx or if_finished_sam:
raise DeepseqError("Parsing seq/aln files in parallel - inconsistent finished states! "
+"(If finished: %s %s, %s %s)"%(fastx_infile, if_finished_fastx, sam_infile, if_finished_sam))
# if all the files still contained data, yield it
else:
name = name.split()[0]
name2 = alns[0].read.name.split()[0]
if not name2 == name:
raise DeepseqError("Non-matching readnames between files! %s in %s, %s in %s"%(fastx_infile, name,
sam_infile, name2))
yield (name, seq, alns)
示例4: HTseq_count
def HTseq_count(bam_file, gtf_file, out_dir, identifier, parallel = True ):
gtf_file = HTSeq.GFF_Reader(gtf_file)
features = HTSeq.GenomicArrayOfSets( "auto", stranded=True )
print "extracting features from gtf file"
for feature in gtf_file:
# if feature.type == "exon":
features[feature.iv] += feature.attr[identifier]
counts = collections.Counter( )
almnt_file = HTSeq.SAM_Reader(bam_file)
counts = collections.Counter( )
for bundle in HTSeq.pair_SAM_alignments( almnt_file, bundle=True ):
if len(bundle) != 1:
continue # Skip multiple alignments
first_almnt, second_almnt = bundle[0] # extract pair
if not first_almnt.aligned and second_almnt.aligned:
count[ "_unmapped" ] += 1
continue
gene_ids = set()
for iv, val in features[ left_almnt.iv ].steps():
gene_ids |= val
for iv, val in features[ right_almnt.iv ].steps():
gene_ids |= val
if len(gene_ids) == 1:
gene_id = list(gene_ids)[0]
counts[ gene_id ] += 1
elif len(gene_ids) == 0:
counts[ "_no_feature" ] += 1
else:
counts[ "_ambiguous" ] += 1
for gene_id in counts:
print gene_id, counts[ gene_id ]
示例5: bam_count
def bam_count(args):
bam = HTSeq.SAM_Reader(args.fi)
#exons = htseq_read_gtf(args.fg)
cnts = collections.Counter()
for bundle in HTSeq.pair_SAM_alignments_with_buffer(bam):
if len(bundle) != 1:
continue
aln1, aln2 = bundle[0]
if not aln1.aligned and aln2.aligned:
cnts["_unmapped"] += 1
continue
gids = set()
for iv, val in exons[aln1.iv].steps():
gids |= val
for iv, val in exons[aln2.iv].steps():
gids |= val
if len(gids) == 1:
gid = list(gids)[0]
cnts[gid] += 1
elif len(gids) == 0:
cnts["_no_feature"] += 1
else:
cnts["_ambiguous"] += 1
for gid in cnts:
print("%s\t%d" % (gid, cnts[gid]))
示例6: listFromCIGAR
def listFromCIGAR(cls, cigarstring,position_b0, refname, strand):
read_parts = []
if strand == MINUS: # need to reverse the CIGAR
logger.debug("Reversing CIGAR for minus strand read fragment")
cigarstring = "".join(reversed(re.findall("\d+[MIDNSHP=X]", cigarstring)))
op_type_list = []
for op in HTSeq.parse_cigar(cigarstring, position_b0, refname, strand):
logger.debug(map(str,(op, op.query_from, op.query_to, op.ref_iv)))
if op.type == "M":
if "M" in op_type_list:
if len(op_type_list) >=2 and op_type_list[-1] == "D" and op_type_list[-2] == "M":
logger.debug(map(str,("extending (D):", op, op.query_from, op.query_to, op.ref_iv)))
read_parts[-1].extend(op.query_from, op.query_to,op.ref_iv.start,op.ref_iv.end,op.ref_iv.chrom,strand)
elif len(op_type_list) >=2 and op_type_list[-1] == "I" and op_type_list[-2] == "M":
logger.debug(map(str,("extending (I):", op, op.query_from, op.query_to, op.ref_iv)))
read_parts[-1].extend(op.query_from, op.query_to,op.ref_iv.start,op.ref_iv.end,op.ref_iv.chrom,strand)
else:
logger.debug("CIGAR WARNING: Number of matches > 1: {0}".format(cigarstring))
else:
logger.debug(map(str,("appending:", op, op.query_from, op.query_to, op.ref_iv)))
suppl_frag = cls(op.query_from, op.query_to,op.ref_iv.start,op.ref_iv.end,op.ref_iv.chrom,strand)
read_parts.append(suppl_frag)
op_type_list.append(op.type)
return read_parts
示例7: set_up_IO
def set_up_IO(fileIN,fileOUT,gff,downstream,upstream):
'''Function that will open all the file required for the alignment processing
'''
## Open alignment
alignIN = HTSeq.SAM_Reader(fileIN)
alignIN = HTSeq.bundle_multiple_alignments(alignIN)
## Open GFF file
annotation = HTSeq.GFF_Reader(gff,end_included = True)
## Open output file - write the header
countTable = open(fileOUT,'w')
coordinates = '\t'.join(i for i in map(str,range(-upstream,downstream)))
countTable.write('name\t{coord}\n'.format(coord = coordinates))
return alignIN, annotation, countTable
示例8: bam_parser_2
def bam_parser_2(bam_file, min_len, max_clip, min_id, mode):
bam_dict = {}
query_counter = 0
output_list = list()
if mode == 'paired':
#import itertools
#for aln in itertools.islice( HTSeq.pair_SAM_alignments(bam_file), 1000 ): # printing first N reads
for aln in HTSeq.pair_SAM_alignments(bam_file):
query_counter += 1
query_1, query_2 = aln
q1_aln = parser_aln_list(query_1, aln_number = query_counter, pair_pos = 1, min_len=min_len, max_clip=max_clip, min_id=min_id)
q2_aln = parser_aln_list(query_2, aln_number = query_counter, pair_pos = 2, min_len=min_len, max_clip=max_clip, min_id=min_id)
alns = [q1_aln, q2_aln]
if alns == [None, None]:
continue
else:
if None in alns:
alns.remove(None)
output_list.append(alns)
elif mode == 'single':
for aln in bam_file:
query_counter += 1
query_1 = aln
q1_aln = parser_aln_list(query_1, aln_number = query_counter, pair_pos = 1, min_len=min_len, max_clip=max_clip, min_id=min_id)
alns = [q1_aln]
if q1_aln != None:
output_list.append(alns)
df_columns = ['ALN','QUERY','REF','SEQ','LEN','ID','SCORE','CLIP_PCT']
output_list = [item for sublist in output_list for item in sublist]
return pd.DataFrame(output_list, columns=df_columns)
示例9: searchGeneName
def searchGeneName(self,annotationstring):
if annotationstring == '.':
genes = 'N/A'
else:
# Split the annotationstring by ',' which collapsed by bedtools groupby
annotationstrings = annotationstring.split(',')
collect = set()
for annotation in annotationstrings:
try:
attr = HTSeq.parse_GFF_attribute_string(annotation)
# Search for gene_name which is used by ensembl gtf annotation
try:
gene = attr['gene_name']
except KeyError:
# Search for gene, which might used in GFF annotation
try:
gene = attr['gene']
except KeyError:
# Search for gene_id
try:
gene = attr['gene_id']
except KeyError:
try:
gene = attr['transcript_id']
except KeyError:
gene = 'N/A'
except:
gene = self.searchGeneName1(annotation)
collect.add(gene)
# Collapse all genes togethor
if len(collect) > 1:
try:
collect.remove('N/A')
except KeyError:
pass
genes = ','.join(collect)
return genes
示例10: set
set_of_gene_names = set( [ f.name.split(":")[0] for f in rs ] )
if len( set_of_gene_names ) == 0:
counts[ '_empty' ] += 1
elif len( set_of_gene_names ) > 1:
counts[ '_ambiguous' ] +=1
else:
for f in rs:
counts[ f.name ] += 1
num_reads += 1
if num_reads % 100000 == 0:
sys.stderr.write( "%d reads processed.\n" % num_reads )
else: # paired-end
num_reads = 0
for af, ar in HTSeq.pair_SAM_alignments( HTSeq.SAM_Reader( sam_file ) ):
rs = set()
if af and ar and not af.aligned and not ar.aligned:
counts[ '_notaligned' ] += 1
continue
if af and ar and not af.aQual < minaqual and ar.aQual < minaqual:
counts[ '_lowaqual' ] += 1
continue
if af and af.aligned and af.aQual >= minaqual and af.iv.chrom in features.chrom_vectors.keys():
for cigop in af.cigar:
if cigop.type != "M":
continue
if reverse:
cigop.ref_iv.strand = reverse_strand( cigop.ref_iv.strand )
for iv, s in features[cigop.ref_iv].steps():
rs = rs.union( s )
示例11: htseq_count
def htseq_count(data):
""" adapted from Simon Anders htseq-count.py script
http://www-huber.embl.de/users/anders/HTSeq/doc/count.html
"""
sam_filename, gff_filename, out_file, stats_file = _get_files(data)
stranded = _get_stranded_flag(data["config"])
overlap_mode = "union"
feature_type = "exon"
id_attribute = "gene_id"
minaqual = 0
if file_exists(out_file):
return out_file
logger.info("Counting reads mapping to exons in %s using %s as the "
"annotation and strandedness as %s." % (os.path.basename(sam_filename),
os.path.basename(gff_filename), _get_strandedness(data["config"])))
features = HTSeq.GenomicArrayOfSets("auto", stranded != "no")
counts = {}
# Try to open samfile to fail early in case it is not there
open(sam_filename).close()
gff = HTSeq.GFF_Reader(gff_filename)
i = 0
try:
for f in gff:
if f.type == feature_type:
try:
feature_id = f.attr[id_attribute]
except KeyError:
sys.exit("Feature %s does not contain a '%s' attribute" %
(f.name, id_attribute))
if stranded != "no" and f.iv.strand == ".":
sys.exit("Feature %s at %s does not have strand "
"information but you are running htseq-count "
"in stranded mode. Use '--stranded=no'." %
(f.name, f.iv))
features[f.iv] += feature_id
counts[f.attr[id_attribute]] = 0
i += 1
if i % 100000 == 0:
sys.stderr.write("%d GFF lines processed.\n" % i)
except:
sys.stderr.write("Error occured in %s.\n"
% gff.get_line_number_string())
raise
sys.stderr.write("%d GFF lines processed.\n" % i)
if len(counts) == 0:
sys.stderr.write("Warning: No features of type '%s' found.\n"
% feature_type)
try:
align_reader = htseq_reader(sam_filename)
first_read = iter(align_reader).next()
pe_mode = first_read.paired_end
except:
sys.stderr.write("Error occured when reading first line of sam "
"file.\n")
raise
try:
if pe_mode:
read_seq_pe_file = align_reader
read_seq = HTSeq.pair_SAM_alignments(align_reader)
empty = 0
ambiguous = 0
notaligned = 0
lowqual = 0
nonunique = 0
i = 0
for r in read_seq:
i += 1
if not pe_mode:
if not r.aligned:
notaligned += 1
continue
try:
if r.optional_field("NH") > 1:
nonunique += 1
continue
except KeyError:
pass
if r.aQual < minaqual:
lowqual += 1
continue
if stranded != "reverse":
iv_seq = (co.ref_iv for co in r.cigar if co.type == "M"
and co.size > 0)
else:
iv_seq = (invert_strand(co.ref_iv) for co in r.cigar if
co.type == "M" and co.size > 0)
else:
if r[0] is not None and r[0].aligned:
if stranded != "reverse":
#.........這裏部分代碼省略.........
示例12: count_reads_in_features
def count_reads_in_features( sam_filename, gff_filename, stranded,
overlap_mode, feature_type, id_attribute, quiet, minaqual, samout ):
def write_to_samout( r, assignment ):
if samoutfile is None:
return
if not pe_mode:
r = (r,)
for read in r:
if read is not None:
samoutfile.write( read.original_sam_line.rstrip() +
"\tXF:Z:" + assignment + "\n" )
if quiet:
warnings.filterwarnings( action="ignore", module="HTSeq" )
if samout != "":
samoutfile = open( samout, "w" )
else:
samoutfile = None
features = HTSeq.GenomicArrayOfSets( "auto", stranded != "no" )
counts = {}
gene_length = {}
# Try to open samfile to fail early in case it is not there
if sam_filename != "-":
open( sam_filename ).close()
counts, colgenes = parse_gff(gff_filename,features,feature_type,id_attribute,stranded,quiet,counts)
if len( counts ) == 0 and not quiet:
sys.stderr.write( "Warning: No features of type '%s' found.\n" % feature_type )
################# read sam file #######################
try:
if sam_filename != "-":
read_seq = HTSeq.SAM_Reader( sam_filename )
first_read = iter(read_seq).next()
else:
read_seq = iter( HTSeq.SAM_Reader( sys.stdin ) )
first_read = read_seq.next()
read_seq = itertools.chain( [ first_read ], read_seq )
pe_mode = first_read.paired_end
except:
sys.stderr.write( "Error occured when reading first line of sam file.\n" )
raise
################ read sam file #######################
try:
if pe_mode:
read_seq_pe_file = read_seq
read_seq = HTSeq.pair_SAM_alignments( read_seq )
empty = 0
ambiguous = 0
notaligned = 0
lowqual = 0
nonunique = 0
i = 0
for r in read_seq:
i += 1
if not pe_mode:
if not r.aligned:
notaligned += 1
write_to_samout( r, "not_aligned" )
continue
try:
if r.optional_field( "NH" ) > 1:
write_to_samout( r, "alignment_not_unique" )
nonunique += 1
continue
except KeyError:
pass
if r.aQual < minaqual:
lowqual += 1
write_to_samout( r, "too_low_aQual" )
continue
if stranded != "reverse":
iv_seq = ( co.ref_iv for co in r.cigar if co.type == "M" and co.size > 0 )
else:
iv_seq = ( invert_strand( co.ref_iv ) for co in r.cigar if co.type == "M" and co.size > 0 )
else:
if r[0] is not None and r[0].aligned:
if stranded != "reverse":
iv_seq = ( co.ref_iv for co in r[0].cigar if co.type == "M" and co.size > 0 )
else:
iv_seq = ( invert_strand( co.ref_iv ) for co in r[0].cigar if co.type == "M" and co.size > 0 )
else:
iv_seq = tuple()
if r[1] is not None and r[1].aligned:
if stranded != "reverse":
iv_seq = itertools.chain( iv_seq,
( invert_strand( co.ref_iv ) for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
else:
iv_seq = itertools.chain( iv_seq,
( co.ref_iv for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
else:
if ( r[0] is None ) or not ( r[0].aligned ):
write_to_samout( r, "not_aligned" )
notaligned += 1
continue
try:
#.........這裏部分代碼省略.........
示例13: count_reads
def count_reads(features, counts, pe_mode, read_seq, order, stranded,
overlap_mode, quiet, minaqual, write_to_samout ):
if pe_mode:
if order == "name":
read_seq = HTSeq.pair_SAM_alignments( read_seq )
elif order == "pos":
read_seq = HTSeq.pair_SAM_alignments_with_buffer( read_seq )
else:
raise ValueError, "Illegal order specified."
empty = 0
ambiguous = 0
notaligned = 0
lowqual = 0
nonunique = 0
i = 0
for r in read_seq:
if i > 0 and i % 100000 == 0 and not quiet:
sys.stderr.write( "%d SAM alignment record%s processed.\n" % ( i, "s" if not pe_mode else " pairs" ) )
i += 1
if not pe_mode:
if not r.aligned:
notaligned += 1
write_to_samout( r, "__not_aligned" )
continue
try:
if r.optional_field( "NH" ) > 1:
nonunique += 1
write_to_samout( r, "__alignment_not_unique" )
continue
except KeyError:
pass
if r.aQual < minaqual:
lowqual += 1
write_to_samout( r, "__too_low_aQual" )
continue
if stranded != "reverse":
iv_seq = ( co.ref_iv for co in r.cigar if co.type == "M" and co.size > 0 )
else:
iv_seq = ( invert_strand( co.ref_iv ) for co in r.cigar if co.type == "M" and co.size > 0 )
else:
if r[0] is not None and r[0].aligned:
if stranded != "reverse":
iv_seq = ( co.ref_iv for co in r[0].cigar if co.type == "M" and co.size > 0 )
else:
iv_seq = ( invert_strand( co.ref_iv ) for co in r[0].cigar if co.type == "M" and co.size > 0 )
else:
iv_seq = tuple()
if r[1] is not None and r[1].aligned:
if stranded != "reverse":
iv_seq = itertools.chain(iv_seq,
( invert_strand( co.ref_iv ) for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
else:
iv_seq = itertools.chain( iv_seq,
( co.ref_iv for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
else:
if ( r[0] is None ) or not ( r[0].aligned ):
write_to_samout( r, "__not_aligned" )
notaligned += 1
continue
try:
if ( r[0] is not None and r[0].optional_field( "NH" ) > 1 ) or \
( r[1] is not None and r[1].optional_field( "NH" ) > 1 ):
nonunique += 1
write_to_samout( r, "__alignment_not_unique" )
continue
except KeyError:
pass
if ( r[0] and r[0].aQual < minaqual ) or ( r[1] and r[1].aQual < minaqual ):
lowqual += 1
write_to_samout( r, "__too_low_aQual" )
continue
try:
if overlap_mode == "union":
fs = set()
for iv in iv_seq:
if iv.chrom not in features.chrom_vectors:
raise UnknownChrom
for iv2, fs2 in features[ iv ].steps():
fs = fs.union( fs2 )
elif overlap_mode == "intersection-strict" or overlap_mode == "intersection-nonempty":
fs = None
for iv in iv_seq:
if iv.chrom not in features.chrom_vectors:
raise UnknownChrom
for iv2, fs2 in features[ iv ].steps():
if len(fs2) > 0 or overlap_mode == "intersection-strict":
if fs is None:
fs = fs2.copy()
else:
fs = fs.intersection( fs2 )
else:
sys.exit( "Illegal overlap mode." )
if fs is None or len( fs ) == 0:
write_to_samout( r, "__no_feature" )
empty += 1
elif len( fs ) > 1:
write_to_samout( r, "__ambiguous[" + '+'.join( fs ) + "]" )
#.........這裏部分代碼省略.........
示例14: count_reads_onto_prebuilt_features
def count_reads_onto_prebuilt_features(
sam_filename, features, feature_ids, stranded, overlap_mode, quiet, minaqual, samout, umis=False
):
def write_to_samout(r, assignment):
if samoutfile is None:
return
if not pe_mode:
r = (r,)
for read in r:
if read is not None:
samoutfile.write(read.original_sam_line.rstrip() + "\tXF:Z:" + assignment + "\n")
if quiet:
warnings.filterwarnings(action="ignore", module="HTSeq")
if samout != "":
samoutfile = open(samout, "w")
else:
samoutfile = None
if umis:
umi_re = re.compile(":UMI:(\w+):")
umi_counts = {}
def count_umis(fs, read_name):
umi_seq = umi_re.search(read_name).group(1)
umi_counts[fs][umi_seq] += 1
for feature_id in feature_ids:
umi_counts[feature_id] = Counter()
else:
def count_umis(x, y):
return None
# Try to open samfile to fail early in case it is not there
if sam_filename != "-":
open(sam_filename).close()
counts = {}
for feature_id in feature_ids:
counts[feature_id] = 0
try:
if sam_filename != "-":
read_seq_file = HTSeq.SAM_Reader(sam_filename)
read_seq = read_seq_file
first_read = iter(read_seq).next()
else:
read_seq_file = HTSeq.SAM_Reader(sys.stdin)
read_seq_iter = iter(read_seq_file)
first_read = read_seq_iter.next()
read_seq = itertools.chain([first_read], read_seq_iter)
pe_mode = first_read.paired_end
except StopIteration:
raise EmptySamError(sam_filename)
try:
if pe_mode:
read_seq = HTSeq.pair_SAM_alignments(read_seq)
empty = 0
ambiguous = 0
notaligned = 0
lowqual = 0
nonunique = 0
i = 0
for r in read_seq:
i += 1
if not pe_mode:
if not r.aligned:
notaligned += 1
write_to_samout(r, "not_aligned")
continue
try:
if r.optional_field("NH") > 1:
write_to_samout(r, "alignment_not_unique")
nonunique += 1
continue
except KeyError:
pass
if r.aQual < minaqual:
lowqual += 1
write_to_samout(r, "too_low_aQual")
continue
if stranded != "reverse":
iv_seq = (co.ref_iv for co in r.cigar if co.type == "M" and co.size > 0)
else:
iv_seq = (invert_strand(co.ref_iv) for co in r.cigar if co.type == "M" and co.size > 0)
else:
if r[0] is not None and r[0].aligned:
if stranded != "reverse":
iv_seq = (co.ref_iv for co in r[0].cigar if co.type == "M" and co.size > 0)
else:
iv_seq = (invert_strand(co.ref_iv) for co in r[0].cigar if co.type == "M" and co.size > 0)
else:
iv_seq = tuple()
if r[1] is not None and r[1].aligned:
if stranded != "reverse":
iv_seq = itertools.chain(
iv_seq, (invert_strand(co.ref_iv) for co in r[1].cigar if co.type == "M" and co.size > 0)
#.........這裏部分代碼省略.........
示例15: iter
#!/usr/bin/python
import HTSeq as h
from collections import defaultdict
#reader_masked = h.SAM_Reader("/Users/dashazhernakova/Documents/UMCG/data/geuvadis/mappedData/ERR188022/aligned_masked.sam")
#reader = h.SAM_Reader("/Users/dashazhernakova/Documents/UMCG/data/geuvadis/mappedData/ERR188022/aligned.sam")
reader = h.SAM_Reader("/Users/dashazhernakova/Documents/UMCG/data/geuvadis/mappedData/ERR188022_masked/Aligned.out.filtered.new.1017680.sam")
reader_masked = h.SAM_Reader("/Users/dashazhernakova/Documents/UMCG/data/geuvadis/mappedData/ERR188022/Aligned.out.filtered.new1mb.sam")
it_p = iter(h.pair_SAM_alignments(reader))
it_p_m = iter(h.pair_SAM_alignments(reader_masked))
same_aligned = 0
one_same_pos = 0
both_same_pos = 0
masked_more_pos = 0
simple_more_pos = 0
#cur_read = {}
#cur_m_read = {}
not_in_simple = 0
not_in_masked = 0
n_m = defaultdict(list)
i = 0
for r1, r2 in h.pair_SAM_alignments(reader):
n_m[r1.read.name].append((r1,r2))
i += 1
if i%10000 == 0:
print i, " lines"
#for k,v in n_m.items():