當前位置: 首頁>>代碼示例>>Python>>正文


Python HTSeq類代碼示例

本文整理匯總了Python中HTSeq的典型用法代碼示例。如果您正苦於以下問題:Python HTSeq類的具體用法?Python HTSeq怎麽用?Python HTSeq使用的例子?那麽, 這裏精選的類代碼示例或許可以為您提供幫助。


在下文中一共展示了HTSeq類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: count_reads_paired

def count_reads_paired(read_seq, counter, order, quiet, minaqual):

    if order == "name":
        read_seq = HTSeq.pair_SAM_alignments(read_seq)
    elif order == "pos":
        read_seq = HTSeq.pair_SAM_alignments_with_buffer(read_seq)
    else:
        raise ValueError("Illegal order specified.")

    i = 0
    for r in read_seq:
        if i > 0 and i % 100000 == 0 and not quiet:
            msg = "%d SAM alignment record pairs processed.\n" % (i)
            sys.stderr.write(msg)

        i += 1
        if r[0] is not None and r[0].aligned:
            forward_iv_seq = (co.ref_iv for co in r[0].cigar
                              if co.type == "M" and co.size > 0)
            reverse_iv_seq = (invert_strand(co.ref_iv) for co in r[0].cigar
                              if co.type == "M" and co.size > 0)
        else:
            forward_iv_seq = tuple()
            reverse_iv_seq = tuple()
        if r[1] is not None and r[1].aligned:
            rest = (invert_strand(co.ref_iv) for co in r[1].cigar
                    if co.type == "M" and co.size > 0)
            forward_iv_seq = itertools.chain(forward_iv_seq, rest)
            rest = (co.ref_iv for co in r[1].cigar
                    if co.type == "M" and co.size > 0)
            reverse_iv_seq = itertools.chain(reverse_iv_seq, rest)
        else:
            if (r[0] is None) or not (r[0].aligned):
                counter.not_aligned(r)
                continue
        try:
            if (r[0] is not None and r[0].optional_field("NH") > 1) or \
                    (r[1] is not None and r[1].optional_field("NH") > 1):
                counter.non_unique(r)
                continue
        except KeyError:
            pass
        if (r[0] and r[0].aQual < minaqual) or \
                (r[1] and r[1].aQual < minaqual):
            counter.too_low_quality(r)
            continue

        counter.forward_count(forward_iv_seq, r)
        counter.reverse_count(reverse_iv_seq, r)

    if not quiet:
        sys.stderr.write("%d SAM alignment pairs processed.\n" % (i))
開發者ID:Christian-B,項目名稱:grid_scripts,代碼行數:52,代碼來源:batch_htseq_count.py

示例2: count_reads_paired

def count_reads_paired(read_seq, counter, order, stranded, 
      quiet, minaqual, write_to_samout ):
      
    if order == "name":
        read_seq = HTSeq.pair_SAM_alignments( read_seq )
    elif order == "pos":
        read_seq = HTSeq.pair_SAM_alignments_with_buffer( read_seq )
    else:
        raise ValueError, "Illegal order specified."

    i = 0   
    for r in read_seq:
        if i > 0 and i % 100000 == 0 and not quiet:
            sys.stderr.write( "%d SAM alignment record%s processed.\n" % ( i, "s" if not pe_mode else " pairs" ) )

        i += 1
        if r[0] is not None and r[0].aligned:
            if stranded != "reverse":
                iv_seq = ( co.ref_iv for co in r[0].cigar if co.type == "M" and co.size > 0 )
            else:
                iv_seq = ( invert_strand( co.ref_iv ) for co in r[0].cigar if co.type == "M" and co.size > 0 )
        else:
            iv_seq = tuple()
        if r[1] is not None and r[1].aligned:            
            if stranded != "reverse":
                iv_seq = itertools.chain(iv_seq, 
                    ( invert_strand( co.ref_iv ) for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
            else:
                iv_seq = itertools.chain( iv_seq, 
                    ( co.ref_iv for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
        else:
            if ( r[0] is None ) or not ( r[0].aligned ):
                write_to_samout( r, "__not_aligned" )
                counter.notaligned += 1
                continue         
        try:
            if ( r[0] is not None and r[0].optional_field( "NH" ) > 1 ) or \
                     ( r[1] is not None and r[1].optional_field( "NH" ) > 1 ):
                counter.nonunique += 1
                write_to_samout( r, "__alignment_not_unique" )
                continue
        except KeyError:
            pass
        if ( r[0] and r[0].aQual < minaqual ) or ( r[1] and r[1].aQual < minaqual ):
            lowqual += 1
            write_to_samout( r, "__too_low_aQual" )
            continue         
        
        counter.count(iv_seq, r)
         
    if not quiet:
        sys.stderr.write( "%d SAM %s processed.\n" % ( i, "alignments " if not pe_mode else "alignment pairs" ) )
開發者ID:Christian-B,項目名稱:grid_scripts,代碼行數:52,代碼來源:batch_htseq_count2.py

示例3: parse_fastx_sam_parallel

def parse_fastx_sam_parallel(fastx_infile, sam_infile):
    """ Parse fastx and resulting sam file in parallel - generator yielding (name, seq, alignment_list) tuples.

    The sam file may contain multiple alignments per read.  Program checks that the readnames match.
    """
    fastx_generator = basic_seq_utilities.name_seq_generator_from_fasta_fastq(fastx_infile)
    sam_generator = iter(HTSeq.bundle_multiple_alignments(HTSeq.SAM_Reader(sam_infile)))
    if_finished_fastx, if_finished_sam = False, False
    while True:
        try:                    name, seq = fastx_generator.next()
        except StopIteration:   if_finished_fastx = True
        try:                    alns = sam_generator.next()
        except StopIteration:   if_finished_sam = True
        # if both finished, good, we're doine
        if if_finished_fastx and if_finished_sam:
            raise StopIteration
        # if one file was finished but the other wasn't, error!
        elif if_finished_fastx or if_finished_sam:
            raise DeepseqError("Parsing seq/aln files in parallel - inconsistent finished states! "
                              +"(If finished: %s %s, %s %s)"%(fastx_infile, if_finished_fastx, sam_infile, if_finished_sam))
        # if all the files still contained data, yield it
        else:
            name = name.split()[0]
            name2 = alns[0].read.name.split()[0]
            if not name2 == name:
                raise DeepseqError("Non-matching readnames between files! %s in %s, %s in %s"%(fastx_infile, name, 
                                                                                               sam_infile, name2))
            yield (name, seq, alns)
開發者ID:Jonikas-Lab,項目名稱:basic-bioinf-utilities,代碼行數:28,代碼來源:deepseq_utilities.py

示例4: HTseq_count

def HTseq_count(bam_file, gtf_file, out_dir, identifier, parallel = True ):
	gtf_file = HTSeq.GFF_Reader(gtf_file)
	features = HTSeq.GenomicArrayOfSets( "auto", stranded=True )

	print "extracting features from gtf file"
	for feature in gtf_file:
		# if feature.type == "exon":
		features[feature.iv] += feature.attr[identifier]

	counts = collections.Counter( )

	almnt_file = HTSeq.SAM_Reader(bam_file)
	counts = collections.Counter( )
	for bundle in HTSeq.pair_SAM_alignments( almnt_file, bundle=True ):
		if len(bundle) != 1:
			continue  # Skip multiple alignments
		first_almnt, second_almnt = bundle[0]  # extract pair
		if not first_almnt.aligned and second_almnt.aligned:
			count[ "_unmapped" ] += 1
			continue
		gene_ids = set()
		for iv, val in features[ left_almnt.iv ].steps():
			gene_ids |= val
		for iv, val in features[ right_almnt.iv ].steps():
			gene_ids |= val
		if len(gene_ids) == 1:
			gene_id = list(gene_ids)[0]
			counts[ gene_id ] += 1
		elif len(gene_ids) == 0:
			counts[ "_no_feature" ] += 1
		else:
			counts[ "_ambiguous" ] += 1

	for gene_id in counts:
		print gene_id, counts[ gene_id ]
開發者ID:rbloom5,項目名稱:RNAseq_analysis,代碼行數:35,代碼來源:long_RNA_pipeline.py

示例5: bam_count

def bam_count(args):
    bam = HTSeq.SAM_Reader(args.fi)
    #exons = htseq_read_gtf(args.fg)
    cnts = collections.Counter()
    for bundle in HTSeq.pair_SAM_alignments_with_buffer(bam):
        if len(bundle) != 1:
            continue
        aln1, aln2 = bundle[0]
        if not aln1.aligned and aln2.aligned:
            cnts["_unmapped"] += 1
            continue
        gids = set()
        for iv, val in exons[aln1.iv].steps():
            gids |= val
        for iv, val in exons[aln2.iv].steps():
            gids |= val
        if len(gids) == 1:
            gid = list(gids)[0]
            cnts[gid] += 1
        elif len(gids) == 0:
            cnts["_no_feature"] += 1
        else:
            cnts["_ambiguous"] += 1
    for gid in cnts:
        print("%s\t%d" % (gid, cnts[gid]))
開發者ID:orionzhou,項目名稱:robin,代碼行數:25,代碼來源:bamcount.py

示例6: listFromCIGAR

    def listFromCIGAR(cls, cigarstring,position_b0, refname, strand):
        read_parts = []
        if strand == MINUS: # need to reverse the CIGAR
            logger.debug("Reversing CIGAR for minus strand read fragment")
            cigarstring = "".join(reversed(re.findall("\d+[MIDNSHP=X]", cigarstring)))

        op_type_list = []
        for op in HTSeq.parse_cigar(cigarstring, position_b0, refname, strand):
            logger.debug(map(str,(op, op.query_from, op.query_to, op.ref_iv)))
            if op.type == "M":
                if "M" in op_type_list:
                    if len(op_type_list) >=2 and op_type_list[-1] == "D" and op_type_list[-2] == "M":
                        logger.debug(map(str,("extending (D):", op, op.query_from, op.query_to, op.ref_iv)))
                        read_parts[-1].extend(op.query_from, op.query_to,op.ref_iv.start,op.ref_iv.end,op.ref_iv.chrom,strand)
                    elif len(op_type_list) >=2 and op_type_list[-1] == "I" and op_type_list[-2] == "M":
                        logger.debug(map(str,("extending (I):", op, op.query_from, op.query_to, op.ref_iv)))
                        read_parts[-1].extend(op.query_from, op.query_to,op.ref_iv.start,op.ref_iv.end,op.ref_iv.chrom,strand)
                    else:
                        logger.debug("CIGAR WARNING: Number of matches > 1: {0}".format(cigarstring))
                else:
                    logger.debug(map(str,("appending:", op, op.query_from, op.query_to, op.ref_iv)))
                    suppl_frag = cls(op.query_from, op.query_to,op.ref_iv.start,op.ref_iv.end,op.ref_iv.chrom,strand)
                    read_parts.append(suppl_frag)
            op_type_list.append(op.type)
        return read_parts
開發者ID:granek,項目名稱:aimhii,代碼行數:25,代碼來源:extract_chimeras.py

示例7: set_up_IO

def set_up_IO(fileIN,fileOUT,gff,downstream,upstream):
    '''Function that will open all the file required for the alignment processing
    '''
    ## Open alignment
    alignIN = HTSeq.SAM_Reader(fileIN)
    alignIN = HTSeq.bundle_multiple_alignments(alignIN)

    ## Open GFF file
    annotation = HTSeq.GFF_Reader(gff,end_included = True)

    ## Open output file - write the header
    countTable = open(fileOUT,'w')
    coordinates = '\t'.join(i for i in map(str,range(-upstream,downstream)))
    countTable.write('name\t{coord}\n'.format(coord = coordinates))
    return alignIN, annotation, countTable
開發者ID:Bioconductor-mirror,項目名稱:DChIPRep,代碼行數:15,代碼來源:DChIPRep.py

示例8: bam_parser_2

def bam_parser_2(bam_file, min_len, max_clip, min_id, mode):
    bam_dict = {}

    query_counter = 0

    output_list = list()

    if mode == 'paired':
        #import itertools
        #for aln in itertools.islice( HTSeq.pair_SAM_alignments(bam_file), 1000 ):  # printing first N reads
        for aln in HTSeq.pair_SAM_alignments(bam_file):
            query_counter += 1

            query_1, query_2 = aln

            q1_aln = parser_aln_list(query_1, aln_number = query_counter, pair_pos = 1, min_len=min_len, max_clip=max_clip, min_id=min_id)
            q2_aln = parser_aln_list(query_2, aln_number = query_counter, pair_pos = 2, min_len=min_len, max_clip=max_clip, min_id=min_id)

            alns = [q1_aln, q2_aln]

            if alns == [None, None]:
                continue
            else:
                if None in alns:
                    alns.remove(None)
                output_list.append(alns)

    elif mode == 'single':
        for aln in bam_file:

            query_counter += 1

            query_1 = aln

            q1_aln = parser_aln_list(query_1, aln_number = query_counter, pair_pos = 1, min_len=min_len, max_clip=max_clip, min_id=min_id)

            alns = [q1_aln]

            if q1_aln != None:
                output_list.append(alns)

    df_columns = ['ALN','QUERY','REF','SEQ','LEN','ID','SCORE','CLIP_PCT']
    output_list = [item for sublist in output_list for item in sublist]

    return pd.DataFrame(output_list, columns=df_columns)
開發者ID:ppflrs,項目名稱:scripts,代碼行數:45,代碼來源:BP-1.py

示例9: searchGeneName

 def searchGeneName(self,annotationstring):
     if annotationstring == '.':
         genes = 'N/A'
     else:
         # Split the annotationstring by ',' which collapsed by bedtools groupby
         annotationstrings = annotationstring.split(',')
         collect = set()
         for annotation in annotationstrings:
             try:
                 attr = HTSeq.parse_GFF_attribute_string(annotation)
                 # Search for gene_name which is used by ensembl gtf annotation
                 try:
                     gene = attr['gene_name']
                 except KeyError:
                     # Search for gene, which might used in GFF annotation
                     try:
                         gene = attr['gene']
                     except KeyError:
                         # Search for gene_id
                         try:
                             gene = attr['gene_id']
                         except KeyError:
                             try:
                                 gene = attr['transcript_id']
                             except KeyError:
                                 gene = 'N/A'
             except:
                 gene = self.searchGeneName1(annotation)
             collect.add(gene)
         # Collapse all genes togethor
         if len(collect) > 1:
             try:
                 collect.remove('N/A')
             except KeyError:
                 pass
         genes = ','.join(collect)
         
     return genes
開發者ID:Voineagulab,項目名稱:DCC,代碼行數:38,代碼來源:circAnnotate.py

示例10: set

      set_of_gene_names = set( [ f.name.split(":")[0] for f in rs ] )
      if len( set_of_gene_names ) == 0:
         counts[ '_empty' ] += 1
      elif len( set_of_gene_names ) > 1:
         counts[ '_ambiguous' ] +=1
      else:
         for f in rs:
            counts[ f.name ] += 1
      num_reads += 1
      if num_reads % 100000 == 0:
         sys.stderr.write( "%d reads processed.\n" % num_reads )

else: # paired-end

   num_reads = 0
   for af, ar in HTSeq.pair_SAM_alignments( HTSeq.SAM_Reader( sam_file ) ):
      rs = set()
      if af and ar and not af.aligned and not ar.aligned:
         counts[ '_notaligned' ] += 1
         continue
      if af and ar and not af.aQual < minaqual and ar.aQual < minaqual:
         counts[ '_lowaqual' ] += 1
         continue
      if af and af.aligned and af.aQual >= minaqual and af.iv.chrom in features.chrom_vectors.keys():
         for cigop in af.cigar:
            if cigop.type != "M":
               continue
            if reverse:
               cigop.ref_iv.strand = reverse_strand( cigop.ref_iv.strand )
            for iv, s in features[cigop.ref_iv].steps():
               rs = rs.union( s )
開發者ID:Al3n70rn,項目名稱:rna-seq-diff-exprn,代碼行數:31,代碼來源:dexseq_count.py

示例11: htseq_count

def htseq_count(data):
    """ adapted from Simon Anders htseq-count.py script
    http://www-huber.embl.de/users/anders/HTSeq/doc/count.html
    """

    sam_filename, gff_filename, out_file, stats_file = _get_files(data)
    stranded = _get_stranded_flag(data["config"])
    overlap_mode = "union"
    feature_type = "exon"
    id_attribute = "gene_id"
    minaqual = 0


    if file_exists(out_file):
        return out_file

    logger.info("Counting reads mapping to exons in %s using %s as the "
                    "annotation and strandedness as %s." % (os.path.basename(sam_filename),
                    os.path.basename(gff_filename), _get_strandedness(data["config"])))

    features = HTSeq.GenomicArrayOfSets("auto", stranded != "no")
    counts = {}

    # Try to open samfile to fail early in case it is not there
    open(sam_filename).close()

    gff = HTSeq.GFF_Reader(gff_filename)
    i = 0
    try:
        for f in gff:
            if f.type == feature_type:
                try:
                    feature_id = f.attr[id_attribute]
                except KeyError:
                    sys.exit("Feature %s does not contain a '%s' attribute" %
                             (f.name, id_attribute))
                if stranded != "no" and f.iv.strand == ".":
                    sys.exit("Feature %s at %s does not have strand "
                             "information but you are running htseq-count "
                             "in stranded mode. Use '--stranded=no'." %
                             (f.name, f.iv))
                features[f.iv] += feature_id
                counts[f.attr[id_attribute]] = 0
            i += 1
            if i % 100000 == 0:
                sys.stderr.write("%d GFF lines processed.\n" % i)
    except:
        sys.stderr.write("Error occured in %s.\n"
                         % gff.get_line_number_string())
        raise

    sys.stderr.write("%d GFF lines processed.\n" % i)

    if len(counts) == 0:
        sys.stderr.write("Warning: No features of type '%s' found.\n"
                         % feature_type)

    try:
        align_reader = htseq_reader(sam_filename)
        first_read = iter(align_reader).next()
        pe_mode = first_read.paired_end
    except:
        sys.stderr.write("Error occured when reading first line of sam "
                         "file.\n")
        raise

    try:
        if pe_mode:
            read_seq_pe_file = align_reader
            read_seq = HTSeq.pair_SAM_alignments(align_reader)
        empty = 0
        ambiguous = 0
        notaligned = 0
        lowqual = 0
        nonunique = 0
        i = 0
        for r in read_seq:
            i += 1
            if not pe_mode:
                if not r.aligned:
                    notaligned += 1
                    continue
                try:
                    if r.optional_field("NH") > 1:
                        nonunique += 1
                        continue
                except KeyError:
                    pass
                if r.aQual < minaqual:
                    lowqual += 1
                    continue
                if stranded != "reverse":
                    iv_seq = (co.ref_iv for co in r.cigar if co.type == "M"
                              and co.size > 0)
                else:
                    iv_seq = (invert_strand(co.ref_iv) for co in r.cigar if
                              co.type == "M" and co.size > 0)
            else:
                if r[0] is not None and r[0].aligned:
                    if stranded != "reverse":
#.........這裏部分代碼省略.........
開發者ID:Lobage,項目名稱:bcbio-nextgen,代碼行數:101,代碼來源:count.py

示例12: count_reads_in_features

def count_reads_in_features( sam_filename, gff_filename, stranded, 
      overlap_mode, feature_type, id_attribute, quiet, minaqual, samout ):
      
   def write_to_samout( r, assignment ):
      if samoutfile is None:
         return
      if not pe_mode:
         r = (r,)
      for read in r:
         if read is not None:
            samoutfile.write( read.original_sam_line.rstrip() + 
               "\tXF:Z:" + assignment + "\n" )
      
   if quiet:
      warnings.filterwarnings( action="ignore", module="HTSeq" ) 
      
   if samout != "":
      samoutfile = open( samout, "w" )
   else:
      samoutfile = None
      
   features = HTSeq.GenomicArrayOfSets( "auto", stranded != "no" )     
   counts = {}
   gene_length = {}

   # Try to open samfile to fail early in case it is not there
   if sam_filename != "-":
      open( sam_filename ).close()
 
   counts, colgenes = parse_gff(gff_filename,features,feature_type,id_attribute,stranded,quiet,counts)
      
   if len( counts ) == 0 and not quiet:
      sys.stderr.write( "Warning: No features of type '%s' found.\n" % feature_type )
   ################# read sam file #######################
   try:
      if sam_filename != "-":
          read_seq = HTSeq.SAM_Reader( sam_filename )
          first_read = iter(read_seq).next()
      else:
          read_seq = iter( HTSeq.SAM_Reader( sys.stdin ) )
          first_read = read_seq.next()
          read_seq = itertools.chain( [ first_read ], read_seq )
      pe_mode = first_read.paired_end
   except:
      sys.stderr.write( "Error occured when reading first line of sam file.\n" )
      raise
   ################ read sam file #######################
   try:
      if pe_mode:
         read_seq_pe_file = read_seq
         read_seq = HTSeq.pair_SAM_alignments( read_seq )
      empty = 0
      ambiguous = 0
      notaligned = 0
      lowqual = 0
      nonunique = 0
      i = 0   
      for r in read_seq:
         i += 1
         if not pe_mode:
            if not r.aligned:
               notaligned += 1
               write_to_samout( r, "not_aligned" )
               continue
            try:
               if r.optional_field( "NH" ) > 1:
                  write_to_samout( r, "alignment_not_unique" )
                  nonunique += 1
                  continue
            except KeyError:
               pass
            if r.aQual < minaqual:
               lowqual += 1
               write_to_samout( r, "too_low_aQual" )
               continue
            if stranded != "reverse":
               iv_seq = ( co.ref_iv for co in r.cigar if co.type == "M" and co.size > 0 )
            else:
               iv_seq = ( invert_strand( co.ref_iv ) for co in r.cigar if co.type == "M" and co.size > 0 )            
         else:
            if r[0] is not None and r[0].aligned:
               if stranded != "reverse":
                  iv_seq = ( co.ref_iv for co in r[0].cigar if co.type == "M" and co.size > 0 )
               else:
                  iv_seq = ( invert_strand( co.ref_iv ) for co in r[0].cigar if co.type == "M" and co.size > 0 )
            else:
               iv_seq = tuple()
            if r[1] is not None and r[1].aligned:            
               if stranded != "reverse":
                  iv_seq = itertools.chain( iv_seq, 
                     ( invert_strand( co.ref_iv ) for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
               else:
                  iv_seq = itertools.chain( iv_seq, 
                     ( co.ref_iv for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
            else:
               if ( r[0] is None ) or not ( r[0].aligned ):
                  write_to_samout( r, "not_aligned" )
                  notaligned += 1
                  continue         
            try:
#.........這裏部分代碼省略.........
開發者ID:gturco,項目名稱:htseq_tools,代碼行數:101,代碼來源:count.py

示例13: count_reads

def count_reads(features, counts, pe_mode, read_seq, order, stranded, 
      overlap_mode, quiet, minaqual, write_to_samout ):
      
    if pe_mode:
        if order == "name":
            read_seq = HTSeq.pair_SAM_alignments( read_seq )
        elif order == "pos":
            read_seq = HTSeq.pair_SAM_alignments_with_buffer( read_seq )
        else:
            raise ValueError, "Illegal order specified."
    empty = 0
    ambiguous = 0
    notaligned = 0
    lowqual = 0
    nonunique = 0
    i = 0   
    for r in read_seq:
        if i > 0 and i % 100000 == 0 and not quiet:
            sys.stderr.write( "%d SAM alignment record%s processed.\n" % ( i, "s" if not pe_mode else " pairs" ) )

        i += 1
        if not pe_mode:
            if not r.aligned:
                notaligned += 1
                write_to_samout( r, "__not_aligned" )
                continue
            try:
                if r.optional_field( "NH" ) > 1:
                    nonunique += 1
                    write_to_samout( r, "__alignment_not_unique" )
                    continue
            except KeyError:
                pass
            if r.aQual < minaqual:
                lowqual += 1
                write_to_samout( r, "__too_low_aQual" )
                continue
            if stranded != "reverse":
                iv_seq = ( co.ref_iv for co in r.cigar if co.type == "M" and co.size > 0 )
            else:
                iv_seq = ( invert_strand( co.ref_iv ) for co in r.cigar if co.type == "M" and co.size > 0 )            
        else:
            if r[0] is not None and r[0].aligned:
                if stranded != "reverse":
                    iv_seq = ( co.ref_iv for co in r[0].cigar if co.type == "M" and co.size > 0 )
                else:
                    iv_seq = ( invert_strand( co.ref_iv ) for co in r[0].cigar if co.type == "M" and co.size > 0 )
            else:
                iv_seq = tuple()
            if r[1] is not None and r[1].aligned:            
                if stranded != "reverse":
                    iv_seq = itertools.chain(iv_seq, 
                        ( invert_strand( co.ref_iv ) for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
                else:
                    iv_seq = itertools.chain( iv_seq, 
                        ( co.ref_iv for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
            else:
                if ( r[0] is None ) or not ( r[0].aligned ):
                    write_to_samout( r, "__not_aligned" )
                    notaligned += 1
                    continue         
            try:
                if ( r[0] is not None and r[0].optional_field( "NH" ) > 1 ) or \
                         ( r[1] is not None and r[1].optional_field( "NH" ) > 1 ):
                    nonunique += 1
                    write_to_samout( r, "__alignment_not_unique" )
                    continue
            except KeyError:
                pass
            if ( r[0] and r[0].aQual < minaqual ) or ( r[1] and r[1].aQual < minaqual ):
                lowqual += 1
                write_to_samout( r, "__too_low_aQual" )
                continue         
         
        try:
            if overlap_mode == "union":
                fs = set()
                for iv in iv_seq:
                    if iv.chrom not in features.chrom_vectors:
                        raise UnknownChrom
                    for iv2, fs2 in features[ iv ].steps():
                        fs = fs.union( fs2 )
            elif overlap_mode == "intersection-strict" or overlap_mode == "intersection-nonempty":
                fs = None
                for iv in iv_seq:
                    if iv.chrom not in features.chrom_vectors:
                        raise UnknownChrom
                    for iv2, fs2 in features[ iv ].steps():
                        if len(fs2) > 0 or overlap_mode == "intersection-strict":
                            if fs is None:
                                fs = fs2.copy()
                            else:
                                fs = fs.intersection( fs2 )
            else:
                sys.exit( "Illegal overlap mode." )
            if fs is None or len( fs ) == 0:
                write_to_samout( r, "__no_feature" )
                empty += 1
            elif len( fs ) > 1:
                write_to_samout( r, "__ambiguous[" + '+'.join( fs ) + "]" )
#.........這裏部分代碼省略.........
開發者ID:Christian-B,項目名稱:grid_scripts,代碼行數:101,代碼來源:batch_htseq_count1.py

示例14: count_reads_onto_prebuilt_features

def count_reads_onto_prebuilt_features(
    sam_filename, features, feature_ids, stranded, overlap_mode, quiet, minaqual, samout, umis=False
):
    def write_to_samout(r, assignment):
        if samoutfile is None:
            return
        if not pe_mode:
            r = (r,)
        for read in r:
            if read is not None:
                samoutfile.write(read.original_sam_line.rstrip() + "\tXF:Z:" + assignment + "\n")

    if quiet:
        warnings.filterwarnings(action="ignore", module="HTSeq")

    if samout != "":
        samoutfile = open(samout, "w")
    else:
        samoutfile = None

    if umis:
        umi_re = re.compile(":UMI:(\w+):")
        umi_counts = {}

        def count_umis(fs, read_name):
            umi_seq = umi_re.search(read_name).group(1)
            umi_counts[fs][umi_seq] += 1

        for feature_id in feature_ids:
            umi_counts[feature_id] = Counter()
    else:

        def count_umis(x, y):
            return None

    # Try to open samfile to fail early in case it is not there
    if sam_filename != "-":
        open(sam_filename).close()

    counts = {}
    for feature_id in feature_ids:
        counts[feature_id] = 0

    try:
        if sam_filename != "-":
            read_seq_file = HTSeq.SAM_Reader(sam_filename)
            read_seq = read_seq_file
            first_read = iter(read_seq).next()
        else:
            read_seq_file = HTSeq.SAM_Reader(sys.stdin)
            read_seq_iter = iter(read_seq_file)
            first_read = read_seq_iter.next()
            read_seq = itertools.chain([first_read], read_seq_iter)
        pe_mode = first_read.paired_end
    except StopIteration:
        raise EmptySamError(sam_filename)

    try:
        if pe_mode:
            read_seq = HTSeq.pair_SAM_alignments(read_seq)
        empty = 0
        ambiguous = 0
        notaligned = 0
        lowqual = 0
        nonunique = 0
        i = 0
        for r in read_seq:
            i += 1
            if not pe_mode:
                if not r.aligned:
                    notaligned += 1
                    write_to_samout(r, "not_aligned")
                    continue
                try:
                    if r.optional_field("NH") > 1:
                        write_to_samout(r, "alignment_not_unique")
                        nonunique += 1
                        continue
                except KeyError:
                    pass
                if r.aQual < minaqual:
                    lowqual += 1
                    write_to_samout(r, "too_low_aQual")
                    continue
                if stranded != "reverse":
                    iv_seq = (co.ref_iv for co in r.cigar if co.type == "M" and co.size > 0)
                else:
                    iv_seq = (invert_strand(co.ref_iv) for co in r.cigar if co.type == "M" and co.size > 0)
            else:
                if r[0] is not None and r[0].aligned:
                    if stranded != "reverse":
                        iv_seq = (co.ref_iv for co in r[0].cigar if co.type == "M" and co.size > 0)
                    else:
                        iv_seq = (invert_strand(co.ref_iv) for co in r[0].cigar if co.type == "M" and co.size > 0)
                else:
                    iv_seq = tuple()
                if r[1] is not None and r[1].aligned:
                    if stranded != "reverse":
                        iv_seq = itertools.chain(
                            iv_seq, (invert_strand(co.ref_iv) for co in r[1].cigar if co.type == "M" and co.size > 0)
#.........這裏部分代碼省略.........
開發者ID:rgranit,項目名稱:CEL-Seq-pipeline,代碼行數:101,代碼來源:htseq_count_umified.py

示例15: iter

#!/usr/bin/python

import HTSeq as h
from collections import defaultdict

#reader_masked = h.SAM_Reader("/Users/dashazhernakova/Documents/UMCG/data/geuvadis/mappedData/ERR188022/aligned_masked.sam")
#reader = h.SAM_Reader("/Users/dashazhernakova/Documents/UMCG/data/geuvadis/mappedData/ERR188022/aligned.sam")

reader = h.SAM_Reader("/Users/dashazhernakova/Documents/UMCG/data/geuvadis/mappedData/ERR188022_masked/Aligned.out.filtered.new.1017680.sam")
reader_masked = h.SAM_Reader("/Users/dashazhernakova/Documents/UMCG/data/geuvadis/mappedData/ERR188022/Aligned.out.filtered.new1mb.sam")
it_p = iter(h.pair_SAM_alignments(reader))
it_p_m = iter(h.pair_SAM_alignments(reader_masked))

same_aligned = 0
one_same_pos = 0
both_same_pos = 0
masked_more_pos = 0
simple_more_pos = 0
#cur_read = {}
#cur_m_read = {}
not_in_simple = 0
not_in_masked = 0

n_m = defaultdict(list)
i = 0
for r1, r2 in h.pair_SAM_alignments(reader):
	n_m[r1.read.name].append((r1,r2))
	i += 1
	if i%10000 == 0:
		print i, " lines"
#for k,v in n_m.items():
開發者ID:DashaZhernakova,項目名稱:misc,代碼行數:31,代碼來源:compareReadMappings.py


注:本文中的HTSeq類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。