当前位置: 首页>>代码示例>>Python>>正文


Python HTSeq.pair_SAM_alignments_with_buffer方法代码示例

本文整理汇总了Python中HTSeq.pair_SAM_alignments_with_buffer方法的典型用法代码示例。如果您正苦于以下问题:Python HTSeq.pair_SAM_alignments_with_buffer方法的具体用法?Python HTSeq.pair_SAM_alignments_with_buffer怎么用?Python HTSeq.pair_SAM_alignments_with_buffer使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在HTSeq的用法示例。


在下文中一共展示了HTSeq.pair_SAM_alignments_with_buffer方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: bam_count

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments_with_buffer [as 别名]
def bam_count(args):
    bam = HTSeq.SAM_Reader(args.fi)
    #exons = htseq_read_gtf(args.fg)
    cnts = collections.Counter()
    for bundle in HTSeq.pair_SAM_alignments_with_buffer(bam):
        if len(bundle) != 1:
            continue
        aln1, aln2 = bundle[0]
        if not aln1.aligned and aln2.aligned:
            cnts["_unmapped"] += 1
            continue
        gids = set()
        for iv, val in exons[aln1.iv].steps():
            gids |= val
        for iv, val in exons[aln2.iv].steps():
            gids |= val
        if len(gids) == 1:
            gid = list(gids)[0]
            cnts[gid] += 1
        elif len(gids) == 0:
            cnts["_no_feature"] += 1
        else:
            cnts["_ambiguous"] += 1
    for gid in cnts:
        print("%s\t%d" % (gid, cnts[gid]))
开发者ID:orionzhou,项目名称:robin,代码行数:27,代码来源:bamcount.py

示例2: count_reads_paired

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments_with_buffer [as 别名]
def count_reads_paired(read_seq, counter, order, quiet, minaqual):

    if order == "name":
        read_seq = HTSeq.pair_SAM_alignments(read_seq)
    elif order == "pos":
        read_seq = HTSeq.pair_SAM_alignments_with_buffer(read_seq)
    else:
        raise ValueError("Illegal order specified.")

    i = 0
    for r in read_seq:
        if i > 0 and i % 100000 == 0 and not quiet:
            msg = "%d SAM alignment record pairs processed.\n" % (i)
            sys.stderr.write(msg)

        i += 1
        if r[0] is not None and r[0].aligned:
            forward_iv_seq = (co.ref_iv for co in r[0].cigar
                              if co.type == "M" and co.size > 0)
            reverse_iv_seq = (invert_strand(co.ref_iv) for co in r[0].cigar
                              if co.type == "M" and co.size > 0)
        else:
            forward_iv_seq = tuple()
            reverse_iv_seq = tuple()
        if r[1] is not None and r[1].aligned:
            rest = (invert_strand(co.ref_iv) for co in r[1].cigar
                    if co.type == "M" and co.size > 0)
            forward_iv_seq = itertools.chain(forward_iv_seq, rest)
            rest = (co.ref_iv for co in r[1].cigar
                    if co.type == "M" and co.size > 0)
            reverse_iv_seq = itertools.chain(reverse_iv_seq, rest)
        else:
            if (r[0] is None) or not (r[0].aligned):
                counter.not_aligned(r)
                continue
        try:
            if (r[0] is not None and r[0].optional_field("NH") > 1) or \
                    (r[1] is not None and r[1].optional_field("NH") > 1):
                counter.non_unique(r)
                continue
        except KeyError:
            pass
        if (r[0] and r[0].aQual < minaqual) or \
                (r[1] and r[1].aQual < minaqual):
            counter.too_low_quality(r)
            continue

        counter.forward_count(forward_iv_seq, r)
        counter.reverse_count(reverse_iv_seq, r)

    if not quiet:
        sys.stderr.write("%d SAM alignment pairs processed.\n" % (i))
开发者ID:Christian-B,项目名称:grid_scripts,代码行数:54,代码来源:batch_htseq_count.py

示例3: count_reads_paired

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments_with_buffer [as 别名]
def count_reads_paired(read_seq, counter, order, stranded, 
      quiet, minaqual, write_to_samout ):
      
    if order == "name":
        read_seq = HTSeq.pair_SAM_alignments( read_seq )
    elif order == "pos":
        read_seq = HTSeq.pair_SAM_alignments_with_buffer( read_seq )
    else:
        raise ValueError, "Illegal order specified."

    i = 0   
    for r in read_seq:
        if i > 0 and i % 100000 == 0 and not quiet:
            sys.stderr.write( "%d SAM alignment record%s processed.\n" % ( i, "s" if not pe_mode else " pairs" ) )

        i += 1
        if r[0] is not None and r[0].aligned:
            if stranded != "reverse":
                iv_seq = ( co.ref_iv for co in r[0].cigar if co.type == "M" and co.size > 0 )
            else:
                iv_seq = ( invert_strand( co.ref_iv ) for co in r[0].cigar if co.type == "M" and co.size > 0 )
        else:
            iv_seq = tuple()
        if r[1] is not None and r[1].aligned:            
            if stranded != "reverse":
                iv_seq = itertools.chain(iv_seq, 
                    ( invert_strand( co.ref_iv ) for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
            else:
                iv_seq = itertools.chain( iv_seq, 
                    ( co.ref_iv for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
        else:
            if ( r[0] is None ) or not ( r[0].aligned ):
                write_to_samout( r, "__not_aligned" )
                counter.notaligned += 1
                continue         
        try:
            if ( r[0] is not None and r[0].optional_field( "NH" ) > 1 ) or \
                     ( r[1] is not None and r[1].optional_field( "NH" ) > 1 ):
                counter.nonunique += 1
                write_to_samout( r, "__alignment_not_unique" )
                continue
        except KeyError:
            pass
        if ( r[0] and r[0].aQual < minaqual ) or ( r[1] and r[1].aQual < minaqual ):
            lowqual += 1
            write_to_samout( r, "__too_low_aQual" )
            continue         
        
        counter.count(iv_seq, r)
         
    if not quiet:
        sys.stderr.write( "%d SAM %s processed.\n" % ( i, "alignments " if not pe_mode else "alignment pairs" ) )
开发者ID:Christian-B,项目名称:grid_scripts,代码行数:54,代码来源:batch_htseq_count2.py

示例4: count_reads_in_features

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments_with_buffer [as 别名]
def count_reads_in_features( sam_filename, gff_filename, samtype, order, stranded, 
      overlap_mode, feature_type, id_attribute, quiet, minaqual, samout ):
      
   def write_to_samout( r, assignment ):
      if samoutfile is None:
         return
      if not pe_mode:
         r = (r,)
      for read in r:
         if read is not None:
            samoutfile.write( read.original_sam_line.rstrip() + 
               "\tXF:Z:" + assignment + "\n" )
      

   if samout != "":
      samoutfile = open( samout, "w" )
   else:
      samoutfile = None
      
   features = HTSeq.GenomicArrayOfSets( "auto", stranded != "no" )     
   counts = {}

   # Try to open samfile to fail early in case it is not there
   if sam_filename != "-":
      open( sam_filename ).close()
      
   gff = HTSeq.GFF_Reader( gff_filename )   
   i = 0
   try:
      for f in gff:
         if f.type == feature_type:
            try:
               feature_id = f.attr[ id_attribute ]
            except KeyError:
               raise ValueError, ( "Feature %s does not contain a '%s' attribute" % 
                  ( f.name, id_attribute ) )
            if stranded != "no" and f.iv.strand == ".":
               raise ValueError, ( "Feature %s at %s does not have strand information but you are "
                  "running htseq-count in stranded mode. Use '--stranded=no'." % 
                  ( f.name, f.iv ) )
            features[ f.iv ] += feature_id
            counts[ f.attr[ id_attribute ] ] = 0
         i += 1
         if i % 100000 == 0 and not quiet:
            sys.stderr.write( "%d GFF lines processed.\n" % i )
   except:
      sys.stderr.write( "Error occured when processing GFF file (%s):\n" % gff.get_line_number_string() )
      raise
      
   if not quiet:
      sys.stderr.write( "%d GFF lines processed.\n" % i )
      
   if len( counts ) == 0:
      sys.stderr.write( "Warning: No features of type '%s' found.\n" % feature_type )
   
   if samtype == "sam":
      SAM_or_BAM_Reader = HTSeq.SAM_Reader
   elif samtype == "bam":
      SAM_or_BAM_Reader = HTSeq.BAM_Reader
   else:
      raise ValueError, "Unknown input format %s specified." % samtype

   try:
      if sam_filename != "-":
         read_seq_file = SAM_or_BAM_Reader( sam_filename )
         read_seq = read_seq_file
         first_read = iter(read_seq).next()
      else:
         read_seq_file = SAM_or_BAM_Reader( sys.stdin )
         read_seq_iter = iter( read_seq_file )
         first_read = read_seq_iter.next()
         read_seq = itertools.chain( [ first_read ], read_seq_iter )
      pe_mode = first_read.paired_end
   except:
      sys.stderr.write( "Error occured when reading beginning of SAM/BAM file.\n" )
      raise

   try:
      if pe_mode:
         if order == "name":
            read_seq = HTSeq.pair_SAM_alignments( read_seq )
         elif order == "pos":
            read_seq = HTSeq.pair_SAM_alignments_with_buffer( read_seq )
         else:
            raise ValueError, "Illegal order specified."
      empty = 0
      ambiguous = 0
      notaligned = 0
      lowqual = 0
      nonunique = 0
      i = 0   
      for r in read_seq:
         if i > 0 and i % 100000 == 0 and not quiet:
            sys.stderr.write( "%d SAM alignment record%s processed.\n" % ( i, "s" if not pe_mode else " pairs" ) )

         i += 1
         if not pe_mode:
            if not r.aligned:
               notaligned += 1
               write_to_samout( r, "__not_aligned" )
#.........这里部分代码省略.........
开发者ID:JosephRyanPeterson,项目名称:htseq,代码行数:103,代码来源:count.py

示例5: main

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments_with_buffer [as 别名]
def main():
    exe_parser = argparse.ArgumentParser()
    exe_parser.add_argument('infile', type=str, help='<input file> [(full path), -b/-s required]')
    exe_parser.add_argument("-u", "--not_aligned",
                            help="output reads that were not aligned, including those that were aligned multiple times(flat file).",
                            type=str)
    exe_parser.add_argument("-s", "--samout", help="output not aligned reads to [file path].", type=str)
    exe_parser.add_argument("-b", "--ambiguous_out", help="output a fasta file of ambiguous hits [file path].",
                            type=str)
    exe_parser.add_argument("-v", "--verbose", help="verbose. (default = TRUE).", action="store_true")
    exe_parser.add_argument("gff", help="<gff file> [(full path)]", type=str)
    exe_parser.add_argument("-f", "--fasta", help="output fasta file of hits (full path).", type=str)
    exe_parser.add_argument("-m", "--min_read_length", help="minimal read length to consider. (default = 60b).",
                            type=int)
    exe_parser.add_argument("-i", "--min_id", help="minimal percent id of hit to consider. (default = 80).", type=int)
    exe_parser.add_argument("-z", "--min_score", help="minimal aligner score to consider. (default = 0).", type=int)
    exe_parser.add_argument("-c", "--max_clip",
                            help="proportion of bases clipped from read for alignment. (default = 0.3).", type=float)
    exe_parser.add_argument("--stranded", help="whether the data is stranded (y, n, reverse). (default = n).", type=str,
                            choices=["y", "n", "reverse"], default="n")
    exe_parser.add_argument("--idattr", help="GFF attribute to be used as feature ID. (default = GeneID).", type=str)
    exe_parser.add_argument("--type", help="feature type (3rd column in GFF file) to be used. (default = CDS).",
                            type=str)
    exe_parser.add_argument("-a", "--minaqual", help="min. alignment quality (default = 0).", type=str)
    exe_parser.add_argument("-p", "--paired_end_mode",
                            help="input is paired end sorted by name (n) or position (p) . (default = p).", type=str,
                            choices=["p", "n"], default="p")
    exe_parser.add_argument("-o", "--out", help="name of counts output file.", type=str)
    args = exe_parser.parse_args()

    if args.paired_end_mode == 'p':
        paired_end = True
        pe_order = 'p'
    elif args.paired_end_mode == 'n':
        paired_end = True
        pe_order = 'n'

    if args.infile:
        try:
            if args.infile == '-':  # get sam on a stream
                seqfile = HTSeq.SAM_Reader(sys.stdin)
                if args.paired_end_mode:
                    # read_seq_iter = iter(seqfile)
                    # first_read = read_seq_iter.next()
                    # read_seq = itertools.chain([first_read], read_seq_iter)
                    # reader = HTSeq.pair_SAM_alignments(read_seq)
                    if pe_order == 'p':
                        reader = HTSeq.pair_SAM_alignments_with_buffer(seqfile)
                    elif pe_order == 'n':
                        reader = HTSeq.pair_SAM_alignments(seqfile)  # (read_seq)
                else:
                    reader = seqfile
            elif args.infile != '-':
                seqfile = HTSeq.SAM_Reader(args.infile)
                if args.paired_end_mode:
                    read_seq_iter = iter(seqfile)
                    first_read = read_seq_iter.next()
                    read_seq = itertools.chain([first_read], read_seq_iter)
                    reader = HTSeq.pair_SAM_alignments(read_seq)
                    if pe_order == 'p':
                        reader = HTSeq.pair_SAM_alignments_with_buffer(reader)
                    elif pe_order == 'n':
                        reader = HTSeq.pair_SAM_alignments(reader)
                else:
                    reader = seqfile
                    # fread_seq_iter = iter(reader)
                    # first_read = iter(read_seq).next()
            elif args.infile == '':
                print "no input file type given. exiting..."
                sys.exit(1)
        except:
            print "failed processing SAM/BAM file"
            raise
    elif not args.infile:
        print "no input file given. exiting..."
        sys.exit(1)

    if args.gff:
        gff_file = args.gff
    else:
        print "no gff file given. exiting..."
        sys.exit(1)

    if args.verbose:
        verbose = True
    else:
        verbose = False

    if args.min_read_length:
        min_read_len = args.min_read_length
    else:
        min_read_len = 60  # default read length

    if args.max_clip:
        max_clip_ = float(args.max_clip)
    else:
        max_clip_ = float(0.3)  # default read length

    if args.min_id:
        min_id = float(args.min_id)
#.........这里部分代码省略.........
开发者ID:ppflrs,项目名称:scripts,代码行数:103,代码来源:parse_and_count_sam_alignment_atlas_final.py

示例6: count_reads_in_features

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments_with_buffer [as 别名]
def count_reads_in_features(sam_filename, gff_filename, samtype, order, overlap_mode,
    feature_type, id_attribute, quiet, minaqual, mapping_file, scale_method):

    features = HTSeq.GenomicArrayOfSets("auto", False)
    counts = {}

    # Try to open samfile to fail early in case it is not there
    if sam_filename != "-":
        open(sam_filename).close()

    # Try to open mapping file to fail early in case it is not there
    if mapping_file:
        open(mapping_file).close()

    gff = HTSeq.GFF_Reader(gff_filename)
    i = 0
    try:
        for f in gff:
            if f.type == feature_type:
                try:
                    feature_id = f.attr[id_attribute]
                except KeyError:
                    continue
                features[f.iv] += feature_id
                counts[feature_id] = 0
            i += 1
            if i % 100000 == 0 and not quiet:
                sys.stderr.write("{!s} GFF lines processed.\n".format(i))
    except:
        sys.stderr.write("Error occured when processing GFF file ({}):\n"
            .format(gff.get_line_number_string()))
        raise

    if not quiet:
        sys.stderr.write("{!s} GFF lines processed.\n".format(i))

    num_features = len(counts)
    if num_features == 0:
        sys.stderr.write("Warning: No features of type '{}' found.\n"
            .format(feature_type))

    if samtype == "sam":
        align_reader = HTSeq.SAM_Reader
    elif samtype == "bam":
        align_reader = HTSeq.BAM_Reader
    else:
        raise ValueError, "Unknown input format {} specified.".format(samtype)

    try:
        if sam_filename != "-":
            read_seq_file = align_reader(sam_filename)
            read_seq = read_seq_file
            first_read = iter(read_seq).next()
        else:
            read_seq_file = align_reader(sys.stdin)
            read_seq_iter = iter(read_seq_file)
            first_read = read_seq_iter.next()
            read_seq = itertools.chain([first_read], read_seq_iter)
        pe_mode = first_read.paired_end
    except:
        sys.stderr.write("Error occured when reading SAM/BAM file.\n" )
        raise

    try:
        if pe_mode:
            if order == "name":
                read_seq = HTSeq.pair_SAM_alignments(read_seq)
            elif order == "position":
                read_seq = HTSeq.pair_SAM_alignments_with_buffer(read_seq)
            else:
                raise ValueError, "Illegal order specified."
        empty = 0
        ambiguous = 0
        notaligned = 0
        lowqual = 0
        nonunique = 0
        i = 0
        for r in read_seq:
            if i > 0 and i % 100000 == 0 and not quiet:
                sys.stderr.write("{!s} SAM alignment record{} processed.\n"
                    .format(i, "s" if not pe_mode else " pairs"))

            i += 1
            if not pe_mode:
                if not r.aligned:
                    notaligned += 1
                    continue
                try:
                    if r.optional_field("NH") > 1:
                        nonunique += 1
                        continue
                except KeyError:
                    pass
                if r.aQual < minaqual:
                    lowqual += 1
                    continue
                iv_seq = (invert_strand(co.ref_iv) for co in r.cigar if co.type == "M" and co.size > 0)
            else:
                if r[0] is not None and r[0].aligned:
                    iv_seq = (invert_strand(co.ref_iv) for co in r[0].cigar if co.type == "M" and co.size > 0)
#.........这里部分代码省略.........
开发者ID:Brazelton-Lab,项目名称:lab_scripts,代码行数:103,代码来源:count_features.py

示例7: count_reads_in_features

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments_with_buffer [as 别名]

#.........这里部分代码省略.........
                read_seq_file = SAM_or_BAM_Reader(sys.stdin)
            else:
                read_seq_file = SAM_or_BAM_Reader(sam_filename)
            read_seq_iter = iter(read_seq_file)
            # Catch empty BAM files
            try:
                first_read = next(read_seq_iter)
                pe_mode = first_read.paired_end
            except:
                first_read = None
                pe_mode = False
            if first_read is not None:
                read_seq = itertools.chain([first_read], read_seq_iter)
            else:
                read_seq = []
        except:
            sys.stderr.write(
                "Error occured when reading beginning of {:} file.\n".format(
                    samname))
            raise

        try:
            if pe_mode:
                if ((supplementary_alignment_mode == 'ignore') and
                   (secondary_alignment_mode == 'ignore')):
                    primary_only = True
                else:
                    primary_only = False
                if order == "name":
                    read_seq = HTSeq.pair_SAM_alignments(
                            read_seq,
                            primary_only=primary_only)
                elif order == "pos":
                    read_seq = HTSeq.pair_SAM_alignments_with_buffer(
                            read_seq,
                            max_buffer_size=max_buffer_size,
                            primary_only=primary_only)
                else:
                    raise ValueError("Illegal order specified.")
            empty = 0
            ambiguous = 0
            notaligned = 0
            lowqual = 0
            nonunique = 0
            i = 0
            for r in read_seq:
                if i > 0 and i % 100000 == 0 and not quiet:
                    sys.stderr.write(
                        "%d %s alignment record%s processed.\n" %
                        (i, samname, "s" if not pe_mode else " pairs"))
                    sys.stderr.flush()

                i += 1
                if not pe_mode:
                    if not r.aligned:
                        notaligned += 1
                        write_to_samout(r, "__not_aligned", samoutfile)
                        continue
                    if ((secondary_alignment_mode == 'ignore') and
                       r.not_primary_alignment):
                        continue
                    if ((supplementary_alignment_mode == 'ignore') and
                       r.supplementary):
                        continue
                    try:
                        if r.optional_field("NH") > 1:
开发者ID:simon-anders,项目名称:htseq,代码行数:70,代码来源:count.py

示例8: count_reads

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments_with_buffer [as 别名]
def count_reads(features, counts, pe_mode, read_seq, order, stranded, 
      overlap_mode, quiet, minaqual, write_to_samout ):
      
    if pe_mode:
        if order == "name":
            read_seq = HTSeq.pair_SAM_alignments( read_seq )
        elif order == "pos":
            read_seq = HTSeq.pair_SAM_alignments_with_buffer( read_seq )
        else:
            raise ValueError, "Illegal order specified."
    empty = 0
    ambiguous = 0
    notaligned = 0
    lowqual = 0
    nonunique = 0
    i = 0   
    for r in read_seq:
        if i > 0 and i % 100000 == 0 and not quiet:
            sys.stderr.write( "%d SAM alignment record%s processed.\n" % ( i, "s" if not pe_mode else " pairs" ) )

        i += 1
        if not pe_mode:
            if not r.aligned:
                notaligned += 1
                write_to_samout( r, "__not_aligned" )
                continue
            try:
                if r.optional_field( "NH" ) > 1:
                    nonunique += 1
                    write_to_samout( r, "__alignment_not_unique" )
                    continue
            except KeyError:
                pass
            if r.aQual < minaqual:
                lowqual += 1
                write_to_samout( r, "__too_low_aQual" )
                continue
            if stranded != "reverse":
                iv_seq = ( co.ref_iv for co in r.cigar if co.type == "M" and co.size > 0 )
            else:
                iv_seq = ( invert_strand( co.ref_iv ) for co in r.cigar if co.type == "M" and co.size > 0 )            
        else:
            if r[0] is not None and r[0].aligned:
                if stranded != "reverse":
                    iv_seq = ( co.ref_iv for co in r[0].cigar if co.type == "M" and co.size > 0 )
                else:
                    iv_seq = ( invert_strand( co.ref_iv ) for co in r[0].cigar if co.type == "M" and co.size > 0 )
            else:
                iv_seq = tuple()
            if r[1] is not None and r[1].aligned:            
                if stranded != "reverse":
                    iv_seq = itertools.chain(iv_seq, 
                        ( invert_strand( co.ref_iv ) for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
                else:
                    iv_seq = itertools.chain( iv_seq, 
                        ( co.ref_iv for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
            else:
                if ( r[0] is None ) or not ( r[0].aligned ):
                    write_to_samout( r, "__not_aligned" )
                    notaligned += 1
                    continue         
            try:
                if ( r[0] is not None and r[0].optional_field( "NH" ) > 1 ) or \
                         ( r[1] is not None and r[1].optional_field( "NH" ) > 1 ):
                    nonunique += 1
                    write_to_samout( r, "__alignment_not_unique" )
                    continue
            except KeyError:
                pass
            if ( r[0] and r[0].aQual < minaqual ) or ( r[1] and r[1].aQual < minaqual ):
                lowqual += 1
                write_to_samout( r, "__too_low_aQual" )
                continue         
         
        try:
            if overlap_mode == "union":
                fs = set()
                for iv in iv_seq:
                    if iv.chrom not in features.chrom_vectors:
                        raise UnknownChrom
                    for iv2, fs2 in features[ iv ].steps():
                        fs = fs.union( fs2 )
            elif overlap_mode == "intersection-strict" or overlap_mode == "intersection-nonempty":
                fs = None
                for iv in iv_seq:
                    if iv.chrom not in features.chrom_vectors:
                        raise UnknownChrom
                    for iv2, fs2 in features[ iv ].steps():
                        if len(fs2) > 0 or overlap_mode == "intersection-strict":
                            if fs is None:
                                fs = fs2.copy()
                            else:
                                fs = fs.intersection( fs2 )
            else:
                sys.exit( "Illegal overlap mode." )
            if fs is None or len( fs ) == 0:
                write_to_samout( r, "__no_feature" )
                empty += 1
            elif len( fs ) > 1:
                write_to_samout( r, "__ambiguous[" + '+'.join( fs ) + "]" )
#.........这里部分代码省略.........
开发者ID:Christian-B,项目名称:grid_scripts,代码行数:103,代码来源:batch_htseq_count1.py

示例9: count_reads

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments_with_buffer [as 别名]
def count_reads(sam_filename, features, counts, samtype, order, forward,
                reverse, overlap_mode, quiet, minaqual, samout, directory):

    def write_to_samout(r, assignment):
        if samoutfile is None:
            return
        if not pe_mode:
            r = (r,)
        for read in r:
            if read is not None:
                samoutfile.write(read.original_sam_line.rstrip() +
                                 "\tXF:Z:" + assignment + "\n")

    if samout != "":
        samoutfile = open(samout, "w")
    else:
        samoutfile = None

    if samtype is None:
        samtype = detect_sam_type(sam_filename)

    if samtype == "sam":
        SAM_or_BAM_Reader = HTSeq.SAM_Reader
    elif samtype == "bam":
        SAM_or_BAM_Reader = HTSeq.BAM_Reader
    else:
        raise ValueError("Unknown input format %s specified." % samtype)

    try:
        if sam_filename != "-":
            read_seq_file = SAM_or_BAM_Reader(sam_filename)
            read_seq = read_seq_file
            first_read = iter(read_seq).next()
        else:
            read_seq_file = SAM_or_BAM_Reader(sys.stdin)
            read_seq_iter = iter(read_seq_file)
            first_read = read_seq_iter.next()
            read_seq = itertools.chain([first_read], read_seq_iter)
        pe_mode = first_read.paired_end
    except:
        sys.stderr.write("Error occured when reading beginning "
                         "of SAM/BAM file.\n")
        raise

    try:
        if pe_mode:
            if order == "name":
                read_seq = HTSeq.pair_SAM_alignments(read_seq)
            elif order == "pos":
                read_seq = HTSeq.pair_SAM_alignments_with_buffer(read_seq)
            else:
                raise ValueError("Illegal order specified.")
        if forward:
            empty_forward = 0
            ambiguous_forward = 0
            counts_forward = copy.copy(counts)
        if reverse:
            empty_reverse = 0
            ambiguous_reverse = 0
            counts_reverse = copy.copy(counts)
        notaligned = 0
        lowqual = 0
        nonunique = 0
        i = 0
        for r in read_seq:
            if i > 0 and i % 100000 == 0 and not quiet:
                sys.stderr.write("%d SAM alignment record%s processed.\n" %
                                 (i, "s" if not pe_mode else " pairs"))

            i += 1
            if not pe_mode:
                if not r.aligned:
                    notaligned += 1
                    write_to_samout(r, "__not_aligned")
                    continue
                try:
                    if r.optional_field("NH") > 1:
                        nonunique += 1
                        write_to_samout(r, "__alignment_not_unique")
                        continue
                except KeyError:
                    pass
                if r.aQual < minaqual:
                    lowqual += 1
                    write_to_samout(r, "__too_low_aQual")
                    continue
                if forward:
                    iv_seq_for = (co.ref_iv for co in r.cigar
                                  if co.type == "M" and co.size > 0)
                if reverse:
                    iv_seq_rev = (invert_strand(co.ref_iv) for co in r.cigar
                                  if co.type == "M" and co.size > 0)
            else:
                if r[0] is not None and r[0].aligned:
                    if forward:
                        iv_seq_for = (co.ref_iv for co in r[0].cigar
                                      if co.type == "M" and co.size > 0)
                    if reverse:
                        iv_seq_rev = (invert_strand(co.ref_iv) for co in
                                      r[0].cigar if co.type == "M"
#.........这里部分代码省略.........
开发者ID:Christian-B,项目名称:grid_scripts,代码行数:103,代码来源:batch_count.py


注:本文中的HTSeq.pair_SAM_alignments_with_buffer方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。