当前位置: 首页>>代码示例>>Python>>正文


Python HTSeq.pair_SAM_alignments方法代码示例

本文整理汇总了Python中HTSeq.pair_SAM_alignments方法的典型用法代码示例。如果您正苦于以下问题:Python HTSeq.pair_SAM_alignments方法的具体用法?Python HTSeq.pair_SAM_alignments怎么用?Python HTSeq.pair_SAM_alignments使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在HTSeq的用法示例。


在下文中一共展示了HTSeq.pair_SAM_alignments方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: HTseq_count

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments [as 别名]
def HTseq_count(bam_file, gtf_file, out_dir, identifier, parallel = True ):
	gtf_file = HTSeq.GFF_Reader(gtf_file)
	features = HTSeq.GenomicArrayOfSets( "auto", stranded=True )

	print "extracting features from gtf file"
	for feature in gtf_file:
		# if feature.type == "exon":
		features[feature.iv] += feature.attr[identifier]

	counts = collections.Counter( )

	almnt_file = HTSeq.SAM_Reader(bam_file)
	counts = collections.Counter( )
	for bundle in HTSeq.pair_SAM_alignments( almnt_file, bundle=True ):
		if len(bundle) != 1:
			continue  # Skip multiple alignments
		first_almnt, second_almnt = bundle[0]  # extract pair
		if not first_almnt.aligned and second_almnt.aligned:
			count[ "_unmapped" ] += 1
			continue
		gene_ids = set()
		for iv, val in features[ left_almnt.iv ].steps():
			gene_ids |= val
		for iv, val in features[ right_almnt.iv ].steps():
			gene_ids |= val
		if len(gene_ids) == 1:
			gene_id = list(gene_ids)[0]
			counts[ gene_id ] += 1
		elif len(gene_ids) == 0:
			counts[ "_no_feature" ] += 1
		else:
			counts[ "_ambiguous" ] += 1

	for gene_id in counts:
		print gene_id, counts[ gene_id ]
开发者ID:rbloom5,项目名称:RNAseq_analysis,代码行数:37,代码来源:long_RNA_pipeline.py

示例2: count_reads_paired

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments [as 别名]
def count_reads_paired(read_seq, counter, order, stranded, 
      quiet, minaqual, write_to_samout ):
      
    if order == "name":
        read_seq = HTSeq.pair_SAM_alignments( read_seq )
    elif order == "pos":
        read_seq = HTSeq.pair_SAM_alignments_with_buffer( read_seq )
    else:
        raise ValueError, "Illegal order specified."

    i = 0   
    for r in read_seq:
        if i > 0 and i % 100000 == 0 and not quiet:
            sys.stderr.write( "%d SAM alignment record%s processed.\n" % ( i, "s" if not pe_mode else " pairs" ) )

        i += 1
        if r[0] is not None and r[0].aligned:
            if stranded != "reverse":
                iv_seq = ( co.ref_iv for co in r[0].cigar if co.type == "M" and co.size > 0 )
            else:
                iv_seq = ( invert_strand( co.ref_iv ) for co in r[0].cigar if co.type == "M" and co.size > 0 )
        else:
            iv_seq = tuple()
        if r[1] is not None and r[1].aligned:            
            if stranded != "reverse":
                iv_seq = itertools.chain(iv_seq, 
                    ( invert_strand( co.ref_iv ) for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
            else:
                iv_seq = itertools.chain( iv_seq, 
                    ( co.ref_iv for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
        else:
            if ( r[0] is None ) or not ( r[0].aligned ):
                write_to_samout( r, "__not_aligned" )
                counter.notaligned += 1
                continue         
        try:
            if ( r[0] is not None and r[0].optional_field( "NH" ) > 1 ) or \
                     ( r[1] is not None and r[1].optional_field( "NH" ) > 1 ):
                counter.nonunique += 1
                write_to_samout( r, "__alignment_not_unique" )
                continue
        except KeyError:
            pass
        if ( r[0] and r[0].aQual < minaqual ) or ( r[1] and r[1].aQual < minaqual ):
            lowqual += 1
            write_to_samout( r, "__too_low_aQual" )
            continue         
        
        counter.count(iv_seq, r)
         
    if not quiet:
        sys.stderr.write( "%d SAM %s processed.\n" % ( i, "alignments " if not pe_mode else "alignment pairs" ) )
开发者ID:Christian-B,项目名称:grid_scripts,代码行数:54,代码来源:batch_htseq_count2.py

示例3: count_reads_paired

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments [as 别名]
def count_reads_paired(read_seq, counter, order, quiet, minaqual):

    if order == "name":
        read_seq = HTSeq.pair_SAM_alignments(read_seq)
    elif order == "pos":
        read_seq = HTSeq.pair_SAM_alignments_with_buffer(read_seq)
    else:
        raise ValueError("Illegal order specified.")

    i = 0
    for r in read_seq:
        if i > 0 and i % 100000 == 0 and not quiet:
            msg = "%d SAM alignment record pairs processed.\n" % (i)
            sys.stderr.write(msg)

        i += 1
        if r[0] is not None and r[0].aligned:
            forward_iv_seq = (co.ref_iv for co in r[0].cigar
                              if co.type == "M" and co.size > 0)
            reverse_iv_seq = (invert_strand(co.ref_iv) for co in r[0].cigar
                              if co.type == "M" and co.size > 0)
        else:
            forward_iv_seq = tuple()
            reverse_iv_seq = tuple()
        if r[1] is not None and r[1].aligned:
            rest = (invert_strand(co.ref_iv) for co in r[1].cigar
                    if co.type == "M" and co.size > 0)
            forward_iv_seq = itertools.chain(forward_iv_seq, rest)
            rest = (co.ref_iv for co in r[1].cigar
                    if co.type == "M" and co.size > 0)
            reverse_iv_seq = itertools.chain(reverse_iv_seq, rest)
        else:
            if (r[0] is None) or not (r[0].aligned):
                counter.not_aligned(r)
                continue
        try:
            if (r[0] is not None and r[0].optional_field("NH") > 1) or \
                    (r[1] is not None and r[1].optional_field("NH") > 1):
                counter.non_unique(r)
                continue
        except KeyError:
            pass
        if (r[0] and r[0].aQual < minaqual) or \
                (r[1] and r[1].aQual < minaqual):
            counter.too_low_quality(r)
            continue

        counter.forward_count(forward_iv_seq, r)
        counter.reverse_count(reverse_iv_seq, r)

    if not quiet:
        sys.stderr.write("%d SAM alignment pairs processed.\n" % (i))
开发者ID:Christian-B,项目名称:grid_scripts,代码行数:54,代码来源:batch_htseq_count.py

示例4: ungapped_pe_counter

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments [as 别名]
def ungapped_pe_counter(sam_reader, feature_array):
    counts = collections.Counter( )
    pair_iterator = hts.pair_SAM_alignments( sam_reader, bundle=True )
    # bundle puts all multiply-mapped pairs together.

    t0 = datetime.datetime.now()
    for ic, bundle in enumerate(pair_iterator):

        # report progress (to prove that it is still alive):
        if ic % 1000000 == 0:
            t1 = datetime.datetime.now()
            print "\r%d read bundles counted in %s\r" % (ic, t1-t0)
            sys.stdout.flush()

        if bundle == []: # first bundle for some reason is always an empty list
            continue

        bcounts = assess_bundle(bundle, feature_array)

        """
        To evaluate the multiply mapped bundles, each pair in a bundle must still ALWAYS
        and ONLY map to a single feature. Thus, every aligned pair has come from the same
        feature (gene), and this bundle counts as evidence of one read for this gene.

        If any of the read pairs maps to a different gene, or no gene, or multiple genes,
        then the bundle is considered ambiguous.

        If all pairs in a bundle map as _no_feature, _unmapped or _ambiguous, then the
        bundle counts as one count towards this feature type. (ie, it is passed on to
        the final counter to increment by 1).
        """

        if len(bcounts) > 1: # ie, is a multiply mapped feature with multiple gene mappings
            counts[ "_ambiguous" ] += 1
            continue
        elif len(bcounts) == 0:  # uh oh! There is an error somewhere.
            print "#" * 40
            print "Error! bundle was not assigned any status"
            print "Contents of bundle:"
            print bundle
            continue
        else:
            counts[ bcounts.keys()[0] ] += 1

    return counts
开发者ID:oxpeter,项目名称:htseq-transcriptome,代码行数:47,代码来源:htseq-trancriptome.py

示例5: bam_parser_2

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments [as 别名]
def bam_parser_2(bam_file, min_len, max_clip, min_id, mode):
    bam_dict = {}

    query_counter = 0

    output_list = list()

    if mode == 'paired':
        #import itertools
        #for aln in itertools.islice( HTSeq.pair_SAM_alignments(bam_file), 1000 ):  # printing first N reads
        for aln in HTSeq.pair_SAM_alignments(bam_file):
            query_counter += 1

            query_1, query_2 = aln

            q1_aln = parser_aln_list(query_1, aln_number = query_counter, pair_pos = 1, min_len=min_len, max_clip=max_clip, min_id=min_id)
            q2_aln = parser_aln_list(query_2, aln_number = query_counter, pair_pos = 2, min_len=min_len, max_clip=max_clip, min_id=min_id)

            alns = [q1_aln, q2_aln]

            if alns == [None, None]:
                continue
            else:
                if None in alns:
                    alns.remove(None)
                output_list.append(alns)

    elif mode == 'single':
        for aln in bam_file:

            query_counter += 1

            query_1 = aln

            q1_aln = parser_aln_list(query_1, aln_number = query_counter, pair_pos = 1, min_len=min_len, max_clip=max_clip, min_id=min_id)

            alns = [q1_aln]

            if q1_aln != None:
                output_list.append(alns)

    df_columns = ['ALN','QUERY','REF','SEQ','LEN','ID','SCORE','CLIP_PCT']
    output_list = [item for sublist in output_list for item in sublist]

    return pd.DataFrame(output_list, columns=df_columns)
开发者ID:ppflrs,项目名称:scripts,代码行数:47,代码来源:BP-1.py

示例6: htseq_count

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments [as 别名]
def htseq_count(data):
    """ adapted from Simon Anders htseq-count.py script
    http://www-huber.embl.de/users/anders/HTSeq/doc/count.html
    """

    sam_filename, gff_filename, out_file, stats_file = _get_files(data)
    stranded = _get_stranded_flag(data["config"])
    overlap_mode = "union"
    feature_type = "exon"
    id_attribute = "gene_id"
    minaqual = 0


    if file_exists(out_file):
        return out_file

    logger.info("Counting reads mapping to exons in %s using %s as the "
                    "annotation and strandedness as %s." % (os.path.basename(sam_filename),
                    os.path.basename(gff_filename), _get_strandedness(data["config"])))

    features = HTSeq.GenomicArrayOfSets("auto", stranded != "no")
    counts = {}

    # Try to open samfile to fail early in case it is not there
    open(sam_filename).close()

    gff = HTSeq.GFF_Reader(gff_filename)
    i = 0
    try:
        for f in gff:
            if f.type == feature_type:
                try:
                    feature_id = f.attr[id_attribute]
                except KeyError:
                    sys.exit("Feature %s does not contain a '%s' attribute" %
                             (f.name, id_attribute))
                if stranded != "no" and f.iv.strand == ".":
                    sys.exit("Feature %s at %s does not have strand "
                             "information but you are running htseq-count "
                             "in stranded mode. Use '--stranded=no'." %
                             (f.name, f.iv))
                features[f.iv] += feature_id
                counts[f.attr[id_attribute]] = 0
            i += 1
            if i % 100000 == 0:
                sys.stderr.write("%d GFF lines processed.\n" % i)
    except:
        sys.stderr.write("Error occured in %s.\n"
                         % gff.get_line_number_string())
        raise

    sys.stderr.write("%d GFF lines processed.\n" % i)

    if len(counts) == 0:
        sys.stderr.write("Warning: No features of type '%s' found.\n"
                         % feature_type)

    try:
        align_reader = htseq_reader(sam_filename)
        first_read = iter(align_reader).next()
        pe_mode = first_read.paired_end
    except:
        sys.stderr.write("Error occured when reading first line of sam "
                         "file.\n")
        raise

    try:
        if pe_mode:
            read_seq_pe_file = align_reader
            read_seq = HTSeq.pair_SAM_alignments(align_reader)
        empty = 0
        ambiguous = 0
        notaligned = 0
        lowqual = 0
        nonunique = 0
        i = 0
        for r in read_seq:
            i += 1
            if not pe_mode:
                if not r.aligned:
                    notaligned += 1
                    continue
                try:
                    if r.optional_field("NH") > 1:
                        nonunique += 1
                        continue
                except KeyError:
                    pass
                if r.aQual < minaqual:
                    lowqual += 1
                    continue
                if stranded != "reverse":
                    iv_seq = (co.ref_iv for co in r.cigar if co.type == "M"
                              and co.size > 0)
                else:
                    iv_seq = (invert_strand(co.ref_iv) for co in r.cigar if
                              co.type == "M" and co.size > 0)
            else:
                if r[0] is not None and r[0].aligned:
                    if stranded != "reverse":
#.........这里部分代码省略.........
开发者ID:Lobage,项目名称:bcbio-nextgen,代码行数:103,代码来源:count.py

示例7: count_reads_onto_prebuilt_features

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments [as 别名]
def count_reads_onto_prebuilt_features(
    sam_filename, features, feature_ids, stranded, overlap_mode, quiet, minaqual, samout, umis=False
):
    def write_to_samout(r, assignment):
        if samoutfile is None:
            return
        if not pe_mode:
            r = (r,)
        for read in r:
            if read is not None:
                samoutfile.write(read.original_sam_line.rstrip() + "\tXF:Z:" + assignment + "\n")

    if quiet:
        warnings.filterwarnings(action="ignore", module="HTSeq")

    if samout != "":
        samoutfile = open(samout, "w")
    else:
        samoutfile = None

    if umis:
        umi_re = re.compile(":UMI:(\w+):")
        umi_counts = {}

        def count_umis(fs, read_name):
            umi_seq = umi_re.search(read_name).group(1)
            umi_counts[fs][umi_seq] += 1

        for feature_id in feature_ids:
            umi_counts[feature_id] = Counter()
    else:

        def count_umis(x, y):
            return None

    # Try to open samfile to fail early in case it is not there
    if sam_filename != "-":
        open(sam_filename).close()

    counts = {}
    for feature_id in feature_ids:
        counts[feature_id] = 0

    try:
        if sam_filename != "-":
            read_seq_file = HTSeq.SAM_Reader(sam_filename)
            read_seq = read_seq_file
            first_read = iter(read_seq).next()
        else:
            read_seq_file = HTSeq.SAM_Reader(sys.stdin)
            read_seq_iter = iter(read_seq_file)
            first_read = read_seq_iter.next()
            read_seq = itertools.chain([first_read], read_seq_iter)
        pe_mode = first_read.paired_end
    except StopIteration:
        raise EmptySamError(sam_filename)

    try:
        if pe_mode:
            read_seq = HTSeq.pair_SAM_alignments(read_seq)
        empty = 0
        ambiguous = 0
        notaligned = 0
        lowqual = 0
        nonunique = 0
        i = 0
        for r in read_seq:
            i += 1
            if not pe_mode:
                if not r.aligned:
                    notaligned += 1
                    write_to_samout(r, "not_aligned")
                    continue
                try:
                    if r.optional_field("NH") > 1:
                        write_to_samout(r, "alignment_not_unique")
                        nonunique += 1
                        continue
                except KeyError:
                    pass
                if r.aQual < minaqual:
                    lowqual += 1
                    write_to_samout(r, "too_low_aQual")
                    continue
                if stranded != "reverse":
                    iv_seq = (co.ref_iv for co in r.cigar if co.type == "M" and co.size > 0)
                else:
                    iv_seq = (invert_strand(co.ref_iv) for co in r.cigar if co.type == "M" and co.size > 0)
            else:
                if r[0] is not None and r[0].aligned:
                    if stranded != "reverse":
                        iv_seq = (co.ref_iv for co in r[0].cigar if co.type == "M" and co.size > 0)
                    else:
                        iv_seq = (invert_strand(co.ref_iv) for co in r[0].cigar if co.type == "M" and co.size > 0)
                else:
                    iv_seq = tuple()
                if r[1] is not None and r[1].aligned:
                    if stranded != "reverse":
                        iv_seq = itertools.chain(
                            iv_seq, (invert_strand(co.ref_iv) for co in r[1].cigar if co.type == "M" and co.size > 0)
#.........这里部分代码省略.........
开发者ID:rgranit,项目名称:CEL-Seq-pipeline,代码行数:103,代码来源:htseq_count_umified.py

示例8: main

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments [as 别名]
def main():
    exe_parser = argparse.ArgumentParser()
    exe_parser.add_argument('infile', type=str, help='<input file> [(full path), -b/-s required]')
    exe_parser.add_argument("-u", "--not_aligned",
                            help="output reads that were not aligned, including those that were aligned multiple times(flat file).",
                            type=str)
    exe_parser.add_argument("-s", "--samout", help="output not aligned reads to [file path].", type=str)
    exe_parser.add_argument("-b", "--ambiguous_out", help="output a fasta file of ambiguous hits [file path].",
                            type=str)
    exe_parser.add_argument("-v", "--verbose", help="verbose. (default = TRUE).", action="store_true")
    exe_parser.add_argument("gff", help="<gff file> [(full path)]", type=str)
    exe_parser.add_argument("-f", "--fasta", help="output fasta file of hits (full path).", type=str)
    exe_parser.add_argument("-m", "--min_read_length", help="minimal read length to consider. (default = 60b).",
                            type=int)
    exe_parser.add_argument("-i", "--min_id", help="minimal percent id of hit to consider. (default = 80).", type=int)
    exe_parser.add_argument("-z", "--min_score", help="minimal aligner score to consider. (default = 0).", type=int)
    exe_parser.add_argument("-c", "--max_clip",
                            help="proportion of bases clipped from read for alignment. (default = 0.3).", type=float)
    exe_parser.add_argument("--stranded", help="whether the data is stranded (y, n, reverse). (default = n).", type=str,
                            choices=["y", "n", "reverse"], default="n")
    exe_parser.add_argument("--idattr", help="GFF attribute to be used as feature ID. (default = GeneID).", type=str)
    exe_parser.add_argument("--type", help="feature type (3rd column in GFF file) to be used. (default = CDS).",
                            type=str)
    exe_parser.add_argument("-a", "--minaqual", help="min. alignment quality (default = 0).", type=str)
    exe_parser.add_argument("-p", "--paired_end_mode",
                            help="input is paired end sorted by name (n) or position (p) . (default = p).", type=str,
                            choices=["p", "n"], default="p")
    exe_parser.add_argument("-o", "--out", help="name of counts output file.", type=str)
    args = exe_parser.parse_args()

    if args.paired_end_mode == 'p':
        paired_end = True
        pe_order = 'p'
    elif args.paired_end_mode == 'n':
        paired_end = True
        pe_order = 'n'

    if args.infile:
        try:
            if args.infile == '-':  # get sam on a stream
                seqfile = HTSeq.SAM_Reader(sys.stdin)
                if args.paired_end_mode:
                    # read_seq_iter = iter(seqfile)
                    # first_read = read_seq_iter.next()
                    # read_seq = itertools.chain([first_read], read_seq_iter)
                    # reader = HTSeq.pair_SAM_alignments(read_seq)
                    if pe_order == 'p':
                        reader = HTSeq.pair_SAM_alignments_with_buffer(seqfile)
                    elif pe_order == 'n':
                        reader = HTSeq.pair_SAM_alignments(seqfile)  # (read_seq)
                else:
                    reader = seqfile
            elif args.infile != '-':
                seqfile = HTSeq.SAM_Reader(args.infile)
                if args.paired_end_mode:
                    read_seq_iter = iter(seqfile)
                    first_read = read_seq_iter.next()
                    read_seq = itertools.chain([first_read], read_seq_iter)
                    reader = HTSeq.pair_SAM_alignments(read_seq)
                    if pe_order == 'p':
                        reader = HTSeq.pair_SAM_alignments_with_buffer(reader)
                    elif pe_order == 'n':
                        reader = HTSeq.pair_SAM_alignments(reader)
                else:
                    reader = seqfile
                    # fread_seq_iter = iter(reader)
                    # first_read = iter(read_seq).next()
            elif args.infile == '':
                print "no input file type given. exiting..."
                sys.exit(1)
        except:
            print "failed processing SAM/BAM file"
            raise
    elif not args.infile:
        print "no input file given. exiting..."
        sys.exit(1)

    if args.gff:
        gff_file = args.gff
    else:
        print "no gff file given. exiting..."
        sys.exit(1)

    if args.verbose:
        verbose = True
    else:
        verbose = False

    if args.min_read_length:
        min_read_len = args.min_read_length
    else:
        min_read_len = 60  # default read length

    if args.max_clip:
        max_clip_ = float(args.max_clip)
    else:
        max_clip_ = float(0.3)  # default read length

    if args.min_id:
        min_id = float(args.min_id)
#.........这里部分代码省略.........
开发者ID:ppflrs,项目名称:scripts,代码行数:103,代码来源:parse_and_count_sam_alignment_atlas_final.py

示例9: count_reads_in_features

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments [as 别名]

#.........这里部分代码省略.........
		dict_gene_nonunique_counts = initialise_counts_per_feature(dict_gene_nonunique_counts, feature_name)
		dict_gene_unique_counts_ambiguous = initialise_counts_per_feature(dict_gene_unique_counts_ambiguous, feature_name)
         i += 1
         if i % 100000 == 0 and not quiet:
            sys.stderr.write( "%d GFF lines processed.\n" % i )
   except:
      sys.stderr.write( "Error occured in %s.\n" % gff.get_line_number_string() )
      raise
      
   if not quiet:
      sys.stderr.write( "%d GFF lines processed.\n" % i )
      
   if len( counts ) == 0 and not quiet:
      sys.stderr.write( "Warning: No features of type '%s' found.\n" % feature_type )
   
   try:
      if sam_filename != "-":
         read_seq = HTSeq.SAM_Reader( sam_filename )
         first_read = iter(read_seq).next()
      else:
         read_seq = iter( HTSeq.SAM_Reader( sys.stdin ) )
         first_read = read_seq.next()
         read_seq = itertools.chain( [ first_read ], read_seq )
      pe_mode = first_read.paired_end
      #pe_mode = 1 ## Added by us
   except:
      sys.stderr.write( "Error occured when reading first line of sam file.\n" )
      raise

   ###################################################################################################   
   try:
      if pe_mode:
         read_seq_pe_file = read_seq
         read_seq = HTSeq.pair_SAM_alignments( read_seq )
      empty = 0
      ambiguous = 0
      ambiguous_tag=0
      notaligned = 0
      lowqual = 0
      nonunique = 0
      nonunique_nonamb_to_be_rescued = 0
      temp_read_name="NA"
      previous_read_name="NA"
      temp_interval_r0="NA"
      temp_interval_r1="NA"
      counter_fragment = 0	
      flag_result = 0
      i = 0   
      pe_mode_for_SE = 0
      ## -- Added pe_mode on for SE files so that multireads reads will be accounted for
      if not pe_mode: # real SE
      	pe_mode_for_SE = 1 #
      	read_seq_pe_file = read_seq
      	pe_mode=1
      ## -- End
      index_fragment = 0
      for r in read_seq:
         prev_index_fragment = index_fragment
	 tag_nonunique_NH = 0
	 tag_overlapping_genes = 0
	 flag_aln_not_unique = 0 #
	 flag_ambiguous = 0 #
	 #-- LOOP OVER ALL READS IN INPUT BAM FILE
	 if pe_mode_for_SE:
	 	r = (r, None)
      	 counter_fragment += 1	
开发者ID:rnaseq,项目名称:peakRescue,代码行数:70,代码来源:count_peakRescue_step2.py

示例10: count_reads_in_features

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments [as 别名]
def count_reads_in_features( sam_filename, gff_filename, stranded,
      overlap_mode, feature_type, id_attribute, quiet, minaqual, samout, custom_stat ):
      
   def write_to_samout( r, assignment ):
      if samoutfile is None:
         return
      if not pe_mode:
         r = (r,)
      for read in r:
         if read is not None:
            samoutfile.write( read.original_sam_line.rstrip() +
               "\tXF:Z:" + assignment + "\n" )
      
   if quiet:
      warnings.filterwarnings( action="ignore", module="HTSeq" )
      
   if samout != "":
      samoutfile = open( samout, "w" )
   else:
      samoutfile = None

   # MB
   if custom_stat != "":
      custom_stat_file=open(custom_stat,"a")
   else:
      custom_stat_file = None
   # endMB
      
   features = HTSeq.GenomicArrayOfSets( "auto", stranded != "no" )
   counts = {}

   # Try to open samfile to fail early in case it is not there
   if sam_filename != "-":
      open( sam_filename ).close()
      
   gff = HTSeq.GFF_Reader( gff_filename )
   i = 0
   try:
      for f in gff:
         if f.type == feature_type:
            try:
               feature_id = f.attr[ id_attribute ]
            except KeyError:
               sys.exit( "Feature %s does not contain a '%s' attribute" %
                  ( f.name, id_attribute ) )
            if stranded != "no" and f.iv.strand == ".":
               sys.exit( "Feature %s at %s does not have strand information but you are "
                  "running htseq-count in stranded mode. Use '--stranded=no'." %
                  ( f.name, f.iv ) )
            features[ f.iv ] += feature_id
            counts[ f.attr[ id_attribute ] ] = 0
         i += 1
         if i % 100000 == 0 and not quiet:
            sys.stderr.write( "%d GFF lines processed.\n" % i )
   except:
      sys.stderr.write( "Error occured in %s.\n" % gff.get_line_number_string() )
      raise
      
   if not quiet:
      sys.stderr.write( "%d GFF lines processed.\n" % i )
      
   if len( counts ) == 0 and not quiet:
      sys.stderr.write( "Warning: No features of type '%s' found.\n" % feature_type )
   
   try:
      if sam_filename != "-":
         read_seq = HTSeq.SAM_Reader( sam_filename )
         first_read = iter(read_seq).next()
      else:
         read_seq = iter( HTSeq.SAM_Reader( sys.stdin ) )
         first_read = read_seq.next()
         read_seq = itertools.chain( [ first_read ], read_seq )
      pe_mode = first_read.paired_end
   except:
      sys.stderr.write( "Error occured when reading first line of sam file.\n" )
      raise

   try:
      if pe_mode:
         read_seq_pe_file = read_seq
         read_seq = HTSeq.pair_SAM_alignments( read_seq )
      empty = 0
      ambiguous = 0
      notaligned = 0
      lowqual = 0
      nonunique = 0
      # MB: Creating detailed stats
      if custom_stat_file:
		  sam_lines = 0
		  skipped = 0
		  assigned_reads = 0
		  assigned_reads_s = 0
		  assigned_reads_p = 0
		  assigned_genes = 0
		  assigned_genes_s = 0
		  assigned_genes_p = 0
		  empty_s = 0
		  empty_p = 0
		  ambiguous_s = 0
		  ambiguous_p = 0
#.........这里部分代码省略.........
开发者ID:mbahin,项目名称:IGDR_scripts,代码行数:103,代码来源:HTSeq-count.retouch.py

示例11: iter

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments [as 别名]
#!/usr/bin/python

import HTSeq as h
from collections import defaultdict

#reader_masked = h.SAM_Reader("/Users/dashazhernakova/Documents/UMCG/data/geuvadis/mappedData/ERR188022/aligned_masked.sam")
#reader = h.SAM_Reader("/Users/dashazhernakova/Documents/UMCG/data/geuvadis/mappedData/ERR188022/aligned.sam")

reader = h.SAM_Reader("/Users/dashazhernakova/Documents/UMCG/data/geuvadis/mappedData/ERR188022_masked/Aligned.out.filtered.new.1017680.sam")
reader_masked = h.SAM_Reader("/Users/dashazhernakova/Documents/UMCG/data/geuvadis/mappedData/ERR188022/Aligned.out.filtered.new1mb.sam")
it_p = iter(h.pair_SAM_alignments(reader))
it_p_m = iter(h.pair_SAM_alignments(reader_masked))

same_aligned = 0
one_same_pos = 0
both_same_pos = 0
masked_more_pos = 0
simple_more_pos = 0
#cur_read = {}
#cur_m_read = {}
not_in_simple = 0
not_in_masked = 0

n_m = defaultdict(list)
i = 0
for r1, r2 in h.pair_SAM_alignments(reader):
	n_m[r1.read.name].append((r1,r2))
	i += 1
	if i%10000 == 0:
		print i, " lines"
#for k,v in n_m.items():
开发者ID:DashaZhernakova,项目名称:misc,代码行数:33,代码来源:compareReadMappings.py

示例12: count_reads_in_features

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments [as 别名]
def count_reads_in_features( sam_filename, gff_filename, samtype, order, stranded, 
      overlap_mode, feature_type, id_attribute, quiet, minaqual, samout ):
      
   def write_to_samout( r, assignment ):
      if samoutfile is None:
         return
      if not pe_mode:
         r = (r,)
      for read in r:
         if read is not None:
            samoutfile.write( read.original_sam_line.rstrip() + 
               "\tXF:Z:" + assignment + "\n" )
      

   if samout != "":
      samoutfile = open( samout, "w" )
   else:
      samoutfile = None
      
   features = HTSeq.GenomicArrayOfSets( "auto", stranded != "no" )     
   counts = {}

   # Try to open samfile to fail early in case it is not there
   if sam_filename != "-":
      open( sam_filename ).close()
      
   gff = HTSeq.GFF_Reader( gff_filename )   
   i = 0
   try:
      for f in gff:
         if f.type == feature_type:
            try:
               feature_id = f.attr[ id_attribute ]
            except KeyError:
               raise ValueError, ( "Feature %s does not contain a '%s' attribute" % 
                  ( f.name, id_attribute ) )
            if stranded != "no" and f.iv.strand == ".":
               raise ValueError, ( "Feature %s at %s does not have strand information but you are "
                  "running htseq-count in stranded mode. Use '--stranded=no'." % 
                  ( f.name, f.iv ) )
            features[ f.iv ] += feature_id
            counts[ f.attr[ id_attribute ] ] = 0
         i += 1
         if i % 100000 == 0 and not quiet:
            sys.stderr.write( "%d GFF lines processed.\n" % i )
   except:
      sys.stderr.write( "Error occured when processing GFF file (%s):\n" % gff.get_line_number_string() )
      raise
      
   if not quiet:
      sys.stderr.write( "%d GFF lines processed.\n" % i )
      
   if len( counts ) == 0:
      sys.stderr.write( "Warning: No features of type '%s' found.\n" % feature_type )
   
   if samtype == "sam":
      SAM_or_BAM_Reader = HTSeq.SAM_Reader
   elif samtype == "bam":
      SAM_or_BAM_Reader = HTSeq.BAM_Reader
   else:
      raise ValueError, "Unknown input format %s specified." % samtype

   try:
      if sam_filename != "-":
         read_seq_file = SAM_or_BAM_Reader( sam_filename )
         read_seq = read_seq_file
         first_read = iter(read_seq).next()
      else:
         read_seq_file = SAM_or_BAM_Reader( sys.stdin )
         read_seq_iter = iter( read_seq_file )
         first_read = read_seq_iter.next()
         read_seq = itertools.chain( [ first_read ], read_seq_iter )
      pe_mode = first_read.paired_end
   except:
      sys.stderr.write( "Error occured when reading beginning of SAM/BAM file.\n" )
      raise

   try:
      if pe_mode:
         if order == "name":
            read_seq = HTSeq.pair_SAM_alignments( read_seq )
         elif order == "pos":
            read_seq = HTSeq.pair_SAM_alignments_with_buffer( read_seq )
         else:
            raise ValueError, "Illegal order specified."
      empty = 0
      ambiguous = 0
      notaligned = 0
      lowqual = 0
      nonunique = 0
      i = 0   
      for r in read_seq:
         if i > 0 and i % 100000 == 0 and not quiet:
            sys.stderr.write( "%d SAM alignment record%s processed.\n" % ( i, "s" if not pe_mode else " pairs" ) )

         i += 1
         if not pe_mode:
            if not r.aligned:
               notaligned += 1
               write_to_samout( r, "__not_aligned" )
#.........这里部分代码省略.........
开发者ID:JosephRyanPeterson,项目名称:htseq,代码行数:103,代码来源:count.py

示例13: count_reads_in_features

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments [as 别名]
def count_reads_in_features( sam_filename, gff_filename, stranded, 
      overlap_mode, feature_type, id_attribute, quiet, minaqual, samout, allow_ambiguous, allow_nonunique ):
      
   def write_to_samout( r, assignment ):
      if samoutfile is None:
         return
      if not pe_mode:
         r = (r,)
      for read in r:
         if read is not None:
            samoutfile.write( read.original_sam_line.rstrip() + 
               "\tXF:Z:" + assignment + "\n" )
      
   if quiet:
      warnings.filterwarnings( action="ignore", module="HTSeq" ) 
      
   if samout != "":
      samoutfile = open( samout, "w" )
   else:
      samoutfile = None
      
   features = HTSeq.GenomicArrayOfSets( "auto", stranded != "no" )
   features_dict = defaultdict(list)
   counts = {}

   # Try to open samfile to fail early in case it is not there
   if sam_filename != "-":
      open( sam_filename ).close()
      
   gff = HTSeq.GFF_Reader( gff_filename )   
   i = 0
   try:
      for f in gff:
         if f.type == feature_type:
            try:
               feature_id = f.attr[ id_attribute ]
            except KeyError:
               sys.exit( "Feature %s does not contain a '%s' attribute" % 
                  ( f.name, id_attribute ) )
            if stranded != "no" and f.iv.strand == ".":
               sys.exit( "Feature %s at %s does not have strand information but you are "
                  "running htseq-count in stranded mode. Use '--stranded=no'." % 
                  ( f.name, f.iv ) )
            features[ f.iv ] += feature_id
            counts[ f.attr[ id_attribute ] ] = 0
            features_dict[ f.attr[ id_attribute ] ].append(f)
         i += 1
         if i % 100000 == 0 and not quiet:
            sys.stderr.write( "%d GFF lines processed.\n" % i )
   except:
      sys.stderr.write( "Error occured in %s.\n" % gff.get_line_number_string() )
      raise
      
   if not quiet:
      sys.stderr.write( "%d GFF lines processed.\n" % i )
   
   sys.stderr.write( "Sorting exons from GFF file.\n" )
   for key, value in features_dict.items():
   		if features_dict[key][0].iv.strand == "-":
   				features_dict[key] = sorted(features_dict[key], key=lambda feat: feat.iv.start, reverse=True)
   		else:
   				features_dict[key] = sorted(features_dict[key], key=lambda feat: feat.iv.start, reverse=False)
      
   if len( counts ) == 0 and not quiet:
      sys.stderr.write( "Warning: No features of type '%s' found.\n" % feature_type )
   
   try:
      if sam_filename != "-":
         read_seq = HTSeq.SAM_Reader( sam_filename )
         first_read = iter(read_seq).next()
      else:
         read_seq = iter( HTSeq.SAM_Reader( sys.stdin ) )
         first_read = read_seq.next()
         read_seq = itertools.chain( [ first_read ], read_seq )
      pe_mode = first_read.paired_end
   except:
      sys.stderr.write( "Error occured when reading first line of sam file.\n" )
      raise

   try:
      if pe_mode:
         read_seq_pe_file = read_seq
         read_seq = HTSeq.pair_SAM_alignments( read_seq )
      empty = 0
      ambiguous = 0
      notaligned = 0
      lowqual = 0
      nonunique = 0
      i = 0   
      for r in read_seq:
         i += 1
         if not pe_mode:
            if not r.aligned:
               notaligned += 1
               write_to_samout( r, "not_aligned" )
               continue
            try:
               if ((allow_nonunique == "no") and (r.optional_field( "NH" ) > 1)):
                  write_to_samout( r, "alignment_not_unique" )
                  nonunique += 1
#.........这里部分代码省略.........
开发者ID:fanli-gcb,项目名称:Core.RNAseq,代码行数:103,代码来源:sam2pos.py

示例14: count_reads_in_features

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments [as 别名]
def count_reads_in_features( sam_filename, gff_filename, stranded, 
      overlap_mode, feature_type, id_attribute, quiet, minaqual, samout ):
      
   def write_to_samout( r, assignment ):
      if samoutfile is None:
         return
      if not pe_mode:
         r = (r,)
      for read in r:
         if read is not None:
            samoutfile.write( read.original_sam_line.rstrip() + 
               "\tXF:Z:" + assignment + "\n" )
      
   if quiet:
      warnings.filterwarnings( action="ignore", module="HTSeq" ) 
      
   if samout != "":
      samoutfile = open( samout, "w" )
   else:
      samoutfile = None
      
   features = HTSeq.GenomicArrayOfSets( "auto", stranded != "no" )     
   counts = {}
   ## added by CR
   dict_nonunique = {}

   # Try to open samfile to fail early in case it is not there
   if sam_filename != "-":
      open( sam_filename ).close()
      
   gff = HTSeq.GFF_Reader( gff_filename )   
   i = 0
   try:
      for f in gff:
         if f.type == feature_type:
            try:
               feature_id = f.attr[ id_attribute ]
            except KeyError:
               sys.exit( "Feature %s does not contain a '%s' attribute" % 
                  ( f.name, id_attribute ) )
            if stranded != "no" and f.iv.strand == ".":
               sys.exit( "Feature %s at %s does not have strand information but you are "
                  "running htseq-count in stranded mode. Use '--stranded=no'." % 
                  ( f.name, f.iv ) )
            features[ f.iv ] += feature_id
            counts[ f.attr[ id_attribute ] ] = 0
	    ##added by CR
	    dict_nonunique[ f.attr[ id_attribute ] ] = 0
         i += 1
         if i % 100000 == 0 and not quiet:
            sys.stderr.write( "%d GFF lines processed.\n" % i )
   except:
      sys.stderr.write( "Error occured in %s.\n" % gff.get_line_number_string() )
      raise
      
   if not quiet:
      sys.stderr.write( "%d GFF lines processed.\n" % i )
      
   if len( counts ) == 0 and not quiet:
      sys.stderr.write( "Warning: No features of type '%s' found.\n" % feature_type )
   
   try:
      if sam_filename != "-":
         read_seq = HTSeq.SAM_Reader( sam_filename )
         first_read = iter(read_seq).next()
      else:
         read_seq = iter( HTSeq.SAM_Reader( sys.stdin ) )
         first_read = read_seq.next()
         read_seq = itertools.chain( [ first_read ], read_seq )
      pe_mode = first_read.paired_end
   except:
      sys.stderr.write( "Error occured when reading first line of sam file.\n" )
      raise

   try:
      if pe_mode:
         read_seq_pe_file = read_seq
         read_seq = HTSeq.pair_SAM_alignments( read_seq )
      empty = 0
      ambiguous = 0
      notaligned = 0
      lowqual = 0
      nonunique = 0
      #added by SB
      temp_read_name="NA"
      temp_interval_r0="NA"
      temp_interval_r1="NA"
      ## added by CR	
      nonunique2 = 0
      #added by SB
      i = 0   
      for r in read_seq:
         i += 1
         if not pe_mode:
            if not r.aligned:
               notaligned += 1
               write_to_samout( r, "not_aligned" )
               continue
            try:
               if r.optional_field( "NH" ) > 1:
#.........这里部分代码省略.........
开发者ID:rnaseq,项目名称:peakRescue,代码行数:103,代码来源:count_peakRescue_step1.py

示例15: count_reads_in_features

# 需要导入模块: import HTSeq [as 别名]
# 或者: from HTSeq import pair_SAM_alignments [as 别名]

#.........这里部分代码省略.........
            samoutfile = None

        try:
            if sam_filename == "-":
                read_seq_file = SAM_or_BAM_Reader(sys.stdin)
            else:
                read_seq_file = SAM_or_BAM_Reader(sam_filename)
            read_seq_iter = iter(read_seq_file)
            # Catch empty BAM files
            try:
                first_read = next(read_seq_iter)
                pe_mode = first_read.paired_end
            except:
                first_read = None
                pe_mode = False
            if first_read is not None:
                read_seq = itertools.chain([first_read], read_seq_iter)
            else:
                read_seq = []
        except:
            sys.stderr.write(
                "Error occured when reading beginning of {:} file.\n".format(
                    samname))
            raise

        try:
            if pe_mode:
                if ((supplementary_alignment_mode == 'ignore') and
                   (secondary_alignment_mode == 'ignore')):
                    primary_only = True
                else:
                    primary_only = False
                if order == "name":
                    read_seq = HTSeq.pair_SAM_alignments(
                            read_seq,
                            primary_only=primary_only)
                elif order == "pos":
                    read_seq = HTSeq.pair_SAM_alignments_with_buffer(
                            read_seq,
                            max_buffer_size=max_buffer_size,
                            primary_only=primary_only)
                else:
                    raise ValueError("Illegal order specified.")
            empty = 0
            ambiguous = 0
            notaligned = 0
            lowqual = 0
            nonunique = 0
            i = 0
            for r in read_seq:
                if i > 0 and i % 100000 == 0 and not quiet:
                    sys.stderr.write(
                        "%d %s alignment record%s processed.\n" %
                        (i, samname, "s" if not pe_mode else " pairs"))
                    sys.stderr.flush()

                i += 1
                if not pe_mode:
                    if not r.aligned:
                        notaligned += 1
                        write_to_samout(r, "__not_aligned", samoutfile)
                        continue
                    if ((secondary_alignment_mode == 'ignore') and
                       r.not_primary_alignment):
                        continue
                    if ((supplementary_alignment_mode == 'ignore') and
开发者ID:simon-anders,项目名称:htseq,代码行数:70,代码来源:count.py


注:本文中的HTSeq.pair_SAM_alignments方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。