当前位置: 首页>>代码示例>>Python>>正文


Python Samfile.write方法代码示例

本文整理汇总了Python中pysam.Samfile.write方法的典型用法代码示例。如果您正苦于以下问题:Python Samfile.write方法的具体用法?Python Samfile.write怎么用?Python Samfile.write使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pysam.Samfile的用法示例。


在下文中一共展示了Samfile.write方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: split_samfile

# 需要导入模块: from pysam import Samfile [as 别名]
# 或者: from pysam.Samfile import write [as 别名]
def split_samfile(sam_file, splits, prefix='', path=''):
    """Take a sam file and split it splits number of times.

    :path:    Where to put the split files.
    :prefix:  A prefix for the outfile names.
    :returns: A tuple of job files.
    """
    # Determine how many reads will be in each split sam file.
    num_lines = count_reads(sam_file)
    num_reads = int(int(num_lines)/splits) + 1

    # Get rid of starting path
    sam_name = os.path.basename(sam_file)

    # Subset the SAM file into X number of jobs
    cnt      = 0
    currjob  = 1
    suffix   = '.split_sam_' + str(currjob).zfill(4)
    run_file = os.path.join(path, prefix + sam_name + suffix)
    rmode    = 'rb' if sam_name.split('.')[0] == 'bam' else 'r'
    wmode    = 'wb'

    # Actually split the file
    outfiles = [run_file]
    with Samfile(sam_file, rmode) as in_sam:
        sam_split = Samfile(run_file, wmode, template=in_sam)
        for line in in_sam:
            cnt += 1
            if cnt < num_reads:
                sam_split.write(line)
            elif cnt == num_reads:
                # Check if next line is mate-pair. If so, don't split.
                line2     = next(in_sam)
                currjob  += 1
                suffix    = '.split_sam_' + str(currjob).zfill(4)
                run_file  = os.path.join(path, prefix + sam_name + suffix)
                new_sam   = Samfile(run_file, wmode, template=in_sam)
                outfiles.append(run_file)

                if line.qname == line2.qname:
                    sam_split.write(line)
                    sam_split.write(line2)
                    sam_split.close()
                    cnt = 0
                else:
                    sam_split.write(line)
                    sam_split.close()
                    new_sam.write(line2)
                    cnt = 0
                sam_split = new_sam
        sam_split.close()
    return tuple(outfiles)
开发者ID:rmagoglia,项目名称:ASEr,代码行数:54,代码来源:CountSNPASE.py

示例2: parse_barcode

# 需要导入模块: from pysam import Samfile [as 别名]
# 或者: from pysam.Samfile import write [as 别名]
def parse_barcode(bamfile):
	"""parses a sorted and index bam file, removes all cases where rna hits more than one spot in genome
	and writes to a file, create file for mutant and wildtype based on barcodes"""
	samfile = Samfile(bamfile, "rb")
	multi_hit_file = Samfile("MultiHit.bam","wb",template=samfile)
	mutant_one = Samfile("MutantOne.bam","wb",template=samfile)
	wildtype_one = Samfile("WildtypeOne.bam","wb",template=samfile)
	mutant_two = Samfile("MutantTwo.bam","wb",template=samfile)
	wildtype_two = Samfile("WildtypeTwo.bam","wb",template=samfile)
	for line in samfile.fetch():
		#if line.is_secondary:
		## does this hit to more than one spot in genome
		#	multi_hit_file.write(line)
		if "#GAGT"in line.qname: 
		## write to mutant file
			mutant_one.write(line)
		elif "#TTAG" in line.qname:
			mutant_two.write(line)
		elif "#ACCC" in line.qname:
		### write to wildtype file
			wildtype_one.write(line)
		elif "#CGTA" in line.qname:
		### write to wildtype file
			wildtype_two.write(line)

	multi_hit_file.close()
	mutant_one.close()
	wildtype_one.close()
	mutant_two.close()
	wildtype_two.close()
	samfile.close()
开发者ID:gturco,项目名称:seqtools,代码行数:33,代码来源:barcode_parser.py

示例3: subsample

# 需要导入模块: from pysam import Samfile [as 别名]
# 或者: from pysam.Samfile import write [as 别名]
def subsample(fn, ns=None):
    if ns is None:
        fn, ns = fn
    sample = []
    count = 0
    outdir_base = path.join(path.dirname(fn), "subset")
    sf = Samfile(fn)
    try:
        i_weight = float(sf.mapped) / max(ns)
        print "Read out ", i_weight
    except ValueError:
        i_weight = 0.0
        for read in sf:
            i_weight += 1
        print "Counted ", i_weight
        i_weight /= float(max(ns))
        sf = Samfile(fn)

    print fn, count, i_weight
    for i, read in enumerate(sf):
        key = random() ** i_weight
        if len(sample) < max(ns):
            heappush(sample, (key, read, i + count))
        else:
            heappushpop(sample, (key, read, i + count))

    count += i

    for n in ns:
        if n == min(ns):
            outdir = outdir_base + "_min"
        else:
            outdir = outdir_base + "{:04.1f}M".format(n / 1e6)
        try:
            makedirs(outdir)
        except OSError:
            pass
        sampN = sorted(sample, reverse=True)[: int(n)]
        print "Kept {: >12,} of {: >12,} reads".format(len(sampN), count)
        print fn, "->", outdir
        stdout.flush()
        of = Samfile(path.join(outdir, "accepted_hits.bam"), mode="wb", template=sf)
        sample.sort(key=lambda (key, read, pos): (read.tid, read.pos))
        for key, read, pos in sampN:
            of.write(read)
        of.close()
    sf.close()
    return [count for key, read, count in sample]
开发者ID:petercombs,项目名称:EisenLab-Code,代码行数:50,代码来源:SubSample.py

示例4: split_reads

# 需要导入模块: from pysam import Samfile [as 别名]
# 或者: from pysam.Samfile import write [as 别名]
def split_reads(reads, ref_reads, alt_reads):
    read_results = Counter()

    ref_bam = Samfile(ref_reads, 'wb', template=reads)
    alt_bam = Samfile(alt_reads, 'wb', template=reads)

    prev_phase = None
    prev_read = None
    prev_qname = None
    test = 0

    for read in reads.fetch(until_eof=True):
        test += 1
        chrom = read.reference_name
        snps_on_chrom = snp_dict[chrom]
        phase = get_phase(read, snps_on_chrom)
        read_qname = read.qname
        if read_qname == prev_qname:
            read_results["tot_read"]+=1
            phase_set = set([phase, prev_phase])
            phase_set.discard(None)
            if len(phase_set) == 1:
                read_phase = phase_set.pop()
                if read_phase == -1:
                    ref_bam.write(read)
                    ref_bam.write(prev_read)
                    read_results["ref_read"]+=1
                elif read_phase == 1:
                    alt_bam.write(read)
                    alt_bam.write(prev_read)
                    read_results["alt_read"]+=1
                elif read_phase == 0:
                    read_results['misphased_read']+=1
            elif len(phase_set)==0:
                read_results["no_snps_read"]+=1
            else:
                read_results['misphased_read']+=1

        prev_read = read
        prev_phase = phase
        prev_qname = read_qname

    return(read_results)
开发者ID:rmagoglia,项目名称:Genomics,代码行数:45,代码来源:splitSpeciesReads.py

示例5: main

# 需要导入模块: from pysam import Samfile [as 别名]
# 或者: from pysam.Samfile import write [as 别名]
def main():
	parser = OptionParser(usage=usage)
	#parser.add_option("-s", action="store_true", dest="sam_input", default=False,
					  #help="Input is in SAM format instead of BAM format")
	(options, args) = parser.parse_args()
	if len(args) != 4:
		parser.print_help()
		sys.exit(1)
	psl_filename = args[0]
	ref_filename = args[1]
	contigs_filename = args[2]
	bam_filename = args[3]
	liftover_dir = args[1]
	
	references, ref_chromosomes = read_fasta(ref_filename)
	refname_to_id = dict([(name,i) for i,name in enumerate(ref_chromosomes)])
	print('Read', len(ref_chromosomes), 'reference chromosomes:', ','.join(ref_chromosomes), file=sys.stderr)
	contigs, contig_names = read_fasta(contigs_filename)
	print('Read', len(contig_names), 'contigs.', file=sys.stderr)
	bam_header = {'HD': {'VN': '1.0'}, 'SQ': [dict([('LN', len(references[chromosome])), ('SN', chromosome)]) for chromosome in ref_chromosomes] }
	outfile = Samfile(bam_filename, 'wb', header=bam_header)

	line_nr = 0
	header_read = False
	for line in (s.strip() for s in open(psl_filename)):
		line_nr += 1
		if line.startswith('------'): 
			header_read = True
			continue
		if not header_read: continue
		fields = line.split()
		assert len(fields) == 21, 'Error reading PSL file, offending line: %d'%line_nr
		sizes = [int(x) for x in fields[18].strip(',').split(',')]
		contig_starts = [int(x) for x in fields[19].strip(',').split(',')]
		ref_starts = [int(x) for x in fields[20].strip(',').split(',')]
		assert 0 < len(sizes) == len(contig_starts) == len(ref_starts)
		strand = fields[8]
		contig_name = fields[9]
		ref_name = fields[13]
		assert strand in ['-','+']
		assert contig_name in contigs
		assert ref_name in references
		a = AlignedRead()
		a.qname = contig_name
		if strand == '+':
			a.seq = str(contigs[contig_name])
		else:
			a.seq = str(contigs[contig_name].reverse_complement())
		a.flag = (16 if strand == '+' else 0)
		a.rname = refname_to_id[ref_name]
		a.pos = ref_starts[0]
		a.mapq = 255
		qpos = contig_starts[0]
		refpos = ref_starts[0]
		cigar = []
		# soft-clipping at the start?
		if contig_starts[0] > 0:
			cigar.append((4,contig_starts[0]))
		longest_insertion = 0
		longest_deletion = 0
		total_matches = 0
		total_insertion = 0
		total_deletion = 0
		for length, contig_start, ref_start in zip(sizes, contig_starts, ref_starts):
			assert contig_start >= qpos
			assert ref_start >= refpos
			# insertion?
			if contig_start > qpos:
				insertion_length = contig_start - qpos
				longest_insertion = max(longest_insertion, insertion_length)
				total_insertion += insertion_length
				append_to_cigar(cigar, 1, insertion_length)
				qpos = contig_start
			# deletion?
			if ref_start > refpos:
				deletion_length = ref_start - refpos
				longest_deletion = max(longest_deletion, deletion_length)
				total_deletion += deletion_length
				append_to_cigar(cigar, 2, deletion_length)
				refpos = ref_start
			# strech of matches/mismatches
			append_to_cigar(cigar, 0, length)
			refpos += length
			qpos += length
			total_matches += length
		# soft-clipping at the end?
		if len(a.seq) > qpos:
			cigar.append((4,len(a.seq) - qpos))
		a.cigar = tuple(cigar)
		# only use contigs where longest deletion is <= 10000 bp
		if longest_deletion > 10000: continue
		# require at least 200 matching positions
		if total_matches < 200: continue
		# require the matching positions to make up at least 75 percent of the contig
		# (without counting parts of the contig that are insertions).
		if total_matches / (len(a.seq) - total_insertion) < 0.75: continue
		outfile.write(a)
	outfile.close()
开发者ID:ALLBio,项目名称:allbiotc2,代码行数:100,代码来源:convert-blat-output.py

示例6: print_reads

# 需要导入模块: from pysam import Samfile [as 别名]
# 或者: from pysam.Samfile import write [as 别名]
def print_reads(reads_to_print, ref_name, header):
    output_name = "{0}_{1}.bam".format(args.output_base, ref_name)
    output_samfile = Samfile(output_name, "wb", header=header)
    for aln in reads_to_print:
        output_samfile.write(aln)
    output_samfile.close()
开发者ID:alneberg,项目名称:bu,代码行数:8,代码来源:split_bam_per_reference.py

示例7: subsample

# 需要导入模块: from pysam import Samfile [as 别名]
# 或者: from pysam.Samfile import write [as 别名]
def subsample(fn, ns=None, paired=False):
    if ns is None:
        fn, ns = fn
    sample = []
    count = 0
    outdir_base = path.join(path.dirname(fn), 'subset')
    sf = Samfile(fn)
    try:
        i_weight = float(sf.mapped)/max(ns)
        print("Read out ", i_weight)
    except ValueError:
        i_weight = 0.0
        for read in sf:
            i_weight += 1
        print("Counted ", i_weight)
        i_weight /= float(max(ns))
        sf = Samfile(fn)

    if paired:
        read_2s = {}
    print(fn, count, i_weight)
    for i, read in enumerate(sf):
        key = random()**i_weight
        if not paired or read.is_read1:
            if len(sample) < max(ns):
                heappush(sample, (key, i+count, read))
            else:
                dropped = heappushpop(sample, (key, i+count, read))
                if paired:
                    read_2s.pop(dropped[-1].qname, None)
        elif paired:
            read_2s[read.qname] = read
        else:
            assert ValueError("I don't know how we got here")


    count += i

    for n in ns:
        outdir = outdir_base + '{:04.1f}M'.format(n/1e6)
        try:
            makedirs(outdir)
        except OSError:
            pass
        sampN = sorted(sample, reverse=True)[:int(n)]
        print("Kept {: >12,} of {: >12,} reads".format(len(sampN), count))
        print(fn, '->', outdir)
        stdout.flush()
        of = Samfile(path.join(outdir, 'accepted_hits.bam'),
                     mode='wb', template=sf)
        sample.sort(key=lambda heap_item: (heap_item[-1].tid, heap_item[-1].pos))
        missing_mates = 0
        for key, pos, read in sampN:
            of.write(read)
            if paired and read.is_proper_pair:
                if read.qname not in read_2s:
                    missing_mates += 1
                    continue
                of.write(read_2s[read.qname])
        of.close()
    sf.close()
    print(missing_mates)
    return [count for key, read, count in sample]
开发者ID:petercombs,项目名称:HybridSliceSeq,代码行数:65,代码来源:SubSample.py

示例8: Samfile

# 需要导入模块: from pysam import Samfile [as 别名]
# 或者: from pysam.Samfile import write [as 别名]
    """
    Turns abbreviated MD into a full array
    """
    ret = []
    for i in re.findall("\d+|\^?[ATCGN]+", md):
        if i.startswith('^'):
            ret.extend(list(i[1:]))
        elif i[0] in ["A","T","C","G","N"]:
            ret.extend(list(i))
        else:
            ret.extend(['-']*int(i))

    return ret

if __name__ == '__main__':
    f = Samfile(sys.argv[1])
    out = Samfile(sys.argv[1][:-4]+"_realign.bam",'wb', template=f)
    count = 0.0
    n = 0.05
    for read in f: 
        q,t = expandAlign(read)
        query, target = realign(read)
        replace(read, query, target)
        out.write(read)
        count += 1
        if (count / f.mapped) > n:
            n += 0.05
            print "[%s] -- parsed %d of %d reads (%.2f)" % (time.asctime(), int(count), f.mapped, count/f.mapped )
        
    out.close()
开发者ID:dbrowneup,项目名称:PBSuite,代码行数:32,代码来源:realign.py


注:本文中的pysam.Samfile.write方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。