本文整理汇总了Python中pysam.index函数的典型用法代码示例。如果您正苦于以下问题:Python index函数的具体用法?Python index怎么用?Python index使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了index函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: map_paired_reads
def map_paired_reads(pe1_path, pe2_path, genome_path, output_path, args):
work_dir = tempfile.mkdtemp( )
genome_db = os.path.join( work_dir, "genome" )
pe1_output = os.path.join( work_dir, "pe1.sai" )
pe2_output = os.path.join( work_dir, "pe2.sai" )
bwa_output = os.path.join( work_dir, "output.sam" )
null = open( "/dev/null" ) #open("/tmp/bwa_out")#
subprocess.check_call( [ "bwa", "index", "-p", genome_db, genome_path ], stderr = null )
with open( pe1_output, "w" ) as pe1_file:
subprocess.check_call( [ "bwa", "aln", genome_db, pe1_path ], stdout = pe1_file, stderr = null )
with open( pe2_output, "w" ) as pe2_file:
subprocess.check_call( [ "bwa", "aln", genome_db, pe2_path ], stdout = pe2_file, stderr = null )
with open( bwa_output, "w" ) as bwa_file:
subprocess.check_call( [ "bwa", "sampe",
"-r", "@RG\tID:ILLUMINA\tSM:48_2\tPL:ILLUMINA\tLB:LIB1",
genome_db,
pe1_output, pe2_output,
pe1_path, pe2_path ], stdout = bwa_file, stderr = null )
if args.sam:
shutil.move(bwa_output ,output_path+'.sam')
#os.rename(bwa_output ,output_path+'.sam')
else:
sam_to_bam( bwa_output, bwa_output + ".bam" )
if args.sort:
# coordinate sort the file
pysam.sort( bwa_output + ".bam", output_path )
pysam.index(output_path+'.bam')
else:
shutil.move(bwa_output +".bam",output_path+'.bam')
示例2: bwa_mem
def bwa_mem(pe1_path, pe2_path, genome_path, threads, output_path):
print 'Aligning with bwa mem'
start = time()
work_dir = tempfile.mkdtemp()
genome_db = os.path.join(work_dir, "genome")
pe1_output = os.path.join(work_dir, "pe1.sai")
pe2_output = os.path.join(work_dir, "pe2.sai")
bwa_output = os.path.join(work_dir, "output.sam")
stderr_file = open(output_path+'.bwa.1','w')
#null = open("/dev/null")
subprocess.check_call([ "bwa", "index", "-p", genome_db, genome_path ], stderr=stderr_file)
with open(bwa_output, "w") as bwa_file:
subprocess.check_call([ "bwa", "mem", "-t", threads,
genome_db, pe1_path, pe2_path ],
stdout=bwa_file,
stderr=stderr_file)
elapsed = time() - start
print 'Time elapsed for bwa mem: ', elapsed
sam_to_bam(bwa_output, bwa_output + ".bam")
pysam.sort(bwa_output + ".bam", output_path)
pysam.index(output_path + '.bam')
shutil.rmtree(work_dir)
示例3: _generate_empty_bam_file
def _generate_empty_bam_file(self, sam_path, bam_path_prefix):
samfile = pysam.Samfile(sam_path, "r")
bamfile = pysam.Samfile(
"%s.bam" % bam_path_prefix, "wb", header=samfile.header)
bamfile.close()
samfile.close()
pysam.index("%s.bam" % bam_path_prefix)
示例4: addReadGroupSet
def addReadGroupSet(self, datasetName, filePath, moveMode):
"""
Add a read group set to the repo
"""
# move the bam file
self._check()
self._checkDataset(datasetName)
self._checkFile(filePath, self.bamExtension)
fileName = os.path.basename(filePath)
readGroupSetName = filenameWithoutExtension(
fileName, self.bamExtension)
destPath = os.path.join(
self._repoPath, self.datasetsDirName, datasetName,
self.readsDirName, fileName)
self._assertPathEmpty(destPath, inRepo=True)
self._moveFile(filePath, destPath, moveMode)
# move the index file if it exists, otherwise do indexing
indexPath = os.path.join(
os.path.split(filePath)[0],
readGroupSetName + self.bamIndexExtension)
indexMessage = ""
if os.path.exists(indexPath):
dstDir = os.path.split(destPath)[0]
self._moveFile(
indexPath,
os.path.join(dstDir, os.path.basename(indexPath)),
moveMode)
else:
pysam.index(destPath.encode('utf-8'))
indexMessage = " (and indexed)"
# finish
self._repoEmit("ReadGroupSet '{}' added to dataset '{}'{}".format(
fileName, datasetName, indexMessage))
示例5: run_cufflinks
def run_cufflinks(org_db, num_cpus=4):
"""
run cufflinks program on mapped reads
"""
try:
subprocess.call(["cufflinks"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except:
exit("Please make sure that the `Cufflinks` binary is in your $PATH")
org_name = org_db['short_name']
print("preparing for cufflinks run for organism %s" % org_name)
min_intron_length = 20
min_isoform_frac = 0.25
max_intron_length = org_db['max_intron_len']
result_dir = org_db['read_assembly_dir']
bam_file = "%s/%s_Aligned_mmr_sortbyCoord.bam" % (org_db['read_map_dir'], org_name)
if not os.path.isfile(bam_file):
sys.stdout.write("failed to fetch sorted mmr BAM file for organism: %s, trying to get the mmr file...\n" % org_name)
bam_file = "%s/%s_Aligned_mmr.bam" % (org_db['read_map_dir'], org_name)
if not os.path.isfile(bam_file):
exit("error: failed to fetch mmr BAM file for organism %s" % org_name)
## sorting, indexing the bam file
file_prefix, ext = os.path.splitext(bam_file)
sorted_bam = "%s_sortbyCoord" % file_prefix
sys.stdout.write("trying to sort based by the coordinates with output prefix as: %s\n" % sorted_bam)
if not os.path.isfile("%s.bam" % sorted_bam):
pysam.sort(bam_file, sorted_bam)
bam_file = "%s.bam" % sorted_bam
print('using bam file from %s' % bam_file)
if not os.path.exists(bam_file + ".bai"):
pysam.index(bam_file)
## always use quiet mode to avoid problems with storing log output.
cli_cuff = "cufflinks -q --no-update-check \
-F %.2f \
-I %d \
--min-intron-length %d \
--library-type fr-unstranded \
-p %d \
-o %s \
%s" % (min_isoform_frac, max_intron_length, min_intron_length, num_cpus, result_dir, bam_file)
sys.stdout.write('\trun cufflinks as: %s \n' % cli_cuff)
try:
os.chdir(result_dir)
process = subprocess.Popen(cli_cuff, shell=True)
returncode = process.wait()
if returncode !=0:
raise Exception, "Exit status return code = %i" % returncode
except Exception, e:
print 'Error running cufflinks.\n%s' % str( e )
示例6: bwa_sampe
def bwa_sampe(pe1_path, pe2_path, genome_path, output_path):
print 'Aligning with bwa aln/sampe'
start = time()
work_dir = tempfile.mkdtemp()
genome_db = os.path.join(work_dir, "genome")
pe1_output = os.path.join(work_dir, "pe1.sai")
pe2_output = os.path.join(work_dir, "pe2.sai")
bwa_output = os.path.join(work_dir, "output.sam")
null = open("/dev/null")
subprocess.check_call([ "bwa", "index", "-p", genome_db, genome_path ], stderr=null)
with open(pe1_output, "w") as pe1_file:
subprocess.check_call([ "bwa", "aln", genome_db, pe1_path ], stdout=pe1_file, stderr=null)
with open(pe2_output, "w") as pe2_file:
subprocess.check_call([ "bwa", "aln", genome_db, pe2_path ], stdout=pe2_file, stderr=null)
with open(bwa_output, "w") as bwa_file:
subprocess.check_call([ "bwa", "sampe",
genome_db,
pe1_output, pe2_output,
pe1_path, pe2_path ], stdout=bwa_file, stderr=null)
elapsed = time() - start
print 'Time elapsed for bwa aln/sampe: ', elapsed
sam_to_bam(bwa_output, bwa_output + ".bam")
pysam.sort(bwa_output + ".bam", output_path)
pysam.index(output_path + '.bam')
示例7: populate
def populate(self, sam_file_name, minimum_alignment_score):
if self.contig == "":
RuntimeError("contig must be set before reading a bam file")
if self.contig[0]==">":
current_contig_to_analyse = self.contig.lstrip('>') #Necessary because there is no ">" in the bam file...
else:
current_contig_to_analyse = self.contig
sys.stderr.write("Loading file %s\n" %sam_file_name)
samfile = pysam.Samfile(sam_file_name, 'rb')
if not samfile._hasIndex(): #if no index, we must build it
samfile.close()
sys.stderr.write("Building index for %s\n" % sam_file_name)
pysam.index(sam_file_name)
samfile = pysam.Samfile(sam_file_name, 'rb')
if self.position-3 < 0:
sys.stderr.write("%s position %s. I have problem computing this position\n" % (self.contig, self.position))
for pileup_data in samfile.pileup(current_contig_to_analyse, max([0,self.position-3]), self.position+1):
#print(str(self.position-3)+" "+str(pileup_data.pos)+" "+str(self.position+1))
if self.position-3 <= pileup_data.pos <= self.position+1:
#print('in')
for pileup_read in pileup_data.pileups:
if not pileup_read.alignment.qname in self.reads:
self.reads[pileup_read.alignment.qname] = {}
if ord(pileup_read.alignment.qual[pileup_read.qpos])-33 > minimum_alignment_score:
self.reads[pileup_read.alignment.qname][int(pileup_data.pos+1)] = \
pileup_read.alignment.seq[pileup_read.qpos] #using biological position, not python.
samfile.close()
示例8: tophat_map
def tophat_map(gtf, out_dir, prefix, fastq, thread, bw=False, scale=False,
gtf_flag=1):
'''
1. Map reads with TopHat2
2. Extract unmapped reads
3. Create BigWig file if needed
'''
# tophat2 mapping
print('Map reads with TopHat2...')
tophat_cmd = 'tophat2 -g 1 --microexon-search -m 2 '
if gtf_flag:
tophat_cmd += '-G %s ' % gtf
tophat_cmd += '-p %s -o %s ' % (thread, out_dir + '/tophat')
tophat_cmd += '%s/bowtie2_index/%s ' % (out_dir, prefix) + ','.join(fastq)
tophat_cmd += ' 2> %s/tophat.log' % out_dir
print('TopHat2 mapping command:')
print(tophat_cmd)
return_code = os.system(tophat_cmd) >> 8
if return_code:
sys.exit('Error: cannot map reads with TopHat2!')
# extract unmapped reads
print('Extract unmapped reads...')
unmapped_bam = pybedtools.BedTool('%s/tophat/unmapped.bam' % out_dir)
unmapped_bam.bam_to_fastq(fq='%s/tophat/unmapped.fastq' % out_dir)
# create Bigwig file if needed
if bw and which('bedGraphToBigWig') is not None:
print('Create BigWig file...')
map_bam_fname = '%s/tophat/accepted_hits.bam' % out_dir
# index bam if not exist
if not os.path.isfile(map_bam_fname + '.bai'):
pysam.index(map_bam_fname)
map_bam = pysam.AlignmentFile(map_bam_fname, 'rb')
# extract chrom size file
chrom_size_fname = '%s/tophat/chrom.size' % out_dir
with open(chrom_size_fname, 'w') as chrom_size_f:
for seq in map_bam.header['SQ']:
chrom_size_f.write('%s\t%s\n' % (seq['SN'], seq['LN']))
if scale: # scale to HPB
mapped_reads = map_bam.mapped
for read in map_bam:
read_length = read.query_length
break
s = 1000000000.0 / mapped_reads / read_length
else:
s = 1
map_bam = pybedtools.BedTool(map_bam_fname)
bedgraph_fname = '%s/tophat/accepted_hits.bg' % out_dir
with open(bedgraph_fname, 'w') as bedgraph_f:
for line in map_bam.genome_coverage(bg=True, g=chrom_size_fname,
scale=s, split=True):
value = str(int(float(line[3]) + 0.5))
bedgraph_f.write('\t'.join(line[:3]) + '\t%s\n' % value)
bigwig_fname = '%s/tophat/accepted_hits.bw' % out_dir
return_code = os.system('bedGraphToBigWig %s %s %s' %
(bedgraph_fname, chrom_size_fname,
bigwig_fname)) >> 8
if return_code:
sys.exit('Error: cannot convert bedGraph to BigWig!')
else:
print('Could not find bedGraphToBigWig, so skip this step!')
示例9: splitByStrand
def splitByStrand(bamfile, pe):
bam_prefix = bamfile.split(".bam")[0]
if pe:
flags = [('-f 0x40 -F 0x10', 'plus'), ('-f 0x40 -F 0x20', 'minus')]
cmd_args = [['samtools', 'view',
'-b', flag[0], bamfile,
bam_prefix + "_" + flag[1] + ".bam"]for flag in flags]
else:
flags = [('-F 0x10', 'plus'), ('-f 0x10', 'minus')]
cmd_args = [['samtools', 'view',
'-b', flag[0], bamfile,
bam_prefix + "_" + flag[1] + ".bam"]for flag in flags]
for cmd_arg in cmd_args:
print cmd_arg
if os.path.exists(cmd_arg[5]):
continue
outfile = open(cmd_arg[5], 'w')
p = Popen(cmd_arg[:5], stdout=outfile)
p.wait()
pysam.index(cmd_arg[5])
# Return split BAM names
return([cmd_arg[5] for cmd_arg in cmd_args])
示例10: indexed_bam
def indexed_bam(bam_filename):
import pysam
if not os.path.exists(bam_filename + ".bai"):
pysam.index(bam_filename)
sam_reader = pysam.Samfile(bam_filename, "rb")
yield sam_reader
sam_reader.close()
示例11: convert_sam_to_bam
def convert_sam_to_bam():
"""
This method should take a newly create .sam file from alignment and
- convert it to .bam
- sort .bam
- index .bam
"""
ids = generate_ids()
for id in ids:
start_time = time()
print 'converting: %s'%id
base_path = os.path.join(SAMPLE_DIR, id)
sam_path = os.path.join(base_path, id+'-bwape.sam')
bam_path = os.path.join(base_path, id+'-bwape.bam')
bam_content = pysam.view('-bS', sam_path)
bam_file = open(bam_path, 'w+')
bam_file.writelines(bam_content)
bam_file.close()
pysam.sort(bam_path, bam_path+'_sorted')
pysam.index(bam_path+'_sorted.bam')
# indexing creates file.bam.bam. Move it to file.bam
bam_call = "mv {0} {1}".format(bam_path+'_sorted.bam', bam_path)
index_call = "mv {0} {1}".format(bam_path+'_sorted.bam.bai',
bam_path+'.bam.bai')
subprocess.call(bam_call, shell=True)
subprocess.call(index_call, shell=True)
end_time = time()
print 'completed: %.3fs'%(end_time-start_time)
示例12: sort_bam
def sort_bam(in_bam, sort_fn, to_include=None):
out_file = "%s-ksort%s" % os.path.splitext(in_bam)
index_file = "%s.bai" % in_bam
if not os.path.exists(index_file):
pysam.index(in_bam)
orig = pysam.Samfile(in_bam, "rb")
chroms = [(c["SN"], c) for c in orig.header["SQ"]]
new_chroms = chroms[:]
if to_include:
new_chroms = [(c, x) for (c, x) in new_chroms if c in to_include]
new_chroms.sort(sort_fn)
remapper = _id_remapper(chroms, new_chroms)
new_header = orig.header
new_header["SQ"] = [h for (_, h) in new_chroms]
new = pysam.Samfile(out_file, "wb", header=new_header)
for (chrom, _) in new_chroms:
for read in orig.fetch(chrom):
write = True
read.rname = remapper[read.rname]
try:
read.mrnm = remapper[read.mrnm]
# read pair is on a chromosome we are not using
except KeyError:
assert to_include is not None
write = False
if write:
new.write(read)
示例13: extractRegion
def extractRegion(bamfile,start,stop,output,exact):
pysam.index(bamfile) # must create a .bai index for any bam file to be read or fetch won't work
bam = pysam.Samfile(bamfile,'rb') # and must be done before bamfile is opened
ref = bam.references[0] # Get name of reference reads aligned to in bam
outfile = open(bamfile+".extracted."+output,'w')
# Get the reads in region of interest
read_pool = bam.fetch(bam.references[0], start,stop)
# Process reads
for read in read_pool:
if exact != '':
if read.pos <= start and read.aend >= stop:
if output == 'fastq':
outfile.write(writeFastQ(read))
elif output =='fasta':
output.write(writeFastA(read))
else:
if output == 'fastq':
outfile.write(writeFastQ(read))
elif output == 'fasta':
outfile.write(writeFastA(read))
outfile.close()
return
示例14: extractRegion
def extractRegion(bamfile,start,stop,output):
pysam.index(bamfile) # must create a .bai index for any bam file to be read or fetch won't work
bam = pysam.Samfile(bamfile,'rb') # and must be done before bamfile is opened
ref = bam.references[0] # Get name of reference reads aligned to in bam
outfile = open(bamfile+".extracted."+output,'w')
# Get the reads in region of interest
read_pool = bam.fetch(bam.references[0], start,stop)
# Process reads
for read in read_pool:
if read.is_reverse == True: # all reverse reads in a bam file have been reverse
seq = Seq(read.query) # complemented already so they need to be reverse
rc = seq.reverse_complement().tostring() # complemented again, along with the quality scores
rq = reverseString(read.qqual) # to write correctly to the fastq
if output == 'fastq':
outfile.write("@"+read.qname+"\n"+rc+"\n+\n"+rq+"\n")
elif output == 'fasta':
outfile.write('>'+read.qname+'\n'+rc+'\n')
else:
if output == 'fastq':
outfile.write("@"+read.qname+"\n"+read.query+"\n+\n"+read.qqual+"\n")
elif output == 'fasta':
outfile.write('>'+read.qname+'\n'+read.query+'\n')
outfile.close()
return
示例15: read_directions_count
def read_directions_count(bam_file):
"""
get the reads directions count from a bam file
@args bam_file: binary file formt for storing sequencing reads information
@type bam_file: str
"""
## indexing the in bam file
if not os.path.exists(bam_file + ".bai"):
pysam.index(bam_file)
reverse_cnt = 0
forward_cnt = 0
bam_fh = pysam.Samfile(bam_file, "rb")
for read in bam_fh.fetch():
if read.is_proper_pair and read.is_read1:
if read.is_reverse:
reverse_cnt += 1
else:
forward_cnt += 1
bam_fh.close()
return {'forward_reads_count': forward_cnt, 'reverse_reads_count': reverse_cnt}