本文整理汇总了Python中pysam.sort函数的典型用法代码示例。如果您正苦于以下问题:Python sort函数的具体用法?Python sort怎么用?Python sort使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了sort函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: map_paired_reads
def map_paired_reads(pe1_path, pe2_path, genome_path, output_path):
work_dir = tempfile.mkdtemp()
genome_db = os.path.join(work_dir, "genome")
pe1_output = os.path.join(work_dir, "pe1.sai")
pe2_output = os.path.join(work_dir, "pe2.sai")
bwa_output = os.path.join(work_dir, "output.sam")
null = open("/dev/null")
subprocess.check_call([ "bwa", "index", "-p", genome_db, genome_path ], stderr=null)
with open(pe1_output, "w") as pe1_file:
subprocess.check_call([ "bwa", "aln", genome_db, pe1_path ], stdout=pe1_file, stderr=null)
with open(pe2_output, "w") as pe2_file:
subprocess.check_call([ "bwa", "aln", genome_db, pe2_path ], stdout=pe2_file, stderr=null)
with open(bwa_output, "w") as bwa_file:
subprocess.check_call([ "bwa", "sampe",
"-r", "@RG\tID:ILLUMINA\tSM:48_2\tPL:ILLUMINA\tLB:LIB1",
genome_db,
pe1_output, pe2_output,
pe1_path, pe2_path ], stdout=bwa_file, stderr=null)
sam_to_bam(bwa_output, bwa_output + ".bam")
pysam.sort(bwa_output + ".bam", output_path)
pysam.index(output_path + '.bam')
示例2: convertSortAlign
def convertSortAlign(output_filename):
# Pregenerate file names for all the intermediate steps (output_filename is the output of the Bowtie2 alignment)
# Note that the file extension is not always given depending on the input conventions of the tool being called
sam_filename=output_filename+'.sam'
bam_filename=output_filename+'.bam'
sorted_filename_input=output_filename+'_sorted'
sorted_filename_output=output_filename+'_sorted.bam'
# convert sam to bam
print 'Converting {0} to {1} . . .'.format(sam_filename,bam_filename)
try:
SamtoBam(sam_filename,bam_filename)
except Exception as ex:
print "Error converting sam to bam ({0}): {1}".format(ex.errno, ex.strerror)
return False
# sort
print 'Sorting {0} -> {1}'.format(bam_filename,sorted_filename_output)
try:
pysam.sort(bam_filename,sorted_filename_input)
except Exception as ex:
print "Error sorting bam file ({0}): {1}".format(ex.errno, ex.strerror)
return False
# index
print 'Indexing {0} . . .'.format(sorted_filename_output)
try:
pysam.index(sorted_filename_output)
except Exception as ex:
print "Error indexing bam file ({0}): {1}".format(ex.errno, ex.strerror)
return False
print
print 'Done'
return True
示例3: map_paired_reads
def map_paired_reads(pe1_path, pe2_path, genome_path, output_path, args):
work_dir = tempfile.mkdtemp( )
genome_db = os.path.join( work_dir, "genome" )
pe1_output = os.path.join( work_dir, "pe1.sai" )
pe2_output = os.path.join( work_dir, "pe2.sai" )
bwa_output = os.path.join( work_dir, "output.sam" )
null = open( "/dev/null" ) #open("/tmp/bwa_out")#
subprocess.check_call( [ "bwa", "index", "-p", genome_db, genome_path ], stderr = null )
with open( pe1_output, "w" ) as pe1_file:
subprocess.check_call( [ "bwa", "aln", genome_db, pe1_path ], stdout = pe1_file, stderr = null )
with open( pe2_output, "w" ) as pe2_file:
subprocess.check_call( [ "bwa", "aln", genome_db, pe2_path ], stdout = pe2_file, stderr = null )
with open( bwa_output, "w" ) as bwa_file:
subprocess.check_call( [ "bwa", "sampe",
"-r", "@RG\tID:ILLUMINA\tSM:48_2\tPL:ILLUMINA\tLB:LIB1",
genome_db,
pe1_output, pe2_output,
pe1_path, pe2_path ], stdout = bwa_file, stderr = null )
if args.sam:
shutil.move(bwa_output ,output_path+'.sam')
#os.rename(bwa_output ,output_path+'.sam')
else:
sam_to_bam( bwa_output, bwa_output + ".bam" )
if args.sort:
# coordinate sort the file
pysam.sort( bwa_output + ".bam", output_path )
pysam.index(output_path+'.bam')
else:
shutil.move(bwa_output +".bam",output_path+'.bam')
示例4: sort_by_position
def sort_by_position(bam_file, dir):
## get the file prefix
prefix = ""
prefix_match = re.match(r"(.*).bam", bam_file)
try:
prefix = prefix_match.group(1)
except:
print "Existing: Invalid bam file -i %s" %(bam_file)
sys.exit(2)
# sort the bam file
bam_input = dir + bam_file
sort_bam = dir + prefix + "_sorted"
pysam.sort(bam_input, sort_bam)
sort_bam = sort_bam + ".bam"
# index the sort bam file
pysam.index(sort_bam)
print ""
print "Writing Sorted Bam File : %s" %(sort_bam)
print "Writing Index Sorted Bam File : %s.bai" %(sort_bam)
return sort_bam
示例5: run
def run(self):
AbstractAnalysis.run(self) #Call base method to do some logging
localBamFile = os.path.join(self.getLocalTempDir(), "mapping.bam")
localSortedBamFile = os.path.join(self.getLocalTempDir(), "mapping.sorted")
samToBamFile(self.samFile, localBamFile)
pysam.sort(localBamFile, localSortedBamFile)
pysam.index(localSortedBamFile + ".bam")
pysam.faidx(self.referenceFastaFile)
file_header = self.readFastqFile.split(".fastq")[0].split("/")[-1] + "_" + self.referenceFastaFile.split(".fa")[0].split("/")[-1]
consensus_vcf = os.path.join(self.outputDir, file_header + "_Consensus.vcf")
consensus_fastq = os.path.join(self.outputDir, file_header + "_Consensus.fastq")
system("samtools mpileup -Q 0 -uf %s %s | bcftools view -cg - > %s" \
% (self.referenceFastaFile, localSortedBamFile + ".bam", consensus_vcf))
system("vcfutils.pl vcf2fq %s > %s" % (consensus_vcf, consensus_fastq))
system("rm -rf %s" % (self.referenceFastaFile + ".fai"))
formatted_consensus_fastq = os.path.join(self.getLocalTempDir(), "Consensus.fastq")
formatConsensusFastq(consensus_fastq, formatted_consensus_fastq)
system("mv %s %s" % (formatted_consensus_fastq, consensus_fastq))
self.finish()
示例6: check_bam
def check_bam(bam, p, make_new_index=False):
"""
Sort and index bam file
returns dictionary of chromosome names and lengths
"""
# check if sorted
test_head = pysam.AlignmentFile(bam, 'rb')
chrom_sizes = {}
p = str(p)
for i in test_head.header['SQ']:
chrom_sizes[i['SN']] = int(i['LN'])
try:
test_head.header['HD']['SO']
except KeyError:
print ' sorting bam file'
pysam.sort('[email protected]', p, bam, 'sorted.temp')
os.remove(bam)
os.rename('sorted.temp.bam', bam)
else:
if test_head.header['HD']['SO'] == 'coordinate':
pass
else:
print ' sorting bam file'
pysam.sort('[email protected]', p, bam, 'sorted.temp')
os.remove(bam)
os.rename('sorted.temp.bam', bam)
test_head.close()
# check if indexed
if '{}.bai'.format(bam) in os.listdir('.') and make_new_index is False:
pass
else:
print ' indexing bam file'
pysam.index(bam)
return chrom_sizes
示例7: main
def main(infile, snp_dir, max_window=MAX_WINDOW_DEFAULT,
is_paired_end=False, is_sorted=False):
name_split = infile.split(".")
if len(name_split) > 1:
pref = ".".join(name_split[:-1])
else:
pref = name_split[0]
if not is_sorted:
pysam.sort(infile, pref + ".sort")
infile = pref + ".sort"
sort_file_name = pref + ".sort.bam"
else:
sort_file_name = infile
keep_file_name = pref + ".keep.bam"
remap_name = pref + ".to.remap.bam"
remap_num_name = pref + ".to.remap.num.gz"
if is_paired_end:
fastq_names = [pref + ".remap.fq1.gz",
pref + ".remap.fq2.gz"]
else:
fastq_names = [pref + ".remap.fq.gz"]
bam_data = BamScanner(is_paired_end, max_window,
sort_file_name, keep_file_name, remap_name,
remap_num_name, fastq_names, snp_dir)
bam_data.run()
示例8: disciple
def disciple(bam_fname, bam_hdr, rg_id, long_qname_table, cigar_v2, in_queue):
"""Create a BAM file from the FASTQ lines fed to it via in_queue
:param bam_fname:
:param bam_hdr:
:param rg_id:
:param long_qname_table:
:param cigar_v2:
:param in_queue:
:return:
"""
logger.debug('Writing to {} ...'.format(bam_fname))
t0 = time.time()
fp = pysam.AlignmentFile(bam_fname, 'wb', header=bam_hdr)
ref_dict = {k['SN']: n for n, k in enumerate(bam_hdr['SQ'])}
cnt = 0
for cnt, (qname, read_data) in enumerate(iter(in_queue.get, __process_stop_code__)):
write_perfect_reads(qname, rg_id, long_qname_table, ref_dict, read_data, cigar_v2, fp)
fp.close()
t1 = time.time()
logger.debug('... {}: {} reads in {:0.2f}s ({:0.2f} t/s)'.format(bam_fname, cnt, t1 - t0, cnt/(t1 - t0)))
logger.debug('Sorting {} -> {}'.format(bam_fname, bam_fname + '.sorted'))
t0 = time.time()
pysam.sort('-m', '1G', '-o', bam_fname + '.sorted', bam_fname)
os.remove(bam_fname)
t1 = time.time()
logger.debug('... {:0.2f}s'.format(t1 - t0))
logger.debug('Shutting down thread for {}'.format(bam_fname))
示例9: miraligner
def miraligner(args):
"""
Realign BAM hits to miRBAse to get better accuracy and annotation
"""
hairpin, mirna = _download_mirbase(args)
precursors = _read_precursor(args.hairpin, args.sps)
matures = _read_mature(args.mirna, args.sps)
gtf = _read_gtf(args.gtf)
out_dts = []
for bam_fn in args.files:
sample = op.splitext(op.basename(bam_fn))[0]
if bam_fn.endswith("bam") or bam_fn.endswith("sam"):
logger.info("Reading %s" % bam_fn)
bam_fn = _sam_to_bam(bam_fn)
bam_sort_by_n = op.splitext(bam_fn)[0] + "_sort"
pysam.sort("-n", bam_fn, bam_sort_by_n)
reads = _read_bam(bam_sort_by_n + ".bam", precursors)
elif bam_fn.endswith("fasta") or bam_fn.endswith("fa") or bam_fn.endswith("fastq"):
out_file = op.join(args.out, sample + ".premirna")
bam_fn = _filter_seqs(bam_fn)
if args.miraligner:
_cmd_miraligner(bam_fn, out_file, args.sps, args.hairpin)
reads = _read_miraligner(out_file)
else:
if bam_fn.endswith("fastq"):
bam_fn = _convert_to_fasta(bam_fn)
logger.info("Aligning %s" % bam_fn)
if not file_exists(out_file):
pyMatch.Miraligner(hairpin, bam_fn, out_file, 1, 4)
reads = _read_pyMatch(out_file, precursors)
else:
raise ValueError("Format not recognized.")
if not args.miraligner:
reads = _annotate(reads, matures, precursors)
out_file = op.join(args.out, sample + ".mirna")
out_file, dt, dt_pre= _tab_output(reads, out_file, sample)
try:
vcf_file = op.join(args.out, sample + ".vcf")
if not file_exists(vcf_file):
# if True:
create_vcf(dt_pre, matures, gtf, vcf_file)
try:
import vcf
vcf.Reader(filename=vcf_file)
except Exception as e:
logger.warning(e.__doc__)
logger.warning(e.message)
except Exception as e:
# traceback.print_exc()
logger.warning(e.__doc__)
logger.warning(e.message)
if isinstance(dt, pd.DataFrame):
out_dts.append(dt)
if out_dts:
_create_counts(out_dts, args.out)
# _summarize(out_dts)
else:
print "No files analyzed!"
示例10: main
def main():
# Read options, args.
parser = optparse.OptionParser()
(options, args) = parser.parse_args()
input_fname, output_fname = args
slots = os.getenv('GALAXY_SLOTS', 1)
pysam.sort("[email protected]%s" % slots, '-o', output_fname, '-O', 'bam', '-T', '.', input_fname)
示例11: run_cufflinks
def run_cufflinks(org_db, num_cpus=4):
"""
run cufflinks program on mapped reads
"""
try:
subprocess.call(["cufflinks"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except:
exit("Please make sure that the `Cufflinks` binary is in your $PATH")
org_name = org_db['short_name']
print("preparing for cufflinks run for organism %s" % org_name)
min_intron_length = 20
min_isoform_frac = 0.25
max_intron_length = org_db['max_intron_len']
result_dir = org_db['read_assembly_dir']
bam_file = "%s/%s_Aligned_mmr_sortbyCoord.bam" % (org_db['read_map_dir'], org_name)
if not os.path.isfile(bam_file):
sys.stdout.write("failed to fetch sorted mmr BAM file for organism: %s, trying to get the mmr file...\n" % org_name)
bam_file = "%s/%s_Aligned_mmr.bam" % (org_db['read_map_dir'], org_name)
if not os.path.isfile(bam_file):
exit("error: failed to fetch mmr BAM file for organism %s" % org_name)
## sorting, indexing the bam file
file_prefix, ext = os.path.splitext(bam_file)
sorted_bam = "%s_sortbyCoord" % file_prefix
sys.stdout.write("trying to sort based by the coordinates with output prefix as: %s\n" % sorted_bam)
if not os.path.isfile("%s.bam" % sorted_bam):
pysam.sort(bam_file, sorted_bam)
bam_file = "%s.bam" % sorted_bam
print('using bam file from %s' % bam_file)
if not os.path.exists(bam_file + ".bai"):
pysam.index(bam_file)
## always use quiet mode to avoid problems with storing log output.
cli_cuff = "cufflinks -q --no-update-check \
-F %.2f \
-I %d \
--min-intron-length %d \
--library-type fr-unstranded \
-p %d \
-o %s \
%s" % (min_isoform_frac, max_intron_length, min_intron_length, num_cpus, result_dir, bam_file)
sys.stdout.write('\trun cufflinks as: %s \n' % cli_cuff)
try:
os.chdir(result_dir)
process = subprocess.Popen(cli_cuff, shell=True)
returncode = process.wait()
if returncode !=0:
raise Exception, "Exit status return code = %i" % returncode
except Exception, e:
print 'Error running cufflinks.\n%s' % str( e )
示例12: align_to_bam_file
def align_to_bam_file(self, reference_fasta_path, query_fasta_path, output_bam_path, multiple=False, assert_record=None):
logging.debug('LastzRunner: running on reference %s and query %s' %
(reference_fasta_path, query_fasta_path))
output_sam_path = os.path.abspath(
os.path.expandvars(output_bam_path.replace('.bam', '.sam')))
output_bam_unsorted_path = os.path.abspath(
os.path.expandvars(output_bam_path + '.unsorted'))
logging.debug(
'LastzRunner: aligning with output in temporary sam file %s' %
output_sam_path)
with open(output_sam_path, 'w') as output_sam_handler:
for line in self._align(reference_fasta_path, query_fasta_path, multiple):
output_sam_handler.write(line)
logging.debug(
'LastzRunner: transforming sam into unsorted bam file %s' %
output_bam_unsorted_path)
input_sam_handler = pysam.Samfile(output_sam_path, "r")
output_bam_file = pysam.Samfile(
output_bam_unsorted_path, "wb", template=input_sam_handler)
logging.debug(
'LastzRunner: copying from sam file to bam file')
for s in input_sam_handler:
output_bam_file.write(s)
output_bam_file.close()
logging.debug('LastzRunner: sorting and indexing bam file %s' %
output_bam_path)
pysam.sort(output_bam_unsorted_path,
output_bam_path.replace('.bam', ''))
pysam.index(output_bam_path)
示例13: makeAggregate
def makeAggregate(cells, directory, suffix, output):
"""
Create aggregate sample.
Make an aggregate bam file from a list of cells, sorts and indexes
the file for easy use in IGV. Suffix is required to prevent non
0-padded numbers matching the wrong files. Return final file name.
Parameters
----------
cells : list
List of cell names to create aggregate from.
directory : string
Directory path with the bam files from each cell.
suffix : string
String to match the end of the bam file, use to add file extension
and to anchor the extension after file numbers - this will prevent
cell_4 matching cell_4*.
output : string
String containing output file location.
"""
from glob import glob
cells = set(cells)
fileList = []
for cell in cells:
fileList.append(glob(os.path.join(directory, "*" + cell + suffix))[0])
pysam.cat("-o", output + ".bam", *fileList, catch_stdout=False)
pysam.sort(output + ".bam", output + ".sorted", catch_stdout=False)
pysam.index(output + ".sorted.bam", catch_stdout=False)
return output + ".sorted.bam"
示例14: saveReads
def saveReads(dataHub, nameExtra=None):
if dataHub.args.save_reads:
logging.info("* Saving relevant reads *")
for i, sample in enumerate(dataHub):
outbam_path = dataHub.args.save_reads
if not outbam_path.endswith(".bam"):
outbam_path += ".bam"
if len(dataHub.samples) > 1:
logging.debug("Using i = {}".format(i))
outbam_path = outbam_path.replace(".bam", ".{}.bam".format(i))
if nameExtra is not None:
outbam_path = outbam_path.replace(".bam", ".{}.bam".format(nameExtra))
logging.info(" Outpath: {}".format(outbam_path))
# print out just the reads we're interested for use later
bam_small = pysam.Samfile(outbam_path, "wb", template=sample.bam)
for read in sample.reads:
bam_small.write(read)
for read in sample.readStatistics.reads:
bam_small.write(read)
bam_small.close()
sorted_path = outbam_path.replace(".bam", ".sorted")
pysam.sort(outbam_path, sorted_path)
pysam.index(sorted_path+".bam")
示例15: bwa_sampe
def bwa_sampe(pe1_path, pe2_path, genome_path, output_path):
print 'Aligning with bwa aln/sampe'
start = time()
work_dir = tempfile.mkdtemp()
genome_db = os.path.join(work_dir, "genome")
pe1_output = os.path.join(work_dir, "pe1.sai")
pe2_output = os.path.join(work_dir, "pe2.sai")
bwa_output = os.path.join(work_dir, "output.sam")
null = open("/dev/null")
subprocess.check_call([ "bwa", "index", "-p", genome_db, genome_path ], stderr=null)
with open(pe1_output, "w") as pe1_file:
subprocess.check_call([ "bwa", "aln", genome_db, pe1_path ], stdout=pe1_file, stderr=null)
with open(pe2_output, "w") as pe2_file:
subprocess.check_call([ "bwa", "aln", genome_db, pe2_path ], stdout=pe2_file, stderr=null)
with open(bwa_output, "w") as bwa_file:
subprocess.check_call([ "bwa", "sampe",
genome_db,
pe1_output, pe2_output,
pe1_path, pe2_path ], stdout=bwa_file, stderr=null)
elapsed = time() - start
print 'Time elapsed for bwa aln/sampe: ', elapsed
sam_to_bam(bwa_output, bwa_output + ".bam")
pysam.sort(bwa_output + ".bam", output_path)
pysam.index(output_path + '.bam')