This article collects typical usage examples of the pysam.sort method in Python. If you are wondering how to use pysam.sort, or what pysam.sort looks like in practice, the curated code examples below may help. You can also explore the pysam module it belongs to for further usage examples.
The following presents 15 code examples of the pysam.sort method, listed by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
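Before the examples, a minimal sketch of the call pattern they all build on may help: pysam.sort forwards its arguments to samtools sort, so a coordinate sort followed by indexing looks roughly like this (file names are hypothetical):

import pysam

# Coordinate-sort input.bam into sorted.bam, then build sorted.bam.bai.
pysam.sort("-o", "sorted.bam", "input.bam")
pysam.index("sorted.bam")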
Example 1: check_name_sorted
# Required import: import pysam [as alias]
# Or: from pysam import sort [as alias]
def check_name_sorted(bam, p, prefix):
    """
    Sort bam file by read name if it is currently position sorted
    """
    test_head = pysam.AlignmentFile(bam, 'rb')
    p = str(p)
    try:
        test_head.header['HD']['SO']
    except KeyError:
        print('  sorting bam file')
        pysam.sort('-@', p, '-n', bam, prefix + 'sorted.temp')
        os.remove(bam)
        os.rename(prefix + 'sorted.temp.bam', bam)
    else:
        if test_head.header['HD']['SO'] == 'queryname':
            pass
        else:
            print('  sorting bam file')
            pysam.sort('-@', p, '-n', bam, prefix + 'sorted.temp')
            os.remove(bam)
            os.rename(prefix + 'sorted.temp.bam', bam)
    test_head.close()
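The call above passes the output name as a positional prefix, which follows the legacy samtools sort interface; recent pysam/samtools releases expect the output to be named with -o. A minimal sketch of the equivalent name sort under that assumption (file names hypothetical):

import pysam

# Name-sort reads.bam into reads.namesorted.bam with 4 threads;
# '-n' requests queryname order and '-o' names the output file.
pysam.sort("-@", "4", "-n", "-o", "reads.namesorted.bam", "reads.bam")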
Example 2: sam_to_bam
# Required import: import pysam [as alias]
# Or: from pysam import sort [as alias]
def sam_to_bam(self, sam_path, bam_path_prefix):
    if self._sam_file_is_empty(sam_path) is True:
        # pysam will generate an error if an empty SAM file is
        # converted. Because of this, an empty BAM file with the
        # same header information is generated from scratch.
        self._generate_empty_bam_file(sam_path, bam_path_prefix)
        # Remove SAM file
        os.remove(sam_path)
        return
    temp_unsorted_bam_path = self._temp_unsorted_bam_path(
        bam_path_prefix)
    # Generate unsorted BAM file
    pysam.samtools.view(
        "-b", "-o{}".format(temp_unsorted_bam_path), sam_path,
        catch_stdout=False)
    # Generate sorted BAM file
    pysam.sort(temp_unsorted_bam_path, "-o", bam_path_prefix + ".bam")
    # Generate index for BAM file
    pysam.index("%s.bam" % bam_path_prefix)
    # Remove unsorted BAM file
    os.remove(temp_unsorted_bam_path)
    # Remove SAM file
    os.remove(sam_path)
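The helpers _sam_file_is_empty and _generate_empty_bam_file are defined elsewhere in the class. A plausible sketch of the latter, assuming it only copies the SAM header into an otherwise empty BAM:

import pysam

def generate_empty_bam_file(sam_path, bam_path_prefix):
    # Copy the SAM header into a new BAM file without writing any reads.
    with pysam.AlignmentFile(sam_path, "r", check_sq=False) as sam:
        with pysam.AlignmentFile(bam_path_prefix + ".bam", "wb",
                                 template=sam):
            pass  # header only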
Example 3: bamSort
# Required import: import pysam [as alias]
# Or: from pysam import sort [as alias]
def bamSort(outputBAM, log, newHeader, verbose):
    tmp = outputBAM + "_tmp"
    if newHeader is not None:
        pyOutputBAM = pysam.AlignmentFile(outputBAM, "rb")
        pyTmp = pysam.AlignmentFile(tmp, "wb", header=newHeader)
        for read in pyOutputBAM:
            pyTmp.write(read)
        pyOutputBAM.close()
        pyTmp.close()
    else:
        os.rename(outputBAM, tmp)
    # run(" ".join(["samtools", "sort", "-@", str(threads), tmp, replaceExtension(outFile, "")]), log, verbose=verbose, dry=dry)
    run(" ".join(["samtools sort", "-o", outputBAM, tmp]), log, verbose=verbose, dry=False)
    # pysam.sort(tmp, outputBAM)  # @UndefinedVariable
    removeFile(tmp)
Example 4: sam_to_bam
# Required import: import pysam [as alias]
# Or: from pysam import sort [as alias]
def sam_to_bam(filename):
    """Convert sam to bam"""
    import pysam
    infile = pysam.AlignmentFile(filename, "r")
    name = os.path.splitext(filename)[0] + '.bam'
    bamfile = pysam.AlignmentFile(name, "wb", template=infile)
    for s in infile:
        bamfile.write(s)
    # close handles before sorting so the BAM is finalised on disk
    bamfile.close()
    infile.close()
    # sort in place, overwriting the unsorted BAM, then index it
    pysam.sort("-o", name, name)
    pysam.index(name)
    return name
Example 5: saveReads
# Required import: import pysam [as alias]
# Or: from pysam import sort [as alias]
def saveReads(dataHub, nameExtra=None):
    if dataHub.args.save_reads:
        logging.info("* Saving relevant reads *")
        for i, sample in enumerate(dataHub):
            outbam_path = dataHub.args.save_reads
            if not outbam_path.endswith(".bam"):
                outbam_path += ".bam"
            if len(dataHub.samples) > 1:
                logging.debug("Using i = {}".format(i))
                outbam_path = outbam_path.replace(".bam", ".{}.bam".format(i))
            if nameExtra is not None:
                outbam_path = outbam_path.replace(".bam", ".{}.bam".format(nameExtra))
            logging.info("  Outpath: {}".format(outbam_path))
            # print out just the reads we're interested in for use later
            bam_small = pysam.Samfile(outbam_path, "wb", template=sample.bam)
            for read in sample.reads:
                bam_small.write(read)
            for read in sample.readStatistics.reads:
                bam_small.write(read)
            bam_small.close()
            sorted_path = outbam_path.replace(".bam", ".sorted")
            pysam.sort(outbam_path, sorted_path)
            pysam.index(sorted_path + ".bam")
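As in Example 1, pysam.sort(outbam_path, sorted_path) uses the legacy output-prefix form. Under the -o interface the final two steps would look roughly like this (the path is hypothetical and stands in for one iteration of the loop above):

import pysam

outbam_path = "relevant_reads.0.bam"   # hypothetical output of the loop above
sorted_path = outbam_path.replace(".bam", ".sorted.bam")
pysam.sort("-o", sorted_path, outbam_path)
pysam.index(sorted_path)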
Example 6: print_read_stack
# Required import: import pysam [as alias]
# Or: from pysam import sort [as alias]
def print_read_stack(reads, refseq=None, cutoff=0, by=None, label=''):
    """Print local read alignments from a sam file against the mapped sequence.

    Args:
        reads: dataframe of read counts with position info
        refseq: sequence segment or gene that reads are mapped to
        cutoff: don't display reads with counts below this value
        by: column to sort reads by, e.g. 'reads', 'start' or 'end'
    Returns:
        string of printed read stacks to display or write to a file
    """
    if refseq is not None:
        seqlen = len(refseq)
    else:
        seqlen = reads.end.max()
    f = None
    reads = reads[reads.reads >= cutoff]
    if by is not None:
        reads = reads.sort_values(by, ascending=False)
    l = len(reads)
    if l == 0:
        return
    s = ''
    s += '%s unique reads\n' % l
    s += '-------------------------------------\n'
    s += refseq + '\n'
    for idx, r in reads.iterrows():
        seq = r.seq
        count = r.reads
        pos = find_subseq(refseq, seq)
        if pos == -1:
            continue
        i = len(seq) + pos
        s += "{:>{w}} ({c})\n".format(seq, w=i, c=count)
    s += '\n'
    return s
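A toy invocation may clarify the expected input: a DataFrame with seq, reads, start and end columns. This sketch assumes find_subseq(refseq, seq) behaves like refseq.find(seq); both the helper and the data are hypothetical.

import pandas as pd

def find_subseq(refseq, seq):
    # Hypothetical stand-in for the helper used above.
    return refseq.find(seq)

reads = pd.DataFrame({'seq':   ['ACGTAC', 'GTACGG'],
                      'reads': [12, 5],
                      'start': [0, 2],
                      'end':   [6, 8]})
print(print_read_stack(reads, refseq='ACGTACGG', cutoff=1, by='reads'))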
Example 7: featurecounts
# Required import: import pysam [as alias]
# Or: from pysam import sort [as alias]
def featurecounts(samfile, gtffile):
    """Count aligned features with the featureCounts program.
    Returns: a dataframe of counts"""
    params = '-T 5 -t exon -g transcript_id'
    cmd = 'featureCounts %s -a %s -o counts.txt %s' % (params, gtffile, samfile)
    print(cmd)
    result = subprocess.check_output(cmd, shell=True, executable='/bin/bash')
    counts = pd.read_csv('counts.txt', sep='\t', comment='#')
    counts = counts.rename(columns={samfile: 'reads'})
    counts = counts.sort_values('reads', ascending=False)
    return counts
Example 8: preprocess_sam
# Required import: import pysam [as alias]
# Or: from pysam import sort [as alias]
def preprocess_sam(sam_files, datasets, tmp_dir="talon_tmp/", n_threads=0):
    """ Copy and rename the provided SAM/BAM file(s), merge them, and index.
        This is necessary in order to use Pybedtools commands on the reads.
        The renaming is necessary in order to label the reads according to
        their dataset."""
    # Create the tmp dir
    os.system("mkdir -p %s " % (tmp_dir))
    # Copy and rename SAM files with dataset names to ensure correct RG tags
    renamed_sams = []
    for sam, dataset in zip(sam_files, datasets):
        suffix = "." + sam.split(".")[-1]
        if suffix == ".sam":
            bam_copy = tmp_dir + dataset + "_unsorted.bam"
            convert_to_bam(sam, bam_copy)
            sam = bam_copy
        sorted_bam = tmp_dir + dataset + ".bam"
        pysam.sort("-@", str(n_threads), "-o", sorted_bam, sam)
        renamed_sams.append(sorted_bam)
    merged_bam = tmp_dir + "merged.bam"
    merge_args = [merged_bam] + renamed_sams + ["-f", "-r", "-@", str(n_threads)]
    # index_args = [merged_bam, "-@", str(n_threads)]
    # Merge datasets and use -r option to include a read group tag
    try:
        pysam.merge(*merge_args)
        pysam.index(merged_bam)
        ts = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
        print("[ %s ] Merged input SAM/BAM files" % (ts))
    except:
        raise RuntimeError(("Problem merging and indexing SAM/BAM files. "
                            "Check your file paths and make sure that all "
                            "files have headers."))
    return merged_bam
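convert_to_bam is not shown above; a minimal sketch of such a helper, assuming it only needs to stream a SAM file into BAM format, might be:

import pysam

def convert_to_bam(sam, bam):
    # SAM -> BAM conversion via samtools view; '-b' requests BAM output.
    pysam.view("-b", "-o", bam, sam, catch_stdout=False)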
Example 9: partition_reads
# Required import: import pysam [as alias]
# Or: from pysam import sort [as alias]
def partition_reads(sam_files, datasets, tmp_dir="talon_tmp/", n_threads=0):
    """ Use bedtools merge to create non-overlapping intervals from all of the
        transcripts in a series of SAM/BAM files. Then, iterate over the intervals
        to extract all reads inside of them from the pysam object.
        Returns:
            - List of lists: sublists contain pysam reads from a given interval
            - List of tuple intervals
            - filename of merged bam file (to keep track of the header)
    """
    merged_bam = preprocess_sam(sam_files, datasets, tmp_dir=tmp_dir,
                                n_threads=n_threads)
    try:
        all_reads = pybedtools.BedTool(merged_bam).bam_to_bed()
    except Exception as e:
        print(e)
        raise RuntimeError("Problem opening sam file %s" % (merged_bam))
    # Must sort the BedTool object
    sorted_reads = all_reads.sort()
    intervals = sorted_reads.merge(d=100000000)
    # Now open each sam file using pysam and extract the reads
    coords = []
    read_groups = []
    with pysam.AlignmentFile(merged_bam) as bam:  # type: pysam.AlignmentFile
        for interval in intervals:
            reads = get_reads_in_interval(bam, interval.chrom,
                                          interval.start, interval.end)
            read_groups.append(reads)
            coords.append((interval.chrom, interval.start + 1, interval.end))
    return read_groups, coords, merged_bam
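get_reads_in_interval is defined elsewhere in the package; a plausible sketch, assuming it simply collects the reads overlapping the interval from the open AlignmentFile, is:

def get_reads_in_interval(bam, chrom, start, end):
    # Requires an index; returns the reads overlapping chrom:start-end.
    return list(bam.fetch(chrom, start, end))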
Example 10: create_simple_bam
# Required import: import pysam [as alias]
# Or: from pysam import sort [as alias]
def create_simple_bam(fname, calls):
    """Create a small bam file with RLE encoding coded in the qscores."""
    ref_len = len(simple_data['ref'])
    header = {'HD': {'VN': '1.0'},
              'SQ': [{'LN': ref_len, 'SN': 'ref'}]}
    tmp_file = '{}.tmp'.format(fname)
    with pysam.AlignmentFile(
            tmp_file, 'wb', reference_names=['ref', ],
            reference_lengths=[ref_len, ], header=header) as output:
        for index, basecall in enumerate(calls):
            a = common.initialise_alignment(
                query_name=basecall['query_name'],
                reference_id=0,
                reference_start=0,
                query_sequence=basecall['seq'],
                cigarstring=basecall['cigarstring'],
                flag=basecall['flag'],
                query_qualities=basecall['quality'],
                tags=basecall['tags'])
            output.write(a)
    pysam.sort("-o", fname, tmp_file)
    os.remove(tmp_file)
    pysam.index(fname)
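common.initialise_alignment comes from the surrounding package; an equivalent record can be built with plain pysam, roughly as follows (all field values are hypothetical):

import pysam

a = pysam.AlignedSegment()
a.query_name = "read1"
a.flag = 0
a.reference_id = 0          # index into the header's reference list
a.reference_start = 0
a.mapping_quality = 60
a.query_sequence = "ACGT"
a.cigarstring = "4M"
a.query_qualities = pysam.qualitystring_to_array("IIII")
a.set_tags([("RG", "sample1", "Z")])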
Example 11: check_te_overlaps_dels
# Required import: import pysam [as alias]
# Or: from pysam import sort [as alias]
def check_te_overlaps_dels(te, bamfile, te_list, prefix):
    pybedtools.BedTool(bamfile).bam_to_bed().saveas(prefix + 'split.temp')
    convert_split_pairbed(prefix + 'split.temp', prefix + 'split_bedpe.temp')
    split = pybedtools.BedTool(prefix + 'split_bedpe.temp').each(append_origin, word='split').saveas()
    create_deletion_coords(split, prefix + 'second_pass_del_coords.temp')
    dels = pybedtools.BedTool(prefix + 'second_pass_del_coords.temp').sort()
    intersections = dels.intersect(te, wb=True, nonamecheck=True, sorted=True)
    os.remove(prefix + 'second_pass_del_coords.temp')
    reads = []
    for r in intersections:
        if r[-3] in te_list:
            reads.append(r[3])
    return reads
Example 12: refine
# Required import: import pysam [as alias]
# Or: from pysam import sort [as alias]
def refine(options):
    """
    Refine TE insertion and deletion calls within a group of related samples.
    Use indel calls from other samples in the group, inspect regions of the
    genome in samples where an indel was not called, and look for evidence of
    the same indel with a much lower read count threshold.
    """
    te = pybedtools.BedTool(options.te).sort()
    names = readNames(options.all_samples)
    if options.insertions is not False:
        insertions = getOtherLines(names, options.insertions)
    if options.deletions is not False:
        deletions = getOtherLines(names, options.deletions)  # format ([data], [inverse_accessions])
    print("Processing " + options.name)
    chrom_sizes = check_bam(options.conc, options.proc, options.prefix)
    check_bam(options.split, options.proc, options.prefix, make_new_index=True)
    cov = calc_cov(options.conc, 100000, 120000)
    concordant = pysam.AlignmentFile(options.conc, 'rb')
    split_alignments = pysam.AlignmentFile(options.split, 'rb')
    name_indexed = pysam.IndexedReads(split_alignments)
    name_indexed.build()
    if options.deletions is not False:
        print("  checking deletions")
        process_missed(deletions, "deletion", concordant, split_alignments, name_indexed, options.name, te, cov/5, chrom_sizes, options.prefix)
    if options.insertions is not False:
        print("  checking insertions")
        process_missed(insertions, "insertion", concordant, split_alignments, name_indexed, options.name, te, cov/10, chrom_sizes, options.prefix)
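pysam.IndexedReads builds an in-memory index from read name to file position; after build(), alignments are retrieved by name with find(). A small sketch of the lookup that process_missed presumably performs (file and read names are hypothetical):

import pysam

split_alignments = pysam.AlignmentFile("split_reads.bam", "rb")
name_indexed = pysam.IndexedReads(split_alignments)
name_indexed.build()
try:
    for read in name_indexed.find("read_0001"):
        print(read.reference_name, read.reference_start)
except KeyError:
    pass  # no alignment with that query name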
Example 13: check_bam
# Required import: import pysam [as alias]
# Or: from pysam import sort [as alias]
def check_bam(bam, p, prefix, make_new_index=False):
    """
    Sort and index bam file
    returns dictionary of chromosome names and lengths
    """
    # check if sorted
    test_head = pysam.AlignmentFile(bam, 'rb')
    chrom_sizes = {}
    p = str(p)
    for i in test_head.header['SQ']:
        chrom_sizes[i['SN']] = int(i['LN'])
    try:
        test_head.header['HD']['SO']
    except KeyError:
        print('  sorting bam file')
        pysam.sort('-@', p, bam, prefix + 'sorted.temp')
        os.remove(bam)
        os.rename(prefix + 'sorted.temp.bam', bam)
    else:
        if test_head.header['HD']['SO'] == 'coordinate':
            pass
        else:
            print('  sorting bam file')
            pysam.sort('-@', p, bam, prefix + 'sorted.temp')
            os.remove(bam)
            os.rename(prefix + 'sorted.temp.bam', bam)
    test_head.close()
    # check if indexed
    if '{}.bai'.format(bam) in os.listdir('.') and make_new_index is False:
        pass
    else:
        print('  indexing bam file')
        pysam.index(bam)
    return chrom_sizes
Example 14: prepareBED
# Required import: import pysam [as alias]
# Or: from pysam import sort [as alias]
def prepareBED(bed, slamSimBed, minLength):
    utrs = []
    for utr in BedIterator(bed):
        utrs.append(utr)
    utrs.sort(key=lambda x: (x.name, -x.getLength()))
    outBed = open(slamSimBed, "w")
    partList = []
    lastUtr = None
    for utr in utrs:
        if utr.hasStrand() and utr.hasNonEmptyName():
            currentUtr = utr.name
            if currentUtr == lastUtr:
                partList.append(utr)
            else:
                if lastUtr is not None:
                    printUTR(partList[0], outBed, minLength)
                partList = [utr]
            lastUtr = currentUtr
        else:
            print("Warning: Invalid BED entry found: " + str(utr))
    if lastUtr is not None:
        printUTR(partList[0], outBed, minLength)
    outBed.close()
Example 15: createGenomeWindows
# Required import: import pysam [as alias]
# Or: from pysam import sort [as alias]
def createGenomeWindows(genome, outfile, windows):
    statement = '''
    cgat windows2gff
        --genome=%(genome)s
        --output-format=bed
        --fixed-width-windows=%(windows)s
        --log=%(outfile)s.log |
    bedtools sort -i stdin |
    gzip > %(outfile)s
    '''
    P.run()