本文整理汇总了Python中pysam.AlignmentFile方法的典型用法代码示例。如果您正苦于以下问题:Python pysam.AlignmentFile方法的具体用法?Python pysam.AlignmentFile怎么用?Python pysam.AlignmentFile使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pysam
的用法示例。
在下文中一共展示了pysam.AlignmentFile方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: learn_shift_pair
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def learn_shift_pair(self, bam_file):
""" Learn the optimal fragment shift from paired end fragments. """
t0 = time.time()
print('Learning shift from paired-end sequences.', end='', flush=True)
# read proper pair template lengths
template_lengths = []
for align in pysam.AlignmentFile(bam_file):
if align.is_proper_pair and align.is_read1:
template_lengths.append(abs(align.template_length))
# compute mean
self.shift_forward = int(np.round(np.mean(template_lengths) / 2))
print(' Done in %ds.' % (time.time() - t0))
print('Shift: %d' % self.shift_forward, flush=True)
示例2: retrieve_loc_reads
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def retrieve_loc_reads(sv, bam, max_dep, threshold):
bp_dtype = [('chrom', '<U20'), ('start', int), ('end', int), ('dir', '<U1')]
sv_id, chr1, pos1, dir1, chr2, pos2, dir2, \
sv_class, orig_id, orig_pos1, orig_pos2 = [h[0] for h in dtypes.sv_dtype]
bp1 = np.array((sv[chr1], sv[pos1]-(threshold*2), \
sv[pos1]+(threshold*2), sv[dir1]), dtype=bp_dtype)
bp2 = np.array((sv[chr2], sv[pos2]-(threshold*2), \
sv[pos2]+(threshold*2), sv[dir2]), dtype=bp_dtype)
bamf = pysam.AlignmentFile(bam, "rb")
loc1_reads, err_code1 = count.get_loc_reads(bp1, bamf, max_dep)
loc2_reads, err_code2 = count.get_loc_reads(bp2, bamf, max_dep)
bamf.close()
sv_class = str(sv['classification'])
if err_code1 == 1 or err_code2 == 1:
sv['classification'] = 'HIDEP' if sv_class == '' else sv_class+';HIDEP'
elif err_code1 == 2 or err_code2 == 2:
sv['classification'] = 'READ_FETCH_FAILED' if sv_class == '' \
else sv_class+';READ_FETCH_FAILED'
return sv, loc1_reads, loc2_reads, err_code1, err_code2
示例3: isPaired
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def isPaired(bamfile, alignments=1000):
'''check if a *bamfile* contains paired end data
The method reads at most the first *alignments* and returns
True if any of the alignments are paired.
'''
samfile = pysam.AlignmentFile(bamfile)
n = 0
for read in samfile:
if read.is_paired:
break
n += 1
if n == alignments:
break
samfile.close()
return n != alignments
示例4: estimateInsertSizeDistribution
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def estimateInsertSizeDistribution(bamfile,
alignments=50000):
'''estimate insert size from first alignments in bam file.
returns mean and stddev of insert sizes.
'''
assert isPaired(bamfile), \
'can only estimate insert size from' \
'paired bam files'
samfile = pysam.AlignmentFile(bamfile)
# only get positive to avoid double counting
inserts = numpy.array(
[read.tlen for read in samfile.head(alignments)
if read.is_proper_pair and read.tlen > 0])
insert_mean, insert_std = numpy.mean(inserts), numpy.std(inserts)
print('Insert mean of %f, with standard deviation of %f inferred' %
(insert_mean, insert_std))
if insert_mean > 10000 or insert_std > 1000 or \
insert_mean < 1 or insert_std < 1:
print('''WARNING: anomalous insert sizes detected. Please
double check or consider setting values manually.''')
return insert_mean, insert_std
示例5: estimateTagSize
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def estimateTagSize(bamfile,
alignments=10,
multiple="error"):
'''estimate tag size from first alignments in file.'''
samfile = pysam.AlignmentFile(bamfile)
sizes = [read.rlen for read in samfile.head(alignments)]
mi, ma = min(sizes), max(sizes)
if mi == 0 and ma == 0:
sizes = [read.inferred_length for read in samfile.head(alignments)]
# remove 0 sizes (unaligned reads?)
sizes = [x for x in sizes if x > 0]
mi, ma = min(sizes), max(sizes)
if mi != ma:
if multiple == "error":
raise ValueError('multiple tag sizes in %s: %s' % (bamfile, sizes))
elif multiple == "mean":
mi = int(sum(sizes) / len(sizes))
return mi
示例6: write_anomalous_read_to_bam
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def write_anomalous_read_to_bam(bam,split_reads,span_reads,anom_reads,out):
print('Writing anom reads to file')
split_reads = np.unique(split_reads['query_name'])
span_reads = np.unique(span_reads['query_name'])
anom_reads = np.unique(anom_reads['query_name'])
# need to filter out any reads that were at any point marked as valid supporting reads
anom_reads = np.array([x for x in anom_reads if x not in split_reads])
anom_reads = np.array([x for x in anom_reads if x not in span_reads])
bamf = pysam.AlignmentFile(bam, "rb")
index = pysam.IndexedReads(bamf)
index.build()
anom_bam = pysam.AlignmentFile("%s_anom_reads.bam" % out, "wb", template=bamf)
for read_name in anom_reads:
for read in index.find(read_name):
anom_bam.write(read)
anom_bam.close()
示例7: pysam_open
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def pysam_open(alignment_file, in_format='BAM'):
"""Open SAM/BAM file using pysam.
:param alignment_file: Input file.
:param in_format: Format (SAM or BAM).
:returns: pysam.AlignmentFile
:rtype: pysam.AlignmentFile
"""
if in_format == 'BAM':
mode = "rb"
elif in_format == 'SAM':
mode = "r"
else:
raise Exception("Invalid format: {}".format(in_format))
aln_iter = pysam.AlignmentFile(alignment_file, mode)
return aln_iter
示例8: __init__
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def __init__(self, *args):
"""A class that is essentially pysam.AlignmentFile, with some added bonuses
This class inherits pysam.AlignmentFile and adds a little flair. Init such an object the
way you would an AlignmentFile, i.e. bam = bamops.BAMFileObject(path_to_bam)
"""
self.input_bam_path = args[0]
filesnpaths.is_file_exists(self.input_bam_path)
try:
pysam.AlignmentFile.__init__(self)
except ValueError as e:
raise ConfigError('Are you sure "%s" is a BAM file? Because samtools is not happy with it: """%s"""' % (self.input_bam_path, e))
try:
self.mapped
except ValueError:
raise ConfigError("It seems the BAM file is not indexed. See 'anvi-init-bam' script.")
示例9: write_reads_to_file
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def write_reads_to_file(read_groups, intervals, header_template, tmp_dir = "talon_tmp/"):
""" For each read group, iterate over the reads and write them to a file
named for the interval they belong to. This step is necessary because
Pysam objects cannot be pickled. """
tmp_dir = tmp_dir + "interval_files/"
os.system("mkdir -p %s " % (tmp_dir))
outbam_names = []
with pysam.AlignmentFile(header_template, "rb") as template:
for group, interval in zip(read_groups, intervals):
fname = tmp_dir + "_".join([str(x) for x in interval]) + ".bam"
outbam_names.append(fname)
with pysam.AlignmentFile(fname, "wb", template = template) as f:
for read in group:
f.write(read)
return outbam_names
示例10: test_read_labels
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def test_read_labels(self):
""" Given two sam files and two corresponding dataset labels, ensure
that the merged BAM file preserves the RB tags assigned by
pysam.merge """
sams = ["input_files/preprocess_sam/read1.sam",
"input_files/preprocess_sam/read2.sam" ]
datasets = ["dataset1", "dataset2"]
tmp_dir = "scratch/test_read_labels/test1/"
merged_bam = procsam.preprocess_sam(sams, datasets, tmp_dir = tmp_dir)
with pysam.AlignmentFile(merged_bam) as bam:
for entry in bam:
if entry.query_name == "read_1":
assert entry.get_tag("RG") == "dataset1"
elif entry.query_name == "read_2":
assert entry.get_tag("RG") == "dataset1"
elif entry.query_name == "read_3":
assert entry.get_tag("RG") == "dataset2"
else:
pytest.fail("Unexpected read encountered")
示例11: test_parse_custom_SAM_tags
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def test_parse_custom_SAM_tags():
""" Test that custom SAM tags are handled as expected """
sam_file = "input_files/test_parse_custom_SAM_tags/toy_reads.sam"
with pysam.AlignmentFile(sam_file, "rb") as sam:
for sam_record in sam:
fraction_As, custom_label, allelic_label, \
start_support, end_support = talon.parse_custom_SAM_tags(sam_record)
if sam_record.query_name == "read_1":
assert round(fraction_As,1) == 0.2
assert custom_label == "yes"
assert allelic_label == "paternal"
assert start_support == "yes"
assert end_support == "no"
elif sam_record.query_name == "read_4":
assert fraction_As == custom_label == allelic_label == None
assert start_support == end_support == None
else:
pytest.fail("Did not recognize read name")
示例12: get_bam_coverages
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def get_bam_coverages(options, sample, dataset):
window_skip = int(1e6)
window_size = 1000
skip_at_ends = int(1e6)
bam = pysam.AlignmentFile(dataset.bam)
print "getting coverages"
coverages = []
count = 0
for chrom, start, end in get_search_regions(options.reference,
window_skip, window_size, skip_at_ends):
coverages.append(bam.count(chrom, start, end))
if count % 1000 == 0:
print count
count += 1
return coverages
示例13: count_ref_reads
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def count_ref_reads(bampath, reference, chrom, start, end):
ref_counts = numpy.zeros(end-start)
non_ref_counts = numpy.zeros(end-start)
bam = pysam.AlignmentFile(bampath)
# stepper = "all" skips dupe, unmapped, secondary, and qcfail reads
start = max(0, start)
for col in bam.pileup(chrom, start, end, truncate=True, stepper="all"):
refnuc = reference.fasta[chrom][col.reference_pos].upper()
nuc_counts = collections.Counter()
for read in col.pileups:
if read.query_position is None:
# nuc_counts["indel"] += 1
pass
else:
nuc_counts[read.alignment.query_sequence[read.query_position]] += 1
ref_counts[col.reference_pos-start] = nuc_counts[refnuc]
non_ref_counts[col.reference_pos-start] = sum(nuc_counts.values()) - nuc_counts[refnuc]
return ref_counts, non_ref_counts
示例14: test_quick_consensus
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def test_quick_consensus():
bam = pysam.AlignmentFile("data/quick_consensus_test.bam")
consensus = MismatchCounts("4", 96549060, 96567077)
consensus.tally_reads(bam)
示例15: test_compare_quick_consensus
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignmentFile [as 别名]
def test_compare_quick_consensus():
from genomeview import quickconsensus
from genomeview import _quickconsensus
bam = pysam.AlignmentFile("data/quick_consensus_test.bam")
cython_consensus = _quickconsensus.MismatchCounts("4", 96549060, 96549060+1000)
python_consensus = quickconsensus.MismatchCounts("4", 96549060, 96549060+1000)
python_consensus.tally_reads(bam)
cython_consensus.tally_reads(bam)
assert (cython_consensus.counts == python_consensus.counts).all()
assert (cython_consensus.insertions == python_consensus.insertions).all()