本文整理汇总了Python中pysam.AlignedSegment方法的典型用法代码示例。如果您正苦于以下问题:Python pysam.AlignedSegment方法的具体用法?Python pysam.AlignedSegment怎么用?Python pysam.AlignedSegment使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pysam
的用法示例。
在下文中一共展示了pysam.AlignedSegment方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignedSegment [as 别名]
def __init__(self, read):
"""Class for manipulating reads
Parameters
==========
read : pysam.AlignedSegment
"""
# redefine all properties of interest explicitly from pysam.AlignedSegment object as
# attributes of this class. The reason for this is that some of the AlignedSegment
# attributes have no __set__ methods, so are read only. Since this class is designed to
# modify some of these attributes, and since we want to maintain consistency across
# attributes, all attributes of interest are redefined here
self.cigartuples = np.array(read.cigartuples)
self.query_sequence = np.frombuffer(read.query_sequence.encode('ascii'), np.uint8)
self.reference_start = read.reference_start
self.reference_end = read.reference_end
if read.has_tag('MD'):
self.reference_sequence = np.frombuffer(read.get_reference_sequence().upper().encode('ascii'), np.uint8)
else:
self.reference_sequence = np.array([ord('N')] * (self.reference_end - self.reference_start))
# See self.vectorize
self.v = None
示例2: write_bam
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignedSegment [as 别名]
def write_bam(fname, alignments, header, bam=True):
"""Write a `.bam` file for a set of alignments.
:param fname: output filename.
:param alignments: a list of `Alignment` tuples.
:param header: bam header
:param bam: write bam, else sam
"""
mode = 'wb' if bam else 'w'
with pysam.AlignmentFile(fname, mode, header=header) as fh:
for ref_id, subreads in enumerate(alignments):
for aln in sorted(subreads, key=lambda x: x.rstart):
a = pysam.AlignedSegment()
a.reference_id = ref_id
a.query_name = aln.qname
a.query_sequence = aln.seq
a.reference_start = aln.rstart
a.cigarstring = aln.cigar
a.flag = aln.flag
a.mapping_quality = 60
fh.write(a)
if mode == 'wb':
pysam.index(fname)
示例3: printFastaEntry
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignedSegment [as 别名]
def printFastaEntry(sequence, name, index, conversions, readOutSAM, conversionRate):
#a = pysam.AlignedSegment()
print(name + "_" + str(index) + "_" + str(conversions),
"4",
"*",
"0",
"0",
"*",
"*",
"0",
"0",
sequence,
"F" * len(sequence),
"TC:i:" + str(conversions),
"ID:i:" + str(index),
"CR:f" + str(conversionRate),
file=readOutSAM, sep="\t")
示例4: get_blocks
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignedSegment [as 别名]
def get_blocks(self):
"""Mimic the get_blocks function from AlignedSegment.
Notes
=====
- Takes roughly 200us
"""
blocks = []
block_start = self.reference_start
block_length = 0
for _, length, consumes_read, consumes_ref in iterate_cigartuples(self.cigartuples, constants.cigar_consumption):
if consumes_read and consumes_ref:
block_length += length
elif consumes_read and not consumes_ref:
if block_length:
blocks.append((block_start, block_start + block_length))
block_start = block_start + block_length
block_length = 0
elif not consumes_read and consumes_ref:
if block_length:
blocks.append((block_start, block_start + block_length))
block_start = block_start + block_length + length
block_length = 0
else:
pass
if block_length:
blocks.append((block_start, block_start + block_length))
return blocks
示例5: parse_custom_SAM_tags
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignedSegment [as 别名]
def parse_custom_SAM_tags(sam_record: pysam.AlignedSegment):
""" Looks for the following tags in the read. Will be set to None if no tag
is found
fA: fraction As in the 10-bp interval following the alignment end
lC: custom label (type = string)
lA: custom allele label (type = string)
tS: flag indicating start site support (type = string)
tE: flag indicating end site support (typ = string)
"""
try:
fraction_As = sam_record.get_tag("fA")
except:
fraction_As = None
try:
custom_label = sam_record.get_tag("lC")
except:
custom_label = None
try:
allelic_label = sam_record.get_tag("lA")
except:
allelic_label = None
try:
start_support = sam_record.get_tag("tS")
except:
start_support = None
try:
end_support = sam_record.get_tag("tE")
except:
end_support = None
return fraction_As, custom_label, allelic_label, start_support, end_support
示例6: compute_transcript_end
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignedSegment [as 别名]
def compute_transcript_end(transcript=pysam.AlignedSegment):
""" Compute the position of the final transcript base relative to the genome,
taking strand into account. Position is 1-based. """
strand = "-" if transcript.is_reverse else "+"
if strand == '+':
return transcript.reference_end
if strand == '-':
return transcript.reference_start + 1 # (make 1-based)
示例7: check_read_quality
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignedSegment [as 别名]
def check_read_quality(sam_record: pysam.AlignedSegment, run_info):
""" Process an individual sam read and return quality attributes. """
read_ID = sam_record.query_name
flag = sam_record.flag
cigar = sam_record.cigarstring
seq = sam_record.query
read_length = sam_record.query_length
dataset = sam_record.get_tag('RG')
# Only use uniquely mapped transcripts
if flag not in [0, 16]:
return [dataset, read_ID, 0, 0, read_length, "NA", "NA"]
# Only use reads that are greater than or equal to length threshold
if read_length < run_info.min_length:
return [dataset, read_ID, 0, 1, read_length, "NA", "NA"]
# Locate the MD field of the sam transcript
try:
md_tag = sam_record.get_tag('MD')
except KeyError:
raise ValueError("SAM transcript %s lacks an MD tag" % read_ID)
# Only use reads where alignment coverage and identity exceed
# cutoffs
coverage = compute_alignment_coverage(cigar)
identity = compute_alignment_identity(md_tag, seq)
if coverage < run_info.min_coverage or \
identity < run_info.min_identity:
return [dataset, read_ID, 0, 1, read_length, coverage, identity]
# At this point, the read has passed the quality control
return [dataset, read_ID, 1, 1, read_length, coverage, identity]
示例8: get_introns
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignedSegment [as 别名]
def get_introns(sam_record: pysam.AlignedSegment, start, cigar):
""" Locates the jI field in a list of SAM fields or computes
it from the CIGAR string and start position if it isn't found.
Note that positions refer to start and endpoints of introns, not exons,
so adjustments are needed to avoid an off-by-one error if you want exons.
Example jI strings:
no introns: jI:B:i,-1
two introns: jI:B:i,167936516,167951806,167951862,167966628
Args:
sam_record: a pysam AlignedSegment
start: The start position of the transcript with respect to the
forward strand
cigar: SAM CIGAR string describing match operations to the reference
genome
Returns:
intron_list: intron starts and ends in a list (sorted order)
"""
try:
intron_list = sam_record.get_tag("jI").tolist()
except KeyError:
jI = compute_jI(start, cigar)
intron_list = [int(x) for x in jI.split(",")[1:]]
if intron_list[0] == -1:
return []
else:
return intron_list
示例9: _no_extension
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignedSegment [as 别名]
def _no_extension(self, read: pysam.AlignedSegment) -> str:
return read.get_tag(self.umibarcode_str)
示例10: _extension_Nbp
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignedSegment [as 别名]
def _extension_Nbp(self, read: pysam.AlignedSegment) -> str:
return read.get_tag(self.umibarcode_str) + read.query_alignment_sequence[:self.umi_bp]
示例11: _extension_Gene
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignedSegment [as 别名]
def _extension_Gene(self, read: pysam.AlignedSegment) -> str:
try:
return read.get_tag(self.umibarcode_str) + "_" + read.get_tag("GX") # catch the error
except KeyError:
return read.get_tag(self.umibarcode_str) + "_withoutGX"
示例12: _placeolder_umi
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignedSegment [as 别名]
def _placeolder_umi(self, read: pysam.AlignedSegment) -> str:
return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(vcy.PLACEHOLDER_UMI_LEN))
示例13: _extension_chr
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignedSegment [as 别名]
def _extension_chr(self, read: pysam.AlignedSegment) -> str:
return read.get_tag(self.umibarcode_str) + f"_{read.rname}:{read.reference_start // 10000000}" # catch the error
示例14: _bam_id_barcode
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignedSegment [as 别名]
def _bam_id_barcode(self, read: pysam.AlignedSegment) -> str:
return f"{self._current_bamfile}"
示例15: bam_iterator
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import AlignedSegment [as 别名]
def bam_iterator(bam):
"""Returns an iterator for the given SAM/BAM file (must be query-sorted).
In each call, the alignments of a single read are yielded as a 3-tuple: (list of primary pysam.AlignedSegment, list of supplementary pysam.AlignedSegment, list of secondary pysam.AlignedSegment)."""
alignments = bam.fetch(until_eof=True)
current_aln = next(alignments)
current_read_name = current_aln.query_name
current_prim = []
current_suppl = []
current_sec = []
if current_aln.is_secondary:
current_sec.append(current_aln)
elif current_aln.is_supplementary:
current_suppl.append(current_aln)
else:
current_prim.append(current_aln)
while True:
try:
next_aln = next(alignments)
next_read_name = next_aln.query_name
if next_read_name != current_read_name:
yield (current_prim, current_suppl, current_sec)
current_read_name = next_read_name
current_prim = []
current_suppl = []
current_sec = []
if next_aln.is_secondary:
current_sec.append(next_aln)
elif next_aln.is_supplementary:
current_suppl.append(next_aln)
else:
current_prim.append(next_aln)
except StopIteration:
break
yield (current_prim, current_suppl, current_sec)