本文整理汇总了 Python 中 Bio.SearchIO._model.HSPFragment.alphabet 属性的典型用法代码示例。如果您正苦于以下问题：HSPFragment.alphabet 的具体用法是什么？HSPFragment.alphabet 怎么用？有哪些使用 HSPFragment.alphabet 的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在类 Bio.SearchIO._model.HSPFragment 的用法示例。
在下文中一共展示了 HSPFragment.alphabet 属性的 6 个代码示例（其中示例 4 和示例 6 的代码有部分省略），这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: parse_hsps
# 需要导入模块: from Bio.SearchIO._model import HSPFragment [as 别名]
# 注意: alphabet 是 HSPFragment 实例的属性, 无法通过 import 单独导入; 请通过实例访问, 例如 frag.alphabet
def parse_hsps(self, hit_placeholders):
    """Parse a HMMER2 hsp block, beginning with the hsp table.

    Lines are pulled with ``self.read_next()`` until a line starting with
    ``Alignments`` or ``Histogram``, or the ``//`` line, is reached.  Every
    remaining table row becomes one single-fragment HSP; HSPs are grouped
    into Hit objects, which are finally appended to ``self.qresult``.

    Arguments:
    hit_placeholders -- sequence of placeholder objects exposing ``id_``
                        and ``createHit(hsps)``; its order determines the
                        order of the hits in the query result.
    """
    # table rows may come in a different order than the hits, so gather
    # the Hit objects by ID first and put them in order afterwards
    hits_by_id = {}
    while self.read_next():
        row = self.line
        if row.startswith(('Alignments', 'Histogram')) or row == '//':
            break
        # header and separator rows carry no data
        if row.startswith(('Model', 'Sequence', '--------')):
            continue

        (id_, domain, seq_f, seq_t, seq_compl,
         hmm_f, hmm_t, hmm_compl, score, evalue) = row.split()

        fragment = HSPFragment(id_, self.qresult.id)
        fragment.alphabet = generic_protein

        # which side (hit or query) is the HMM depends on the program;
        # 'from' values are shifted down by one, 'to' values kept as-is
        program = self._meta['program']
        if program == 'hmmpfam':
            fragment.hit_start = int(hmm_f) - 1
            fragment.hit_end = int(hmm_t)
            fragment.query_start = int(seq_f) - 1
            fragment.query_end = int(seq_t)
        elif program == 'hmmsearch':
            fragment.query_start = int(hmm_f) - 1
            fragment.query_end = int(hmm_t)
            fragment.hit_start = int(seq_f) - 1
            fragment.hit_end = int(seq_t)

        hsp = HSP([fragment])
        hsp.evalue = float(evalue)
        hsp.bitscore = float(score)
        # the domain column looks like 'index/total'; keep the index part
        hsp.domain_index = int(domain.split('/')[0])
        if program == 'hmmpfam':
            hsp.hit_endtype = hmm_compl
            hsp.query_endtype = seq_compl
        elif program == 'hmmsearch':
            hsp.query_endtype = hmm_compl
            hsp.hit_endtype = seq_compl

        if id_ in hits_by_id:
            # further HSP of a hit we have already seen
            known_hit = hits_by_id[id_]
            hsp.hit_description = known_hit.description
            known_hit.append(hsp)
        else:
            # first HSP of this hit: let its placeholder build the Hit
            candidates = [p for p in hit_placeholders if p.id_ == id_]
            hits_by_id[id_] = candidates[0].createHit([hsp])

    # The placeholder list is in the correct order, so use that order for
    # the Hit objects in the qresult
    for placeholder in hit_placeholders:
        self.qresult.append(hits_by_id[placeholder.id_])
示例2: _create_qresult
# 需要导入模块: from Bio.SearchIO._model import HSPFragment [as 别名]
# 注意: alphabet 是 HSPFragment 实例的属性, 无法通过 import 单独导入; 请通过实例访问, 例如 frag.alphabet
def _create_qresult(self, hit_blocks):
    """Create the Biopython data structures from the parsed data (PRIVATE).

    Arguments:
    hit_blocks -- iterable of dicts, one per alignment block, with the keys
                  'hit_id', 'query_start', 'query_end', 'hit_start',
                  'hit_end', 'hit_seq', 'query_seq', 'description',
                  'evalue', 'score' and 'prob'.

    Returns a one-element list holding the QueryResult built from all
    blocks.  Blocks sharing a hit ID end up as several HSPs of one Hit, and
    hits keep the order in which their IDs were first seen.
    """
    query_id = self.query_id
    hits = OrderedDict()
    # every HSP and Hit is flagged as included
    # (the original author left this as an open question)
    included = True

    for output_index, block in enumerate(hit_blocks):
        hit_id = block['hit_id']

        fragment = HSPFragment(hit_id, query_id)
        fragment.alphabet = generic_protein
        # start coordinates are shifted down by one, ends are kept as-is
        fragment.query_start = block['query_start'] - 1
        fragment.query_end = block['query_end']
        fragment.hit_start = block['hit_start'] - 1
        fragment.hit_end = block['hit_end']
        fragment.hit = block['hit_seq']
        fragment.query = block['query_seq']

        hsp = HSP([fragment])
        hsp.hit_id = hit_id
        hsp.output_index = output_index
        hsp.query_id = query_id
        hsp.hit_description = block['description']
        hsp.is_included = included
        for attr in ('evalue', 'score', 'prob'):
            setattr(hsp, attr, block[attr])

        if hit_id in hits:
            hits[hit_id].append(hsp)
            continue

        # first block for this hit ID: the Hit takes its own
        # description and statistics from this block
        new_hit = Hit([hsp], hit_id)
        new_hit.description = block['description']
        new_hit.is_included = included
        for attr in ('evalue', 'score'):
            setattr(new_hit, attr, block[attr])
        hits[hit_id] = new_hit

    qresult = QueryResult(hits.values(), query_id)
    qresult.program = _PROGRAM
    qresult.seq_len = self.seq_len
    return [qresult]
示例3: _parse_hsp
# 需要导入模块: from Bio.SearchIO._model import HSPFragment [as 别名]
# 注意: alphabet 是 HSPFragment 实例的属性, 无法通过 import 单独导入; 请通过实例访问, 例如 frag.alphabet
def _parse_hsp(self, root_hsp_frag_elem, query_id, hit_id):
    """Iterator that transforms Hit_hsps XML elements into HSP objects.

    Arguments:
    root_hsp_frag_elem -- Element object of the Hit_hsps tag, or None.
    query_id -- Query ID string.
    hit_id -- Hit ID string.

    Yields one single-fragment HSP per child element.  Each child element
    is cleared once its HSP has been built.
    """
    # Hit_hsps DTD:
    # <!ELEMENT Hsp (
    #        Hsp_num,
    #        Hsp_bit-score,
    #        Hsp_score,
    #        Hsp_evalue,
    #        Hsp_query-from,
    #        Hsp_query-to,
    #        Hsp_hit-from,
    #        Hsp_hit-to,
    #        Hsp_pattern-from?,
    #        Hsp_pattern-to?,
    #        Hsp_query-frame?,
    #        Hsp_hit-frame?,
    #        Hsp_identity?,
    #        Hsp_positive?,
    #        Hsp_gaps?,
    #        Hsp_align-len?,
    #        Hsp_density?,
    #        Hsp_qseq,
    #        Hsp_hseq,
    #        Hsp_midline?)>

    # a missing Hit_hsps element simply yields nothing
    hsp_elems = [] if root_hsp_frag_elem is None else root_hsp_frag_elem

    for hsp_frag_elem in hsp_elems:
        raw_coords = {}  # 'from'/'to' values, resolved after the scan
        frag = HSPFragment(hit_id, query_id)

        for key, val_info in _ELEM_FRAG.items():
            text = hsp_frag_elem.findtext(key)
            attr_name, caster = val_info[0], val_info[1]
            if text is None:
                continue
            if key.endswith(('-from', '-to')):
                # coordinates need pairing up, so defer them
                raw_coords[attr_name] = caster(text)
                continue
            # recast only if the value is not meant to stay a str
            setattr(frag, attr_name, text if caster is str else caster(text))

        # set the homology characters into aln_annotation dict
        frag.aln_annotation['homology'] = hsp_frag_elem.findtext('Hsp_midline')

        # 'x-from' may be bigger than 'x-to', while 'x_start' must always
        # be the smaller one, so sort each pair before storing it
        for prefix in ('query', 'hit', 'pattern'):
            start_attr = prefix + '_start'
            end_attr = prefix + '_end'
            if start_attr not in raw_coords or end_attr not in raw_coords:
                continue
            first, second = raw_coords[start_attr], raw_coords[end_attr]
            # convert to python range and setattr
            setattr(frag, start_attr, min(first, second) - 1)
            setattr(frag, end_attr, max(first, second))

        # set alphabet, based on program
        program = self._meta.get('program')
        if program == 'blastn':
            frag.alphabet = generic_dna
        elif program in ('blastp', 'blastx', 'tblastn', 'tblastx'):
            frag.alphabet = generic_protein

        hsp = HSP([frag])
        for key, val_info in _ELEM_HSP.items():
            text = hsp_frag_elem.findtext(key)
            caster = val_info[1]
            if text is None:
                continue
            setattr(hsp, val_info[0], text if caster is str else caster(text))

        # delete element after we finish parsing it
        hsp_frag_elem.clear()
        yield hsp
示例4: _create_hits
# 需要导入模块: from Bio.SearchIO._model import HSPFragment [as 别名]
# 注意: alphabet 是 HSPFragment 实例的属性, 无法通过 import 单独导入; 请通过实例访问, 例如 frag.alphabet
def _create_hits(self, hit_attrs, qid, qdesc):
"""Parses a HMMER3 hsp block, beginning with the hsp table."""
# read through until the beginning of the hsp block
self._read_until(lambda line: line.startswith('Internal pipeline')
or line.startswith('>>'))
# start parsing the hsp block
hit_list = []
while True:
if self.line.startswith('Internal pipeline'):
# by this time we should've emptied the hit attr list
assert len(hit_attrs) == 0
return hit_list
assert self.line.startswith('>>')
hid, hdesc = self.line[len('>> '):].split(' ', 1)
hdesc = hdesc.strip()
# read through the hsp table header and move one more line
self._read_until(lambda line:
line.startswith(' --- ------ ----- --------') or
line.startswith(' [No individual domains'))
self.line = read_forward(self.handle)
# parse the hsp table for the current hit
hsp_list = []
while True:
# break out of hsp parsing if there are no hits, it's the last hsp
# or it's the start of a new hit
if self.line.startswith(' [No targets detected that satisfy') or \
self.line.startswith(' [No individual domains') or \
self.line.startswith('Internal pipeline statistics summary:') or \
self.line.startswith(' Alignments for each domain:') or \
self.line.startswith('>>'):
hit_attr = hit_attrs.pop(0)
hit = Hit(hsp_list)
for attr, value in hit_attr.items():
if attr == "description":
cur_val = getattr(hit, attr)
if cur_val and value and cur_val.startswith(value):
continue
setattr(hit, attr, value)
if not hit:
hit.query_description = qdesc
hit_list.append(hit)
break
parsed = [x for x in self.line.strip().split(' ') if x]
assert len(parsed) == 16
# parsed column order:
# index, is_included, bitscore, bias, evalue_cond, evalue
# hmmfrom, hmmto, query_ends, hit_ends, alifrom, alito,
# envfrom, envto, acc_avg
frag = HSPFragment(hid, qid)
# set query and hit descriptions if they are defined / nonempty string
if qdesc:
frag.query_description = qdesc
if hdesc:
frag.hit_description = hdesc
# HMMER3 alphabets are always protein alphabets
frag.alphabet = generic_protein
# depending on whether the program is hmmsearch, hmmscan, or phmmer
# {hmm,ali}{from,to} can either be hit_{from,to} or query_{from,to}
# for hmmscan, hit is the hmm profile, query is the sequence
if self._meta.get('program') == 'hmmscan':
# adjust 'from' and 'to' coordinates to 0-based ones
frag.hit_start = int(parsed[6]) - 1
frag.hit_end = int(parsed[7])
frag.query_start = int(parsed[9]) - 1
frag.query_end = int(parsed[10])
elif self._meta.get('program') in ['hmmsearch', 'phmmer']:
# adjust 'from' and 'to' coordinates to 0-based ones
frag.hit_start = int(parsed[9]) - 1
frag.hit_end = int(parsed[10])
frag.query_start = int(parsed[6]) - 1
frag.query_end = int(parsed[7])
# strand is always 0, since HMMER now only handles protein
frag.hit_strand = frag.query_strand = 0
hsp = HSP([frag])
hsp.domain_index = int(parsed[0])
hsp.is_included = parsed[1] == '!'
hsp.bitscore = float(parsed[2])
hsp.bias = float(parsed[3])
hsp.evalue_cond = float(parsed[4])
hsp.evalue = float(parsed[5])
if self._meta.get('program') == 'hmmscan':
# adjust 'from' and 'to' coordinates to 0-based ones
hsp.hit_endtype = parsed[8]
hsp.query_endtype = parsed[11]
elif self._meta.get('program') in ['hmmsearch', 'phmmer']:
# adjust 'from' and 'to' coordinates to 0-based ones
hsp.hit_endtype = parsed[11]
hsp.query_endtype = parsed[8]
# adjust 'from' and 'to' coordinates to 0-based ones
hsp.env_start = int(parsed[12]) - 1
hsp.env_end = int(parsed[13])
hsp.env_endtype = parsed[14]
hsp.acc_avg = float(parsed[15])
#.........这里部分代码省略.........
示例5: _create_hsp
# 需要导入模块: from Bio.SearchIO._model import HSPFragment [as 别名]
# 注意: alphabet 是 HSPFragment 实例的属性, 无法通过 import 单独导入; 请通过实例访问, 例如 frag.alphabet
def _create_hsp(hid, qid, psl):
    """Create an HSP object from one parsed PSL row.

    Arguments:
    hid -- Hit ID string.
    qid -- Query ID string.
    psl -- dict of parsed PSL columns.  Keys read here: 'strand',
           'qstarts', 'tstarts', 'blocksizes', 'qsize', 'tsize', 'qstart',
           'qend', 'tstart', 'tend', 'matches', 'mismatches', 'repmatches',
           'ncount', 'qnuminsert', 'qbaseinsert', 'tnuminsert',
           'tbaseinsert' and, optionally, 'tseqs' and 'qseqs'.

    Returns an HSP holding one HSPFragment per alignment block.

    Raises AssertionError when the block lists, sequences or overall
    coordinates of the row do not agree with each other.
    """
    is_protein = _is_protein(psl)
    strand = psl['strand']
    block_sizes = psl['blocksizes']

    # a protein query has no strand (0); otherwise use the first character
    if is_protein:
        qstrand = 0
    else:
        qstrand = 1 if strand[0] == '+' else -1
    # the hit strand is the optional second character
    try:
        hstrand = 1 if strand[1] == '+' else -1
    except IndexError:
        hstrand = 1  # hit strand defaults to plus
    has_hit_strand = len(strand) == 2

    # query block starts
    qstarts = _reorient_starts(psl['qstarts'], block_sizes,
                               psl['qsize'], qstrand)
    # hit block starts only need reorienting when a hit strand is given
    if has_hit_strand:
        hstarts = _reorient_starts(psl['tstarts'], block_sizes,
                                   psl['tsize'], hstrand)
    else:
        hstarts = psl['tstarts']

    # set query and hit coords
    # this assumes each block has no gaps (which seems to be the case)
    assert len(qstarts) == len(hstarts) == len(block_sizes)
    # Build real lists: on Python 3 a bare zip() is a one-shot iterator
    # that supports neither len() nor indexing, both of which are needed
    # below.
    query_range_all = [(start, start + size)
                       for start, size in zip(qstarts, block_sizes)]
    hit_range_all = [(start, start + size)
                     for start, size in zip(hstarts, block_sizes)]

    # check length of sequences and coordinates, all must match
    if 'tseqs' in psl and 'qseqs' in psl:
        assert len(psl['tseqs']) == len(psl['qseqs']) == \
            len(query_range_all) == len(hit_range_all)
    else:
        assert len(query_range_all) == len(hit_range_all)

    # the sequence lists are optional; a missing or empty list means every
    # fragment gets an empty sequence
    hseqlist = psl.get('tseqs')
    qseqlist = psl.get('qseqs')

    frags = []
    for idx, (qcoords, hcoords) in enumerate(
            zip(query_range_all, hit_range_all)):
        hseq = hseqlist[idx] if hseqlist else ''
        qseq = qseqlist[idx] if qseqlist else ''
        frag = HSPFragment(hid, qid, hit=hseq, query=qseq)
        # NOTE(review): the alphabet is set to DNA even when is_protein is
        # true -- kept as in the original, confirm this is intended
        frag.alphabet = generic_dna
        # set coordinates
        frag.query_start, frag.query_end = qcoords
        frag.hit_start, frag.hit_end = hcoords
        # and strands
        frag.query_strand = qstrand
        frag.hit_strand = hstrand
        frags.append(frag)

    # create hsp object
    hsp = HSP(frags)
    # check if start and end are set correctly
    assert hsp.query_start == psl['qstart']
    assert hsp.query_end == psl['qend']
    assert hsp.hit_start == psl['tstart']
    assert hsp.hit_end == psl['tend']
    # and check block spans as well
    assert hsp.query_span_all == hsp.hit_span_all == block_sizes

    # set its attributes
    hsp.match_num = psl['matches']
    hsp.mismatch_num = psl['mismatches']
    hsp.match_rep_num = psl['repmatches']
    hsp.n_num = psl['ncount']
    hsp.query_gapopen_num = psl['qnuminsert']
    hsp.query_gap_num = psl['qbaseinsert']
    hsp.hit_gapopen_num = psl['tnuminsert']
    hsp.hit_gap_num = psl['tbaseinsert']

    hsp.ident_num = psl['matches'] + psl['repmatches']
    hsp.gapopen_num = psl['qnuminsert'] + psl['tnuminsert']
    hsp.gap_num = psl['qbaseinsert'] + psl['tbaseinsert']
    hsp.query_is_protein = is_protein
    hsp.ident_pct = 100.0 - _calc_millibad(psl, is_protein) * 0.1
    hsp.score = _calc_score(psl, is_protein)
    # helper flag, for writing
    hsp._has_hit_strand = has_hit_strand

    return hsp
示例6: __iter__
# 需要导入模块: from Bio.SearchIO._model import HSPFragment [as 别名]
# 注意: alphabet 是 HSPFragment 实例的属性, 无法通过 import 单独导入; 请通过实例访问, 例如 frag.alphabet
def __iter__(self):
for rec in self.blast_iter:
# set attributes to SearchIO's
# get id and desc
if rec.query.startswith('>'):
rec.query = rec.query[1:]
try:
qid, qdesc = rec.query.split(' ', 1)
except ValueError:
qid, qdesc = rec.query, ''
qdesc = qdesc.replace('\n', '').replace('\r', '')
qresult = QueryResult(id=qid)
qresult.program = rec.application.lower()
qresult.target = rec.database
qresult.seq_len = rec.query_letters
qresult.version = rec.version
# determine alphabet based on program
if qresult.program == 'blastn':
alphabet = generic_dna
elif qresult.program in ['blastp', 'blastx', 'tblastn', 'tblastx']:
alphabet = generic_protein
# iterate over the 'alignments' (hits) and the hit table
for idx, aln in enumerate(rec.alignments):
# get id and desc
if aln.title.startswith('> '):
aln.title = aln.title[2:]
elif aln.title.startswith('>'):
aln.title = aln.title[1:]
try:
hid, hdesc = aln.title.split(' ', 1)
except ValueError:
hid, hdesc = aln.title, ''
hdesc = hdesc.replace('\n', '').replace('\r', '')
# iterate over the hsps and group them in a list
hsp_list = []
for bhsp in aln.hsps:
frag = HSPFragment(hid, qid)
frag.alphabet = alphabet
# set alignment length
frag.aln_span = bhsp.identities[1]
# set frames
try:
frag.query_frame = int(bhsp.frame[0])
except IndexError:
if qresult.program in ('blastp', 'tblastn'):
frag.query_frame = 0
else:
frag.query_frame = 1
try:
frag.hit_frame = int(bhsp.frame[1])
except IndexError:
if qresult.program in ('blastp', 'tblastn'):
frag.hit_frame = 0
else:
frag.hit_frame = 1
# set query coordinates
frag.query_start = min(bhsp.query_start,
bhsp.query_end) - 1
frag.query_end = max(bhsp.query_start, bhsp.query_end)
# set hit coordinates
frag.hit_start = min(bhsp.sbjct_start,
bhsp.sbjct_end) - 1
frag.hit_end = max(bhsp.sbjct_start, bhsp.sbjct_end)
# set query, hit sequences and its annotation
qseq = ''
hseq = ''
midline = ''
for seqtrio in zip(bhsp.query, bhsp.sbjct, bhsp.match):
qchar, hchar, mchar = seqtrio
if qchar == ' ' or hchar == ' ':
assert all(' ' == x for x in seqtrio)
else:
qseq += qchar
hseq += hchar
midline += mchar
frag.query, frag.hit = qseq, hseq
frag.aln_annotation['similarity'] = midline
# create HSP object with the fragment
hsp = HSP([frag])
hsp.evalue = bhsp.expect
hsp.bitscore = bhsp.bits
hsp.bitscore_raw = bhsp.score
# set gap
try:
hsp.gap_num = bhsp.gaps[0]
except IndexError:
hsp.gap_num = 0
# set identity
hsp.ident_num = bhsp.identities[0]
hsp.pos_num = bhsp.positives[0]
if hsp.pos_num is None:
hsp.pos_num = hsp[0].aln_span
hsp_list.append(hsp)
#.........这里部分代码省略.........