本文整理汇总了Python中Bio.SearchIO._model.Hit类的典型用法代码示例。如果您正苦于以下问题:Python Hit类的具体用法?Python Hit怎么用?Python Hit使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Hit类的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _parse_hit
def _parse_hit(self, root_hit_elem, query_id):
    """Lazily turn the children of an Iteration_hits element into Hit objects (PRIVATE).

    :param root_hit_elem: root element of the Iteration_hits tag; ``None``
        is treated the same as an element without children.
    :type root_hit_elem: XML element tag
    :param query_id: QueryResult ID of this Hit
    :type query_id: string

    Hits are stored in the Iteration_hits tag, with the following DTD::

        <!ELEMENT Hit (
            Hit_num,
            Hit_id,
            Hit_def,
            Hit_accession,
            Hit_len,
            Hit_hsps?)>
    """
    # an absent Iteration_hits tag simply produces no hits
    hit_elems = () if root_hit_elem is None else root_hit_elem

    for hit_elem in hit_elems:
        # BLAST sometimes mangles the sequence IDs and descriptions, so the
        # actual values are extracted unless raw IDs were requested.
        raw_id = hit_elem.findtext('Hit_id')
        raw_desc = hit_elem.findtext('Hit_def')
        if self._use_raw_hit_ids:
            ids, descs, blast_hit_id = [raw_id], [raw_desc], raw_id
        else:
            ids, descs, blast_hit_id = _extract_ids_and_descs(raw_id, raw_desc)

        # the first entry is the primary one, the remainder are alternatives
        hit_id, alt_hit_ids = ids[0], ids[1:]
        hit_desc, alt_hit_descs = descs[0], descs[1:]

        hsp_source = self._parse_hsp(hit_elem.find('Hit_hsps'),
                                     query_id, hit_id)
        hit = Hit(list(hsp_source))
        hit.description = hit_desc
        hit._id_alt = alt_hit_ids
        hit._description_alt = alt_hit_descs
        hit.blast_id = blast_hit_id

        for tag, spec in _ELEM_HIT.items():
            text = hit_elem.findtext(tag)
            if text is None:
                # tag not present in this Hit element: leave attribute unset
                continue
            caster = spec[1]
            # recast only if the value is not intended to stay a str
            value = text if caster is str else caster(text)
            setattr(hit, spec[0], value)

        # release the element's contents once it has been parsed
        hit_elem.clear()
        yield hit
示例2: _create_qresult
def _create_qresult(self, hit_blocks):
    """Create the Biopython data structures from the parsed data (PRIVATE).

    Each entry of ``hit_blocks`` becomes one HSP wrapping a single
    HSPFragment. HSPs are grouped into Hit objects by ``hit_id`` (in order
    of first appearance) and the hits are wrapped in one QueryResult.

    Returns a one-element list containing that QueryResult.
    """
    query_id = self.query_id
    # Should everything be included? For now every HSP and Hit is flagged so.
    is_included = True
    hits_by_id = OrderedDict()

    for output_index, block in enumerate(hit_blocks):
        hit_id = block['hit_id']

        fragment = HSPFragment(hit_id, query_id)
        fragment.alphabet = generic_protein
        # start coordinates are shifted down by one; ends are used as given
        fragment.query_start = block['query_start'] - 1
        fragment.query_end = block['query_end']
        fragment.hit_start = block['hit_start'] - 1
        fragment.hit_end = block['hit_end']
        fragment.hit = block['hit_seq']
        fragment.query = block['query_seq']

        hsp = HSP([fragment])
        hsp.hit_id = hit_id
        hsp.output_index = output_index
        hsp.query_id = query_id
        hsp.hit_description = block['description']
        hsp.is_included = is_included
        hsp.evalue = block['evalue']
        hsp.score = block['score']
        hsp.prob = block['prob']

        if hit_id in hits_by_id:
            # further HSP of a hit we have already seen
            hits_by_id[hit_id].append(hsp)
            continue

        # first HSP for this hit ID: its block also supplies the hit-level
        # description, evalue and score
        new_hit = Hit([hsp], hit_id)
        new_hit.description = block['description']
        new_hit.is_included = is_included
        new_hit.evalue = block['evalue']
        new_hit.score = block['score']
        hits_by_id[hit_id] = new_hit

    qresult = QueryResult(hits_by_id.values(), query_id)
    qresult.program = _PROGRAM
    qresult.seq_len = self.seq_len
    return [qresult]
示例3: createHit
def createHit(self, hsp_list):
    """Build a Hit from ``hsp_list`` and copy this object's hit-level fields onto it.

    ``id_``, ``evalue``, ``bitscore`` and ``domain_obs_num`` are always
    copied; ``description`` is copied only when it is truthy.
    """
    hit = Hit(hsp_list)
    # NOTE(review): the attribute written is ``id_`` (trailing underscore);
    # confirm this is the name the Hit class is expected to carry.
    for attr_name in ('id_', 'evalue', 'bitscore'):
        setattr(hit, attr_name, getattr(self, attr_name))
    if self.description:
        hit.description = self.description
    hit.domain_obs_num = self.domain_obs_num
    return hit
示例4: _parse_hit
def _parse_hit(self, root_hit_elem, query_id):
    """Generate Hit objects from the children of an Iteration_hits element.

    Arguments:
    root_hit_elem -- Element object of the Iteration_hits tag; None yields
                     nothing.
    query_id -- String of QueryResult ID of this Hit

    Hits are stored in the Iteration_hits tag, with the following DTD::

        <!ELEMENT Hit (
            Hit_num,
            Hit_id,
            Hit_def,
            Hit_accession,
            Hit_len,
            Hit_hsps?)>
    """
    # nothing to iterate over when the Iteration_hits tag is absent
    if root_hit_elem is None:
        return

    for hit_elem in root_hit_elem:
        hit_id = hit_elem.findtext('Hit_id')
        hit_desc = hit_elem.findtext('Hit_def')

        # blast_hit_id stays empty unless the hit ID is Blast-generated
        blast_hit_id = ''
        if hit_id.startswith('gnl|BL_ORD_ID|'):
            # for Blast-generated IDs the first word of the definition line
            # is used as the ID and the remainder (possibly empty) as the
            # description
            blast_hit_id = hit_id
            hit_id, _, hit_desc = hit_desc.partition(' ')

        hsp_source = self._parse_hsp(hit_elem.find('Hit_hsps'),
                                     query_id, hit_id)
        hit = Hit(list(hsp_source))
        hit.description = hit_desc
        hit._blast_id = blast_hit_id

        for tag, spec in _ELEM_HIT.items():
            text = hit_elem.findtext(tag)
            if text is None:
                # tag not present in this Hit element: leave attribute unset
                continue
            caster = spec[1]
            # recast only if the value is not intended to stay a str
            value = text if caster is str else caster(text)
            setattr(hit, spec[0], value)

        # release the element's contents once it has been parsed
        hit_elem.clear()
        yield hit
示例5: _create_hits
def _create_hits(self, hit_attrs, qid, qdesc):
"""Parses a HMMER3 hsp block, beginning with the hsp table."""
# Builds and returns a list of Hit objects, one per '>>' section found
# before the 'Internal pipeline' line. hit_attrs is consumed from the front
# (one entry per Hit) and must be empty by the time parsing finishes.
# read through until the beginning of the hsp block
self._read_until(lambda line: line.startswith('Internal pipeline')
or line.startswith('>>'))
# start parsing the hsp block
hit_list = []
while True:
if self.line.startswith('Internal pipeline'):
# by this time we should've emptied the hit attr list
assert len(hit_attrs) == 0
return hit_list
assert self.line.startswith('>>')
# the hit header line holds the hit ID followed by its description
hid, hdesc = self.line[len('>> '):].split(' ', 1)
hdesc = hdesc.strip()
# read through the hsp table header and move one more line
self._read_until(lambda line:
line.startswith(' --- ------ ----- --------') or
line.startswith(' [No individual domains'))
self.line = read_forward(self.handle)
# parse the hsp table for the current hit
hsp_list = []
while True:
# break out of hsp parsing if there are no hits, it's the last hsp
# or it's the start of a new hit
if self.line.startswith(' [No targets detected that satisfy') or \
self.line.startswith(' [No individual domains') or \
self.line.startswith('Internal pipeline statistics summary:') or \
self.line.startswith(' Alignments for each domain:') or \
self.line.startswith('>>'):
# the table for this hit is finished: wrap the collected HSPs in a
# Hit and apply the next pending set of hit-level attributes
hit_attr = hit_attrs.pop(0)
hit = Hit(hsp_list)
for attr, value in hit_attr.items():
if attr == "description":
# keep the description the Hit already has when the value from
# hit_attr is merely a prefix of it
cur_val = getattr(hit, attr)
if cur_val and value and cur_val.startswith(value):
continue
setattr(hit, attr, value)
# presumably a Hit without HSPs has no fragment to carry the query
# description, so it is set on the Hit directly -- TODO confirm
if not hit:
hit.query_description = qdesc
hit_list.append(hit)
break
# split the table row on spaces, dropping the empty strings produced
# by consecutive spaces
parsed = [x for x in self.line.strip().split(' ') if x]
assert len(parsed) == 16
# parsed column order, as consumed below:
# 0 index, 1 is_included flag, 2 bitscore, 3 bias, 4 evalue_cond,
# 5 evalue, 6 hmmfrom, 7 hmmto, 8 hmm end type, 9 alifrom, 10 alito,
# 11 ali end type, 12 envfrom, 13 envto, 14 env end type, 15 acc_avg
frag = HSPFragment(hid, qid)
# set query and hit descriptions if they are defined / nonempty string
if qdesc:
frag.query_description = qdesc
if hdesc:
frag.hit_description = hdesc
# HMMER3 alphabets are always protein alphabets
frag.alphabet = generic_protein
# depending on whether the program is hmmsearch, hmmscan, or phmmer
# {hmm,ali}{from,to} can either be hit_{from,to} or query_{from,to}
# for hmmscan, hit is the hmm profile, query is the sequence
if self._meta.get('program') == 'hmmscan':
# adjust 'from' and 'to' coordinates to 0-based ones
frag.hit_start = int(parsed[6]) - 1
frag.hit_end = int(parsed[7])
frag.query_start = int(parsed[9]) - 1
frag.query_end = int(parsed[10])
elif self._meta.get('program') in ['hmmsearch', 'phmmer']:
# adjust 'from' and 'to' coordinates to 0-based ones
frag.hit_start = int(parsed[9]) - 1
frag.hit_end = int(parsed[10])
frag.query_start = int(parsed[6]) - 1
frag.query_end = int(parsed[7])
# NOTE(review): for any other program value no coordinates are set here
# strand is always 0, since HMMER now only handles protein
frag.hit_strand = frag.query_strand = 0
hsp = HSP([frag])
hsp.domain_index = int(parsed[0])
# the second column is '!' for included domains
hsp.is_included = parsed[1] == '!'
hsp.bitscore = float(parsed[2])
hsp.bias = float(parsed[3])
hsp.evalue_cond = float(parsed[4])
hsp.evalue = float(parsed[5])
if self._meta.get('program') == 'hmmscan':
# end types follow the same hmm/ali -> hit/query mapping as the
# coordinates above (column 8 goes with hmm, column 11 with ali)
hsp.hit_endtype = parsed[8]
hsp.query_endtype = parsed[11]
elif self._meta.get('program') in ['hmmsearch', 'phmmer']:
# same columns, with the hit/query roles swapped
hsp.hit_endtype = parsed[11]
hsp.query_endtype = parsed[8]
# adjust 'from' and 'to' coordinates to 0-based ones
hsp.env_start = int(parsed[12]) - 1
hsp.env_end = int(parsed[13])
hsp.env_endtype = parsed[14]
hsp.acc_avg = float(parsed[15])
#.........这里部分代码省略.........
示例6: _parse_qresult
def _parse_qresult(self):
"""Generator function that returns QueryResult objects."""
# state values, determines what to do for each line
state_EOF = 0
state_QRES_NEW = 1
state_QRES_SAME = 3
state_HIT_NEW = 2
state_HIT_SAME = 4
# initial dummy values
qres_state = None
file_state = None
prev_qid, prev_hid = None, None
cur, prev = None, None
hit_list, hsp_list = [], []
while True:
# store previous line's parsed values for all lines after the first
if cur is not None:
prev = cur
prev_qid = cur_qid
prev_hid = cur_hid
# only parse the result row if it's not EOF
if self.line:
cur = self._parse_row()
cur_qid = cur['qname']
cur_hid = cur['tname']
else:
file_state = state_EOF
# mock values, since we have nothing to parse
cur_qid, cur_hid = None, None
# get the state of hit and qresult
if prev_qid != cur_qid:
qres_state = state_QRES_NEW
else:
qres_state = state_QRES_SAME
# new hits are hits with different ids or hits in a new qresult
if prev_hid != cur_hid or qres_state == state_QRES_NEW:
hit_state = state_HIT_NEW
else:
hit_state = state_HIT_SAME
if prev is not None:
# create fragment and HSP and set their attributes
hsp = _create_hsp(prev_hid, prev_qid, prev)
hsp_list.append(hsp)
if hit_state == state_HIT_NEW:
# create Hit and set its attributes
hit = Hit(hsp_list)
hit.seq_len = prev['tsize']
hit_list.append(hit)
hsp_list = []
# create qresult and yield if we're at a new qresult or at EOF
if qres_state == state_QRES_NEW or file_state == state_EOF:
qresult = QueryResult(prev_qid)
for hit in hit_list:
qresult.absorb(hit)
qresult.seq_len = prev['qsize']
yield qresult
# if we're at EOF, break
if file_state == state_EOF:
break
hit_list = []
self.line = self.handle.readline()
示例7: _parse_hit
def _parse_hit(self, root_hit_elem, query_id):
"""Generator that transforms Iteration_hits XML elements into Hit objects.
:param root_hit_elem: root element of the Iteration_hits tag.
:type root_hit_elem: XML element tag
:param query_id: QueryResult ID of this Hit
:type query_id: string
"""
# Hit level processing
# Hits are stored in the Iteration_hits tag, with the following
# DTD
# <!ELEMENT Hit (
# Hit_num,
# Hit_id,
# Hit_def,
# Hit_accession,
# Hit_len,
# Hit_hsps?)>
# feed the loop below an empty list so iteration still works
if root_hit_elem is None:
root_hit_elem = []
for hit_elem in root_hit_elem:
# create empty hit object
hit_id = hit_elem.findtext('Hit_id')
hit_desc = hit_elem.findtext('Hit_def')
# handle blast searches against databases with Blast's IDs
if hit_id.startswith('gnl|BL_ORD_ID|'):
blast_hit_id = hit_id
id_desc = hit_desc.split(' ', 1)
hit_id = id_desc[0]
try:
hit_desc = id_desc[1]
except IndexError:
hit_desc = ''
else:
blast_hit_id = ''
# combine primary ID and defline first before splitting
full_id_desc = hit_id + ' ' + hit_desc
id_descs = [(x.strip(), y.strip()) for x, y in \
[a.split(' ', 1) for a in full_id_desc.split(' >')]]
hit_id, hit_desc = id_descs[0]
hsps = [hsp for hsp in
self._parse_hsp(hit_elem.find('Hit_hsps'),
query_id, hit_id)]
hit = Hit(hsps)
hit.description = hit_desc
hit._id_alt = [x[0] for x in id_descs[1:]]
hit._description_alt = [x[1] for x in id_descs[1:]]
# blast_hit_id is only set if the hit ID is Blast-generated
hit._blast_id = blast_hit_id
for key, val_info in _ELEM_HIT.items():
value = hit_elem.findtext(key)
if value is not None:
caster = val_info[1]
# recast only if value is not intended to be str
if value is not None and caster is not str:
value = caster(value)
setattr(hit, val_info[0], value)
# delete element after we finish parsing it
hit_elem.clear()
yield hit
示例8: _parse_hit
def _parse_hit(self, query_id):
# Generator that reads lines from self.handle and yields (hit, strand)
# tuples, where hit is a Hit built from the HSPs collected so far.
# skip ahead to the first hit header (a line starting with '>>')
while True:
self.line = self.handle.readline()
if self.line.startswith('>>'):
break
state = _STATE_NONE
strand = None
hsp_list = []
while True:
# look at the upcoming line; self.line still holds the current one
# (presumably peekline() does not consume the line -- verify)
peekline = self.handle.peekline()
# yield hit if we've reached the start of a new query or
# the end of the search
if peekline.strip() in [">>><<<", ">>>///"] or \
(not peekline.startswith('>>>') and '>>>' in peekline):
# append last parsed_hsp['hit']['seq'] line
if state == _STATE_HIT_BLOCK:
parsed_hsp['hit']['seq'] += self.line.strip()
elif state == _STATE_CONS_BLOCK:
# NOTE(review): the similarity string is initialised on
# hsp.fragment.aln_annotation in the '; al_cons' branch below but is
# extended through hsp.aln_annotation here -- confirm both names
# refer to the same mapping
hsp.aln_annotation['similarity'] += \
self.line.strip('\r\n')
# process HSP alignment and coordinates
_set_hsp_seqs(hsp, parsed_hsp, self._preamble['program'])
hit = Hit(hsp_list)
hit.description = hit_desc
# NOTE(review): seq_len is not assigned in the part of this function
# shown here; presumably it is set further down -- verify
hit.seq_len = seq_len
yield hit, strand
hsp_list = []
break
# yield hit and create a new one if we're still in the same query
elif self.line.startswith('>>'):
# try yielding, if we have hsps
if hsp_list:
_set_hsp_seqs(hsp, parsed_hsp, self._preamble['program'])
hit = Hit(hsp_list)
hit.description = hit_desc
hit.seq_len = seq_len
yield hit, strand
hsp_list = []
# try to get the hit id and desc, and handle cases without descs
try:
hit_id, hit_desc = self.line[2:].strip().split(' ', 1)
except ValueError:
hit_id = self.line[2:].strip().split(' ', 1)[0]
hit_desc = ''
# create the HSP object for Hit
frag = HSPFragment(hit_id, query_id)
hsp = HSP([frag])
hsp_list.append(hsp)
# set or reset the state to none
state = _STATE_NONE
parsed_hsp = {'query': {}, 'hit': {}}
# create and append a new HSP if line starts with '>--'
elif self.line.startswith('>--'):
# set seq attributes of previous hsp
_set_hsp_seqs(hsp, parsed_hsp, self._preamble['program'])
# and create a new one
frag = HSPFragment(hit_id, query_id)
hsp = HSP([frag])
hsp_list.append(hsp)
# set the state ~ none yet
state = _STATE_NONE
parsed_hsp = {'query': {}, 'hit': {}}
# this is either query or hit data in the HSP, depending on the state
elif self.line.startswith('>'):
# the first '>' line after an HSP starts opens the query block, the
# second one opens the hit block
if state == _STATE_NONE:
# make sure it's the correct query
assert query_id.startswith(self.line[1:].split(' ')[0]), \
"%r vs %r" % (query_id, self.line)
state = _STATE_QUERY_BLOCK
parsed_hsp['query']['seq'] = ''
elif state == _STATE_QUERY_BLOCK:
# make sure it's the correct hit
assert hit_id.startswith(self.line[1:].split(' ')[0])
state = _STATE_HIT_BLOCK
parsed_hsp['hit']['seq'] = ''
# check for conservation block
elif self.line.startswith('; al_cons'):
state = _STATE_CONS_BLOCK
hsp.fragment.aln_annotation['similarity'] = ''
elif self.line.startswith(';'):
# Fasta outputs do not make a clear distinction between Hit
# and HSPs, so we check the attribute names to determine
# whether it belongs to a Hit or HSP
regx = re.search(_RE_ATTR, self.line.strip())
name = regx.group(1)
value = regx.group(2)
# for values before the '>...' query block
if state == _STATE_NONE:
# only names listed in _HSP_ATTR_MAP are stored on the HSP
if name in _HSP_ATTR_MAP:
attr_name, caster = _HSP_ATTR_MAP[name]
if caster is not str:
value = caster(value)
# identity and similarity values are multiplied by 100
# (presumably converting a fraction to a percentage -- confirm)
if name in ['_ident', '_sim']:
value *= 100
setattr(hsp, attr_name, value)
# otherwise, pool the values for processing later
elif state == _STATE_QUERY_BLOCK:
parsed_hsp['query'][name] = value
#.........这里部分代码省略.........
示例9: __iter__
#.........这里部分代码省略.........
qid, qdesc = rec.query.split(' ', 1)
except ValueError:
# the query line holds an ID only, without a description
qid, qdesc = rec.query, ''
# descriptions may span several lines; join them by dropping line breaks
qdesc = qdesc.replace('\n', '').replace('\r', '')
qresult = QueryResult(id=qid)
qresult.program = rec.application.lower()
qresult.target = rec.database
qresult.seq_len = rec.query_letters
qresult.version = rec.version
# determine alphabet based on program
# NOTE(review): there is no else branch, so for any other program name
# `alphabet` is not assigned here before it is used below -- confirm that
# this cannot happen
if qresult.program == 'blastn':
alphabet = generic_dna
elif qresult.program in ['blastp', 'blastx', 'tblastn', 'tblastx']:
alphabet = generic_protein
# iterate over the 'alignments' (hits) and the hit table
for idx, aln in enumerate(rec.alignments):
# get id and desc
# strip the leading '>' (and the space after it, if any) from the title
if aln.title.startswith('> '):
aln.title = aln.title[2:]
elif aln.title.startswith('>'):
aln.title = aln.title[1:]
try:
hid, hdesc = aln.title.split(' ', 1)
except ValueError:
# the title holds an ID only, without a description
hid, hdesc = aln.title, ''
hdesc = hdesc.replace('\n', '').replace('\r', '')
# iterate over the hsps and group them in a list
hsp_list = []
for bhsp in aln.hsps:
frag = HSPFragment(hid, qid)
frag.alphabet = alphabet
# set alignment length
# (taken from the second item of bhsp.identities)
frag.aln_span = bhsp.identities[1]
# set frames
# when bhsp.frame has no entry at the index, fall back to 0 for
# blastp / tblastn and to 1 for every other program
try:
frag.query_frame = int(bhsp.frame[0])
except IndexError:
if qresult.program in ('blastp', 'tblastn'):
frag.query_frame = 0
else:
frag.query_frame = 1
try:
frag.hit_frame = int(bhsp.frame[1])
except IndexError:
if qresult.program in ('blastp', 'tblastn'):
frag.hit_frame = 0
else:
frag.hit_frame = 1
# set query coordinates
# the smaller value becomes the start (shifted down by one), the larger
# value the end, whatever order the two were reported in
frag.query_start = min(bhsp.query_start,
bhsp.query_end) - 1
frag.query_end = max(bhsp.query_start, bhsp.query_end)
# set hit coordinates
frag.hit_start = min(bhsp.sbjct_start,
bhsp.sbjct_end) - 1
frag.hit_end = max(bhsp.sbjct_start, bhsp.sbjct_end)
# set query, hit sequences and its annotation
qseq = ''
hseq = ''
midline = ''
# walk the query, subject and match lines column by column; a column is
# dropped when the query or subject character is a space, in which case
# all three characters are required to be spaces
for seqtrio in zip(bhsp.query, bhsp.sbjct, bhsp.match):
qchar, hchar, mchar = seqtrio
if qchar == ' ' or hchar == ' ':
assert all(' ' == x for x in seqtrio)
else:
qseq += qchar
hseq += hchar
midline += mchar
frag.query, frag.hit = qseq, hseq
frag.aln_annotation['similarity'] = midline
# create HSP object with the fragment
hsp = HSP([frag])
hsp.evalue = bhsp.expect
hsp.bitscore = bhsp.bits
hsp.bitscore_raw = bhsp.score
# set gap
# a missing gap entry is recorded as zero gaps
try:
hsp.gap_num = bhsp.gaps[0]
except IndexError:
hsp.gap_num = 0
# set identity
hsp.ident_num = bhsp.identities[0]
hsp.pos_num = bhsp.positives[0]
# when no positives count is available, use the alignment span of the
# HSP's first fragment instead
if hsp.pos_num is None:
hsp.pos_num = hsp[0].aln_span
hsp_list.append(hsp)
# wrap the collected HSPs in a Hit and add it to the query result
hit = Hit(hsp_list)
hit.seq_len = aln.length
hit.description = hdesc
qresult.append(hit)
qresult.description = qdesc
yield qresult