本文整理汇总了Python中Bio.SearchIO._model.QueryResult类的典型用法代码示例。如果您正苦于以下问题：Python QueryResult类的具体用法？Python QueryResult怎么用？Python QueryResult使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了QueryResult类的9个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _parse_qresult
def _parse_qresult(self):
    """Parse HMMER3 query blocks, yielding one QueryResult per query.

    The method first skips forward to a line starting with ``Query:`` and
    then keeps going while ``self.line`` is non-empty. For every query it
    reads the ID and length from the current line, collects the optional
    ``Accession:`` and ``Description:`` lines that precede the
    ``Scores for `` line, parses the hits through ``self._parse_hit`` and
    skips the ``Internal pipeline`` summary up to a line containing ``//``.

    Yields:
        QueryResult objects carrying the parsed hits plus the ``seq_len``,
        ``program``, ``version`` and ``target`` attributes and, when the
        corresponding lines exist, ``accession`` and ``description``.

    Raises:
        AttributeError: if the current query line does not match
            ``_QRE_ID_LEN`` (``re.search`` then returns ``None``).
    """
    self._read_until(lambda line: line.startswith('Query:'))

    while self.line:
        # get query id and length
        regx = re.search(_QRE_ID_LEN, self.line)
        qid = regx.group(1).strip()
        # store qresult attributes
        qresult_attrs = {
            'seq_len': int(regx.group(2)),
            'program': self._meta.get('program'),
            'version': self._meta.get('version'),
            'target': self._meta.get('target'),
        }

        # get description and accession, if they exist; an empty
        # ``self.line`` is treated as end of input (same convention as the
        # enclosing loop) so a truncated block cannot spin forever here
        while self.line and not self.line.startswith('Scores for '):
            self.line = read_forward(self.handle)
            if self.line.startswith('Accession:'):
                acc = self.line.strip().split(' ', 1)[1]
                qresult_attrs['accession'] = acc.strip()
            elif self.line.startswith('Description:'):
                desc = self.line.strip().split(' ', 1)[1]
                qresult_attrs['description'] = desc.strip()

        # parse the query hits; start from an empty list so the name is
        # bound even when the loop below never runs
        hit_list = []
        while self.line and '//' not in self.line:
            hit_list = self._parse_hit(qid)
            # read through the statistics summary
            # TODO: parse and store this information?
            if self.line.startswith('Internal pipeline'):
                while self.line and '//' not in self.line:
                    self.line = read_forward(self.handle)

        # create qresult, set its attributes and yield
        # not initializing hit_list directly to handle empty hits
        # (i.e. need to set its query description manually)
        qresult = QueryResult(id=qid)
        for hit in hit_list:
            if not hit:
                # NOTE(review): qresult_attrs has not been applied yet, so
                # this copies whatever description a fresh QueryResult
                # has -- confirm that this is intended
                hit.query_description = qresult.description
            qresult.append(hit)
        for attr, value in qresult_attrs.items():
            setattr(qresult, attr, value)
        yield qresult
        self.line = read_forward(self.handle)
示例2: parse_qresult
def parse_qresult(self):
    """Parse HMMER2 query blocks, yielding one QueryResult per block.

    Each pass of the outer loop expects ``self.read_next()`` to deliver a
    line starting with ``Query``. The generator finishes quietly as soon as
    a different line (or the end of input) is met.

    Yields:
        ``self.qresult``, a QueryResult whose ``accession`` and
        ``description`` are filled in when the matching header lines were
        present.
    """
    while self.read_next():
        if not self.line.startswith('Query'):
            # a plain return ends the generator; raising StopIteration
            # inside a generator is turned into RuntimeError (PEP 479)
            return
        _, id_ = self.parse_key_value()
        self.qresult = QueryResult(id=id_)

        description = None

        # header lines up to (not including) the 'Scores' line
        while self.read_next() and not self.line.startswith('Scores'):
            if self.line.startswith('Accession'):
                self.qresult.accession = self.parse_key_value()[1]
            if self.line.startswith('Description'):
                description = self.parse_key_value()[1]

        hit_placeholders = self.parse_hits()
        if len(hit_placeholders) > 0:
            self.parse_hsps(hit_placeholders)
            self.parse_hsp_alignments()

        # skip ahead to the next 'Query' line or to an empty line value
        while not self.line.startswith('Query'):
            self.read_next()
            if not self.line:
                break
        # hand the current line back so the outer read_next() sees it again
        self.buf.append(self.line)

        # the description is applied last, after hits and HSPs were parsed
        if description is not None:
            self.qresult.description = description
        yield self.qresult
示例3: _create_qresult
def _create_qresult(self, hit_blocks):
    """Turn parsed hit blocks into Biopython search objects (PRIVATE).

    Every block becomes one HSPFragment wrapped in one HSP. HSPs are grouped
    into Hit objects by ``hit_id`` in first-seen order; the first block of a
    hit also supplies that hit's description, e-value and score.

    Args:
        hit_blocks: iterable of mappings with the keys ``hit_id``,
            ``query_start``, ``query_end``, ``hit_start``, ``hit_end``,
            ``hit_seq``, ``query_seq``, ``description``, ``evalue``,
            ``score`` and ``prob``.

    Returns:
        A one-element list holding the QueryResult for ``self.query_id``.
    """
    qid = self.query_id
    hits_by_id = OrderedDict()
    # Should everything should be included?
    included = True

    for position, blk in enumerate(hit_blocks):
        hid = blk['hit_id']

        fragment = HSPFragment(hid, qid)
        fragment.alphabet = generic_protein
        # start coordinates are stored as the block value minus one
        fragment.query_start = blk['query_start'] - 1
        fragment.query_end = blk['query_end']
        fragment.hit_start = blk['hit_start'] - 1
        fragment.hit_end = blk['hit_end']
        fragment.hit = blk['hit_seq']
        fragment.query = blk['query_seq']

        hsp = HSP([fragment])
        hsp.hit_id = hid
        hsp.output_index = position
        hsp.query_id = qid
        hsp.hit_description = blk['description']
        hsp.is_included = included
        hsp.evalue = blk['evalue']
        hsp.score = blk['score']
        hsp.prob = blk['prob']

        known_hit = hits_by_id.get(hid)
        if known_hit is not None:
            # later blocks of an already seen hit only contribute an HSP
            known_hit.append(hsp)
            continue

        new_hit = Hit([hsp], hid)
        new_hit.description = blk['description']
        new_hit.is_included = included
        new_hit.evalue = blk['evalue']
        new_hit.score = blk['score']
        hits_by_id[hid] = new_hit

    result = QueryResult(hits_by_id.values(), qid)
    result.program = _PROGRAM
    result.seq_len = self.seq_len
    return [result]
示例4: _parse_qresult
def _parse_qresult(self):
"""Parses query results."""
# parse the queries
for event, qresult_elem in self.xml_iter:
# </Iteration> marks the end of a single query
# which means we can process it
if event == 'end' and qresult_elem.tag == 'Iteration':
# we'll use the following schema
# <!ELEMENT Iteration (
# Iteration_iter-num,
# Iteration_query-ID?,
# Iteration_query-def?,
# Iteration_query-len?,
# Iteration_hits?,
# Iteration_stat?,
# Iteration_message?)>
# assign query attributes with fallbacks
query_id = qresult_elem.findtext('Iteration_query-ID')
if query_id is None:
query_id = self._fallback['id']
query_desc = qresult_elem.findtext('Iteration_query-def')
if query_desc is None:
query_desc = self._fallback['description']
query_len = qresult_elem.findtext('Iteration_query-len')
if query_len is None:
query_len = self._fallback['len']
# handle blast searches against databases with Blast's IDs
# 'Query_' marks the beginning of a BLAST+-generated ID,
# 'lcl|' marks the beginning of a BLAST legacy-generated ID
if query_id.startswith('Query_') or query_id.startswith('lcl|'):
# store the Blast-generated query ID
blast_query_id = query_id
id_desc = query_desc.split(' ', 1)
query_id = id_desc[0]
try:
query_desc = id_desc[1]
except IndexError:
query_desc = ''
else:
blast_query_id = ''
hit_list, key_list = [], []
for hit in self._parse_hit(qresult_elem.find('Iteration_hits'),
query_id):
if hit:
# need to keep track of hit IDs, since there could be duplicates,
if hit.id in key_list:
warnings.warn("Adding hit with BLAST-generated ID "
"%r since hit ID %r is already present "
"in query %r. Your BLAST database may contain "
"duplicate entries." %
(hit._blast_id, hit.id, query_id), BiopythonParserWarning)
# fallback to Blast-generated IDs, if the ID is already present
# and restore the desc, too
hit.description = '%s %s' % (hit.id, hit.description)
hit.id = hit._blast_id
# and change the hit_id of the HSPs contained
for hsp in hit:
hsp.hit_id = hit._blast_id
else:
key_list.append(hit.id)
hit_list.append(hit)
# create qresult and assign its attributes
qresult = QueryResult(hit_list, query_id)
qresult.description = query_desc
qresult.seq_len = int(query_len)
qresult._blast_id = blast_query_id
for key, value in self._meta.items():
setattr(qresult, key, value)
# statistics are stored in Iteration_stat's 'grandchildren' with the
# following DTD
# <!ELEMENT Statistics (
# Statistics_db-num,
# Statistics_db-len,
# Statistics_hsp-len,
# Statistics_eff-space,
# Statistics_kappa,
# Statistics_lambda,
# Statistics_entropy)>
stat_iter_elem = qresult_elem.find('Iteration_stat')
if stat_iter_elem is not None:
stat_elem = stat_iter_elem.find('Statistics')
for key, val_info in _ELEM_QRESULT_OPT.items():
value = stat_elem.findtext(key)
if value is not None:
caster = val_info[1]
# recast only if value is not intended to be str
if value is not None and caster is not str:
value = caster(value)
setattr(qresult, val_info[0], value)
#.........这里部分代码省略.........
示例5: _parse_qresult
def _parse_qresult(self):
    """Generator function that returns QueryResult objects.

    Rows are consumed one per iteration, starting with ``self.line``.
    Consecutive rows with the same ``qname`` end up in one QueryResult and,
    inside it, consecutive rows with the same ``tname`` in one Hit. Whether
    a group is finished is only known once the following row (or the end of
    input) has been seen, so every iteration builds objects from the
    *previous* row.

    Yields:
        One QueryResult per run of rows sharing a query name. Nothing is
        yielded when there is no row at all.
    """
    # state values, determines what to do for each line
    state_EOF = 0
    state_QRES_NEW = 1
    state_QRES_SAME = 3
    state_HIT_NEW = 2
    state_HIT_SAME = 4
    # initial dummy values
    qres_state = None
    file_state = None
    prev_qid, prev_hid = None, None
    cur, prev = None, None
    hit_list, hsp_list = [], []

    while True:
        # store previous line's parsed values for all lines after the first
        if cur is not None:
            prev = cur
            prev_qid = cur_qid
            prev_hid = cur_hid
        # only parse the result row if it's not EOF
        if self.line:
            cur = self._parse_row()
            cur_qid = cur['qname']
            cur_hid = cur['tname']
        else:
            file_state = state_EOF
            # mock values, since we have nothing to parse
            cur_qid, cur_hid = None, None

        # get the state of hit and qresult
        if prev_qid != cur_qid:
            qres_state = state_QRES_NEW
        else:
            qres_state = state_QRES_SAME
        # new hits are hits with different ids or hits in a new qresult
        if prev_hid != cur_hid or qres_state == state_QRES_NEW:
            hit_state = state_HIT_NEW
        else:
            hit_state = state_HIT_SAME

        if prev is not None:
            # create fragment and HSP and set their attributes
            hsp = _create_hsp(prev_hid, prev_qid, prev)
            hsp_list.append(hsp)

            if hit_state == state_HIT_NEW:
                # create Hit and set its attributes
                hit = Hit(hsp_list)
                hit.seq_len = prev['tsize']
                hit_list.append(hit)
                hsp_list = []

            # create qresult and yield if we're at a new qresult or at EOF
            if qres_state == state_QRES_NEW or file_state == state_EOF:
                qresult = QueryResult(prev_qid)
                for hit in hit_list:
                    qresult.absorb(hit)
                qresult.seq_len = prev['qsize']
                yield qresult
                hit_list = []

        # if we're at EOF, break; checked outside the ``prev`` branch so
        # that input without a single row terminates as well
        if file_state == state_EOF:
            break

        self.line = self.handle.readline()
示例6: _parse_qresult
def _parse_qresult(self):
    """Yield QueryResult objects parsed from ``>>>``-delimited query sections.

    Each line is first classified (hit table header, end marker, start of a
    new query, start of the query content, or nothing of interest) and then
    handled accordingly; afterwards the next line is read from
    ``self.handle``. The generator stops after yielding at the end marker
    ``>>>///`` or at an empty line value.
    """
    # line categories
    NEW_QUERY, HIT_TABLE, CONTENT, END = 1, 3, 5, 7

    qresult = None
    hit_rows = []

    while True:
        current = self.line
        stripped = current.strip()

        if current.startswith('The best scores are:'):
            # one line before the hit table
            category = HIT_TABLE
        elif not current or stripped == '>>>///':
            # the end of a query or the file altogether
            category = END
        elif current.startswith('>>>'):
            # query info with its hits + hsps, unless it is the '>>><<<' line
            category = CONTENT if stripped != '>>><<<' else None
        elif '>>>' in current:
            # '>>>' somewhere after the first column opens a new query
            category = NEW_QUERY
        else:
            category = None

        if category == HIT_TABLE:
            hit_rows = self.__parse_hit_table()

        elif category == END:
            yield _set_qresult_hits(qresult, hit_rows)
            break

        elif category == NEW_QUERY:
            # flush the query collected so far before starting a new one
            if qresult is not None:
                yield _set_qresult_hits(qresult, hit_rows)
            found = re.search(_RE_ID_DESC_SEQLEN, self.line)
            query_id, desc, seq_len = found.group(1, 2, 3)
            qresult = QueryResult(id=query_id)
            qresult.seq_len = int(seq_len)
            # the target is the second space-separated token of the next line
            self.line = self.handle.readline()
            tokens = [tok for tok in self.line.split(' ') if tok]
            qresult.target = tokens[1].strip()
            if desc is not None:
                qresult.description = desc
            # values gathered from the preamble apply to every query
            for key, value in self._preamble.items():
                setattr(qresult, key, value)

        elif category == CONTENT:
            assert self.line[3:].startswith(qresult.id), self.line
            for hit, strand in self._parse_hit(query_id):
                # HACK: re-set desc, for hsp hit and query description
                hit.description = hit.description
                hit.query_description = qresult.description
                if hit.id in qresult:
                    # the hit is already stored; its HSPs are expected to
                    # come from a different strand and are added to the
                    # stored hit
                    for hsp in hit.hsps:
                        assert strand != hsp.query_strand
                        qresult[hit.id].append(hsp)
                else:
                    qresult.append(hit)

        self.line = self.handle.readline()
示例7: _parse_qresult
def _parse_qresult(self):
    """Yield QueryResult objects built from consecutive alignment blocks.

    Before each block the reader is advanced with ``self.read_until`` to a
    line starting with ``self._ALN_MARK`` (switched to ``'C4 Alignment:'``
    when ``self.has_c4_alignment`` is set). Consecutive blocks with the same
    query ID are merged into one QueryResult and, within it, consecutive
    blocks with the same hit ID into one Hit. A group is only known to be
    complete once the next block (or the end of input) has been seen, so
    every iteration builds objects from the *previous* block.

    Yields:
        One QueryResult per run of blocks sharing a query ID. Nothing is
        yielded when no block is found.
    """
    # state values
    state_EOF = 0
    state_QRES_NEW = 1
    state_QRES_SAME = 3
    state_HIT_NEW = 2
    state_HIT_SAME = 4
    # initial dummies
    qres_state, hit_state = None, None
    file_state = None
    cur_qid, cur_hid = None, None
    prev_qid, prev_hid = None, None
    cur, prev = None, None
    hit_list, hsp_list = [], []
    # if the file has c4 alignments, use that as the alignment mark
    if self.has_c4_alignment:
        self._ALN_MARK = 'C4 Alignment:'

    while True:
        self.read_until(lambda line: line.startswith(self._ALN_MARK))
        if cur is not None:
            prev = cur
            prev_qid = cur_qid
            prev_hid = cur_hid
        # only parse the result row if it's not EOF
        if self.line:
            assert self.line.startswith(self._ALN_MARK), self.line
            # create temp dicts for storing parsed values
            header = {'qresult': {}, 'hit': {}, 'hsp': {}}
            # if the file has c4 alignments, try to parse the header
            if self.has_c4_alignment:
                self.read_until(lambda line:
                                line.strip().startswith('Query:'))
                header = self._parse_alignment_header()
            # parse the block contents
            cur = self.parse_alignment_block(header)
            cur_qid = cur['qresult']['id']
            cur_hid = cur['hit']['id']
        # NOTE: every non-empty line is handled by the branch above, so
        # only an empty line value can reach this test
        elif not self.line or self.line.startswith('-- completed '):
            file_state = state_EOF
            cur_qid, cur_hid = None, None

        # get the state of hit and qresult
        if prev_qid != cur_qid:
            qres_state = state_QRES_NEW
        else:
            qres_state = state_QRES_SAME
        # new hits are hits with different ids or hits in a new query
        if prev_hid != cur_hid or qres_state == state_QRES_NEW:
            hit_state = state_HIT_NEW
        else:
            hit_state = state_HIT_SAME

        if prev is not None:
            hsp = _create_hsp(prev_hid, prev_qid, prev['hsp'])
            hsp_list.append(hsp)

            if hit_state == state_HIT_NEW:
                hit = Hit(hsp_list)
                for attr, value in prev['hit'].items():
                    setattr(hit, attr, value)
                hit_list.append(hit)
                hsp_list = []

            if qres_state == state_QRES_NEW or file_state == state_EOF:
                qresult = QueryResult(id=prev_qid)
                for hit in hit_list:
                    # not using append since Exonerate may separate the
                    # same hit if it has different strands
                    qresult.absorb(hit)
                for attr, value in prev['qresult'].items():
                    setattr(qresult, attr, value)
                yield qresult
                hit_list = []

        # leave at EOF; checked outside the ``prev`` branch so that input
        # without a single alignment block terminates as well
        if file_state == state_EOF:
            break

        # only readline() here if we're not parsing C4 alignments
        # C4 alignments readline() is handled by its parse_alignment_block
        # function
        if not self.has_c4_alignment:
            self.line = self.handle.readline()
示例8: Hmmer2TextParser
class Hmmer2TextParser(object):
"""Iterator for the HMMER 2.0 text output."""
def __init__(self, handle):
    """Keep the handle, create the push-back buffer and parse the preamble.

    Args:
        handle: object providing ``readline()``; stored as ``self.handle``.
    """
    # both attributes must exist before parse_preamble() starts reading
    self.handle, self.buf = handle, []
    self._meta = self.parse_preamble()
def __iter__(self):
    """Yield each parsed query result after stamping it with preamble data.

    ``program``, ``target`` and ``version`` are copied from ``self._meta``;
    a key missing from the preamble results in ``None``.
    """
    for parsed in self.parse_qresult():
        for field in ('program', 'target', 'version'):
            setattr(parsed, field, self._meta.get(field))
        yield parsed
def read_next(self, rstrip=True):
    """Return the next line, preferring lines that were pushed back.

    Args:
        rstrip: when true (the default), whitespace-only lines are skipped
            and trailing whitespace is removed from the line that is
            returned; when false, the next raw line is returned unchanged.

    Returns:
        The most recently buffered line if ``self.buf`` is not empty,
        otherwise the line read from ``self.handle`` (also stored in
        ``self.line``). An empty string signals that ``readline()`` had
        nothing more to give.
    """
    if self.buf:
        # NOTE(review): this path leaves self.line untouched -- callers
        # appear to rely on the buffered line being the current self.line;
        # confirm
        return self.buf.pop()

    while True:
        self.line = self.handle.readline()
        exhausted = not self.line
        if exhausted or not rstrip or self.line.strip():
            break

    if self.line and rstrip:
        self.line = self.line.rstrip()
    return self.line
def push_back(self, line):
    """Put *line* on the push-back buffer so it is not parsed yet.

    Args:
        line: the line to add to the end of ``self.buf``.
    """
    pending = self.buf
    pending.append(line)
def parse_key_value(self):
    """Split ``self.line`` at its first colon into a stripped (key, value) pair.

    Returns:
        A 2-tuple ``(key, value)`` with surrounding whitespace removed.

    Raises:
        ValueError: if ``self.line`` contains no colon.
    """
    # unpacking fails with ValueError when the split yields a single piece
    key, value = (piece.strip() for piece in self.line.split(':', 1))
    return key, value
def parse_preamble(self):
    """Read the HMMER2 preamble and return its contents as a dict.

    The preamble is read in two phases. In the first, the program name is
    taken from a line starting with ``hmm`` and the version from a line
    starting with ``HMMER`` (the ``HMMER is`` line is ignored); a line with
    exactly 36 dashes switches to the second phase. There, every line is
    split into a key-value pair until a line with exactly 32 dashes ends
    the preamble.

    Returns:
        Dict with ``program``, ``version`` and one entry per option line.
        The ``Sequence database`` option (for ``hmmsearch``) or the
        ``HMM file`` option (for ``hmmpfam``) is stored under ``target``
        instead of its own name.
    """
    # option that names the search target, per program
    target_option = {
        'hmmsearch': 'Sequence database',
        'hmmpfam': 'HMM file',
    }
    meta = {}
    state = "GENERIC"

    while self.read_next():
        if state == "GENERIC":
            text = self.line
            if text.startswith('hmm'):
                meta['program'] = text.split('-')[0].strip()
            elif text.startswith('HMMER is'):
                pass
            elif text.startswith('HMMER'):
                meta['version'] = text.split()[1]
            elif text.count('-') == 36:
                state = "OPTIONS"
            continue

        assert state == "OPTIONS"
        assert 'program' in meta

        if self.line.count('-') == 32:
            break

        key, value = self.parse_key_value()
        if target_option.get(meta['program']) == key:
            meta['target'] = value
        else:
            meta[key] = value

    return meta
def parse_qresult(self):
"""Parse a HMMER2 query block."""
while self.read_next():
if not self.line.startswith('Query'):
raise StopIteration()
_, id_ = self.parse_key_value()
self.qresult = QueryResult(id=id_)
description = None
while self.read_next() and not self.line.startswith('Scores'):
if self.line.startswith('Accession'):
self.qresult.accession = self.parse_key_value()[1]
if self.line.startswith('Description'):
description = self.parse_key_value()[1]
hit_placeholders = self.parse_hits()
if len(hit_placeholders) > 0:
self.parse_hsps(hit_placeholders)
self.parse_hsp_alignments()
while not self.line.startswith('Query'):
self.read_next()
if not self.line:
break
self.buf.append(self.line)
if description is not None:
self.qresult.description = description
#.........这里部分代码省略.........
示例9: __iter__
def __iter__(self):
for rec in self.blast_iter:
# set attributes to SearchIO's
# get id and desc
if rec.query.startswith('>'):
rec.query = rec.query[1:]
try:
qid, qdesc = rec.query.split(' ', 1)
except ValueError:
qid, qdesc = rec.query, ''
qdesc = qdesc.replace('\n', '').replace('\r', '')
qresult = QueryResult(id=qid)
qresult.program = rec.application.lower()
qresult.target = rec.database
qresult.seq_len = rec.query_letters
qresult.version = rec.version
# determine alphabet based on program
if qresult.program == 'blastn':
alphabet = generic_dna
elif qresult.program in ['blastp', 'blastx', 'tblastn', 'tblastx']:
alphabet = generic_protein
# iterate over the 'alignments' (hits) and the hit table
for idx, aln in enumerate(rec.alignments):
# get id and desc
if aln.title.startswith('> '):
aln.title = aln.title[2:]
elif aln.title.startswith('>'):
aln.title = aln.title[1:]
try:
hid, hdesc = aln.title.split(' ', 1)
except ValueError:
hid, hdesc = aln.title, ''
hdesc = hdesc.replace('\n', '').replace('\r', '')
# iterate over the hsps and group them in a list
hsp_list = []
for bhsp in aln.hsps:
frag = HSPFragment(hid, qid)
frag.alphabet = alphabet
# set alignment length
frag.aln_span = bhsp.identities[1]
# set frames
try:
frag.query_frame = int(bhsp.frame[0])
except IndexError:
if qresult.program in ('blastp', 'tblastn'):
frag.query_frame = 0
else:
frag.query_frame = 1
try:
frag.hit_frame = int(bhsp.frame[1])
except IndexError:
if qresult.program in ('blastp', 'tblastn'):
frag.hit_frame = 0
else:
frag.hit_frame = 1
# set query coordinates
frag.query_start = min(bhsp.query_start,
bhsp.query_end) - 1
frag.query_end = max(bhsp.query_start, bhsp.query_end)
# set hit coordinates
frag.hit_start = min(bhsp.sbjct_start,
bhsp.sbjct_end) - 1
frag.hit_end = max(bhsp.sbjct_start, bhsp.sbjct_end)
# set query, hit sequences and its annotation
qseq = ''
hseq = ''
midline = ''
for seqtrio in zip(bhsp.query, bhsp.sbjct, bhsp.match):
qchar, hchar, mchar = seqtrio
if qchar == ' ' or hchar == ' ':
assert all(' ' == x for x in seqtrio)
else:
qseq += qchar
hseq += hchar
midline += mchar
frag.query, frag.hit = qseq, hseq
frag.aln_annotation['similarity'] = midline
# create HSP object with the fragment
hsp = HSP([frag])
hsp.evalue = bhsp.expect
hsp.bitscore = bhsp.bits
hsp.bitscore_raw = bhsp.score
# set gap
try:
hsp.gap_num = bhsp.gaps[0]
except IndexError:
hsp.gap_num = 0
# set identity
hsp.ident_num = bhsp.identities[0]
hsp.pos_num = bhsp.positives[0]
if hsp.pos_num is None:
hsp.pos_num = hsp[0].aln_span
hsp_list.append(hsp)
#.........这里部分代码省略.........