本文整理汇总了Python中Bio.Align.Generic.Alignment._annotations方法的典型用法代码示例。如果您正苦于以下问题:Python Alignment._annotations方法的具体用法?Python Alignment._annotations怎么用?Python Alignment._annotations使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Bio.Align.Generic.Alignment的用法示例。
在下文中一共展示了Alignment._annotations方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: next
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import _annotations [as 别名]
#.........这里部分代码省略.........
IMTVEEARQRGARLPSMPHVRTFLRLLTGCSRINSDVARRIPGIHRDPKD
RLSSLKQVEEALDMLISSHGEYCPLPLTMDVQAENFPEVLHTRTVRRLKR
QDFAFTRKMRREARQVEQSW
"""
#Match identifier
if not (line.startswith(">") and line.strip().endswith("..")):
raise ValueError("Expected line starting '>' and ending '..', got '%s'" % repr(line))
#print '----->', line.strip(), match_descr
match_descr = line[1:].split()[0] + match_descr
#assert match_descr.startswith(line[1:].split()[0])
# assert self._match_descr.startswith(line[1:].split()[0])
#Tagged data,
line = handle.readline()
line = self._parse_tag_section(line, match_annotation)
assert not line.startswith("; ")
#Now should have the aligned query sequence with flanking region...
while not (line.startswith(">") or ">>>" in line) and not line.startswith('#'):
match_seq_parts.append(line.strip())
line = handle.readline()
if not line:
#End of file
return None
if line.startswith('>') or '>>>' in line:
self._header = line
#We built a list of strings and then joined them because
#it's faster than appending to a string.
query_seq = "".join(query_seq_parts)
match_seq = "".join(match_seq_parts)
del query_seq_parts, match_seq_parts
#Note, query_seq and match_seq will usually be of different lengths, apparently
#because in the m10 format leading gaps are added but not trailing gaps!
#Remove the flanking regions,
query_align_seq = self._extract_alignment_region(query_seq, query_annotation)
match_align_seq = self._extract_alignment_region(match_seq, match_annotation)
#The "sq_offset" values can be specified with the -X command line option.
#They appear to just shift the origin used in the calculation of the coordinates.
if ("sq_offset" in query_annotation and query_annotation["sq_offset"] != "1") \
or ("sq_offset" in match_annotation and match_annotation["sq_offset"] != "1") :
#Note that until some point in the v35 series, FASTA always recorded one
#for the query offset, and omitted the match offset (even when these were
#set with the -X command line option).
#TODO - Work out how exactly the use of -X offsets changes things.
#raise ValueError("Offsets from the -X command line option are not (yet) supported")
pass
# this is not useful when using stretcher
# if len(query_align_seq) != len(match_align_seq) :
# raise ValueError("Problem parsing the alignment sequence coordinates")
if "sw_overlap" in alignment_annotation :
if int(alignment_annotation["sw_overlap"]) != len(query_align_seq) :
raise ValueError("Specified sw_overlap = %s does not match expected value %i" \
% (alignment_annotation["sw_overlap"],
len(query_align_seq)))
#TODO - Look at the "sq_type" to assign a sensible alphabet?
alignment = Alignment(self.alphabet)
#TODO - Introduce an annotated alignment class?
#For now, store the annotation as a new private property:
alignment._annotations = {}
#Want to record both the query header tags, and the alignment tags.
for key, value in self._query_header_annotation.iteritems() :
alignment._annotations[key] = value
for key, value in alignment_annotation.iteritems() :
alignment._annotations[key] = value
#TODO - Once the alignment object gets an append method, use it.
#(i.e. an add SeqRecord method)
alignment.add_sequence(self._query_descr, query_align_seq)
record = alignment.get_all_seqs()[-1]
assert record.id == self._query_descr or record.description == self._query_descr
assert record.seq.tostring() == query_align_seq
record.id = self._query_descr.split()[0].strip(",")
record.name = "query"
record.annotations["original_length"] = int(query_annotation["sq_len"])
# Custom addition: copy every parsed query annotation tag onto the record
for k in query_annotation.keys():
record.annotations[k] = query_annotation[k]
alignment.add_sequence(match_descr, match_align_seq)
record = alignment.get_all_seqs()[-1]
assert record.id == match_descr or record.description == match_descr
assert record.seq.tostring() == match_align_seq
record.id = match_descr.split()[0].strip(",")
record.name = "match"
record.annotations["original_length"] = int(match_annotation["sq_len"])
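# Custom addition: copy the match annotation tags onto the record
# NOTE(review): the loop below iterates over query_annotation's keys but reads
# the values from match_annotation; it probably should iterate over
# match_annotation instead (a key present only in the query would raise
# KeyError, and match-only keys are skipped) - confirm and fix.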
for k in query_annotation.keys():
record.annotations[k] = match_annotation[k]
return alignment
示例2: next
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import _annotations [as 别名]
#.........这里部分代码省略.........
assert not passed_end_alignment
parts = [x.strip() for x in line.split(" ", 1)]
if len(parts) != 2:
# This might be someone attempting to store a zero length sequence?
raise ValueError("Could not split line into identifier " + "and sequence:\n" + line)
id, seq = parts
if id not in ids:
ids.append(id)
seqs.setdefault(id, "")
seqs[id] += seq.replace(".", "-")
elif len(line) >= 5:
# Comment line or meta-data
if line[:5] == "#=GF ":
# Generic per-File annotation, free text
# Format: #=GF <feature> <free text>
feature, text = line[5:].strip().split(None, 1)
# Each feature key could be used more than once,
# so store the entries as a list of strings.
if feature not in gf:
gf[feature] = [text]
else:
gf[feature].append(text)
elif line[:5] == "#=GC ":
# Generic per-Column annotation, exactly 1 char per column
# Format: "#=GC <feature> <exactly 1 char per column>"
pass
elif line[:5] == "#=GS ":
# Generic per-Sequence annotation, free text
# Format: "#=GS <seqname> <feature> <free text>"
id, feature, text = line[5:].strip().split(None, 2)
# if id not in ids :
# ids.append(id)
if id not in gs:
gs[id] = {}
if feature not in gs[id]:
gs[id][feature] = [text]
else:
gs[id][feature].append(text)
elif line[:5] == "#=GR ":
# Generic per-Sequence AND per-Column markup
# Format: "#=GR <seqname> <feature> <exactly 1 char per column>"
id, feature, text = line[5:].strip().split(None, 2)
# if id not in ids :
# ids.append(id)
if id not in gr:
gr[id] = {}
if feature not in gr[id]:
gr[id][feature] = ""
gr[id][feature] += text.strip() # append to any previous entry
# TODO - Should we check the length matches the alignment length?
# For interlaced sequences the GR data can be split over
# multiple lines
# Next line...
assert len(seqs) <= len(ids)
# assert len(gs) <= len(ids)
# assert len(gr) <= len(ids)
self.ids = ids
self.sequences = seqs
self.seq_annotation = gs
self.seq_col_annotation = gr
if ids and seqs:
if self.records_per_alignment is not None and self.records_per_alignment != len(ids):
raise ValueError(
"Found %i records in this alignment, told to expect %i" % (len(ids), self.records_per_alignment)
)
alignment = Alignment(self.alphabet)
# TODO - Introduce an annotated alignment class?
# For now, store the annotation as a new private property:
alignment._annotations = gr
alignment_length = len(seqs.values()[0])
for id in ids:
seq = seqs[id]
if alignment_length != len(seq):
raise ValueError("Sequences have different lengths, or repeated identifier")
name, start, end = self._identifier_split(id)
alignment.add_sequence(id, seq, start=start, end=end)
record = alignment.get_all_seqs()[-1]
assert record.id == id or record.description == id
record.id = id
record.name = name
record.description = id
# will be overridden by _populate_meta_data if an explicit
# accession is provided:
record.annotations["accession"] = name
self._populate_meta_data(id, record)
return alignment
else:
return None
示例3: next
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import _annotations [as 别名]
#.........这里部分代码省略.........
#If we do anything with this in future, must remove any flanking region.
align_consensus = "".join(align_consensus_parts)
del align_consensus_parts
assert not line[0:2] == "; "
else :
align_consensus = None
assert (line[0] == ">" or ">>>" in line)
self._header = line
#We built a list of strings and then joined them because
#it's faster than appending to a string.
query_seq = "".join(query_seq_parts)
match_seq = "".join(match_seq_parts)
del query_seq_parts, match_seq_parts
#Note, query_seq and match_seq will usually be of different lengths, apparently
#because in the m10 format leading gaps are added but not trailing gaps!
#Remove the flanking regions,
query_align_seq = self._extract_alignment_region(query_seq, query_annotation)
match_align_seq = self._extract_alignment_region(match_seq, match_annotation)
#How can we do this for the (optional) consensus?
#The "sq_offset" values can be specified with the -X command line option.
#They appear to just shift the origin used in the calculation of the coordinates.
if len(query_align_seq) != len(match_align_seq) :
raise ValueError("Problem parsing the alignment sequence coordinates, "
"following should be the same length but are not:\n"
"%s - len %i\n%s - len %i" % (query_align_seq,
len(query_align_seq),
match_align_seq,
len(match_align_seq)))
if "sw_overlap" in alignment_annotation :
if int(alignment_annotation["sw_overlap"]) != len(query_align_seq) :
raise ValueError("Specified sw_overlap = %s does not match expected value %i" \
% (alignment_annotation["sw_overlap"],
len(query_align_seq)))
#TODO - Look at the "sq_type" to assign a sensible alphabet?
alphabet = self.alphabet
alignment = Alignment(alphabet)
#TODO - Introduce an annotated alignment class?
#For now, store the annotation as a new private property:
alignment._annotations = {}
#Want to record both the query header tags, and the alignment tags.
for key, value in self._query_header_annotation.iteritems() :
alignment._annotations[key] = value
for key, value in alignment_annotation.iteritems() :
alignment._annotations[key] = value
#TODO - Once the alignment object gets an append method, use it.
#(i.e. an add SeqRecord method)
alignment.add_sequence(self._query_descr, query_align_seq)
record = alignment.get_all_seqs()[-1]
assert record.id == self._query_descr or record.description == self._query_descr
#assert record.seq.tostring() == query_align_seq
record.id = self._query_descr.split(None,1)[0].strip(",")
record.name = "query"
record.annotations["original_length"] = int(query_annotation["sq_len"])
#TODO - handle start/end coordinates properly. Short term hack for now:
record._al_start = int(query_annotation["al_start"])
record._al_stop = int(query_annotation["al_stop"])
#TODO - What if a specific alphabet has been requested?
#TODO - Use an IUPAC alphabet?
#TODO - Can FASTA output RNA?
if alphabet == single_letter_alphabet and "sq_type" in query_annotation :
if query_annotation["sq_type"] == "D" :
record.seq.alphabet = generic_dna
elif query_annotation["sq_type"] == "p" :
record.seq.alphabet = generic_protein
if "-" in query_align_seq :
if not hasattr(record.seq.alphabet,"gap_char") :
record.seq.alphabet = Gapped(record.seq.alphabet, "-")
alignment.add_sequence(match_descr, match_align_seq)
record = alignment.get_all_seqs()[-1]
assert record.id == match_descr or record.description == match_descr
#assert record.seq.tostring() == match_align_seq
record.id = match_descr.split(None,1)[0].strip(",")
record.name = "match"
record.annotations["original_length"] = int(match_annotation["sq_len"])
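#TODO - handle start/end coordinates properly. Short term hack for now:
#NOTE(review): this is the match record, yet the two lines below read
#al_start/al_stop from query_annotation; match_annotation looks like the
#intended source - confirm and fix.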
record._al_start = int(query_annotation["al_start"])
record._al_stop = int(query_annotation["al_stop"])
#This is still a very crude way of dealing with the alphabet:
if alphabet == single_letter_alphabet and "sq_type" in match_annotation :
if match_annotation["sq_type"] == "D" :
record.seq.alphabet = generic_dna
elif match_annotation["sq_type"] == "p" :
record.seq.alphabet = generic_protein
if "-" in match_align_seq :
if not hasattr(record.seq.alphabet,"gap_char") :
record.seq.alphabet = Gapped(record.seq.alphabet, "-")
return alignment