本文整理汇总了Python中Bio.Align.Generic.Alignment.get_all_seqs方法的典型用法代码示例。如果您正苦于以下问题:Python Alignment.get_all_seqs方法的具体用法?Python Alignment.get_all_seqs怎么用?Python Alignment.get_all_seqs使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Bio.Align.Generic.Alignment的用法示例。
在下文中一共展示了Alignment.get_all_seqs方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import get_all_seqs [as 别名]
class Record:
    """Container for Saf data laid out much like the original record.

    Meant for callers who only want easy access to the Saf data.

    Attributes:
    alignment -- the Alignment object holding the sequence records
    """

    def __init__(self):
        self.alignment = Alignment(Bio.Alphabet.generic_alphabet)

    def __str__(self):
        # Every record contributes its description on a line of its own,
        # followed by whatever out_sequence() renders for the sequence data.
        chunks = []
        for entry in self.alignment.get_all_seqs():
            chunks.append('%s\n' % entry.description)
            chunks.append(out_sequence(entry.seq.data))
        return ''.join(chunks)
示例2: next
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import get_all_seqs [as 别名]
#.........这里部分代码省略.........
assert not passed_end_alignment
parts = [x.strip() for x in line.split(" ", 1)]
if len(parts) != 2:
# This might be someone attempting to store a zero length sequence?
raise ValueError("Could not split line into identifier " + "and sequence:\n" + line)
id, seq = parts
if id not in ids:
ids.append(id)
seqs.setdefault(id, "")
seqs[id] += seq.replace(".", "-")
elif len(line) >= 5:
# Comment line or meta-data
if line[:5] == "#=GF ":
# Generic per-File annotation, free text
# Format: #=GF <feature> <free text>
feature, text = line[5:].strip().split(None, 1)
# Each feature key could be used more than once,
# so store the entries as a list of strings.
if feature not in gf:
gf[feature] = [text]
else:
gf[feature].append(text)
elif line[:5] == "#=GC ":
# Generic per-Column annotation, exactly 1 char per column
# Format: "#=GC <feature> <exactly 1 char per column>"
pass
elif line[:5] == "#=GS ":
# Generic per-Sequence annotation, free text
# Format: "#=GS <seqname> <feature> <free text>"
id, feature, text = line[5:].strip().split(None, 2)
# if id not in ids :
# ids.append(id)
if id not in gs:
gs[id] = {}
if feature not in gs[id]:
gs[id][feature] = [text]
else:
gs[id][feature].append(text)
elif line[:5] == "#=GR ":
# Generic per-Sequence AND per-Column markup
# Format: "#=GR <seqname> <feature> <exactly 1 char per column>"
id, feature, text = line[5:].strip().split(None, 2)
# if id not in ids :
# ids.append(id)
if id not in gr:
gr[id] = {}
if feature not in gr[id]:
gr[id][feature] = ""
gr[id][feature] += text.strip() # append to any previous entry
# TODO - Should we check the length matches the alignment length?
# For interlaced sequences the GR data can be split over
# multiple lines
# Next line...
assert len(seqs) <= len(ids)
# assert len(gs) <= len(ids)
# assert len(gr) <= len(ids)
self.ids = ids
self.sequences = seqs
self.seq_annotation = gs
self.seq_col_annotation = gr
if ids and seqs:
if self.records_per_alignment is not None and self.records_per_alignment != len(ids):
raise ValueError(
"Found %i records in this alignment, told to expect %i" % (len(ids), self.records_per_alignment)
)
alignment = Alignment(self.alphabet)
# TODO - Introduce an annotated alignment class?
# For now, store the annotation a new private property:
alignment._annotations = gr
alignment_length = len(seqs.values()[0])
for id in ids:
seq = seqs[id]
if alignment_length != len(seq):
raise ValueError("Sequences have different lengths, or repeated identifier")
name, start, end = self._identifier_split(id)
alignment.add_sequence(id, seq, start=start, end=end)
record = alignment.get_all_seqs()[-1]
assert record.id == id or record.description == id
record.id = id
record.name = name
record.description = id
# will be overridden by _populate_meta_data if an explicit
# accession is provided:
record.annotations["accession"] = name
self._populate_meta_data(id, record)
return alignment
else:
return None
示例3: next
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import get_all_seqs [as 别名]
#.........这里部分代码省略.........
IMTVEEARQRGARLPSMPHVRTFLRLLTGCSRINSDVARRIPGIHRDPKD
RLSSLKQVEEALDMLISSHGEYCPLPLTMDVQAENFPEVLHTRTVRRLKR
QDFAFTRKMRREARQVEQSW
"""
#Match identifier
if not (line.startswith(">") and line.strip().endswith("..")):
raise ValueError("Expected line starting '>' and ending '..', got '%s'" % repr(line))
#print '----->', line.strip(), match_descr
match_descr = line[1:].split()[0] + match_descr
#assert match_descr.startswith(line[1:].split()[0])
# assert self._match_descr.startswith(line[1:].split()[0])
#Tagged data,
line = handle.readline()
line = self._parse_tag_section(line, match_annotation)
assert not line.startswith("; ")
#Now should have the aligned query sequence with flanking region...
while not (line.startswith(">") or ">>>" in line) and not line.startswith('#'):
match_seq_parts.append(line.strip())
line = handle.readline()
if not line:
#End of file
return None
if line.startswith('>') or '>>>' in line:
self._header = line
#We built a list of strings and then joined them because
#its faster than appending to a string.
query_seq = "".join(query_seq_parts)
match_seq = "".join(match_seq_parts)
del query_seq_parts, match_seq_parts
#Note, query_seq and match_seq will usually be of different lengths, apparently
#because in the m10 format leading gaps are added but not trailing gaps!
#Remove the flanking regions,
query_align_seq = self._extract_alignment_region(query_seq, query_annotation)
match_align_seq = self._extract_alignment_region(match_seq, match_annotation)
#The "sq_offset" values can be specified with the -X command line option.
#They appear to just shift the origin used in the calculation of the coordinates.
if ("sq_offset" in query_annotation and query_annotation["sq_offset"] != "1") \
or ("sq_offset" in match_annotation and match_annotation["sq_offset"] != "1") :
#Note that until some point in the v35 series, FASTA always recorded one
#for the query offset, and omitted the match offset (even when these were
#specified with the -X command line option).
#TODO - Work out how exactly the use of -X offsets changes things.
#raise ValueError("Offsets from the -X command line option are not (yet) supported")
pass
# this is not useful when using stretcher
# if len(query_align_seq) != len(match_align_seq) :
# raise ValueError("Problem parsing the alignment sequence coordinates")
if "sw_overlap" in alignment_annotation :
if int(alignment_annotation["sw_overlap"]) != len(query_align_seq) :
raise ValueError("Specified sw_overlap = %s does not match expected value %i" \
% (alignment_annotation["sw_overlap"],
len(query_align_seq)))
#TODO - Look at the "sq_type" to assign a sensible alphabet?
alignment = Alignment(self.alphabet)
#TODO - Introduce an annotated alignment class?
#For now, store the annotation a new private property:
alignment._annotations = {}
#Want to record both the query header tags, and the alignment tags.
for key, value in self._query_header_annotation.iteritems() :
alignment._annotations[key] = value
for key, value in alignment_annotation.iteritems() :
alignment._annotations[key] = value
#TODO - Once the alignment object gets an append method, use it.
#(i.e. an add SeqRecord method)
alignment.add_sequence(self._query_descr, query_align_seq)
record = alignment.get_all_seqs()[-1]
assert record.id == self._query_descr or record.description == self._query_descr
assert record.seq.tostring() == query_align_seq
record.id = self._query_descr.split()[0].strip(",")
record.name = "query"
record.annotations["original_length"] = int(query_annotation["sq_len"])
# Roba mia
for k in query_annotation.keys():
record.annotations[k] = query_annotation[k]
alignment.add_sequence(match_descr, match_align_seq)
record = alignment.get_all_seqs()[-1]
assert record.id == match_descr or record.description == match_descr
assert record.seq.tostring() == match_align_seq
record.id = match_descr.split()[0].strip(",")
record.name = "match"
record.annotations["original_length"] = int(match_annotation["sq_len"])
# Roba mia
for k in query_annotation.keys():
record.annotations[k] = match_annotation[k]
return alignment
示例4: next
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import get_all_seqs [as 别名]
#.........这里部分代码省略.........
#If we do anything with this in future, must remove any flanking region.
align_consensus = "".join(align_consensus_parts)
del align_consensus_parts
assert not line[0:2] == "; "
else :
align_consensus = None
assert (line[0] == ">" or ">>>" in line)
self._header = line
#We built a list of strings and then joined them because
#its faster than appending to a string.
query_seq = "".join(query_seq_parts)
match_seq = "".join(match_seq_parts)
del query_seq_parts, match_seq_parts
#Note, query_seq and match_seq will usually be of different lengths, apparently
#because in the m10 format leading gaps are added but not trailing gaps!
#Remove the flanking regions,
query_align_seq = self._extract_alignment_region(query_seq, query_annotation)
match_align_seq = self._extract_alignment_region(match_seq, match_annotation)
#How can we do this for the (optional) consensus?
#The "sq_offset" values can be specified with the -X command line option.
#They appear to just shift the origin used in the calculation of the coordinates.
if len(query_align_seq) != len(match_align_seq) :
raise ValueError("Problem parsing the alignment sequence coordinates, "
"following should be the same length but are not:\n"
"%s - len %i\n%s - len %i" % (query_align_seq,
len(query_align_seq),
match_align_seq,
len(match_align_seq)))
if "sw_overlap" in alignment_annotation :
if int(alignment_annotation["sw_overlap"]) != len(query_align_seq) :
raise ValueError("Specified sw_overlap = %s does not match expected value %i" \
% (alignment_annotation["sw_overlap"],
len(query_align_seq)))
#TODO - Look at the "sq_type" to assign a sensible alphabet?
alphabet = self.alphabet
alignment = Alignment(alphabet)
#TODO - Introduce an annotated alignment class?
#For now, store the annotation a new private property:
alignment._annotations = {}
#Want to record both the query header tags, and the alignment tags.
for key, value in self._query_header_annotation.iteritems() :
alignment._annotations[key] = value
for key, value in alignment_annotation.iteritems() :
alignment._annotations[key] = value
#TODO - Once the alignment object gets an append method, use it.
#(i.e. an add SeqRecord method)
alignment.add_sequence(self._query_descr, query_align_seq)
record = alignment.get_all_seqs()[-1]
assert record.id == self._query_descr or record.description == self._query_descr
#assert record.seq.tostring() == query_align_seq
record.id = self._query_descr.split(None,1)[0].strip(",")
record.name = "query"
record.annotations["original_length"] = int(query_annotation["sq_len"])
#TODO - handle start/end coordinates properly. Short term hack for now:
record._al_start = int(query_annotation["al_start"])
record._al_stop = int(query_annotation["al_stop"])
#TODO - What if a specific alphabet has been requested?
#TODO - Use an IUPAC alphabet?
#TODO - Can FASTA output RNA?
if alphabet == single_letter_alphabet and "sq_type" in query_annotation :
if query_annotation["sq_type"] == "D" :
record.seq.alphabet = generic_dna
elif query_annotation["sq_type"] == "p" :
record.seq.alphabet = generic_protein
if "-" in query_align_seq :
if not hasattr(record.seq.alphabet,"gap_char") :
record.seq.alphabet = Gapped(record.seq.alphabet, "-")
alignment.add_sequence(match_descr, match_align_seq)
record = alignment.get_all_seqs()[-1]
assert record.id == match_descr or record.description == match_descr
#assert record.seq.tostring() == match_align_seq
record.id = match_descr.split(None,1)[0].strip(",")
record.name = "match"
record.annotations["original_length"] = int(match_annotation["sq_len"])
#TODO - handle start/end coordinates properly. Short term hack for now:
record._al_start = int(query_annotation["al_start"])
record._al_stop = int(query_annotation["al_stop"])
#This is still a very crude way of dealing with the alphabet:
if alphabet == single_letter_alphabet and "sq_type" in match_annotation :
if match_annotation["sq_type"] == "D" :
record.seq.alphabet = generic_dna
elif match_annotation["sq_type"] == "p" :
record.seq.alphabet = generic_protein
if "-" in match_align_seq :
if not hasattr(record.seq.alphabet,"gap_char") :
record.seq.alphabet = Gapped(record.seq.alphabet, "-")
return alignment
示例5: next
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import get_all_seqs [as 别名]
def next(self):
    """Read the next alignment from ``self.handle`` and return it.

    A record starts with a header line holding two integers: the number
    of sequences, then the sequence length.  The first block has one line
    per sequence; its first ten characters are the identifier and the rest
    is sequence data.  Any further blocks hold sequence data only, in the
    same sequence order.  A header line met while looking for further
    blocks is stored in ``self._header`` so the following call starts
    from it.

    Returns:
        The populated ``Alignment``, or ``None`` when the handle is
        already at end of file.

    Raises:
        ValueError: if the header is not two integers, if the sequence
            count differs from ``self.records_per_alignment`` (when that
            is not ``None``), if the file ends part-way through a block,
            or if an assembled sequence does not have the declared length.
    """
    handle = self.handle

    try:
        # Header we saved from when we were parsing the previous alignment.
        line = self._header
        del self._header
    except AttributeError:
        line = handle.readline()
    if not line:
        return None  # end of file: nothing left to parse

    line = line.strip()
    # split() without an argument never yields empty strings, so no extra
    # filtering is needed; it also always returns a list, which keeps the
    # len() check below working where filter() returns an iterator.
    parts = line.split()
    if len(parts) != 2:
        raise ValueError("First line should have two integers")
    try:
        number_of_seqs = int(parts[0])
        length_of_seqs = int(parts[1])
    except ValueError:
        raise ValueError("First line should have two integers")
    assert self._is_header(line)

    if self.records_per_alignment is not None \
            and self.records_per_alignment != number_of_seqs:
        raise ValueError("Found %i records in this alignment, told to expect %i"
                         % (number_of_seqs, self.records_per_alignment))

    ids = []
    seqs = []

    # Expects STRICT truncation/padding to 10 characters.
    # Does not require any white space between name and seq.
    for _ in range(number_of_seqs):
        line = handle.readline().rstrip()
        ids.append(line[:10].strip())  # first ten characters
        seqs.append([line[10:].strip().replace(" ", "")])

    # Look for further blocks.
    line = ""
    while True:
        # Skip any blank lines between blocks...
        while "" == line.strip():
            line = handle.readline()
            if not line:
                break  # end of file
        if not line:
            break  # end of file

        if self._is_header(line):
            # Looks like the start of a concatenated alignment; keep the
            # header for the next call.
            self._header = line
            break

        # One line per sequence, in the same order as the first block.
        for i in range(number_of_seqs):
            seqs[i].append(line.strip().replace(" ", ""))
            line = handle.readline()
            if (not line) and i + 1 < number_of_seqs:
                raise ValueError("End of file mid-block")
        if not line:
            break  # end of file

    alignment = Alignment(self.alphabet)
    for index, (seq_id, chunks) in enumerate(zip(ids, seqs)):
        seq = "".join(chunks)
        if len(seq) != length_of_seqs:
            raise ValueError("Sequence %i length %i, expected length %i"
                             % (index + 1, len(seq), length_of_seqs))
        alignment.add_sequence(seq_id, seq)

        # add_sequence() builds the record itself, so fetch the record just
        # appended and set its id, name and description to the identifier.
        record = alignment.get_all_seqs()[-1]
        assert seq_id == record.id or seq_id == record.description
        record.id = seq_id
        record.name = seq_id
        record.description = seq_id
    return alignment
示例6: Alignment
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import get_all_seqs [as 别名]
# biopython
from Bio import Alphabet
from Bio import Seq
from Bio.Alphabet import IUPAC
from Bio import Clustalw
from Bio.Align.FormatConvert import FormatConverter
from Bio.Align import AlignInfo
from Bio.Fasta import FastaAlign
from Bio.SubsMat import FreqTable
from Bio.Align.Generic import Alignment
# Sanity checks on a freshly created alignment that holds no sequences.
alignment = Alignment(Alphabet.generic_alphabet)
assert 0 == alignment.get_alignment_length()
assert [] == alignment.get_all_seqs()
del alignment

# Three-row alignment built from one string in mixed, lower and upper case.
alignment = Alignment(Alphabet.generic_alphabet)
letters = "AbcDefGhiJklMnoPqrStuVwxYz"
alignment.add_sequence("mixed", letters)
alignment.add_sequence("lower", letters.lower())
alignment.add_sequence("upper", letters.upper())
assert 26 == alignment.get_alignment_length()
assert 3 == len(alignment.get_all_seqs())
# Rows 0-2 must return the text they were added with, in insertion order.
# Generator expressions are used so no extra module-level names are bound.
assert tuple(
    alignment.get_seq_by_num(row).tostring() for row in range(3)
) == (letters, letters.lower(), letters.upper())
# The first two records carry the titles they were added under.
assert tuple(
    record.description for record in alignment.get_all_seqs()[:2]
) == ("mixed", "lower")
示例7: str
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import get_all_seqs [as 别名]
# annotations: dictionary with further info, can't be set on initialization
seqrec=SeqRecord(Seq('mdstnvrsgmksrkkkpkttvidddddcmtcsacqsklvkisditkvsldyintmrgntlacaacgsslkllndfas',Bio.Alphabet.generic_protein), id='P20994.1', name='P20994', description='Protein A19', dbxrefs=['Pfam:PF05077', 'InterPro:IPR007769', 'DIP:2186N'])
seqrec.annotations['note']='A simple note'
print seqrec
#tipo de dato alineamiento de secuencias, guarda no procesa
from Bio.Align.Generic import Alignment
seq1='MHQAIFIYQIGYPLKSGYIQSIRSPEYDNW'
seq2='MH--IFIYQIGYALKSGYIQSIRSPEY-NW'
align=Alignment(Bio.Alphabet.Gapped(IUPAC.protein)) #instance of Alignment class
align.add_sequence('asp',seq1)
align.add_sequence('unk',seq2)
print align
#Alignment methods
#get_all_seqs: return all sequences in the alignment as a list of SeqRecord
for s in align.get_all_seqs(): #in align: (the same)
print '->',s.seq
#get_seq_by_num(n): return only the selected sequence by index
print str(align.get_seq_by_num(1)) #Seq object
print align[0] #SeqRecord object
print str(align[0].seq)
#get_alignment_length(): get length of alignment
print align.get_alignment_length()
#get_column(n): return a string with all the letters in the n column
print align.get_column(0)
print align.get_column(2)
#AlignInfo module: to extract info from alignment objects
from Bio.Align import AlignInfo
#print_info_content function
#SummaryInfo,PSSM classes