当前位置: 首页>>代码示例>>Python>>正文


Python Alignment.get_all_seqs方法代码示例

本文整理汇总了Python中Bio.Align.Generic.Alignment.get_all_seqs方法的典型用法代码示例。如果您正苦于以下问题:Python Alignment.get_all_seqs方法的具体用法?Python Alignment.get_all_seqs怎么用?Python Alignment.get_all_seqs使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Bio.Align.Generic.Alignment的用法示例。


在下文中一共展示了Alignment.get_all_seqs方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import get_all_seqs [as 别名]
class Record:
    """Container for Saf data, kept close to the layout of the source record.

    Intended for callers who only want to look at the Saf data itself.

    Attributes:
    alignment -- the Alignment object created for this record

    """
    def __init__(self):
        self.alignment = Alignment(Bio.Alphabet.generic_alphabet)

    def __str__(self):
        # Every sequence record contributes its description on a line of
        # its own, followed by whatever out_sequence() returns for the
        # record's sequence data.
        pieces = []
        for entry in self.alignment.get_all_seqs():
            pieces.append('%s\n' % entry.description)
            pieces.append(out_sequence(entry.seq.data))
        return ''.join(pieces)
开发者ID:chapmanb,项目名称:biosqlweb,代码行数:22,代码来源:Record.py

示例2: next

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import get_all_seqs [as 别名]

#.........这里部分代码省略.........
                assert not passed_end_alignment
                parts = [x.strip() for x in line.split(" ", 1)]
                if len(parts) != 2:
                    # This might be someone attempting to store a zero length sequence?
                    raise ValueError("Could not split line into identifier " + "and sequence:\n" + line)
                id, seq = parts
                if id not in ids:
                    ids.append(id)
                seqs.setdefault(id, "")
                seqs[id] += seq.replace(".", "-")
            elif len(line) >= 5:
                # Comment line or meta-data
                if line[:5] == "#=GF ":
                    # Generic per-File annotation, free text
                    # Format: #=GF <feature> <free text>
                    feature, text = line[5:].strip().split(None, 1)
                    # Each feature key could be used more than once,
                    # so store the entries as a list of strings.
                    if feature not in gf:
                        gf[feature] = [text]
                    else:
                        gf[feature].append(text)
                elif line[:5] == "#=GC ":
                    # Generic per-Column annotation, exactly 1 char per column
                    # Format: "#=GC <feature> <exactly 1 char per column>"
                    pass
                elif line[:5] == "#=GS ":
                    # Generic per-Sequence annotation, free text
                    # Format: "#=GS <seqname> <feature> <free text>"
                    id, feature, text = line[5:].strip().split(None, 2)
                    # if id not in ids :
                    #    ids.append(id)
                    if id not in gs:
                        gs[id] = {}
                    if feature not in gs[id]:
                        gs[id][feature] = [text]
                    else:
                        gs[id][feature].append(text)
                elif line[:5] == "#=GR ":
                    # Generic per-Sequence AND per-Column markup
                    # Format: "#=GR <seqname> <feature> <exactly 1 char per column>"
                    id, feature, text = line[5:].strip().split(None, 2)
                    # if id not in ids :
                    #    ids.append(id)
                    if id not in gr:
                        gr[id] = {}
                    if feature not in gr[id]:
                        gr[id][feature] = ""
                    gr[id][feature] += text.strip()  # append to any previous entry
                    # TODO - Should we check the length matches the alignment length?
                    #       For interlaced sequences the GR data can be split over
                    #       multiple lines
            # Next line...

        assert len(seqs) <= len(ids)
        # assert len(gs)   <= len(ids)
        # assert len(gr)   <= len(ids)

        self.ids = ids
        self.sequences = seqs
        self.seq_annotation = gs
        self.seq_col_annotation = gr

        if ids and seqs:

            if self.records_per_alignment is not None and self.records_per_alignment != len(ids):
                raise ValueError(
                    "Found %i records in this alignment, told to expect %i" % (len(ids), self.records_per_alignment)
                )

            alignment = Alignment(self.alphabet)

            # TODO - Introduce an annotated alignment class?
            # For now, store the annotation a new private property:
            alignment._annotations = gr

            alignment_length = len(seqs.values()[0])
            for id in ids:
                seq = seqs[id]
                if alignment_length != len(seq):
                    raise ValueError("Sequences have different lengths, or repeated identifier")
                name, start, end = self._identifier_split(id)
                alignment.add_sequence(id, seq, start=start, end=end)

                record = alignment.get_all_seqs()[-1]

                assert record.id == id or record.description == id

                record.id = id
                record.name = name
                record.description = id

                # will be overridden by _populate_meta_data if an explicit
                # accession is provided:
                record.annotations["accession"] = name

                self._populate_meta_data(id, record)
            return alignment
        else:
            return None
开发者ID:nuin,项目名称:biopython,代码行数:104,代码来源:StockholmIO.py

示例3: next

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import get_all_seqs [as 别名]

#.........这里部分代码省略.........
        IMTVEEARQRGARLPSMPHVRTFLRLLTGCSRINSDVARRIPGIHRDPKD
        RLSSLKQVEEALDMLISSHGEYCPLPLTMDVQAENFPEVLHTRTVRRLKR
        QDFAFTRKMRREARQVEQSW
        """
        #Match identifier
        if not (line.startswith(">") and line.strip().endswith("..")):
            raise ValueError("Expected line starting '>' and ending '..', got '%s'" % repr(line))
        #print '----->', line.strip(), match_descr
        match_descr = line[1:].split()[0] + match_descr
        
        #assert match_descr.startswith(line[1:].split()[0])
#        assert self._match_descr.startswith(line[1:].split()[0])

        #Tagged data,
        line = handle.readline()
        line = self._parse_tag_section(line, match_annotation)
        assert not line.startswith("; ")
        
        #Now should have the aligned query sequence with flanking region...
        while not (line.startswith(">") or ">>>" in line) and not line.startswith('#'):
            match_seq_parts.append(line.strip())
            line = handle.readline()
            if not line:
                #End of file
                return None
        if line.startswith('>') or '>>>' in line:
            self._header = line

        #We built a list of strings and then joined them because
        #it's faster than appending to a string.
        query_seq = "".join(query_seq_parts)
        match_seq = "".join(match_seq_parts)
        del query_seq_parts, match_seq_parts
        #Note, query_seq and match_seq will usually be of different lengths, apparently
        #because in the m10 format leading gaps are added but not trailing gaps!

        #Remove the flanking regions,
        query_align_seq = self._extract_alignment_region(query_seq, query_annotation)
        match_align_seq = self._extract_alignment_region(match_seq, match_annotation)

        #The "sq_offset" values can be specified with the -X command line option.
        #They appear to just shift the origin used in the calculation of the coordinates.
        
        if ("sq_offset" in query_annotation and query_annotation["sq_offset"] != "1") \
        or ("sq_offset" in match_annotation and match_annotation["sq_offset"] != "1") :
            #Note that until some point in the v35 series, FASTA always recorded one
            #for the query offset, and omitted the match offset (even when these were
            #set with the -X command line option).
            #TODO - Work out how exactly the use of -X offsets changes things.
            #raise ValueError("Offsets from the -X command line option are not (yet) supported")
            pass

# this is not useful when using stretcher
#        if len(query_align_seq) != len(match_align_seq) :
#            raise ValueError("Problem parsing the alignment sequence coordinates")
        if "sw_overlap" in alignment_annotation :
            if int(alignment_annotation["sw_overlap"]) != len(query_align_seq) :
                raise ValueError("Specified sw_overlap = %s does not match expected value %i" \
                                 % (alignment_annotation["sw_overlap"],
                                    len(query_align_seq)))

        #TODO - Look at the "sq_type" to assign a sensible alphabet?
        alignment = Alignment(self.alphabet)

        #TODO - Introduce an annotated alignment class?
        #For now, store the annotation a new private property:
        alignment._annotations = {}
        
        #Want to record both the query header tags, and the alignment tags.
        for key, value in self._query_header_annotation.iteritems() :
            alignment._annotations[key] = value
        for key, value in alignment_annotation.iteritems() :
            alignment._annotations[key] = value
            

        #TODO - Once the alignment object gets an append method, use it.
        #(i.e. an add SeqRecord method)
        alignment.add_sequence(self._query_descr, query_align_seq)
        record = alignment.get_all_seqs()[-1]
        assert record.id == self._query_descr or record.description == self._query_descr
        assert record.seq.tostring() == query_align_seq
        record.id = self._query_descr.split()[0].strip(",")
        record.name = "query"
        record.annotations["original_length"] = int(query_annotation["sq_len"])
        # Roba mia
        for k in query_annotation.keys():
            record.annotations[k] = query_annotation[k]

        alignment.add_sequence(match_descr, match_align_seq)
        record = alignment.get_all_seqs()[-1]
        assert record.id == match_descr or record.description == match_descr
        assert record.seq.tostring() == match_align_seq
        record.id = match_descr.split()[0].strip(",")
        record.name = "match"
        record.annotations["original_length"] = int(match_annotation["sq_len"])
        # Roba mia
        for k in query_annotation.keys():
            record.annotations[k] = match_annotation[k]

        return alignment
开发者ID:nlhepler,项目名称:shorah,代码行数:104,代码来源:MarkxIO.py

示例4: next

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import get_all_seqs [as 别名]

#.........这里部分代码省略.........
            #If we do anything with this in future, must remove any flanking region.
            align_consensus = "".join(align_consensus_parts)
            del align_consensus_parts
            assert not line[0:2] == "; "
        else :
            align_consensus = None
        assert (line[0] == ">" or ">>>" in line)
        self._header = line

        #We built a list of strings and then joined them because
        #it's faster than appending to a string.
        query_seq = "".join(query_seq_parts)
        match_seq = "".join(match_seq_parts)
        del query_seq_parts, match_seq_parts
        #Note, query_seq and match_seq will usually be of different lengths, apparently
        #because in the m10 format leading gaps are added but not trailing gaps!

        #Remove the flanking regions,
        query_align_seq = self._extract_alignment_region(query_seq, query_annotation)
        match_align_seq = self._extract_alignment_region(match_seq, match_annotation)
        #How can we do this for the (optional) consensus?

        #The "sq_offset" values can be specified with the -X command line option.
        #They appear to just shift the origin used in the calculation of the coordinates.
        
        if len(query_align_seq) != len(match_align_seq) :
            raise ValueError("Problem parsing the alignment sequence coordinates, " 
                             "following should be the same length but are not:\n"
                             "%s - len %i\n%s - len %i" % (query_align_seq,
                                                           len(query_align_seq),
                                                           match_align_seq,
                                                           len(match_align_seq)))
        if "sw_overlap" in alignment_annotation :
            if int(alignment_annotation["sw_overlap"]) != len(query_align_seq) :
                raise ValueError("Specified sw_overlap = %s does not match expected value %i" \
                                 % (alignment_annotation["sw_overlap"],
                                    len(query_align_seq)))

        #TODO - Look at the "sq_type" to assign a sensible alphabet?
        alphabet = self.alphabet
        alignment = Alignment(alphabet)

        #TODO - Introduce an annotated alignment class?
        #For now, store the annotation a new private property:
        alignment._annotations = {}
        
        #Want to record both the query header tags, and the alignment tags.
        for key, value in self._query_header_annotation.iteritems() :
            alignment._annotations[key] = value
        for key, value in alignment_annotation.iteritems() :
            alignment._annotations[key] = value
            

        #TODO - Once the alignment object gets an append method, use it.
        #(i.e. an add SeqRecord method)
        alignment.add_sequence(self._query_descr, query_align_seq)
        record = alignment.get_all_seqs()[-1]
        assert record.id == self._query_descr or record.description == self._query_descr
        #assert record.seq.tostring() == query_align_seq
        record.id = self._query_descr.split(None,1)[0].strip(",")
        record.name = "query"
        record.annotations["original_length"] = int(query_annotation["sq_len"])
        #TODO - handle start/end coordinates properly. Short term hack for now:
        record._al_start = int(query_annotation["al_start"])
        record._al_stop = int(query_annotation["al_stop"])

        #TODO - What if a specific alphabet has been requested?
        #TODO - Use an IUPAC alphabet?
        #TODO - Can FASTA output RNA?
        if alphabet == single_letter_alphabet and "sq_type" in query_annotation :
            if query_annotation["sq_type"] == "D" :
                record.seq.alphabet = generic_dna
            elif query_annotation["sq_type"] == "p" :
                record.seq.alphabet = generic_protein
        if "-" in query_align_seq :
            if not hasattr(record.seq.alphabet,"gap_char") :
                record.seq.alphabet = Gapped(record.seq.alphabet, "-")
        
        alignment.add_sequence(match_descr, match_align_seq)
        record = alignment.get_all_seqs()[-1]
        assert record.id == match_descr or record.description == match_descr
        #assert record.seq.tostring() == match_align_seq
        record.id = match_descr.split(None,1)[0].strip(",")
        record.name = "match"
        record.annotations["original_length"] = int(match_annotation["sq_len"])
        #TODO - handle start/end coordinates properly. Short term hack for now:
        record._al_start = int(query_annotation["al_start"])
        record._al_stop = int(query_annotation["al_stop"])

        #This is still a very crude way of dealing with the alphabet:
        if alphabet == single_letter_alphabet and "sq_type" in match_annotation :
            if match_annotation["sq_type"] == "D" :
                record.seq.alphabet = generic_dna
            elif match_annotation["sq_type"] == "p" :
                record.seq.alphabet = generic_protein
        if "-" in match_align_seq :
            if not hasattr(record.seq.alphabet,"gap_char") :
                record.seq.alphabet = Gapped(record.seq.alphabet, "-")

        return alignment
开发者ID:frankkl,项目名称:biopython,代码行数:104,代码来源:FastaIO.py

示例5: next

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import get_all_seqs [as 别名]
    def next(self):
        """Return the next alignment read from ``self.handle``.

        The alignment starts with a header line holding two integers (the
        number of sequences and the length of each sequence).  The first
        block has one line per sequence: the first ten characters are the
        identifier and the rest is sequence data.  Any further blocks carry
        only sequence data, one line per sequence in the same order.

        Returns None when the handle is exhausted.  If the header of a
        following alignment is encountered it is stashed in ``self._header``
        so the next call starts from it.

        Raises ValueError if the header is malformed, if the sequence count
        differs from ``self.records_per_alignment`` (when that is not None),
        if sequence data follows a header declaring no sequences, if the
        file ends in the middle of a continuation block, or if a sequence
        does not have the declared length.
        """
        handle = self.handle

        try:
            # Header we saved from when we were parsing
            # the previous alignment.
            line = self._header
            del self._header
        except AttributeError:
            line = handle.readline()

        if not line:
            return None
        line = line.strip()
        # str.split() with no argument never yields empty strings, so no
        # filtering is needed (and a filter object has no len() on Python 3).
        parts = line.split()
        if len(parts) != 2:
            raise ValueError("First line should have two integers")
        try:
            number_of_seqs = int(parts[0])
            length_of_seqs = int(parts[1])
        except ValueError:
            raise ValueError("First line should have two integers")

        assert self._is_header(line)

        if self.records_per_alignment is not None \
        and self.records_per_alignment != number_of_seqs:
            raise ValueError("Found %i records in this alignment, told to expect %i" \
                             % (number_of_seqs, self.records_per_alignment))

        ids = []
        seqs = []

        # Expects STRICT truncation/padding to 10 characters
        # Does not require any white space between name and seq.
        for _ in range(number_of_seqs):
            line = handle.readline().rstrip()
            ids.append(line[:10].strip())  # first ten characters
            # Each sequence is collected as a list of chunks, joined later.
            seqs.append([line[10:].strip().replace(" ", "")])

        # Look for further blocks
        line = ""
        while True:
            # Skip any blank lines between blocks...
            while "" == line.strip():
                line = handle.readline()
                if not line:
                    break  # end of file
            if not line:
                break  # end of file

            if self._is_header(line):
                # Looks like the start of a concatenated alignment
                self._header = line
                break

            if number_of_seqs < 1:
                # There is no row to attach this data line to, so the loop
                # below would never consume it and we would spin forever.
                raise ValueError("Sequence data found in an alignment "
                                 "declared to have no sequences")

            for i, chunks in enumerate(seqs):
                chunks.append(line.strip().replace(" ", ""))
                line = handle.readline()
                if (not line) and i + 1 < number_of_seqs:
                    raise ValueError("End of file mid-block")
            if not line:
                break  # end of file

        alignment = Alignment(self.alphabet)
        for i, (seq_id, chunks) in enumerate(zip(ids, seqs)):
            seq = "".join(chunks)
            if len(seq) != length_of_seqs:
                raise ValueError("Sequence %i length %i, expected length %i" \
                                  % (i + 1, len(seq), length_of_seqs))
            alignment.add_sequence(seq_id, seq)

            # Fetch the record just added so its id, name and description
            # can all be set to the identifier read from the file.
            record = alignment.get_all_seqs()[-1]
            assert seq_id == record.id or seq_id == record.description
            record.id = seq_id
            record.name = seq_id
            record.description = seq_id
        return alignment
开发者ID:andyoberlin,项目名称:biopython,代码行数:77,代码来源:PhylipIO.py

示例6: Alignment

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import get_all_seqs [as 别名]
# biopython
from Bio import Alphabet
from Bio import Seq
from Bio.Alphabet import IUPAC
from Bio import Clustalw
from Bio.Align.FormatConvert import FormatConverter
from Bio.Align import AlignInfo
from Bio.Fasta import FastaAlign
from Bio.SubsMat import FreqTable
from Bio.Align.Generic import Alignment

# An alignment without any sequences reports zero length and no records.
alignment = Alignment(Alphabet.generic_alphabet)
assert alignment.get_alignment_length() == 0
assert alignment.get_all_seqs() == []
del alignment

# Three-row alignment built from one string in mixed, lower and upper case.
alignment = Alignment(Alphabet.generic_alphabet)
letters = "AbcDefGhiJklMnoPqrStuVwxYz"
_rows = [
    ("mixed", letters),
    ("lower", letters.lower()),
    ("upper", letters.upper()),
]
for _label, _text in _rows:
    alignment.add_sequence(_label, _text)

assert alignment.get_alignment_length() == 26
assert len(alignment.get_all_seqs()) == 3

# Each row must come back, by position, exactly as it was added.
for _index, (_label, _text) in enumerate(_rows):
    assert alignment.get_seq_by_num(_index).tostring() == _text

# Descriptions are checked for the first two rows only.
for _index, (_label, _text) in enumerate(_rows[:2]):
    assert alignment.get_all_seqs()[_index].description == _label

del _rows, _index, _label, _text
开发者ID:grassa,项目名称:biopython,代码行数:32,代码来源:test_align.py

示例7: str

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import get_all_seqs [as 别名]
#	annotations: dictionary with further info, can't be set on initialization
# NOTE: every print below is a single-argument print(...) call, which
# produces the same output under Python 2's print statement and Python 3's
# print function.
seqrec = SeqRecord(
    Seq('mdstnvrsgmksrkkkpkttvidddddcmtcsacqsklvkisditkvsldyintmrgntlacaacgsslkllndfas',
        Bio.Alphabet.generic_protein),
    id='P20994.1',
    name='P20994',
    description='Protein A19',
    dbxrefs=['Pfam:PF05077', 'InterPro:IPR007769', 'DIP:2186N'],
)
seqrec.annotations['note'] = 'A simple note'
print(seqrec)

# Sequence alignment data type: it only stores an alignment, it does not compute one
from Bio.Align.Generic import Alignment
seq1 = 'MHQAIFIYQIGYPLKSGYIQSIRSPEYDNW'
seq2 = 'MH--IFIYQIGYALKSGYIQSIRSPEY-NW'
align = Alignment(Bio.Alphabet.Gapped(IUPAC.protein))  # instance of Alignment class
align.add_sequence('asp', seq1)
align.add_sequence('unk', seq2)
print(align)
# Alignment methods
# get_all_seqs: return all sequences in the alignment as a list of SeqRecord
for s in align.get_all_seqs():  # `for s in align:` is the same
    print('-> %s' % (s.seq,))
# get_seq_by_num(n): return only the selected sequence by index
print(str(align.get_seq_by_num(1)))  # Seq object
print(align[0])  # SeqRecord object
print(str(align[0].seq))
# get_alignment_length(): get length of alignment
print(align.get_alignment_length())
# get_column(n): return a string with all the letters in the n column
print(align.get_column(0))
print(align.get_column(2))

# AlignInfo module: to extract info from alignment objects
from Bio.Align import AlignInfo
# print_info_content function
# SummaryInfo, PSSM classes
开发者ID:ajonjoli,项目名称:Mytest,代码行数:33,代码来源:biopython1.py


注:本文中的Bio.Align.Generic.Alignment.get_all_seqs方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。