当前位置: 首页>>代码示例>>Python>>正文


Python Seq.MutableSeq类代码示例

本文整理汇总了Python中Bio.Seq.MutableSeq的典型用法代码示例。如果您正苦于以下问题:Python MutableSeq类的具体用法?Python MutableSeq怎么用?Python MutableSeq使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了MutableSeq类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: create_genome_seq

def create_genome_seq(aligned):

    aligned_seq = aligned.seq if type(aligned.seq) == str else aligned.seq.decode('UTF-8')
        
    genome_seq = MutableSeq(aligned_seq)

    # see samtools documentation for MD string
    
    err = re.findall(MD_REGEX, aligned.opt("MD"))
    
    seq_ix = 0
    
    # step through sequence
    for matched_bases, curr_err in err:
        
        seq_ix += int(matched_bases)
        
        assert '^' not in curr_err
        assert curr_err != genome_seq[seq_ix]
        
        genome_seq[seq_ix] = curr_err
        seq_ix += 1
    
    if aligned.is_reverse:
        genome_seq.reverse_complement()

    return genome_seq
开发者ID:StoreyLab,项目名称:sparta,代码行数:27,代码来源:calculate_mismatch_probs.py

示例2: Gthg01471

def Gthg01471():
    ori=Seq("ATGAGCATAAGTTTATCGGTTCCAAAATGGTTATTAACAGTTTTATCAATTTTATCTTTAGTCGTAGCATTTATTTTCGGTACCGTTTCCAATGCATCAGCAACAATTAACTATGGGGAGGAAGTCGCGGCAGTAGCAAATGACTATGTAGGAAGCCCATATAAATATGGAGGTACAACGCCAAAAGGATTTGATGCGAGTGGCTTTACTCAGTATGTGTATAAAAATGCTGCAACCAAATTGGCTATTCCGCGAACGAGTGCCGCACAGTATAAAGTCGGTAAATTTGTTAAACAAAGTGCGTTACAAAGAGGCGATTTAGTGTTTTATGCAACAGGAGCAAAAGGAAAGGTATCCTTTGTGGGAATTTATAATGGAAATGGTACGTTTATTGGTGCCACATCAAAAGGGGTAAAAGTGGTTAAAATGAGTGATAAATATTGGAAAGACCGGTATATAGGGGCTAAGCGAGTCATTAAGTAA", IUPAC.unambiguous_dna)
    mut=MutableSeq("ATGAGCATAAGTTTATCGGTTCCAAAATGGTTATTAACAGTTTTATCAATTTTATCTTTAGTCGTAGCATTTATTTTCGGTACCGTTTCCAATGCATCAGCAACAATTAACTATGGGGAGGAAGTCGCGGCAGTAGCAAATGACTATGTAGGAAGCCCATATAAATATGGAGGTACAACGCCAAAAGGATTTGATGCGAGTGGCTTTACTCAGTATGTGTATAAAAATGCTGCAACCAAATTGGCTATTCCGCGAACGAGTGCCGCACAGTATAAAGTCGGTAAATTTGTTAAACAAAGTGCGTTACAAAGAGGCGATTTAGTGTTTTATGCAACAGGAGCAAAAGGAAAGGTATCCTTTGTGGGAATTTATAATGGAAATGGTACGTTTATTGGTGCCACATCAAAAGGGGTAAAAGTGGTTAAAATGAGTGATAAATATTGGAAAGACCGGTATATAGGGGCTAAGCGAGTCATTAAGTAA", IUPAC.unambiguous_dna)

    a="AGTCGA"
    b="GACTAG"
    for i,v in enumerate([259,277,282,295,299,306]):
        print(mut[v-1]+a[i])
        mut[v-1]=b[i]
    print(ori.translate())
    print(mut.toseq().translate())
开发者ID:matteoferla,项目名称:Geobacillus,代码行数:11,代码来源:geo_mutagenesis.py

示例3: generate_rolls

def generate_rolls(num_rolls):
    """Generate a bunch of rolls corresponding to the casino probabilities.

    Returns:

    - The generate roll sequence
    - The state sequence that generated the roll.

    """
    # start off in the fair state
    cur_state = 'F'
    roll_seq = MutableSeq('', DiceRollAlphabet())
    state_seq = MutableSeq('', DiceTypeAlphabet())

    # generate the sequence
    for roll in range(num_rolls):
        state_seq.append(cur_state)
        # generate a random number
        chance_num = random.random()

        # add on a new roll to the sequence
        new_roll = _loaded_dice_roll(chance_num, cur_state)
        roll_seq.append(new_roll)

        # now give us a chance to switch to a new state
        chance_num = random.random()
        if cur_state == 'F':
            if chance_num <= .05:
                cur_state = 'L'
        elif cur_state == 'L':
            if chance_num <= .1:
                cur_state = 'F'

    return roll_seq.toseq(), state_seq.toseq()
开发者ID:andrewguy,项目名称:biopython,代码行数:34,代码来源:test_HMMCasino.py

示例4: random_population

def random_population(genome_alphabet, genome_size, num_organisms,
                      fitness_calculator):
    """Generate a population of individuals with randomly set genomes.

    Arguments:

    o genome_alphabet -- An Alphabet object describing all of the
    possible letters that could potentially be in the genome of an
    organism.

    o genome_size -- The size of each organisms genome.

    o num_organism -- The number of organisms we want in the population.

    o fitness_calculator -- A function that will calculate the fitness
    of the organism when given the organisms genome.
    """
    all_orgs = []

    # a random number generator to get letters for the genome
    letter_rand = random.Random()

    # figure out what type of characters are in the alphabet
    if isinstance(genome_alphabet.letters[0], str):
        if sys.version_info[0] == 3:
            alphabet_type = "u"  # Use unicode string on Python 3
        else:
            alphabet_type = "c"  # Use byte string on Python 2
    elif isinstance(genome_alphabet.letters[0], int):
        alphabet_type = "i"
    elif isinstance(genome_alphabet.letters[0], float):
        alphabet_type = "d"
    else:
        raise ValueError(
            "Alphabet type is unsupported: %s" % genome_alphabet.letters)

    for org_num in range(num_organisms):
        new_genome = MutableSeq(array.array(alphabet_type), genome_alphabet)

        # generate the genome randomly
        for gene_num in range(genome_size):
            new_gene = letter_rand.choice(genome_alphabet.letters)
            new_genome.append(new_gene)

        # add the new organism with this genome
        all_orgs.append(Organism(new_genome, fitness_calculator))

    return all_orgs
开发者ID:DavidCain,项目名称:biopython,代码行数:48,代码来源:Organism.py

示例5: add_to_pileup_dict

def add_to_pileup_dict(sams, aligned_read_set, pileup_dict):
    
    # sanity check that all the qnames (RNA read IDs) are the same
    for read in aligned_read_set:
        assert read.qname == aligned_read_set[0].qname

    if not True in [read.is_unmapped for read in aligned_read_set]:
        
        # all alignments mapped
        for read in aligned_read_set:
            
            for op, op_len in read.cigar:
                
                if op > 0 and op < 7:
                    # do not sample reads where there are insertions or deletions   
                    return
                    
            assert len(read.seq) == len(aligned_read_set[0].seq)
          
        # if aligned reads are reversed, we reverse them and hold on to that info.
        
        pos_dicts = [dict(read.aligned_pairs) for read in aligned_read_set]
        genome_seqs = [create_genome_seq(read) for read in aligned_read_set]
        qual = bytearray(aligned_read_set[0].qual, 'utf-8')
        seq = MutableSeq(aligned_read_set[0].seq if type(aligned_read_set[0].seq) == str else aligned_read_set[0].seq.decode('UTF-8'))  
        if aligned_read_set[0].is_reverse:
            seq.reverse_complement()
            qual = qual[::-1]
        
        for genome_seq in genome_seqs:
            assert len(genome_seq) == len(seq)

        for i in range(0, len(seq)):
                        
            # need (chrom, pos, genome_seq[i]) tuples for each aligned_read
            chroms = [sam.getrname(a.tid) for sam, a in izip(sams, aligned_read_set)]
            positions = [d[i] if not a.is_reverse else d[len(seq) - i - 1] for d, a in zip(pos_dicts, aligned_read_set)]
            genome_seq_i = [g[i] for g in genome_seqs]
            
            genomic_locs = tuple(zip(chroms, positions, genome_seq_i))
            
            pileup_dict[genomic_locs][seq[i]][qual[i]] += 1
开发者ID:StoreyLab,项目名称:sparta,代码行数:42,代码来源:calculate_mismatch_probs.py

示例6: __init__

    def __init__(self, sequence, hmmLength, origSeqLength, evalue, seqStart=None, seqEnd=None, hmmStart=None, hmmEnd=None):
        """Intialise HMMSequence with the hmmer unit. Must run align and 
        determineGapPositions.

        Parameters:
        unit - HMMUnit object. 
        hmmLength - int. length of the HMM.
        align - boolean. Enables alignment algorithm based on HMM values.
                See HMMSequence.align() for more info.
        gaps - boolean. Enables gap counting algorithm to create pileup.
               See HMMSequence.determineGapPositions()
        """
        self.hmmLength = int(hmmLength)
        self.gaps = [0]*self.hmmLength
        self.origSeqLength = origSeqLength
        self.evalue = evalue
        self.seqStart = seqStart
        self.seqEnd = seqEnd
        self.hmmStart = hmmStart
        self.hmmEnd = hmmEnd
        HMMPileUp.total_seqs += 1
        MutableSeq.__init__(self, sequence)
开发者ID:edraizen,项目名称:HMMPileUp,代码行数:22,代码来源:HMMPileUp.py

示例7: seq_batch_query

def seq_batch_query():
    con = sqlite3.connect('./data/DB')
    cur = con.cursor()
    list_file = input('list file name:\n')
    with open(list_file, 'r') as In:
        organism_list = In.read().split(sep='\n')
    cur.execute('CREATE TABLE IF NOT EXISTS tasklist (Name TEXT);')
    for organism in organism_list:
        cur.execute('INSERT INTO tasklist (Name) VALUES (?);', (organism,))
    cur.execute(
        'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism IN (SELECT Name FROM tasklist) ORDER BY Head',
        (organism))
    result = cur.fetchall()
    cur.execute('DROP TABLE tasklist;')
    cur.close()
    con.close()
    query_result = []
    for i in result:
        title = '|'.join([str(i[0]), i[1], i[2], i[3]])
        filename = i[2]
        sequence = MutableSeq(i[5])
        if i[4] == '-1':
            sequence.seq = sequence.reverse_complement()
        record = [title, filename, sequence]
        query_result.append(record)
    for i in query_result:
        with open(''.join(['./out/', i[1], '.fasta']), 'a') as Fileout:
            Fileout.write('>%s\n%s\n' % (i[0], i[2]))
            # rps12 may have larger than 50k fragments,  here to filter it
    rps12 = SeqIO.parse('./out/rps12.fasta', 'fasta')
    rps12short = list()
    for item in rps12:
        if len(item.seq) < 4000:
            rps12short.append(item)
    SeqIO.write(rps12short, './out/rps12short.fasta', 'fasta')
    print('Done.\n')
开发者ID:jangwen,项目名称:python,代码行数:36,代码来源:portable.py

示例8: Seq

print CodonTable.unambiguous_dna_by_id[2].start_codons
print CodonTable.unambiguous_dna_by_id[1].forward_table['ACG']    # which aminoacid for this codon

#Comparing Sequences
seq1 = Seq('ACGT',IUPAC.unambiguous_dna)
seq2 = Seq('ACGT',IUPAC.unambiguous_dna)
seq3 = Seq('ACGT',IUPAC.protein)
print id(seq1) == id(seq2)    # seq1 == seq2 look for the same object
print str(seq1) == str(seq2)    # convert to string
print str(seq1) == str(seq3)    # dna similar enought to protein

#MutableSeq
from Bio.Seq import MutableSeq
mutseq = seq1.tomutable()    # convert to MutableSeq
print mutseq, type(mutseq)
mutSeq = MutableSeq('CGTTTAAGCTGC',IUPAC.unambiguous_dna)
print mutSeq, type(mutSeq)
mutseq[1]='T'    # imposible on simple Seq
print mutseq
seq1 = mutseq.toseq()    # convert to Seq
mutSeq.remove('A')    # remove first A
mutSeq[2:-5]='TTTT'
mutSeq.reverse()    # reverse() and reverse_complement() change object itself
print mutSeq
#MutableSeq can't be a dictionary key, Seq and string can

#UnknownSeq
# Subclass of Seq when you know length but not the characters to save memory
from Bio.Seq import UnknownSeq
unk = UnknownSeq(25)
print unk, len(unk), type(unk)
开发者ID:ajonjoli,项目名称:Mytest,代码行数:31,代码来源:biopython2.py

示例9: viterbi

    def viterbi(self, sequence, state_alphabet):
        """Calculate the most probable state path using the Viterbi algorithm.

        This implements the Viterbi algorithm (see pgs 55-57 in Durbin et
        al for a full explanation -- this is where I took my implementation
        ideas from), to allow decoding of the state path, given a sequence
        of emissions.

        Arguments:

        o sequence -- A Seq object with the emission sequence that we
        want to decode.

        o state_alphabet -- The alphabet of the possible state sequences
        that can be generated.
        """

        # calculate logarithms of the initial, transition, and emission probs
        log_initial = self._log_transform(self.initial_prob)
        log_trans = self._log_transform(self.transition_prob)
        log_emission = self._log_transform(self.emission_prob)

        viterbi_probs = {}
        pred_state_seq = {}
        state_letters = state_alphabet.letters

        # --- recursion
        # loop over the training squence (i = 1 .. L)
        # NOTE: My index numbers are one less than what is given in Durbin
        # et al, since we are indexing the sequence going from 0 to
        # (Length - 1) not 1 to Length, like in Durbin et al.
        for i in range(0, len(sequence)):
            # loop over all of the possible i-th states in the state path
            for cur_state in state_letters:
                # e_{l}(x_{i})
                emission_part = log_emission[(cur_state, sequence[i])]

                max_prob = 0
                if i == 0:
                    # for the first state, use the initial probability rather
                    # than looking back to previous states
                    max_prob = log_initial[cur_state]
                else:
                    # loop over all possible (i-1)-th previous states
                    possible_state_probs = {}
                    for prev_state in self.transitions_to(cur_state):
                        # a_{kl}
                        trans_part = log_trans[(prev_state, cur_state)]

                        # v_{k}(i - 1)
                        viterbi_part = viterbi_probs[(prev_state, i - 1)]
                        cur_prob = viterbi_part + trans_part

                        possible_state_probs[prev_state] = cur_prob

                    # calculate the viterbi probability using the max
                    max_prob = max(possible_state_probs.values())

                # v_{k}(i)
                viterbi_probs[(cur_state, i)] = (emission_part + max_prob)

                if i > 0:
                    # get the most likely prev_state leading to cur_state
                    for state in possible_state_probs:
                        if possible_state_probs[state] == max_prob:
                            pred_state_seq[(i - 1, cur_state)] = state
                            break
                    
        # --- termination
        # calculate the probability of the state path
        # loop over all states
        all_probs = {}
        for state in state_letters:
            # v_{k}(L)
            all_probs[state] = viterbi_probs[(state, len(sequence) - 1)]

        state_path_prob = max(all_probs.values())

        # find the last pointer we need to trace back from
        last_state = ''
        for state in all_probs:
            if all_probs[state] == state_path_prob:
                last_state = state

        assert last_state != '', "Didn't find the last state to trace from!"
                
        # --- traceback
        traceback_seq = MutableSeq('', state_alphabet)
        
        loop_seq = range(1, len(sequence))
        loop_seq.reverse()

        # last_state is the last state in the most probable state sequence.
        # Compute that sequence by walking backwards in time. From the i-th
        # state in the sequence, find the (i-1)-th state as the most
        # probable state preceding the i-th state.
        state = last_state
        traceback_seq.append(state)
        for i in loop_seq:
            state = pred_state_seq[(i - 1, state)]
#.........这里部分代码省略.........
开发者ID:wgillett,项目名称:biopython,代码行数:101,代码来源:MarkovModel.py

示例10: print

    "AATCGTGGCTATTACTGGGATGGAGGTCACTGGCGCGACCACGGCTGGTGGAAACAACAT" +
    "TATGAATGGCGAGGCAATCGCTGGCACCTACACGGACCGCCGCCACCGCCGCGCCACCAT" +
    "AAGAAAGCTCCTCATGATCATCACGGCGGTCATGGTCCAGGCAAACATCACCGCTAA",
    generic_dna)

print(gene.translate(table="Bacterial"))
print(gene.translate(table="Bacterial", cds=True))

##查看密码子表
from Bio.Data import CodonTable
standard_table = CodonTable.unambiguous_dna_by_name["Standard"]
mito_table = CodonTable.unambiguous_dna_by_id[2]

print(standard_table)
print(mito_table.start_codons)
print(mito_table.stop_codons)
print(mito_table.forward_table["ACG"])

##可变对象
from Bio.Seq import MutableSeq
mutable_seq = MutableSeq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA", IUPAC.unambiguous_dna)
print(mutable_seq)
mutable_seq[5] = "C"
print(mutable_seq)
mutable_seq.remove("T")
print(mutable_seq)
mutable_seq.reverse()
print(mutable_seq)
new_seq = mutable_seq.toseq()
print(new_seq)
开发者ID:guochangjiang,项目名称:Python.learn,代码行数:30,代码来源:03.code.py

示例11: seq_query

def seq_query():
    """Sequence query function,  to be continued.
    """
    query_type = input(
        '1.Specific fragment\n'
        '2.Specific Organism\n'
        '3.Specific gene\n'
        '4.All\n'
        '5.All cds\n'
    )
    organize = input('Organize output?(y/n)\n')
    if query_type not in ['1', '2', '3', '4', '5']:
        raise ValueError('wrong input!\n')
    con = sqlite3.connect('./data/DB')
    cur = con.cursor()
    if query_type == '1':
        organism = input('Organism:\n')
        gene = input('Gene:\n')
        frag_type = input('Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence FROM main WHERE Name LIKE ? AND Type = ? AND Organism=?',
            ('%' + gene + '%', frag_type, organism))
        result = cur.fetchall()
    elif query_type == '2':
        organism = input('Organism:\n')
        frag_type = input('Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer, whole, fragments):\n')
        if frag_type == 'fragments':
            cur.execute(
                'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism = ?  ORDER BY Head',
                (organism,))
        else:
            cur.execute(
                'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE Organism LIKE ? AND Type = ? ORDER BY Head',
                ('%' + organism + '%', frag_type))
        result = cur.fetchall()
    elif query_type == '3':
        gene = input('Gene:\n')
        frag_type = input('Fragment type(gene, cds, rRNA, tRNA, exon, intron, spacer):\n')
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence FROM main WHERE Name LIKE ? AND Type = ? ORDER BY Taxon',
            ('%' + gene + '%', frag_type))
        result = cur.fetchall()
    elif query_type == '4':
        cur.execute('SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main ORDER BY Taxon')
        result = cur.fetchall()
    elif query_type == '5':
        cur.execute(
            'SELECT Taxon, Organism, Name, Type, Strand, Sequence, Head FROM main WHERE type = "cds" ORDER BY Taxon')
        result = cur.fetchall()

    query_result = []
    for i in result:
        title = '|'.join([str(i[0]), i[1], i[2], i[3]])
        sequence = MutableSeq(i[5])
        gene = i[2]
        if i[4] == '-1':
            sequence.seq = sequence.reverse_complement()
        record = [title, gene, sequence]
        query_result.append(record)

    if organize == 'y':
        if not exists('output'):
            makedirs('output')
        for i in query_result:
            file_name = ''.join([
                'output',
                '/',
                i[1].replace('/', ''),
                '.fasta'
            ])
            with open(file_name, 'a') as output_file:
                output_file.write('>%s\n%s\n' % (i[0], i[2]))
    else:
        output = input('Enter output filename:\n')
        with open('.'.join([output, 'fasta']), 'w') as output_file:
            for i in query_result:
                output_file.write('>%s\n%s\n' % (i[0], i[2]))

    cur.close()
    con.close()
    print('Done.\n')
开发者ID:jangwen,项目名称:python,代码行数:81,代码来源:portable.py

示例12: Seq

#print gene
#YAAX = yaaX.translate(table='Bacterial', cds=True, to_stop=True)
#print YAAX

#playing with codon usage tables
#from Bio.Data import CodonTable
#standard_table = CodonTable.unambiguous_dna_by_name["Standard"]
#mito_table = CodonTable.unambiguous_dna_by_name["Vertebrate Mitochondrial"]
#print standard_table

#mutable seq objects
from Bio.Seq import Seq
from Bio.Seq import MutableSeq
from Bio.Alphabet import IUPAC
#my_seq = Seq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA", IUPAC.unambiguous_dna)
#mutable_seq = my_seq.tomutable()
#Or just create a mutable seq!
my_seq = MutableSeq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA", IUPAC.unambiguous_dna)
print my_seq
#my_seq_div = my_seq
#my_seq_div[5:8] = 'tag' #how to do insertions????????  only can replace as many characters as indicated.  wait it works now.  
#why 5:8?
#print my_seq #why does this print as my_seq_div with SNP?  
#print my_seq_div
#my_seq_del = my_seq_div.remove("T")
#print my_seq_del
my_seq_rev = my_seq.reverse() #should be able to do my_seq.reverse_complement() as well
print my_seq_rev #this should be working, but it returning None

fin_seq = my_seq_div.toseq() #converts back to immutable Seq Object
开发者ID:cgregg,项目名称:codonmassager,代码行数:30,代码来源:318_test1.py

示例13: translation


# Direct translation (DNA -> Protein
from Bio.Seq import Seq, translate
from Bio.Alphabet import IUPAC
coding_dna = Seq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG", IUPAC.unambiguous_dna)
translate(coding_dna)

# we can specify other translation tables by name
translate(coding_dna, table="Vertebrate Mitochondrial")
# or by NCBI number
translate(coding_dna, table=2)

# 3.9 Transcription and Translation

# 3.10 Mutable Seqs
# convert existing sequence to mutable
mutable_seq = my_seq.tomutable()  
# or directly create a mutable one
from Bio.Seq import MutableSeq
from Bio.Alphabet import IUPAC
mutable_seq = MutableSeq("GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA", IUPAC.unambiguous_dna)

# now we can do
mutable_seq[5] = "T"

# and convert it back to an inmutable seq
new_seq = mutable_seq.toseq()


开发者ID:BlackAdder84,项目名称:Bioinformatics,代码行数:26,代码来源:BioPython.py

示例14: viterbi

    def viterbi(self, sequence, state_alphabet):
        """Calculate the most probable state path using the Viterbi algorithm.

        This implements the Viterbi algorithm (see pgs 55-57 in Durbin et
        al for a full explanation -- this is where I took my implementation
        ideas from), to allow decoding of the state path, given a sequence
        of emissions.

        Arguments:

        o sequence -- A Seq object with the emission sequence that we
        want to decode.

        o state_alphabet -- The alphabet of the possible state sequences
        that can be generated.
        """
        # calculate logarithms of the transition and emission probs
        log_trans = self._log_transform(self.transition_prob)
        log_emission = self._log_transform(self.emission_prob)

        viterbi_probs = {}
        pred_state_seq = {}
        state_letters = state_alphabet.letters
        # --- initialization
        #
        # NOTE: My index numbers are one less than what is given in Durbin
        # et al, since we are indexing the sequence going from 0 to
        # (Length - 1) not 1 to Length, like in Durbin et al.
        #
        # v_{0}(0) = 1
        viterbi_probs[(state_letters[0], -1)] = 1
        # v_{k}(0) = 0 for k > 0
        for state_letter in state_letters[1:]:
            viterbi_probs[(state_letter, -1)] = 0

        # --- recursion
        # loop over the training squence (i = 1 .. L)
        for i in range(0, len(sequence)):
            # now loop over all of the letters in the state path
            for main_state in state_letters:
                # e_{l}(x_{i})
                emission_part = log_emission[(main_state, sequence[i])]

                # loop over all possible states
                possible_state_probs = {}
                for cur_state in self.transitions_from(main_state):
                    # a_{kl}
                    trans_part = log_trans[(cur_state, main_state)]

                    # v_{k}(i - 1)
                    viterbi_part = viterbi_probs[(cur_state, i - 1)]
                    cur_prob = viterbi_part + trans_part

                    possible_state_probs[cur_state] = cur_prob

                # finally calculate the viterbi probability using the max
                max_prob = max(possible_state_probs.values())
                viterbi_probs[(main_state, i)] = (emission_part + max_prob)

                # now get the most likely state
                for state in possible_state_probs:
                    if possible_state_probs[state] == max_prob:
                        pred_state_seq[(i - 1, main_state)] = state
                        break
                    
        # --- termination
        # calculate the probability of the state path
        # loop over all letters
        all_probs = {}
        for state in state_letters:
            # v_{k}(L)
            viterbi_part = viterbi_probs[(state, len(sequence) - 1)]
            # a_{k0}
            transition_part = log_trans[(state, state_letters[0])]

            all_probs[state] = viterbi_part * transition_part

        state_path_prob = max(all_probs.values())

        # find the last pointer we need to trace back from
        last_state = ''
        for state in all_probs:
            if all_probs[state] == state_path_prob:
                last_state = state

        assert last_state != '', "Didn't find the last state to trace from!"
                
        # --- traceback
        traceback_seq = MutableSeq('', state_alphabet)
        
        loop_seq = range(0, len(sequence))
        loop_seq.reverse()

        cur_state = last_state
        for i in loop_seq:
            traceback_seq.append(cur_state)
            
            cur_state = pred_state_seq[(i - 1, cur_state)]

        # put the traceback sequence in the proper orientation
#.........这里部分代码省略.........
开发者ID:BlogomaticProject,项目名称:Blogomatic,代码行数:101,代码来源:MarkovModel.py

示例15: len

                if VERBOSE >= 1:
                    print pname+':', gene, 'not found or with problems: skipping.'
                continue
    
            gene_pos = gene_poss[gene]
            aft_der_gene = np.concatenate([aft_der[:, :, exon_pos[0]: exon_pos[1]]
                                           for exon_pos in gene_pos], axis=2)
            conss_gene = gene_seqs[gene]
            gene_len = len(conss_gene)

            hist += np.histogram(aft_der_gene.ravel(), bins=bins, density=False)[0]

            # Collect counts syn/nonsyn
            nu_syn = []
            nu_nonsyn = []
            cod_anc = MutableSeq('AAA', unambiguous_dna)
            cod_new = MutableSeq('AAA', unambiguous_dna)
            for j in xrange(gene_len // 3):
                for jcod in xrange(3):
                    for ai in xrange(4):
                        cod_anc[:] = conss_gene[3 * j: 3 * (j+1)]
                        # Ancestral allele, skip (we only look at propagation of MINOR alleles)
                        if alpha[ai] == cod_anc[jcod]:
                            continue
    
                        cod_new[:] = conss_gene[3 * j: 3 * (j+1)]
                        cod_new[jcod] = alpha[ai]
    
                        aftmp = aft_der_gene[:, ai, j + jcod]
                        aftmp = aftmp[(aftmp >= bins[0]) & (aftmp <= bins[-1])]
                        if not len(aftmp):
开发者ID:iosonofabio,项目名称:hivwholeseq,代码行数:31,代码来源:get_SFS_gene.py


注:本文中的Bio.Seq.MutableSeq类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。