當前位置: 首頁>>代碼示例>>Python>>正文


Python Genomics.translate方法代碼示例

本文整理匯總了Python中CGAT.Genomics.translate方法的典型用法代碼示例。如果您正苦於以下問題:Python Genomics.translate方法的具體用法?Python Genomics.translate怎麼用?Python Genomics.translate使用的例子?那麼,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在CGAT.Genomics的用法示例。


在下文中一共展示了Genomics.translate方法的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: updateSNPs

# 需要導入模塊: from CGAT import Genomics [as 別名]
# 或者: from CGAT.Genomics import translate [as 別名]
    def updateSNPs(self, snp, is_negative_strand, pos):
        """Record variant type and reference amino acids for a substitution.

        Appends "O" to ``self.mVariantType`` when ``snp.genotype`` is found
        in "ACGTacgt" and "E" otherwise, appends the translation of every
        codon in ``self.mReferenceCodons`` to ``self.mReferenceAAs`` and
        finally extends ``self.mVariantAAs`` with the translation of every
        codon in ``self.mVariantCodons``.

        Arguments
        ---------
        snp
            Record providing ``chromosome``, ``reference_base``, ``genotype``
            and ``pos``; it is also iterated to build the error message.
        is_negative_strand
            If true, bases are complemented before being compared.
        pos
            Index into each reference codon of the base to compare with
            the reference base.

        Raises
        ------
        ValueError
            If the base at ``pos`` of a reference codon differs from the
            (possibly complemented) reference base.
        """

        chrom = snp.chromosome
        # NOTE(review): the contig length is looked up but never used in
        # this method - confirm whether the call is still required.
        contig_length = self.mFasta.getLength(chrom)
        genome_base = snp.reference_base

        # "O" flags a homozygous, "E" a heterozygous substitution
        variant_type = "O" if snp.genotype in "ACGTacgt" else "E"
        self.mVariantType.append(variant_type)

        # bring the reference base onto the strand of the codon
        if genome_base != "*" and is_negative_strand:
            genome_base = Genomics.complement(genome_base)

        # walk over all possible variants of the reference codon
        for codon in self.mReferenceCodons:

            self.mReferenceAAs.append(Genomics.translate(codon))

            # bases that the (possibly ambiguous) genotype code resolves to
            alternatives = Genomics.resolveAmbiguousNA(snp.genotype)

            codon_base = codon[pos]
            if codon_base != genome_base:
                message = (
                    "base mismatch at %i (codon=%s,%i): codon:%s != genome:%s; `%s`"
                    % (snp.pos, codon, pos, codon_base, genome_base, ";".join(map(str, snp)))
                )
                raise ValueError(message)

            for base in alternatives:
                if not is_negative_strand:
                    continue
                # NOTE(review): the complemented base is not used any
                # further in this method.
                base = Genomics.complement(base)

        translated = []
        for variant_codon in self.mVariantCodons:
            translated.append(Genomics.translate(variant_codon))
        self.mVariantAAs.extend(translated)
開發者ID:nishantthakur,項目名稱:cgat,代碼行數:39,代碼來源:snp2table.py

示例2: updateIndels

# 需要導入模塊: from CGAT import Genomics [as 別名]
# 或者: from CGAT.Genomics import translate [as 別名]
    def updateIndels(self, snp, is_negative_strand):
        """Record variant types and variant codons for an insertion/deletion.

        Every "/"-separated allele of ``snp.genotype`` is classified by its
        first character and a one-letter code is appended to
        ``self.mVariantType``: "W" for "*", "I" for "+" and "D" for "-".
        The annotation codes of the two positions flanking the indel are
        stored in ``self.mCode``. If neither of them is one of ``abcABC``
        the indel is treated as non-coding and nothing else is recorded.
        Otherwise, for every codon in ``self.mReferenceCodons``, each allele
        (without its sign) is appended to ``self.mVariantCodons`` - or "!"
        if its length is not a multiple of three - and ``self.mVariantAAs``
        is extended with the translation of ``self.mVariantCodons``.

        Arguments
        ---------
        snp
            Record providing ``chromosome``, ``pos`` and ``genotype``.
        is_negative_strand
            If true, the alleles are passed through ``Genomics.complement``.

        Raises
        ------
        ValueError
            If an allele starts with a character other than "*", "+" or "-".
        """

        contig = snp.chromosome
        # NOTE(review): the contig length is not used below - confirm whether
        # the lookup is still required.
        lcontig = self.mFasta.getLength(contig)

        # get location of insertion/deletion. The location
        # is after position, hence get position and position + 1
        code = self.mAnnotations.getSequence(contig, "+", snp.pos, snp.pos + 2)
        self.mCode = code

        variants = snp.genotype.split("/")
        for variant in variants:

            if variant[0] == "*":
                self.mVariantType.append("W")

            elif variant[0] == "+":
                self.mVariantType.append("I")

            elif variant[0] == "-":
                # deletions need to be looked at in a wider range
                self.mVariantType.append("D")

            else:
                raise ValueError("unknown variant sign '%s'" % variant[0])

        # ignore non-coding Indels: both flanking positions have to be
        # tested explicitly. The previous form
        # ``code[0] and code[1] not in 'abcABC'`` parsed as
        # ``code[0] and (code[1] not in 'abcABC')`` and therefore only
        # looked at the second position.
        if code[0] not in 'abcABC' and code[1] not in 'abcABC':
            return

        if is_negative_strand:
            # NOTE(review): the complemented alleles are not used below,
            # the alleles are parsed from snp.genotype again - confirm
            # which of the two is intended.
            variants = [Genomics.complement(x) for x in variants]

        # alleles without their leading sign; identical for every codon
        alleles = [x[1:] for x in snp.genotype.split("/")]

        for reference_codon in self.mReferenceCodons:

            for allele in alleles:
                if len(allele) % 3 != 0:
                    # not a multiple of three: flagged with "!"
                    self.mVariantCodons.append("!")
                else:
                    self.mVariantCodons.append(allele)

            # NOTE(review): this translates all codons collected so far on
            # every iteration - confirm that the repetition is intended.
            self.mVariantAAs.extend(
                [Genomics.translate(x) for x in self.mVariantCodons])
開發者ID:CGATOxford,項目名稱:cgat,代碼行數:50,代碼來源:snp2table.py

示例3: _buildAllele

# 需要導入模塊: from CGAT import Genomics [as 別名]
# 或者: from CGAT.Genomics import translate [as 別名]

#.........這裏部分代碼省略.........
                intron_name, intron_seq5, intron_seq3 = Genomics.GetIntronType(
                    intron_seq)
                if intron_name == "unknown":
                    if intron_seq[:2].islower() and intron_seq[-2:].islower():
                        E.debug("%s: transcript has unknown splice signal - kept because not a variant: %s: %s:%s" %
                                (transcript_id, intron_name, intron_seq5, intron_seq3))
                        nsplice_noncanonical += 1
                    else:
                        is_splice_truncated = True
                        E.debug("%s: transcript has splice truncated allele: %s: %s:%s" %
                                (transcript_id, intron_name, intron_seq5, intron_seq3))
                        break
                # start a new exon
                cds_starts.append(lcds)

            else:
                # treat as frameshifting intron
                #
                # frame-shifting introns are checked if they are
                # fixed by indels either in the intron itself or
                # the terminal exon sequence. To this end, the effective
                # size of the intron is computed:
                # effective size of intron =
                # indels at terminal x bases at previous exon
                # + size of intron
                # + indels at terminal x bases at next exon
                effective_intron_size = len(intron_seq)
                previous_indels = _sumIndels(
                    last_exon_sequence[max(0, -frameshiftsize):])
                next_indels = _sumIndels(exon_sequence[:frameshiftsize])
                effective_intron_size += previous_indels + next_indels

                if previous_indels + next_indels == 0 and len(intron_seq) % 3 == 0:
                    has_stop = "X" in Genomics.translate(intron_seq.upper(),
                                                         is_seleno=is_seleno)
                else:
                    has_stop = False

                if effective_intron_size % 3 == 0 and not has_stop:
                    E.debug("%s: fixed frame-shifting intron %i-%i of size %i (size:%i, indels:%i,%i)" %
                            (transcript_id, last_end, exon.start,
                             effective_intron_size,
                             len(intron_seq),
                             previous_indels, next_indels,))

                    # add to previous exon
                    cds.append(intron_seq)
                    lcds += len(intron_seq)
                    ncorrected_frameshifts += 1
                    new_exon = False
                else:
                    E.debug("%s: could not fix frame-shifting intron %i-%i of size %i (size:%i, indels:%i,%i, has_stop=%i)" %
                            (transcript_id, last_end, exon.start,
                             effective_intron_size,
                             len(intron_seq),
                             previous_indels, next_indels,
                             has_stop))

                    nuncorrected_frameshifts += 1
                    # start a new exon
                    cds_starts.append(lcds)

            if E.global_options.loglevel >= 8:
                print "%i: intron_indels (%i-%i):" % (allele_id, last_end, exon.start)
                if intron_key:
                    for x, c in enumerate(introns[intron_key]):
開發者ID:SCV,項目名稱:cgat,代碼行數:70,代碼來源:gtf2alleles.py

示例4: main

# 需要導入模塊: from CGAT import Genomics [as 別名]
# 或者: from CGAT.Genomics import translate [as 別名]
def main(argv=None):
    """Write a table of all single-residue variants of the input sequences.

    Sequences are read in FASTA format from ``options.stdin``. For every
    position whose residue is one of ``ACDEFGHIKLMNPQRSTVWY`` and for every
    other letter of that alphabet, one tab-separated row is written to
    ``options.stdout`` with a running id, the sequence title, the 1-based
    position, the reference residue, the variant residue, a count and a
    weight (the count divided by the sum of all counts at that position).

    With ``--is-cds`` the input is upper-cased and translated with
    ``Genomics.translate``; the count of a variant is then the number of
    single-base changes (to A, C, G or T) of the codon whose translation
    is that variant. Without the option every residue has a count of 1.

    Arguments
    ---------
    argv
        Command line arguments, forwarded to ``E.Start``.
    """

    parser = E.OptionParser(
        version="%prog version: $Id: analyze_codonbias_shannon.py 2864 2010-03-03 10:18:16Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-c", "--is-cds", dest="is_cds", action="store_true",
                      help="input are cds (nucleotide) sequences [%default]")

    parser.set_defaults(is_cds=False)

    (options, args) = E.Start(parser, argv=argv)

    options.stdout.write("snpid\tidentifier\tpos\treference\tvariant\tcounts\tweight\n")

    alphabet = "ACDEFGHIKLMNPQRSTVWY"

    serial = 0

    for entry in FastaIterator.iterate(options.stdin):
        identifier = entry.title

        if not options.is_cds:
            # protein input: every residue is equally likely
            sequence = entry.sequence.upper()
            uniform = dict.fromkeys(alphabet, 1)
            weights = [uniform] * len(sequence)
        else:
            nucleotides = entry.sequence.upper()
            assert len(nucleotides) % 3 == 0, \
                "length of sequence '%s' is not a multiple of 3" % entry.title

            sequence = Genomics.translate(nucleotides)
            weights = []
            for offset in range(0, len(nucleotides), 3):
                codon = nucleotides[offset:offset + 3]
                # number of single-base changes leading to each translation
                tally = collections.defaultdict(int)
                for index in range(3):
                    original_base = codon[index]
                    for base in "ACGT":
                        if base != original_base:
                            mutated = codon[:index] + base + codon[index + 1:]
                            tally[Genomics.translate(mutated)] += 1
                weights.append(tally)

        for pos, ref in enumerate(sequence):

            if ref not in alphabet:
                continue

            position_counts = weights[pos]
            total = float(sum(position_counts.values()))

            for variant in alphabet:
                if variant != ref:
                    serial += 1
                    row = (
                        "%010i" % serial,
                        identifier,
                        str(pos + 1),
                        ref,
                        variant,
                        "%i" % position_counts[variant],
                        "%6.4f" % (position_counts[variant] / total),
                    )
                    options.stdout.write("%s\n" % "\t".join(row))

    E.Stop()
開發者ID:BioinformaticsArchive,項目名稱:cgat,代碼行數:69,代碼來源:fasta2variants.py


注:本文中的CGAT.Genomics.translate方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。