本文整理匯總了Python中CGAT.Genomics.translate方法的典型用法代碼示例。如果您正苦於以下問題:Python Genomics.translate方法的具體用法?Python Genomics.translate怎麼用?Python Genomics.translate使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類CGAT.Genomics的用法示例。
在下文中一共展示了Genomics.translate方法的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: updateSNPs
# 需要導入模塊: from CGAT import Genomics [as 別名]
# 或者: from CGAT.Genomics import translate [as 別名]
def updateSNPs(self, snp, is_negative_strand, pos):
    """Record variant type and amino acids for a single-base substitution.

    Appends one entry to ``self.mVariantType`` ("O" for a homozygous
    substitution, "E" for a heterozygous one), appends the translation of
    every codon in ``self.mReferenceCodons`` to ``self.mReferenceAAs`` and
    finally extends ``self.mVariantAAs`` with the translation of every codon
    in ``self.mVariantCodons``.

    Args:
        snp: record exposing ``chromosome``, ``pos``, ``reference_base`` and
            ``genotype``; it must also be iterable, because its fields are
            joined into the error message.
        is_negative_strand: when true, the reference base and each variant
            base are complemented before use.
        pos: index of the substituted base within each reference codon.

    Raises:
        ValueError: if the base at ``pos`` of a reference codon differs from
            the (strand-corrected) reference base of the SNP.
    """
    contig = snp.chromosome
    # NOTE(review): the returned length is not used in the visible code; the
    # call is retained in case getLength() rejects unknown contigs - confirm.
    self.mFasta.getLength(contig)
    reference_base = snp.reference_base

    # A homozygous call is exactly one unambiguous base. A bare substring
    # test (``genotype in "ACGTacgt"``) would also accept "" and runs such
    # as "AC", so the length is checked explicitly.
    genotype = snp.genotype
    if len(genotype) == 1 and genotype in "ACGTacgt":
        # homozygous substitution
        self.mVariantType.append("O")
    else:
        # heterozygous substitution
        self.mVariantType.append("E")

    # switch reference strand codon to correct strand
    if reference_base != "*" and is_negative_strand:
        reference_base = Genomics.complement(reference_base)

    # collect all possible variants of reference codons
    for reference_codon in self.mReferenceCodons:
        self.mReferenceAAs.append(Genomics.translate(reference_codon))

        # process single base changes
        variant_bases = Genomics.resolveAmbiguousNA(snp.genotype)

        if reference_codon[pos] != reference_base:
            raise ValueError(
                "base mismatch at %i (codon=%s,%i): codon:%s != genome:%s; `%s`"
                % (snp.pos, reference_codon, pos, reference_codon[pos],
                   reference_base, ";".join(map(str, snp)))
            )

        for variant_base in variant_bases:
            if is_negative_strand:
                # NOTE(review): the complemented base is not consumed by any
                # statement visible here; the excerpt may be abridged.
                variant_base = Genomics.complement(variant_base)

    self.mVariantAAs.extend(
        [Genomics.translate(codon) for codon in self.mVariantCodons])
示例2: updateIndels
# 需要導入模塊: from CGAT import Genomics [as 別名]
# 或者: from CGAT.Genomics import translate [as 別名]
def updateIndels(self, snp, is_negative_strand):
    """Record variant types and variant codons for an insertion/deletion.

    The genotype is a "/"-separated list of alleles whose first character is
    a sign: "*" (recorded as type "W"), "+" (insertion, "I") or "-"
    (deletion, "D"). One type per allele is appended to
    ``self.mVariantType``. The two annotation characters fetched for
    ``snp.pos`` and ``snp.pos + 1`` are stored in ``self.mCode``; if neither
    is one of ``abcABC`` the indel is treated as non-coding and nothing else
    is recorded. Otherwise, for every codon in ``self.mReferenceCodons`` each
    allele (sign removed) is appended to ``self.mVariantCodons``, or "!" if
    its length is not a multiple of three, and ``self.mVariantAAs`` is
    extended with the translation of ``self.mVariantCodons``.

    Args:
        snp: record exposing ``chromosome``, ``pos`` and ``genotype``.
        is_negative_strand: whether the feature lies on the negative strand.

    Raises:
        ValueError: if an allele is empty or starts with an unknown sign.
    """
    contig = snp.chromosome
    # NOTE(review): the returned length is not used in the visible code; the
    # call is retained in case getLength() rejects unknown contigs - confirm.
    self.mFasta.getLength(contig)

    # get location of insertion/deletion. The location
    # is after position, hence get position and position + 1
    code = self.mAnnotations.getSequence(contig, "+", snp.pos, snp.pos + 2)
    self.mCode = code

    variants = snp.genotype.split("/")
    for variant in variants:
        # Slice instead of index so that an empty allele is reported through
        # the ValueError below rather than a bare IndexError.
        sign = variant[:1]
        if sign == "*":
            self.mVariantType.append("W")
        elif sign == "+":
            self.mVariantType.append("I")
        elif sign == "-":
            # deletions need to be looked at in a wider range
            self.mVariantType.append("D")
        else:
            raise ValueError("unknown variant sign '%s'" % sign)

    # ignore non-coding Indels: both flanking positions must be non-coding.
    # The previous form ``code[0] and code[1] not in 'abcABC'`` parsed as
    # ``code[0] and (code[1] not in ...)`` and so never tested code[0].
    if code[0] not in "abcABC" and code[1] not in "abcABC":
        return

    if is_negative_strand:
        # NOTE(review): the complemented alleles are not consumed below; the
        # codons are derived from snp.genotype again - confirm the intent.
        variants = [Genomics.complement(x) for x in variants]

    # Alleles without their sign character; identical for every codon, so
    # computed once outside the loop.
    allele_sequences = [allele[1:] for allele in snp.genotype.split("/")]

    for _ in self.mReferenceCodons:
        self.mVariantCodons.extend(
            "!" if len(sequence) % 3 != 0 else sequence
            for sequence in allele_sequences)

    # NOTE(review): the indentation of the source was lost; this statement is
    # placed after the loop so each collected codon is translated once.
    self.mVariantAAs.extend(
        [Genomics.translate(x) for x in self.mVariantCodons])
示例3: _buildAllele
# 需要導入模塊: from CGAT import Genomics [as 別名]
# 或者: from CGAT.Genomics import translate [as 別名]
#.........這裏部分代碼省略.........
intron_name, intron_seq5, intron_seq3 = Genomics.GetIntronType(
intron_seq)
if intron_name == "unknown":
if intron_seq[:2].islower() and intron_seq[-2:].islower():
E.debug("%s: transcript has unknown splice signal - kept because not a variant: %s: %s:%s" %
(transcript_id, intron_name, intron_seq5, intron_seq3))
nsplice_noncanonical += 1
else:
is_splice_truncated = True
E.debug("%s: transcript has splice truncated allele: %s: %s:%s" %
(transcript_id, intron_name, intron_seq5, intron_seq3))
break
# start a new exon
cds_starts.append(lcds)
else:
# treat as frameshifting intron
#
# frame-shifting introns are checked if they are
# fixed by indels either in the intron itself or
# the terminal exon sequence. To this end, the effective
# size of the intron is computed:
# effective size of intron =
# indels at terminal x bases at previous exon
# + size of intron
# + indels at terminal x bases at next exon
effective_intron_size = len(intron_seq)
previous_indels = _sumIndels(
last_exon_sequence[max(0, -frameshiftsize):])
next_indels = _sumIndels(exon_sequence[:frameshiftsize])
effective_intron_size += previous_indels + next_indels
if previous_indels + next_indels == 0 and len(intron_seq) % 3 == 0:
has_stop = "X" in Genomics.translate(intron_seq.upper(),
is_seleno=is_seleno)
else:
has_stop = False
if effective_intron_size % 3 == 0 and not has_stop:
E.debug("%s: fixed frame-shifting intron %i-%i of size %i (size:%i, indels:%i,%i)" %
(transcript_id, last_end, exon.start,
effective_intron_size,
len(intron_seq),
previous_indels, next_indels,))
# add to previous exon
cds.append(intron_seq)
lcds += len(intron_seq)
ncorrected_frameshifts += 1
new_exon = False
else:
E.debug("%s: could not fix frame-shifting intron %i-%i of size %i (size:%i, indels:%i,%i, has_stop=%i)" %
(transcript_id, last_end, exon.start,
effective_intron_size,
len(intron_seq),
previous_indels, next_indels,
has_stop))
nuncorrected_frameshifts += 1
# start a new exon
cds_starts.append(lcds)
if E.global_options.loglevel >= 8:
print "%i: intron_indels (%i-%i):" % (allele_id, last_end, exon.start)
if intron_key:
for x, c in enumerate(introns[intron_key]):
示例4: main
# 需要導入模塊: from CGAT import Genomics [as 別名]
# 或者: from CGAT.Genomics import translate [as 別名]
def main(argv=None):
    """Write a table of every amino-acid substitution for each input sequence.

    Sequences are read from ``options.stdin`` through
    ``FastaIterator.iterate``. With ``--is-cds`` each entry is treated as a
    nucleotide coding sequence: it is translated, and for every codon the
    translations of all nine single-base changes are counted. Without it the
    entry is used as given and every residue of the 20-letter alphabet gets a
    count of 1 at every position.

    For each position whose residue is in the alphabet, one line is written
    per alternative residue with a running id, the entry title, the 1-based
    position, reference, variant, the count and the count divided by the sum
    of counts at that position.

    Args:
        argv: argument list handed to ``E.Start``.
    """
    parser = E.OptionParser(
        version="%prog version: $Id: analyze_codonbias_shannon.py 2864 2010-03-03 10:18:16Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option(
        "-c", "--is-cds", dest="is_cds", action="store_true",
        help="input are cds (nucleotide) sequences [%default]")

    parser.set_defaults(is_cds=False)

    (options, args) = E.Start(parser, argv=argv)

    options.stdout.write(
        "snpid\tidentifier\tpos\treference\tvariant\tcounts\tweight\n")

    alphabet = "ACDEFGHIKLMNPQRSTVWY"
    snpid = 0

    for entry in FastaIterator.iterate(options.stdin):
        identifier = entry.title

        if options.is_cds:
            cds_sequence = entry.sequence.upper()
            assert len(cds_sequence) % 3 == 0, \
                "length of sequence '%s' is not a multiple of 3" % entry.title

            sequence = Genomics.translate(cds_sequence)

            # one counter per codon: how often each amino acid results from
            # a single-base change of that codon
            weights = []
            for codon_start in range(0, len(cds_sequence), 3):
                codon = cds_sequence[codon_start:codon_start + 3]
                neighbour_counts = collections.defaultdict(int)
                for offset in range(0, 3):
                    original_base = codon[offset]
                    for alt_base in "ACGT":
                        if alt_base != original_base:
                            mutated = codon[:offset] + alt_base + codon[offset + 1:]
                            neighbour_counts[Genomics.translate(mutated)] += 1
                weights.append(neighbour_counts)
        else:
            sequence = entry.sequence.upper()
            # every residue equally likely; one dict shared by all positions
            uniform_counts = dict.fromkeys(alphabet, 1)
            weights = [uniform_counts] * len(sequence)

        for index, residue in enumerate(sequence):
            if residue not in alphabet:
                continue

            site_counts = weights[index]
            total = float(sum(site_counts.values()))

            for replacement in alphabet:
                if replacement == residue:
                    continue

                snpid += 1
                fields = (
                    "%010i" % snpid,
                    identifier,
                    str(index + 1),
                    residue,
                    replacement,
                    "%i" % site_counts[replacement],
                    "%6.4f" % (site_counts[replacement] / total),
                )
                options.stdout.write("%s\n" % "\t".join(fields))

    E.Stop()