本文整理汇总了Python中CGAT.Genomics.resolveReverseAmbiguousNA方法的典型用法代码示例。如果您正苦于以下问题:Python Genomics.resolveReverseAmbiguousNA方法的具体用法?Python Genomics.resolveReverseAmbiguousNA怎么用?Python Genomics.resolveReverseAmbiguousNA使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类CGAT.Genomics
的用法示例。
在下文中一共展示了Genomics.resolveReverseAmbiguousNA方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: buildCompactVariantSequences
# 需要导入模块: from CGAT import Genomics [as 别名]
# 或者: from CGAT.Genomics import resolveReverseAmbiguousNA [as 别名]
def buildCompactVariantSequences(variants, sequences):
'''build variant sequences by inserting ``variants`` into ``sequences``.
The original frame of the sequence is maintained by
converting the input sequence to a list. Each entry
in the list corresponds to a position in a wild type.
The wild type (WT) sequence is lower case
SNP: variant (ambiguity codes for variants)
homozygous insertion: upper-case bases after lower-case (WT) base
heterozygous insertion: lower-case bases after lower-case (WT) base
homozygous deletion: empty fields
heterozygous deletion: "-" after lower-case (WT) base
returns a dictionary of lists.
'''
result = {}
for key, sequence in sequences.iteritems():
variant_seq = list(sequence.lower())
start, end = key
# get all variants that overlap with sequences
for var_start, var_end, values in variants.find(start, end):
reference, action, has_wildtype, variantseqs = values
is_homozygous = len(variantseqs) == 1 and not has_wildtype
rel_start, rel_end = var_start - start, var_end - start
startoffset = max(0, start - var_start)
endoffset = max(0, var_end - end)
if action == "=":
assert rel_start >= 0
assert sequence[rel_start].upper() == reference, \
'reference base mismatch: expected %s, got %s at %i-%i' % \
(sequence[rel_start].upper(), reference,
var_start, var_end)
if is_homozygous:
variant_seq[rel_start] = variantseqs[0]
else:
variant_seq[rel_start] = Genomics.resolveReverseAmbiguousNA(
"".join(variantseqs))
elif action == "-":
xstart, xend = max(0, rel_start), min(len(sequence), rel_end)
for variant in variantseqs:
# truncated for variants of unequal lengths (-AA/-AAA)
refseq = sequence[xstart:xend].upper()[:len(variant)]
assert refseq == variant[startoffset:len(variant) - endoffset], \
'reference base mismatch at deletion: expected %s %s %s, got %s[%i:%i] at %i-%i (%i-%i), action=%s' % \
(sequence[xstart - 10:xstart],
refseq,
sequence[xend:xend + 10],
variant, startoffset, len(variant) - endoffset,
var_start, var_end, start, end,
action)
l = len(variant) - startoffset - endoffset
if is_homozygous:
variant_seq[xstart:xend] = [""] * l
else:
for x in range(xstart, xend):
if variant_seq[x].endswith("-"):
assert not has_wildtype
variant_seq[x] = ""
else:
variant_seq[x] += "-"
elif action == "+":
if is_homozygous:
variant_seq[rel_start] += variantseqs[0].upper()
else:
if has_wildtype:
variant_seq[rel_start] += variantseqs[0].upper()
else:
# merge indels like +AAA/+AA
a, b = variantseqs
if a.startswith(b):
variant_seq[
rel_start] += b.upper() + a[len(b):].lower()
elif b.startswith(a):
variant_seq[
rel_start] += a.upper() + b[len(a):].lower()
else:
raise ValueError(
"don't know how to encode variant: %s" % variantseqs)
result[(start, end)] = variant_seq
return result