本文整理汇总了Python中Bio.Alphabet.IUPAC.unambiguous_dna方法的典型用法代码示例。如果您正苦于以下问题：Python IUPAC.unambiguous_dna方法的具体用法？Python IUPAC.unambiguous_dna怎么用？Python IUPAC.unambiguous_dna使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块Bio.Alphabet.IUPAC的用法示例。
在下文中一共展示了IUPAC.unambiguous_dna方法的12个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_mut_sequence
# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def test_mut_sequence():
    """Check the first ten bases returned by ``BasicErrorModel.mut_sequence``.

    Both the stdlib and NumPy random generators are seeded with 42 so the
    result for the 125-base poly-A read (phred quality 5 everywhere) is
    reproducible.
    """
    random.seed(42)
    np.random.seed(42)

    error_model = basic.BasicErrorModel()

    poly_a = Seq('AAAAA' * 25, IUPAC.unambiguous_dna)
    record = SeqRecord(poly_a, id='read_1', description='test read')
    record.letter_annotations["phred_quality"] = [5] * 125

    # The model returns the new sequence; store it back on the record.
    record.seq = error_model.mut_sequence(record, 'forward')

    leading_bases = str(record.seq[:10])
    assert leading_bases == 'AAAACAGAAA'
示例2: test_introduce_errors
# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def test_introduce_errors():
    """Check the first ten quality scores assigned by the basic error model.

    NumPy's random generator is seeded with 42 so the phred scores that
    ``introduce_error_scores`` attaches to the read are reproducible.
    """
    np.random.seed(42)
    error_model = basic.BasicErrorModel()

    bases = Seq('AATGC' * 25, IUPAC.unambiguous_dna)
    record = SeqRecord(bases, id='read_1', description='test read')

    # introduce_error_scores hands back the record carrying the annotations.
    record = error_model.introduce_error_scores(record, 'forward')

    expected_scores = [40, 26, 40, 40, 25, 25, 40, 40, 22, 40]
    assert record.letter_annotations["phred_quality"][:10] == expected_scores
示例3: test_introduce_indels
# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def test_introduce_indels():
    """Check the read produced by ``BasicErrorModel.introduce_indels``.

    Two entries of the model's ``ins_for`` / ``del_for`` tables are set to
    1.0 before the call (presumably insertion/deletion probabilities for the
    forward read -- confirm against BasicErrorModel). The resulting read must
    still be 125 bases long and start with the expected ten bases.
    """
    random.seed(42)
    np.random.seed(42)

    error_model = basic.BasicErrorModel()
    error_model.ins_for[1]['G'] = 1.0
    error_model.del_for[0]['A'] = 1.0

    read_bounds = (5, 130)
    read_record = SeqRecord(
        Seq('ATATA' * 25, IUPAC.unambiguous_dna),
        id='read_1',
        description='test read',
    )
    reference = SeqRecord(
        Seq('ATATA' * 100, IUPAC.unambiguous_dna),
        id='ref_genome',
        description='test reference',
    )

    read_record.seq = error_model.introduce_indels(
        read_record, 'forward', reference, read_bounds)

    assert len(read_record.seq) == 125
    assert read_record.seq[:10] == 'ATGATAATAT'
示例4: test_simulate_and_save
# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def test_simulate_and_save():
    """Smoke test: run ``generator.reads`` on a 1000-base synthetic genome.

    There is no assertion; the test passes as long as the call completes
    without raising.
    """
    error_model = basic.BasicErrorModel()
    genome_seq = Seq('AAAAACCCCC' * 100, IUPAC.unambiguous_dna)
    genome = SeqRecord(genome_seq, id='my_genome', description='test genome')

    generator.reads(genome, error_model, 1000, 0, 'data/.test', 0, True)
示例5: test_simulate_and_save_short
# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def test_simulate_and_save_short():
    """Smoke test: run ``generator.reads`` on a 500-base synthetic genome.

    There is no assertion; the test passes as long as the call completes
    without raising.
    """
    error_model = basic.BasicErrorModel()
    genome_seq = Seq('AACCC' * 100, IUPAC.unambiguous_dna)
    genome = SeqRecord(genome_seq, id='my_genome', description='test genome')

    generator.reads(genome, error_model, 1000, 0, 'data/.test', 0, True)
示例6: test_basic
# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def test_basic():
    """Check the tail of a read pair simulated with the basic error model.

    The check only runs when ``sys.version_info > (3,)``; otherwise the test
    returns immediately. Both random generators are seeded with 42 so the
    simulated pair is reproducible.
    """
    if sys.version_info <= (3,):
        return

    random.seed(42)
    np.random.seed(42)

    error_model = basic.BasicErrorModel()
    genome = SeqRecord(
        Seq('AAAAACCCCC' * 100, IUPAC.unambiguous_dna),
        id='my_genome',
        description='test genome',
    )

    pair = generator.simulate_read(genome, error_model, 1, 0)
    forward_bases = str(pair[0].seq)
    reverse_bases = str(pair[1].seq)
    combined = forward_bases + reverse_bases

    assert combined[-15:] == 'TTTTGGGGGTTTTTG'
示例7: test_kde
# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def test_kde():
    """Check the head of a read pair simulated with the KDE error model.

    The check only runs when ``sys.version_info > (3,)``; otherwise the test
    returns immediately. The model is loaded from ``data/ecoli.npz`` and both
    random generators are seeded with 42 for reproducibility.
    """
    if sys.version_info <= (3,):
        return

    random.seed(42)
    np.random.seed(42)

    error_model = kde.KDErrorModel('data/ecoli.npz')
    genome = SeqRecord(
        Seq('CGTTTCAACC' * 400, IUPAC.unambiguous_dna),
        id='my_genome',
        description='test genome',
    )

    pair = generator.simulate_read(genome, error_model, 1, 0)
    forward_bases = str(pair[0].seq)
    reverse_bases = str(pair[1].seq)
    combined = forward_bases + reverse_bases

    assert combined[:15] == 'CCGTTTCAACCCGTT'
示例8: test_kde_short
# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def test_kde_short():
    """Check the full read pair simulated from a short genome (KDE model).

    The check only runs when ``sys.version_info > (3,)``; otherwise the test
    returns immediately. The model is loaded from ``data/ecoli.npz`` and both
    random generators are seeded with 42 for reproducibility.
    """
    if sys.version_info <= (3,):
        return

    random.seed(42)
    np.random.seed(42)

    error_model = kde.KDErrorModel('data/ecoli.npz')
    genome = SeqRecord(
        Seq('AAACC' * 100, IUPAC.unambiguous_dna),
        id='my_genome',
        description='test genome',
    )

    pair = generator.simulate_read(genome, error_model, 1, 0)
    forward_bases = str(pair[0].seq)
    reverse_bases = str(pair[1].seq)
    combined = forward_bases + reverse_bases

    assert combined == 'ACCAAACCAAACCAAACCAAGGTTTGGTTTGGTTTGGTGT'
示例9: createRCs
# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def createRCs(inputFile, outNameVal):
    """Write a .bed file with the reverse complements of the given probes.

    Every non-blank line of ``inputFile`` is split on tabs and its first five
    fields are read as ``chrom, start, stop, probe sequence, Tm``. The probe
    sequence is replaced by its reverse complement; the other fields are
    copied through unchanged. Output lines are joined with newlines (no
    trailing newline) and written to ``<output stem>.bed``.

    Args:
        inputFile: Path of the tab-separated input file.
        outNameVal: Stem of the output file name ('.bed' is appended). When
            None, the stem is the input name up to its first '.' followed
            by '_RC'.

    Raises:
        IndexError: If a non-blank line has fewer than five tab-separated
            fields.
    """
    with open(inputFile, 'r') as in_handle:
        records = [line.strip() for line in in_handle]

    out_lines = []
    for record in records:
        if not record:
            # Blank lines (e.g. a trailing empty line) hold no probe; skip
            # them instead of failing on the missing fields.
            continue
        fields = record.split('\t')
        chrom, start, stop, probe_seq, tm = (
            fields[0], fields[1], fields[2], fields[3], fields[4])
        rev_seq = Seq(probe_seq, IUPAC.unambiguous_dna).reverse_complement()
        out_lines.append(
            '%s\t%s\t%s\t%s\t%s' % (chrom, start, stop, rev_seq, tm))

    if outNameVal is None:
        # NOTE(review): the stem is everything before the FIRST '.', so a
        # path such as './probes.bed' yields an empty stem. Kept as-is so
        # existing output names do not change.
        out_name = '%s_RC' % str(inputFile).split('.')[0]
    else:
        out_name = outNameVal

    # The context manager closes the handle even if the write fails.
    with open('%s.bed' % out_name, 'w') as out_handle:
        out_handle.write('\n'.join(out_lines))
示例10: truncation_check
# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def truncation_check(hit, cov_threshold=90.0):
    """Decide whether a hit is truncated at the amino-acid level.

    Args:
        hit: Object exposing ``hit_seq``, ``strand``, ``ref_start``,
            ``ref_end`` and ``ref_length``.
        cov_threshold: Minimum amino-acid coverage (percent) for the hit to
            count as not truncated.

    Returns:
        A tuple ``(label, coverage, translation)``. ``label`` is '' when the
        coverage reaches ``cov_threshold``, otherwise '-<coverage>%' with the
        coverage rounded to zero decimals. A hit that does not begin at
        reference position 1 yields ``('-0%', 0.0, '')``.
    """
    plain_bases = {'A', 'C', 'G', 'T'}

    # The aligned hit sequence can contain '-' characters; drop them.
    sequence = hit.hit_seq.replace('-', '')

    # Minus-strand hits are flipped and their reference coordinates swapped.
    on_minus = hit.strand == 'minus'
    if on_minus:
        sequence = reverse_complement(sequence)
    ref_start, _ref_end = (
        (hit.ref_end, hit.ref_start) if on_minus
        else (hit.ref_start, hit.ref_end)
    )

    # A hit that misses the first base of the gene is reported as 0%.
    if ref_start != 1:
        return '-0%', 0.0, ''

    # Keep only the part before the first base that is not A, C, G or T.
    first_other = next(
        (pos for pos, base in enumerate(sequence) if base not in plain_bases),
        len(sequence),
    )
    sequence = sequence[:first_other]

    # Trim to a whole number of codons before translating.
    sequence = sequence[:len(sequence) - len(sequence) % 3]

    # Coverage denominator: reference length in codons, minus one codon
    # (the original comment assumes a trailing stop codon on the reference).
    ref_aa_length = (hit.ref_length - 3) // 3
    protein = str(
        Seq(sequence, IUPAC.unambiguous_dna).translate(
            table='Bacterial', to_stop=True))
    coverage = 100.0 * len(protein) / ref_aa_length

    label = '' if coverage >= cov_threshold else '-{:.0f}%'.format(coverage)
    return label, coverage, protein
示例11: align2TargetSeq
# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def align2TargetSeq(targetSeq, seqList):
    """Align every sequence to ``targetSeq`` and stack the rows in one frame.

    Each sequence is locally aligned to the target with
    ``pairwise2.align.localms`` (match 2, mismatch -1, gap open -20, gap
    extend -20) and the first alignment returned is used. Rows are padded
    with '-' on either side so the target sits at the same offset in all
    of them.

    Returns:
        A tuple ``(rows, states)``. ``rows`` starts with the aligned target,
        followed by one row per input sequence. ``states`` holds the value
        ``judgeAllign`` returned for each sequence.
    """
    rows = []
    states = []
    for query in seqList:
        best = pairwise2.align.localms(targetSeq, query, 2, -1, -20, -20)[0]
        target_row, query_row = best[0], best[1]
        states.append(judgeAllign(target_row, query_row))

        if not rows:
            # First alignment defines the initial frame.
            rows.extend((target_row, query_row))
            continue
        if target_row == rows[0]:
            # Same frame as the rows collected so far; nothing to pad.
            rows.append(query_row)
            continue

        # Flank widths around the target in the new and the current frame.
        new_head = target_row.index(targetSeq)
        new_tail = len(target_row) - new_head - len(targetSeq)
        cur_head = rows[0].index(targetSeq)
        cur_tail = len(rows[0]) - cur_head - len(targetSeq)

        # Whichever side has the shorter flank is padded up to the longer.
        grow_head = max(new_head - cur_head, 0)
        grow_tail = max(new_tail - cur_tail, 0)
        rows = [grow_head * '-' + row + grow_tail * '-' for row in rows]

        lead = max(cur_head - new_head, 0)
        trail = max(cur_tail - new_tail, 0)
        rows.append(lead * '-' + query_row + trail * '-')
    return (rows, states)
示例12: align2Standard
# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def align2Standard(self):
    """Align every read to ``self.clusterSeq`` and count exact matches.

    Each entry of ``self.readSeqList`` is locally aligned to the cluster
    sequence with ``pairwise2.align.localms`` (match 2, mismatch -1, gap
    open -20, gap extend -20) and the first alignment returned is used.
    Rows are padded with '-' on either side so the cluster sequence sits at
    the same offset in all of them.

    Returns:
        A tuple ``(rows, exact_total)``. ``rows`` starts with the aligned
        cluster sequence, followed by one row per read. ``exact_total`` is
        the sum of ``self.readCountList[i]`` over reads whose
        ``calculate_identity`` value equals the read length.
    """
    rows = []
    cluster = self.clusterSeq
    exact_total = 0
    for idx, read in enumerate(self.readSeqList):
        best = pairwise2.align.localms(cluster, read, 2, -1, -20, -20)[0]
        cluster_row, read_row = best[0], best[1]

        if calculate_identity(cluster_row, read_row) == len(read):
            exact_total += self.readCountList[idx]

        if not rows:
            # First alignment defines the initial frame.
            rows.extend((cluster_row, read_row))
            continue
        if cluster_row == rows[0]:
            # Same frame as the rows collected so far; nothing to pad.
            rows.append(read_row)
            continue

        # Flank widths around the cluster in the new and the current frame.
        new_head = cluster_row.index(cluster)
        new_tail = len(cluster_row) - new_head - len(cluster)
        cur_head = rows[0].index(cluster)
        cur_tail = len(rows[0]) - cur_head - len(cluster)

        # Whichever side has the shorter flank is padded up to the longer.
        grow_head = max(new_head - cur_head, 0)
        grow_tail = max(new_tail - cur_tail, 0)
        rows = [grow_head * '-' + row + grow_tail * '-' for row in rows]

        lead = max(cur_head - new_head, 0)
        trail = max(cur_tail - new_tail, 0)
        rows.append(lead * '-' + read_row + trail * '-')
    return (rows, exact_total)