Python IUPAC.unambiguous_dna方法代码示例

本文整理汇总了Python中Bio.Alphabet.IUPAC.unambiguous_dna方法的典型用法代码示例。如果您正苦于以下问题：Python IUPAC.unambiguous_dna方法的具体用法？Python IUPAC.unambiguous_dna怎么用？Python IUPAC.unambiguous_dna使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Bio.Alphabet.IUPAC的用法示例。

在下文中一共展示了IUPAC.unambiguous_dna方法的12个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_mut_sequence

# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def test_mut_sequence():
    """Mutate a poly-A read with the basic error model and check its prefix.

    Both the stdlib and NumPy random generators are seeded with 42 before
    the model is built, presumably so that the asserted prefix is
    reproducible.
    """
    random.seed(42)
    np.random.seed(42)

    error_model = basic.BasicErrorModel()

    # 125-base read made only of 'A', with a phred score of 5 on every base.
    bases = str('AAAAA' * 25)
    read = SeqRecord(
        Seq(bases, IUPAC.unambiguous_dna),
        id='read_1',
        description='test read',
    )
    read.letter_annotations["phred_quality"] = [5] * 125

    read.seq = error_model.mut_sequence(read, 'forward')
    first_ten = str(read.seq[:10])
    assert first_ten == 'AAAACAGAAA'

开发者ID:HadrienG，项目名称:InSilicoSeq，代码行数:18，代码来源:test_error_model.py

示例2: test_introduce_errors

# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def test_introduce_errors():
    """Check the first ten phred scores produced by introduce_error_scores.

    Only the NumPy generator is seeded (with 42) before the model is built.
    """
    np.random.seed(42)
    error_model = basic.BasicErrorModel()

    bases = str('AATGC' * 25)
    record = SeqRecord(
        Seq(bases, IUPAC.unambiguous_dna),
        id='read_1',
        description='test read',
    )
    record = error_model.introduce_error_scores(record, 'forward')

    expected_scores = [40, 26, 40, 40, 25, 25, 40, 40, 22, 40]
    assert record.letter_annotations["phred_quality"][:10] == expected_scores

开发者ID:HadrienG，项目名称:InSilicoSeq，代码行数:16，代码来源:test_error_model.py

示例3: test_introduce_indels

# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def test_introduce_indels():
    """Check read length and prefix after introduce_indels on an 'ATATA' read.

    The read must still be 125 bases long afterwards and start with a fixed
    ten-base prefix.
    """
    random.seed(42)
    np.random.seed(42)

    def dna_record(bases, record_id, description):
        # Wrap a plain string in a SeqRecord using the unambiguous DNA alphabet.
        return SeqRecord(
            Seq(str(bases), IUPAC.unambiguous_dna),
            id=record_id,
            description=description,
        )

    error_model = basic.BasicErrorModel()
    # NOTE(review): presumably forces an insertion of 'G' at position 1 and a
    # deletion of 'A' at position 0 -- confirm against BasicErrorModel.
    error_model.ins_for[1]['G'] = 1.0
    error_model.del_for[0]['A'] = 1.0
    bounds = (5, 130)

    read = dna_record('ATATA' * 25, 'read_1', 'test read')
    ref_genome = dna_record('ATATA' * 100, 'ref_genome', 'test reference')

    read.seq = error_model.introduce_indels(
        read, 'forward', ref_genome, bounds)

    assert len(read.seq) == 125
    assert read.seq[:10] == 'ATGATAATAT'

开发者ID:HadrienG，项目名称:InSilicoSeq，代码行数:28，代码来源:test_error_model.py

示例4: test_simulate_and_save

# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def test_simulate_and_save():
    """Call generator.reads on a 1000-base synthetic genome.

    Nothing is asserted: the test only checks that the call completes.
    """
    error_model = basic.BasicErrorModel()

    genome_bases = str('AAAAACCCCC' * 100)
    ref_genome = SeqRecord(
        Seq(genome_bases, IUPAC.unambiguous_dna),
        id='my_genome',
        description='test genome',
    )

    # NOTE(review): arguments are passed positionally as in the original; see
    # generator.reads for what each one means.
    generator.reads(ref_genome, error_model, 1000, 0, 'data/.test', 0, True)

开发者ID:HadrienG，项目名称:InSilicoSeq，代码行数:12，代码来源:test_generator.py

示例5: test_simulate_and_save_short

# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def test_simulate_and_save_short():
    """Call generator.reads on a 500-base synthetic genome.

    Nothing is asserted: the test only checks that the call completes.
    """
    error_model = basic.BasicErrorModel()

    genome_bases = str('AACCC' * 100)
    ref_genome = SeqRecord(
        Seq(genome_bases, IUPAC.unambiguous_dna),
        id='my_genome',
        description='test genome',
    )

    # NOTE(review): arguments are passed positionally as in the original; see
    # generator.reads for what each one means.
    generator.reads(ref_genome, error_model, 1000, 0, 'data/.test', 0, True)

开发者ID:HadrienG，项目名称:InSilicoSeq，代码行数:12，代码来源:test_generator.py

示例6: test_basic

# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def test_basic():
    """Simulate one read pair with the basic error model and check its tail.

    The body only runs when sys.version_info compares greater than (3,);
    otherwise the test returns without asserting anything.
    """
    if sys.version_info <= (3,):
        return

    random.seed(42)
    np.random.seed(42)
    error_model = basic.BasicErrorModel()

    genome_bases = str('AAAAACCCCC' * 100)
    ref_genome = SeqRecord(
        Seq(genome_bases, IUPAC.unambiguous_dna),
        id='my_genome',
        description='test genome',
    )

    read_tuple = generator.simulate_read(ref_genome, error_model, 1, 0)
    # Concatenate the sequences of the first two records of the result.
    big_read = ''.join(
        (str(read_tuple[0].seq), str(read_tuple[1].seq)))
    assert big_read[-15:] == 'TTTTGGGGGTTTTTG'

开发者ID:HadrienG，项目名称:InSilicoSeq，代码行数:17，代码来源:test_generator.py

示例7: test_kde

# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def test_kde():
    """Simulate one read pair with the KDE error model and check its head.

    The error model is loaded from 'data/ecoli.npz'. The body only runs when
    sys.version_info compares greater than (3,); otherwise the test returns
    without asserting anything.
    """
    if sys.version_info <= (3,):
        return

    random.seed(42)
    np.random.seed(42)
    error_model = kde.KDErrorModel('data/ecoli.npz')

    genome_bases = str('CGTTTCAACC' * 400)
    ref_genome = SeqRecord(
        Seq(genome_bases, IUPAC.unambiguous_dna),
        id='my_genome',
        description='test genome',
    )

    read_tuple = generator.simulate_read(ref_genome, error_model, 1, 0)
    # Concatenate the sequences of the first two records of the result.
    big_read = ''.join(
        (str(read_tuple[0].seq), str(read_tuple[1].seq)))
    assert big_read[:15] == 'CCGTTTCAACCCGTT'

开发者ID:HadrienG，项目名称:InSilicoSeq，代码行数:17，代码来源:test_generator.py

示例8: test_kde_short

# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def test_kde_short():
    """Simulate one read pair from a 500-base genome with the KDE model.

    The whole concatenated read pair is compared with a fixed string. The
    body only runs when sys.version_info compares greater than (3,);
    otherwise the test returns without asserting anything.
    """
    if sys.version_info <= (3,):
        return

    random.seed(42)
    np.random.seed(42)
    error_model = kde.KDErrorModel('data/ecoli.npz')

    genome_bases = str('AAACC' * 100)
    ref_genome = SeqRecord(
        Seq(genome_bases, IUPAC.unambiguous_dna),
        id='my_genome',
        description='test genome',
    )

    read_tuple = generator.simulate_read(ref_genome, error_model, 1, 0)
    # Concatenate the sequences of the first two records of the result.
    big_read = ''.join(
        (str(read_tuple[0].seq), str(read_tuple[1].seq)))
    assert big_read == 'ACCAAACCAAACCAAACCAAGGTTTGGTTTGGTTTGGTGT'

开发者ID:HadrienG，项目名称:InSilicoSeq，代码行数:17，代码来源:test_generator.py

示例9: createRCs

# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def createRCs(inputFile, outNameVal):
    """Creates a .bed file with the reverse complements of the given set of
    sequences.

    Every non-blank input line is split on tabs; fields 0-4 are read as
    chromosome, start, stop, probe sequence and Tm. The probe sequence is
    replaced by its reverse complement and the five fields are written back,
    tab-separated, one record per line (no trailing newline).

    Args:
        inputFile: Path of the tab-separated probe file to read.
        outNameVal: Stem of the output file; '.bed' is appended. If None,
            the stem is the input path with everything from the first '.'
            of its file name removed, plus '_RC'.

    Raises:
        IndexError: If a non-blank line has fewer than five tab-separated
            fields.
    """
    import os

    # Create list to hold output.
    outList = []

    # Parse out probe info, flip sequence to RC, and write to output list.
    with open(inputFile, 'r') as f:
        for rawLine in f:
            line = rawLine.strip()
            # Blank lines carry no record; skip them instead of failing on
            # the missing fields.
            if not line:
                continue
            # Split once and reuse the fields.
            fields = line.split('\t')
            chrom = fields[0]
            start = fields[1]
            stop = fields[2]
            probeSeq = fields[3]
            RevSeq = Seq(probeSeq, IUPAC.unambiguous_dna).reverse_complement()
            Tm = fields[4]
            outList.append(
                '%s\t%s\t%s\t%s\t%s' % (chrom, start, stop, RevSeq, Tm))

    # Determine the name of the output file.
    if outNameVal is None:
        # Only the file name is cut at its first dot; dots in the directory
        # part (e.g. './probes.bed' or 'run.v1/probes.bed') are left alone.
        directory, baseName = os.path.split(str(inputFile))
        fileName = os.path.join(directory, baseName.split('.')[0])
        outName = '%s_RC' % fileName
    else:
        outName = outNameVal

    # Create and write the output file; the context manager closes it even
    # if the write fails.
    with open('%s.bed' % outName, 'w') as output:
        output.write('\n'.join(outList))

开发者ID:beliveau-lab，项目名称:OligoMiner，代码行数:38，代码来源:probeRC.py

示例10: truncation_check

# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def truncation_check(hit, cov_threshold=90.0):
    """
    Translate a BLAST hit and report how much of the reference protein it covers.

    Returns a 3-tuple (suffix, coverage, translation). The suffix is '' when
    coverage is at least cov_threshold and '-N%' (coverage rounded to a whole
    number) otherwise. A hit that does not start at reference position 1
    yields ('-0%', 0.0, '').
    """
    # The aligned sequence may contain '-' for deletions relative to the
    # reference; drop them before translating.
    nucl_seq = hit.hit_seq.replace('-', '')

    # Minus-strand hits are flipped to the reference strand, and their
    # reference start is taken from ref_end.
    on_minus_strand = hit.strand == 'minus'
    if on_minus_strand:
        nucl_seq = reverse_complement(nucl_seq)
    ref_start = hit.ref_end if on_minus_strand else hit.ref_start

    # A hit that misses the first base of the gene counts as 0% coverage.
    if ref_start != 1:
        return '-0%', 0.0, ''

    # Cut the sequence at the first character that is not A, C, G or T,
    # since such bases would break translation.
    unambiguous = {'A', 'C', 'G', 'T'}
    for position, base in enumerate(nucl_seq):
        if base not in unambiguous:
            nucl_seq = nucl_seq[:position]
            break

    # Trim to a whole number of codons.
    usable_length = (len(nucl_seq) // 3) * 3
    nucl_seq = nucl_seq[:usable_length]

    # Denominator for coverage: the reference is assumed to be a full CDS
    # ending in a stop codon, so the stop codon is excluded.
    ref_aa_length = (hit.ref_length - 3) // 3

    protein = str(
        Seq(nucl_seq, IUPAC.unambiguous_dna).translate(
            table='Bacterial', to_stop=True))

    coverage = 100.0 * len(protein) / ref_aa_length
    suffix = '' if coverage >= cov_threshold else '-{:.0f}%'.format(coverage)
    return suffix, coverage, protein

开发者ID:katholt，项目名称:Kleborate，代码行数:43，代码来源:truncation.py

示例11: align2TargetSeq

# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def align2TargetSeq(targetSeq, seqList):
	"""Align every sequence in seqList to targetSeq and stack the results.

	Returns a tuple (alignSeqList, stateList). alignSeqList holds the aligned
	target followed by one aligned string per input sequence, padded with '-'
	so that the target occupies the same columns in every row. stateList
	holds the judgeAllign result for each input sequence.
	"""
	alignSeqList = []
	stateList = []
	for seq in seqList:
		# Pairwise local alignment with match 2, mismatch -1, gap open -20 and
		# gap extend -20; the heavy gap penalties are meant to keep gaps out
		# of the aligned result. Only the first alignment returned is used.
		bestAlignment = pairwise2.align.localms(targetSeq, seq, 2, -1, -20, -20)[0]
		alignedTarget = bestAlignment[0]
		alignedSeq = bestAlignment[1]

		stateList.append(judgeAllign(alignedTarget, alignedSeq))

		# First alignment: it defines the initial frame.
		if not alignSeqList:
			alignSeqList.extend([alignedTarget, alignedSeq])
			continue

		# Same frame as the rows collected so far: nothing to pad.
		if alignedTarget == alignSeqList[0]:
			alignSeqList.append(alignedSeq)
			continue

		# Overhang before and after the target in the new alignment and in
		# the rows collected so far.
		newHead = alignedTarget.index(targetSeq)
		newTail = len(alignedTarget) - newHead - len(targetSeq)
		oldHead = alignSeqList[0].index(targetSeq)
		oldTail = len(alignSeqList[0]) - oldHead - len(targetSeq)

		# Whichever side has the shorter overhang is padded up to the longer.
		oldLeftPad = max(newHead - oldHead, 0) * '-'
		oldRightPad = max(newTail - oldTail, 0) * '-'
		newLeftPad = max(oldHead - newHead, 0) * '-'
		newRightPad = max(oldTail - newTail, 0) * '-'

		if oldLeftPad or oldRightPad:
			alignSeqList = [oldLeftPad + row + oldRightPad for row in alignSeqList]
		alignSeqList.append(newLeftPad + alignedSeq + newRightPad)
	return (alignSeqList, stateList)

开发者ID:mhalushka，项目名称:miRge，代码行数:45，代码来源:writeDataToCSV.py

示例12: align2Standard

# 需要导入模块: from Bio.Alphabet import IUPAC [as 别名]
# 或者: from Bio.Alphabet.IUPAC import unambiguous_dna [as 别名]
def align2Standard(self):
		"""Align every read in self.readSeqList to self.clusterSeq.

		Returns a tuple (alignSeqList, exactMatchCount). alignSeqList holds
		the aligned cluster sequence followed by one aligned string per read,
		padded with '-' so that the cluster sequence occupies the same columns
		in every row. exactMatchCount sums self.readCountList[i] over the
		reads whose calculate_identity result equals the read length.
		"""
		alignSeqList = []
		clusterSeq = self.clusterSeq
		exactMatchCount = 0
		for readIndex, seq in enumerate(self.readSeqList):
			# Pairwise local alignment with match 2, mismatch -1, gap open -20
			# and gap extend -20; the heavy gap penalties are meant to keep
			# gaps out of the aligned result. Only the first alignment
			# returned is used.
			bestAlignment = pairwise2.align.localms(clusterSeq, seq, 2, -1, -20, -20)[0]
			alignedCluster = bestAlignment[0]
			alignedSeq = bestAlignment[1]

			if calculate_identity(alignedCluster, alignedSeq) == len(seq):
				exactMatchCount = exactMatchCount + self.readCountList[readIndex]

			# First alignment: it defines the initial frame.
			if not alignSeqList:
				alignSeqList.extend([alignedCluster, alignedSeq])
				continue

			# Same frame as the rows collected so far: nothing to pad.
			if alignedCluster == alignSeqList[0]:
				alignSeqList.append(alignedSeq)
				continue

			# Overhang before and after the cluster sequence in the new
			# alignment and in the rows collected so far.
			newHead = alignedCluster.index(clusterSeq)
			newTail = len(alignedCluster) - newHead - len(clusterSeq)
			oldHead = alignSeqList[0].index(clusterSeq)
			oldTail = len(alignSeqList[0]) - oldHead - len(clusterSeq)

			# Whichever side has the shorter overhang is padded up to the
			# longer.
			oldLeftPad = max(newHead - oldHead, 0) * '-'
			oldRightPad = max(newTail - oldTail, 0) * '-'
			newLeftPad = max(oldHead - newHead, 0) * '-'
			newRightPad = max(oldTail - newTail, 0) * '-'

			if oldLeftPad or oldRightPad:
				alignSeqList = [oldLeftPad + row + oldRightPad for row in alignSeqList]
			alignSeqList.append(newLeftPad + alignedSeq + newRightPad)
		return (alignSeqList, exactMatchCount)

开发者ID:mhalushka，项目名称:miRge，代码行数:48，代码来源:readCluster.py

注：本文中的Bio.Alphabet.IUPAC.unambiguous_dna方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。