本文整理汇总了Python中cogent.DNA类的典型用法代码示例。如果您正苦于以下问题：Python DNA类的具体用法？Python DNA怎么用？Python DNA使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了DNA类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_gaps_at_both_ends
def test_gaps_at_both_ends(self):
    """Sequences overhanging at opposite ends align to the full length."""
    template = 'aaaccggttt'
    # "A" drops the last two bases and "B" drops the first two, so each
    # sequence overhangs the other at one end.
    trimmed_tail = DNA.makeSequence(template[:-2], Name="A")
    trimmed_head = DNA.makeSequence(template[2:], Name="B")
    alignments = self._aligned_both_ways(
        trimmed_tail, trimmed_head, local=False)
    for aln in alignments:
        self.assertEqual(matchedColumns(aln), 6)
        self.assertEqual(len(aln), 10)
示例2: test_assemble_seq
def test_assemble_seq(self):
    """_assemble_seq pads the space around and between fragments with N's"""
    fragments = ["AAAAA", "CCCCC", "GGG"]

    def check(pieces, start, end, positions, expected):
        # Single place where the assembled result is compared.
        assembled = _assemble_seq(pieces, start, end, positions)
        self.assertEqual(assembled, expected)

    # Padding on both sides; the same layout expressed with two offsets.
    padded = DNA.makeSequence("NAAAAANNCCCCCNNGGGNNN")
    check(fragments, 10, 31, [(11, 16), (18, 23), (25, 28)], padded)
    check(fragments, 0, 21, [(1, 6), (8, 13), (15, 18)], padded)

    # Start coincides with the start of the first fragment.
    no_leading = DNA.makeSequence("AAAAANNCCCCCNNGGGNNN")
    check(fragments, 0, 20, [(0, 5), (7, 12), (14, 17)], no_leading)

    # End coincides with the end of the last fragment.
    no_trailing = DNA.makeSequence("NAAAAANNCCCCCNNGGG")
    check(fragments, 10, 28, [(11, 16), (18, 23), (25, 28)], no_trailing)

    # Both start and end coincide with fragment boundaries.
    no_flanks = DNA.makeSequence("AAAAANNCCCCCNNGGG")
    check(fragments, 10, 27, [(10, 15), (17, 22), (24, 27)], no_flanks)

    # A single fragment spanning the whole requested range.
    whole = DNA.makeSequence(''.join(fragments))
    check([''.join(fragments)], 10, 23, [(10, 23)], whole)
示例3: test_local_tiebreak
def test_local_tiebreak(self):
    """Of several equally good local hits, the first one is reported"""
    # Picking the first hit keeps the Pyrex and Python versions in
    # agreement.
    scores = make_dna_scoring_dict(
        match=1, transition=-1, transversion=-1)
    query = DNA.makeSequence('cwc', Name='pattern')
    subject = DNA.makeSequence('cactc', Name='target')
    alignment = local_pairwise(query, subject, scores, 5, 2)
    matched = str(alignment.NamedSeqs['target']).lower()
    self.assertEqual(matched, 'cac')
示例4: setUp
def setUp(self):
    """Create two gapped sequences plus derived maps, cigars and alignment."""
    gapped_first = DNA.makeSequence('---AA---GCTTAG-A--CCT-')
    gapped_second = DNA.makeSequence('CCAAAAAA---TAGT-GGC--G')

    self.cigar_text = '3D2M3D6MDM2D3MD'
    self.aln_seq = gapped_first
    self.aln_seq1 = gapped_second

    # parseOutGaps yields a (map, sequence) pair for each gapped sequence.
    self.map, self.seq = gapped_first.parseOutGaps()
    self.map1, self.seq1 = gapped_second.parseOutGaps()

    self.slices = [(1, 4), (0, 8), (7, 12), (0, 1), (3, 5)]
    self.aln = LoadSeqs(
        data={"FAKE01": gapped_first, "FAKE02": gapped_second})
    self.cigars = {
        "FAKE01": self.cigar_text,
        "FAKE02": map_to_cigar(self.map1),
    }
    self.seqs = {
        "FAKE01": str(self.seq),
        "FAKE02": str(self.seq1),
    }
示例5: _make_utr_seq
def _make_utr_seq(self):
    """Cache the 5' and 3' UTR sequences under "Utr5" and "Utr3".

    When both ``UntranslatedExons5`` and ``UntranslatedExons3`` are None,
    both cache entries are set to ``self.NULL_VALUE``.  Otherwise each
    entry is the concatenation of ``exon.Seq`` over the exons on that
    side; a side whose exon attribute is None contributes an empty DNA
    sequence.
    """
    exons5 = self.UntranslatedExons5
    exons3 = self.UntranslatedExons3
    if exons5 is None and exons3 is None:
        self._cached["Utr5"] = self.NULL_VALUE
        self._cached["Utr3"] = self.NULL_VALUE
        return

    # Only one side may be missing here. Iterating None would raise
    # TypeError, so a missing side is treated as having no exons.
    utr5_seq = DNA.makeSequence("")
    for exon in exons5 or ():
        utr5_seq += exon.Seq

    utr3_seq = DNA.makeSequence("")
    for exon in exons3 or ():
        utr3_seq += exon.Seq

    self._cached["Utr5"] = utr5_seq
    self._cached["Utr3"] = utr3_seq
示例6: test
def test(r=1, **kw):
    """Time one global pairwise alignment and return its throughput.

    r  -- number of times each 10-base seed sequence is repeated.
    kw -- extra keyword arguments forwarded to classic_align_pairwise.

    Returns the product of the two sequence lengths divided by the elapsed
    wall-clock seconds (``time.time``).
    """
    S = make_dna_scoring_dict(10, -1, -8)
    seq2 = DNA.makeSequence("AAAATGCTTA" * r)
    seq1 = DNA.makeSequence("AATTTTGCTG" * r)
    t0 = time.time()
    # Only the elapsed time matters; the alignment itself is discarded.
    classic_align_pairwise(seq1, seq2, S, 10, 2, local=False, **kw)
    t = time.time() - t0
    # The original had an unreachable ``print t`` after this return;
    # it has been dropped.
    return (len(seq1) * len(seq2)) / t
示例7: process_uclust_pw_alignment_results
def process_uclust_pw_alignment_results(fasta_pairs_lines, uc_lines):
    """Process results of uclust search and align.

    Walks the 'H' records of ``uc_lines`` in step with consecutive pairs of
    fasta records from ``fasta_pairs_lines`` and yields, for each hit, the
    tuple ``(query_id, target_id, aligned_query, aligned_target,
    percent_id)``.

    Fields read from each hit record: index 3 (percent identity), index 4
    (strand: '+', '-' or '.'), index 8 (query id) and index 9 (target id).
    For '-' hits both aligned sequences are reverse complemented with
    ``DNA.rc`` and ' RC' is appended to the query id.

    Raises UclustParseError on an unknown strand value, or when the ids in
    the fasta pair do not match the ids in the uc record (the fasta target
    id is expected to be the uc target id followed by the strand symbol).
    """
    alignments = get_next_two_fasta_records(fasta_pairs_lines)
    for hit in get_next_record_type(uc_lines, 'H'):
        matching_strand = hit[4]
        if matching_strand == '-':
            strand_id = '-'
            target_rev_match = True
        elif matching_strand == '+':
            strand_id = '+'
            target_rev_match = False
        elif matching_strand == '.':
            # protein sequence, so no strand information
            strand_id = ''
            target_rev_match = False
        else:
            raise UclustParseError(
                "Unknown strand type: %s" % matching_strand)

        uc_query_id = hit[8]
        uc_target_id = hit[9]
        percent_id = float(hit[3])

        # next() works for both Python 2 (.next) and Python 3 (__next__)
        # iterators.
        fasta_pair = next(alignments)

        fasta_query_id = fasta_pair[0][0]
        aligned_query = fasta_pair[0][1]
        if fasta_query_id != uc_query_id:
            raise UclustParseError(
                "Order of fasta and uc files do not match."
                " Got query %s but expected %s." %
                (fasta_query_id, uc_query_id))

        fasta_target_id = fasta_pair[1][0]
        aligned_target = fasta_pair[1][1]
        expected_target_id = uc_target_id + strand_id
        if fasta_target_id != expected_target_id:
            raise UclustParseError(
                "Order of fasta and uc files do not match."
                " Got target %s but expected %s." %
                (fasta_target_id, expected_target_id))

        if target_rev_match:
            query_id = uc_query_id + ' RC'
            aligned_query = DNA.rc(aligned_query)
            aligned_target = DNA.rc(aligned_target)
        else:
            query_id = uc_query_id
        target_id = uc_target_id

        yield (query_id, target_id, aligned_query, aligned_target,
               percent_id)
示例8: test_codon
def test_codon(self):
    """Codon model on unaligned sequences gives a 9-column alignment."""
    longer = DNA.makeSequence('tacgccgta', Name="A")
    shorter = DNA.makeSequence('tacgta', Name="B")
    model = cogent.evolve.substitution_model.Codon(
        model_gaps=False,
        equal_motif_probs=True,
        mprob_model='conditional')
    two_tip_tree = cogent.LoadTree(tip_names=['A', 'B'])
    lf = model.makeLikelihoodFunction(two_tip_tree, aligned=False)
    lf.setSequences({'A': longer, 'B': shorter})
    # The alignment is read off the Viterbi path of the likelihood edge.
    viterbi_path = lf.getLogLikelihood().edge.getViterbiPath()
    aln = viterbi_path.getAlignment()
    self.assertEqual(matchedColumns(aln), 6)
    self.assertEqual(len(aln), 9)
示例9: test_picklability
def test_picklability(self):
    """An alignment holding an annotated sequence survives a pickle cycle"""
    # Exercises alignments, sequences, features, maps and spans together.
    # The round-trip result is not compared with the input; that might be
    # worth doing for maps/spans, whereas seqs/alignments are plain
    # python classes without __getstate__ etc.
    import cPickle as pickle
    plain = DNA.makeSequence("aagaagaagaccccca")
    annotated = DNA.makeSequence("aagaagaagaccccct")
    annotated.addFeature('exon', 'fred', [(10, 15)])
    alignment = LoadSeqs(data={'a': plain, 'b': annotated})
    # Passing means neither dumps nor loads raised.
    serialised = pickle.dumps(alignment)
    pickle.loads(serialised)
示例10: test
def test(r=1, **kw):
    """Benchmark one global pairwise alignment.

    r  -- repeat count applied to each 10-base seed sequence.
    kw -- extra keyword arguments forwarded to classic_align_pairwise.

    Returns the number of sequence-length-product cells per second, in
    thousands, truncated to an int; returns '*' if the aligner raises
    ArithmeticError.
    """
    scores = make_dna_scoring_dict(10, -1, -8)
    second = DNA.makeSequence('AAAATGCTTA' * r)
    first = DNA.makeSequence('AATTTTGCTG' * r)
    # NOTE(review): time.clock exists only on Python 2 / Python < 3.8.
    started = time.clock()
    try:
        # return_alignment is False in order to emphasise the quadratic
        # part of the work.
        classic_align_pairwise(
            first, second, scores, 10, 2,
            local=False, return_alignment=False, **kw)
    except ArithmeticError:
        return '*'
    elapsed = time.clock() - started
    cells = len(first) * len(second)
    return int(cells / elapsed / 1000)
示例11: process_barcode_single_end_data
def process_barcode_single_end_data(read1_data, output_bc_fastq, output_fastq1, bc1_len=6, rev_comp_bc1=False):
    """Split a read into barcode and remainder and write both as fastq.

    read1_data: list of header, read, quality scores
    output_bc_fastq: open output handle for the barcode fastq record
    output_fastq1: open output handle for the remaining-read fastq record
    bc1_len: number of leading positions treated as the barcode
    rev_comp_bc1: if True, reverse complement the barcode (and reverse
     its quality scores) before writing
    """
    sequence = read1_data[1]
    quality = read1_data[2]

    barcode = sequence[:bc1_len]
    barcode_quality = quality[:bc1_len]
    if rev_comp_bc1:
        barcode = DNA.rc(barcode)
        # Qualities are reversed so they stay aligned with the flipped bases.
        barcode_quality = barcode_quality[::-1]

    header = read1_data[0]
    output_bc_fastq.write(
        format_fastq_record(header, barcode, barcode_quality))
    output_fastq1.write(
        format_fastq_record(header, sequence[bc1_len:], quality[bc1_len:]))
示例12: makeSampleSequence
def makeSampleSequence():
    """Return a DNA sequence carrying one exon and two repeat_unit features."""
    raw = 'tgccnwsrygagcgtgttaaacaatggccaactctctaccttcctatgttaaacaagtgagatcgcaggcgcgccaaggc'
    annotated = DNA.makeSequence(raw)
    # (feature type, feature name, spans), added in this order.
    features = (
        ('exon', 'exon', [(20, 35)]),
        ('repeat_unit', 'repeat_unit', [(39, 49)]),
        ('repeat_unit', 'rep2', [(49, 60)]),
    )
    for kind, label, spans in features:
        annotated.addAnnotation(annotation.Feature, kind, label, spans)
    return annotated
示例13: _get_flanking_seq_data
def _get_flanking_seq_data(self):
    """Cache the up/down flanking sequences under 'FlankingSeq'.

    The flanking_sequence row is looked up by the variation_id of the
    variation_feature row and stored in ``self._table_rows``.  A flank
    stored in that row is wrapped as a DNA sequence; a flank stored as the
    string 'NULL' is instead taken from the genome around
    ``self.Location``.  The cached value is the tuple
    ``(up[-300:], down[:300])``.
    """
    # maps to flanking_sequence through variation_feature_id
    variation_id = self._table_rows['variation_feature']['variation_id']
    table = self.flanking_sequence_table
    query = sql.select([table], table.c.variation_id == variation_id)
    record = asserted_one(query.execute())
    self._table_rows['flanking_sequence'] = record

    stored_up = record['up_seq']
    stored_down = record['down_seq']
    # Some rows hold the literal string 'NULL' instead of a real NULL in
    # the MySQL tables; treat those as missing.
    flanks = {
        'up': None if stored_up == 'NULL' else stored_up,
        'down': None if stored_down == 'NULL' else stored_down,
    }

    for side, text in list(flanks.items()):
        if text is not None:
            flanks[side] = DNA.makeSequence(text)
            continue
        # No stored flank: take it from the genomic sequence instead.
        is_down = side == 'down'
        if self.Location.Strand == -1:
            # The offsets swap sides on the minus strand.
            offsets = (-301, -1) if is_down else (1, 301)
        else:
            offsets = (1, 301) if is_down else (-301, -1)
        flank_location = self.Location.resized(*offsets)
        flanks[side] = self.genome.getRegion(region=flank_location).Seq

    self._cached['FlankingSeq'] = (flanks['up'][-300:],
                                   flanks['down'][:300])
示例14: parse_illumina_single_end_read_file
def parse_illumina_single_end_read_file(read_file,barcode_length,\
    max_bad_run_length,quality_threshold,min_per_read_length,
    rev_comp,rev_comp_barcode,barcode_in_seq,barcode_max_N=0,seq_max_N=0):
    """Yield (description, barcode, sequence, quality) per accepted read.

    Each line of read_file is parsed with parse_illumina_line and quality
    filtered with read_qual_score_filter.  A read is skipped when its
    barcode has more than barcode_max_N 'N' characters, or when the
    filtered sequence is shorter than min_per_read_length or has more than
    seq_max_N 'N' characters.  With rev_comp set, the sequence is reverse
    complemented and the quality string reversed before being yielded.
    """
    for line in read_file:
        record = parse_illumina_line(
            line, barcode_length, rev_comp_barcode, barcode_in_seq)
        description = illumina_read_description_from_read_data(record)

        barcode = record['Barcode']
        if barcode.count('N') > barcode_max_N:
            continue

        sequence, quality = read_qual_score_filter(
            record['Sequence'], record['Quality Score'],
            max_bad_run_length, quality_threshold)
        too_short = len(sequence) < min_per_read_length
        if too_short or sequence.count('N') > seq_max_N:
            continue

        if rev_comp:
            sequence = DNA.rc(sequence)
            quality = quality[::-1]

        yield description, barcode, sequence, quality
示例15: CigarParser
def CigarParser(seqs, cigars, sliced = False, ref_seqname = None, start = None, end = None, moltype=DNA):
    """return an alignment from raw sequences and cigar strings

    if sliced, will return an alignment correspondent to ref sequence
    start to end

    Arguments:
        seqs - raw sequences as {seqname: seq}
        cigars - corresponding cigar text as {seqname: cigar_text}
                 cigars and seqs should have the same seqnames
        sliced - if True, restrict the alignment to the region selected
                 by ref_seqname, start and end
        ref_seqname - name of the reference sequence; used only when
                      sliced is True
        start, end - bounds handed to slice_cigar for the reference
                     sequence (with by_align=False); used only when
                     sliced is True
        moltype - optional, defaults to DNA
    """
    data = {}
    if not sliced:
        for seqname in seqs:
            data[seqname] = aligned_from_cigar(
                cigars[seqname], seqs[seqname], moltype=moltype)
    else:
        ref_aln_seq = aligned_from_cigar(
            cigars[ref_seqname], seqs[ref_seqname], moltype=moltype)
        # The map returned for the reference is not needed, only the
        # alignment coordinates of the requested slice.
        _, aln_loc = slice_cigar(
            cigars[ref_seqname], start, end, by_align=False)
        aln_start, aln_end = aln_loc[0], aln_loc[1]
        data[ref_seqname] = ref_aln_seq[aln_start:aln_end]

        for seqname in seqs:
            if seqname == ref_seqname:
                continue
            m, seq_loc = slice_cigar(cigars[seqname], aln_start, aln_end)
            if seq_loc:
                seq = seqs[seqname]
                if isinstance(seq, str):
                    seq = moltype.makeSequence(seq)
                data[seqname] = seq[seq_loc[0]:seq_loc[1]].gappedByMap(m)
            else:
                # Nothing falls inside the slice: fill with gaps, using
                # the requested moltype rather than a hard-coded DNA.
                data[seqname] = moltype.makeSequence(
                    '-' * (aln_end - aln_start))
    return LoadSeqs(data=data, aligned=True)