本文整理汇总了 Python 中 Bio.Align.Generic.Alignment 类的典型用法代码示例。如果您正苦于以下问题:Python Alignment 类的具体用法是什么?Python Alignment 怎么用?有哪些 Python Alignment 的使用例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Alignment类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: add_gaps_to_align
def add_gaps_to_align(organisms, missing, align, verbatim=False, genera=False, min_taxa=3):
    """Pad an alignment with all-'?' rows for organisms that have no sequence.

    Args:
        organisms: names of every organism expected in the output (presumably
            a list). It is deep-copied, so the caller's object is not modified.
        missing: optional mapping consulted for each padded organism; it is
            looked up under the organism name and, failing that, under
            ``'<name>*'``. Skipped entirely when falsy.
        align: iterable of alignments; each alignment is an indexable
            collection of records exposing ``name`` and ``seq``.
        verbatim: when true (and no genus matches) the lower-cased record name
            is used unchanged; otherwise the last two underscore-separated
            fields of the name are used.
        genera: optional collection of strings; a record whose name contains
            one of them is named by its last underscore-separated field only.
        min_taxa: minimum number of records an alignment must hold.

    Returns:
        The padded ``Alignment`` built from the last alignment processed, or
        ``None`` when ``align`` is empty or an alignment holds fewer than
        ``min_taxa`` records.

    Raises:
        AssertionError: ``missing`` is given and the locus is listed under
            neither key for an organism that needs padding.
        KeyError: ``missing`` holds neither key for such an organism.
        ValueError: a derived sequence name is not among ``organisms``.
    """
    local_organisms = copy.deepcopy(organisms)
    # Defined up front so an empty `align` returns None instead of raising
    # UnboundLocalError at the return statement.
    new_align = None
    for a in align:
        if len(a) < min_taxa:
            new_align = None
            break
        new_align = Alignment(Gapped(IUPAC.unambiguous_dna, "-"))
        overall_length = len(a[0])
        for seq in a:
            if genera and any(sp for sp in genera if sp in seq.name):
                new_seq_name = '_'.join(seq.name.split('_')[-1:])
            elif not verbatim:
                new_seq_name = '_'.join(seq.name.split('_')[-2:])
            else:
                new_seq_name = seq.name.lower()
            new_align.add_sequence(new_seq_name, str(seq.seq))
            # Whatever remains afterwards is the set of organisms to pad.
            local_organisms.remove(new_seq_name)
        for org in local_organisms:
            # `seq` is still the last record of the alignment above; the locus
            # name is taken from the leading fields of its name.
            if genera and any(sp for sp in genera if sp in seq.name):
                loc = '_'.join(seq.name.split('_')[:-1])
            elif not verbatim:
                loc = '_'.join(seq.name.split('_')[:-2])
            else:
                loc = seq.name
            if missing:
                try:
                    assert loc in missing[org], "Locus missing"
                except (AssertionError, KeyError):
                    # Fall back to the starred key; only the two expected
                    # failures trigger it, anything else propagates.
                    assert loc in missing['{}*'.format(org)], "Locus missing"
            new_align.add_sequence(org, '?' * overall_length)
    return new_align
示例2: ace2fasta
def ace2fasta(in_file, out_file):
    """Write the consensus sequence of every contig in an ACE file as FASTA.

    Args:
        in_file: path of the ACE file to read.
        out_file: path of the FASTA file to create (overwritten if present).

    Each contig becomes a one-sequence alignment holding the contig's
    sequence with every "*" removed. Only the consensus is exported; no
    individual reads are added. Prints "All contigs treated" once the parser
    is exhausted. Errors raised while parsing now propagate instead of being
    reported as normal completion.
    """
    # Both handles are closed on exit, including when parsing fails.
    with open(in_file, 'r') as ace_handle:
        with open(out_file, "w") as output_file:
            for contig in Ace.parse(ace_handle):
                align = Alignment(Gapped(IUPAC.ambiguous_dna, "-"))
                # Add the consensus sequence to the alignment.
                align.add_sequence(contig.name, contig.sequence.replace("*", ""))
                output_file.write(align.format("fasta"))
            print("All contigs treated")
示例3: __init__
def __init__(self, alphabet = Alphabet.Gapped(IUPAC.ambiguous_dna)):
    """Set up the base alignment and start with empty aln-specific fields.

    Args:
        alphabet: alphabet handed to ``Alignment.__init__``; defaults to a
            gapped ambiguous-DNA alphabet created once at definition time.
    """
    Alignment.__init__(self, alphabet)
    self._version = ''
    # Holds the row of stars shown in the aln output format.
    self._star_info = ''
示例4: main
def main():
    """Rewrite each input nexus file with abbreviated sequence names.

    Reads the input location from ``args.nexus`` and writes one file per
    input, under the same base name, into ``args.output``. Prints the index
    of each file once it has been handled.
    """
    args = get_args()
    files = get_files(args.nexus)
    # First pass: collect every sequence name used across the input files so
    # the abbreviation map can be built from the complete set.
    old_names = set()
    for f in files:
        for align in AlignIO.parse(f, 'nexus'):
            for seq in align:
                old_names.add(seq.name)
    name_map = abbreviator(old_names)
    # Second pass: copy each file's sequences under their mapped names.
    for count, f in enumerate(files):
        new_align = Alignment(Gapped(IUPAC.unambiguous_dna, "-"))
        for align in AlignIO.parse(f, 'nexus'):
            for seq in align:
                new_align.add_sequence(name_map[seq.name], str(seq.seq))
        outf = os.path.join(args.output, os.path.split(f)[1])
        try:
            # The context manager closes the output handle even when the
            # write fails; previously it was left open.
            with open(outf, 'w') as handle:
                AlignIO.write(new_align, handle, 'nexus')
        except ValueError:
            # NOTE(review): drops into the debugger when the write fails;
            # kept as in the original, consider reporting the error instead.
            pdb.set_trace()
        print(count)
示例5: _domain_alignment
def _domain_alignment(self,alignment,domain_region, alignment_index):
    """Cut out the alignment columns that span a domain of one sequence.

    Args:
        alignment: indexable, iterable alignment whose records expose ``id``
            and ``seq``; its ``_alphabet`` is reused for the result.
        domain_region: object exposing ``start`` and ``stop``, compared with a
            running count of non-gap characters (the first residue counts
            as 1).
        alignment_index: index of the record in which the domain is located.

    Returns:
        Tuple ``(domain_alignment, column_start, column_stop)``.

    Raises:
        AssertionError: the start or stop count is never reached in the
            selected record.
    """
    protein_seq = str(alignment[alignment_index].seq)
    # Walk the gapped sequence, counting residues, to translate the residue
    # positions of the domain into alignment column indices.
    aa_count = 0
    column_start = None
    column_stop = None
    for column, aa in enumerate(protein_seq):
        if aa != '-':
            aa_count += 1
        if aa_count == domain_region.start and column_start is None:
            column_start = column
        if aa_count == domain_region.stop and column_stop is None:
            column_stop = column
            break
    assert column_start is not None, str(column_start)
    assert column_stop is not None, str(column_stop)
    domain_alignment = Alignment(alphabet = alignment._alphabet)
    # NOTE(review): the slice end is exclusive, so the column holding the
    # stop residue itself is left out; confirm that this is intended.
    for record in alignment:
        domain_alignment.add_sequence(record.id,
                                      str(record.seq)[column_start:column_stop])
    return (domain_alignment, column_start, column_stop)
示例6: build_align
def build_align( self, seq ):
    """Cut ``seq`` into consecutive segments and store them as an alignment.

    Segments are ``self.segment_size`` long (the last one may be shorter) and
    the resulting alignment is saved on ``self.friendly``.
    """
    segments = Alignment( Gapped( DNAAlphabet() ) )
    alphabet = self.alphabet
    total = len( seq )
    width = self.segment_size
    # NOTE(review): `name` is not defined inside this function; presumably it
    # comes from an enclosing scope. Confirm before relying on this.
    for offset in range( 0, total, width ):
        segments.add_sequence( name, seq[offset : offset + width] )
    self.friendly = segments
示例7: createAlignment
def createAlignment(sequences, alphabet):
    """Build an Alignment from a list of sequences.

    Sequences are added in order and named "sequence0", "sequence1", ...
    """
    result = Alignment(alphabet)
    for index, sequence in enumerate(sequences):
        result.add_sequence("sequence" + str(index), sequence)
    return result
示例8: phylip
def phylip(handle):
    """Read name/sequence lines that follow a two-field header into an Alignment.

    Args:
        handle: open text handle positioned at the header line.

    Returns:
        A gapped-protein ``Alignment`` holding one row per non-blank line.

    Raises:
        ValueError: the header does not split into exactly two fields, or a
            non-blank line does not split into exactly a name and a sequence.
    """
    # The header values are not used further; the unpacking only checks that
    # the header has exactly two whitespace-separated fields.
    seqs, columns = handle.readline().split()
    from Bio.Align.Generic import Alignment
    from Bio.Alphabet import IUPAC, Gapped
    alignment = Alignment(Gapped(IUPAC.protein, "-"))
    for line in handle:
        if not line.strip():
            # Blank lines (e.g. a trailing newline) carry no record and used
            # to crash the unpacking below.
            continue
        name, seq = line.split()
        alignment.add_sequence(name, seq)
    return alignment
示例9: testCulledColumnMapper
def testCulledColumnMapper(self):
    """Check that mapper indices point back at the surviving original columns."""
    original = "ABCDEFGHI"
    culled = [0,1,4,8]
    # The letters left over once columns 0, 1, 4 and 8 are dropped.
    result = "CDFGH"
    align = Alignment(Gapped(IUPAC.protein, "-"))
    align.add_sequence("test",original)
    mapper = CulledColumnMapper(align,culled)
    for position, expected in enumerate(result):
        assert original[mapper[position]]==expected
示例10: rename
def rename(align, first, second):
    """Yield a copy of each alignment with sequence names rebuilt from id fields.

    Args:
        align: iterable of alignments whose records expose ``id`` and ``seq``.
        first: index of the underscore-separated id field to use.
        second: index of a second id field, or a falsy value for none.

    Yields:
        One new ``Alignment`` per input alignment. With ``second`` given, the
        name is the first three characters of both fields joined by "_";
        otherwise it is the whole ``first`` field.
    """
    for a in align:
        new_align = Alignment(Gapped(IUPAC.unambiguous_dna, "-"))
        for seq in a:
            split_name = seq.id.split('_')
            # Only `second` selects the naming scheme. The former
            # `first and second` test treated first == 0 as "not given" and
            # left the name unbound (or stale from the previous record).
            # NOTE(review): second == 0 still counts as "not given", kept for
            # backward compatibility; confirm whether index 0 should be usable.
            if second:
                new_seq_name = '_'.join([split_name[first][0:3], split_name[second][0:3]])
            else:
                new_seq_name = split_name[first]
            new_align.add_sequence(new_seq_name, str(seq.seq))
        yield new_align
示例11: main
def main():
    """Write a copy of every nexus file restricted to the selected taxa.

    Reads the input location from ``args.input`` and writes one file per
    input, under the same base name, into ``args.output``. Prints the index
    of each file once it has been written.
    """
    args = get_args()
    nexus_files = get_files(args.input)
    taxa = get_all_taxon_names(nexus_files)
    taxa_to_keep = get_samples_to_run(args, taxa)
    for count, align_file in enumerate(nexus_files):
        new_align = Alignment(Gapped(IUPAC.unambiguous_dna, "-"))
        for align in AlignIO.parse(align_file, "nexus"):
            for taxon in align:
                if taxon.name in taxa_to_keep:
                    new_align.add_sequence(taxon.name, str(taxon.seq))
        outf = os.path.join(args.output, os.path.basename(align_file))
        # The context manager closes the output handle, which was previously
        # left open after each write.
        with open(outf, 'w') as handle:
            AlignIO.write(new_align, handle, 'nexus')
        print(count)
示例12: proteins_alignment_to_biopython
def proteins_alignment_to_biopython(al, seq1, seq2, name1, name2):
    """Convert our internal alignment format into BioPython Alignment.

    Args:
        al: iterable of ``(a, b)`` index pairs; ``a`` indexes ``seq1`` and
            ``b`` indexes ``seq2``, with -1 meaning a gap in that sequence.
        seq1: first sequence.
        seq2: second sequence.
        name1: row name for the first sequence.
        name2: row name for the second sequence.

    Returns:
        A gapped-protein ``Alignment`` with two upper-cased rows of equal
        length, one column per pair in ``al``.
    """
    align = Alignment(Gapped(IUPAC.protein, "-"))
    row1 = []
    row2 = []
    for a, b in al:
        row1.append(seq1[a].upper() if a != -1 else "-")
        # The second row gets a gap for -1 as well; it used to be skipped,
        # which left the two rows with different lengths.
        row2.append(seq2[b].upper() if b != -1 else "-")
    align.add_sequence(name1, "".join(row1))
    align.add_sequence(name2, "".join(row2))
    return align
示例13: parse_ace
def parse_ace(ace_file):
    """Build an alignment of the first contig in an ACE file and its reads.

    Args:
        ace_file: path of the ACE file to read.

    Returns:
        Tuple ``(contig, align)``: the first contig yielded by the parser and
        an alignment holding its sequence followed by one row per read. Each
        read is trimmed with ``cut_ends`` and padded with ``pad_read`` to the
        length of the contig sequence; its row is named
        ``<read name>_<coru>``.

    Raises:
        StopIteration: the parser yields no contig.
    """
    # Only the first contig is needed, so the handle is closed as soon as it
    # has been read (it was previously never closed).
    with open(ace_file, 'r') as ace_handle:
        contig = next(Ace.parse(ace_handle))
    align = Alignment(Gapped(IUPAC.ambiguous_dna, "-"))
    align.add_sequence(contig.name, contig.sequence)
    consensus_length = len(contig.sequence)
    for readn, read in enumerate(contig.reads):
        # `af` entries are looked up by the same position as the reads.
        placement = contig.af[readn]
        clipst = read.qa.qual_clipping_start
        clipe = read.qa.qual_clipping_end
        seq = cut_ends(read.rd.sequence, clipst, clipe)
        seq = pad_read(seq, placement.padded_start, consensus_length)
        align.add_sequence(read.rd.name + "_" + placement.coru, seq)
    return contig, align
示例14: __str__
def __str__(self):
    """Return the base alignment text, plus the secondary structure if set.

    When ``self._secStruct`` is truthy its string form is appended on a new
    line; otherwise the text from ``_Alignment.__str__`` is returned as is.
    """
    base_text = _Alignment.__str__(self)
    if not self._secStruct:
        return base_text
    return base_text + '\n' + str(self._secStruct)
示例15: get_column
def get_column(self, col):
    """Return a string containing the given column (OBSOLETE).

    Kept only for backwards compatibility with the old
    Bio.Align.Generic.Alignment object; slice notation is the encouraged
    replacement. The call is forwarded unchanged to
    ``_Alignment.get_column``.
    """
    column = _Alignment.get_column(self, col)
    return column