本文整理汇总了Python中Bio.Align.Generic.Alignment.add_sequence方法的典型用法代码示例。如果您正苦于以下问题:Python Alignment.add_sequence方法的具体用法?Python Alignment.add_sequence怎么用?Python Alignment.add_sequence使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Bio.Align.Generic.Alignment的用法示例。
在下文中一共展示了Alignment.add_sequence方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: add_gaps_to_align
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def add_gaps_to_align(organisms, missing, align, verbatim=False, genera=False, min_taxa=3):
    """Rebuild an alignment and pad it with '?' rows for absent organisms.

    Every sequence of each alignment yielded by ``align`` is copied into a
    new Alignment under a shortened name, and that name is removed from a
    private copy of ``organisms``.  Each organism still left in the copy
    then gets a row of ``?`` as long as the alignment's first sequence.

    Sequence names are shortened as follows:
      * a string from ``genera`` occurs in the name -> last ``_`` field;
      * otherwise, ``verbatim`` false -> last two ``_`` fields;
      * otherwise -> the whole name, lower-cased.

    Args:
        organisms: collection of expected organism names.  It is
            deep-copied, so the caller's object is left untouched.
        missing: optional mapping indexed by organism name (or by the name
            followed by ``*``) whose values support ``in``.  When truthy it
            is used to assert that the locus really is recorded as missing
            for every organism that gets padded.
        align: iterable of alignments.
        verbatim: selects the naming rule described above.
        genera: optional iterable of strings searched for in each name.
        min_taxa: minimum number of sequences an alignment must have.

    Returns:
        The last alignment built, or None when ``align`` yields nothing or
        an alignment with fewer than ``min_taxa`` sequences is met.

    Raises:
        AssertionError: the locus is not listed in ``missing`` for a padded
            organism (the check disappears under ``python -O``).
        KeyError: neither ``org`` nor ``org*`` is a key of ``missing``.
    """
    local_organisms = copy.deepcopy(organisms)
    # Bound up front so that an empty ``align`` returns None instead of
    # failing with UnboundLocalError at the return statement.
    new_align = None
    for a in align:
        if len(a) < min_taxa:
            new_align = None
            break
        new_align = Alignment(Gapped(IUPAC.unambiguous_dna, "-"))
        overall_length = len(a[0])
        for seq in a:
            if genera and any(sp for sp in genera if sp in seq.name):
                new_seq_name = '_'.join(seq.name.split('_')[-1:])
            elif not verbatim:
                new_seq_name = '_'.join(seq.name.split('_')[-2:])
            else:
                new_seq_name = seq.name.lower()
            new_align.add_sequence(new_seq_name, str(seq.seq))
            # NOTE(review): the copy is shared by all alignments of
            # ``align``, so a name can only be removed once - confirm that
            # callers pass a single alignment.
            local_organisms.remove(new_seq_name)
        # The locus name is taken from the LAST sequence of the alignment
        # (``seq`` as left by the loop above); it is the part of the name
        # the shortening rule dropped, and is the same for every organism.
        if genera and any(sp for sp in genera if sp in seq.name):
            loc = '_'.join(seq.name.split('_')[:-1])
        elif not verbatim:
            loc = '_'.join(seq.name.split('_')[:-2])
        else:
            loc = seq.name
        for org in local_organisms:
            if missing:
                try:
                    assert loc in missing[org], "Locus missing"
                except (KeyError, AssertionError):
                    # Fall back to the starred key when the plain key is
                    # absent or does not list the locus.
                    assert loc in missing['{}*'.format(org)], "Locus missing"
            new_align.add_sequence(org, '?' * overall_length)
    return new_align
示例2: main
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def main():
    """Rewrite nexus alignments with abbreviated sequence names.

    Reads the files returned by ``get_files(args.nexus)`` twice: first to
    collect every sequence name, which is handed to ``abbreviator`` to get
    an old-name -> new-name mapping, then to write one renamed nexus file
    per input file into ``args.output`` under the same base name.  The
    index of each processed file is printed.
    """
    args = get_args()
    files = get_files(args.nexus)
    # First pass: gather the set of all sequence names in all files.
    old_names = set()
    for f in files:
        for align in AlignIO.parse(f, 'nexus'):
            for seq in align:
                old_names.add(seq.name)
    name_map = abbreviator(old_names)
    # Second pass: copy every sequence under its mapped name.
    for count, f in enumerate(files):
        new_align = Alignment(Gapped(IUPAC.unambiguous_dna, "-"))
        for align in AlignIO.parse(f, 'nexus'):
            for seq in align:
                new_align.add_sequence(name_map[seq.name], str(seq.seq))
        outf = os.path.join(args.output, os.path.split(f)[1])
        try:
            # The context manager closes (and flushes) the output file even
            # when the writer raises; it used to be left open.
            with open(outf, 'w') as handle:
                AlignIO.write(new_align, handle, 'nexus')
        except ValueError:
            # NOTE(review): debugging leftover - this blocks unattended
            # runs.  Kept to preserve behaviour; consider logging the file
            # name and re-raising instead.
            pdb.set_trace()
        print(count)
示例3: ace2fasta
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def ace2fasta(in_file, out_file):
    """Write each contig of an ACE file as a FASTA-formatted alignment.

    For every contig parsed from ``in_file`` an alignment is built from the
    contig sequence followed by its reads, each read being trimmed with
    ``cut_ends`` and padded with ``pad_read`` to the contig length.  Reads
    whose name contains "pseudo" are left out.  The alignment is appended
    to ``out_file`` in FASTA format.

    Args:
        in_file: path of the ACE file to read.
        out_file: path of the FASTA file to create (overwritten).

    Errors raised while parsing now propagate; previously any exception
    from the parser was silently treated as "end of file".
    """
    with open(in_file, 'r') as ace_handle:
        with open(out_file, "w") as output_file:
            for contig in Ace.parse(ace_handle):
                align = Alignment(Gapped(IUPAC.ambiguous_dna, "-"))
                # The consensus sequence is the first row of the alignment.
                align.add_sequence(contig.name, contig.sequence)
                for readn, read in enumerate(contig.reads):
                    clipst = read.qa.qual_clipping_start
                    clipe = read.qa.qual_clipping_end
                    # contig.af is indexed in parallel with contig.reads.
                    start = contig.af[readn].padded_start
                    seq = cut_ends(read.rd.sequence, clipst, clipe)
                    seq = pad_read(seq, start, len(contig.sequence))
                    if "pseudo" not in read.rd.name:
                        align.add_sequence(read.rd.name, seq)
                output_file.write(align.format("fasta"))
            print("All contigs treated")
示例4: _domain_alignment
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def _domain_alignment(self, alignment, domain_region, alignment_index):
    """Cut the columns that span a domain out of an alignment.

    The row at ``alignment_index`` serves as reference.  Its characters are
    scanned left to right while a running count of non-gap characters is
    kept (incremented before it is compared).  The first column at which
    the count equals ``domain_region.start`` becomes the start column and
    the first column at which it equals ``domain_region.stop`` becomes the
    stop column, where the scan ends.

    Returns:
        A tuple ``(domain_alignment, column_start, column_stop)``.  Every
        row of ``domain_alignment`` is the original row sliced as
        ``[column_start:column_stop]``, i.e. the stop column itself is not
        part of the slice.

    Raises:
        AssertionError: if either column could not be determined.
    """
    reference_row = alignment[alignment_index]
    reference_text = str(reference_row.seq)

    first_column = None
    last_column = None
    residues_seen = 0
    for position, symbol in enumerate(reference_text):
        if symbol != '-':
            residues_seen += 1
        # The "is None" guards matter: the count stays unchanged across
        # gap columns, so only the first matching column may be recorded.
        if first_column is None and residues_seen == domain_region.start:
            first_column = position
        if last_column is None and residues_seen == domain_region.stop:
            last_column = position
            break

    assert first_column is not None, str(first_column)
    assert last_column is not None, str(last_column)

    # Same alphabet as the source alignment, same row ids, fewer columns.
    sliced = Alignment(alphabet=alignment._alphabet)
    for row in alignment:
        sliced.add_sequence(row.id, str(row.seq)[first_column:last_column])
    return (sliced, first_column, last_column)
示例5: build_align
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def build_align(self, seq):
    """Split ``seq`` into consecutive segments and store them as an alignment.

    Segments of ``self.segment_size`` characters (the last one may be
    shorter) are added, in order, as rows of a new gapped-DNA Alignment,
    which is then stored in ``self.friendly``.  Nothing is returned.
    """
    segments = Alignment(Gapped(DNAAlphabet()))
    # Read but not used anywhere below; kept so attribute access is unchanged.
    alphabet = self.alphabet
    total = len(seq)
    width = self.segment_size
    for offset in range(0, total, width):
        # NOTE(review): ``name`` is not defined inside this method; it has
        # to come from an enclosing scope - confirm where it is set.
        segments.add_sequence(name, seq[offset:offset + width])
    self.friendly = segments
示例6: createAlignment
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def createAlignment(sequences, alphabet):
    """Build an Alignment whose rows are the given sequences.

    Rows are named "sequence0", "sequence1", ... in iteration order.
    """
    result = Alignment(alphabet)
    for index, item in enumerate(sequences):
        result.add_sequence("sequence%d" % index, item)
    return result
示例7: phylip
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def phylip(handle):
    """Read a simple PHYLIP-style file into a gapped protein Alignment.

    The first line must consist of exactly two whitespace-separated fields
    (the values themselves are not used).  Every following non-blank line
    must consist of exactly two fields: a name and its sequence.

    Args:
        handle: open text handle positioned at the header line.

    Returns:
        An Alignment with one row per data line, in file order.

    Raises:
        ValueError: if the header or a non-blank data line does not have
            exactly two fields.
    """
    from Bio.Align.Generic import Alignment
    from Bio.Alphabet import IUPAC, Gapped

    # Unpacking doubles as the header sanity check (exactly two fields).
    seqs, columns = handle.readline().split()
    alignment = Alignment(Gapped(IUPAC.protein, "-"))
    for line in handle:
        if not line.strip():
            # Blank lines (typically a trailing newline at the end of the
            # file) used to abort parsing with ValueError on unpacking.
            continue
        name, seq = line.split()
        alignment.add_sequence(name, seq)
    return alignment
示例8: testCulledColumnMapper
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def testCulledColumnMapper(self):
    # Culling columns 0, 1, 4 and 8 from "ABCDEFGHI" leaves "CDFGH"; the
    # mapper must translate each index of the culled string back to the
    # index of the same character in the original string.
    source = "ABCDEFGHI"
    removed_columns = [0,1,4,8]
    remaining = "CDFGH"

    alignment = Alignment(Gapped(IUPAC.protein, "-"))
    alignment.add_sequence("test", source)
    mapper = CulledColumnMapper(alignment, removed_columns)

    for culled_index, expected in enumerate(remaining):
        assert source[mapper[culled_index]]==expected
示例9: gene_expression_2matrix
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def gene_expression_2matrix(in_ace, out_file, tags, min_seq):
    """Count, for every contig, the sequences whose name contains each tag.

    Writes a tab-separated matrix to ``out_file``: a header line
    ``gene_name, gene_length, <one column per tag>, XX_noTag`` followed by
    one line per contig.  A contig's alignment is built from its consensus
    and its reads (trimmed with ``cut_ends``, padded with ``pad_read``;
    reads whose name contains "pseudo" are skipped), rendered as FASTA and
    re-read with ``read_fasta_2list``.

    Args:
        in_ace: path of the ACE file to read.
        out_file: path of the matrix file to create (overwritten).
        tags: sequence of tag strings searched for in the sequence names.
        min_seq: contigs yielding fewer sequences than this are skipped.

    Counts are written in the same order as the header columns.  They were
    previously written in sorted-key order, which misaligned the columns
    whenever ``tags`` was not already sorted with "XX_noTag" last.
    Errors raised while parsing now propagate instead of being treated as
    the end of the file.
    """
    print("")
    print("USING MATRIX OUTPUT FORMAT")
    print("")
    with open(in_ace, 'r') as ace_handle:
        with open(out_file, "w") as output_file:
            output_file.write("gene_name\tgene_length")
            for tag in tags:
                output_file.write("\t" + tag)
            output_file.write("\tXX_noTag")
            output_file.write("\n")
            for contig in Ace.parse(ace_handle):
                align = Alignment(Gapped(IUPAC.ambiguous_dna, "-"))
                align.add_sequence(contig.name, contig.sequence)
                for readn, read in enumerate(contig.reads):
                    clipst = read.qa.qual_clipping_start
                    clipe = read.qa.qual_clipping_end
                    # contig.af is indexed in parallel with contig.reads.
                    start = contig.af[readn].padded_start
                    seq = cut_ends(read.rd.sequence, clipst, clipe)
                    seq = pad_read(seq, start, len(contig.sequence))
                    if "pseudo" not in read.rd.name:
                        align.add_sequence(read.rd.name, seq)
                # presumably a list of (name, sequence) pairs with the
                # consensus first - verify against read_fasta_2list
                sequences = read_fasta_2list(align.format("fasta"))
                if len(sequences) < min_seq:
                    continue
                contig_name = re.findall("(Contig_[0-9]+)", sequences[0][0])[0]
                # '*' characters do not count towards the contig length.
                contig_length = len(sequences[0][1].replace("*", ""))
                output_file.write(contig_name + "\t" + str(contig_length))
                print("Treating " + contig_name)
                counts = dict.fromkeys(tags, 0)
                untagged = 0
                for fasta in sequences:
                    title = fasta[0]
                    has_tag = False
                    for tag in tags:
                        if tag in title:
                            counts[tag] += 1
                            has_tag = True
                    # Names containing "Consensus" never count as untagged.
                    if not has_tag and "Consensus" not in title:
                        untagged += 1
                for tag in tags:
                    output_file.write("\t" + str(counts[tag]))
                output_file.write("\t" + str(untagged))
                output_file.write("\n")
            print("***All contigs treated***")
示例10: rename
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def rename(align, first, second):
    """Yield a renamed copy of every alignment in ``align``.

    Each sequence id is split on ``_``.  With only ``first`` given, the
    new name is the field at index ``first``.  With ``second`` given as
    well, the new name is the first three characters of field ``first``
    and of field ``second`` joined by ``_``.

    Args:
        align: iterable of alignments.
        first: index of the first id field to use.
        second: index of the second id field, or None (False is accepted
            too) to use ``first`` alone.

    Yields:
        One new gapped unambiguous-DNA Alignment per input alignment.

    Field index 0 is honoured for both arguments.  The former truthiness
    tests treated 0 as "not given", which left the new name unbound (or
    stale from the previous sequence) for ``first=0`` and silently ignored
    ``second=0``.
    """
    use_first_only = second is None or second is False
    for a in align:
        new_align = Alignment(Gapped(IUPAC.unambiguous_dna, "-"))
        for seq in a:
            split_name = seq.id.split('_')
            if use_first_only:
                new_seq_name = split_name[first]
            else:
                new_seq_name = '_'.join([split_name[first][0:3], split_name[second][0:3]])
            new_align.add_sequence(new_seq_name, str(seq.seq))
        yield new_align
示例11: main
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def main():
    """Write copies of nexus alignments restricted to selected taxa.

    For every file returned by ``get_files(args.input)`` the sequences
    whose name is in the collection returned by ``get_samples_to_run`` are
    copied into a new alignment, which is written in nexus format to
    ``args.output`` under the input file's base name.  The index of each
    processed file is printed.
    """
    args = get_args()
    nexus_files = get_files(args.input)
    taxa = get_all_taxon_names(nexus_files)
    taxa_to_keep = get_samples_to_run(args, taxa)
    for count, align_file in enumerate(nexus_files):
        new_align = Alignment(Gapped(IUPAC.unambiguous_dna, "-"))
        for align in AlignIO.parse(align_file, "nexus"):
            for taxon in align:
                if taxon.name in taxa_to_keep:
                    new_align.add_sequence(taxon.name, str(taxon.seq))
        outf = os.path.join(args.output, os.path.basename(align_file))
        # Close (and flush) the output file deterministically; the handle
        # used to be left open for the garbage collector.
        with open(outf, 'w') as handle:
            AlignIO.write(new_align, handle, 'nexus')
        print(count)
示例12: proteins_alignment_to_biopython
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def proteins_alignment_to_biopython(al, seq1, seq2, name1, name2):
    """Convert our internal alignment format into a BioPython Alignment.

    Args:
        al: iterable of ``(a, b)`` index pairs, one per alignment column;
            ``a`` indexes ``seq1`` and ``b`` indexes ``seq2``, and -1 marks
            a gap in that row.
        seq1: first sequence (indexable, items support ``upper()``).
        seq2: second sequence.
        name1: row name for the first sequence.
        name2: row name for the second sequence.

    Returns:
        A gapped protein Alignment with two upper-cased rows of equal
        length, using "-" for gaps.
    """
    row1 = []
    row2 = []
    for a, b in al:
        row1.append(seq1[a].upper() if a != -1 else "-")
        # The gap branch was missing for the second row, which made it
        # shorter than the first and shifted all later columns.
        row2.append(seq2[b].upper() if b != -1 else "-")
    align = Alignment(Gapped(IUPAC.protein, "-"))
    align.add_sequence(name1, "".join(row1))
    align.add_sequence(name2, "".join(row2))
    return align
示例13: parse_ace
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def parse_ace(ace_file):
    """Build an alignment from the first contig of an ACE file.

    The alignment holds the contig sequence followed by every read, each
    trimmed with ``cut_ends`` and padded with ``pad_read`` to the contig
    length.  Read rows are named ``<read name>_<coru>``, where ``coru`` is
    taken from the matching ``contig.af`` entry.

    Args:
        ace_file: path of the ACE file to read.

    Returns:
        A tuple ``(contig, align)``.

    Raises:
        StopIteration: if the file contains no contig.
    """
    # Only the first contig is needed, so the file can be closed as soon
    # as it has been parsed; the handle used to be left open.
    with open(ace_file, 'r') as ace_handle:
        contig = next(Ace.parse(ace_handle))
    align = Alignment(Gapped(IUPAC.ambiguous_dna, "-"))
    align.add_sequence(contig.name, contig.sequence)
    for readn, read in enumerate(contig.reads):
        clipst = read.qa.qual_clipping_start
        clipe = read.qa.qual_clipping_end
        # contig.af is indexed in parallel with contig.reads.
        placement = contig.af[readn]
        seq = cut_ends(read.rd.sequence, clipst, clipe)
        seq = pad_read(seq, placement.padded_start, len(contig.sequence))
        align.add_sequence(read.rd.name + "_" + placement.coru, seq)
    return contig, align
示例14: get_alignment
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def get_alignment(self):
    """Construct an alignment from the aligned sequences in this tree.

    The tree is searched with ``self.depth_first_search`` for ``Sequence``
    nodes whose ``mol_seq.is_aligned`` flag is true.  The alphabet of the
    first match is used for the whole alignment; each match contributes a
    row named ``str(seq)`` with ``seq.mol_seq.value`` as its sequence.

    Returns:
        The Alignment, or None (after issuing a warning) when the tree
        holds no aligned sequence.
    """
    def seq_is_aligned(node):
        if isinstance(node, Sequence) and node.mol_seq.is_aligned:
            return True
        return False

    seqs = self.depth_first_search(self, seq_is_aligned)
    try:
        first_seq = next(seqs)
    except StopIteration:
        warnings.warn("No aligned sequences were found in this tree.",
                      Warning, stacklevel=2)
        # An Alignment cannot be built without an alphabet.  Without this
        # return the code fell through and failed on the unbound
        # ``first_seq`` right after warning.
        return None
    aln = Alignment(first_seq.get_alphabet())
    aln.add_sequence(str(first_seq), first_seq.mol_seq.value)
    for seq in seqs:
        aln.add_sequence(str(seq), seq.mol_seq.value)
    return aln
示例15: get_alignment
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def get_alignment(self):
    """Collect this tree's aligned sequences into a single Alignment.

    ``self._filter_search`` is asked, in 'preorder', for the ``Sequence``
    nodes whose ``mol_seq.is_aligned`` flag is true.  The first one found
    supplies the alphabet; every one found becomes a row named
    ``str(seq)`` holding ``seq.mol_seq.value``.

    Returns:
        The Alignment, or None when no aligned sequence exists (without an
        alphabet there is nothing to construct).
    """
    def is_aligned_seq(node):
        return bool(isinstance(node, Sequence) and node.mol_seq.is_aligned)

    matches = self._filter_search(is_aligned_seq, 'preorder', True)
    try:
        head = next(matches)
    except StopIteration:
        # Nothing aligned in this tree.
        return

    aln = Alignment(head.get_alphabet())

    def add_row(record):
        aln.add_sequence(str(record), record.mol_seq.value)

    add_row(head)
    for later in matches:
        add_row(later)
    return aln