本文整理汇总了Python中Bio.Align.Generic.Alignment.format方法的典型用法代码示例。如果您正苦于以下问题:Python Alignment.format方法的具体用法?Python Alignment.format怎么用?Python Alignment.format使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Bio.Align.Generic.Alignment
的用法示例。
在下文中一共展示了Alignment.format方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: ace2fasta
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import format [as 别名]
def ace2fasta(in_file, out_file):
ace_gen = Ace.parse(open(in_file, 'r'))
with open(out_file, "w") as output_file:
while 1:
try:
contig = ace_gen.next()
except:
print "All contigs treated"
break
align = Alignment(Gapped(IUPAC.ambiguous_dna, "-"))
# Now we have started our alignment we can add sequences to it
# Add concensus sequence to alignment
align.add_sequence(contig.name, contig.sequence)
for readn in xrange(len(contig.reads)):
clipst = contig.reads[readn].qa.qual_clipping_start
clipe = contig.reads[readn].qa.qual_clipping_end
start = contig.af[readn].padded_start
seq = cut_ends(contig.reads[readn].rd.sequence, clipst, clipe)
seq = pad_read(seq, start, len(contig.sequence))
if "pseudo" not in contig.reads[readn].rd.name:
align.add_sequence(contig.reads[readn].rd.name, seq)
output_file.write(align.format("fasta"))
示例2: gene_expression_2matrix
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import format [as 别名]
def gene_expression_2matrix(in_ace, out_file, tags, min_seq):
"""Count sequences with each tags in all contigs.
"""
print
print "USING MATRIX OUTPUT FORMAT"
print
ace_gen = Ace.parse(open(in_ace, 'r'))
with open(out_file, "w") as output_file:
output_file.write("gene_name\tgene_length")
for tag in tags:
output_file.write("\t" + tag)
output_file.write("\tXX_noTag")
output_file.write("\n")
while 1:
try:
contig = ace_gen.next()
except:
print "***All contigs treated***"
break
align = Alignment(Gapped(IUPAC.ambiguous_dna, "-"))
align.add_sequence(contig.name, contig.sequence)
for readn in xrange(len(contig.reads)):
clipst = contig.reads[readn].qa.qual_clipping_start
clipe = contig.reads[readn].qa.qual_clipping_end
start = contig.af[readn].padded_start
seq = cut_ends(contig.reads[readn].rd.sequence, clipst, clipe)
seq = pad_read(seq, start, len(contig.sequence))
if "pseudo" not in contig.reads[readn].rd.name:
align.add_sequence(contig.reads[readn].rd.name, seq)
sequences = read_fasta_2list(align.format("fasta"))
if len(sequences) < min_seq:
continue
contig_name = re.findall("(Contig_[0-9]+)", sequences[0][0])[0]
contig_seq = sequences[0][1].replace("*", "")
contig_length = str(len(contig_seq))
output_file.write(contig_name + "\t" + contig_length)
print "Treating", contig_name
d = defaultdict(int)
for tag in tags:
d[tag] = 0
d["XX_noTag"] = 0
fasta_counter = 0
for fasta in sequences:
fasta_counter += 1
found_tag = 0
for tag in tags:
if fasta[0].find(tag) > -1:
d[tag] += 1
found_tag = 1
if found_tag == 0 and fasta[0].find("Consensus") < 0:
d["XX_noTag"] += 1
for tag in sorted(d):
output_file.write("\t" + str(d[tag]))
output_file.write("\n")
示例3: isinstance
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import format [as 别名]
align_info = AlignInfo.SummaryInfo(alignment)
consensus = align_info.dumb_consensus(ambiguous="N", threshold=0.6)
assert isinstance(consensus, Seq.Seq)
print 'consensus:', repr(consensus)
print alignment
print "Test format conversion..."
# parse the alignment file and get an aligment object
alignment = Clustalw.parse_file(os.path.join(os.curdir, 'Clustalw',
'opuntia.aln'))
print "As FASTA:"
print alignment.format("fasta")
print "As Clustal:"
print alignment.format("clustal")
"""
# test to find a position in an original sequence given a
# column position in an alignment
print "Testing finding column positions..."
alignment_info = ["GATC--CGATC--G",
"GA--CCCG-TC--G",
"GAT--CC--TC--G"]
gapped_unambiguous = Alphabet.Gapped(IUPAC.unambiguous_dna)
alignment = Alignment(gapped_unambiguous)
for seq in alignment_info:
示例4: get_haplotypes
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import format [as 别名]
def get_haplotypes(in_ace, out_file, out_bamova, win_len, step,
coverage, stars, ngroups, nhaplo):
"""Get haplotypes from contigs in an ace file
"""
marker_number = 0
min_freq = 0.05
ace_gen = Ace.parse(open(in_ace, 'r'))
with open(out_file, "w") as output_file:
with open(out_bamova, "w") as bamova_file:
output_file.write("Contig_nb\tWindow\tHaplotype\n")
contig_counter = 0
ntreated = 0
for contig in ace_gen:
pass_haplo = False
contig_counter += 1
align = Alignment(Gapped(IUPAC.ambiguous_dna, "X"))
align.add_sequence(contig.name, contig.sequence)
if len(contig.reads) -1 < coverage:
continue
ntreated += 1
for readn in xrange(len(contig.reads)):
clipst = contig.reads[readn].qa.qual_clipping_start
clipe = contig.reads[readn].qa.qual_clipping_end
clipst2 = contig.reads[readn].qa.align_clipping_start
clipe2 = contig.reads[readn].qa.align_clipping_end
if clipst2 > clipst:
clipst = clipst2
if clipe2 < clipe2:
clipe = clipe2
start = contig.af[readn].padded_start
seq = cut_ends(contig.reads[readn].rd.sequence, clipst, clipe)
seq = pad_read(seq, start, len(contig.sequence))
if "pseudo" not in contig.reads[readn].rd.name:
align.add_sequence(contig.reads[readn].rd.name, seq)
sequences = read_fasta(align.format("fasta"))
sequences = [[s[0].replace(">", ""), s[1]] for s in sequences]
contig_name = sequences[0][0]
concensus = sequences[0][1]
error_positions = multi_find("*", concensus)[::-1]
for p in error_positions:
sequences = [[s[0], s[1][0:p] + s[1][p+1:]] for s in sequences]
concensus = sequences[0][1]
sequences = [[s[0], correct_sequence(concensus, s[1])]
for s in sequences[1:]]
sequences, snp_pos = snp_positions(sequences)
haplotypes = best_snps(sequences, snp_pos, coverage)
if haplotypes != "Empty":
bamova = []
variants = list(sorted(list(set([h[-1] for h in haplotypes[-1]]))))
groups = list(sorted(set([h[0][:3] for h in haplotypes[-1]])))
if len(groups) >= ngroups:
pass_haplo = True
for g in groups:
if len([h[0] for h in haplotypes[-1] if h[0].startswith(g)]) < nhaplo:
pass_haplo = False
if pass_haplo:
print contig.name
bamova_file.write("Marker" + str(marker_number) + "\n")
group_number = 0
for g in groups:
bamova_file.write("Population\t" + str(group_number))
group_number += 1
for v in variants:
bamova_file.write("\t" + str(len([h for h in haplotypes[-1]
if h[-1] == v and h[0].startswith(g)])))
bamova_file.write("\n")
with open ("fasta_output/" + contig.name + ".fasta", "w") as f:
output_file.write(contig.name + "\n")
for h in haplotypes[-1]:
f.write(">" + h[0] + str(marker_number) + "\n" + h[2] + "\n")
h[1] = [x - h[1][0] + 1 for x in h[1]]
output_file.write("Marker" + str(marker_number) + "\t" +
"\t".join([str(x) for x in h]) + "\t" +
":".join(variants) + "\n")
marker_number += 1
output_file.flush()
bamova_file.flush()
cutoff = 100000
if contig_counter > cutoff:
break
print "\n", str(ntreated), "contigs out of", str(contig_counter), "were treated"
示例5: pairwise
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import format [as 别名]
def pairwise(in_ace, out_file):
"""Calculate pairwise differentiation indexes.
"""
ace_gen = Ace.parse(open(in_ace, 'r'))
with open(out_file, "w") as output_file:
while 1:
try:
contig = ace_gen.next()
except:
print "***All contigs treated***"
break
align = Alignment(Gapped(IUPAC.ambiguous_dna, "-"))
align.add_sequence(contig.name, contig.sequence)
for readn in xrange(len(contig.reads)):
clipst = contig.reads[readn].qa.qual_clipping_start
clipe = contig.reads[readn].qa.qual_clipping_end
start = contig.af[readn].padded_start
seq = cut_ends(contig.reads[readn].rd.sequence, clipst, clipe)
seq = pad_read(seq, start, len(contig.sequence))
if "pseudo" not in contig.reads[readn].rd.name:
align.add_sequence(contig.reads[readn].rd.name, seq)
sequences = read_fasta(align.format("fasta"))
contig_name = re.findall("(Contig_[0-9]+)", sequences[0][0])[0]
print "Treating", contig_name
window_len = 8 # PARAMETER
max_diff = 3 # PARAMETER
len_contig = len(sequences[0][1])
number_indexes = 0
total_indexes = 0
for seq in sequences[1:]:
try:
start = len(re.findall("^-+", seq[1])[0])
except:
start = 0
len_seq = 0
min_len_seq = 100 # PARAMETER
count = 0
for window in range(start, len_contig, window_len):
nuc_contig = sequences[0][1][window:window + window_len]
nuc_seq = seq[1][window:window + window_len]
if "-" in nuc_seq:
len_seq += len(nuc_seq.replace("-", ""))
else:
diff = count_diff(nuc_contig, nuc_seq, max_diff)
if diff[1] == False:
count += diff[0]
len_seq += window_len
len_seq -= seq.count("*")
if len_seq >= min_len_seq:
index = float(count) / len_seq
if count > 0:
number_indexes +=1
total_indexes += index
else:
index = "NA"
#output_file.write(contig_name + "\t" + str(index) + "\n")
try:
mean_index = float(total_indexes) / number_indexes
except:
mean_index = "NA"
output_file.write(contig_name + "\t" + str(mean_index) + "\n")
示例6: snp_count
# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import format [as 别名]
def snp_count(in_ace, out_file, snp_dict, tags, win_len, max_del, stars):
"""Genotype individuals at SNPs loci.
"""
win_buffer = (win_len - 1) / 2
ace_gen = Ace.parse(open(in_ace, 'r'))
with open(out_file, "w") as output_file:
output_file.write("Contig_nb\tPos\ttag_name\tA\tC\tG\tT\tN\t*\t-\n")
while 1:
try:
contig = ace_gen.next()
except:
print "***All contigs treated***"
break
align = Alignment(Gapped(IUPAC.ambiguous_dna, "-"))
align.add_sequence(contig.name, contig.sequence)
for readn in xrange(len(contig.reads)):
clipst = contig.reads[readn].qa.qual_clipping_start # GOOD
clipe = contig.reads[readn].qa.qual_clipping_end # GOOD
clipst2 = contig.reads[readn].qa.align_clipping_start # Added
clipe2 = contig.reads[readn].qa.align_clipping_end # Added
if clipst2 > clipst: # Added
clipst = clipst2 # Added
if clipe2 < clipe2: # Added
clipe = clipe2 # Added
start = contig.af[readn].padded_start
seq = cut_ends(contig.reads[readn].rd.sequence, clipst, clipe)
seq = pad_read(seq, start, len(contig.sequence))
if "pseudo" not in contig.reads[readn].rd.name:
align.add_sequence(contig.reads[readn].rd.name, seq)
sequences = read_fasta(align.format("fasta"))
contig_name = re.findall("(Contig_[0-9]+)", sequences[0][0])[0]
print "Treating", contig_name
positions = []
try:
positions = snp_dict[contig_name]
except:
continue
d = {}
for pos in positions:
if stars == True:
pos_ok = correct_position(pos, sequences[0][1])
else:
pos_ok = pos
left = pos_ok - 5
if left < 0:
left = 0
right = pos_ok + 1 + 5 # takes into account the middle nucleotide
ref_window = sequences[0][1][left:right]
d.setdefault(pos, {})
d[pos].setdefault("XX_noTag", {})
for nuc in list("ACGTN*-"):
d[pos]["XX_noTag"].setdefault(nuc, 0)
for tag in tags:
d[pos].setdefault(tag, {})
for nuc in list("ACGTN*-"):
d[pos][tag].setdefault(nuc, 0)
for fasta in sequences:
window = fasta[1][left:right]
del_count = 0
if window.count("-") > win_buffer - 3:
continue # Need at least 3 nucleotides on each side
for tag in tags:
if tag in fasta[0]:
t = tag
break
else:
t = "XX_noTag"
if len(ref_window) == len(window):
for i in xrange(len(window)):
if ref_window[i].isalpha() and window[i] == "*" or \
window[i].isalpha() and ref_window[i] == "*":
del_count += 1
if del_count > max_del:
continue
p = pos
s = fasta[1] # Sequence
n = s[pos_ok - 1].upper()
d[p][t][n] += 1
for p in sorted(d):
for t in sorted(d[p]):
output_file.write(contig_name + "\t" + str(p) + "\t" +
str(t))
for n in list("ACGTN*-"):
output_file.write("\t" + str(d[p][t][n]))
output_file.write("\n")