本文整理汇总了Python中Bio.AlignIO.read方法的典型用法代码示例。如果您正苦于以下问题:Python AlignIO.read方法的具体用法?Python AlignIO.read怎么用?Python AlignIO.read使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块Bio.AlignIO的用法示例。
在下文中一共展示了AlignIO.read方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: ref
# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def ref(self, in_ref):
    """
    Set the reference sequence.

    Parameters
    ----------
    in_ref : file name, str, Bio.Seq.Seq, Bio.SeqRecord.SeqRecord
        Reference sequence. When it names an existing file, the file is
        parsed as fasta and, failing that, as genbank; the first format
        that parses wins. Any other truthy value is handed to
        ``seq2array`` unchanged. A falsy value leaves the object untouched.

    Raises
    ------
    TypeError
        If ``in_ref`` names an existing file that cannot be parsed as
        fasta or genbank.
    """
    if in_ref and isfile(in_ref):
        # Keep the file name separately so the error message can report it.
        ref_fname = in_ref
        parsed = None
        for fmt in ['fasta', 'genbank']:
            try:
                parsed = SeqIO.read(ref_fname, fmt)
            except Exception:
                # Best effort: this format did not parse, try the next one.
                continue
            self.logger("SequenceData: loaded reference sequence as %s format"%fmt,1)
            break

        if parsed is None:
            raise TypeError('SequenceData.ref: reference sequence file %s could not be parsed, '
                            'fasta and genbank formats are supported.' % ref_fname)
        in_ref = parsed

    if in_ref:
        self._ref = seq2array(in_ref, fill_overhangs=False, word_length=self.word_length)
        self.full_length = self._ref.shape[0]
        # Reset state that is tied to the previous reference.
        self.compressed_to_full_sequence_map = None
        self.multiplicity = None
示例2: load_alignments
# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def load_alignments(sequence_files, gene_names):
    """Read one fasta alignment per gene.

    ``sequence_files`` and ``gene_names`` are paired positionally; the
    result maps each gene name to the alignment read from its file.
    """
    from Bio import AlignIO

    return {
        gene: AlignIO.read(path, 'fasta')
        for path, gene in zip(sequence_files, gene_names)
    }
示例3: read_sequences
# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def read_sequences(*fnames):
    """Return the fasta records found across all *fnames*, one per record name.

    A name that occurs more than once is accepted only when the sequences
    are identical (the later record is kept). A missing file, a parser
    ValueError, or a name with conflicting sequences is reported as an
    AlignmentError.
    """
    records_by_name = {}
    try:
        for path in fnames:
            for record in SeqIO.parse(path, 'fasta'):
                earlier = records_by_name.get(record.name)
                if earlier is not None and record.seq != earlier.seq:
                    raise AlignmentError("Detected duplicate input strains \"%s\" but the sequences are different." % record.name)
                # Same sequence seen again: keep a single copy.
                records_by_name[record.name] = record
    except FileNotFoundError:
        raise AlignmentError("\nCannot read sequences -- make sure the file %s exists and contains sequences in fasta format" % path)
    except ValueError as error:
        raise AlignmentError("\nERROR: Problem reading in {}: {}".format(path, str(error)))
    return list(records_by_name.values())
示例4: read_alignment
# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def read_alignment(fname):
    """Read the fasta alignment in *fname*.

    Any exception raised while reading is re-raised as an AlignmentError
    that names the file and carries the original message.
    """
    try:
        alignment = AlignIO.read(fname, 'fasta')
    except Exception as error:
        message = "\nERROR: Problem reading in {}: {}".format(fname, str(error))
        raise AlignmentError(message)
    return alignment
示例5: read_reference
# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def read_reference(ref_fname):
    """Read a single reference sequence from *ref_fname*.

    The file is parsed as genbank when its last dot-separated suffix is
    ``gb`` or ``genbank``, and as fasta otherwise.

    Raises AlignmentError if the file does not exist or cannot be parsed
    as exactly one record. Interrupts such as KeyboardInterrupt are no
    longer converted into AlignmentError.
    """
    if not os.path.isfile(ref_fname):
        raise AlignmentError("ERROR: Cannot read reference sequence."
                             "\n\tmake sure the file \"%s\" exists"%ref_fname)
    try:
        # Format detection stays inside the try so that any failure here is
        # still reported as AlignmentError, as before.
        extension = ref_fname.split('.')[-1]
        fmt = 'genbank' if extension in ['gb', 'genbank'] else 'fasta'
        ref_seq = SeqIO.read(ref_fname, fmt)
    except Exception as error:
        raise AlignmentError("ERROR: Cannot read reference sequence."
                             "\n\tmake sure the file %s contains one sequence in genbank or fasta format"%ref_fname) from error
    return ref_seq
示例6: prepare_msa_heatmap
# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def prepare_msa_heatmap(msa_path, consensus_threshold):
    """Compute the consensus-match matrix used for an MSA heatmap.

    Reads the fasta alignment at *msa_path*, derives a consensus with
    ``dumb_consensus(threshold=consensus_threshold)`` and marks every
    alignment cell with 1 where it equals the consensus at that column and
    0 otherwise. Rows are returned ordered by descending number of matches.
    Nothing is plotted here.
    """
    alignment = AlignIO.read(msa_path, "fasta")
    consensus_seq = AlignInfo.SummaryInfo(alignment).dumb_consensus(threshold=consensus_threshold)
    consensus_array = np.asarray(consensus_seq)

    def matches_consensus(row):
        return row == consensus_array

    matches = np.apply_along_axis(matches_consensus, 1, np.asarray(alignment)).astype(int)
    ranked_rows = sorted(matches, key=lambda row: row.sum(), reverse=True)
    return np.array(ranked_rows)
示例7: read_alignment
# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def read_alignment(input_file, format='fasta'):
    """
    Read a single multiple sequence alignment from a file.

    :param input_file: Input file name.
    :param format: Format name handed to ``AlignIO.read`` (default ``'fasta'``).
    :returns: The alignment read from the input file.
    :rtype: MultipleSeqAlignment
    """
    return AlignIO.read(input_file, format)
示例8: add_gaps
# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def add_gaps(align, vcfDict):
    """Mark alignment gaps in *vcfDict* and then drop the marked entries.

    For every record of the fasta alignment *align*, each key of *vcfDict*
    whose ``int(key) - 1`` is among the record's gap positions gets
    ``"-"`` written into element ``[1][i-1]`` of its value, where ``i`` is
    the record's index in the alignment. Afterwards every ``"-"`` is
    filtered out of each value's element ``[1]``. *vcfDict* is modified in
    place and also returned.
    """
    records = AlignIO.read(align, "fasta")
    for row, record in enumerate(records):
        # presumably `find` returns the positions of '-' in the sequence;
        # verify against its definition
        gap_positions = find(record.seq, '-')
        # NOTE(review): for the first record this is -1, i.e. the last
        # entry is written -- confirm that this is intended.
        target = row - 1
        for site, entry in vcfDict.items():
            if int(site) - 1 in gap_positions:
                entry[1][target] = "-"
    for site, entry in vcfDict.items():
        entry[1] = [call for call in entry[1] if call != "-"]
    return vcfDict
示例9: process_data
# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def process_data(data, data_type='counts', seq_type='dna'):
    """Turn *data* into a formatted frequency matrix and information matrix.

    How *data* is interpreted depends on *data_type*:

    * ``'counts'``: passed to ``count_to_pfm``.
    * ``'probability'``: used directly as the frequency matrix, with a
      fixed total of 10 for the information calculation.
    * ``'fasta'`` / ``'stockholm'``: read with ``read_alignment`` (which
      also receives *seq_type*), then converted with ``count_to_pfm``.
    * ``'alignace'``, ``'meme'``, ``'mast'``, ``'transfac'``, ``'pfm'``,
      ``'sites'``, ``'jaspar'``: *data* is a file name parsed with
      ``motifs``.

    Returns a tuple ``(format_matrix(pfm), format_matrix(ic))``.

    Raises ValueError for an unsupported *data_type* (previously this ended
    in an unbound-variable error).
    """
    if data_type == 'counts':
        pfm, total = count_to_pfm(data)
        ic = calc_relative_information(pfm, total)
    elif data_type == 'probability':
        pfm = data
        ic = calc_relative_information(pfm, 10)
    elif data_type in ['fasta', 'stockholm']:
        data, total = read_alignment(data, data_type, seq_type)
        pfm, _ = count_to_pfm(data)
        ic = calc_relative_information(pfm, total)
    elif data_type in [
        'alignace', 'meme', 'mast', 'transfac', 'pfm', 'sites', 'jaspar'
    ]:
        if data_type in ['jaspar', 'transfac']:
            # Close the file as soon as parsing is done.
            with open(data, 'r') as handle:
                motif = motifs.parse(handle, data_type.upper())[0]
            pfm = dict(motif.counts.normalize())
            total = sum(list(motif.counts.values())[0])
        else:
            with open(data, 'r') as handle:
                motif = motifs.read(handle, data_type)
            try:
                # The keyword was previously misspelled ("psuedocounts"), so
                # this call always failed and pseudocounts were never applied.
                pfm = motif.counts.normalize(pseudocounts=1)
            except Exception:
                # Best-effort fallback when pseudocount normalisation fails.
                pfm = motif.counts.normalize()
            # NOTE(review): this passes the whole counts matrix as `total`,
            # unlike the other branches -- confirm against
            # calc_relative_information.
            total = motif.counts
        ic = calc_relative_information(pfm, total)
    else:
        raise ValueError("Unsupported data_type: %r" % (data_type,))
    return (format_matrix(pfm), format_matrix(ic))
示例10: generate_summary_stats
# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def generate_summary_stats(output_dir):
    """Summarise ``gene_presence_absence_roary.csv`` into core/shell/cloud counts.

    *output_dir* is used as a plain string prefix for both file names, so
    it must already end with a path separator.

    The first line of the CSV is skipped as a header. The number of samples
    is the column count of the first data row minus 14, and column index 4
    of each row is read as the number of samples carrying that gene. Genes
    are binned by the percentage of samples carrying them:

    * core: >= 99%
    * soft core: >= 95% and < 99%
    * shell: >= 15% and < 95%
    * cloud: < 15%

    The counts are written to ``summary_statistics.txt``. A table with no
    gene rows yields all-zero counts (previously this raised IndexError).

    Returns True.
    """
    with open(output_dir + "gene_presence_absence_roary.csv", 'r') as inhandle:
        # Drop the header; every remaining line describes one gene.
        gene_rows = [line.split(',') for line in inhandle.read().splitlines()[1:]]

    noCore = 0
    noSoftCore = 0
    noShell = 0
    noCloud = 0
    total_genes = 0

    if gene_rows:
        noSamples = len(gene_rows[0]) - 14
        for fields in gene_rows:
            proportion_present = float(fields[4]) / noSamples * 100.0
            if proportion_present >= 99:
                noCore += 1
            elif proportion_present >= 95:
                noSoftCore += 1
            elif proportion_present >= 15:
                noShell += 1
            else:
                noCloud += 1
            total_genes += 1

    summary_lines = [
        "Core genes\t(99% <= strains <= 100%)\t" + str(noCore),
        "Soft core genes\t(95% <= strains < 99%)\t" + str(noSoftCore),
        "Shell genes\t(15% <= strains < 95%)\t" + str(noShell),
        "Cloud genes\t(0% <= strains < 15%)\t" + str(noCloud),
        "Total genes\t(0% <= strains <= 100%)\t" + str(total_genes),
    ]
    with open(output_dir + "summary_statistics.txt", 'w') as outfile:
        # No trailing newline, matching the established output format.
        outfile.write("\n".join(summary_lines))

    return True
示例11: pairwiseIdentity
# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def pairwiseIdentity(self, alnFile):
    """Return the percent identity between the first two aligned sequences.

    Reads the fasta alignment in *alnFile*, computes the 'identity'
    distance matrix and converts the distance between the second and first
    sequence into a percentage: ``(1 - distance) * 100``.
    """
    # Pair up the sequences and build the similarity matrix.
    # The file is opened in a context manager so the handle is closed.
    with open(alnFile) as handle:
        aln = AlignIO.read(handle, 'fasta')
    calculator = DistanceCalculator('identity')
    identity = (1 - calculator.get_distance(aln).matrix[1][0]) * 100
    return identity
示例12: build_newick_fasttree
# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def build_newick_fasttree(aln_fname, nuc=True):
    """Build a tree from *aln_fname* with the ``fasttree`` executable.

    ``-nt`` is passed when *nuc* is true. The program's standard output is
    written to ``tmp.nwk`` and its standard error to ``fasttree_stderr``,
    both in the current working directory. The exit status is not checked.

    The command is run as an argument list without a shell, so file names
    containing spaces or shell metacharacters are passed through verbatim.

    Returns the tree parsed from ``tmp.nwk`` in newick format.

    Raises OSError (e.g. FileNotFoundError) if ``fasttree`` cannot be
    started.
    """
    import subprocess
    from Bio import Phylo
    print("Building tree with fasttree")
    tree_cmd = ["fasttree"]
    if nuc:
        tree_cmd.append("-nt")
    tree_cmd.append(aln_fname)
    with open("tmp.nwk", "w") as tree_out, open("fasttree_stderr", "w") as err_out:
        subprocess.call(tree_cmd, stdout=tree_out, stderr=err_out)
    return Phylo.read("tmp.nwk", 'newick')
示例13: build_newick_raxml
# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def build_newick_raxml(aln_fname, nthreads=2, raxml_bin="raxml", **kwargs):
    """Build a tree from the fasta alignment *aln_fname* with RAxML.

    The alignment is rewritten as relaxed phylip to ``temp.phyx`` in the
    current working directory, then *raxml_bin* is run through the shell
    with *nthreads* threads and run name ``tre``. Extra keyword arguments
    are accepted and ignored.

    Returns the tree parsed from ``RAxML_bestTree.tre``.
    """
    import os
    print("Building tree with raxml")
    from Bio import Phylo, AlignIO

    alignment = AlignIO.read(aln_fname, 'fasta')
    AlignIO.write(alignment, "temp.phyx", "phylip-relaxed")

    raxml_call = "".join([
        raxml_bin,
        " -f d -T ",
        str(nthreads),
        " -m GTRCAT -c 25 -p 235813 -n tre -s temp.phyx",
    ])
    os.system(raxml_call)
    return Phylo.read('RAxML_bestTree.tre', "newick")
示例14: __init__
# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def __init__(self, **kwargs):
    """Set up the flu-specific lookup tables, outgroups and fix-file names.

    Forwards *kwargs* to ``upload.__init__`` and reads one genbank
    outgroup record per lineage from ``source-data/``.
    """
    upload.__init__(self, **kwargs)
    self.grouping_upload_fields = ['vtype', 'subtype', 'lineage']

    # (vtype, subtype, lineage) groupings that appear more than once below
    h1n1pdm = ('a', 'h1n1', 'seasonal_h1n1pdm')
    h3n2 = ('a', 'h3n2', 'seasonal_h3n2')
    vic = ('b', None, 'seasonal_vic')
    yam = ('b', None, 'seasonal_yam')

    # patterns from the subtype and lineage fields in the GISAID fasta file
    self.patterns = {
        ('a / h1n1', 'pdm09'): h1n1pdm,
        ('a / h1n2', ''): ('a', 'h1n2', None),
        ('a / h1n2', 'seasonal'): ('a', 'h1n2', 'seasonal_h1n2'),
        ('a / h2n2', ''): ('a', 'h2n2', None),
        ('a / h3n2', ''): h3n2,
        ('a / h3n2', 'seasonal'): h3n2,
        ('a / h3n3', ''): ('a', 'h3n3', None),
        ('a / h5n1', ''): ('a', 'h5n1', None),
        ('a / h5n6', ''): ('a', 'h5n6', None),
        ('a / h6n1', ''): ('a', 'h6n1', None),
        ('a / h7n1', ''): ('a', 'h7n1', None),
        ('a / h7n2', ''): ('a', 'h7n2', None),
        ('a / h7n3', ''): ('a', 'h7n3', None),
        ('a / h7n7', ''): ('a', 'h7n7', None),
        ('a / h7n9', ''): ('a', 'h7n9', None),
        ('a / h9n2', ''): ('a', 'h9n2', None),
        ('a / h10n7', ''): ('a', 'h10n7', None),
        ('a / h10n8', ''): ('a', 'h10n8', None),
        ('a / h11', ''): ('a', 'h11', None),
        ('b / h0n0', 'victoria'): vic,
        ('b / h0n0', 'yamagata'): yam,
        ('b', 'victoria'): vic,
        ('b', 'yamagata'): yam,
    }

    # One genbank outgroup per lineage; assigned only after all are read.
    outgroup_records = {}
    for lineage in ['H3N2', 'H1N1pdm', 'Vic', 'Yam']:
        outgroup_records[lineage] = SeqIO.read('source-data/'+lineage+'_outgroup.gb', 'genbank')
    self.outgroups = outgroup_records

    self.outgroup_patterns = {
        'H3N2': h3n2,
        'H1N1': ('a', 'h1n1', 'seasonal_h1n1'),
        'H1N1pdm': h1n1pdm,
        'Vic': vic,
        'Yam': yam,
    }

    self.strain_fix_fname = "source-data/flu_strain_name_fix.tsv"
    self.location_fix_fname = "source-data/flu_location_fix.tsv"
    self.location_label_fix_fname = "source-data/flu_fix_location_label.tsv"
    self.virus_to_sequence_transfer_fields = ['submission_date']
    self.fix = set()
示例15: align_flu
# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def align_flu(self, doc, min_score_percentage=0.85, **kwargs):
    '''
    Align the sequence in *doc* against every outgroup to determine
    subtype and lineage.

    For each entry of ``self.outgroups`` the outgroup and the query are
    written to ``temp_in.fasta``, aligned with ``mafft --auto`` into
    ``temp_out.fasta`` and scored by the number of identical alignment
    columns. The best-scoring outgroup is accepted when its score exceeds
    ``min_score_percentage * len(sequence)``.

    :param doc: mapping with at least the keys ``'sequence'`` and ``'strain'``
    :param min_score_percentage: minimum fraction of the query length that
        must match the best outgroup
    :return: the ``self.outgroup_patterns`` entry of the best outgroup, or
        None if no outgroup scores high enough or the alignment fails
    '''
    try:
        from Bio.Seq import Seq
        from Bio.SeqRecord import SeqRecord
        from Bio import AlignIO
        # No alphabet argument: Bio.Alphabet was removed in Biopython 1.78,
        # and importing it made this method fail silently on every call.
        record = SeqRecord(Seq(doc['sequence']), id=doc['strain'])
        scores = []
        for olineage, oseq in self.outgroups.items():
            SeqIO.write([oseq, record], "temp_in.fasta", "fasta")
            os.system("mafft --auto temp_in.fasta > temp_out.fasta 2>tmp")
            tmp_aln = np.array(AlignIO.read('temp_out.fasta', 'fasta'))
            scores.append((olineage, (tmp_aln[0]==tmp_aln[1]).sum()))
        # Best match first; the full list is kept for the log output.
        scores.sort(key = lambda x:x[1], reverse=True)
        if scores[0][1]>min_score_percentage*len(record.seq):
            print("Lineage based on similarity:", scores[0][0], doc['strain'], len(record.seq), scores)
            return self.outgroup_patterns[scores[0][0]]
        else:
            print("Couldn't parse virus subtype and lineage from aligning sequence: ", doc['strain'], len(record.seq), scores)
            return None
    except Exception:
        # Best effort: any failure means "no grouping determined", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        print("Alignment failed: " + doc['strain'])
        return None