当前位置: 首页>>代码示例>>Python>>正文


Python AlignIO.read方法代码示例

本文整理汇总了Python中Bio.AlignIO.read方法的典型用法代码示例。如果您正苦于以下问题:Python AlignIO.read方法的具体用法?Python AlignIO.read怎么用?Python AlignIO.read使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Bio.AlignIO的用法示例。


在下文中一共展示了AlignIO.read方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: ref

# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def ref(self, in_ref):
        """
        Set the reference sequence from a file or an in-memory sequence.

        Parameters
        ----------
        in_ref : file name, str, Bio.Seq.Seq, Bio.SeqRecord.SeqRecord
            reference sequence. If it names an existing file, the file is
            parsed as fasta and then as genbank, keeping the first format
            that works. The result is converted with ``seq2array`` and stored
            in ``self._ref``. A falsy value leaves the object untouched.

        Raises
        ------
        TypeError
            if ``in_ref`` names an existing file that can be parsed neither
            as fasta nor as genbank.
        """
        read_from_file = False
        if in_ref and isfile(in_ref):
            # keep the file name: in_ref is rebound to the parsed record below
            ref_fname = in_ref
            for fmt in ['fasta', 'genbank']:
                try:
                    in_ref = SeqIO.read(ref_fname, fmt)
                except Exception:
                    # best effort: a failure only means "try the next format"
                    continue
                self.logger("SequenceData: loaded reference sequence as %s format"%fmt,1)
                read_from_file = True
                break
            if not read_from_file:
                raise TypeError('SequenceData.ref: reference sequence file %s could not be parsed, '
                                'fasta and genbank formats are supported.' % ref_fname)

        if in_ref:
            self._ref = seq2array(in_ref, fill_overhangs=False, word_length=self.word_length)
            self.full_length = self._ref.shape[0]
            # cached values derived from a previous reference are reset
            self.compressed_to_full_sequence_map = None
            self.multiplicity = None
开发者ID:neherlab,项目名称:treetime,代码行数:27,代码来源:sequence_data.py

示例2: load_alignments

# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def load_alignments(sequence_files, gene_names):
    """Read one fasta alignment per gene.

    ``sequence_files`` and ``gene_names`` are walked in lockstep; the result
    maps every gene name to the alignment parsed from the matching file.
    """
    from Bio import AlignIO
    return {
        gene: AlignIO.read(fname, 'fasta')
        for fname, gene in zip(sequence_files, gene_names)
    }
开发者ID:nextstrain,项目名称:augur,代码行数:8,代码来源:reconstruct_sequences.py

示例3: read_sequences

# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def read_sequences(*fnames):
    """Collect the fasta records of all given files, one record per name.

    Records are keyed by ``record.name``. A name that shows up again with an
    identical sequence simply replaces the earlier record; a differing
    sequence raises AlignmentError. A missing file, or a ValueError raised
    while reading, is re-raised as AlignmentError. The kept records are
    returned as a list.
    """
    records_by_name = {}
    try:
        for path in fnames:
            for rec in SeqIO.parse(path, 'fasta'):
                strain = rec.name
                if strain in records_by_name:
                    if rec.seq != records_by_name[strain].seq:
                        raise AlignmentError("Detected duplicate input strains \"%s\" but the sequences are different." % strain)
                # identical duplicates are harmless: only one record is kept
                records_by_name[strain] = rec
    except FileNotFoundError:
        raise AlignmentError("\nCannot read sequences -- make sure the file %s exists and contains sequences in fasta format" % path)
    except ValueError as error:
        raise AlignmentError("\nERROR: Problem reading in {}: {}".format(path, str(error)))
    return [*records_by_name.values()]
开发者ID:nextstrain,项目名称:augur,代码行数:17,代码来源:align.py

示例4: read_alignment

# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def read_alignment(fname):
    """Read the fasta alignment stored in ``fname``.

    Any exception raised while reading is reported as an AlignmentError that
    names the file and carries the original error text.
    """
    try:
        alignment = AlignIO.read(fname, 'fasta')
    except Exception as error:
        message = "\nERROR: Problem reading in {}: {}".format(fname, str(error))
        raise AlignmentError(message)
    return alignment
开发者ID:nextstrain,项目名称:augur,代码行数:7,代码来源:align.py

示例5: read_reference

# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def read_reference(ref_fname):
    """Load the reference sequence stored in ``ref_fname``.

    The file is parsed as genbank when the text after its last dot is ``gb``
    or ``genbank``, and as fasta otherwise.

    Returns the record produced by ``SeqIO.read``.

    Raises AlignmentError when the file does not exist or when reading it
    fails; in the latter case the underlying exception is attached as the
    cause.
    """
    if not os.path.isfile(ref_fname):
        raise AlignmentError("ERROR: Cannot read reference sequence."
                             "\n\tmake sure the file \"%s\" exists"%ref_fname)
    try:
        ref_seq = SeqIO.read(ref_fname, 'genbank' if ref_fname.split('.')[-1] in ['gb', 'genbank'] else 'fasta')
    except Exception as error:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate
        raise AlignmentError("ERROR: Cannot read reference sequence."
                "\n\tmake sure the file %s contains one sequence in genbank or fasta format"%ref_fname) from error
    return ref_seq
开发者ID:nextstrain,项目名称:augur,代码行数:12,代码来源:align.py

示例6: prepare_msa_heatmap

# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def prepare_msa_heatmap(msa_path, consensus_threshold):
    """Build the 0/1 consensus-match matrix used for an MSA heatmap.

    The fasta alignment at ``msa_path`` is compared, row by row, against its
    ``dumb_consensus`` computed with ``consensus_threshold``. Each cell is 1
    where the sequence agrees with the consensus and 0 otherwise. Rows are
    returned ordered from the most to the fewest matches. Nothing is plotted
    here.
    """
    alignment = AlignIO.read(msa_path, "fasta")
    consensus = AlignInfo.SummaryInfo(alignment).dumb_consensus(threshold=consensus_threshold)
    consensus_array = np.asarray(consensus)

    def agrees_with_consensus(row):
        return row == consensus_array

    match_matrix = np.apply_along_axis(agrees_with_consensus, 1, np.asarray(alignment)).astype(int)

    # stable descending sort: rows with equal match counts keep their order
    ranked_rows = list(match_matrix)
    ranked_rows.sort(key=lambda row: row.sum(), reverse=True)
    return np.array(ranked_rows)
开发者ID:nextstrain,项目名称:augur,代码行数:13,代码来源:plot_msa.py

示例7: read_alignment

# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def read_alignment(input_file, format='fasta'):
    """
    Read a single multiple sequence alignment from a file.

    :param input_file: Input file name.
    :param format: Format name handed to ``AlignIO.read`` (default 'fasta').
    :returns: The alignment read from the input file.
    :rtype: MultipleSeqAlignment

    """
    return AlignIO.read(input_file, format)
开发者ID:nanoporetech,项目名称:wub,代码行数:13,代码来源:seq.py

示例8: add_gaps

# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def add_gaps(align, vcfDict):
    """Drop alleles that fall on alignment gaps from every entry of ``vcfDict``.

    ``align`` is a fasta alignment file. For each record, every key of
    ``vcfDict`` whose position (``int(key) - 1``) is among the record's gap
    positions gets the matching slot of ``vcfDict[key][1]`` set to "-".
    Afterwards all "-" items are filtered out of each ``vcfDict[key][1]``.
    The dictionary is modified in place and also returned.
    """
    alignment = AlignIO.read(align, "fasta")
    for row, record in enumerate(alignment):
        gap_positions = find(record.seq, '-')
        for position in vcfDict:
            if (int(position) - 1) in gap_positions:
                # NOTE(review): for the first record this is index -1, i.e. the
                # last slot - presumably record 0 is a reference that has no
                # slot of its own; confirm against the caller.
                vcfDict[position][1][row - 1] = "-"
    for entry in vcfDict.values():
        entry[1] = [allele for allele in entry[1] if allele != "-"]
    return vcfDict
开发者ID:tatumdmortimer,项目名称:popgen-stats,代码行数:12,代码来源:subsampleVCF.py

示例9: process_data

# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def process_data(data, data_type='counts', seq_type='dna'):
    """Turn motif input into formatted frequency and information matrices.

    Parameters
    ----------
    data
        Interpreted according to ``data_type``: handed to ``count_to_pfm``
        for 'counts', used as the frequency matrix as-is for 'probability',
        and treated as a file path for the alignment and motif formats.
    data_type : str
        One of 'counts', 'probability', 'fasta', 'stockholm', 'alignace',
        'meme', 'mast', 'transfac', 'pfm', 'sites' or 'jaspar'.
    seq_type : str
        Forwarded to ``read_alignment`` for 'fasta' and 'stockholm' input.

    Returns
    -------
    tuple
        ``(format_matrix(pfm), format_matrix(ic))``.

    Raises
    ------
    ValueError
        If ``data_type`` is not one of the supported values.
    """
    if data_type == 'counts':
        pfm, total = count_to_pfm(data)
        ic = calc_relative_information(pfm, total)
    elif data_type == 'probability':
        pfm = data
        # NOTE(review): 10 is the hard-coded total used for probability
        # input; its origin is not visible here - confirm.
        ic = calc_relative_information(pfm, 10)
    elif data_type in ['fasta', 'stockholm']:
        data, total = read_alignment(data, data_type, seq_type)
        pfm, _ = count_to_pfm(data)
        ic = calc_relative_information(pfm, total)
    elif data_type in [
            'alignace', 'meme', 'mast', 'transfac', 'pfm', 'sites', 'jaspar'
    ]:
        if data_type in ['jaspar', 'transfac']:
            # close the file as soon as the motifs are parsed
            with open(data, 'r') as handle:
                motif = motifs.parse(handle, data_type.upper())[0]
            pfm = dict(motif.counts.normalize())
            total = sum(list(motif.counts.values())[0])
        else:
            with open(data, 'r') as handle:
                motif = motifs.read(handle, data_type)
            try:
                # the keyword was misspelled before, so this call always
                # failed and the pseudocount was never applied
                pfm = motif.counts.normalize(pseudocounts=1)
            except Exception:
                # best effort: fall back to plain normalisation
                pfm = motif.counts.normalize()
            # NOTE(review): the other branches pass a number as total; here
            # the counts object itself is passed - confirm this is intended.
            total = motif.counts
        ic = calc_relative_information(pfm, total)
    else:
        raise ValueError("Unsupported data_type: %r" % (data_type,))
    return (format_matrix(pfm), format_matrix(ic))
开发者ID:saketkc,项目名称:pyseqlogo,代码行数:31,代码来源:format_utils.py

示例10: generate_summary_stats

# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def generate_summary_stats(output_dir):
    """Write gene frequency class counts for a presence/absence table.

    Reads ``gene_presence_absence_roary.csv`` and writes
    ``summary_statistics.txt``. Both paths are formed by appending the file
    name to ``output_dir`` directly, so ``output_dir`` has to end with a
    path separator.

    Every row after the header is classified by the value in its fifth
    comma-separated column, taken as a percentage of the sample count:
    core (>= 99), soft core (>= 95), shell (>= 15) or cloud (below 15).
    The sample count is the number of columns in the first data row minus
    14. A table without data rows produces an all-zero summary.

    Returns True.
    """
    with open(output_dir + "gene_presence_absence_roary.csv", 'r') as inhandle:
        # skip the header line
        gene_presence_absence = inhandle.read().splitlines()[1:]

    #Layout categories
    noCore = 0
    noSoftCore = 0
    noShell = 0
    noCloud = 0
    total_genes = 0

    if gene_presence_absence:
        # 14 of the columns are not sample columns
        noSamples = len(gene_presence_absence[0].split(',')) - 14

    #Iterate through GPA and summarise
    for gene in gene_presence_absence:
        proportion_present = float(gene.split(',')[4]) / noSamples * 100.0
        if proportion_present >= 99:
            noCore += 1
        elif proportion_present >= 95:
            noSoftCore += 1
        elif proportion_present >= 15:
            noShell += 1
        else:
            noCloud += 1
        total_genes += 1

    #write output
    summary_rows = [
        ("Core genes", "(99% <= strains <= 100%)", noCore),
        ("Soft core genes", "(95% <= strains < 99%)", noSoftCore),
        ("Shell genes", "(15% <= strains < 95%)", noShell),
        ("Cloud genes", "(0% <= strains < 15%)", noCloud),
        ("Total genes", "(0% <= strains <= 100%)", total_genes),
    ]
    output = "\n".join(
        "\t".join((label, interval, str(count)))
        for label, interval, count in summary_rows)
    with open(output_dir + "summary_statistics.txt", 'w') as outfile:
        outfile.write(output)

    return True
开发者ID:gtonkinhill,项目名称:panaroo,代码行数:37,代码来源:generate_output.py

示例11: pairwiseIdentity

# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def pairwiseIdentity(self, alnFile):
        """Return the percent identity of the first two sequences in ``alnFile``.

        ``alnFile`` is the path of a fasta alignment. The value is
        ``(1 - d) * 100`` where ``d`` is the 'identity' distance between the
        second and the first sequence of the alignment.
        """
        # pair up the sequences and build the similarity matrix
        with open(alnFile) as handle:  # closed again as soon as parsing is done
            aln = AlignIO.read(handle, 'fasta')
        calculator = DistanceCalculator('identity')
        identity = (1 - calculator.get_distance(aln).matrix[1][0]) * 100
        return identity
开发者ID:dongzhang0725,项目名称:PhyloSuite,代码行数:8,代码来源:Lg_compareTable.py

示例12: build_newick_fasttree

# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def build_newick_fasttree(aln_fname, nuc=True):
    """Build a tree from an alignment file by running the ``fasttree`` program.

    Parameters
    ----------
    aln_fname : str
        Path of the alignment handed to fasttree. It is passed as a single
        argument, so spaces or shell metacharacters in the path are safe.
    nuc : bool
        When true, ``-nt`` is added to the fasttree command line.

    Returns
    -------
    The tree parsed by ``Phylo.read`` from ``tmp.nwk``.

    Notes
    -----
    fasttree's standard output is written to ``tmp.nwk`` and its standard
    error to ``fasttree_stderr``, both in the current working directory.
    The exit status of fasttree is not checked. A missing ``fasttree``
    executable raises ``FileNotFoundError``.
    """
    import subprocess
    from Bio import Phylo
    print("Building tree with fasttree")
    tree_cmd = ["fasttree"]
    if nuc:
        tree_cmd.append("-nt")
    tree_cmd.append(aln_fname)

    # argument list plus explicit redirection files instead of a shell string
    with open("tmp.nwk", "wb") as tree_out, open("fasttree_stderr", "wb") as err_out:
        subprocess.run(tree_cmd, stdout=tree_out, stderr=err_out)
    return Phylo.read("tmp.nwk", 'newick')
开发者ID:neherlab,项目名称:treetime,代码行数:12,代码来源:utils.py

示例13: build_newick_raxml

# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def build_newick_raxml(aln_fname, nthreads=2, raxml_bin="raxml", **kwargs):
    """Build a tree from a fasta alignment by running RAxML.

    Parameters
    ----------
    aln_fname : str
        Path of the fasta alignment. It is rewritten as relaxed phylip to
        ``temp.phyx`` in the current working directory before RAxML runs.
    nthreads : int
        Value passed to RAxML's ``-T`` option.
    raxml_bin : str
        Command used to start RAxML. It is placed at the front of a shell
        command line unchanged.
    **kwargs
        Accepted but not used.

    Returns
    -------
    The tree parsed by ``Phylo.read`` from ``RAxML_bestTree.tre``.

    Notes
    -----
    The exit status of the RAxML command is not checked.
    """
    import os
    from Bio import Phylo, AlignIO
    print("Building tree with raxml")
    alignment = AlignIO.read(aln_fname, 'fasta')
    AlignIO.write(alignment, "temp.phyx", "phylip-relaxed")
    cmd = raxml_bin + " -f d -T " + str(nthreads) + " -m GTRCAT -c 25 -p 235813 -n tre -s temp.phyx"
    os.system(cmd)
    return Phylo.read('RAxML_bestTree.tre', "newick")
开发者ID:neherlab,项目名称:treetime,代码行数:10,代码来源:utils.py

示例14: __init__

# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def __init__(self, **kwargs):
        """Run the ``upload`` initialiser, then set the flu-specific settings.

        Besides plain configuration values this reads one genbank outgroup
        record per lineage from ``source-data/``.
        """
        upload.__init__(self, **kwargs)
        self.grouping_upload_fields = ['vtype', 'subtype', 'lineage']

        # Keys: patterns from the subtype and lineage fields in the GISAID
        # fasta file. Values presumably follow the order of
        # grouping_upload_fields (vtype, subtype, lineage) - confirm.
        gisaid_patterns = {
            ('a / h1n1', 'pdm09'): ('a', 'h1n1', 'seasonal_h1n1pdm'),
            ('a / h1n2', ''): ('a', 'h1n2', None),
            ('a / h1n2', 'seasonal'): ('a', 'h1n2', 'seasonal_h1n2'),
            ('a / h2n2', ''): ('a', 'h2n2', None),
            ('a / h3n2', ''): ('a', 'h3n2', 'seasonal_h3n2'),
            ('a / h3n2', 'seasonal'): ('a', 'h3n2', 'seasonal_h3n2'),
            ('a / h3n3', ''): ('a', 'h3n3', None),
            ('a / h5n1', ''): ('a', 'h5n1', None),
            ('a / h5n6', ''): ('a', 'h5n6', None),
            ('a / h6n1', ''): ('a', 'h6n1', None),
            ('a / h7n1', ''): ('a', 'h7n1', None),
            ('a / h7n2', ''): ('a', 'h7n2', None),
            ('a / h7n3', ''): ('a', 'h7n3', None),
            ('a / h7n7', ''): ('a', 'h7n7', None),
            ('a / h7n9', ''): ('a', 'h7n9', None),
            ('a / h9n2', ''): ('a', 'h9n2', None),
            ('a / h10n7', ''): ('a', 'h10n7', None),
            ('a / h10n8', ''): ('a', 'h10n8', None),
            ('a / h11', ''): ('a', 'h11', None),
            ('b / h0n0', 'victoria'): ('b', None, 'seasonal_vic'),
            ('b / h0n0', 'yamagata'): ('b', None, 'seasonal_yam'),
            ('b', 'victoria'): ('b', None, 'seasonal_vic'),
            ('b', 'yamagata'): ('b', None, 'seasonal_yam'),
        }
        self.patterns = gisaid_patterns

        # one genbank outgroup record per lineage, read from source-data/
        outgroup_records = {}
        for lineage in ['H3N2', 'H1N1pdm', 'Vic', 'Yam']:
            outgroup_path = 'source-data/'+lineage+'_outgroup.gb'
            outgroup_records[lineage] = SeqIO.read(outgroup_path, 'genbank')
        self.outgroups = outgroup_records

        # grouping tuple reported for each outgroup name
        self.outgroup_patterns = {
            'H3N2': ('a', 'h3n2', 'seasonal_h3n2'),
            'H1N1': ('a', 'h1n1', 'seasonal_h1n1'),
            'H1N1pdm': ('a', 'h1n1', 'seasonal_h1n1pdm'),
            'Vic': ('b', None, 'seasonal_vic'),
            'Yam': ('b', None, 'seasonal_yam'),
        }

        # files holding manual fixes
        self.strain_fix_fname = "source-data/flu_strain_name_fix.tsv"
        self.location_fix_fname = "source-data/flu_location_fix.tsv"
        self.location_label_fix_fname = "source-data/flu_fix_location_label.tsv"

        self.virus_to_sequence_transfer_fields = ['submission_date']
        self.fix = set()
开发者ID:nextstrain,项目名称:fauna,代码行数:40,代码来源:flu_upload.py

示例15: align_flu

# 需要导入模块: from Bio import AlignIO [as 别名]
# 或者: from Bio.AlignIO import read [as 别名]
def align_flu(self, doc, min_score_percentage=0.85, **kwargs):
        '''
        align with sequence from outgroup to determine subtype and lineage

        The sequence in ``doc['sequence']`` is aligned with mafft against
        every record in ``self.outgroups``. Each outgroup is scored by the
        number of identical columns in the resulting two-row alignment.
        Temporary files ``temp_in.fasta``, ``temp_out.fasta`` and ``tmp``
        are written to the current working directory.

        :param doc: mapping that provides at least 'sequence' and 'strain'
        :param min_score_percentage: fraction of the query length that the
            best score has to exceed
        :param kwargs: accepted but not used
        :return: the ``self.outgroup_patterns`` entry of the best scoring
            outgroup, or None when the best score is too low or when any
            step of the alignment fails
        '''
        try:
            scores = []
            from Bio.Seq import Seq
            from Bio.SeqRecord import SeqRecord
            from Bio import AlignIO
            try:
                from Bio.Alphabet import IUPAC
                query_seq = Seq(doc['sequence'], IUPAC.ambiguous_dna)
            except ImportError:
                # newer Biopython releases ship without Bio.Alphabet; a Seq
                # is then created without an alphabet argument
                query_seq = Seq(doc['sequence'])
            record = SeqRecord(query_seq, id=doc['strain'])
            for olineage, oseq in self.outgroups.items():
                SeqIO.write([oseq, record], "temp_in.fasta", "fasta")
                os.system("mafft --auto temp_in.fasta > temp_out.fasta 2>tmp")
                tmp_aln = np.array(AlignIO.read('temp_out.fasta', 'fasta'))
                scores.append((olineage, (tmp_aln[0]==tmp_aln[1]).sum()))
            scores.sort(key = lambda x:x[1], reverse=True)
            if scores[0][1]>min_score_percentage*len(record.seq):
                print("Lineage based on similarity:", scores[0][0], doc['strain'], len(record.seq), scores)
                return self.outgroup_patterns[scores[0][0]]
            else:
                print("Couldn't parse virus subtype and lineage from aligning sequence: ", doc['strain'], len(record.seq), scores)
                return None
        except Exception:
            # deliberate best effort: report and give up on this document,
            # but let KeyboardInterrupt / SystemExit through
            print("Alignment failed: " + doc['strain'])
            return None
开发者ID:nextstrain,项目名称:fauna,代码行数:31,代码来源:flu_upload.py


注:本文中的Bio.AlignIO.read方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。