当前位置: 首页>>代码示例>>Python>>正文


Python Alignment.add_sequence方法代码示例

本文整理汇总了Python中Bio.Align.Generic.Alignment.add_sequence方法的典型用法代码示例。如果您正苦于以下问题:Python Alignment.add_sequence方法的具体用法?Python Alignment.add_sequence怎么用?Python Alignment.add_sequence使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Bio.Align.Generic.Alignment的用法示例。


在下文中一共展示了Alignment.add_sequence方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: add_gaps_to_align

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def add_gaps_to_align(organisms, missing, align, verbatim=False, genera=False, min_taxa=3):
    """Rebuild an alignment and pad it with '?' rows for absent organisms.

    Every alignment yielded by ``align`` is copied into a new gapped,
    unambiguous-DNA ``Alignment`` whose rows carry shortened names.  Each
    entry of ``organisms`` that was not matched by a row is then appended as
    a row of ``?`` characters as long as the first row of that alignment.

    Row names are derived from ``seq.name``:

    * if ``genera`` is given and one of its entries occurs in the name, the
      last ``_``-separated piece is used;
    * otherwise, unless ``verbatim`` is set, the last two pieces are joined
      with ``_``;
    * otherwise the whole name, lower-cased.

    Args:
        organisms: collection of expected row names; it is deep-copied, so
            the caller's object is not modified.
        missing: optional mapping consulted only when truthy.  For every
            padded organism the locus must be listed under ``missing[org]``
            or under ``missing[org + '*']``.
        align: iterable of alignments (each an iterable of records exposing
            ``name`` and ``seq``).
        verbatim: keep full (lower-cased) names instead of shortening them.
        genera: optional collection of genus strings, see above.
        min_taxa: minimum number of rows an alignment must have.

    Returns:
        The alignment built from the last item of ``align``, or ``None`` when
        ``align`` is empty or an alignment has fewer than ``min_taxa`` rows.

    Raises:
        AssertionError / KeyError: ``missing`` is truthy and the locus is not
            listed for a padded organism.
        ValueError: a shortened row name is not present in ``organisms``
            (raised by ``remove`` when ``organisms`` is a list).
    """
    local_organisms = copy.deepcopy(organisms)
    # Bound up front so that an empty ``align`` returns None instead of
    # failing with UnboundLocalError at the final ``return``.
    new_align = None
    # NOTE(review): ``local_organisms`` is shared by all alignments in
    # ``align``, so names consumed by one alignment are gone for the next.
    # Presumably callers pass a single alignment per call -- confirm.
    for a in align:
        if len(a) < min_taxa:
            new_align = None
            break
        new_align = Alignment(Gapped(IUPAC.unambiguous_dna, "-"))
        overall_length = len(a[0])
        for seq in a:
            if genera and any(sp for sp in genera if sp in seq.name):
                new_seq_name = '_'.join(seq.name.split('_')[-1:])
            elif not verbatim:
                new_seq_name = '_'.join(seq.name.split('_')[-2:])
            else:
                new_seq_name = seq.name.lower()
            new_align.add_sequence(new_seq_name, str(seq.seq))
            local_organisms.remove(new_seq_name)
        # The locus is whatever precedes the organism part in the name of the
        # last row read above; it does not depend on ``org``, so it is
        # computed once rather than once per padded organism.
        if genera and any(sp for sp in genera if sp in seq.name):
            loc = '_'.join(seq.name.split('_')[:-1])
        elif not verbatim:
            loc = '_'.join(seq.name.split('_')[:-2])
        else:
            loc = seq.name
        for org in local_organisms:
            if missing:
                try:
                    assert loc in missing[org], "Locus missing"
                except (KeyError, AssertionError):
                    # Fall back to the starred spelling of the organism key.
                    assert loc in missing['{}*'.format(org)], "Locus missing"
            new_align.add_sequence(org, '?' * overall_length)
    return new_align
开发者ID:GrahamDB,项目名称:phyluce,代码行数:35,代码来源:add_missing_data_designators.py

示例2: main

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def main():
    """Rewrite every input nexus file with abbreviated sequence names.

    The input files come from ``get_files(args.nexus)``.  A first pass
    collects every sequence name found in any of them and hands the set to
    ``abbreviator`` to obtain an old-name -> new-name mapping.  A second pass
    writes, for each input file, a nexus file of the same base name into
    ``args.output`` whose rows carry the mapped names.  The zero-based index
    of each processed file is printed as a progress indicator.
    """
    args = get_args()
    files = get_files(args.nexus)
    # First pass: gather every sequence name used anywhere in the inputs.
    old_names = set()
    for f in files:
        for align in AlignIO.parse(f, 'nexus'):
            old_names.update(seq.name for seq in align)
    name_map = abbreviator(old_names)
    # Second pass: copy each file's rows into a fresh alignment under the
    # abbreviated names and write it out.
    for count, f in enumerate(files):
        new_align = Alignment(Gapped(IUPAC.unambiguous_dna, "-"))
        for align in AlignIO.parse(f, 'nexus'):
            for seq in align:
                new_align.add_sequence(name_map[seq.name], str(seq.seq))
        outf = os.path.join(args.output, os.path.split(f)[1])
        try:
            # The context manager closes (and flushes) the output file even
            # when writing fails; the original left the handle open.
            with open(outf, 'w') as handle:
                AlignIO.write(new_align, handle, 'nexus')
        except ValueError:
            # NOTE(review): drops into the interactive debugger when the
            # alignment cannot be written; kept as in the original.
            pdb.set_trace()
        print(count)

示例3: ace2fasta

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def ace2fasta(in_file, out_file):
    """Convert every contig of an ACE file into a block of FASTA records.

    For each contig an alignment is built whose first row is the contig's
    consensus sequence, followed by one row per read.  Each read is trimmed
    with ``cut_ends`` to its quality-clipping window and padded with
    ``pad_read`` to the consensus length, starting at its padded start
    position.  Reads whose name contains ``"pseudo"`` are left out.  The
    FASTA rendering of every alignment is appended to ``out_file``.

    Args:
        in_file: path of the ACE file to read.
        out_file: path of the FASTA file to (over)write.

    Errors raised while parsing are propagated; the original caught every
    exception from the parser and reported "All contigs treated" regardless.
    """
    # Both handles are managed so that they are closed on every exit path
    # (the input handle used to be left open).
    with open(in_file, 'r') as ace_handle:
        with open(out_file, "w") as output_file:
            for contig in Ace.parse(ace_handle):
                align = Alignment(Gapped(IUPAC.ambiguous_dna, "-"))

                # The consensus sequence is always the first row.
                align.add_sequence(contig.name, contig.sequence)

                for readn, read in enumerate(contig.reads):
                    clipst = read.qa.qual_clipping_start
                    clipe = read.qa.qual_clipping_end
                    # ``af`` entries are looked up by the read's index.
                    start = contig.af[readn].padded_start
                    seq = cut_ends(read.rd.sequence, clipst, clipe)
                    seq = pad_read(seq, start, len(contig.sequence))
                    if "pseudo" not in read.rd.name:
                        align.add_sequence(read.rd.name, seq)

                output_file.write(align.format("fasta"))
            print("All contigs treated")

示例4: _domain_alignment

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
 def _domain_alignment(self,alignment,domain_region, alignment_index):
     # Now we need to subselect the portion of the alignment 
     # that contains the domain.
     protein_record = alignment[alignment_index]
     protein_seq = str(protein_record.seq)
     # Figure out which columns encapsulate the domain.
     aa_count = 0
     column_start = None
     column_stop = None
     #print protein_seq
     for column,aa in enumerate(protein_seq):
         #print column,aa
         if aa!='-':
             aa_count=aa_count+1
         if aa_count==domain_region.start and column_start==None:
             column_start = column
         if aa_count==domain_region.stop and column_stop==None:
             column_stop = column
             break
     #print column_start,column_stop
     assert column_start != None, str(column_start)
     assert column_stop != None, str(column_stop)
     domain_alignment = Alignment(alphabet = alignment._alphabet)
     # Grab the portion of each sequence that correspond to columns
     # for the domain.
     for record in alignment:
         domain_alignment.add_sequence(record.id,
                                       str(record.seq)[column_start:column_stop])
     return (domain_alignment, column_start, column_stop)
开发者ID:bsmithers,项目名称:hpf,代码行数:31,代码来源:ginzu.py

示例5: build_align

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
 def build_align( self, seq ):
     """Store ``seq`` on ``self.friendly`` as an alignment of fixed-size chunks.

     ``seq`` is cut into consecutive slices of ``self.segment_size``
     characters (the last one may be shorter) and each slice is added as a
     row of a new gapped DNA ``Alignment``.
     """
     chunked = Alignment( Gapped( DNAAlphabet() ) )
     # Read but not used afterwards; kept so attribute access is unchanged.
     alphabet = self.alphabet
     total = len( seq )
     width = self.segment_size
     # NOTE(review): ``name`` is not defined inside this method; it must be
     # supplied by an enclosing scope -- confirm.
     for offset in range( 0, total, width ):
         chunked.add_sequence( name, seq[offset : offset + width] )
     self.friendly = chunked
开发者ID:dbmi-pitt,项目名称:DIKB-Evidence-analytics,代码行数:11,代码来源:__init__.py

示例6: createAlignment

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def createAlignment(sequences, alphabet):
    """Build an Alignment whose rows are the given sequences, in order.

    Rows are named ``sequence0``, ``sequence1``, ... by position.
    """
    result = Alignment(alphabet)
    for index, sequence in enumerate(sequences):
        result.add_sequence("sequence%d" % index, sequence)
    return result
开发者ID:Mat-D,项目名称:biopython,代码行数:11,代码来源:test_CAPS.py

示例7: phylip

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def phylip(handle):
    """Read a simple phylip-style file into a gapped protein Alignment.

    The first line must consist of exactly two whitespace-separated fields
    (they are read but not used further).  Every following non-blank line
    must consist of exactly two fields, ``name`` and ``sequence``, and
    becomes one row of the alignment.  Blank lines, such as a trailing empty
    line at the end of the file, are skipped instead of aborting the parse.

    Args:
        handle: open text handle positioned at the header line.

    Returns:
        The populated ``Alignment``.

    Raises:
        ValueError: the header or a non-blank row does not have exactly two
            fields.
    """
    # Unpacking enforces the two-field header; the values are not needed.
    seqs,columns = handle.readline().split()
    from Bio.Align.Generic import Alignment
    from Bio.Alphabet import IUPAC, Gapped
    alignment = Alignment(Gapped(IUPAC.protein, "-"))
    for line in handle:
        if not line.strip():
            # Whitespace-only line: nothing to unpack.
            continue
        name,seq = line.split()
        alignment.add_sequence(name, seq)
    return alignment
开发者ID:bsmithers,项目名称:hpf,代码行数:11,代码来源:oid.py

示例8: testCulledColumnMapper

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
 def testCulledColumnMapper(self):
     """Check that CulledColumnMapper maps culled indices to original ones.

     With columns 0, 1, 4 and 8 culled from ``ABCDEFGHI`` the remaining
     text is ``CDFGH``; ``mapper[i]`` must point at the original column
     holding the i-th remaining character.
     """
     original = "ABCDEFGHI"
     culled = [0,1,4,8]
     # What is left of ``original`` once the culled columns are dropped.
     result = "CDFGH"
     align = Alignment(Gapped(IUPAC.protein, "-"))
     align.add_sequence("test", original)
     mapper = CulledColumnMapper(align, culled)
     for culled_index, expected in enumerate(result):
         assert original[mapper[culled_index]]==expected
开发者ID:dpenfoldbrown,项目名称:hpf,代码行数:12,代码来源:align.py

示例9: gene_expression_2matrix

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def gene_expression_2matrix(in_ace, out_file, tags, min_seq):
    """Write a contig x tag matrix of read counts for an ACE file.

    The output is tab-separated.  The header row is ``gene_name``,
    ``gene_length``, one column per entry of ``tags`` (in the given order)
    and a final ``XX_noTag`` column.  For every contig whose FASTA rendering
    holds at least ``min_seq`` records, one row is written with the contig
    name, the consensus length without ``*`` characters, the number of
    record names containing each tag, and the number of records that match
    no tag and whose name does not contain ``Consensus``.

    Counts are written in header order.  The original wrote them in sorted
    key order, which shifted columns whenever ``tags`` was not already
    sorted (or contained lower-case or duplicate entries).

    Args:
        in_ace: path of the ACE file to read.
        out_file: path of the matrix file to (over)write.
        tags: sequence of tag strings searched for in the record names.
        min_seq: minimum number of FASTA records a contig needs to be
            reported.

    Errors raised while parsing are propagated; the original caught every
    exception from the parser and treated it as the end of the input.
    """
    print("")
    print("USING MATRIX OUTPUT FORMAT")
    print("")
    # Both handles are managed so they are closed on every exit path (the
    # input handle used to be left open).
    with open(in_ace, 'r') as ace_handle:
        with open(out_file, "w") as output_file:
            output_file.write("gene_name\tgene_length")
            for tag in tags:
                output_file.write("\t" + tag)
            output_file.write("\tXX_noTag")
            output_file.write("\n")
            for contig in Ace.parse(ace_handle):
                align = Alignment(Gapped(IUPAC.ambiguous_dna, "-"))
                align.add_sequence(contig.name, contig.sequence)
                for readn, read in enumerate(contig.reads):
                    clipst = read.qa.qual_clipping_start
                    clipe = read.qa.qual_clipping_end
                    # ``af`` entries are looked up by the read's index.
                    start = contig.af[readn].padded_start
                    seq = cut_ends(read.rd.sequence, clipst, clipe)
                    seq = pad_read(seq, start, len(contig.sequence))
                    if "pseudo" not in read.rd.name:
                        align.add_sequence(read.rd.name, seq)
                sequences = read_fasta_2list(align.format("fasta"))
                if len(sequences) < min_seq:
                    continue
                # The first record is the consensus added above.
                contig_name = re.findall("(Contig_[0-9]+)", sequences[0][0])[0]
                contig_seq = sequences[0][1].replace("*", "")
                contig_length = str(len(contig_seq))
                output_file.write(contig_name + "\t" + contig_length)
                print("Treating %s" % contig_name)
                counts = dict.fromkeys(tags, 0)
                untagged = 0
                for fasta in sequences:
                    found_tag = False
                    for tag in tags:
                        if tag in fasta[0]:
                            counts[tag] += 1
                            found_tag = True
                    if not found_tag and "Consensus" not in fasta[0]:
                        untagged += 1
                # Same column order as the header row.
                for tag in tags:
                    output_file.write("\t" + str(counts[tag]))
                output_file.write("\t" + str(untagged))
                output_file.write("\n")
            print("***All contigs treated***")
开发者ID:Ecological-and-Evolutionary-Genomics,项目名称:Scripts,代码行数:57,代码来源:ace2gene_expression.py

示例10: rename

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def rename(align, first, second):
    """Yield copies of the given alignments with shortened row names.

    Each record id is split on ``_``.  When ``second`` is truthy the new
    name is the first three characters of piece ``first`` and of piece
    ``second`` joined by ``_``; otherwise it is piece ``first`` unchanged.

    The original required both ``first`` and ``second`` to be truthy for the
    two-piece form, so ``first == 0`` (a valid index) combined with a
    ``second`` index matched neither branch and left the name unbound or
    stale from the previous row.  Only ``second`` decides the form now.

    Args:
        align: iterable of alignments (each an iterable of records exposing
            ``id`` and ``seq``).
        first: index of the first name piece.
        second: index of the second name piece, or a falsy value to use
            piece ``first`` alone.  NOTE(review): ``second == 0`` is
            therefore still treated as "no second piece", as before.

    Yields:
        One new gapped, unambiguous-DNA ``Alignment`` per input alignment.
    """
    for a in align:
        new_align = Alignment(Gapped(IUPAC.unambiguous_dna, "-"))
        for seq in a:
            split_name = seq.id.split('_')
            if second:
                new_seq_name = '_'.join([split_name[first][0:3], split_name[second][0:3]])
            else:
                new_seq_name = split_name[first]
            new_align.add_sequence(new_seq_name, str(seq.seq))
        yield new_align
开发者ID:crinfante,项目名称:2011-fairclothetal-systbiol-uce,代码行数:14,代码来源:convert_nexus_to_phylip.py

示例11: main

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def main():
    """Write copies of the input nexus files restricted to selected taxa.

    The input files come from ``get_files(args.input)``; the taxa to keep
    are chosen by ``get_samples_to_run`` from all taxon names found in those
    files.  For every input file a nexus file of the same base name is
    written to ``args.output`` containing only the rows whose name is in
    that selection.  The zero-based index of each processed file is printed
    as a progress indicator.
    """
    args = get_args()
    nexus_files = get_files(args.input)
    taxa = get_all_taxon_names(nexus_files)
    taxa_to_keep = get_samples_to_run(args, taxa)
    for count, align_file in enumerate(nexus_files):
        new_align = Alignment(Gapped(IUPAC.unambiguous_dna, "-"))
        for align in AlignIO.parse(align_file, "nexus"):
            for taxon in align:
                if taxon.name in taxa_to_keep:
                    new_align.add_sequence(taxon.name, str(taxon.seq))
        outf = os.path.join(args.output, os.path.basename(align_file))
        # The context manager closes (and flushes) the output file; the
        # original left one open handle behind per input file.
        with open(outf, 'w') as handle:
            AlignIO.write(new_align, handle, 'nexus')
        print(count)
开发者ID:GrahamDB,项目名称:phyluce,代码行数:16,代码来源:extract_taxa_from_alignments.py

示例12: proteins_alignment_to_biopython

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def proteins_alignment_to_biopython(al, seq1, seq2, name1, name2):
    """Convert our internal alignment format into a BioPython Alignment.

    Args:
        al: iterable of ``(a, b)`` index pairs, one per alignment column;
            ``a`` indexes ``seq1`` and ``b`` indexes ``seq2``, with ``-1``
            standing for a gap in that sequence.
        seq1: first sequence.
        seq2: second sequence.
        name1: row name for the first sequence.
        name2: row name for the second sequence.

    Returns:
        A gapped protein ``Alignment`` with two rows of upper-cased residues
        and ``-`` for gaps.  Both rows have one character per pair in ``al``.
    """
    align = Alignment(Gapped(IUPAC.protein, "-"))
    row1 = []
    row2 = []
    for a, b in al:
        row1.append(seq1[a].upper() if a != -1 else "-")
        # The original emitted nothing for a gap in the second sequence,
        # which left the second row shorter than the first and shifted every
        # later column.
        row2.append(seq2[b].upper() if b != -1 else "-")
    align.add_sequence(name1, "".join(row1))
    align.add_sequence(name2, "".join(row2))
    return align
开发者ID:victor-yacovlev,项目名称:biosymbol,代码行数:17,代码来源:bioformats.py

示例13: parse_ace

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
def parse_ace(ace_file):
    """Read the first contig of an ACE file and align its reads to it.

    The alignment's first row is the contig's consensus sequence.  Each read
    is trimmed with ``cut_ends`` to its quality-clipping window, padded with
    ``pad_read`` to the consensus length starting at its padded start
    position, and added under the name ``<read name>_<coru>``.

    Args:
        ace_file: path of the ACE file; only its first contig is used.

    Returns:
        ``(contig, align)``.

    Raises:
        StopIteration: the file contains no contig.
    """
    # Only the first contig is needed, so the file can be closed right after
    # it has been read (the original never closed the handle).
    with open(ace_file, 'r') as handle:
        contig = next(Ace.parse(handle))
    align = Alignment(Gapped(IUPAC.ambiguous_dna, "-"))
    align.add_sequence(contig.name, contig.sequence)

    for readn, read in enumerate(contig.reads):
        clipst = read.qa.qual_clipping_start
        clipe = read.qa.qual_clipping_end
        # ``af`` entries are looked up by the read's index.
        placement = contig.af[readn]
        seq = cut_ends(read.rd.sequence, clipst, clipe)

        seq = pad_read(seq, placement.padded_start, len(contig.sequence))
        align.add_sequence(read.rd.name + "_" + placement.coru, seq)

    return contig, align
开发者ID:zssasa,项目名称:benchtop-sequencing-comparison,代码行数:18,代码来源:ace_to_alignment.py

示例14: get_alignment

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
 def get_alignment(self):
     """Construct an alignment from the aligned sequences in this tree.

     The tree is searched with ``depth_first_search`` for ``Sequence`` nodes
     whose ``mol_seq.is_aligned`` is true.  The alphabet of the first match
     is used for the whole alignment; every match becomes one row named
     ``str(seq)`` holding ``seq.mol_seq.value``.

     Returns:
         The ``Alignment``, or ``None`` (after issuing a warning) when the
         tree contains no aligned sequence.  The original warned and then
         went on to use the never-assigned ``first_seq``.
     """
     def seq_is_aligned(node):
         if isinstance(node, Sequence) and node.mol_seq.is_aligned:
             return True
         return False
     seqs = self.depth_first_search(self, seq_is_aligned)
     try:
         first_seq = next(seqs)
     except StopIteration:
         warnings.warn("No aligned sequences were found in this tree.",
                 Warning, stacklevel=2)
         # An Alignment cannot be built without an alphabet to start from.
         return None
     aln = Alignment(first_seq.get_alphabet())
     aln.add_sequence(str(first_seq), first_seq.mol_seq.value)
     for seq in seqs:
         aln.add_sequence(str(seq), seq.mol_seq.value)
     return aln
开发者ID:Mat-D,项目名称:biopython,代码行数:19,代码来源:PhyloXML.py

示例15: get_alignment

# 需要导入模块: from Bio.Align.Generic import Alignment [as 别名]
# 或者: from Bio.Align.Generic.Alignment import add_sequence [as 别名]
 def get_alignment(self):
     """Build an alignment out of this tree's aligned sequences.

     ``_filter_search`` is asked, in ``'preorder'``, for the ``Sequence``
     nodes whose ``mol_seq.is_aligned`` is true.  The first match supplies
     the alphabet; every match contributes one row named ``str(seq)`` with
     ``seq.mol_seq.value`` as its data.

     Returns:
         The ``Alignment``, or ``None`` when nothing matched (there would be
         no alphabet to construct it with).
     """
     def is_aligned_seq(node):
         return bool(isinstance(node, Sequence) and node.mol_seq.is_aligned)

     matches = self._filter_search(is_aligned_seq, 'preorder', True)
     try:
         head = next(matches)
     except StopIteration:
         # Nothing aligned in this tree.
         return
     aln = Alignment(head.get_alphabet())

     def _append(seq):
         aln.add_sequence(str(seq), seq.mol_seq.value)

     _append(head)
     for seq in matches:
         _append(seq)
     return aln
开发者ID:Nizy,项目名称:biopython,代码行数:20,代码来源:PhyloXML.py


注:本文中的Bio.Align.Generic.Alignment.add_sequence方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。