当前位置: 首页>>代码示例>>Python>>正文


Python Alphabet.generic_dna方法代码示例

本文整理汇总了Python中Bio.Alphabet.generic_dna方法的典型用法代码示例。如果您正苦于以下问题:Python Alphabet.generic_dna方法的具体用法?Python Alphabet.generic_dna怎么用?Python Alphabet.generic_dna使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Bio.Alphabet的用法示例。注意:严格来说,generic_dna是Bio.Alphabet模块中的一个字母表对象,而不是方法;Bio.Alphabet模块已在Biopython 1.78中被移除,因此下列示例仅适用于更早版本的Biopython。


在下文中一共展示了Alphabet.generic_dna方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_Short

# 需要导入模块: from Bio import Alphabet [as 别名]
# 或者: from Bio.Alphabet import generic_dna [as 别名]
def get_Short(genesList):
    """Write a one-allele "short" FASTA file for every gene file given.

    For each path in ``genesList`` a ``short`` sub-directory is created next
    to the file (if it does not exist yet) and the first record of the FASTA
    file is written, upper-cased, to ``short/<name>_short.fasta``. No output
    file is written for a FASTA file that contains no records.

    Args:
        genesList (iterable of str): paths to gene FASTA files.

    Returns:
        bool: always ``True``.
    """
    for gene in genesList:
        short_dir = os.path.join(os.path.dirname(gene), "short")
        if not os.path.exists(short_dir):
            os.makedirs(short_dir)

        # Rename the file name only, so that a ".fasta" occurring in a parent
        # directory name is left untouched.
        short_name = os.path.basename(gene).replace(".fasta", "_short.fasta")
        shortgene = os.path.join(short_dir, short_name)

        # Only the first allele of the gene file is kept.
        first_allele = next(iter(SeqIO.parse(gene, "fasta", generic_dna)), None)
        if first_allele is None:
            continue

        # The context manager closes the handle even if the write fails.
        with open(shortgene, 'w') as fG:
            fG.write('>' + str(first_allele.id) + '\n' + str(first_allele.seq.upper()) + '\n')

    return True
开发者ID:B-UMMI,项目名称:chewBBACA,代码行数:19,代码来源:init_schema_4_bbaca.py

示例2: gene_seqs_info

# 需要导入模块: from Bio import Alphabet [as 别名]
# 或者: from Bio.Alphabet import generic_dna [as 别名]
def gene_seqs_info(gene):
    """Count the alleles in a gene FASTA file and compute their mean length.

    Args:
        gene (str): name/path of a FASTA file.

    Returns:
        list: ``[gene, total_seqs, mean_length]`` where ``gene`` is the
        input value, ``total_seqs`` is the number of records in the file
        and ``mean_length`` is the mean record length (``0`` when the file
        holds no records).
    """
    alleles_lengths = [len(allele)
                       for allele in SeqIO.parse(gene, 'fasta', generic_dna)]
    total_seqs = len(alleles_lengths)
    # A file without records has no mean; report 0 instead of dividing by zero.
    mean_length = sum(alleles_lengths) / total_seqs if total_seqs else 0

    return [gene, total_seqs, mean_length]
开发者ID:B-UMMI,项目名称:chewBBACA,代码行数:23,代码来源:auxiliary_functions.py

示例3: setUp

# 需要导入模块: from Bio import Alphabet [as 别名]
# 或者: from Bio.Alphabet import generic_dna [as 别名]
def setUp(self):
        """Build the record shared by the tests and store it on ``self.record``.

        The record is 21 bases long, carries one CDS on each strand and a
        single protocluster spanning the whole sequence.
        """
        # locations:   VVV         VVV
        sequence = "ATGTTATGAGGGTCATAACAT"
        fixture = Record(Seq(sequence, generic_dna))

        for start, end, strand in ((0, 9, 1), (12, 21, -1)):
            fixture.add_cds_feature(DummyCDS(start, end, strand=strand))

        protocluster = DummyProtocluster(start=0, end=21)
        fixture.add_protocluster(protocluster)
        fixture.create_candidate_clusters()
        fixture.create_regions()

        # sanity checks on the fixture itself: the tests cannot pass unless
        # these hold
        assert len(protocluster.cds_children) == 2
        assert len(fixture.get_regions()) == 1
        for feature in fixture.get_cds_features():
            assert feature.is_contained_by(protocluster)
            assert feature.extract(fixture.seq) == "ATGTTATGA", str(feature.location)

        self.record = fixture
开发者ID:antismash,项目名称:antismash,代码行数:21,代码来源:test_tta.py

示例4: test_genbank

# 需要导入模块: from Bio import Alphabet [as 别名]
# 或者: from Bio.Alphabet import generic_dna [as 别名]
def test_genbank(self):
        """Write a region to a GenBank file, read it back and compare both."""
        # build a record holding two protoclusters, a subregion, a candidate
        # cluster made of both protoclusters and a region wrapping it all
        record = Record(Seq("A"*100, generic_dna))
        protoclusters = [
            create_protocluster(3, 20, "prodA"),
            create_protocluster(25, 41, "prodB"),
        ]
        for protocluster in protoclusters:
            record.add_protocluster(protocluster)

        sub = SubRegion(FeatureLocation(35, 71), "test", 0.7)
        record.add_subregion(sub)

        candidate_cluster = CandidateCluster(CandidateCluster.kinds.NEIGHBOURING, protoclusters)
        record.add_candidate_cluster(candidate_cluster)

        region = Region(candidate_clusters=[candidate_cluster],
                        subregions=[sub])
        record.add_region(region)

        # the temporary file must be parsed while it still exists
        with NamedTemporaryFile(suffix=".gbk") as output:
            region.write_to_genbank(output.name)
            parsed = list(seqio.parse(output.name))
        assert len(parsed) == 1
        print(parsed[0].features)

        rebuilt = Record.from_biopython(parsed[0], taxon="bacteria")
        assert len(rebuilt.get_regions()) == 1
        reloaded = rebuilt.get_region(0)
        # coordinates in the written file are relative to the region start
        offset = region.location.start
        assert reloaded.location.start == 3 - offset
        assert reloaded.location.end == 71 - offset
        assert reloaded.products == region.products
        assert reloaded.probabilities == region.probabilities
开发者ID:antismash,项目名称:antismash,代码行数:27,代码来源:test_region.py

示例5: save_transcript_cdna

# 需要导入模块: from Bio import Alphabet [as 别名]
# 或者: from Bio.Alphabet import generic_dna [as 别名]
def save_transcript_cdna(self, out_dir='.', middlestar=False):
        """
        Save the cDNA sequences for all fusion isoforms to a fasta file.

        The file is written to ``<out_dir>/<self.name>_cdna.fa``. ``out_dir``
        is created, including missing parent directories, when it does not
        exist. A transcript without a cDNA gets an empty placeholder record.

        :param out_dir: directory the fasta file is written to.
        :param middlestar: if True, a ``*`` is inserted into each cDNA
            sequence at the transcript's ``transcript_cdna_junction_5prime``
            position before it is written.
        """

        if not os.path.exists(out_dir):
            # makedirs so that a nested, not yet existing path works as well
            os.makedirs(out_dir)

        out_path = os.path.join(out_dir, self.name + '_cdna.fa')

        # the context manager closes the file even if a write fails
        with open(out_path, 'w') as fout:
            for transcript in self.transcripts.values():

                if transcript.cdna is not None:

                    if middlestar:
                        # NOTE: this replaces the stored sequence, so the '*'
                        # stays on transcript.cdna after this call returns
                        junction = transcript.transcript_cdna_junction_5prime
                        temp = str(transcript.cdna.seq)
                        temp = temp[:junction] + '*' + temp[junction:]
                        transcript.cdna.seq = Seq.Seq(temp, generic_dna)

                    SeqIO.write(transcript.cdna, fout, "fasta")
                else:
                    cdna = SeqRecord.SeqRecord(
                        Seq.Seq("", generic_dna),
                        id=transcript.name,
                        name=transcript.name,
                        description="No cDNA, fusion junction outside transcript(s) boundary"
                    )
                    SeqIO.write(cdna, fout, "fasta")
开发者ID:murphycj,项目名称:AGFusion,代码行数:38,代码来源:model.py

示例6: save_transcript_cds

# 需要导入模块: from Bio import Alphabet [as 别名]
# 或者: from Bio.Alphabet import generic_dna [as 别名]
def save_transcript_cds(self, out_dir='.', middlestar=False):
        """
        Save the CDS sequences for all fusion isoforms to a fasta file.

        The file is written to ``<out_dir>/<self.name>_cds.fa``. ``out_dir``
        is created, including missing parent directories, when it does not
        exist. When no transcript has a CDS a debug message is logged and no
        file is written.

        :param out_dir: directory the fasta file is written to.
        :param middlestar: if True, a ``*`` is inserted into each CDS
            sequence at the transcript's ``transcript_cds_junction_5prime``
            position before it is written.
        """

        if not os.path.exists(out_dir):
            # makedirs so that a nested, not yet existing path works as well
            os.makedirs(out_dir)

        # check if any transcripts have coding potential
        has_cds = any(
            transcript.cds is not None
            for transcript in self.transcripts.values()
        )
        if not has_cds:
            self.db.logger.debug('The %s fusion does not produce any protein coding transcripts. No cds.fa file will be written' % self.name)
            return

        out_path = os.path.join(out_dir, self.name + '_cds.fa')

        # the context manager closes the file even if a write fails
        with open(out_path, 'w') as fout:
            for transcript in self.transcripts.values():

                if transcript.cds is None:
                    continue

                if middlestar:
                    # NOTE: this replaces the stored sequence, so the '*'
                    # stays on transcript.cds after this call returns
                    junction = transcript.transcript_cds_junction_5prime
                    temp = str(transcript.cds.seq)
                    temp = temp[:junction] + '*' + temp[junction:]
                    transcript.cds.seq = Seq.Seq(temp, generic_dna)

                SeqIO.write(transcript.cds, fout, "fasta")
开发者ID:murphycj,项目名称:AGFusion,代码行数:42,代码来源:model.py

示例7: test_1

# 需要导入模块: from Bio import Alphabet [as 别名]
# 或者: from Bio.Alphabet import generic_dna [as 别名]
def test_1(self):
        """
        Test that the CDS and protein are correct for a junction that is on
        exon boundaries and produces an out-of-frame protein.
        """

        # check that the DNA and protein coding sequences are correct by
        # comparing them with manually generated sequences

        fusion = agfusion.Fusion(
            gene5prime="TMEM87B",
            gene5primejunction=112843681,
            gene3prime="MERTK",
            gene3primejunction=112722768,
            db=db_human,
            pyensembl_data=data_human,
            protein_databases=['pfam', 'tmhmm'],
            noncanonical=False
        )

        fusion.save_transcript_cdna('TMEM87B-MERTK-case0')
        fusion.save_transcript_cds('TMEM87B-MERTK-case0')
        fusion.save_proteins('TMEM87B-MERTK-case0')

        # read the expected CDS; the context manager closes the file handle
        with open('./data/test-human-case-0.txt', 'r') as expected:
            test_cds = expected.read()
        test_protein = Seq.Seq(test_cds,alphabet=Alphabet.generic_dna).translate()
        # keep only the residues in front of the first '*'
        # NOTE(review): find() returns -1 when there is no '*', which drops
        # the last residue instead -- confirm the expected CDS has a stop
        test_protein = test_protein[0:test_protein.find('*')]

        trans = fusion.transcripts['ENST00000283206-ENST00000295408']

        assert test_cds==trans.cds.seq, "cds is wrongly predicted for human fusion (case 0)"
        assert test_protein==trans.protein.seq, "protein is wrongly predicted for human fusion (case 0)"
开发者ID:murphycj,项目名称:AGFusion,代码行数:35,代码来源:test.py

示例8: test_2

# 需要导入模块: from Bio import Alphabet [as 别名]
# 或者: from Bio.Alphabet import generic_dna [as 别名]
def test_2(self):
        """
        Test that the CDS and protein are correct for the TMEM87B-MERTK
        fusion with the 3' junction at 112722769 (case 2).
        """

        # check that the DNA and protein coding sequences are correct by
        # comparing them with manually generated sequences

        fusion = agfusion.Fusion(
            gene5prime="TMEM87B",
            gene5primejunction=112843681,
            gene3prime="MERTK",
            gene3primejunction=112722769,
            db=db_human,
            pyensembl_data=data_human,
            protein_databases=['pfam', 'tmhmm'],
            noncanonical=False
        )

        fusion.save_transcript_cdna('TMEM87B-MERTK-case2')
        fusion.save_transcript_cds('TMEM87B-MERTK-case2')
        fusion.save_proteins('TMEM87B-MERTK-case2')

        # read the expected CDS; the context manager closes the file handle
        with open('./data/test-human-case-2.txt', 'r') as expected:
            test_cds = expected.read()
        test_protein = Seq.Seq(test_cds,alphabet=Alphabet.generic_dna).translate()
        # keep only the residues in front of the first '*'
        # NOTE(review): find() returns -1 when there is no '*', which drops
        # the last residue instead -- confirm the expected CDS has a stop
        test_protein = test_protein[0:test_protein.find('*')]

        trans = fusion.transcripts['ENST00000283206-ENST00000295408']

        assert test_cds==trans.cds.seq, "cds is wrongly predicted for human fusion (case 2)"
        assert test_protein==trans.protein.seq, "protein is wrongly predicted for human fusion (case 2)"
开发者ID:murphycj,项目名称:AGFusion,代码行数:33,代码来源:test.py

示例9: test_3

# 需要导入模块: from Bio import Alphabet [as 别名]
# 或者: from Bio.Alphabet import generic_dna [as 别名]
def test_3(self):
        """
        Test that the CDS and protein are correct for the TMEM87B-MERTK
        fusion with the 3' junction at 112722771 (case 3).
        """

        # check that the DNA and protein coding sequences are correct by
        # comparing them with manually generated sequences

        fusion = agfusion.Fusion(
            gene5prime="TMEM87B",
            gene5primejunction=112843681,
            gene3prime="MERTK",
            gene3primejunction=112722771,
            db=db_human,
            pyensembl_data=data_human,
            protein_databases=['pfam', 'tmhmm'],
            noncanonical=False
        )

        fusion.save_transcript_cdna('TMEM87B-MERTK-case3')
        fusion.save_transcript_cds('TMEM87B-MERTK-case3')
        fusion.save_proteins('TMEM87B-MERTK-case3')

        # read the expected CDS; the context manager closes the file handle
        with open('./data/test-human-case-3.txt', 'r') as expected:
            test_cds = expected.read()
        test_protein = Seq.Seq(test_cds,alphabet=Alphabet.generic_dna).translate()
        # keep only the residues in front of the first '*'
        # NOTE(review): find() returns -1 when there is no '*', which drops
        # the last residue instead -- confirm the expected CDS has a stop
        test_protein = test_protein[0:test_protein.find('*')]

        trans = fusion.transcripts['ENST00000283206-ENST00000295408']

        assert test_cds==trans.cds.seq, "cds is wrongly predicted for human fusion (case 3)"
        assert test_protein==trans.protein.seq, "protein is wrongly predicted for human fusion (case 3)"
开发者ID:murphycj,项目名称:AGFusion,代码行数:33,代码来源:test.py

示例10: _vdj_aa

# 需要导入模块: from Bio import Alphabet [as 别名]
# 或者: from Bio.Alphabet import generic_dna [as 别名]
def _vdj_aa(self):
        """Return the amino acid sequence of the VDJ region.

        As a side effect the reading-frame offset (``v_rf_offset``), the
        coding boundaries (``coding_start``, ``coding_end``) and the coding
        sequence (``coding_region``) are stored on the instance and logged.
        """
        # bases between the V query start and the junction start fix the frame
        v_to_junction = self.oriented_input[self.v.query_start:self.junction.junction_nt_start]
        self.v_rf_offset = len(v_to_junction) % 3
        self.coding_start = self.v.query_start + self.v_rf_offset

        # trim the bases that do not fill a whole codon off the J end
        start_to_j_end = self.oriented_input[self.coding_start:self.j.query_end]
        self.coding_end = self.j.query_end - len(start_to_j_end) % 3

        # NOTE(review): the slice stops at coding_end + 1, i.e. coding_end is
        # treated as an inclusive index here -- confirm against query_end
        self.coding_region = self.oriented_input[self.coding_start:self.coding_end + 1]
        protein = Seq(self.coding_region, generic_dna).translate()

        log_entries = (
            ('READING FRAME OFFSET:', self.v_rf_offset),
            ('CODING START:', self.coding_start),
            ('CODING END:', self.coding_end),
            ('CODING REGION:', self.coding_region),
        )
        for label, value in log_entries:
            self.log(label, value)
        return str(protein)
开发者ID:briney,项目名称:abstar,代码行数:14,代码来源:antibody.py

示例11: _vdj_germ_aa

# 需要导入模块: from Bio import Alphabet [as 别名]
# 或者: from Bio.Alphabet import generic_dna [as 别名]
def _vdj_germ_aa(self):
        """Return the germline amino acid sequence of the VDJ region."""
        germline = self.vdj_germ_nt
        offset = self.v_rf_offset
        # bases after the offset that do not fill a whole codon are cut off
        leftover = len(germline[offset:]) % 3
        end = len(germline) - leftover
        protein = Seq(germline[offset:end], generic_dna).translate()
        return str(protein)
开发者ID:briney,项目名称:abstar,代码行数:7,代码来源:antibody.py

示例12: _get_aa_sequence

# 需要导入模块: from Bio import Alphabet [as 别名]
# 或者: from Bio.Alphabet import generic_dna [as 别名]
def _get_aa_sequence(self):
        """Translate ``self.coding_region`` as generic DNA and return the result."""
        coding_dna = Seq(self.coding_region, generic_dna)
        return coding_dna.translate()
开发者ID:briney,项目名称:abstar,代码行数:4,代码来源:germline.py

示例13: curate

# 需要导入模块: from Bio import Alphabet [as 别名]
# 或者: from Bio.Alphabet import generic_dna [as 别名]
def curate(geneFile):
    """Rewrite a gene FASTA file, keeping only the alleles that translate.

    Every record is upper-cased. A record is kept when its length is a
    multiple of 3 and ``translateSeq`` accepts it without raising; the
    exception of a rejected record is printed. The input file is then
    overwritten with the kept records (header is the record name).

    Args:
        geneFile (str): path to the FASTA file; it is overwritten in place.

    Returns:
        bool: always ``True``.
    """
    kept_records = []
    for allele in SeqIO.parse(geneFile, "fasta", generic_dna):
        sequence = str(allele.seq.upper())
        name = allele.name

        # if the allele is not a multiple of 3 it is useless to try to
        # translate it
        if len(sequence) % 3 != 0:
            continue

        try:
            # only success or failure matters here; the unpacking is kept so
            # that an unexpected return shape still rejects the allele
            # (the 11 is presumably the translation table -- TODO confirm)
            _protseq, _seq, _reversed_seq = translateSeq(sequence, 11)
        except Exception as err:
            print(err)
            continue

        kept_records.append(">" + name + "\n" + sequence + "\n")

    # text mode: the content is a str, which a binary-mode handle rejects on
    # Python 3
    with open(geneFile, "w") as f:
        f.write("".join(kept_records))
    return True
开发者ID:B-UMMI,项目名称:chewBBACA,代码行数:28,代码来源:AutoAlleleCDSCuration.py

示例14: check_if_list_or_folder

# 需要导入模块: from Bio import Alphabet [as 别名]
# 或者: from Bio.Alphabet import generic_dna [as 别名]
def check_if_list_or_folder(folder_or_list):
    """Resolve the input into a list of file paths.

    ``folder_or_list`` is first opened as a text file with one path per
    line; each line is stripped and returned as given. If opening it raises
    ``IOError`` it is listed as a folder instead: every entry ending in
    ``.fasta`` that is not a directory and that ``SeqIO.parse`` can start
    reading is returned as an absolute path. Errors raised for a single
    folder entry are printed and that entry is skipped.

    Args:
        folder_or_list (str): path to a list file or to a folder.

    Returns:
        list: the collected paths.
    """
    list_files = []
    # check if given a list of genomes paths or a folder to create schema
    try:
        # the context manager closes the list file once it has been read
        with open(folder_or_list, 'r') as gene_fp:
            list_files = [gene.strip() for gene in gene_fp]
    except IOError:
        list_files = []
        for gene in os.listdir(folder_or_list):
            if not gene.endswith(".fasta"):
                continue
            try:
                genepath = os.path.join(folder_or_list, gene)

                if os.path.isdir(genepath):
                    continue

                # pull at most one record: a parser error here rejects the
                # file, while a file without records is still accepted
                next(iter(SeqIO.parse(genepath, "fasta", generic_dna)), None)
                list_files.append(os.path.abspath(genepath))
            except Exception as e:
                print(e)

    return list_files
开发者ID:B-UMMI,项目名称:chewBBACA,代码行数:33,代码来源:init_schema_4_bbaca.py

示例15: proc_gene

# 需要导入模块: from Bio import Alphabet [as 别名]
# 或者: from Bio.Alphabet import generic_dna [as 别名]
def proc_gene(gene,auxBar):
    """Look up a protein name and URL for a gene FASTA file.

    The alleles of the file are tried in order: each one is translated with
    ``translateSeq`` and queried with ``get_protein_info``. The search stops
    at the first allele whose name contains none of "Uncharacterized
    protein", "hypothetical" or "DUF". Alleles that fail to translate or to
    be queried are skipped. A progress line is printed when ``gene`` is part
    of ``auxBar``.

    Args:
        gene (str): path to the gene FASTA file.
        auxBar (list): sequence of genes used to compute the progress shown.

    Returns:
        list: ``[gene, name, url]``. ``name`` and ``url`` come from the
        allele that ended the search; if none did, from the last allele that
        was queried successfully; if there was none, they are empty strings.
    """
    name = ''
    url = ''
    for allele in SeqIO.parse(gene, "fasta", generic_dna):
        sequence = str(allele.seq)
        try:
            proteinSequence = translateSeq(sequence, False)
        except Exception:
            # not translatable: try the next allele (Exception rather than a
            # bare except, so KeyboardInterrupt/SystemExit still propagate)
            continue
        try:
            name, url = get_protein_info(proteinSequence)
            # evaluated inside the try so that an unusable name (e.g. not a
            # string) is skipped like any other failed query
            uninformative = ("Uncharacterized protein" in name
                             or "hypothetical" in name
                             or "DUF" in name)
        except Exception:
            continue
        if not uninformative:
            break

    if gene in auxBar:
        auxlen = len(auxBar)
        index = auxBar.index(gene)
        print ( "["+"="*index+">"+" "*(auxlen-index)+"] Querying "+str(int((float(index)/auxlen)*100))+"%")

    return [gene, name, url]
开发者ID:B-UMMI,项目名称:chewBBACA,代码行数:41,代码来源:uniprot_find.py


注:本文中的Bio.Alphabet.generic_dna方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。