本文整理匯總了Python中Bio.Alphabet.generic_dna方法的典型用法代碼示例。如果您正苦於以下問題:Python Alphabet.generic_dna方法的具體用法?Python Alphabet.generic_dna怎麼用?Python Alphabet.generic_dna使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類Bio.Alphabet的用法示例。
在下文中一共展示了Alphabet.generic_dna方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: get_Short
# 需要導入模塊: from Bio import Alphabet [as 別名]
# 或者: from Bio.Alphabet import generic_dna [as 別名]
def get_Short(genesList):
    """Write the first allele of each gene FASTA file to a "short" copy.

    For every path in ``genesList`` a ``short`` sub-directory is created next
    to the input file (if it does not exist yet). The first record of the
    input file is written, upper-cased, to ``short/<basename>`` where
    ``.fasta`` in the base name is replaced by ``_short.fasta``. Input files
    that contain no record produce no output file.

    Args:
        genesList: iterable of paths to FASTA files.

    Returns:
        bool: always ``True``.
    """
    for gene in genesList:
        short_dir = os.path.join(os.path.dirname(gene), "short")
        # exist_ok avoids the check-then-create race between exists() and makedirs().
        os.makedirs(short_dir, exist_ok=True)

        # Rename only the base name so a directory that happens to contain
        # ".fasta" in its name is left untouched.
        short_name = os.path.basename(gene).replace(".fasta", "_short.fasta")
        shortgene = os.path.join(short_dir, short_name)

        # Only the first allele is needed, so stop after one record.
        first_allele = next(iter(SeqIO.parse(gene, "fasta", generic_dna)), None)
        if first_allele is None:
            continue

        # The context manager guarantees the handle is closed even if write() fails.
        with open(shortgene, 'w') as fG:
            fG.write('>' + str(first_allele.id) + '\n' + str(first_allele.seq.upper()) + '\n')
    return True
示例2: gene_seqs_info
# 需要導入模塊: from Bio import Alphabet [as 別名]
# 或者: from Bio.Alphabet import generic_dna [as 別名]
def gene_seqs_info(gene):
    """Determine the number of alleles and the mean allele length of a gene.

    Args:
        gene (str): name/path of the FASTA file that holds the allele
            sequences of one gene.

    Returns:
        genes_info (list): ``[gene, total_seqs, mean_length]`` - the input
            gene identifier, the total number of alleles in the file and
            the mean length of the allele sequences. ``mean_length`` is 0
            when the file contains no sequences.
    """
    alleles_lengths = [len(allele)
                       for allele in SeqIO.parse(gene, 'fasta', generic_dna)]
    total_seqs = len(alleles_lengths)
    # An empty FASTA file would otherwise raise ZeroDivisionError here.
    mean_length = sum(alleles_lengths) / total_seqs if total_seqs else 0
    genes_info = [gene, total_seqs, mean_length]
    return genes_info
示例3: setUp
# 需要導入模塊: from Bio import Alphabet [as 別名]
# 或者: from Bio.Alphabet import generic_dna [as 別名]
def setUp(self):
    """Build the record fixture: two CDS features inside one protocluster.

    The CDS features cover positions 0-9 (strand 1) and 12-21 (strand -1)
    of a 21 nt sequence; the protocluster spans 0-21.
    """
    fixture = Record(Seq("ATGTTATGAGGGTCATAACAT", generic_dna))
    for start, end, strand in ((0, 9, 1), (12, 21, -1)):
        fixture.add_cds_feature(DummyCDS(start, end, strand=strand))

    protocluster = DummyProtocluster(start=0, end=21)
    fixture.add_protocluster(protocluster)
    fixture.create_candidate_clusters()
    fixture.create_regions()

    # sanity checks on the fixture itself; the tests rely on all of these
    assert len(protocluster.cds_children) == 2
    assert len(fixture.get_regions()) == 1
    for feature in fixture.get_cds_features():
        assert feature.is_contained_by(protocluster)
        assert feature.extract(fixture.seq) == "ATGTTATGA", str(feature.location)

    self.record = fixture
示例4: test_genbank
# 需要導入模塊: from Bio import Alphabet [as 別名]
# 或者: from Bio.Alphabet import generic_dna [as 別名]
def test_genbank(self):
    """Write a region to a GenBank file, read it back and compare the result.

    The re-read record must contain exactly one region whose location
    (shifted by the original region's start), products and probabilities
    match the region that was written.
    """
    source_record = Record(Seq("A"*100, generic_dna))

    protoclusters = [
        create_protocluster(3, 20, "prodA"),
        create_protocluster(25, 41, "prodB"),
    ]
    for protocluster in protoclusters:
        source_record.add_protocluster(protocluster)

    sub = SubRegion(FeatureLocation(35, 71), "test", 0.7)
    source_record.add_subregion(sub)

    neighbouring = CandidateCluster(CandidateCluster.kinds.NEIGHBOURING, protoclusters)
    source_record.add_candidate_cluster(neighbouring)

    region = Region(candidate_clusters=[neighbouring], subregions=[sub])
    source_record.add_region(region)

    # round trip through a temporary GenBank file
    with NamedTemporaryFile(suffix=".gbk") as output:
        region.write_to_genbank(output.name)
        parsed = list(seqio.parse(output.name))
    assert len(parsed) == 1
    print(parsed[0].features)

    reloaded = Record.from_biopython(parsed[0], taxon="bacteria")
    assert len(reloaded.get_regions()) == 1
    new = reloaded.get_region(0)

    # coordinates in the written file are relative to the region start
    offset = region.location.start
    assert new.location.start == 3 - offset
    assert new.location.end == 71 - offset
    assert new.products == region.products
    assert new.probabilities == region.probabilities
示例5: save_transcript_cdna
# 需要導入模塊: from Bio import Alphabet [as 別名]
# 或者: from Bio.Alphabet import generic_dna [as 別名]
def save_transcript_cdna(self, out_dir='.', middlestar=False):
    """
    Save the cDNA sequences for all fusion isoforms to a fasta file.

    The file is written to ``<out_dir>/<self.name>_cdna.fa``; ``out_dir``
    (including missing parent directories) is created when needed.
    Transcripts without a cDNA are written as an empty placeholder record
    whose description explains why there is no sequence.

    Args:
        out_dir: directory the fasta file is written to.
        middlestar: when true, a '*' is inserted into each cDNA at
            ``transcript_cdna_junction_5prime`` before it is written.
            NOTE: this replaces ``transcript.cdna.seq`` on the transcript
            itself, so calling the method again with ``middlestar=True``
            inserts a further '*'.
    """
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, self.name + '_cdna.fa')

    # The context manager closes the file even if writing a record fails.
    with open(out_path, 'w') as fout:
        for transcript in self.transcripts.values():
            if transcript.cdna is not None:
                if middlestar:
                    junction = transcript.transcript_cdna_junction_5prime
                    temp = str(transcript.cdna.seq)
                    temp = temp[:junction] + '*' + temp[junction:]
                    transcript.cdna.seq = Seq.Seq(temp, generic_dna)
                SeqIO.write(transcript.cdna, fout, "fasta")
            else:
                cdna = SeqRecord.SeqRecord(
                    Seq.Seq("", generic_dna),
                    id=transcript.name,
                    name=transcript.name,
                    description="No cDNA, fusion junction outside transcript(s) boundary"
                )
                SeqIO.write(cdna, fout, "fasta")
示例6: save_transcript_cds
# 需要導入模塊: from Bio import Alphabet [as 別名]
# 或者: from Bio.Alphabet import generic_dna [as 別名]
def save_transcript_cds(self, out_dir='.', middlestar=False):
    """
    Save the CDS sequences for all fusion isoforms to a fasta file.

    The file is written to ``<out_dir>/<self.name>_cds.fa``; ``out_dir``
    (including missing parent directories) is created when needed. If no
    transcript has a CDS, a debug message is logged and no file is written.

    Args:
        out_dir: directory the fasta file is written to.
        middlestar: when true, a '*' is inserted into each CDS at
            ``transcript_cds_junction_5prime`` before it is written.
            NOTE: this replaces ``transcript.cds.seq`` on the transcript
            itself, so calling the method again with ``middlestar=True``
            inserts a further '*'.
    """
    os.makedirs(out_dir, exist_ok=True)

    # check if any transcripts have coding potential
    has_cds = any(
        transcript.cds is not None
        for transcript in self.transcripts.values()
    )
    if not has_cds:
        self.db.logger.debug('The %s fusion does not produce any protein coding transcripts. No cds.fa file will be written' % self.name)
        return

    out_path = os.path.join(out_dir, self.name + '_cds.fa')

    # The context manager closes the file even if writing a record fails.
    with open(out_path, 'w') as fout:
        for transcript in self.transcripts.values():
            if transcript.cds is None:
                continue
            if middlestar:
                junction = transcript.transcript_cds_junction_5prime
                temp = str(transcript.cds.seq)
                temp = temp[:junction] + '*' + temp[junction:]
                transcript.cds.seq = Seq.Seq(temp, generic_dna)
            SeqIO.write(transcript.cds, fout, "fasta")
示例7: test_1
# 需要導入模塊: from Bio import Alphabet [as 別名]
# 或者: from Bio.Alphabet import generic_dna [as 別名]
def test_1(self):
    """
    Test that the CDS and protein are correct for a junction that is on exon
    boundaries and produces an out-of-frame protein.
    """
    # Check the DNA and protein coding sequences by comparing them with
    # manually generated sequences.
    fusion = agfusion.Fusion(
        gene5prime="TMEM87B",
        gene5primejunction=112843681,
        gene3prime="MERTK",
        gene3primejunction=112722768,
        db=db_human,
        pyensembl_data=data_human,
        protein_databases=['pfam', 'tmhmm'],
        noncanonical=False
    )
    fusion.save_transcript_cdna('TMEM87B-MERTK-case0')
    fusion.save_transcript_cds('TMEM87B-MERTK-case0')
    fusion.save_proteins('TMEM87B-MERTK-case0')

    # Read the expected CDS; the context manager closes the file promptly.
    with open('./data/test-human-case-0.txt', 'r') as handle:
        test_cds = handle.read()

    # Expected protein: translation of the expected CDS, cut at the first '*'.
    test_protein = Seq.Seq(test_cds, alphabet=Alphabet.generic_dna).translate()
    test_protein = test_protein[0:test_protein.find('*')]

    trans = fusion.transcripts['ENST00000283206-ENST00000295408']
    assert test_cds==trans.cds.seq, "cds is wrongly predicted for human fusion (case 0)"
    assert test_protein==trans.protein.seq, "protein is wrongly predicted for human fusion (case 0)"
示例8: test_2
# 需要導入模塊: from Bio import Alphabet [as 別名]
# 或者: from Bio.Alphabet import generic_dna [as 別名]
def test_2(self):
    """
    Test that the CDS and protein are correct for the TMEM87B-MERTK fusion
    with the 3' gene junction at 112722769 (case 2).
    """
    # Check the DNA and protein coding sequences by comparing them with
    # manually generated sequences.
    fusion = agfusion.Fusion(
        gene5prime="TMEM87B",
        gene5primejunction=112843681,
        gene3prime="MERTK",
        gene3primejunction=112722769,
        db=db_human,
        pyensembl_data=data_human,
        protein_databases=['pfam', 'tmhmm'],
        noncanonical=False
    )
    fusion.save_transcript_cdna('TMEM87B-MERTK-case2')
    fusion.save_transcript_cds('TMEM87B-MERTK-case2')
    fusion.save_proteins('TMEM87B-MERTK-case2')

    # Read the expected CDS; the context manager closes the file promptly.
    with open('./data/test-human-case-2.txt', 'r') as handle:
        test_cds = handle.read()

    # Expected protein: translation of the expected CDS, cut at the first '*'.
    test_protein = Seq.Seq(test_cds, alphabet=Alphabet.generic_dna).translate()
    test_protein = test_protein[0:test_protein.find('*')]

    trans = fusion.transcripts['ENST00000283206-ENST00000295408']
    assert test_cds==trans.cds.seq, "cds is wrongly predicted for human fusion (case 2)"
    assert test_protein==trans.protein.seq, "protein is wrongly predicted for human fusion (case 2)"
示例9: test_3
# 需要導入模塊: from Bio import Alphabet [as 別名]
# 或者: from Bio.Alphabet import generic_dna [as 別名]
def test_3(self):
    """
    Test that the CDS and protein are correct for the TMEM87B-MERTK fusion
    with the 3' gene junction at 112722771 (case 3).
    """
    # Check the DNA and protein coding sequences by comparing them with
    # manually generated sequences.
    fusion = agfusion.Fusion(
        gene5prime="TMEM87B",
        gene5primejunction=112843681,
        gene3prime="MERTK",
        gene3primejunction=112722771,
        db=db_human,
        pyensembl_data=data_human,
        protein_databases=['pfam', 'tmhmm'],
        noncanonical=False
    )
    fusion.save_transcript_cdna('TMEM87B-MERTK-case3')
    fusion.save_transcript_cds('TMEM87B-MERTK-case3')
    fusion.save_proteins('TMEM87B-MERTK-case3')

    # Read the expected CDS; the context manager closes the file promptly.
    with open('./data/test-human-case-3.txt', 'r') as handle:
        test_cds = handle.read()

    # Expected protein: translation of the expected CDS, cut at the first '*'.
    test_protein = Seq.Seq(test_cds, alphabet=Alphabet.generic_dna).translate()
    test_protein = test_protein[0:test_protein.find('*')]

    trans = fusion.transcripts['ENST00000283206-ENST00000295408']
    assert test_cds==trans.cds.seq, "cds is wrongly predicted for human fusion (case 3)"
    assert test_protein==trans.protein.seq, "protein is wrongly predicted for human fusion (case 3)"
示例10: _vdj_aa
# 需要導入模塊: from Bio import Alphabet [as 別名]
# 或者: from Bio.Alphabet import generic_dna [as 別名]
def _vdj_aa(self):
    """Return the amino acid sequence of the VDJ region.

    As a side effect, stores ``v_rf_offset``, ``coding_start``,
    ``coding_end`` and ``coding_region`` on the instance and logs them.
    """
    nt = self.oriented_input
    v_start = self.v.query_start

    # reading-frame offset: remainder of the V-start-to-junction length mod 3
    self.v_rf_offset = len(nt[v_start:self.junction.junction_nt_start]) % 3
    self.coding_start = v_start + self.v_rf_offset

    # pull the end back by the remainder of the coding span length mod 3
    j_end = self.j.query_end
    overhang = len(nt[self.coding_start:j_end]) % 3
    self.coding_end = j_end - overhang

    self.coding_region = nt[self.coding_start:self.coding_end + 1]
    protein = Seq(self.coding_region, generic_dna).translate()

    report = (
        ('READING FRAME OFFSET:', self.v_rf_offset),
        ('CODING START:', self.coding_start),
        ('CODING END:', self.coding_end),
        ('CODING REGION:', self.coding_region),
    )
    for label, value in report:
        self.log(label, value)
    return str(protein)
示例11: _vdj_germ_aa
# 需要導入模塊: from Bio import Alphabet [as 別名]
# 或者: from Bio.Alphabet import generic_dna [as 別名]
def _vdj_germ_aa(self):
    """Return the germline amino acid sequence of the VDJ region.

    Translation starts at ``self.v_rf_offset`` and the end is trimmed so
    the translated stretch has a length that is a multiple of three.
    """
    germline = self.vdj_germ_nt
    offset = self.v_rf_offset
    # nucleotides left over after the last complete codon
    leftover = len(germline[offset:]) % 3
    stop = len(germline) - leftover
    protein = Seq(germline[offset:stop], generic_dna).translate()
    return str(protein)
示例12: _get_aa_sequence
# 需要導入模塊: from Bio import Alphabet [as 別名]
# 或者: from Bio.Alphabet import generic_dna [as 別名]
def _get_aa_sequence(self):
    """Return the translation of ``self.coding_region``, read as generic DNA."""
    dna = Seq(self.coding_region, generic_dna)
    return dna.translate()
示例13: curate
# 需要導入模塊: from Bio import Alphabet [as 別名]
# 或者: from Bio.Alphabet import generic_dna [as 別名]
def curate(geneFile):
    """Rewrite a gene FASTA file, keeping only alleles that can be translated.

    Every allele is upper-cased. Alleles whose length is not a multiple of
    three, or for which ``translateSeq(sequence, 11)`` raises, are dropped
    (the exception is printed). The remaining alleles are written back to
    ``geneFile``, replacing its previous content.

    Args:
        geneFile: path to the FASTA file; it is overwritten in place.

    Returns:
        bool: always ``True``.
    """
    kept_records = []
    for allele in SeqIO.parse(geneFile, "fasta", generic_dna):
        sequence = str(allele.seq.upper())
        # if allele is not multiple of 3 it's useless to try to translate
        if len(sequence) % 3 != 0:
            continue
        try:
            # Only success matters here; the returned values are not used.
            # The unpacking is kept so an unexpected return shape is still
            # treated as a failed translation.
            _protseq, _seq, _reversed_seq = translateSeq(sequence, 11)
        except Exception as err:
            print(err)
            continue
        kept_records.append(">" + allele.name + "\n" + sequence + "\n")

    # Text mode: the records are str, and writing str to a file opened with
    # "wb" raises TypeError on Python 3.
    with open(geneFile, "w") as f:
        f.write("".join(kept_records))
    return True
示例14: check_if_list_or_folder
# 需要導入模塊: from Bio import Alphabet [as 別名]
# 或者: from Bio.Alphabet import generic_dna [as 別名]
def check_if_list_or_folder(folder_or_list):
    """Return gene file paths from either a list file or a folder.

    If ``folder_or_list`` can be opened as a file, it is read as a list with
    one path per line: each line is stripped and blank lines are skipped.
    If opening it raises ``IOError`` (for example because it is a
    directory), it is treated as a folder instead: every non-directory
    entry ending in ``.fasta`` whose first record can be parsed is returned
    as an absolute path. Entries that fail to parse are reported with
    ``print`` and skipped.

    Args:
        folder_or_list: path to a list file or to a folder of FASTA files.

    Returns:
        list: the collected paths.
    """
    # check if given a list of genomes paths or a folder to create schema
    try:
        # The context manager closes the list file once it has been read.
        with open(folder_or_list, 'r') as gene_fp:
            stripped = (line.strip() for line in gene_fp)
            return [path for path in stripped if path]
    except IOError:
        # Not readable as a file: fall through to the folder handling.
        pass

    list_files = []
    for gene in os.listdir(folder_or_list):
        if not gene.endswith(".fasta"):
            continue
        genepath = os.path.join(folder_or_list, gene)
        if os.path.isdir(genepath):
            continue
        try:
            # Parse only the first record to check the file is readable FASTA.
            for _allele in SeqIO.parse(genepath, "fasta", generic_dna):
                break
        except Exception as e:
            print (e)
            continue
        list_files.append(os.path.abspath(genepath))
    return list_files
示例15: proc_gene
# 需要導入模塊: from Bio import Alphabet [as 別名]
# 或者: from Bio.Alphabet import generic_dna [as 別名]
def proc_gene(gene,auxBar):
    """Look up a protein name and URL for a gene from its alleles.

    Alleles of ``gene`` are tried in file order. Each one is translated with
    ``translateSeq`` and queried with ``get_protein_info``; alleles for
    which either step raises are skipped. The search stops at the first
    allele whose name does not contain "Uncharacterized protein",
    "hypothetical" or "DUF". If no such allele exists, the name/URL of the
    last successfully queried allele is returned (empty strings if none).

    When ``gene`` is in ``auxBar``, a textual progress bar based on its
    index in ``auxBar`` is printed.

    Args:
        gene: path to the FASTA file with the alleles of the gene.
        auxBar: sequence of genes used to compute the progress bar.

    Returns:
        list: ``[gene, name, url]``.
    """
    uninformative_tags = ("Uncharacterized protein", "hypothetical", "DUF")
    name = ''
    url = ''

    for allele in SeqIO.parse(gene, "fasta", generic_dna):
        sequence = str(allele.seq)
        # `except Exception` instead of a bare `except`, so KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        try:
            proteinSequence = translateSeq(sequence, False)
        except Exception:
            continue
        try:
            name, url = get_protein_info(proteinSequence)
            uninformative = any(tag in name for tag in uninformative_tags)
        except Exception:
            # query failed: try the next allele
            continue
        # The original prevName/prevUrl fallback could never trigger (both
        # were only set immediately before `break`), so it is omitted.
        if not uninformative:
            break

    if gene in auxBar:
        auxlen = len(auxBar)
        index = auxBar.index(gene)
        percent = int((float(index) / auxlen) * 100)
        print ( "["+"="*index+">"+" "*(auxlen-index)+"] Querying "+str(percent)+"%")
    return [gene, name, url]