本文整理汇总了Python中UI.getSpeciesInfo方法的典型用法代码示例。如果您正苦于以下问题：Python UI.getSpeciesInfo方法的具体用法？Python UI.getSpeciesInfo怎么用？Python UI.getSpeciesInfo使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类UI的用法示例。
在下文中一共展示了UI.getSpeciesInfo方法的4个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: importUCSCTranscriptSequences
# 需要导入模块: import UI [as 别名]
# 或者: from UI import getSpeciesInfo [as 别名]
def importUCSCTranscriptSequences(species,array_type,probeset_seq_db):
    """Match UCSC mRNA sequences against probeset sequences and export the results.

    When the global ``force`` equals 'yes', ``mrna.fa.gz`` for the species is
    first requested from hgdownload.cse.ucsc.edu and downloaded into
    ``AltDatabase/<species>/SequenceData/``; a failed download is ignored.
    ``AltDatabase/<species>/SequenceData/mrna.fa`` is then read as FASTA. For
    every record whose accession is a key of the mapping returned by
    ``importUCSCTranscriptAssociations(species)``, each mapped gene that is also
    a key of ``probeset_seq_db`` is passed to ``simpleSeqMatchProtocol`` and
    every (call, probeset) pair it returns is written to
    ``output/<array_type>_UCSC-mRNA_alignments.txt``. Records with at least one
    such gene are also written (accession, upper-cased sequence) to
    ``output/sequences/<array_type>_UCSC_mRNA_seqmatches.txt``.

    Arguments:
    species -- key into UI.getSpeciesInfo() and component of the AltDatabase paths
    array_type -- prefix of the two output file names
    probeset_seq_db -- dictionary keyed by gene ID; values are handed unchanged
        to simpleSeqMatchProtocol

    Returns None.

    Differences from the previous implementation:
    * the last record of the file is now processed (it used to be dropped
      because records were only flushed when the next '>' header was read);
    * a final line without a trailing newline is no longer skipped;
    * both export files and the FASTA handle are closed, also on error;
    * sequence text appearing before the first header no longer raises NameError.

    NOTE(review): the nesting below was reconstructed from a listing whose
    indentation had been stripped -- confirm against the upstream source.
    """
    start_time = time.time()

    if force == 'yes':
        ### Download mRNA sequence file from website
        import UI; species_names = UI.getSpeciesInfo()
        species_full = species_names[species].replace(' ','_')
        ucsc_mRNA_dir = update.getFTPData('hgdownload.cse.ucsc.edu','/goldenPath/currentGenomes/'+species_full+'/bigZips','mrna.fa.gz')
        output_dir = 'AltDatabase/'+species+'/SequenceData/'
        try:
            gz_filepath, status = update.download(ucsc_mRNA_dir,output_dir,'')
            if status == 'not-removed':
                try: os.remove(gz_filepath) ### Not sure why this works now and not before
                except OSError: pass
        except Exception: pass ### Occurs when file is not available for this species

    filename = 'AltDatabase/'+species+'/SequenceData/mrna.fa'
    alignments_file = 'AltDatabase/'+species+'/SequenceData/output/'+array_type+'_UCSC-mRNA_alignments.txt'
    seqmatches_file = 'AltDatabase/'+species+'/SequenceData/output/sequences/'+array_type+'_UCSC_mRNA_seqmatches.txt'

    dataw = None; datar = None; fasta = None
    try:
        dataw = export.ExportFile(alignments_file)
        datar = export.ExportFile(seqmatches_file)
        ucsc_mrna_to_gene = importUCSCTranscriptAssociations(species)

        def exportRecord(accession,sequence_parts):
            """Match one completed FASTA record and write its results."""
            if accession is None or len(sequence_parts) == 0: return
            if accession not in ucsc_mrna_to_gene: return
            mRNA_seq = ''.join(sequence_parts).upper()
            gene_found = 'no'
            try:
                for ens_gene in ucsc_mrna_to_gene[accession]:
                    if ens_gene in probeset_seq_db:
                        gene_found = 'yes'
                        results = simpleSeqMatchProtocol(probeset_seq_db[ens_gene],mRNA_seq)
                        for (call,probeset) in results:
                            dataw.write('\t'.join([probeset,str(call),accession])+'\n')
                if gene_found == 'yes':
                    datar.write('\t'.join([accession,mRNA_seq])+'\n')
            except IndexError:
                ### Kept from the original, which swallowed IndexError here (presumably
                ### raised inside simpleSeqMatchProtocol -- confirm); the record is skipped
                pass

        print("Begining generic fasta import of %s" % filename)
        #'>gnl|ENS|Mm#S10859962 Mus musculus 12 days embryo spinal ganglion cDNA /gb=AK051143 /gi=26094349 /ens=Mm.1 /len=2289']
        #'ATCGTGGTGTGCCCAGCTCTTCCAAGGACTGCTGCGCTTCGGGGCCCAGGTGAGTCCCGC'
        accession = None; sequence_parts = []
        fasta = open(filepath(filename),'rU')
        for line in fasta:
            data = line.rstrip('\n')
            if len(data) == 0 or data[0] == '#': continue
            if data[0] == '>':
                ### A new header completes the previous record
                exportRecord(accession,sequence_parts)
                accession = data.split(' ')[0][1:]
                sequence_parts = []
            else:
                sequence_parts.append(data)
        ### The final record has no following header, so flush it explicitly
        exportRecord(accession,sequence_parts)
    finally:
        for handle in (fasta,datar,dataw):
            if handle is not None: handle.close()

    end_time = time.time(); time_diff = int(end_time-start_time)
    print("UCSC mRNA sequences analyzed in %d seconds" % time_diff)
示例2: runExtractUniProt
# 需要导入模块: import UI [as 别名]
# 或者: from UI import getSpeciesInfo [as 别名]
def runExtractUniProt(species,species_full,uniprot_filename_url,trembl_filename_url,force):
# Import (downloading when needed) the Ensembl-UniProt relationship file and the
# UniProt annotation file for `species`, then call export() and exportEnsemblUniprot().
#
# species              -- species code; used in file paths and as key into UI.getSpeciesInfo()
# species_full         -- stored in the global species_name as passed, then overwritten locally
# uniprot_filename_url -- URL of the UniProt file; its last path segment minus '.gz' is the local file name
# trembl_filename_url  -- URL of the TrEMBL file; only used when add_trembl_annotations is 'yes'
# force                -- 'no' imports the existing local UniProt file; otherwise the file is
#                         re-downloaded unless update.verifyFile reports counts > 8
#
# Resets the globals uniprot_ensembl_db, uniprot_db and secondary_to_primary_db to empty
# dictionaries and assigns the globals species_name and uniprot_fildir.
#
# NOTE(review): indentation was lost in this listing, so the nesting of the try/except
# blocks below cannot be confirmed from here -- check against the upstream source.
global uniprot_ensembl_db;uniprot_ensembl_db={}
global uniprot_db;uniprot_db={}; global species_name; global uniprot_fildir
global secondary_to_primary_db; secondary_to_primary_db={}
import update; reload(update)
species_name = species_full
# The species_full argument is replaced by the UI lookup (spaces -> underscores);
# the resulting value is not read again in the visible body.
import UI; species_names = UI.getSpeciesInfo()
species_full = species_names[species]
species_full = string.replace(species_full,' ','_')
# Local file names: last URL path segment with '.gz' removed
uniprot_file = string.split(uniprot_filename_url,'/')[-1]; uniprot_file = string.replace(uniprot_file,'.gz','')
trembl_file = string.split(trembl_filename_url,'/')[-1]; trembl_file = string.replace(trembl_file,'.gz','')
uniprot_fildir = 'AltDatabase/uniprot/'+species+'/'
uniprot_download_fildir = 'AltDatabase/uniprot/'
uniprot_ens_file = species+'_Ensembl-UniProt.txt'; uniprot_ens_location = uniprot_fildir+uniprot_ens_file
uniprot_location = uniprot_download_fildir+uniprot_file
trembl_location = uniprot_download_fildir+trembl_file
add_trembl_annotations = 'no' ### Currently we don't need these annotations
# Try the local Ensembl-UniProt file first; on IOError fall back to a download.
# Any failure of the fallback is silently ignored.
try: importEnsemblUniprot(uniprot_ens_location)
except IOError:
try:
### Download the data from the AltAnalyze website (if there)
update.downloadCurrentVersion(uniprot_ens_location,species,'txt')
importEnsemblUniprot(uniprot_ens_location)
except Exception: null=[]
# Also try an alternately named file; failures are silently ignored.
# NOTE(review): the second replace matches the 'uniprot' directory component of the path
# as well as the file name -- confirm that the resulting path is the intended one.
try:
uniprot_ens_location_built = string.replace(uniprot_ens_location,'UniProt','Uniprot-SWISSPROT')
uniprot_ens_location_built = string.replace(uniprot_ens_location_built,'uniprot','Uniprot-SWISSPROT')
importEnsemblUniprot(uniprot_ens_location_built)
except Exception: null=[]
### Import UniProt annotations
counts = update.verifyFile(uniprot_location,'counts')
if force == 'no' or counts > 8: import_uniprot_db(uniprot_location)
else:
### Directly download the data from UniProt
gz_filepath, status = update.download(uniprot_filename_url,uniprot_download_fildir,'')
if status == 'not-removed':
try: os.remove(gz_filepath) ### Not sure why this works now and not before
except OSError: status = status
import_uniprot_db(uniprot_location)
# add_trembl_annotations is hard-coded to 'no' above, so this branch does not run as written.
if add_trembl_annotations == 'yes':
### Import TreMBL annotations
try:
# NOTE(review): this modifies uniprot_location, but the import below reads
# trembl_location, so it does not look like it forces the IOError -- confirm intent.
if force == 'yes': uniprot_location += '!!!!!' ### Force an IOError
import_uniprot_db(trembl_location)
except IOError:
### Directly download the data from UniProt
update.download(trembl_filename_url,uniprot_download_fildir,'')
import_uniprot_db(trembl_location)
# export here is called as a function defined outside this block
export()
exportEnsemblUniprot(uniprot_ens_location)
示例3: importmiRNAMap
# 需要导入模块: import UI [as 别名]
# 或者: from UI import getSpeciesInfo [as 别名]
def importmiRNAMap(parse_sequences,force):
""" Added in AltAnalyze version 2.0, this database provides target sequences for several species and different databases,
including miRanda, RNAhybrid and TargetScan. For more information see: http://mirnamap.mbc.nctu.edu.tw/html/about.html"""
# parse_sequences -- 'yes' appends target sequences to existing combined_results entries;
#                    any other value adds MicroRNATargetData objects to microRNA_target_db
#                    (see the NOTE on the else branch below)
# force           -- 'yes' re-downloads the target file even if one is already present
# Returns the number of MicroRNATargetData objects created.
#
# species, ens_gene_to_transcript, combined_results and microRNA_target_db are not
# parameters; they are read from an enclosing/module scope not visible here.
# NOTE(review): indentation was lost in this listing -- confirm nesting against upstream.
gz_filepath = verifyFileAdvanced('miRNA_targets_',species)
if force == 'yes' or len(gz_filepath)==0:
import UI; species_names = UI.getSpeciesInfo()
species_full = species_names[species]
species_full = string.replace(species_full,' ','_')
miRNAMap_dir = update.getFTPData('mirnamap.mbc.nctu.edu.tw','/miRNAMap2/miRNA_Targets/'+species_full,'.txt.tar.gz')
output_dir = 'AltDatabase/miRBS/'+species+'/'
gz_filepath, status = update.download(miRNAMap_dir,output_dir,'')
if status == 'not-removed':
try: os.remove(gz_filepath) ### Not sure why this works now and not before
except OSError: status = status
# The file that is read is the archive path with '.tar.gz' removed
fn=filepath(string.replace(gz_filepath,'.tar.gz','')); x=0; count=0
for line in open(fn,'rU').readlines():
data = cleanUpLine(line)
t = string.split(data,'\t')
# x==0 only for the first line, which is skipped (presumably a header row -- confirm)
if x==0: x=1
else:
try:
# Exactly 11 tab-separated fields are expected; any other count raises and is
# swallowed by the except at the end of this try
miRNA, ensembl_transcript_id, target_start, target_end, miRNA_seq, alignment, target_seq, algorithm, c1, c2, c3 = t
#if 'GGCTCCTGTCACCTGGGTCCGT'in target_seq:
#print 'a'; sys.exit()
#if 'TCF7L1' in symbol or 'TCF3' in symbol:
#if '-422a' in miRNA:
#print miRNA;sys.exit()
#print symbol, mir; sys.exit()
if ensembl_transcript_id in ens_gene_to_transcript:
geneids = ens_gene_to_transcript[ensembl_transcript_id]
# Normalise the target sequence: drop '-' characters, upper-case, replace U with T
target_seq = string.upper(string.replace(target_seq,'-',''))
target_seq = string.replace(target_seq,'U','T')
for ensembl_geneid in geneids:
if parse_sequences == 'yes':
if (miRNA,ensembl_geneid) in combined_results:
combined_results[(miRNA,ensembl_geneid)].append(target_seq)
# NOTE(review): with indentation lost it is unclear whether this else pairs with
# `if parse_sequences == 'yes'` or with the membership test above -- confirm
else:
y = MicroRNATargetData(ensembl_geneid,'',miRNA,target_seq,algorithm); count+=1
try: microRNA_target_db[miRNA].append(y)
except KeyError: microRNA_target_db[miRNA] = [y]
except Exception: x=1 ### Bad formatting
print count, 'miRNA-target relationships added for mirnamap'
return count
示例4: importUCSCTranscriptSequences
# 需要导入模块: import UI [as 别名]
# 或者: from UI import getSpeciesInfo [as 别名]
def importUCSCTranscriptSequences(species, array_type, probeset_seq_db):
    """Match UCSC mRNA sequences against probeset sequences and export the results.

    When the global ``force`` equals "yes", ``mrna.fa.gz`` for the species is
    first requested from hgdownload.cse.ucsc.edu and downloaded into
    ``AltDatabase/<species>/SequenceData/``; a failed download is ignored.
    ``AltDatabase/<species>/SequenceData/mrna.fa`` is then read as FASTA. For
    every record whose accession is a key of the mapping returned by
    ``importUCSCTranscriptAssociations(species)``, each mapped gene that is also
    a key of ``probeset_seq_db`` is passed to ``simpleSeqMatchProtocol`` and
    every (call, probeset) pair it returns is written to
    ``output/<array_type>_UCSC-mRNA_alignments.txt``. Records with at least one
    such gene are also written (accession, upper-cased sequence) to
    ``output/sequences/<array_type>_UCSC_mRNA_seqmatches.txt``.

    Arguments:
    species -- key into UI.getSpeciesInfo() and component of the AltDatabase paths
    array_type -- prefix of the two output file names
    probeset_seq_db -- dictionary keyed by gene ID; values are handed unchanged
        to simpleSeqMatchProtocol

    Returns None.

    Differences from the previous implementation:
    * the last record of the file is now processed (it used to be dropped
      because records were only flushed when the next ">" header was read);
    * a final line without a trailing newline is no longer skipped;
    * both export files and the FASTA handle are closed, also on error;
    * sequence text appearing before the first header no longer raises NameError.

    NOTE(review): the nesting below was reconstructed from a listing whose
    indentation had been stripped -- confirm against the upstream source.
    """
    start_time = time.time()

    if force == "yes":
        ### Download mRNA sequence file from website
        import UI

        species_names = UI.getSpeciesInfo()
        species_full = species_names[species].replace(" ", "_")
        ucsc_mRNA_dir = update.getFTPData(
            "hgdownload.cse.ucsc.edu", "/goldenPath/currentGenomes/" + species_full + "/bigZips", "mrna.fa.gz"
        )
        output_dir = "AltDatabase/" + species + "/SequenceData/"
        try:
            gz_filepath, status = update.download(ucsc_mRNA_dir, output_dir, "")
            if status == "not-removed":
                try:
                    os.remove(gz_filepath)  ### Not sure why this works now and not before
                except OSError:
                    pass
        except Exception:
            pass  ### Occurs when file is not available for this species

    filename = "AltDatabase/" + species + "/SequenceData/mrna.fa"
    alignments_file = "AltDatabase/" + species + "/SequenceData/output/" + array_type + "_UCSC-mRNA_alignments.txt"
    seqmatches_file = (
        "AltDatabase/" + species + "/SequenceData/output/sequences/" + array_type + "_UCSC_mRNA_seqmatches.txt"
    )

    dataw = None
    datar = None
    fasta = None
    try:
        dataw = export.ExportFile(alignments_file)
        datar = export.ExportFile(seqmatches_file)
        ucsc_mrna_to_gene = importUCSCTranscriptAssociations(species)

        def exportRecord(accession, sequence_parts):
            """Match one completed FASTA record and write its results."""
            if accession is None or len(sequence_parts) == 0:
                return
            if accession not in ucsc_mrna_to_gene:
                return
            mRNA_seq = "".join(sequence_parts).upper()
            gene_found = "no"
            try:
                for ens_gene in ucsc_mrna_to_gene[accession]:
                    if ens_gene in probeset_seq_db:
                        gene_found = "yes"
                        results = simpleSeqMatchProtocol(probeset_seq_db[ens_gene], mRNA_seq)
                        for (call, probeset) in results:
                            dataw.write("\t".join([probeset, str(call), accession]) + "\n")
                if gene_found == "yes":
                    datar.write("\t".join([accession, mRNA_seq]) + "\n")
            except IndexError:
                ### Kept from the original, which swallowed IndexError here (presumably
                ### raised inside simpleSeqMatchProtocol -- confirm); the record is skipped
                pass

        print("Begining generic fasta import of %s" % filename)
        #'>gnl|ENS|Mm#S10859962 Mus musculus 12 days embryo spinal ganglion cDNA /gb=AK051143 /gi=26094349 /ens=Mm.1 /len=2289']
        #'ATCGTGGTGTGCCCAGCTCTTCCAAGGACTGCTGCGCTTCGGGGCCCAGGTGAGTCCCGC'
        accession = None
        sequence_parts = []
        fasta = open(filepath(filename), "rU")
        for line in fasta:
            data = line.rstrip("\n")
            if len(data) == 0 or data[0] == "#":
                continue
            if data[0] == ">":
                ### A new header completes the previous record
                exportRecord(accession, sequence_parts)
                accession = data.split(" ")[0][1:]
                sequence_parts = []
            else:
                sequence_parts.append(data)
        ### The final record has no following header, so flush it explicitly
        exportRecord(accession, sequence_parts)
    finally:
        for handle in (fasta, datar, dataw):
            if handle is not None:
                handle.close()

    end_time = time.time()
    time_diff = int(end_time - start_time)
    print("UCSC mRNA sequences analyzed in %d seconds" % time_diff)